llvm.org GIT mirror llvm / 3996e52
[DAG] Further improve the logic in DAGCombiner that folds a pair of shuffles into a single shuffle if the resulting mask is legal. This patch teaches the DAGCombiner how to fold shuffles according to the following new rules: 1. shuffle(shuffle(x, y), undef) -> x 2. shuffle(shuffle(x, y), undef) -> y 3. shuffle(shuffle(x, y), undef) -> shuffle(x, undef) 4. shuffle(shuffle(x, y), undef) -> shuffle(y, undef) The backend avoids combining shuffles according to rules 3. and 4. if the resulting shuffle does not have a legal mask. This is to avoid introducing illegal shuffles that are potentially expanded into a sub-optimal sequence of target specific dag nodes during vector legalization. Added test case combine-vec-shuffle-2.ll to verify that we correctly trigger the new rules when combining shuffles. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212748 91177308-0d34-0410-b5e6-96231b3b80d8 Andrea Di Biagio 5 years ago
3 changed file(s) with 219 addition(s) and 16 deletion(s). Raw diff Collapse all Expand all
1067610676 }
1067710677
1067810678 // If this shuffle node is simply a swizzle of another shuffle node,
10679 // and it reverses the swizzle of the previous shuffle then we can
10680 // optimize shuffle(shuffle(x, undef), undef) -> x.
10679 // then try to simplify it.
1068110680 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
1068210681 N1.getOpcode() == ISD::UNDEF) {
1068310682
1068410683 ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
10685
10686 // Shuffle nodes can only reverse shuffles with a single non-undef value.
10687 if (N0.getOperand(1).getOpcode() != ISD::UNDEF)
10688 return SDValue();
1068910684
1069010685 // The incoming shuffle must be of the same type as the result of the
1069110686 // current shuffle.
1070310698 Idx = OtherSV->getMaskElt(Idx);
1070410699 Mask.push_back(Idx);
1070510700 }
10706
10701
10702 bool CommuteOperands = false;
10703 if (N0.getOperand(1).getOpcode() != ISD::UNDEF) {
10704 // To be valid, the combined shuffle mask should only reference elements
10705 // from one of the two vectors in input to the inner shufflevector.
10706 bool IsValidMask = true;
10707 for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
10708 // See if the combined mask only references undefs or elements coming
10709 // from the first shufflevector operand.
10710 IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] < NumElts;
10711
10712 if (!IsValidMask) {
10713 IsValidMask = true;
10714 for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
10715 // Check that all the elements come from the second shuffle operand.
10716 IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] >= NumElts;
10717 CommuteOperands = IsValidMask;
10718 }
10719
10720 // Early exit if the combined shuffle mask is not valid.
10721 if (!IsValidMask)
10722 return SDValue();
10723 }
10724
10725 // See if this pair of shuffles can be safely folded according to either
10726 // of the following rules:
10727 // shuffle(shuffle(x, y), undef) -> x
10728 // shuffle(shuffle(x, undef), undef) -> x
10729 // shuffle(shuffle(x, y), undef) -> y
1070710730 bool IsIdentityMask = true;
10731 unsigned BaseMaskIndex = CommuteOperands ? NumElts : 0;
1070810732 for (unsigned i = 0; i != NumElts && IsIdentityMask; ++i) {
1070910733 // Skip Undefs.
1071010734 if (Mask[i] < 0)
1071110735 continue;
1071210736
1071310737 // The combined shuffle must map each index to itself.
10714 IsIdentityMask = (unsigned)Mask[i] == i;
10715 }
10716
10717 if (IsIdentityMask)
10718 // optimize shuffle(shuffle(x, undef), undef) -> x.
10738 IsIdentityMask = (unsigned)Mask[i] == i + BaseMaskIndex;
10739 }
10740
10741 if (IsIdentityMask) {
10742 if (CommuteOperands)
10743 // optimize shuffle(shuffle(x, y), undef) -> y.
10744 return OtherSV->getOperand(1);
10745
10746 // optimize shuffle(shuffle(x, undef), undef) -> x
10747 // optimize shuffle(shuffle(x, y), undef) -> x
1071910748 return OtherSV->getOperand(0);
10749 }
1072010750
1072110751 // It may still be beneficial to combine the two shuffles if the
1072210752 // resulting shuffle is legal.
10723 // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3).
10724 if (TLI.isShuffleMaskLegal(Mask, VT))
10725 return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1,
10753 if (TLI.isShuffleMaskLegal(Mask, VT)) {
10754 if (!CommuteOperands)
10755 // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3).
10756 // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3)
10757 return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1,
10758 &Mask[0]);
10759
10760 // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(undef, y, M3)
10761 return DAG.getVectorShuffle(VT, SDLoc(N), N1, N0->getOperand(1),
1072610762 &Mask[0]);
10763 }
1072710764 }
1072810765
1072910766 return SDValue();
0 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
1
2 ; Check that DAGCombiner correctly folds the following pairs of shuffles
3 ; using the following rules:
4 ; 1. shuffle(shuffle(x, y), undef) -> x
5 ; 2. shuffle(shuffle(x, y), undef) -> y
6 ; 3. shuffle(shuffle(x, y), undef) -> shuffle(x, undef)
7 ; 4. shuffle(shuffle(x, y), undef) -> shuffle(undef, y)
8 ;
9 ; Rules 3. and 4. are used only if the resulting shuffle mask is legal.
10
11 define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) {
12 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32>
13 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32>
14 ret <4 x i32> %2
15 }
16 ; CHECK-LABEL: test1
17 ; Mask: [3,0,0,1]
18 ; CHECK: pshufd $67
19 ; CHECK-NEXT: ret
20
21
22 define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) {
23 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32>
24 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32>
25 ret <4 x i32> %2
26 }
27 ; CHECK-LABEL: test2
28 ; Mask: [2,0,0,3]
29 ; CHECK: pshufd $-62
30 ; CHECK-NEXT: ret
31
32
33 define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) {
34 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32>
35 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32>
36 ret <4 x i32> %2
37 }
38 ; CHECK-LABEL: test3
39 ; Mask: [2,0,0,3]
40 ; CHECK: pshufd $-62
41 ; CHECK-NEXT: ret
42
43
44 define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) {
45 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32>
46 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32>
47 ret <4 x i32> %2
48 }
49 ; CHECK-LABEL: test4
50 ; Mask: [0,0,0,1]
51 ; CHECK: pshufd $64
52 ; CHECK-NEXT: ret
53
54
55 define <4 x i32> @test5(<4 x i32> %A, <4 x i32> %B) {
56 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32>
57 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32>
58 ret <4 x i32> %2
59 }
60 ; CHECK-LABEL: test5
61 ; Mask: [1,1]
62 ; CHECK: movhlps
63 ; CHECK-NEXT: ret
64
65
66 define <4 x i32> @test6(<4 x i32> %A, <4 x i32> %B) {
67 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32>
68 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32>
69 ret <4 x i32> %2
70 }
71 ; CHECK-LABEL: test6
72 ; Mask: [2,0,0,0]
73 ; CHECK: pshufd $2
74 ; CHECK-NEXT: ret
75
76
77 define <4 x i32> @test7(<4 x i32> %A, <4 x i32> %B) {
78 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32>
79 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32>
80 ret <4 x i32> %2
81 }
82 ; CHECK-LABEL: test7
83 ; Mask: [0,2,0,2]
84 ; CHECK: pshufd $-120
85 ; CHECK-NEXT: ret
86
87
88 define <4 x i32> @test8(<4 x i32> %A, <4 x i32> %B) {
89 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32>
90 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32>
91 ret <4 x i32> %2
92 }
93 ; CHECK-LABEL: test8
94 ; Mask: [1,0,3,0]
95 ; CHECK: pshufd $49
96 ; CHECK-NEXT: ret
97
98
99 define <4 x i32> @test9(<4 x i32> %A, <4 x i32> %B) {
100 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32>
101 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32>
102 ret <4 x i32> %2
103 }
104 ; CHECK-LABEL: test9
105 ; Mask: [1,3,0,2]
106 ; CHECK: pshufd $-115
107 ; CHECK-NEXT: ret
108
109
110 define <4 x i32> @test10(<4 x i32> %A, <4 x i32> %B) {
111 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32>
112 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32>
113 ret <4 x i32> %2
114 }
115 ; CHECK-LABEL: test10
116 ; Mask: [1,0,1,0]
117 ; CHECK: pshufd $17
118 ; CHECK-NEXT: ret
119
120
121 define <4 x i32> @test11(<4 x i32> %A, <4 x i32> %B) {
122 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32>
123 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32>
124 ret <4 x i32> %2
125 }
126 ; CHECK-LABEL: test11
127 ; Mask: [1,0,2,1]
128 ; CHECK: pshufd $97
129 ; CHECK-NEXT: ret
130
131
132 define <4 x i32> @test12(<4 x i32> %A, <4 x i32> %B) {
133 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32>
134 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32>
135 ret <4 x i32> %2
136 }
137 ; CHECK-LABEL: test12
138 ; Mask: [0,0,0,0]
139 ; CHECK: pshufd $0
140 ; CHECK-NEXT: ret
141
142
143 ; The following pair of shuffles is folded into vector %A.
144 define <4 x i32> @test13(<4 x i32> %A, <4 x i32> %B) {
145 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32>
146 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32>
147 ret <4 x i32> %2
148 }
149 ; CHECK-LABEL: test13
150 ; CHECK-NOT: pshufd
151 ; CHECK: ret
152
153
154 ; The following pair of shuffles is folded into vector %B.
155 define <4 x i32> @test14(<4 x i32> %A, <4 x i32> %B) {
156 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32>
157 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32>
158 ret <4 x i32> %2
159 }
160 ; CHECK-LABEL: test14
161 ; CHECK-NOT: pshufd
162 ; CHECK: ret
163
3232 define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind {
3333 entry:
3434 ; CHECK-LABEL: shuf3:
35 ; CHECK: shufps
35 ; CHECK-NOT: movlhps
36 ; CHECK-NOT: shufps
37 ; CHECK: pshufd
3638 %shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32>
3739 %tmp25.i.i = shufflevector <4 x float> %shuffle.i.i.i12, <4 x float> undef, <3 x i32>
3840 %tmp1.i.i = shufflevector <3 x float> %tmp25.i.i, <3 x float> zeroinitializer, <4 x i32>