llvm.org GIT mirror llvm / 6529ef6
[DAGCombiner][X86][PowerPC] Teach visitSIGN_EXTEND_INREG to fold (sext_in_reg (aext/sext x)) -> (sext x) when x has more than 1 sign bit and the sext_inreg is from one of them. If x has multiple sign bits then it doesn't matter which one we extend from so we can sext from x's msb instead. The X86 setcc-combine.ll changes are a little weird. It appears we ended up with a (sext_inreg (aext (trunc (extractelt)))) after type legalization. The sext_inreg+aext now gets optimized by this combine to leave (sext (trunc (extractelt))). Then we visit the trunc before we visit the sext. This ends up changing the truncate to an extractvectorelt from a bitcasted vector. I have a follow up patch to fix this. Differential Revision: https://reviews.llvm.org/D56156 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350235 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 9 months ago
5 changed file(s) with 56 addition(s) and 49 deletion(s). Raw diff Collapse all Expand all
94139413
94149414 // fold (sext_in_reg (sext x)) -> (sext x)
94159415 // fold (sext_in_reg (aext x)) -> (sext x)
9416 // if x is small enough.
9416 // if x is small enough or if we know that x has more than 1 sign bit and the
9417 // sign_extend_inreg is extending from one of them.
94179418 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
94189419 SDValue N00 = N0.getOperand(0);
9419 if (N00.getScalarValueSizeInBits() <= EVTBits &&
9420 unsigned N00Bits = N00.getScalarValueSizeInBits();
9421 if ((N00Bits <= EVTBits ||
9422 (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) &&
94209423 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
94219424 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
94229425 }
354354 ; CHECK-NEXT: lxv v2, 0(r3)
355355 ; CHECK-NEXT: xscvqpswz v2, v2
356356 ; CHECK-NEXT: mfvsrwz r3, v2
357 ; CHECK-NEXT: extsh r3, r3
357 ; CHECK-NEXT: extsw r3, r3
358358 ; CHECK-NEXT: blr
359359 entry:
360360 %0 = load fp128, fp128* %a, align 16
392392 ; CHECK-NEXT: xsaddqp v2, v2, v3
393393 ; CHECK-NEXT: xscvqpswz v2, v2
394394 ; CHECK-NEXT: mfvsrwz r3, v2
395 ; CHECK-NEXT: extsh r3, r3
395 ; CHECK-NEXT: extsw r3, r3
396396 ; CHECK-NEXT: blr
397397 entry:
398398 %0 = load fp128, fp128* %a, align 16
508508 ; CHECK-NEXT: lxv v2, 0(r3)
509509 ; CHECK-NEXT: xscvqpswz v2, v2
510510 ; CHECK-NEXT: mfvsrwz r3, v2
511 ; CHECK-NEXT: extsb r3, r3
511 ; CHECK-NEXT: extsw r3, r3
512512 ; CHECK-NEXT: blr
513513 entry:
514514 %0 = load fp128, fp128* %a, align 16
546546 ; CHECK-NEXT: xsaddqp v2, v2, v3
547547 ; CHECK-NEXT: xscvqpswz v2, v2
548548 ; CHECK-NEXT: mfvsrwz r3, v2
549 ; CHECK-NEXT: extsb r3, r3
549 ; CHECK-NEXT: extsw r3, r3
550550 ; CHECK-NEXT: blr
551551 entry:
552552 %0 = load fp128, fp128* %a, align 16
743743 ; CHECK-NOT: addic
744744 ; CHECK-NOT: subfe
745745 ; CHECK-NOT: isel
746 ; CHECK: extsh
746 ; CHECK: extsw
747747 ; CHECK: blr
748748 ; CHECK-PWR8-LABEL: setb28
749749 ; CHECK-PWR8-DAG: cmpd
750750 ; CHECK-PWR8-DAG: addic
751751 ; CHECK-PWR8-DAG: subfe
752752 ; CHECK-PWR8: isel
753 ; CHECK-PWR8: extsh
753 ; CHECK-PWR8: extsw
754754 ; CHECK-PWR8: blr
755755 }
756756
77 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
88 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
99 ; SSE2-NEXT: pxor %xmm1, %xmm0
10 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
11 ; SSE2-NEXT: movd %xmm0, %eax
10 ; SSE2-NEXT: pextrw $2, %xmm0, %eax
11 ; SSE2-NEXT: movsbl %al, %eax
1212 ; SSE2-NEXT: retq
1313 ;
1414 ; SSE41-LABEL: test_eq_1:
1616 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
1717 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
1818 ; SSE41-NEXT: pxor %xmm1, %xmm0
19 ; SSE41-NEXT: pextrd $1, %xmm0, %eax
19 ; SSE41-NEXT: pextrb $4, %xmm0, %eax
20 ; SSE41-NEXT: movsbl %al, %eax
2021 ; SSE41-NEXT: retq
2122 %cmp = icmp slt <4 x i32> %A, %B
2223 %sext = sext <4 x i1> %cmp to <4 x i32>
3031 ; SSE2-LABEL: test_ne_1:
3132 ; SSE2: # %bb.0:
3233 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
33 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
34 ; SSE2-NEXT: movd %xmm0, %eax
34 ; SSE2-NEXT: pextrw $2, %xmm1, %eax
35 ; SSE2-NEXT: movsbl %al, %eax
3536 ; SSE2-NEXT: retq
3637 ;
3738 ; SSE41-LABEL: test_ne_1:
3839 ; SSE41: # %bb.0:
3940 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
40 ; SSE41-NEXT: pextrd $1, %xmm1, %eax
41 ; SSE41-NEXT: pextrb $4, %xmm1, %eax
42 ; SSE41-NEXT: movsbl %al, %eax
4143 ; SSE41-NEXT: retq
4244 %cmp = icmp slt <4 x i32> %A, %B
4345 %sext = sext <4 x i1> %cmp to <4 x i32>
6668 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
6769 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
6870 ; SSE2-NEXT: pxor %xmm1, %xmm0
69 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
70 ; SSE2-NEXT: movd %xmm0, %eax
71 ; SSE2-NEXT: pextrw $2, %xmm0, %eax
72 ; SSE2-NEXT: movsbl %al, %eax
7173 ; SSE2-NEXT: retq
7274 ;
7375 ; SSE41-LABEL: test_ge_1:
7577 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
7678 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
7779 ; SSE41-NEXT: pxor %xmm1, %xmm0
78 ; SSE41-NEXT: pextrd $1, %xmm0, %eax
80 ; SSE41-NEXT: pextrb $4, %xmm0, %eax
81 ; SSE41-NEXT: movsbl %al, %eax
7982 ; SSE41-NEXT: retq
8083 %cmp = icmp slt <4 x i32> %A, %B
8184 %sext = sext <4 x i1> %cmp to <4 x i32>
8992 ; SSE2-LABEL: test_lt_1:
9093 ; SSE2: # %bb.0:
9194 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
92 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
93 ; SSE2-NEXT: movd %xmm0, %eax
95 ; SSE2-NEXT: pextrw $2, %xmm1, %eax
96 ; SSE2-NEXT: movsbl %al, %eax
9497 ; SSE2-NEXT: retq
9598 ;
9699 ; SSE41-LABEL: test_lt_1:
97100 ; SSE41: # %bb.0:
98101 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
99 ; SSE41-NEXT: pextrd $1, %xmm1, %eax
102 ; SSE41-NEXT: pextrb $4, %xmm1, %eax
103 ; SSE41-NEXT: movsbl %al, %eax
100104 ; SSE41-NEXT: retq
101105 %cmp = icmp slt <4 x i32> %A, %B
102106 %sext = sext <4 x i1> %cmp to <4 x i32>
125129 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
126130 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
127131 ; SSE2-NEXT: pxor %xmm0, %xmm1
128 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
129 ; SSE2-NEXT: movd %xmm0, %eax
132 ; SSE2-NEXT: pextrw $2, %xmm1, %eax
133 ; SSE2-NEXT: movsbl %al, %eax
130134 ; SSE2-NEXT: retq
131135 ;
132136 ; SSE41-LABEL: test_eq_2:
134138 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
135139 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
136140 ; SSE41-NEXT: pxor %xmm0, %xmm1
137 ; SSE41-NEXT: pextrd $1, %xmm1, %eax
141 ; SSE41-NEXT: pextrb $4, %xmm1, %eax
142 ; SSE41-NEXT: movsbl %al, %eax
138143 ; SSE41-NEXT: retq
139144 %cmp = icmp slt <4 x i32> %B, %A
140145 %sext = sext <4 x i1> %cmp to <4 x i32>
148153 ; SSE2-LABEL: test_ne_2:
149154 ; SSE2: # %bb.0:
150155 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
151 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
152 ; SSE2-NEXT: movd %xmm0, %eax
156 ; SSE2-NEXT: pextrw $2, %xmm0, %eax
157 ; SSE2-NEXT: movsbl %al, %eax
153158 ; SSE2-NEXT: retq
154159 ;
155160 ; SSE41-LABEL: test_ne_2:
156161 ; SSE41: # %bb.0:
157162 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
158 ; SSE41-NEXT: pextrd $1, %xmm0, %eax
163 ; SSE41-NEXT: pextrb $4, %xmm0, %eax
164 ; SSE41-NEXT: movsbl %al, %eax
159165 ; SSE41-NEXT: retq
160166 %cmp = icmp slt <4 x i32> %B, %A
161167 %sext = sext <4 x i1> %cmp to <4 x i32>
171177 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
172178 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
173179 ; SSE2-NEXT: pxor %xmm0, %xmm1
174 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
175 ; SSE2-NEXT: movd %xmm0, %eax
180 ; SSE2-NEXT: pextrw $2, %xmm1, %eax
181 ; SSE2-NEXT: movsbl %al, %eax
176182 ; SSE2-NEXT: retq
177183 ;
178184 ; SSE41-LABEL: test_le_2:
180186 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
181187 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
182188 ; SSE41-NEXT: pxor %xmm0, %xmm1
183 ; SSE41-NEXT: pextrd $1, %xmm1, %eax
189 ; SSE41-NEXT: pextrb $4, %xmm1, %eax
190 ; SSE41-NEXT: movsbl %al, %eax
184191 ; SSE41-NEXT: retq
185192 %cmp = icmp slt <4 x i32> %B, %A
186193 %sext = sext <4 x i1> %cmp to <4 x i32>
207214 ; SSE2-LABEL: test_lt_2:
208215 ; SSE2: # %bb.0:
209216 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
210 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
211 ; SSE2-NEXT: movd %xmm0, %eax
217 ; SSE2-NEXT: pextrw $2, %xmm0, %eax
218 ; SSE2-NEXT: movsbl %al, %eax
212219 ; SSE2-NEXT: retq
213220 ;
214221 ; SSE41-LABEL: test_lt_2:
215222 ; SSE41: # %bb.0:
216223 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
217 ; SSE41-NEXT: pextrd $1, %xmm0, %eax
224 ; SSE41-NEXT: pextrb $4, %xmm0, %eax
225 ; SSE41-NEXT: movsbl %al, %eax
218226 ; SSE41-NEXT: retq
219227 %cmp = icmp slt <4 x i32> %B, %A
220228 %sext = sext <4 x i1> %cmp to <4 x i32>
228236 ; SSE2-LABEL: test_gt_2:
229237 ; SSE2: # %bb.0:
230238 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
231 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
232 ; SSE2-NEXT: movd %xmm0, %eax
239 ; SSE2-NEXT: pextrw $2, %xmm0, %eax
240 ; SSE2-NEXT: movsbl %al, %eax
233241 ; SSE2-NEXT: retq
234242 ;
235243 ; SSE41-LABEL: test_gt_2:
236244 ; SSE41: # %bb.0:
237245 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
238 ; SSE41-NEXT: pextrd $1, %xmm0, %eax
246 ; SSE41-NEXT: pextrb $4, %xmm0, %eax
247 ; SSE41-NEXT: movsbl %al, %eax
239248 ; SSE41-NEXT: retq
240249 %cmp = icmp slt <4 x i32> %B, %A
241250 %sext = sext <4 x i1> %cmp to <4 x i32>
1010 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1111 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1212 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2
13 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
14 ; AVX1-NEXT: vpslld $24, %xmm3, %xmm3
15 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
16 ; AVX1-NEXT: vpslld $24, %xmm2, %xmm2
13 ; AVX1-NEXT: vpmovsxwd %xmm2, %xmm3
14 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
15 ; AVX1-NEXT: vpmovsxwd %xmm2, %xmm2
1716 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1817 ; AVX1-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
1918 ; AVX1-NEXT: retq
114113 ; AVX1-LABEL: slt_zero:
115114 ; AVX1: # %bb.0:
116115 ; AVX1-NEXT: vpmovsxbw (%rdi), %xmm2
117 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
118 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
119 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
120 ; AVX1-NEXT: vpslld $24, %xmm3, %xmm3
121 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
122 ; AVX1-NEXT: vpslld $24, %xmm2, %xmm2
116 ; AVX1-NEXT: vpmovsxwd %xmm2, %xmm3
117 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
118 ; AVX1-NEXT: vpmovsxwd %xmm2, %xmm2
123119 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
124120 ; AVX1-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
125121 ; AVX1-NEXT: retq
183179 ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2
184180 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
185181 ; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
186 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
187 ; AVX1-NEXT: vpslld $24, %xmm3, %xmm3
188 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
189 ; AVX1-NEXT: vpslld $24, %xmm2, %xmm2
182 ; AVX1-NEXT: vpmovsxwd %xmm2, %xmm3
183 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
184 ; AVX1-NEXT: vpmovsxwd %xmm2, %xmm2
190185 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
191186 ; AVX1-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
192187 ; AVX1-NEXT: retq