llvm.org GIT mirror llvm / 6ed7489
[LegalizeIntegerTypes] When promoting the result of an extract_vector_elt also promote the input type if necessary. By also promoting the input type we get a better idea of what scalar type to use. This can provide better results if the result of the extract is sign extended. What was previously happening is that the extract result would be legalized, and sometime later the input of the sign extend would be legalized using the result of the extract. Then later the extract input would be legalized, forcing a truncate into the input of the sign extend using a replace all uses. This requires DAG combine to combine out the sext/truncate pair. But sometimes we visited the truncate first and messed things up before the sext could be combined. By creating the extract with the correct scalar type when we legalize the result type, the truncate will be added right away. Then when the sign_extend input is legalized it will create an any_extend of the truncate, which can be optimized by getNode to maybe remove the truncate, followed by a sign_extend_inreg. Now DAG combine doesn't have to worry about getting rid of the extend. This fixes the regression on X86 in D56156. Differential Revision: https://reviews.llvm.org/D56176 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350236 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 9 months ago
2 changed file(s) with 47 addition(s) and 38 deletion(s). Raw diff Collapse all Expand all
440440 SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
441441 SDLoc dl(N);
442442 EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
443 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, N->getOperand(0),
444 N->getOperand(1));
443
444 SDValue Op0 = N->getOperand(0);
445 SDValue Op1 = N->getOperand(1);
446
447 // If the input also needs to be promoted, do that first so we can get a
448 // good idea for the output type.
449 if (TLI.getTypeAction(*DAG.getContext(), Op0.getValueType())
450 == TargetLowering::TypePromoteInteger) {
451 SDValue In = GetPromotedInteger(Op0);
452
453 // If the new type is larger than NVT, use it. We probably won't need to
454 // promote it again.
455 EVT SVT = In.getValueType().getScalarType();
456 if (SVT.bitsGE(NVT)) {
457 SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, In, Op1);
458 return DAG.getAnyExtOrTrunc(Ext, dl, NVT);
459 }
460 }
461
462 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, Op0, Op1);
445463 }
446464
447465 SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
77 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
88 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
99 ; SSE2-NEXT: pxor %xmm1, %xmm0
10 ; SSE2-NEXT: pextrw $2, %xmm0, %eax
11 ; SSE2-NEXT: movsbl %al, %eax
10 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
11 ; SSE2-NEXT: movd %xmm0, %eax
1212 ; SSE2-NEXT: retq
1313 ;
1414 ; SSE41-LABEL: test_eq_1:
1616 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
1717 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
1818 ; SSE41-NEXT: pxor %xmm1, %xmm0
19 ; SSE41-NEXT: pextrb $4, %xmm0, %eax
20 ; SSE41-NEXT: movsbl %al, %eax
19 ; SSE41-NEXT: pextrd $1, %xmm0, %eax
2120 ; SSE41-NEXT: retq
2221 %cmp = icmp slt <4 x i32> %A, %B
2322 %sext = sext <4 x i1> %cmp to <4 x i32>
3130 ; SSE2-LABEL: test_ne_1:
3231 ; SSE2: # %bb.0:
3332 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
34 ; SSE2-NEXT: pextrw $2, %xmm1, %eax
35 ; SSE2-NEXT: movsbl %al, %eax
33 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
34 ; SSE2-NEXT: movd %xmm0, %eax
3635 ; SSE2-NEXT: retq
3736 ;
3837 ; SSE41-LABEL: test_ne_1:
3938 ; SSE41: # %bb.0:
4039 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
41 ; SSE41-NEXT: pextrb $4, %xmm1, %eax
42 ; SSE41-NEXT: movsbl %al, %eax
40 ; SSE41-NEXT: pextrd $1, %xmm1, %eax
4341 ; SSE41-NEXT: retq
4442 %cmp = icmp slt <4 x i32> %A, %B
4543 %sext = sext <4 x i1> %cmp to <4 x i32>
6866 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
6967 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
7068 ; SSE2-NEXT: pxor %xmm1, %xmm0
71 ; SSE2-NEXT: pextrw $2, %xmm0, %eax
72 ; SSE2-NEXT: movsbl %al, %eax
69 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
70 ; SSE2-NEXT: movd %xmm0, %eax
7371 ; SSE2-NEXT: retq
7472 ;
7573 ; SSE41-LABEL: test_ge_1:
7775 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
7876 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
7977 ; SSE41-NEXT: pxor %xmm1, %xmm0
80 ; SSE41-NEXT: pextrb $4, %xmm0, %eax
81 ; SSE41-NEXT: movsbl %al, %eax
78 ; SSE41-NEXT: pextrd $1, %xmm0, %eax
8279 ; SSE41-NEXT: retq
8380 %cmp = icmp slt <4 x i32> %A, %B
8481 %sext = sext <4 x i1> %cmp to <4 x i32>
9289 ; SSE2-LABEL: test_lt_1:
9390 ; SSE2: # %bb.0:
9491 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
95 ; SSE2-NEXT: pextrw $2, %xmm1, %eax
96 ; SSE2-NEXT: movsbl %al, %eax
92 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
93 ; SSE2-NEXT: movd %xmm0, %eax
9794 ; SSE2-NEXT: retq
9895 ;
9996 ; SSE41-LABEL: test_lt_1:
10097 ; SSE41: # %bb.0:
10198 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
102 ; SSE41-NEXT: pextrb $4, %xmm1, %eax
103 ; SSE41-NEXT: movsbl %al, %eax
99 ; SSE41-NEXT: pextrd $1, %xmm1, %eax
104100 ; SSE41-NEXT: retq
105101 %cmp = icmp slt <4 x i32> %A, %B
106102 %sext = sext <4 x i1> %cmp to <4 x i32>
129125 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
130126 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
131127 ; SSE2-NEXT: pxor %xmm0, %xmm1
132 ; SSE2-NEXT: pextrw $2, %xmm1, %eax
133 ; SSE2-NEXT: movsbl %al, %eax
128 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
129 ; SSE2-NEXT: movd %xmm0, %eax
134130 ; SSE2-NEXT: retq
135131 ;
136132 ; SSE41-LABEL: test_eq_2:
138134 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
139135 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
140136 ; SSE41-NEXT: pxor %xmm0, %xmm1
141 ; SSE41-NEXT: pextrb $4, %xmm1, %eax
142 ; SSE41-NEXT: movsbl %al, %eax
137 ; SSE41-NEXT: pextrd $1, %xmm1, %eax
143138 ; SSE41-NEXT: retq
144139 %cmp = icmp slt <4 x i32> %B, %A
145140 %sext = sext <4 x i1> %cmp to <4 x i32>
153148 ; SSE2-LABEL: test_ne_2:
154149 ; SSE2: # %bb.0:
155150 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
156 ; SSE2-NEXT: pextrw $2, %xmm0, %eax
157 ; SSE2-NEXT: movsbl %al, %eax
151 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
152 ; SSE2-NEXT: movd %xmm0, %eax
158153 ; SSE2-NEXT: retq
159154 ;
160155 ; SSE41-LABEL: test_ne_2:
161156 ; SSE41: # %bb.0:
162157 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
163 ; SSE41-NEXT: pextrb $4, %xmm0, %eax
164 ; SSE41-NEXT: movsbl %al, %eax
158 ; SSE41-NEXT: pextrd $1, %xmm0, %eax
165159 ; SSE41-NEXT: retq
166160 %cmp = icmp slt <4 x i32> %B, %A
167161 %sext = sext <4 x i1> %cmp to <4 x i32>
177171 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
178172 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
179173 ; SSE2-NEXT: pxor %xmm0, %xmm1
180 ; SSE2-NEXT: pextrw $2, %xmm1, %eax
181 ; SSE2-NEXT: movsbl %al, %eax
174 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
175 ; SSE2-NEXT: movd %xmm0, %eax
182176 ; SSE2-NEXT: retq
183177 ;
184178 ; SSE41-LABEL: test_le_2:
186180 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
187181 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
188182 ; SSE41-NEXT: pxor %xmm0, %xmm1
189 ; SSE41-NEXT: pextrb $4, %xmm1, %eax
190 ; SSE41-NEXT: movsbl %al, %eax
183 ; SSE41-NEXT: pextrd $1, %xmm1, %eax
191184 ; SSE41-NEXT: retq
192185 %cmp = icmp slt <4 x i32> %B, %A
193186 %sext = sext <4 x i1> %cmp to <4 x i32>
214207 ; SSE2-LABEL: test_lt_2:
215208 ; SSE2: # %bb.0:
216209 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
217 ; SSE2-NEXT: pextrw $2, %xmm0, %eax
218 ; SSE2-NEXT: movsbl %al, %eax
210 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
211 ; SSE2-NEXT: movd %xmm0, %eax
219212 ; SSE2-NEXT: retq
220213 ;
221214 ; SSE41-LABEL: test_lt_2:
222215 ; SSE41: # %bb.0:
223216 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
224 ; SSE41-NEXT: pextrb $4, %xmm0, %eax
225 ; SSE41-NEXT: movsbl %al, %eax
217 ; SSE41-NEXT: pextrd $1, %xmm0, %eax
226218 ; SSE41-NEXT: retq
227219 %cmp = icmp slt <4 x i32> %B, %A
228220 %sext = sext <4 x i1> %cmp to <4 x i32>
236228 ; SSE2-LABEL: test_gt_2:
237229 ; SSE2: # %bb.0:
238230 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
239 ; SSE2-NEXT: pextrw $2, %xmm0, %eax
240 ; SSE2-NEXT: movsbl %al, %eax
231 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
232 ; SSE2-NEXT: movd %xmm0, %eax
241233 ; SSE2-NEXT: retq
242234 ;
243235 ; SSE41-LABEL: test_gt_2:
244236 ; SSE41: # %bb.0:
245237 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
246 ; SSE41-NEXT: pextrb $4, %xmm0, %eax
247 ; SSE41-NEXT: movsbl %al, %eax
238 ; SSE41-NEXT: pextrd $1, %xmm0, %eax
248239 ; SSE41-NEXT: retq
249240 %cmp = icmp slt <4 x i32> %B, %A
250241 %sext = sext <4 x i1> %cmp to <4 x i32>