llvm.org GIT mirror llvm / 3ff1f39
Merging r292758:
------------------------------------------------------------------------
r292758 | spatel | 2017-01-22 09:06:12 -0800 (Sun, 22 Jan 2017) | 4 lines

[x86] avoid crashing with illegal vector type (PR31672)

https://llvm.org/bugs/show_bug.cgi?id=31672
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_40@292832 91177308-0d34-0410-b5e6-96231b3b80d8

Hans Wennborg 3 years ago
2 changed file(s) with 160 addition(s) and 15 deletion(s). Raw diff Collapse all Expand all
2878728787 return SDValue();
2878828788 }
2878928789
28790 /// If a vector select has an operand that is -1 or 0, simplify the select to a
28791 /// bitwise logic operation.
28792 static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
28793 const X86Subtarget &Subtarget) {
28790 /// If a vector select has an operand that is -1 or 0, try to simplify the
28791 /// select to a bitwise logic operation.
28792 static SDValue
28793 combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
28794 TargetLowering::DAGCombinerInfo &DCI,
28795 const X86Subtarget &Subtarget) {
2879428796 SDValue Cond = N->getOperand(0);
2879528797 SDValue LHS = N->getOperand(1);
2879628798 SDValue RHS = N->getOperand(2);
2885228854 }
2885328855 }
2885428856
28855 if (!TValIsAllOnes && !FValIsAllZeros)
28857 // vselect Cond, 111..., 000... -> Cond
28858 if (TValIsAllOnes && FValIsAllZeros)
28859 return DAG.getBitcast(VT, Cond);
28860
28861 if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(CondVT))
2885628862 return SDValue();
2885728863
28858 SDValue Ret;
28859 if (TValIsAllOnes && FValIsAllZeros)
28860 Ret = Cond;
28861 else if (TValIsAllOnes)
28862 Ret = DAG.getNode(ISD::OR, DL, CondVT, Cond, DAG.getBitcast(CondVT, RHS));
28863 else if (FValIsAllZeros)
28864 Ret = DAG.getNode(ISD::AND, DL, CondVT, Cond, DAG.getBitcast(CondVT, LHS));
28865
28866 return DAG.getBitcast(VT, Ret);
28864 // vselect Cond, 111..., X -> or Cond, X
28865 if (TValIsAllOnes) {
28866 SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
28867 SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, CastRHS);
28868 return DAG.getBitcast(VT, Or);
28869 }
28870
28871 // vselect Cond, X, 000... -> and Cond, X
28872 if (FValIsAllZeros) {
28873 SDValue CastLHS = DAG.getBitcast(CondVT, LHS);
28874 SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, CastLHS);
28875 return DAG.getBitcast(VT, And);
28876 }
28877
28878 return SDValue();
2886728879 }
2886828880
2886928881 static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
2935229364 }
2935329365 }
2935429366
29355 if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, Subtarget))
29367 if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, DCI, Subtarget))
2935629368 return V;
2935729369
2935829370 // If this is a *dynamic* select (non-constant condition) and we can match
214214 ret <4 x i32> %zext
215215 }
216216
217 ; Fragile test warning - we need to induce the generation of a vselect
218 ; post-legalization to cause the crash seen in:
219 ; https://llvm.org/bugs/show_bug.cgi?id=31672
220 ; Is there a way to do that without an unsafe/fast sqrt intrinsic call?
221 ; Also, although the goal for adding this test is to prove that we
222 ; don't crash, I have no idea what this code is doing, so I'm keeping
223 ; the full codegen checks in case there's motivation to improve this.
224
225 define <2 x float> @PR31672() #0 {
226 ; X32-LABEL: PR31672:
227 ; X32: # BB#0:
228 ; X32-NEXT: pushl %ebp
229 ; X32-NEXT: movl %esp, %ebp
230 ; X32-NEXT: andl $-16, %esp
231 ; X32-NEXT: subl $80, %esp
232 ; X32-NEXT: xorps %xmm0, %xmm0
233 ; X32-NEXT: movaps {{.*#+}} xmm1 = <42,3,u,u>
234 ; X32-NEXT: movaps %xmm1, %xmm2
235 ; X32-NEXT: cmpeqps %xmm0, %xmm2
236 ; X32-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
237 ; X32-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
238 ; X32-NEXT: rsqrtps %xmm1, %xmm0
239 ; X32-NEXT: mulps %xmm0, %xmm1
240 ; X32-NEXT: mulps %xmm0, %xmm1
241 ; X32-NEXT: addps {{\.LCPI.*}}, %xmm1
242 ; X32-NEXT: mulps {{\.LCPI.*}}, %xmm0
243 ; X32-NEXT: mulps %xmm1, %xmm0
244 ; X32-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
245 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
246 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
247 ; X32-NEXT: andl %eax, %ecx
248 ; X32-NEXT: notl %eax
249 ; X32-NEXT: andl {{[0-9]+}}(%esp), %eax
250 ; X32-NEXT: orl %ecx, %eax
251 ; X32-NEXT: movl %eax, (%esp)
252 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
253 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
254 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
255 ; X32-NEXT: andl %ecx, %edx
256 ; X32-NEXT: notl %ecx
257 ; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx
258 ; X32-NEXT: orl %edx, %ecx
259 ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp)
260 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
261 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
262 ; X32-NEXT: andl %ecx, %edx
263 ; X32-NEXT: notl %ecx
264 ; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx
265 ; X32-NEXT: orl %edx, %ecx
266 ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp)
267 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
268 ; X32-NEXT: andl %eax, %ecx
269 ; X32-NEXT: notl %eax
270 ; X32-NEXT: andl {{[0-9]+}}(%esp), %eax
271 ; X32-NEXT: orl %ecx, %eax
272 ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
273 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
274 ; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
275 ; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
276 ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
277 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
278 ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
279 ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
280 ; X32-NEXT: movl %ebp, %esp
281 ; X32-NEXT: popl %ebp
282 ; X32-NEXT: retl
283 ;
284 ; X64-LABEL: PR31672:
285 ; X64: # BB#0:
286 ; X64-NEXT: xorps %xmm0, %xmm0
287 ; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
288 ; X64-NEXT: movaps {{.*#+}} xmm1 = <42,3,u,u>
289 ; X64-NEXT: cmpeqps %xmm1, %xmm0
290 ; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
291 ; X64-NEXT: rsqrtps %xmm1, %xmm0
292 ; X64-NEXT: mulps %xmm0, %xmm1
293 ; X64-NEXT: mulps %xmm0, %xmm1
294 ; X64-NEXT: addps {{.*}}(%rip), %xmm1
295 ; X64-NEXT: mulps {{.*}}(%rip), %xmm0
296 ; X64-NEXT: mulps %xmm1, %xmm0
297 ; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
298 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8
299 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r9
300 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10
301 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi
302 ; X64-NEXT: movl %r9d, %esi
303 ; X64-NEXT: andl %edi, %esi
304 ; X64-NEXT: movl %edi, %ecx
305 ; X64-NEXT: notl %ecx
306 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx
307 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax
308 ; X64-NEXT: andl %eax, %ecx
309 ; X64-NEXT: orl %esi, %ecx
310 ; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
311 ; X64-NEXT: movl %r8d, %ecx
312 ; X64-NEXT: andl %r10d, %ecx
313 ; X64-NEXT: movl %r10d, %esi
314 ; X64-NEXT: notl %esi
315 ; X64-NEXT: andl %edx, %esi
316 ; X64-NEXT: orl %ecx, %esi
317 ; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp)
318 ; X64-NEXT: shrq $32, %r9
319 ; X64-NEXT: shrq $32, %rdi
320 ; X64-NEXT: andl %edi, %r9d
321 ; X64-NEXT: notl %edi
322 ; X64-NEXT: shrq $32, %rax
323 ; X64-NEXT: andl %edi, %eax
324 ; X64-NEXT: orl %r9d, %eax
325 ; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
326 ; X64-NEXT: shrq $32, %r8
327 ; X64-NEXT: shrq $32, %r10
328 ; X64-NEXT: andl %r10d, %r8d
329 ; X64-NEXT: notl %r10d
330 ; X64-NEXT: shrq $32, %rdx
331 ; X64-NEXT: andl %r10d, %edx
332 ; X64-NEXT: orl %r8d, %edx
333 ; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
334 ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
335 ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
336 ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
337 ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
338 ; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
339 ; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
340 ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
341 ; X64-NEXT: retq
342 %t0 = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> <float 42.0, float 3.0>)
343 ret <2 x float> %t0
344 }
345
346 declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #1
347
348 attributes #0 = { nounwind "unsafe-fp-math"="true" }
349