llvm.org GIT mirror: llvm / commit 888225e
Merging r343373:
------------------------------------------------------------------------
r343373 | rksimon | 2018-09-29 06:25:22 -0700 (Sat, 29 Sep 2018) | 3 lines

[X86][SSE] Fixed issue with v2i64 variable shifts on 32-bit targets

The shift amount might have peeked through an extract_subvector, altering the
number of vector elements in the 'Amt' variable - so we were incorrectly
calculating the ratio when peeking through bitcasts, resulting in incorrectly
detecting splats.
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_70@344810 91177308-0d34-0410-b5e6-96231b3b80d8

Committed by Tom Stellard 2 years ago.
2 changed files with 21 additions and 15 deletions.
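Why the Ratio formula matters: 'Amt' is a BUILD_VECTOR of i32s covering the two i64 lanes of VT, so the splat check in the hunk below needs to know how many i32 operands make up one i64 shift amount. The old code derived that from Amt's total element count relative to VT; once Amt has been peeked through an extract_subvector, that count no longer corresponds to VT, while the new formula 64 / Amt.getScalarValueSizeInBits() depends only on the scalar widths. A minimal standalone sketch of the arithmetic (plain integers, not SelectionDAG types; the element count of 8 is an assumed wider-source scenario, and all variable names here are illustrative):

#include <cstdio>

int main() {
  // VT = v2i64: two 64-bit lanes. The shift amounts arrive as i32
  // BUILD_VECTOR operands, so each i64 amount should span Ratio = 2 i32s.
  const unsigned VTNumElts = 2;      // elements of v2i64
  const unsigned AmtScalarBits = 32; // bit width of each i32 operand

  // Old formula: tied to Amt's total element count. If Amt was peeked
  // through an extract_subvector of a wider vector (assume v8i32 here),
  // the count is 8 and the grouping factor comes out wrong.
  const unsigned AmtNumElts = 8;              // hypothetical wider source
  unsigned OldRatio = AmtNumElts / VTNumElts; // 8 / 2 = 4 (wrong)

  // New formula: tied only to the scalar widths, immune to the peek.
  unsigned NewRatio = 64 / AmtScalarBits;     // 64 / 32 = 2 (right)

  std::printf("old ratio = %u, new ratio = %u\n", OldRatio, NewRatio);
  return 0;
}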
@@ -23311,15 +23311,14 @@
   }

   // Check cases (mainly 32-bit) where i64 is expanded into high and low parts.
-  if (VT == MVT::v2i64 && Amt.getOpcode() == ISD::BITCAST &&
+  if (VT == MVT::v2i64 && Amt.getOpcode() == ISD::BITCAST &&
       Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
     Amt = Amt.getOperand(0);
-    unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() /
-                     VT.getVectorNumElements();
+    unsigned Ratio = 64 / Amt.getScalarValueSizeInBits();
     std::vector<SDValue> Vals(Ratio);
     for (unsigned i = 0; i != Ratio; ++i)
       Vals[i] = Amt.getOperand(i);
-    for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
+    for (unsigned i = Ratio, e = Amt.getNumOperands(); i != e; i += Ratio) {
       for (unsigned j = 0; j != Ratio; ++j)
         if (Vals[j] != Amt.getOperand(i + j))
           return SDValue();
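With an oversized Ratio, the group-compare loop above can pass vacuously, which is exactly the "incorrectly detecting splats" failure the commit message describes. A self-contained model of that loop, fed the shift amounts [33,0,63,0] from the test hunk below (i64 amounts 33 and 63 split into i32 lo/hi halves); looksLikeSplat and the Ratio values are illustrative, not the actual DAG trace:

#include <cstdio>
#include <vector>

// Same shape as the Vals/group-compare loop in the hunk above, over plain
// integers: true when every later group of Ratio operands repeats the first.
static bool looksLikeSplat(const std::vector<unsigned> &Amt, unsigned Ratio) {
  std::vector<unsigned> Vals(Amt.begin(), Amt.begin() + Ratio);
  for (unsigned i = Ratio, e = Amt.size(); i != e; i += Ratio)
    for (unsigned j = 0; j != Ratio; ++j)
      if (Vals[j] != Amt[i + j])
        return false; // groups differ: not a splatted i64 amount
  return true;
}

int main() {
  // i64 shift amounts 33 and 63, expanded into i32 (lo, hi) pairs.
  std::vector<unsigned> Amt = {33, 0, 63, 0};
  // Correct Ratio = 64/32 = 2: {33,0} vs {63,0} differ, so no splat.
  std::printf("Ratio 2 -> splat? %d\n", looksLikeSplat(Amt, 2)); // prints 0
  // An oversized Ratio leaves no second group to compare, so the check
  // passes vacuously and two distinct amounts are mistaken for a splat.
  std::printf("Ratio 4 -> splat? %d\n", looksLikeSplat(Amt, 4)); // prints 1
  return 0;
}

The false splat is what the test hunk below corrects: the old X32 output shifted every lane by the single low amount of [33,0,63,0] (vpsrlq reads only the low 64 bits of its count operand), while the fixed output shifts by both 33 (xmm3) and 63 (the vpshufd copy in xmm6) and merges the lanes with vpblendw. The vpxor/vpsubq pairs against [0,2147483648,0,2147483648] are the usual sign-bit fixup for emulating an arithmetic right shift with logical shifts.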
@@ -380,19 +380,26 @@
 ; X32-NEXT: movl %esp, %ebp
 ; X32-NEXT: andl $-16, %esp
 ; X32-NEXT: subl $16, %esp
+; X32-NEXT: vmovdqa {{.*#+}} xmm3 = [33,0,63,0]
+; X32-NEXT: vmovdqa {{.*#+}} xmm4 = [0,2147483648,0,2147483648]
+; X32-NEXT: vpsrlq %xmm3, %xmm4, %xmm5
+; X32-NEXT: vpshufd {{.*#+}} xmm6 = xmm3[2,3,0,1]
+; X32-NEXT: vpsrlq %xmm6, %xmm4, %xmm4
+; X32-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7]
+; X32-NEXT: vextractf128 $1, %ymm2, %xmm5
+; X32-NEXT: vpsrlq %xmm6, %xmm5, %xmm7
+; X32-NEXT: vpsrlq %xmm3, %xmm5, %xmm5
+; X32-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm7[4,5,6,7]
+; X32-NEXT: vpsrlq %xmm6, %xmm2, %xmm6
+; X32-NEXT: vpsrlq %xmm3, %xmm2, %xmm2
+; X32-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
 ; X32-NEXT: vpmovsxdq 16(%ebp), %xmm3
+; X32-NEXT: vpxor %xmm4, %xmm5, %xmm5
+; X32-NEXT: vpsubq %xmm4, %xmm5, %xmm5
+; X32-NEXT: vpxor %xmm4, %xmm2, %xmm2
+; X32-NEXT: vpsubq %xmm4, %xmm2, %xmm2
 ; X32-NEXT: vpmovsxdq 8(%ebp), %xmm4
-; X32-NEXT: vmovdqa {{.*#+}} xmm5 = [33,0,63,0]
-; X32-NEXT: vmovdqa {{.*#+}} xmm6 = [0,2147483648,0,2147483648]
-; X32-NEXT: vpsrlq %xmm5, %xmm6, %xmm6
-; X32-NEXT: vextractf128 $1, %ymm2, %xmm7
-; X32-NEXT: vpsrlq %xmm5, %xmm7, %xmm7
-; X32-NEXT: vpxor %xmm6, %xmm7, %xmm7
-; X32-NEXT: vpsubq %xmm6, %xmm7, %xmm7
-; X32-NEXT: vpsrlq %xmm5, %xmm2, %xmm2
-; X32-NEXT: vpxor %xmm6, %xmm2, %xmm2
-; X32-NEXT: vpsubq %xmm6, %xmm2, %xmm2
-; X32-NEXT: vinsertf128 $1, %xmm7, %ymm2, %ymm2
+; X32-NEXT: vinsertf128 $1, %xmm5, %ymm2, %ymm2
 ; X32-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
 ; X32-NEXT: vextractf128 $1, %ymm1, %xmm4
 ; X32-NEXT: vextractf128 $1, %ymm0, %xmm5