Revert r359392 and r358887

Reverts "[X86] Remove (V)MOV64toSDrr/m and (V)MOVDI2SSrr/m. Use 128-bit result MOVD/MOVQ and COPY_TO_REGCLASS instead"
Reverts "[TargetLowering][AMDGPU][X86] Improve SimplifyDemandedBits bitcast handling"

Eric Christopher and Jorge Gorbe Moya reported some issues with these patches to me off-list.

Removing the CodeGenOnly instructions has changed how fneg is handled during fast-isel with SSE/SSE2. We're now emitting fsub -0.0, x instead of moving to the integer domain (in a GPR), xoring the sign bit, and then moving back to xmm. This is because the fast-isel table no longer contains an entry for (f32/f64 bitcast (i32/i64)), so the target-independent fneg code fails. The use of fsub changes the behavior for NaN inputs relative to -O2 codegen, which always uses a pxor.

NOTE: We still have a difference for double with -m32, since the move to a GPR doesn't work there. I'll file a separate PR for that and add test cases.

Since removing the CodeGenOnly instructions was fixing PR41619, I'm also reverting r358887, which exposed that PR, though I wouldn't be surprised if that bug can still be hit independently of it.

This should hopefully get Google back to green. I'll work with Simon and other X86 folks to figure out how to move forward again.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@360066 91177308-0d34-0410-b5e6-96231b3b80d8

Craig Topper, 1 year, 4 months ago
16 changed files with 374 additions and 208 deletions.
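For illustration, here is a minimal LLVM IR sketch of the fneg difference described above. The function itself is mine; the two lowerings in the comments correspond to the before/after sequences visible in the fneg test hunk further down (functions @doo/@foo), not to output I have regenerated independently.

define double @fneg_example(double %x) nounwind {
  %neg = fsub double -0.000000e+00, %x   ; fneg in its fsub form, as fast-isel sees it
  ret double %neg
}

; Fast-isel fallback after r359392 (an actual fsub from -0.0, which differs for NaN inputs):
;   movsd  {{.*#+}} xmm1 = mem[0],zero
;   subsd  %xmm0, %xmm1
;   movapd %xmm1, %xmm0
; Integer-domain lowering that this revert restores (also what -O2 emits):
;   movq    %xmm0, %rax
;   movabsq $-9223372036854775808, %rcx  ## imm = 0x8000000000000000
;   xorq    %rax, %rcx
;   movq    %rcx, %xmm0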
15701570 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
15711571 KnownSrcBits, TLO, Depth + 1))
15721572 return true;
1573 } else if ((NumSrcEltBits % BitWidth) == 0 &&
1574 TLO.DAG.getDataLayout().isLittleEndian()) {
1575 unsigned Scale = NumSrcEltBits / BitWidth;
1576 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1577 APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
1578 APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
1579 for (unsigned i = 0; i != NumElts; ++i)
1580 if (DemandedElts[i]) {
1581 unsigned Offset = (i % Scale) * BitWidth;
1582 DemandedSrcBits.insertBits(DemandedBits, Offset);
1583 DemandedSrcElts.setBit(i / Scale);
1584 }
1585
1586 if (SrcVT.isVector()) {
1587 APInt KnownSrcUndef, KnownSrcZero;
1588 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
1589 KnownSrcZero, TLO, Depth + 1))
1590 return true;
1591 }
1592
1593 KnownBits KnownSrcBits;
1594 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
1595 KnownSrcBits, TLO, Depth + 1))
1596 return true;
15971573 }
15981574
15991575 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
16001576 // recursive call where Known may be useful to the caller.
16011577 if (Depth > 0) {
1602 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1578 Known = TLO.DAG.computeKnownBits(Op, Depth);
16031579 return false;
16041580 }
16051581 break;
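For context, the block deleted above extended SimplifyDemandedBits' bitcast handling to the case where the source elements are wider than the result elements ((NumSrcEltBits % BitWidth) == 0, little-endian only), mapping each demanded narrow result element onto a bit range of its wide source element. A hedged sketch of the kind of IR this targets; the function and values are my own illustration, not taken from the patch or its tests:

define i32 @demanded_narrow_lane(<2 x i64> %a, <2 x i64> %b) {
  ; Only result element 0 is demanded, which covers bits [0,32) of source
  ; element 0 (Scale = 64/32 = 2). The reverted code used that to mark source
  ; element 1 and the upper bits of element 0 as not demanded, giving the
  ; operands of %wide a chance to be simplified.
  %wide = mul <2 x i64> %a, %b
  %narrow = bitcast <2 x i64> %wide to <4 x i32>
  %elt = extractelement <4 x i32> %narrow, i32 0
  ret i32 %elt
}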
32013201
32023202 SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
32033203 DAGCombinerInfo &DCI) const {
3204 auto *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
3204 if (N->getValueType(0) != MVT::i64)
3205 return SDValue();
3206
3207 const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
32053208 if (!RHS)
32063209 return SDValue();
32073210
3208 EVT VT = N->getValueType(0);
3209 SDValue LHS = N->getOperand(0);
32103211 unsigned ShiftAmt = RHS->getZExtValue();
3211 SelectionDAG &DAG = DCI.DAG;
3212 SDLoc SL(N);
3213
3214 // fold (srl (and x, c1 << c2), c2) -> (and (srl x, c2), c1)
3215 // This improves the ability to match BFE patterns in isel.
3216 if (LHS.getOpcode() == ISD::AND) {
3217 if (auto *Mask = dyn_cast<ConstantSDNode>(LHS.getOperand(1))) {
3218 if (Mask->getAPIntValue().isShiftedMask() &&
3219 Mask->getAPIntValue().countTrailingZeros() == ShiftAmt) {
3220 return DAG.getNode(
3221 ISD::AND, SL, VT,
3222 DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(0), N->getOperand(1)),
3223 DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(1), N->getOperand(1)));
3224 }
3225 }
3226 }
3227
3228 if (VT != MVT::i64)
3229 return SDValue();
3230
32313212 if (ShiftAmt < 32)
32323213 return SDValue();
32333214
32343215 // srl i64:x, C for C >= 32
32353216 // =>
32363217 // build_pair (srl hi_32(x), C - 32), 0
3218
3219 SelectionDAG &DAG = DCI.DAG;
3220 SDLoc SL(N);
3221
32373222 SDValue One = DAG.getConstant(1, SL, MVT::i32);
32383223 SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
32393224
3240 SDValue VecOp = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, LHS);
3241 SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecOp, One);
3225 SDValue VecOp = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, N->getOperand(0));
3226 SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32,
3227 VecOp, One);
32423228
32433229 SDValue NewConst = DAG.getConstant(ShiftAmt - 32, SL, MVT::i32);
32443230 SDValue NewShift = DAG.getNode(ISD::SRL, SL, MVT::i32, Hi, NewConst);
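As a worked instance of the "srl i64:x, C for C >= 32" comment above (my own example, not from the patch or its tests): a constant 64-bit right shift by 40 only needs the high half of the input, so it can be expressed as a 32-bit shift by 8 paired with a zero upper half.

define i64 @srl_i64_by_40(i64 %x) {
  %r = lshr i64 %x, 40
  ret i64 %r
}

; Equivalent form matching the build_pair expansion:
define i64 @srl_i64_by_40_expanded(i64 %x) {
  %hi64 = lshr i64 %x, 32
  %hi   = trunc i64 %hi64 to i32   ; hi_32(x)
  %shf  = lshr i32 %hi, 8          ; shift by C - 32
  %r    = zext i32 %shf to i64     ; build_pair (shf, 0)
  ret i64 %r
}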
38313831 "vmovq\t{$src, $dst|$dst, $src}", []>,
38323832 EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
38333833 let isCodeGenOnly = 1 in {
3834 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3835 "vmovq\t{$src, $dst|$dst, $src}",
3836 [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3837 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3838 def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src),
3839 "vmovq\t{$src, $dst|$dst, $src}",
3840 [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>,
3841 EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
38343842 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
38353843 "vmovq\t{$src, $dst|$dst, $src}",
38363844 [(set GR64:$dst, (bitconvert FR64X:$src))]>,
38423850 EVEX_CD8<64, CD8VT1>;
38433851 }
38443852 } // ExeDomain = SSEPackedInt
3853
3854 // Move Int Doubleword to Single Scalar
3855 //
3856 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3857 def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3858 "vmovd\t{$src, $dst|$dst, $src}",
3859 [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3860 EVEX, Sched<[WriteVecMoveFromGpr]>;
3861
3862 def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
3863 "vmovd\t{$src, $dst|$dst, $src}",
3864 [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))]>,
3865 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
3866 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
38453867
38463868 // Move doubleword from xmm register to r/m32
38473869 //
38583880 (iPTR 0))), addr:$dst)]>,
38593881 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
38603882 } // ExeDomain = SSEPackedInt
3861
3862 let Predicates = [HasAVX512] in {
3863 def : Pat<(f64 (bitconvert GR64:$src)),
3864 (COPY_TO_REGCLASS (VMOV64toPQIZrr GR64:$src), FR64X)>;
3865 def : Pat<(f32 (bitconvert GR32:$src)),
3866 (COPY_TO_REGCLASS (VMOVDI2PDIZrr GR32:$src), FR32X)>;
3867 }
38683883
38693884 // Move quadword from xmm1 register to r/m64
38703885 //
530530 { X86::MOV32rr, X86::MOV32rm, 0 },
531531 { X86::MOV64rr, X86::MOV64rm, 0 },
532532 { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 },
533 { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 },
533534 { X86::MOV8rr, X86::MOV8rm, 0 },
534535 { X86::MOVAPDrr, X86::MOVAPDrm, TB_ALIGN_16 },
535536 { X86::MOVAPSrr, X86::MOVAPSrm, TB_ALIGN_16 },
536537 { X86::MOVDDUPrr, X86::MOVDDUPrm, TB_NO_REVERSE },
537538 { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
539 { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
538540 { X86::MOVDQArr, X86::MOVDQArm, TB_ALIGN_16 },
539541 { X86::MOVDQUrr, X86::MOVDQUrm, 0 },
540542 { X86::MOVSHDUPrr, X86::MOVSHDUPrm, TB_ALIGN_16 },
815817 { X86::VGETMANTPSZrri, X86::VGETMANTPSZrmi, 0 },
816818 { X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 },
817819 { X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 },
820 { X86::VMOV64toSDZrr, X86::VMOV64toSDZrm, 0 },
821 { X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 },
818822 { X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 },
819823 { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 },
820824 { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 },
832836 { X86::VMOVDDUPrr, X86::VMOVDDUPrm, TB_NO_REVERSE },
833837 { X86::VMOVDI2PDIZrr, X86::VMOVDI2PDIZrm, 0 },
834838 { X86::VMOVDI2PDIrr, X86::VMOVDI2PDIrm, 0 },
839 { X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 },
840 { X86::VMOVDI2SSrr, X86::VMOVDI2SSrm, 0 },
835841 { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 },
836842 { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 },
837843 { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zrm, TB_ALIGN_64 },
41084108 def VMOV64toPQIrm : VRS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
41094109 "movq\t{$src, $dst|$dst, $src}", []>,
41104110 VEX, Sched<[WriteVecLoad]>;
4111 let isCodeGenOnly = 1 in
4112 def VMOV64toSDrr : VRS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
4113 "movq\t{$src, $dst|$dst, $src}",
4114 [(set FR64:$dst, (bitconvert GR64:$src))]>,
4115 VEX, Sched<[WriteVecMoveFromGpr]>;
41114116
41124117 def MOVDI2PDIrr : S2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
41134118 "movd\t{$src, $dst|$dst, $src}",
41284133 def MOV64toPQIrm : RS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
41294134 "movq\t{$src, $dst|$dst, $src}", []>,
41304135 Sched<[WriteVecLoad]>;
4136 let isCodeGenOnly = 1 in
4137 def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
4138 "movq\t{$src, $dst|$dst, $src}",
4139 [(set FR64:$dst, (bitconvert GR64:$src))]>,
4140 Sched<[WriteVecMoveFromGpr]>;
41314141 } // ExeDomain = SSEPackedInt
4142
4143 //===---------------------------------------------------------------------===//
4144 // Move Int Doubleword to Single Scalar
4145 //
4146 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
4147 def VMOVDI2SSrr : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
4148 "movd\t{$src, $dst|$dst, $src}",
4149 [(set FR32:$dst, (bitconvert GR32:$src))]>,
4150 VEX, Sched<[WriteVecMoveFromGpr]>;
4151
4152 def VMOVDI2SSrm : VS2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
4153 "movd\t{$src, $dst|$dst, $src}",
4154 [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
4155 VEX, Sched<[WriteVecLoad]>;
4156 def MOVDI2SSrr : S2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
4157 "movd\t{$src, $dst|$dst, $src}",
4158 [(set FR32:$dst, (bitconvert GR32:$src))]>,
4159 Sched<[WriteVecMoveFromGpr]>;
4160
4161 def MOVDI2SSrm : S2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
4162 "movd\t{$src, $dst|$dst, $src}",
4163 [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
4164 Sched<[WriteVecLoad]>;
4165 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
41324166
41334167 //===---------------------------------------------------------------------===//
41344168 // Move Packed Doubleword Int to Packed Double Int
41574191 Sched<[WriteVecStore]>;
41584192 } // ExeDomain = SSEPackedInt
41594193
4160 let Predicates = [UseAVX] in {
4161 def : Pat<(f64 (bitconvert GR64:$src)),
4162 (COPY_TO_REGCLASS (VMOV64toPQIrr GR64:$src), FR64)>;
4163 def : Pat<(f32 (bitconvert GR32:$src)),
4164 (COPY_TO_REGCLASS (VMOVDI2PDIrr GR32:$src), FR32)>;
4165 }
4166
4167 let Predicates = [UseSSE2] in
4168 def : Pat<(f64 (bitconvert GR64:$src)),
4169 (COPY_TO_REGCLASS (MOV64toPQIrr GR64:$src), FR64)>;
4170
4171 let Predicates = [UseSSE1] in
4172 def : Pat<(f32 (bitconvert GR32:$src)),
4173 (COPY_TO_REGCLASS (MOVDI2PDIrr GR32:$src), FR32)>;
4174
41754194 //===---------------------------------------------------------------------===//
41764195 // Move Packed Doubleword Int first element to Doubleword Int
41774196 //
42054224 //
42064225 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
42074226 let Predicates = [UseAVX] in
4227 def VMOV64toSDrm : VS2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
4228 "movq\t{$src, $dst|$dst, $src}",
4229 [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
4230 VEX, Sched<[WriteVecLoad]>;
42084231 def VMOVSDto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
42094232 "movq\t{$src, $dst|$dst, $src}",
42104233 [(set GR64:$dst, (bitconvert FR64:$src))]>,
42144237 [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>,
42154238 VEX, Sched<[WriteVecStore]>;
42164239
4240 def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
4241 "movq\t{$src, $dst|$dst, $src}",
4242 [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
4243 Sched<[WriteVecLoad]>;
42174244 def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
42184245 "movq\t{$src, $dst|$dst, $src}",
42194246 [(set GR64:$dst, (bitconvert FR64:$src))]>,
8585 ; GFX9-NEXT: v_mov_b32_e32 v2, s2
8686 ; GFX9-NEXT: ds_write_b16 v1, v2 offset:4
8787 ; GFX9-NEXT: s_waitcnt vmcnt(0)
88 ; GFX9-NEXT: v_bfe_u32 v0, v0, 16, 7
89 ; GFX9-NEXT: ds_write_b8 v1, v0 offset:6
88 ; GFX9-NEXT: v_and_b32_e32 v0, 0x7f0000, v0
89 ; GFX9-NEXT: ds_write_b8_d16_hi v1, v0 offset:6
9090 ; GFX9-NEXT: ds_write_b32 v1, v3
9191 ; GFX9-NEXT: s_endpgm
9292 store i55 %arg, i55 addrspace(3)* %ptr, align 8
447447 define void @bitcast_4i64_store(i4* %p, <4 x i64> %a0) {
448448 ; SSE2-SSSE3-LABEL: bitcast_4i64_store:
449449 ; SSE2-SSSE3: # %bb.0:
450 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
451 ; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm1
452 ; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm3
453 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm3
454 ; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm4
455 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm4
456 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
457 ; SSE2-SSSE3-NEXT: pand %xmm3, %xmm1
458 ; SSE2-SSSE3-NEXT: por %xmm4, %xmm1
459 ; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm0
460 ; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm3
461 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm3
462 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm0, %xmm2
463 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
464 ; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
465 ; SSE2-SSSE3-NEXT: por %xmm2, %xmm0
450466 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
451467 ; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
452468 ; SSE2-SSSE3-NEXT: movb %al, (%rdi)
608608 ;
609609 ; AVX1-LABEL: bitcast_8i64_store:
610610 ; AVX1: # %bb.0:
611 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
612 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
613 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
614 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
615 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
611616 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
612 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
613617 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
614618 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
615619 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
616 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
617 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
618620 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
619621 ; AVX1-NEXT: vmovmskps %ymm0, %eax
620622 ; AVX1-NEXT: movb %al, (%rdi)
6262 ; AVX12: # %bb.0:
6363 ; AVX12-NEXT: vmovmskps %xmm0, %eax
6464 ; AVX12-NEXT: movl %eax, %ecx
65 ; AVX12-NEXT: shrl $2, %ecx
66 ; AVX12-NEXT: vmovd %ecx, %xmm0
67 ; AVX12-NEXT: andl $3, %eax
68 ; AVX12-NEXT: vmovd %eax, %xmm1
69 ; AVX12-NEXT: vpextrb $0, %xmm1, %ecx
70 ; AVX12-NEXT: vpextrb $0, %xmm0, %eax
65 ; AVX12-NEXT: andl $3, %ecx
66 ; AVX12-NEXT: vmovq %rcx, %xmm0
67 ; AVX12-NEXT: shrl $2, %eax
68 ; AVX12-NEXT: vmovq %rax, %xmm1
69 ; AVX12-NEXT: vpextrb $0, %xmm0, %ecx
70 ; AVX12-NEXT: vpextrb $0, %xmm1, %eax
7171 ; AVX12-NEXT: addb %cl, %al
7272 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
7373 ; AVX12-NEXT: retq
8080 ; AVX512-NEXT: movzbl %al, %ecx
8181 ; AVX512-NEXT: shrl $2, %ecx
8282 ; AVX512-NEXT: andl $3, %ecx
83 ; AVX512-NEXT: vmovd %ecx, %xmm0
83 ; AVX512-NEXT: vmovq %rcx, %xmm0
84 ; AVX512-NEXT: movzwl %ax, %eax
8485 ; AVX512-NEXT: andl $3, %eax
85 ; AVX512-NEXT: vmovd %eax, %xmm1
86 ; AVX512-NEXT: vmovq %rax, %xmm1
8687 ; AVX512-NEXT: vpextrb $0, %xmm1, %ecx
8788 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax
8889 ; AVX512-NEXT: addb %cl, %al
118119 ; AVX12-NEXT: vpmovmskb %xmm0, %eax
119120 ; AVX12-NEXT: movzbl %al, %ecx
120121 ; AVX12-NEXT: shrl $4, %ecx
121 ; AVX12-NEXT: vmovd %ecx, %xmm0
122 ; AVX12-NEXT: vmovq %rcx, %xmm0
122123 ; AVX12-NEXT: andl $15, %eax
123 ; AVX12-NEXT: vmovd %eax, %xmm1
124 ; AVX12-NEXT: vmovq %rax, %xmm1
124125 ; AVX12-NEXT: vpextrb $0, %xmm1, %ecx
125126 ; AVX12-NEXT: vpextrb $0, %xmm0, %eax
126127 ; AVX12-NEXT: addb %cl, %al
133134 ; AVX512-NEXT: kmovd %k0, %eax
134135 ; AVX512-NEXT: movzbl %al, %ecx
135136 ; AVX512-NEXT: shrl $4, %ecx
136 ; AVX512-NEXT: vmovd %ecx, %xmm0
137 ; AVX512-NEXT: vmovq %rcx, %xmm0
138 ; AVX512-NEXT: movzwl %ax, %eax
137139 ; AVX512-NEXT: andl $15, %eax
138 ; AVX512-NEXT: vmovd %eax, %xmm1
140 ; AVX512-NEXT: vmovq %rax, %xmm1
139141 ; AVX512-NEXT: vpextrb $0, %xmm1, %ecx
140142 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax
141143 ; AVX512-NEXT: addb %cl, %al
207209 define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind {
208210 ; SSE2-SSSE3-LABEL: bitcast_v4i64_to_v2i2:
209211 ; SSE2-SSSE3: # %bb.0:
212 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
213 ; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm1
214 ; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm3
215 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm3
216 ; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm4
217 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm4
218 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
219 ; SSE2-SSSE3-NEXT: pand %xmm3, %xmm1
220 ; SSE2-SSSE3-NEXT: por %xmm4, %xmm1
221 ; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm0
222 ; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm3
223 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm3
224 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm0, %xmm2
225 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
226 ; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
227 ; SSE2-SSSE3-NEXT: por %xmm2, %xmm0
210228 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
211229 ; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
212230 ; SSE2-SSSE3-NEXT: movl %eax, %ecx
224242 ; AVX12: # %bb.0:
225243 ; AVX12-NEXT: vmovmskpd %ymm0, %eax
226244 ; AVX12-NEXT: movl %eax, %ecx
227 ; AVX12-NEXT: shrl $2, %ecx
228 ; AVX12-NEXT: vmovd %ecx, %xmm0
229 ; AVX12-NEXT: andl $3, %eax
230 ; AVX12-NEXT: vmovd %eax, %xmm1
231 ; AVX12-NEXT: vpextrb $0, %xmm1, %ecx
232 ; AVX12-NEXT: vpextrb $0, %xmm0, %eax
245 ; AVX12-NEXT: andl $3, %ecx
246 ; AVX12-NEXT: vmovq %rcx, %xmm0
247 ; AVX12-NEXT: shrl $2, %eax
248 ; AVX12-NEXT: vmovq %rax, %xmm1
249 ; AVX12-NEXT: vpextrb $0, %xmm0, %ecx
250 ; AVX12-NEXT: vpextrb $0, %xmm1, %eax
233251 ; AVX12-NEXT: addb %cl, %al
234252 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
235253 ; AVX12-NEXT: vzeroupper
243261 ; AVX512-NEXT: movzbl %al, %ecx
244262 ; AVX512-NEXT: shrl $2, %ecx
245263 ; AVX512-NEXT: andl $3, %ecx
246 ; AVX512-NEXT: vmovd %ecx, %xmm0
264 ; AVX512-NEXT: vmovq %rcx, %xmm0
265 ; AVX512-NEXT: movzwl %ax, %eax
247266 ; AVX512-NEXT: andl $3, %eax
248 ; AVX512-NEXT: vmovd %eax, %xmm1
267 ; AVX512-NEXT: vmovq %rax, %xmm1
249268 ; AVX512-NEXT: vpextrb $0, %xmm1, %ecx
250269 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax
251270 ; AVX512-NEXT: addb %cl, %al
281300 ; AVX12: # %bb.0:
282301 ; AVX12-NEXT: vmovmskps %ymm0, %eax
283302 ; AVX12-NEXT: movl %eax, %ecx
284 ; AVX12-NEXT: shrl $4, %ecx
285 ; AVX12-NEXT: vmovd %ecx, %xmm0
286 ; AVX12-NEXT: andl $15, %eax
287 ; AVX12-NEXT: vmovd %eax, %xmm1
288 ; AVX12-NEXT: vpextrb $0, %xmm1, %ecx
289 ; AVX12-NEXT: vpextrb $0, %xmm0, %eax
303 ; AVX12-NEXT: andl $15, %ecx
304 ; AVX12-NEXT: vmovq %rcx, %xmm0
305 ; AVX12-NEXT: shrl $4, %eax
306 ; AVX12-NEXT: vmovq %rax, %xmm1
307 ; AVX12-NEXT: vpextrb $0, %xmm0, %ecx
308 ; AVX12-NEXT: vpextrb $0, %xmm1, %eax
290309 ; AVX12-NEXT: addb %cl, %al
291310 ; AVX12-NEXT: # kill: def $al killed $al killed $eax
292311 ; AVX12-NEXT: vzeroupper
299318 ; AVX512-NEXT: kmovd %k0, %eax
300319 ; AVX512-NEXT: movzbl %al, %ecx
301320 ; AVX512-NEXT: shrl $4, %ecx
302 ; AVX512-NEXT: vmovd %ecx, %xmm0
321 ; AVX512-NEXT: vmovq %rcx, %xmm0
322 ; AVX512-NEXT: movzwl %ax, %eax
303323 ; AVX512-NEXT: andl $15, %eax
304 ; AVX512-NEXT: vmovd %eax, %xmm1
324 ; AVX512-NEXT: vmovq %rax, %xmm1
305325 ; AVX512-NEXT: vpextrb $0, %xmm1, %ecx
306326 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax
307327 ; AVX512-NEXT: addb %cl, %al
515535 ;
516536 ; AVX1-LABEL: bitcast_v8i64_to_v2i4:
517537 ; AVX1: # %bb.0:
538 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
539 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
540 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
541 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
542 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
518543 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
519 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
520544 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
521545 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
522546 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
523 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
524 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
525547 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
526548 ; AVX1-NEXT: vmovmskps %ymm0, %eax
527549 ; AVX1-NEXT: movl %eax, %ecx
528550 ; AVX1-NEXT: shrl $4, %ecx
529 ; AVX1-NEXT: vmovd %ecx, %xmm0
551 ; AVX1-NEXT: vmovq %rcx, %xmm0
530552 ; AVX1-NEXT: andl $15, %eax
531 ; AVX1-NEXT: vmovd %eax, %xmm1
553 ; AVX1-NEXT: vmovq %rax, %xmm1
532554 ; AVX1-NEXT: vpextrb $0, %xmm1, %ecx
533555 ; AVX1-NEXT: vpextrb $0, %xmm0, %eax
534556 ; AVX1-NEXT: addb %cl, %al
546568 ; AVX2-NEXT: vmovmskps %ymm0, %eax
547569 ; AVX2-NEXT: movl %eax, %ecx
548570 ; AVX2-NEXT: shrl $4, %ecx
549 ; AVX2-NEXT: vmovd %ecx, %xmm0
571 ; AVX2-NEXT: vmovq %rcx, %xmm0
550572 ; AVX2-NEXT: andl $15, %eax
551 ; AVX2-NEXT: vmovd %eax, %xmm1
573 ; AVX2-NEXT: vmovq %rax, %xmm1
552574 ; AVX2-NEXT: vpextrb $0, %xmm1, %ecx
553575 ; AVX2-NEXT: vpextrb $0, %xmm0, %eax
554576 ; AVX2-NEXT: addb %cl, %al
563585 ; AVX512-NEXT: kmovd %k0, %eax
564586 ; AVX512-NEXT: movzbl %al, %ecx
565587 ; AVX512-NEXT: shrl $4, %ecx
566 ; AVX512-NEXT: vmovd %ecx, %xmm0
588 ; AVX512-NEXT: vmovq %rcx, %xmm0
589 ; AVX512-NEXT: movzwl %ax, %eax
567590 ; AVX512-NEXT: andl $15, %eax
568 ; AVX512-NEXT: vmovd %eax, %xmm1
591 ; AVX512-NEXT: vmovq %rax, %xmm1
569592 ; AVX512-NEXT: vpextrb $0, %xmm1, %ecx
570593 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax
571594 ; AVX512-NEXT: addb %cl, %al
1313 ;
1414 ; X64-LABEL: t:
1515 ; X64: ## %bb.0: ## %entry
16 ; X64-NEXT: ## kill: def $edx killed $edx def $rdx
17 ; X64-NEXT: ## kill: def $esi killed $esi def $rsi
1618 ; X64-NEXT: imull %ecx, %esi
17 ; X64-NEXT: addl %edx, %esi
18 ; X64-NEXT: movslq %esi, %rax
19 ; X64-NEXT: leal (%rsi,%rdx), %eax
20 ; X64-NEXT: cltq
1921 ; X64-NEXT: movl (%rdi,%rax), %eax
20 ; X64-NEXT: movq %rax, %xmm0
22 ; X64-NEXT: leal 4(%rsi,%rdx), %ecx
23 ; X64-NEXT: movslq %ecx, %rcx
24 ; X64-NEXT: movzwl (%rdi,%rcx), %ecx
25 ; X64-NEXT: shlq $32, %rcx
26 ; X64-NEXT: orq %rax, %rcx
27 ; X64-NEXT: movq %rcx, %xmm0
2128 ; X64-NEXT: movd %xmm0, %eax
2229 ; X64-NEXT: retq
2330 entry:
22152215 $edi = VCVTTSS2SIZrr $xmm0
22162216 ; CHECK: $edi = VCVTTSS2SIrr_Int $xmm0
22172217 $edi = VCVTTSS2SIZrr_Int $xmm0
2218 ; CHECK: $xmm0 = VMOV64toSDrr $rdi
2219 $xmm0 = VMOV64toSDZrr $rdi
2220 ; CHECK: $xmm0 = VMOVDI2SSrm $rip, $noreg, $noreg, $noreg, $noreg
2221 $xmm0 = VMOVDI2SSZrm $rip, $noreg, $noreg, $noreg, $noreg
2222 ; CHECK: $xmm0 = VMOVDI2SSrr $eax
2223 $xmm0 = VMOVDI2SSZrr $eax
22182224 ; CHECK: VMOVSDmr $rdi, $xmm0, $noreg, $noreg, $noreg, $noreg
22192225 VMOVSDZmr $rdi, $xmm0, $noreg, $noreg, $noreg, $noreg
22202226 ; CHECK: $xmm0 = VMOVSDrm $rip, $noreg, $noreg, $noreg, $noreg
22432249 $xmm0 = VMOV64toPQIZrr $rdi
22442250 ; CHECK: $xmm0 = VMOV64toPQIrm $rdi, $noreg, $noreg, $noreg, $noreg
22452251 $xmm0 = VMOV64toPQIZrm $rdi, $noreg, $noreg, $noreg, $noreg
2252 ; CHECK: $xmm0 = VMOV64toSDrr $rdi
2253 $xmm0 = VMOV64toSDZrr $rdi
22462254 ; CHECK: $xmm0 = VMOVDI2PDIrm $rip, $noreg, $noreg, $noreg, $noreg
22472255 $xmm0 = VMOVDI2PDIZrm $rip, $noreg, $noreg, $noreg, $noreg
22482256 ; CHECK: $xmm0 = VMOVDI2PDIrr $edi
45274535 $edi = VCVTTSS2SIZrr $xmm16
45284536 ; CHECK: $edi = VCVTTSS2SIZrr_Int $xmm16
45294537 $edi = VCVTTSS2SIZrr_Int $xmm16
4538 ; CHECK: $xmm16 = VMOV64toSDZrr $rdi
4539 $xmm16 = VMOV64toSDZrr $rdi
4540 ; CHECK: $xmm16 = VMOVDI2SSZrm $rip, $noreg, $noreg, $noreg, $noreg
4541 $xmm16 = VMOVDI2SSZrm $rip, $noreg, $noreg, $noreg, $noreg
4542 ; CHECK: $xmm16 = VMOVDI2SSZrr $eax
4543 $xmm16 = VMOVDI2SSZrr $eax
45304544 ; CHECK: VMOVSDZmr $rdi, $xmm16, $noreg, $noreg, $noreg, $noreg
45314545 VMOVSDZmr $rdi, $xmm16, $noreg, $noreg, $noreg, $noreg
45324546 ; CHECK: $xmm16 = VMOVSDZrm $rip, $noreg, $noreg, $noreg, $noreg
45554569 $xmm16 = VMOV64toPQIZrr $rdi
45564570 ; CHECK: $xmm16 = VMOV64toPQIZrm $rdi, $noreg, $noreg, $noreg, $noreg
45574571 $xmm16 = VMOV64toPQIZrm $rdi, $noreg, $noreg, $noreg, $noreg
4572 ; CHECK: $xmm16 = VMOV64toSDZrr $rdi
4573 $xmm16 = VMOV64toSDZrr $rdi
45584574 ; CHECK: $xmm16 = VMOVDI2PDIZrm $rip, $noreg, $noreg, $noreg, $noreg
45594575 $xmm16 = VMOVDI2PDIZrm $rip, $noreg, $noreg, $noreg, $noreg
45604576 ; CHECK: $xmm16 = VMOVDI2PDIZrr $edi
44 define double @doo(double %x) nounwind {
55 ; CHECK-LABEL: doo:
66 ; CHECK: ## %bb.0:
7 ; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
8 ; CHECK-NEXT: subsd %xmm0, %xmm1
9 ; CHECK-NEXT: movapd %xmm1, %xmm0
7 ; CHECK-NEXT: movq %xmm0, %rax
8 ; CHECK-NEXT: movabsq $-9223372036854775808, %rcx ## imm = 0x8000000000000000
9 ; CHECK-NEXT: xorq %rax, %rcx
10 ; CHECK-NEXT: movq %rcx, %xmm0
1011 ; CHECK-NEXT: retq
1112 ;
1213 ; SSE2-LABEL: doo:
2930 define float @foo(float %x) nounwind {
3031 ; CHECK-LABEL: foo:
3132 ; CHECK: ## %bb.0:
32 ; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
33 ; CHECK-NEXT: subss %xmm0, %xmm1
34 ; CHECK-NEXT: movaps %xmm1, %xmm0
33 ; CHECK-NEXT: movd %xmm0, %eax
34 ; CHECK-NEXT: xorl $2147483648, %eax ## imm = 0x80000000
35 ; CHECK-NEXT: movd %eax, %xmm0
3536 ; CHECK-NEXT: retq
3637 ;
3738 ; SSE2-LABEL: foo:
3636 define void @store_v2f64_v2i64(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %val) {
3737 ; SSE2-LABEL: store_v2f64_v2i64:
3838 ; SSE2: ## %bb.0:
39 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
40 ; SSE2-NEXT: pxor %xmm2, %xmm0
41 ; SSE2-NEXT: movdqa %xmm2, %xmm3
42 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
43 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
44 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
45 ; SSE2-NEXT: pand %xmm3, %xmm2
46 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
47 ; SSE2-NEXT: por %xmm2, %xmm0
48 ; SSE2-NEXT: movd %xmm0, %eax
39 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
40 ; SSE2-NEXT: pxor %xmm3, %xmm0
41 ; SSE2-NEXT: movdqa %xmm3, %xmm2
42 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
43 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm0
44 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
45 ; SSE2-NEXT: movdqa %xmm0, %xmm4
46 ; SSE2-NEXT: pand %xmm2, %xmm4
47 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
48 ; SSE2-NEXT: por %xmm3, %xmm4
49 ; SSE2-NEXT: movd %xmm4, %eax
4950 ; SSE2-NEXT: testb $1, %al
5051 ; SSE2-NEXT: je LBB1_2
5152 ; SSE2-NEXT: ## %bb.1: ## %cond.store
5253 ; SSE2-NEXT: movlpd %xmm1, (%rdi)
5354 ; SSE2-NEXT: LBB1_2: ## %else
55 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
56 ; SSE2-NEXT: pand %xmm2, %xmm0
57 ; SSE2-NEXT: por %xmm3, %xmm0
5458 ; SSE2-NEXT: pextrw $4, %xmm0, %eax
5559 ; SSE2-NEXT: testb $1, %al
5660 ; SSE2-NEXT: je LBB1_4
119123 ; SSE2-NEXT: movdqa %xmm4, %xmm5
120124 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm5
121125 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
122 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
123 ; SSE2-NEXT: pand %xmm5, %xmm6
124 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3]
125 ; SSE2-NEXT: por %xmm6, %xmm0
126 ; SSE2-NEXT: movd %xmm0, %eax
126 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
127 ; SSE2-NEXT: movdqa %xmm0, %xmm7
128 ; SSE2-NEXT: pand %xmm5, %xmm7
129 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3]
130 ; SSE2-NEXT: por %xmm6, %xmm7
131 ; SSE2-NEXT: movd %xmm7, %eax
127132 ; SSE2-NEXT: testb $1, %al
128133 ; SSE2-NEXT: je LBB2_2
129134 ; SSE2-NEXT: ## %bb.1: ## %cond.store
130135 ; SSE2-NEXT: movlpd %xmm2, (%rdi)
131136 ; SSE2-NEXT: LBB2_2: ## %else
137 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,0,2,2]
138 ; SSE2-NEXT: pand %xmm5, %xmm0
139 ; SSE2-NEXT: por %xmm6, %xmm0
132140 ; SSE2-NEXT: pextrw $4, %xmm0, %eax
133141 ; SSE2-NEXT: testb $1, %al
134142 ; SSE2-NEXT: je LBB2_4
138146 ; SSE2-NEXT: pxor %xmm4, %xmm1
139147 ; SSE2-NEXT: movdqa %xmm4, %xmm0
140148 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
149 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
141150 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
142151 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
143 ; SSE2-NEXT: pand %xmm0, %xmm1
152 ; SSE2-NEXT: pand %xmm2, %xmm1
144153 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
145154 ; SSE2-NEXT: por %xmm1, %xmm0
146155 ; SSE2-NEXT: pextrw $0, %xmm0, %eax
892901 define void @store_v2i64_v2i64(<2 x i64> %trigger, <2 x i64>* %addr, <2 x i64> %val) {
893902 ; SSE2-LABEL: store_v2i64_v2i64:
894903 ; SSE2: ## %bb.0:
895 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
896 ; SSE2-NEXT: pxor %xmm2, %xmm0
897 ; SSE2-NEXT: movdqa %xmm2, %xmm3
898 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
899 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
900 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
901 ; SSE2-NEXT: pand %xmm3, %xmm2
902 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
903 ; SSE2-NEXT: por %xmm2, %xmm0
904 ; SSE2-NEXT: movd %xmm0, %eax
904 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
905 ; SSE2-NEXT: pxor %xmm3, %xmm0
906 ; SSE2-NEXT: movdqa %xmm3, %xmm2
907 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
908 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm0
909 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
910 ; SSE2-NEXT: movdqa %xmm0, %xmm4
911 ; SSE2-NEXT: pand %xmm2, %xmm4
912 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
913 ; SSE2-NEXT: por %xmm3, %xmm4
914 ; SSE2-NEXT: movd %xmm4, %eax
905915 ; SSE2-NEXT: testb $1, %al
906916 ; SSE2-NEXT: je LBB7_2
907917 ; SSE2-NEXT: ## %bb.1: ## %cond.store
908918 ; SSE2-NEXT: movq %xmm1, (%rdi)
909919 ; SSE2-NEXT: LBB7_2: ## %else
920 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
921 ; SSE2-NEXT: pand %xmm2, %xmm0
922 ; SSE2-NEXT: por %xmm3, %xmm0
910923 ; SSE2-NEXT: pextrw $4, %xmm0, %eax
911924 ; SSE2-NEXT: testb $1, %al
912925 ; SSE2-NEXT: je LBB7_4
981994 ; SSE2-NEXT: movdqa %xmm4, %xmm5
982995 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm5
983996 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
984 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
985 ; SSE2-NEXT: pand %xmm5, %xmm6
986 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3]
987 ; SSE2-NEXT: por %xmm6, %xmm0
988 ; SSE2-NEXT: movd %xmm0, %eax
997 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
998 ; SSE2-NEXT: movdqa %xmm0, %xmm7
999 ; SSE2-NEXT: pand %xmm5, %xmm7
1000 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3]
1001 ; SSE2-NEXT: por %xmm6, %xmm7
1002 ; SSE2-NEXT: movd %xmm7, %eax
9891003 ; SSE2-NEXT: testb $1, %al
9901004 ; SSE2-NEXT: je LBB8_2
9911005 ; SSE2-NEXT: ## %bb.1: ## %cond.store
9921006 ; SSE2-NEXT: movq %xmm2, (%rdi)
9931007 ; SSE2-NEXT: LBB8_2: ## %else
1008 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,0,2,2]
1009 ; SSE2-NEXT: pand %xmm5, %xmm0
1010 ; SSE2-NEXT: por %xmm6, %xmm0
9941011 ; SSE2-NEXT: pextrw $4, %xmm0, %eax
9951012 ; SSE2-NEXT: testb $1, %al
9961013 ; SSE2-NEXT: je LBB8_4
10011018 ; SSE2-NEXT: pxor %xmm4, %xmm1
10021019 ; SSE2-NEXT: movdqa %xmm4, %xmm0
10031020 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
1021 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
10041022 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
10051023 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1006 ; SSE2-NEXT: pand %xmm0, %xmm1
1024 ; SSE2-NEXT: pand %xmm2, %xmm1
10071025 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
10081026 ; SSE2-NEXT: por %xmm1, %xmm0
10091027 ; SSE2-NEXT: pextrw $0, %xmm0, %eax
928928 define i1 @allones_v4i64_sign(<4 x i64> %arg) {
929929 ; SSE2-LABEL: allones_v4i64_sign:
930930 ; SSE2: # %bb.0:
931 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
932 ; SSE2-NEXT: pxor %xmm2, %xmm1
933 ; SSE2-NEXT: movdqa %xmm2, %xmm3
934 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm3
935 ; SSE2-NEXT: movdqa %xmm2, %xmm4
936 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
937 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
938 ; SSE2-NEXT: pand %xmm3, %xmm1
939 ; SSE2-NEXT: por %xmm4, %xmm1
940 ; SSE2-NEXT: pxor %xmm2, %xmm0
941 ; SSE2-NEXT: movdqa %xmm2, %xmm3
942 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm3
943 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
944 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
945 ; SSE2-NEXT: pand %xmm3, %xmm0
946 ; SSE2-NEXT: por %xmm2, %xmm0
931947 ; SSE2-NEXT: packssdw %xmm1, %xmm0
932948 ; SSE2-NEXT: movmskps %xmm0, %eax
933949 ; SSE2-NEXT: cmpb $15, %al
972988 define i1 @allzeros_v4i64_sign(<4 x i64> %arg) {
973989 ; SSE2-LABEL: allzeros_v4i64_sign:
974990 ; SSE2: # %bb.0:
991 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
992 ; SSE2-NEXT: pxor %xmm2, %xmm1
993 ; SSE2-NEXT: movdqa %xmm2, %xmm3
994 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm3
995 ; SSE2-NEXT: movdqa %xmm2, %xmm4
996 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
997 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
998 ; SSE2-NEXT: pand %xmm3, %xmm1
999 ; SSE2-NEXT: por %xmm4, %xmm1
1000 ; SSE2-NEXT: pxor %xmm2, %xmm0
1001 ; SSE2-NEXT: movdqa %xmm2, %xmm3
1002 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm3
1003 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
1004 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
1005 ; SSE2-NEXT: pand %xmm3, %xmm0
1006 ; SSE2-NEXT: por %xmm2, %xmm0
9751007 ; SSE2-NEXT: packssdw %xmm1, %xmm0
9761008 ; SSE2-NEXT: movmskps %xmm0, %eax
9771009 ; SSE2-NEXT: testb %al, %al
10621094 ;
10631095 ; AVX1-LABEL: allones_v8i64_sign:
10641096 ; AVX1: # %bb.0:
1097 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1098 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1099 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
1100 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
1101 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
10651102 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1066 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
10671103 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
10681104 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
10691105 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
1070 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1071 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
10721106 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
10731107 ; AVX1-NEXT: vmovmskps %ymm0, %eax
10741108 ; AVX1-NEXT: cmpb $-1, %al
11631197 ;
11641198 ; AVX1-LABEL: allzeros_v8i64_sign:
11651199 ; AVX1: # %bb.0:
1200 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1201 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1202 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
1203 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
1204 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
11661205 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1167 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
11681206 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
11691207 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
11701208 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
1171 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1172 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
11731209 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
11741210 ; AVX1-NEXT: vmovmskps %ymm0, %eax
11751211 ; AVX1-NEXT: testb %al, %al
25022538 ;
25032539 ; AVX1-LABEL: allones_v8i64_and1:
25042540 ; AVX1: # %bb.0:
2541 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2542 ; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
2543 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
2544 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
2545 ; AVX1-NEXT: vpsllq $63, %xmm1, %xmm1
2546 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
2547 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
25052548 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
25062549 ; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
2507 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
25082550 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
25092551 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
25102552 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
25112553 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2512 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2513 ; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
2514 ; AVX1-NEXT: vpsllq $63, %xmm1, %xmm1
2515 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
25162554 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
25172555 ; AVX1-NEXT: vmovmskps %ymm0, %eax
25182556 ; AVX1-NEXT: cmpb $-1, %al
25762614 ;
25772615 ; AVX1-LABEL: allzeros_v8i64_and1:
25782616 ; AVX1: # %bb.0:
2617 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2618 ; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
2619 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
2620 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
2621 ; AVX1-NEXT: vpsllq $63, %xmm1, %xmm1
2622 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
2623 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
25792624 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
25802625 ; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
2581 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
25822626 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
25832627 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
25842628 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
25852629 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2586 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2587 ; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
2588 ; AVX1-NEXT: vpsllq $63, %xmm1, %xmm1
2589 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
25902630 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
25912631 ; AVX1-NEXT: vmovmskps %ymm0, %eax
25922632 ; AVX1-NEXT: testb %al, %al
39213961 ;
39223962 ; AVX1-LABEL: allones_v8i64_and4:
39233963 ; AVX1: # %bb.0:
3964 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3965 ; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
3966 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
3967 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
3968 ; AVX1-NEXT: vpsllq $61, %xmm1, %xmm1
3969 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
3970 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
39243971 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
39253972 ; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
3926 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
39273973 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
39283974 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
39293975 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
39303976 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
3931 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3932 ; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
3933 ; AVX1-NEXT: vpsllq $61, %xmm1, %xmm1
3934 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
39353977 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
39363978 ; AVX1-NEXT: vmovmskps %ymm0, %eax
39373979 ; AVX1-NEXT: cmpb $-1, %al
39954037 ;
39964038 ; AVX1-LABEL: allzeros_v8i64_and4:
39974039 ; AVX1: # %bb.0:
4040 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
4041 ; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
4042 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
4043 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
4044 ; AVX1-NEXT: vpsllq $61, %xmm1, %xmm1
4045 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
4046 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
39984047 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
39994048 ; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
4000 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
40014049 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
40024050 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
40034051 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
40044052 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
4005 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
4006 ; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
4007 ; AVX1-NEXT: vpsllq $61, %xmm1, %xmm1
4008 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
40094053 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
40104054 ; AVX1-NEXT: vmovmskps %ymm0, %eax
40114055 ; AVX1-NEXT: testb %al, %al
41254169 define i32 @movmskpd256(<4 x double> %x) {
41264170 ; SSE2-LABEL: movmskpd256:
41274171 ; SSE2: # %bb.0:
4172 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
4173 ; SSE2-NEXT: pxor %xmm2, %xmm1
4174 ; SSE2-NEXT: movdqa %xmm2, %xmm3
4175 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm3
4176 ; SSE2-NEXT: movdqa %xmm2, %xmm4
4177 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
4178 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
4179 ; SSE2-NEXT: pand %xmm3, %xmm1
4180 ; SSE2-NEXT: por %xmm4, %xmm1
4181 ; SSE2-NEXT: pxor %xmm2, %xmm0
4182 ; SSE2-NEXT: movdqa %xmm2, %xmm3
4183 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm3
4184 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
4185 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
4186 ; SSE2-NEXT: pand %xmm3, %xmm0
4187 ; SSE2-NEXT: por %xmm2, %xmm0
41284188 ; SSE2-NEXT: packssdw %xmm1, %xmm0
41294189 ; SSE2-NEXT: movmskps %xmm0, %eax
41304190 ; SSE2-NEXT: retq
0 # RUN: llc -mtriple=x86_64-- -run-pass=peephole-opt %s -o - | FileCheck %s
1 --- |
2 define void @func() { ret void }
3 ...
4 ---
5 # Check that instructions with MI.isBitcast() are only replaced by COPY if there
6 # are no SUBREG_TO_REG users.
7 # CHECK-LABEL: name: func
8 name: func
9 registers:
10 - { id: 0, class: gr32 }
11 - { id: 1, class: fr32 }
12 - { id: 2, class: gr32 }
13
14 - { id: 3, class: gr32 }
15 - { id: 4, class: fr32 }
16 - { id: 5, class: gr32 }
17 - { id: 6, class: gr64 }
18
19 body: |
20 bb.0:
21 ; CHECK: %1:fr32 = VMOVDI2SSrr %0
22 ; CHECK: %7:gr32 = COPY %0
23 ; CHECK: NOOP implicit %7
24 %0 = MOV32ri 42
25 %1 = VMOVDI2SSrr %0
26 %2 = MOVSS2DIrr %1
27 NOOP implicit %2
28
29 ; CHECK: %4:fr32 = VMOVDI2SSrr %3
30 ; CHECK-NOT: COPY
31 ; CHECK: %5:gr32 = MOVSS2DIrr %4
32 ; CHECK: %6:gr64 = SUBREG_TO_REG %5, 0
33 ; CHECK: NOOP implicit %6
34 %3 = MOV32ri 42
35 %4 = VMOVDI2SSrr %3
36 %5 = MOVSS2DIrr %4
37 %6 = SUBREG_TO_REG %5, 0, %subreg.sub_32bit
38 NOOP implicit %6
39 ...
test/CodeGen/X86/pr41619.ll (+0, -27): file deleted
None ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=x86_64-apple-macosx10.14.0 -mattr=avx2 | FileCheck %s
2
3 define void @foo(double %arg) {
4 ; CHECK-LABEL: foo:
5 ; CHECK: ## %bb.0: ## %bb
6 ; CHECK-NEXT: vmovq %xmm0, %rax
7 ; CHECK-NEXT: vmovd %eax, %xmm0
8 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
9 ; CHECK-NEXT: vmovq %xmm0, %rax
10 ; CHECK-NEXT: movl %eax, (%rax)
11 ; CHECK-NEXT: vmovlps %xmm1, (%rax)
12 ; CHECK-NEXT: retq
13 bb:
14 %tmp = bitcast double %arg to i64
15 %tmp1 = trunc i64 %tmp to i32
16 %tmp2 = bitcast i32 %tmp1 to float
17 %tmp3 = insertelement <4 x float> zeroinitializer, float %tmp2, i32 2
18 %tmp4 = bitcast <4 x float> %tmp3 to <2 x double>
19 %tmp5 = extractelement <2 x double> %tmp4, i32 0
20 %tmp6 = extractelement <2 x double> %tmp4, i32 1
21 %tmp7 = bitcast double %tmp6 to i64
22 %tmp8 = trunc i64 %tmp7 to i32
23 store i32 %tmp8, i32* undef, align 4
24 store double %tmp5, double* undef, align 16
25 ret void
26 }