llvm.org GIT mirror: llvm / ee18eb9
[AVX-512] Fix accidental uses of AH/BH/CH/DH after copies to/from mask registers

We've had several bugs (PR32256, PR32241) recently that resulted from uses of AH/BH/CH/DH either before or after a copy to/from a mask register.

This ultimately occurs because we create a COPY_TO_REGCLASS with VK1 and GR8. Then, in CopyToFromAsymmetricReg in X86InstrInfo, we find a 32-bit super register for the GR8 to emit the KMOV with. But as these tests demonstrate, it's possible for the GR8 register to be a high register, and we end up doing an accidental extract or insert from bits 15:8.

I think the best way forward is to stop making copies directly between mask registers and GR8/GR16. Instead we should restrict to copies between mask registers and GR32/GR64, and use EXTRACT_SUBREG/INSERT_SUBREG to handle the conversion from GR32 to GR16/GR8 or vice versa.

Unfortunately, this complicates fastisel a bit, since we now have to create subreg extracts where we used to create GR8 copies. We can probably make a helper function to bring down the repetition.

This does result in KMOVD being used for copies when BWI is available, because we don't know the original mask register size. That caused a lot of test deltas, since we have to split the checks for KMOVD vs. KMOVW based on BWI.

Differential Revision: https://reviews.llvm.org/D30968

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298928 91177308-0d34-0410-b5e6-96231b3b80d8

Craig Topper, 3 years ago
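To make the failure mode concrete: AH/BH/CH/DH alias bits 15:8 of their 32-bit super registers, so widening a GR8 operand to its GR32 super register is only correct when the GR8 is a low sub-register. The standalone C++ snippet below is an illustration only, not part of this patch; it assumes an x86-64 target and GCC/Clang extended inline asm. It shows that a value written through AH lands in bits 15:8 of EAX, which is why a KMOV emitted against the full super register sees (or clobbers) the wrong byte.

// hreg_alias_demo.cpp: illustration only, not part of the LLVM patch.
// Build on x86-64 with g++ or clang++; uses GCC extended inline asm.
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t value = 0;
  // Write 1 through the AH sub-register, then read back the full EAX.
  // AH aliases bits 15:8 of EAX, so the result is 0x100, not 0x1.
  asm volatile("movl $0, %%eax\n\t"
               "movb $1, %%ah\n\t"
               "movl %%eax, %0"
               : "=r"(value)
               :
               : "eax");
  std::printf("eax after writing AH = 0x%x\n", value); // prints 0x100
  return 0;
}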
57 changed files with 2929 additions and 2429 deletions.
366366 switch (VT.getSimpleVT().SimpleTy) {
367367 default: return false;
368368 case MVT::i1:
369 // TODO: Support this properly.
370 if (Subtarget->hasAVX512())
371 return false;
372 LLVM_FALLTHROUGH;
369373 case MVT::i8:
370374 Opc = X86::MOV8rm;
371375 RC = &X86::GR8RegClass;
539543 // In case ValReg is a K register, COPY to a GPR
540544 if (MRI.getRegClass(ValReg) == &X86::VK1RegClass) {
541545 unsigned KValReg = ValReg;
542 ValReg = createResultReg(Subtarget->is64Bit() ? &X86::GR8RegClass
543 : &X86::GR8_ABCD_LRegClass);
546 ValReg = createResultReg(&X86::GR32RegClass);
544547 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
545548 TII.get(TargetOpcode::COPY), ValReg)
546549 .addReg(KValReg);
550 ValReg = fastEmitInst_extractsubreg(MVT::i8, ValReg, /*Kill=*/true,
551 X86::sub_8bit);
547552 }
548553 // Mask out all but lowest bit.
549554 unsigned AndResult = createResultReg(&X86::GR8RegClass);
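The COPY-to-GR32 plus sub_8bit extract sequence introduced above is repeated at each of the fastisel sites in this patch. The commit message suggests a helper could reduce the repetition; below is a minimal sketch of what such a helper might look like, reusing only calls that already appear in these hunks. The name fastEmitCopyFromK1 is hypothetical, and the sketch assumes it would be a member of X86FastISel; it is not something this patch adds.

// Hypothetical helper (not part of this patch): materialize a VK1 value
// as a GR8 virtual register by copying through GR32, mirroring the
// repeated sequence in the fastisel hunks.
unsigned X86FastISel::fastEmitCopyFromK1(unsigned KReg) {
  // Copy the mask register into a 32-bit GPR so the copy lowers to a
  // KMOVW/KMOVD against the full GR32, never against an h-register.
  unsigned GPR32 = createResultReg(&X86::GR32RegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
          TII.get(TargetOpcode::COPY), GPR32)
      .addReg(KReg);
  // Hand back the low byte so callers keep working with a GR8 value.
  return fastEmitInst_extractsubreg(MVT::i8, GPR32, /*Kill=*/true,
                                    X86::sub_8bit);
}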
12791284 // In case SrcReg is a K register, COPY to a GPR
12801285 if (MRI.getRegClass(SrcReg) == &X86::VK1RegClass) {
12811286 unsigned KSrcReg = SrcReg;
1282 SrcReg = createResultReg(Subtarget->is64Bit() ? &X86::GR8RegClass
1283 : &X86::GR8_ABCD_LRegClass);
1287 SrcReg = createResultReg(&X86::GR32RegClass);
12841288 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
12851289 TII.get(TargetOpcode::COPY), SrcReg)
12861290 .addReg(KSrcReg);
1291 SrcReg = fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
1292 X86::sub_8bit);
12871293 }
12881294 SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
12891295 SrcVT = MVT::i8;
15791585 // In case ResultReg is a K register, COPY to a GPR
15801586 if (MRI.getRegClass(ResultReg) == &X86::VK1RegClass) {
15811587 unsigned KResultReg = ResultReg;
1582 ResultReg = createResultReg(Subtarget->is64Bit() ? &X86::GR8RegClass
1583 : &X86::GR8_ABCD_LRegClass);
1588 ResultReg = createResultReg(&X86::GR32RegClass);
15841589 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
15851590 TII.get(TargetOpcode::COPY), ResultReg)
15861591 .addReg(KResultReg);
1592 ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
1593 X86::sub_8bit);
15871594 }
15881595
15891596 // Set the high bits to zero.
17671774 // In case OpReg is a K register, COPY to a GPR
17681775 if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
17691776 unsigned KOpReg = OpReg;
1770 OpReg = createResultReg(Subtarget->is64Bit() ? &X86::GR8RegClass
1771 : &X86::GR8_ABCD_LRegClass);
1777 OpReg = createResultReg(&X86::GR32RegClass);
17721778 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
17731779 TII.get(TargetOpcode::COPY), OpReg)
17741780 .addReg(KOpReg);
1781 OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, /*Kill=*/true,
1782 X86::sub_8bit);
17751783 }
17761784 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
17771785 .addReg(OpReg)
21122120 // In case OpReg is a K register, COPY to a GPR
21132121 if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
21142122 unsigned KCondReg = CondReg;
2115 CondReg = createResultReg(Subtarget->is64Bit() ?
2116 &X86::GR8RegClass : &X86::GR8_ABCD_LRegClass);
2123 CondReg = createResultReg(&X86::GR32RegClass);
21172124 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
21182125 TII.get(TargetOpcode::COPY), CondReg)
21192126 .addReg(KCondReg, getKillRegState(CondIsKill));
2127 CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
2128 X86::sub_8bit);
21202129 }
21212130 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
21222131 .addReg(CondReg, getKillRegState(CondIsKill))
23262335 // In case OpReg is a K register, COPY to a GPR
23272336 if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
23282337 unsigned KCondReg = CondReg;
2329 CondReg = createResultReg(Subtarget->is64Bit() ?
2330 &X86::GR8RegClass : &X86::GR8_ABCD_LRegClass);
2338 CondReg = createResultReg(&X86::GR32RegClass);
23312339 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
23322340 TII.get(TargetOpcode::COPY), CondReg)
23332341 .addReg(KCondReg, getKillRegState(CondIsKill));
2342 CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
2343 X86::sub_8bit);
23342344 }
23352345 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
23362346 .addReg(CondReg, getKillRegState(CondIsKill))
33063316
33073317 // Handle zero-extension from i1 to i8, which is common.
33083318 if (ArgVT == MVT::i1) {
3319 // In case SrcReg is a K register, COPY to a GPR
3320 if (MRI.getRegClass(ArgReg) == &X86::VK1RegClass) {
3321 unsigned KArgReg = ArgReg;
3322 ArgReg = createResultReg(&X86::GR32RegClass);
3323 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3324 TII.get(TargetOpcode::COPY), ArgReg)
3325 .addReg(KArgReg);
3326 ArgReg = fastEmitInst_extractsubreg(MVT::i8, ArgReg, /*Kill=*/true,
3327 X86::sub_8bit);
3328 }
33093329 // Set the high bits to zero.
33103330 ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false);
33113331 ArgVT = MVT::i8;
36413661 switch (VT.SimpleTy) {
36423662 default: llvm_unreachable("Unexpected value type");
36433663 case MVT::i1:
3664 if (Subtarget->hasAVX512()) {
3665 // Need to copy to a VK1 register.
3666 unsigned ResultReg = createResultReg(&X86::VK1RegClass);
3667 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3668 TII.get(TargetOpcode::COPY), ResultReg).addReg(SrcReg);
3669 return ResultReg;
3670 }
36443671 case MVT::i8:
36453672 return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
36463673 X86::sub_8bit);
36623689 unsigned Opc = 0;
36633690 switch (VT.SimpleTy) {
36643691 default: llvm_unreachable("Unexpected value type");
3665 case MVT::i1: VT = MVT::i8; LLVM_FALLTHROUGH;
3692 case MVT::i1:
3693 // TODO: Support this properly.
3694 if (Subtarget->hasAVX512())
3695 return 0;
3696 VT = MVT::i8;
3697 LLVM_FALLTHROUGH;
36663698 case MVT::i8: Opc = X86::MOV8ri; break;
36673699 case MVT::i16: Opc = X86::MOV16ri; break;
36683700 case MVT::i32: Opc = X86::MOV32ri; break;
21822182
21832183 // GR from/to mask register
21842184 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2185 (COPY_TO_REGCLASS GR16:$src, VK16)>;
2185 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
21862186 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2187 (COPY_TO_REGCLASS VK16:$src, GR16)>;
2187 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
21882188
21892189 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2190 (COPY_TO_REGCLASS GR8:$src, VK8)>;
2190 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
21912191 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2192 (COPY_TO_REGCLASS VK8:$src, GR8)>;
2192 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
21932193
21942194 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
21952195 (KMOVWrk VK16:$src)>;
21962196 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2197 (i32 (INSERT_SUBREG (IMPLICIT_DEF),
2198 (i16 (COPY_TO_REGCLASS VK16:$src, GR16)), sub_16bit))>;
2197 (COPY_TO_REGCLASS VK16:$src, GR32)>;
21992198
22002199 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2201 (MOVZX32rr8 (COPY_TO_REGCLASS VK8:$src, GR8))>, Requires<[NoDQI]>;
2200 (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit))>, Requires<[NoDQI]>;
22022201 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
22032202 (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
22042203 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2205 (i32 (INSERT_SUBREG (IMPLICIT_DEF),
2206 (i8 (COPY_TO_REGCLASS VK8:$src, GR8)), sub_8bit))>;
2204 (COPY_TO_REGCLASS VK8:$src, GR32)>;
22072205
22082206 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
22092207 (COPY_TO_REGCLASS GR32:$src, VK32)>;
32873285
32883286 }
32893287
3288 multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
3289 AVX512VLVectorVTInfo _,
3290 dag Mask, RegisterClass MaskRC,
3291 SubRegIndex subreg> {
3292
3293 def : Pat<(masked_store addr:$dst, Mask,
3294 (_.info512.VT (insert_subvector undef,
3295 (_.info256.VT (insert_subvector undef,
3296 (_.info128.VT _.info128.RC:$src),
3297 (iPTR 0))),
3298 (iPTR 0)))),
3299 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
3300 (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)),
3301 (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
3302
3303 }
3304
32903305 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
32913306 dag Mask, RegisterClass MaskRC> {
32923307
33133328
33143329 }
33153330
3331 multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
3332 AVX512VLVectorVTInfo _,
3333 dag Mask, RegisterClass MaskRC,
3334 SubRegIndex subreg> {
3335
3336 def : Pat<(_.info128.VT (extract_subvector
3337 (_.info512.VT (masked_load addr:$srcAddr, Mask,
3338 (_.info512.VT (bitconvert
3339 (v16i32 immAllZerosV))))),
3340 (iPTR 0))),
3341 (!cast<Instruction>(InstrStr#rmkz)
3342 (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)),
3343 addr:$srcAddr)>;
3344
3345 def : Pat<(_.info128.VT (extract_subvector
3346 (_.info512.VT (masked_load addr:$srcAddr, Mask,
3347 (_.info512.VT (insert_subvector undef,
3348 (_.info256.VT (insert_subvector undef,
3349 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
3350 (iPTR 0))),
3351 (iPTR 0))))),
3352 (iPTR 0))),
3353 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
3354 (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)),
3355 addr:$srcAddr)>;
3356
3357 }
3358
33163359 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
33173360 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
33183361
33193362 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
33203363 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
3321 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
3322 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16>;
3323 defm : avx512_store_scalar_lowering<"VMOVSDZ", avx512vl_f64_info,
3324 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8>;
3364 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
3365 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
3366 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
3367 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
33253368
33263369 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
33273370 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
3328 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
3329 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16>;
3330 defm : avx512_load_scalar_lowering<"VMOVSDZ", avx512vl_f64_info,
3331 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8>;
3371 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
3372 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
3373 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
3374 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
33323375
33333376 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
33343377 (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
33393382 VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
33403383
33413384 def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
3342 (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)),
3385 (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM)),
33433386 (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
33443387
33453388 let hasSideEffects = 0 in
63086308
63096309 // SrcReg(MaskReg) -> DestReg(GR64)
63106310 // SrcReg(MaskReg) -> DestReg(GR32)
6311 // SrcReg(MaskReg) -> DestReg(GR16)
6312 // SrcReg(MaskReg) -> DestReg(GR8)
63136311
63146312 // All KMASK RegClasses hold the same k registers, can be tested against anyone.
63156313 if (X86::VK16RegClass.contains(SrcReg)) {
63196317 }
63206318 if (X86::GR32RegClass.contains(DestReg))
63216319 return Subtarget.hasBWI() ? X86::KMOVDrk : X86::KMOVWrk;
6322 if (X86::GR16RegClass.contains(DestReg)) {
6323 DestReg = getX86SubSuperRegister(DestReg, 32);
6324 return X86::KMOVWrk;
6325 }
6326 if (X86::GR8RegClass.contains(DestReg)) {
6327 assert(!isHReg(DestReg) && "Cannot move between mask and h-reg");
6328 DestReg = getX86SubSuperRegister(DestReg, 32);
6329 return Subtarget.hasDQI() ? X86::KMOVBrk : X86::KMOVWrk;
6330 }
63316320 }
63326321
63336322 // SrcReg(GR64) -> DestReg(MaskReg)
63346323 // SrcReg(GR32) -> DestReg(MaskReg)
6335 // SrcReg(GR16) -> DestReg(MaskReg)
6336 // SrcReg(GR8) -> DestReg(MaskReg)
63376324
63386325 // All KMASK RegClasses hold the same k registers, can be tested against anyone.
63396326 if (X86::VK16RegClass.contains(DestReg)) {
63436330 }
63446331 if (X86::GR32RegClass.contains(SrcReg))
63456332 return Subtarget.hasBWI() ? X86::KMOVDkr : X86::KMOVWkr;
6346 if (X86::GR16RegClass.contains(SrcReg)) {
6347 SrcReg = getX86SubSuperRegister(SrcReg, 32);
6348 return X86::KMOVWkr;
6349 }
6350 if (X86::GR8RegClass.contains(SrcReg)) {
6351 assert(!isHReg(SrcReg) && "Cannot move between mask and h-reg");
6352 SrcReg = getX86SubSuperRegister(SrcReg, 32);
6353 return Subtarget.hasDQI() ? X86::KMOVBkr : X86::KMOVWkr;
6354 }
63556333 }
63566334
63576335
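With the GR16 and GR8 cases deleted above, the mask-to-GPR copy opcode choice in CopyToFromAsymmetricReg reduces to the GR32/GR64 paths that remain. The sketch below restates just the GR32 direction as free functions for illustration; the function names and the bare bool parameter are assumptions (the real code queries Subtarget.hasBWI() and the register classes as in the hunk). It also explains the large test churn: without BWI the copy must use KMOVW, with BWI it uses KMOVD, so the test checks now split on BWI.

// Illustration only: the opcode selection that remains for k <-> GR32
// copies after this patch. KMOVD requires BWI; otherwise KMOVW is used.
unsigned getMaskToGR32CopyOpcode(bool HasBWI) {
  return HasBWI ? X86::KMOVDrk : X86::KMOVWrk;   // k -> GR32
}
unsigned getGR32ToMaskCopyOpcode(bool HasBWI) {
  return HasBWI ? X86::KMOVDkr : X86::KMOVWkr;   // GR32 -> k
}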
297297 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
298298 ; SKX-NEXT: vpmovw2m %xmm0, %k0
299299 ; SKX-NEXT: movb $85, %al
300 ; SKX-NEXT: kmovb %eax, %k1
300 ; SKX-NEXT: kmovd %eax, %k1
301301 ; SKX-NEXT: kandb %k1, %k0, %k0
302302 ; SKX-NEXT: vpmovm2w %k0, %xmm0
303303 ; SKX-NEXT: popq %rax
1818 ; CHECK-NEXT: korw %k3, %k2, %k1
1919 ; CHECK-NEXT: korw %k1, %k0, %k0
2020 ; CHECK-NEXT: kmovw %k0, %eax
21 ; CHECK-NEXT: # kill: %AX %AX %EAX
2122 ; CHECK-NEXT: retq
2223 entry:
2324 %0 = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %x, i32 13, i16 -1, i32 4)
546546 ret <8 x double> %b
547547 }
548548 define <8 x double> @sitof64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
549 ; NODQ-LABEL: sitof64_mask:
550 ; NODQ: ## BB#0:
551 ; NODQ-NEXT: kmovw %edi, %k1
552 ; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
553 ; NODQ-NEXT: retq
554 ;
555 ; DQ-LABEL: sitof64_mask:
556 ; DQ: ## BB#0:
557 ; DQ-NEXT: kmovb %edi, %k1
558 ; DQ-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
559 ; DQ-NEXT: retq
549 ; KNL-LABEL: sitof64_mask:
550 ; KNL: ## BB#0:
551 ; KNL-NEXT: kmovw %edi, %k1
552 ; KNL-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
553 ; KNL-NEXT: retq
554 ;
555 ; VLBW-LABEL: sitof64_mask:
556 ; VLBW: ## BB#0:
557 ; VLBW-NEXT: kmovd %edi, %k1
558 ; VLBW-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
559 ; VLBW-NEXT: retq
560 ;
561 ; VLNOBW-LABEL: sitof64_mask:
562 ; VLNOBW: ## BB#0:
563 ; VLNOBW-NEXT: kmovw %edi, %k1
564 ; VLNOBW-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
565 ; VLNOBW-NEXT: retq
566 ;
567 ; AVX512DQ-LABEL: sitof64_mask:
568 ; AVX512DQ: ## BB#0:
569 ; AVX512DQ-NEXT: kmovw %edi, %k1
570 ; AVX512DQ-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
571 ; AVX512DQ-NEXT: retq
572 ;
573 ; AVX512BW-LABEL: sitof64_mask:
574 ; AVX512BW: ## BB#0:
575 ; AVX512BW-NEXT: kmovd %edi, %k1
576 ; AVX512BW-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
577 ; AVX512BW-NEXT: retq
560578 %1 = bitcast i8 %c to <8 x i1>
561579 %2 = sitofp <8 x i32> %b to <8 x double>
562580 %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a
563581 ret <8 x double> %3
564582 }
565583 define <8 x double> @sitof64_maskz(<8 x i32> %a, i8 %b) nounwind {
566 ; NODQ-LABEL: sitof64_maskz:
567 ; NODQ: ## BB#0:
568 ; NODQ-NEXT: kmovw %edi, %k1
569 ; NODQ-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
570 ; NODQ-NEXT: retq
571 ;
572 ; DQ-LABEL: sitof64_maskz:
573 ; DQ: ## BB#0:
574 ; DQ-NEXT: kmovb %edi, %k1
575 ; DQ-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
576 ; DQ-NEXT: retq
584 ; KNL-LABEL: sitof64_maskz:
585 ; KNL: ## BB#0:
586 ; KNL-NEXT: kmovw %edi, %k1
587 ; KNL-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
588 ; KNL-NEXT: retq
589 ;
590 ; VLBW-LABEL: sitof64_maskz:
591 ; VLBW: ## BB#0:
592 ; VLBW-NEXT: kmovd %edi, %k1
593 ; VLBW-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
594 ; VLBW-NEXT: retq
595 ;
596 ; VLNOBW-LABEL: sitof64_maskz:
597 ; VLNOBW: ## BB#0:
598 ; VLNOBW-NEXT: kmovw %edi, %k1
599 ; VLNOBW-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
600 ; VLNOBW-NEXT: retq
601 ;
602 ; AVX512DQ-LABEL: sitof64_maskz:
603 ; AVX512DQ: ## BB#0:
604 ; AVX512DQ-NEXT: kmovw %edi, %k1
605 ; AVX512DQ-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
606 ; AVX512DQ-NEXT: retq
607 ;
608 ; AVX512BW-LABEL: sitof64_maskz:
609 ; AVX512BW: ## BB#0:
610 ; AVX512BW-NEXT: kmovd %edi, %k1
611 ; AVX512BW-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
612 ; AVX512BW-NEXT: retq
577613 %1 = bitcast i8 %b to <8 x i1>
578614 %2 = sitofp <8 x i32> %a to <8 x double>
579615 %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer
858894 ret <16 x double> %b
859895 }
860896 define <8 x double> @uitof64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
861 ; NODQ-LABEL: uitof64_mask:
862 ; NODQ: ## BB#0:
863 ; NODQ-NEXT: kmovw %edi, %k1
864 ; NODQ-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
865 ; NODQ-NEXT: retq
866 ;
867 ; DQ-LABEL: uitof64_mask:
868 ; DQ: ## BB#0:
869 ; DQ-NEXT: kmovb %edi, %k1
870 ; DQ-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
871 ; DQ-NEXT: retq
897 ; KNL-LABEL: uitof64_mask:
898 ; KNL: ## BB#0:
899 ; KNL-NEXT: kmovw %edi, %k1
900 ; KNL-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
901 ; KNL-NEXT: retq
902 ;
903 ; VLBW-LABEL: uitof64_mask:
904 ; VLBW: ## BB#0:
905 ; VLBW-NEXT: kmovd %edi, %k1
906 ; VLBW-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
907 ; VLBW-NEXT: retq
908 ;
909 ; VLNOBW-LABEL: uitof64_mask:
910 ; VLNOBW: ## BB#0:
911 ; VLNOBW-NEXT: kmovw %edi, %k1
912 ; VLNOBW-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
913 ; VLNOBW-NEXT: retq
914 ;
915 ; AVX512DQ-LABEL: uitof64_mask:
916 ; AVX512DQ: ## BB#0:
917 ; AVX512DQ-NEXT: kmovw %edi, %k1
918 ; AVX512DQ-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
919 ; AVX512DQ-NEXT: retq
920 ;
921 ; AVX512BW-LABEL: uitof64_mask:
922 ; AVX512BW: ## BB#0:
923 ; AVX512BW-NEXT: kmovd %edi, %k1
924 ; AVX512BW-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
925 ; AVX512BW-NEXT: retq
872926 %1 = bitcast i8 %c to <8 x i1>
873927 %2 = uitofp <8 x i32> %b to <8 x double>
874928 %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a
875929 ret <8 x double> %3
876930 }
877931 define <8 x double> @uitof64_maskz(<8 x i32> %a, i8 %b) nounwind {
878 ; NODQ-LABEL: uitof64_maskz:
879 ; NODQ: ## BB#0:
880 ; NODQ-NEXT: kmovw %edi, %k1
881 ; NODQ-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
882 ; NODQ-NEXT: retq
883 ;
884 ; DQ-LABEL: uitof64_maskz:
885 ; DQ: ## BB#0:
886 ; DQ-NEXT: kmovb %edi, %k1
887 ; DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
888 ; DQ-NEXT: retq
932 ; KNL-LABEL: uitof64_maskz:
933 ; KNL: ## BB#0:
934 ; KNL-NEXT: kmovw %edi, %k1
935 ; KNL-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
936 ; KNL-NEXT: retq
937 ;
938 ; VLBW-LABEL: uitof64_maskz:
939 ; VLBW: ## BB#0:
940 ; VLBW-NEXT: kmovd %edi, %k1
941 ; VLBW-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
942 ; VLBW-NEXT: retq
943 ;
944 ; VLNOBW-LABEL: uitof64_maskz:
945 ; VLNOBW: ## BB#0:
946 ; VLNOBW-NEXT: kmovw %edi, %k1
947 ; VLNOBW-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
948 ; VLNOBW-NEXT: retq
949 ;
950 ; AVX512DQ-LABEL: uitof64_maskz:
951 ; AVX512DQ: ## BB#0:
952 ; AVX512DQ-NEXT: kmovw %edi, %k1
953 ; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
954 ; AVX512DQ-NEXT: retq
955 ;
956 ; AVX512BW-LABEL: uitof64_maskz:
957 ; AVX512BW: ## BB#0:
958 ; AVX512BW-NEXT: kmovd %edi, %k1
959 ; AVX512BW-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
960 ; AVX512BW-NEXT: retq
889961 %1 = bitcast i8 %b to <8 x i1>
890962 %2 = uitofp <8 x i32> %a to <8 x double>
891963 %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer
12871287 }
12881288
12891289 define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
1290 ; ALL-LABEL: zext_16i1_to_16xi32:
1291 ; ALL: ## BB#0:
1292 ; ALL-NEXT: kmovw %edi, %k1
1293 ; ALL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
1294 ; ALL-NEXT: retq
1290 ; KNL-LABEL: zext_16i1_to_16xi32:
1291 ; KNL: ## BB#0:
1292 ; KNL-NEXT: kmovw %edi, %k1
1293 ; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
1294 ; KNL-NEXT: retq
1295 ;
1296 ; SKX-LABEL: zext_16i1_to_16xi32:
1297 ; SKX: ## BB#0:
1298 ; SKX-NEXT: kmovd %edi, %k1
1299 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
1300 ; SKX-NEXT: retq
12951301 %a = bitcast i16 %b to <16 x i1>
12961302 %c = zext <16 x i1> %a to <16 x i32>
12971303 ret <16 x i32> %c
13061312 ;
13071313 ; SKX-LABEL: zext_8i1_to_8xi64:
13081314 ; SKX: ## BB#0:
1309 ; SKX-NEXT: kmovb %edi, %k1
1315 ; SKX-NEXT: kmovd %edi, %k1
13101316 ; SKX-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
13111317 ; SKX-NEXT: retq
13121318 %a = bitcast i8 %b to <8 x i1>
13211327 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
13221328 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
13231329 ; KNL-NEXT: kmovw %k0, %eax
1330 ; KNL-NEXT: ## kill: %AX %AX %EAX
13241331 ; KNL-NEXT: retq
13251332 ;
13261333 ; SKX-LABEL: trunc_16i8_to_16i1:
13271334 ; SKX: ## BB#0:
13281335 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
13291336 ; SKX-NEXT: vpmovb2m %xmm0, %k0
1330 ; SKX-NEXT: kmovw %k0, %eax
1337 ; SKX-NEXT: kmovd %k0, %eax
1338 ; SKX-NEXT: ## kill: %AX %AX %EAX
13311339 ; SKX-NEXT: retq
13321340 %mask_b = trunc <16 x i8>%a to <16 x i1>
13331341 %mask = bitcast <16 x i1> %mask_b to i16
13401348 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
13411349 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
13421350 ; KNL-NEXT: kmovw %k0, %eax
1351 ; KNL-NEXT: ## kill: %AX %AX %EAX
13431352 ; KNL-NEXT: retq
13441353 ;
13451354 ; SKX-LABEL: trunc_16i32_to_16i1:
13461355 ; SKX: ## BB#0:
13471356 ; SKX-NEXT: vpslld $31, %zmm0, %zmm0
13481357 ; SKX-NEXT: vptestmd %zmm0, %zmm0, %k0
1349 ; SKX-NEXT: kmovw %k0, %eax
1358 ; SKX-NEXT: kmovd %k0, %eax
1359 ; SKX-NEXT: ## kill: %AX %AX %EAX
13501360 ; SKX-NEXT: vzeroupper
13511361 ; SKX-NEXT: retq
13521362 %mask_b = trunc <16 x i32>%a to <16 x i1>
13851395 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
13861396 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
13871397 ; KNL-NEXT: kmovw %k0, %eax
1398 ; KNL-NEXT: ## kill: %AL %AL %EAX
13881399 ; KNL-NEXT: retq
13891400 ;
13901401 ; SKX-LABEL: trunc_8i16_to_8i1:
13911402 ; SKX: ## BB#0:
13921403 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
13931404 ; SKX-NEXT: vpmovw2m %xmm0, %k0
1394 ; SKX-NEXT: kmovb %k0, %eax
1405 ; SKX-NEXT: kmovd %k0, %eax
1406 ; SKX-NEXT: ## kill: %AL %AL %EAX
13951407 ; SKX-NEXT: retq
13961408 %mask_b = trunc <8 x i16>%a to <8 x i1>
13971409 %mask = bitcast <8 x i1> %mask_b to i8
14191431
14201432
14211433 define i16 @trunc_i32_to_i1(i32 %a) {
1422 ; ALL-LABEL: trunc_i32_to_i1:
1423 ; ALL: ## BB#0:
1424 ; ALL-NEXT: andl $1, %edi
1425 ; ALL-NEXT: kmovw %edi, %k0
1426 ; ALL-NEXT: movw $-4, %ax
1427 ; ALL-NEXT: kmovw %eax, %k1
1428 ; ALL-NEXT: kshiftrw $1, %k1, %k1
1429 ; ALL-NEXT: kshiftlw $1, %k1, %k1
1430 ; ALL-NEXT: korw %k0, %k1, %k0
1431 ; ALL-NEXT: kmovw %k0, %eax
1432 ; ALL-NEXT: retq
1434 ; KNL-LABEL: trunc_i32_to_i1:
1435 ; KNL: ## BB#0:
1436 ; KNL-NEXT: andl $1, %edi
1437 ; KNL-NEXT: kmovw %edi, %k0
1438 ; KNL-NEXT: movw $-4, %ax
1439 ; KNL-NEXT: kmovw %eax, %k1
1440 ; KNL-NEXT: kshiftrw $1, %k1, %k1
1441 ; KNL-NEXT: kshiftlw $1, %k1, %k1
1442 ; KNL-NEXT: korw %k0, %k1, %k0
1443 ; KNL-NEXT: kmovw %k0, %eax
1444 ; KNL-NEXT: ## kill: %AX %AX %EAX
1445 ; KNL-NEXT: retq
1446 ;
1447 ; SKX-LABEL: trunc_i32_to_i1:
1448 ; SKX: ## BB#0:
1449 ; SKX-NEXT: andl $1, %edi
1450 ; SKX-NEXT: kmovw %edi, %k0
1451 ; SKX-NEXT: movw $-4, %ax
1452 ; SKX-NEXT: kmovd %eax, %k1
1453 ; SKX-NEXT: kshiftrw $1, %k1, %k1
1454 ; SKX-NEXT: kshiftlw $1, %k1, %k1
1455 ; SKX-NEXT: korw %k0, %k1, %k0
1456 ; SKX-NEXT: kmovd %k0, %eax
1457 ; SKX-NEXT: ## kill: %AX %AX %EAX
1458 ; SKX-NEXT: retq
14331459 %a_i = trunc i32 %a to i1
14341460 %maskv = insertelement <16 x i1> , i1 %a_i, i32 0
14351461 %res = bitcast <16 x i1> %maskv to i16
674674 define <4 x double> @test_mm512_mask_extractf64x4_pd(<4 x double> %__W, i8 %__U, <8 x double> %__A) {
675675 ; SKX-LABEL: test_mm512_mask_extractf64x4_pd:
676676 ; SKX: ## BB#0: ## %entry
677 ; SKX-NEXT: kmovb %edi, %k1
677 ; SKX-NEXT: kmovd %edi, %k1
678678 ; SKX-NEXT: vextractf64x4 $1, %zmm1, %ymm0 {%k1}
679679 ; SKX-NEXT: retq
680680 entry:
688688 define <4 x double> @test_mm512_maskz_extractf64x4_pd(i8 %__U, <8 x double> %__A) {
689689 ; SKX-LABEL: test_mm512_maskz_extractf64x4_pd:
690690 ; SKX: ## BB#0: ## %entry
691 ; SKX-NEXT: kmovb %edi, %k1
691 ; SKX-NEXT: kmovd %edi, %k1
692692 ; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0 {%k1} {z}
693693 ; SKX-NEXT: retq
694694 entry:
702702 define <4 x float> @test_mm512_mask_extractf32x4_ps(<4 x float> %__W, i8 %__U, <8 x double> %__A) {
703703 ; SKX-LABEL: test_mm512_mask_extractf32x4_ps:
704704 ; SKX: ## BB#0: ## %entry
705 ; SKX-NEXT: kmovb %edi, %k1
705 ; SKX-NEXT: kmovd %edi, %k1
706706 ; SKX-NEXT: vextractf32x4 $1, %zmm1, %xmm0 {%k1}
707707 ; SKX-NEXT: vzeroupper
708708 ; SKX-NEXT: retq
718718 define <4 x float> @test_mm512_maskz_extractf32x4_ps(i8 %__U, <8 x double> %__A) {
719719 ; SKX-LABEL: test_mm512_maskz_extractf32x4_ps:
720720 ; SKX: ## BB#0: ## %entry
721 ; SKX-NEXT: kmovb %edi, %k1
721 ; SKX-NEXT: kmovd %edi, %k1
722722 ; SKX-NEXT: vextractf32x4 $1, %zmm0, %xmm0 {%k1} {z}
723723 ; SKX-NEXT: vzeroupper
724724 ; SKX-NEXT: retq
734734 define <2 x double> @test_mm256_mask_extractf64x2_pd(<2 x double> %__W, i8 %__U, <4 x double> %__A) {
735735 ; SKX-LABEL: test_mm256_mask_extractf64x2_pd:
736736 ; SKX: ## BB#0: ## %entry
737 ; SKX-NEXT: kmovb %edi, %k1
737 ; SKX-NEXT: kmovd %edi, %k1
738738 ; SKX-NEXT: vextractf64x2 $1, %ymm1, %xmm0 {%k1}
739739 ; SKX-NEXT: vzeroupper
740740 ; SKX-NEXT: retq
749749 define <2 x double> @test_mm256_maskz_extractf64x2_pd(i8 %__U, <4 x double> %__A) {
750750 ; SKX-LABEL: test_mm256_maskz_extractf64x2_pd:
751751 ; SKX: ## BB#0: ## %entry
752 ; SKX-NEXT: kmovb %edi, %k1
752 ; SKX-NEXT: kmovd %edi, %k1
753753 ; SKX-NEXT: vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z}
754754 ; SKX-NEXT: vzeroupper
755755 ; SKX-NEXT: retq
764764 define <2 x i64> @test_mm256_mask_extracti64x2_epi64(<2 x i64> %__W, i8 %__U, <4 x i64> %__A) {
765765 ; SKX-LABEL: test_mm256_mask_extracti64x2_epi64:
766766 ; SKX: ## BB#0: ## %entry
767 ; SKX-NEXT: kmovb %edi, %k1
767 ; SKX-NEXT: kmovd %edi, %k1
768768 ; SKX-NEXT: vextracti64x2 $1, %ymm1, %xmm0 {%k1}
769769 ; SKX-NEXT: vzeroupper
770770 ; SKX-NEXT: retq
779779 define <2 x i64> @test_mm256_maskz_extracti64x2_epi64(i8 %__U, <4 x i64> %__A) {
780780 ; SKX-LABEL: test_mm256_maskz_extracti64x2_epi64:
781781 ; SKX: ## BB#0: ## %entry
782 ; SKX-NEXT: kmovb %edi, %k1
782 ; SKX-NEXT: kmovd %edi, %k1
783783 ; SKX-NEXT: vextracti64x2 $1, %ymm0, %xmm0 {%k1} {z}
784784 ; SKX-NEXT: vzeroupper
785785 ; SKX-NEXT: retq
794794 define <4 x float> @test_mm256_mask_extractf32x4_ps(<4 x float> %__W, i8 %__U, <8 x float> %__A) {
795795 ; SKX-LABEL: test_mm256_mask_extractf32x4_ps:
796796 ; SKX: ## BB#0: ## %entry
797 ; SKX-NEXT: kmovb %edi, %k1
797 ; SKX-NEXT: kmovd %edi, %k1
798798 ; SKX-NEXT: vextractf32x4 $1, %ymm1, %xmm0 {%k1}
799799 ; SKX-NEXT: vzeroupper
800800 ; SKX-NEXT: retq
809809 define <4 x float> @test_mm256_maskz_extractf32x4_ps(i8 %__U, <8 x float> %__A) {
810810 ; SKX-LABEL: test_mm256_maskz_extractf32x4_ps:
811811 ; SKX: ## BB#0: ## %entry
812 ; SKX-NEXT: kmovb %edi, %k1
812 ; SKX-NEXT: kmovd %edi, %k1
813813 ; SKX-NEXT: vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z}
814814 ; SKX-NEXT: vzeroupper
815815 ; SKX-NEXT: retq
824824 define <2 x i64> @test_mm256_mask_extracti32x4_epi32(<2 x i64> %__W, i8 %__U, <4 x i64> %__A) {
825825 ; SKX-LABEL: test_mm256_mask_extracti32x4_epi32:
826826 ; SKX: ## BB#0: ## %entry
827 ; SKX-NEXT: kmovb %edi, %k1
827 ; SKX-NEXT: kmovd %edi, %k1
828828 ; SKX-NEXT: vextracti32x4 $1, %ymm1, %xmm0 {%k1}
829829 ; SKX-NEXT: vzeroupper
830830 ; SKX-NEXT: retq
842842 define <2 x i64> @test_mm256_maskz_extracti32x4_epi32(i8 %__U, <4 x i64> %__A) {
843843 ; SKX-LABEL: test_mm256_maskz_extracti32x4_epi32:
844844 ; SKX: ## BB#0: ## %entry
845 ; SKX-NEXT: kmovb %edi, %k1
845 ; SKX-NEXT: kmovd %edi, %k1
846846 ; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm0 {%k1} {z}
847847 ; SKX-NEXT: vzeroupper
848848 ; SKX-NEXT: retq
859859 define <8 x float> @test_mm512_mask_extractf32x8_ps(<8 x float> %__W, i8 %__U, <16 x float> %__A) {
860860 ; SKX-LABEL: test_mm512_mask_extractf32x8_ps:
861861 ; SKX: ## BB#0: ## %entry
862 ; SKX-NEXT: kmovb %edi, %k1
862 ; SKX-NEXT: kmovd %edi, %k1
863863 ; SKX-NEXT: vextractf32x8 $1, %zmm1, %ymm0 {%k1}
864864 ; SKX-NEXT: retq
865865 entry:
872872 define <8 x float> @test_mm512_maskz_extractf32x8_ps(i8 %__U, <16 x float> %__A) {
873873 ; SKX-LABEL: test_mm512_maskz_extractf32x8_ps:
874874 ; SKX: ## BB#0: ## %entry
875 ; SKX-NEXT: kmovb %edi, %k1
875 ; SKX-NEXT: kmovd %edi, %k1
876876 ; SKX-NEXT: vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z}
877877 ; SKX-NEXT: retq
878878 entry:
885885 define <2 x double> @test_mm512_mask_extractf64x2_pd(<2 x double> %__W, i8 %__U, <8 x double> %__A) {
886886 ; SKX-LABEL: test_mm512_mask_extractf64x2_pd:
887887 ; SKX: ## BB#0: ## %entry
888 ; SKX-NEXT: kmovb %edi, %k1
888 ; SKX-NEXT: kmovd %edi, %k1
889889 ; SKX-NEXT: vextractf64x2 $3, %zmm1, %xmm0 {%k1}
890890 ; SKX-NEXT: vzeroupper
891891 ; SKX-NEXT: retq
900900 define <2 x double> @test_mm512_maskz_extractf64x2_pd(i8 %__U, <8 x double> %__A) {
901901 ; SKX-LABEL: test_mm512_maskz_extractf64x2_pd:
902902 ; SKX: ## BB#0: ## %entry
903 ; SKX-NEXT: kmovb %edi, %k1
903 ; SKX-NEXT: kmovd %edi, %k1
904904 ; SKX-NEXT: vextractf64x2 $3, %zmm0, %xmm0 {%k1} {z}
905905 ; SKX-NEXT: vzeroupper
906906 ; SKX-NEXT: retq
99 ; CHECK-NEXT: pushq %rax
1010 ; CHECK-NEXT: Lcfi0:
1111 ; CHECK-NEXT: .cfi_def_cfa_offset 16
12 ; CHECK-NEXT: movb $1, %al
1312 ; CHECK-NEXT: vucomiss %xmm1, %xmm0
14 ; CHECK-NEXT: setp %cl
15 ; CHECK-NEXT: setne %dl
16 ; CHECK-NEXT: setnp %sil
17 ; CHECK-NEXT: sete %dil
18 ; CHECK-NEXT: andb %sil, %dil
19 ; CHECK-NEXT: ## implicit-def: %R8D
20 ; CHECK-NEXT: movb %dil, %r8b
21 ; CHECK-NEXT: andl $1, %r8d
22 ; CHECK-NEXT: kmovw %r8d, %k0
23 ; CHECK-NEXT: orb %cl, %dl
24 ; CHECK-NEXT: ## implicit-def: %R8D
25 ; CHECK-NEXT: movb %dl, %r8b
26 ; CHECK-NEXT: andl $1, %r8d
27 ; CHECK-NEXT: kmovw %r8d, %k1
28 ; CHECK-NEXT: kmovw %k1, %ecx
29 ; CHECK-NEXT: testb $1, %cl
30 ; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp) ## 1-byte Spill
13 ; CHECK-NEXT: setp %al
14 ; CHECK-NEXT: setne %cl
15 ; CHECK-NEXT: setnp %dl
16 ; CHECK-NEXT: sete %sil
17 ; CHECK-NEXT: andb %dl, %sil
18 ; CHECK-NEXT: ## implicit-def: %EDI
19 ; CHECK-NEXT: movb %sil, %dil
20 ; CHECK-NEXT: andl $1, %edi
21 ; CHECK-NEXT: kmovw %edi, %k0
22 ; CHECK-NEXT: orb %al, %cl
23 ; CHECK-NEXT: ## implicit-def: %EDI
24 ; CHECK-NEXT: movb %cl, %dil
25 ; CHECK-NEXT: andl $1, %edi
26 ; CHECK-NEXT: kmovw %edi, %k1
27 ; CHECK-NEXT: kmovw %k1, %edi
28 ; CHECK-NEXT: movb %dil, %al
29 ; CHECK-NEXT: testb $1, %al
3130 ; CHECK-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Spill
3231 ; CHECK-NEXT: jne LBB0_1
3332 ; CHECK-NEXT: jmp LBB0_2
1313 define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base, i8* %stbuf) {
1414 ; CHECK-LABEL: gather_mask_dps:
1515 ; CHECK: ## BB#0:
16 ; CHECK-NEXT: kmovw %edi, %k1
16 ; CHECK-NEXT: kmovd %edi, %k1
1717 ; CHECK-NEXT: kmovq %k1, %k2
1818 ; CHECK-NEXT: vgatherdps (%rsi,%zmm0,4), %zmm1 {%k2}
1919 ; CHECK-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
2929 define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
3030 ; CHECK-LABEL: gather_mask_dpd:
3131 ; CHECK: ## BB#0:
32 ; CHECK-NEXT: kmovb %edi, %k1
32 ; CHECK-NEXT: kmovd %edi, %k1
3333 ; CHECK-NEXT: kmovq %k1, %k2
3434 ; CHECK-NEXT: vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k2}
3535 ; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
4545 define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base, i8* %stbuf) {
4646 ; CHECK-LABEL: gather_mask_qps:
4747 ; CHECK: ## BB#0:
48 ; CHECK-NEXT: kmovb %edi, %k1
48 ; CHECK-NEXT: kmovd %edi, %k1
4949 ; CHECK-NEXT: kmovq %k1, %k2
5050 ; CHECK-NEXT: vgatherqps (%rsi,%zmm0,4), %ymm1 {%k2}
5151 ; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
6161 define void @gather_mask_qpd(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
6262 ; CHECK-LABEL: gather_mask_qpd:
6363 ; CHECK: ## BB#0:
64 ; CHECK-NEXT: kmovb %edi, %k1
64 ; CHECK-NEXT: kmovd %edi, %k1
6565 ; CHECK-NEXT: kmovq %k1, %k2
6666 ; CHECK-NEXT: vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k2}
6767 ; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
8989 define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %base, i8* %stbuf) {
9090 ; CHECK-LABEL: gather_mask_dd:
9191 ; CHECK: ## BB#0:
92 ; CHECK-NEXT: kmovw %edi, %k1
92 ; CHECK-NEXT: kmovd %edi, %k1
9393 ; CHECK-NEXT: kmovq %k1, %k2
9494 ; CHECK-NEXT: vpgatherdd (%rsi,%zmm0,4), %zmm1 {%k2}
9595 ; CHECK-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
105105 define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base, i8* %stbuf) {
106106 ; CHECK-LABEL: gather_mask_qd:
107107 ; CHECK: ## BB#0:
108 ; CHECK-NEXT: kmovb %edi, %k1
108 ; CHECK-NEXT: kmovd %edi, %k1
109109 ; CHECK-NEXT: kmovq %k1, %k2
110110 ; CHECK-NEXT: vpgatherqd (%rsi,%zmm0,4), %ymm1 {%k2}
111111 ; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
121121 define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
122122 ; CHECK-LABEL: gather_mask_qq:
123123 ; CHECK: ## BB#0:
124 ; CHECK-NEXT: kmovb %edi, %k1
124 ; CHECK-NEXT: kmovd %edi, %k1
125125 ; CHECK-NEXT: kmovq %k1, %k2
126126 ; CHECK-NEXT: vpgatherqq (%rsi,%zmm0,4), %zmm1 {%k2}
127127 ; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
137137 define void @gather_mask_dq(<8 x i32> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
138138 ; CHECK-LABEL: gather_mask_dq:
139139 ; CHECK: ## BB#0:
140 ; CHECK-NEXT: kmovb %edi, %k1
140 ; CHECK-NEXT: kmovd %edi, %k1
141141 ; CHECK-NEXT: kmovq %k1, %k2
142142 ; CHECK-NEXT: vpgatherdq (%rsi,%ymm0,4), %zmm1 {%k2}
143143 ; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
153153 define void @gather_mask_dpd_execdomain(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
154154 ; CHECK-LABEL: gather_mask_dpd_execdomain:
155155 ; CHECK: ## BB#0:
156 ; CHECK-NEXT: kmovb %edi, %k1
156 ; CHECK-NEXT: kmovd %edi, %k1
157157 ; CHECK-NEXT: vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k1}
158158 ; CHECK-NEXT: vmovapd %zmm1, (%rdx)
159159 ; CHECK-NEXT: vzeroupper
166166 define void @gather_mask_qpd_execdomain(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
167167 ; CHECK-LABEL: gather_mask_qpd_execdomain:
168168 ; CHECK: ## BB#0:
169 ; CHECK-NEXT: kmovb %edi, %k1
169 ; CHECK-NEXT: kmovd %edi, %k1
170170 ; CHECK-NEXT: vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k1}
171171 ; CHECK-NEXT: vmovapd %zmm1, (%rdx)
172172 ; CHECK-NEXT: vzeroupper
179179 define <16 x float> @gather_mask_dps_execdomain(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base) {
180180 ; CHECK-LABEL: gather_mask_dps_execdomain:
181181 ; CHECK: ## BB#0:
182 ; CHECK-NEXT: kmovw %edi, %k1
182 ; CHECK-NEXT: kmovd %edi, %k1
183183 ; CHECK-NEXT: vgatherdps (%rsi,%zmm0,4), %zmm1 {%k1}
184184 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
185185 ; CHECK-NEXT: retq
190190 define <8 x float> @gather_mask_qps_execdomain(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base) {
191191 ; CHECK-LABEL: gather_mask_qps_execdomain:
192192 ; CHECK: ## BB#0:
193 ; CHECK-NEXT: kmovb %edi, %k1
193 ; CHECK-NEXT: kmovd %edi, %k1
194194 ; CHECK-NEXT: vgatherqps (%rsi,%zmm0,4), %ymm1 {%k1}
195195 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
196196 ; CHECK-NEXT: retq
201201 define void @scatter_mask_dpd_execdomain(<8 x i32> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
202202 ; CHECK-LABEL: scatter_mask_dpd_execdomain:
203203 ; CHECK: ## BB#0:
204 ; CHECK-NEXT: kmovb %esi, %k1
204 ; CHECK-NEXT: kmovd %esi, %k1
205205 ; CHECK-NEXT: vmovapd (%rdi), %zmm1
206206 ; CHECK-NEXT: vscatterdpd %zmm1, (%rcx,%ymm0,4) {%k1}
207207 ; CHECK-NEXT: vzeroupper
214214 define void @scatter_mask_qpd_execdomain(<8 x i64> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
215215 ; CHECK-LABEL: scatter_mask_qpd_execdomain:
216216 ; CHECK: ## BB#0:
217 ; CHECK-NEXT: kmovb %esi, %k1
217 ; CHECK-NEXT: kmovd %esi, %k1
218218 ; CHECK-NEXT: vmovapd (%rdi), %zmm1
219219 ; CHECK-NEXT: vscatterqpd %zmm1, (%rcx,%zmm0,4) {%k1}
220220 ; CHECK-NEXT: vzeroupper
227227 define void @scatter_mask_dps_execdomain(<16 x i32> %ind, <16 x float>* %src, i16 %mask, i8* %base, i8* %stbuf) {
228228 ; CHECK-LABEL: scatter_mask_dps_execdomain:
229229 ; CHECK: ## BB#0:
230 ; CHECK-NEXT: kmovw %esi, %k1
230 ; CHECK-NEXT: kmovd %esi, %k1
231231 ; CHECK-NEXT: vmovaps (%rdi), %zmm1
232232 ; CHECK-NEXT: vscatterdps %zmm1, (%rcx,%zmm0,4) {%k1}
233233 ; CHECK-NEXT: vzeroupper
240240 define void @scatter_mask_qps_execdomain(<8 x i64> %ind, <8 x float>* %src, i8 %mask, i8* %base, i8* %stbuf) {
241241 ; CHECK-LABEL: scatter_mask_qps_execdomain:
242242 ; CHECK: ## BB#0:
243 ; CHECK-NEXT: kmovb %esi, %k1
243 ; CHECK-NEXT: kmovd %esi, %k1
244244 ; CHECK-NEXT: vmovaps (%rdi), %ymm1
245245 ; CHECK-NEXT: vscatterqps %ymm1, (%rcx,%zmm0,4) {%k1}
246246 ; CHECK-NEXT: vzeroupper
277277 ; CHECK-NEXT: kxorw %k0, %k0, %k1
278278 ; CHECK-NEXT: vgatherpf1qps (%rdi,%zmm0,4) {%k1}
279279 ; CHECK-NEXT: movb $1, %al
280 ; CHECK-NEXT: kmovb %eax, %k1
280 ; CHECK-NEXT: kmovd %eax, %k1
281281 ; CHECK-NEXT: vscatterpf0qps (%rdi,%zmm0,2) {%k1}
282282 ; CHECK-NEXT: movb $120, %al
283 ; CHECK-NEXT: kmovb %eax, %k1
283 ; CHECK-NEXT: kmovd %eax, %k1
284284 ; CHECK-NEXT: vscatterpf1qps (%rdi,%zmm0,2) {%k1}
285285 ; CHECK-NEXT: vzeroupper
286286 ; CHECK-NEXT: retq
296296 define <2 x double>@test_int_x86_avx512_gather3div2_df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
297297 ; CHECK-LABEL: test_int_x86_avx512_gather3div2_df:
298298 ; CHECK: ## BB#0:
299 ; CHECK-NEXT: kmovb %esi, %k1
299 ; CHECK-NEXT: kmovd %esi, %k1
300300 ; CHECK-NEXT: vgatherqpd (%rdi,%xmm1,4), %xmm0 {%k1}
301301 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
302302 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
314314 define <2 x i64>@test_int_x86_avx512_gather3div2_di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
315315 ; CHECK-LABEL: test_int_x86_avx512_gather3div2_di:
316316 ; CHECK: ## BB#0:
317 ; CHECK-NEXT: kmovb %esi, %k1
317 ; CHECK-NEXT: kmovd %esi, %k1
318318 ; CHECK-NEXT: vpgatherqq (%rdi,%xmm1,8), %xmm0 {%k1}
319319 ; CHECK-NEXT: vpaddq %xmm0, %xmm0, %xmm0
320320 ; CHECK-NEXT: retq
329329 define <4 x double>@test_int_x86_avx512_gather3div4_df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
330330 ; CHECK-LABEL: test_int_x86_avx512_gather3div4_df:
331331 ; CHECK: ## BB#0:
332 ; CHECK-NEXT: kmovb %esi, %k1
332 ; CHECK-NEXT: kmovd %esi, %k1
333333 ; CHECK-NEXT: vgatherqpd (%rdi,%ymm1,4), %ymm0 {%k1}
334334 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
335335 ; CHECK-NEXT: vxorpd %ymm2, %ymm2, %ymm2
347347 define <4 x i64>@test_int_x86_avx512_gather3div4_di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
348348 ; CHECK-LABEL: test_int_x86_avx512_gather3div4_di:
349349 ; CHECK: ## BB#0:
350 ; CHECK-NEXT: kmovb %esi, %k1
350 ; CHECK-NEXT: kmovd %esi, %k1
351351 ; CHECK-NEXT: vpgatherqq (%rdi,%ymm1,8), %ymm0 {%k1}
352352 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
353353 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2
365365 define <4 x float>@test_int_x86_avx512_gather3div4_sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
366366 ; CHECK-LABEL: test_int_x86_avx512_gather3div4_sf:
367367 ; CHECK: ## BB#0:
368 ; CHECK-NEXT: kmovb %esi, %k1
368 ; CHECK-NEXT: kmovd %esi, %k1
369369 ; CHECK-NEXT: vgatherqps (%rdi,%xmm1,4), %xmm0 {%k1}
370370 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
371371 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
383383 define <4 x i32>@test_int_x86_avx512_gather3div4_si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
384384 ; CHECK-LABEL: test_int_x86_avx512_gather3div4_si:
385385 ; CHECK: ## BB#0:
386 ; CHECK-NEXT: kmovb %esi, %k1
386 ; CHECK-NEXT: kmovd %esi, %k1
387387 ; CHECK-NEXT: kxnorw %k0, %k0, %k2
388388 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
389389 ; CHECK-NEXT: vpgatherqd (%rdi,%xmm1,4), %xmm2 {%k2}
401401 define <4 x float>@test_int_x86_avx512_gather3div8_sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
402402 ; CHECK-LABEL: test_int_x86_avx512_gather3div8_sf:
403403 ; CHECK: ## BB#0:
404 ; CHECK-NEXT: kmovb %esi, %k1
404 ; CHECK-NEXT: kmovd %esi, %k1
405405 ; CHECK-NEXT: vgatherqps (%rdi,%ymm1,4), %xmm0 {%k1}
406406 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
407407 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
420420 define <4 x i32>@test_int_x86_avx512_gather3div8_si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
421421 ; CHECK-LABEL: test_int_x86_avx512_gather3div8_si:
422422 ; CHECK: ## BB#0:
423 ; CHECK-NEXT: kmovb %esi, %k1
423 ; CHECK-NEXT: kmovd %esi, %k1
424424 ; CHECK-NEXT: vmovdqa %xmm0, %xmm2
425425 ; CHECK-NEXT: kmovq %k1, %k2
426426 ; CHECK-NEXT: vpgatherqd (%rdi,%ymm1,4), %xmm2 {%k2}
439439 define <2 x double>@test_int_x86_avx512_gather3siv2_df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
440440 ; CHECK-LABEL: test_int_x86_avx512_gather3siv2_df:
441441 ; CHECK: ## BB#0:
442 ; CHECK-NEXT: kmovb %esi, %k1
442 ; CHECK-NEXT: kmovd %esi, %k1
443443 ; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,4), %xmm0 {%k1}
444444 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
445445 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
457457 define <2 x i64>@test_int_x86_avx512_gather3siv2_di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
458458 ; CHECK-LABEL: test_int_x86_avx512_gather3siv2_di:
459459 ; CHECK: ## BB#0:
460 ; CHECK-NEXT: kmovb %esi, %k1
460 ; CHECK-NEXT: kmovd %esi, %k1
461461 ; CHECK-NEXT: vpgatherdq (%rdi,%xmm1,8), %xmm0 {%k1}
462462 ; CHECK-NEXT: vpaddq %xmm0, %xmm0, %xmm0
463463 ; CHECK-NEXT: retq
472472 define <4 x double>@test_int_x86_avx512_gather3siv4_df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
473473 ; CHECK-LABEL: test_int_x86_avx512_gather3siv4_df:
474474 ; CHECK: ## BB#0:
475 ; CHECK-NEXT: kmovb %esi, %k1
475 ; CHECK-NEXT: kmovd %esi, %k1
476476 ; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,4), %ymm0 {%k1}
477477 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
478478 ; CHECK-NEXT: vxorpd %ymm2, %ymm2, %ymm2
490490 define <4 x i64>@test_int_x86_avx512_gather3siv4_di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
491491 ; CHECK-LABEL: test_int_x86_avx512_gather3siv4_di:
492492 ; CHECK: ## BB#0:
493 ; CHECK-NEXT: kmovb %esi, %k1
493 ; CHECK-NEXT: kmovd %esi, %k1
494494 ; CHECK-NEXT: vpgatherdq (%rdi,%xmm1,8), %ymm0 {%k1}
495495 ; CHECK-NEXT: vpaddq %ymm0, %ymm0, %ymm0
496496 ; CHECK-NEXT: retq
505505 define <4 x float>@test_int_x86_avx512_gather3siv4_sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
506506 ; CHECK-LABEL: test_int_x86_avx512_gather3siv4_sf:
507507 ; CHECK: ## BB#0:
508 ; CHECK-NEXT: kmovb %esi, %k1
508 ; CHECK-NEXT: kmovd %esi, %k1
509509 ; CHECK-NEXT: vgatherdps (%rdi,%xmm1,4), %xmm0 {%k1}
510510 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
511511 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
523523 define <4 x i32>@test_int_x86_avx512_gather3siv4_si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
524524 ; CHECK-LABEL: test_int_x86_avx512_gather3siv4_si:
525525 ; CHECK: ## BB#0:
526 ; CHECK-NEXT: kmovb %esi, %k1
526 ; CHECK-NEXT: kmovd %esi, %k1
527527 ; CHECK-NEXT: kxnorw %k0, %k0, %k2
528528 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
529529 ; CHECK-NEXT: vpgatherdd (%rdi,%xmm1,4), %xmm2 {%k2}
541541 define <8 x float>@test_int_x86_avx512_gather3siv8_sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
542542 ; CHECK-LABEL: test_int_x86_avx512_gather3siv8_sf:
543543 ; CHECK: ## BB#0:
544 ; CHECK-NEXT: kmovb %esi, %k1
544 ; CHECK-NEXT: kmovd %esi, %k1
545545 ; CHECK-NEXT: vgatherdps (%rdi,%ymm1,4), %ymm0 {%k1}
546546 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
547547 ; CHECK-NEXT: vxorps %ymm2, %ymm2, %ymm2
559559 define <8 x i32>@test_int_x86_avx512_gather3siv8_si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
560560 ; CHECK-LABEL: test_int_x86_avx512_gather3siv8_si:
561561 ; CHECK: ## BB#0:
562 ; CHECK-NEXT: kmovb %esi, %k1
562 ; CHECK-NEXT: kmovd %esi, %k1
563563 ; CHECK-NEXT: vmovdqa %ymm0, %ymm2
564564 ; CHECK-NEXT: kmovq %k1, %k2
565565 ; CHECK-NEXT: vpgatherdd (%rdi,%ymm1,4), %ymm2 {%k2}
577577 define void@test_int_x86_avx512_scatterdiv2_df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3) {
578578 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_df:
579579 ; CHECK: ## BB#0:
580 ; CHECK-NEXT: kmovb %esi, %k1
580 ; CHECK-NEXT: kmovd %esi, %k1
581581 ; CHECK-NEXT: kxnorw %k0, %k0, %k2
582582 ; CHECK-NEXT: vscatterqpd %xmm1, (%rdi,%xmm0,2) {%k2}
583583 ; CHECK-NEXT: vscatterqpd %xmm1, (%rdi,%xmm0,4) {%k1}
592592 define void@test_int_x86_avx512_scatterdiv2_di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3) {
593593 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_di:
594594 ; CHECK: ## BB#0:
595 ; CHECK-NEXT: kmovb %esi, %k1
595 ; CHECK-NEXT: kmovd %esi, %k1
596596 ; CHECK-NEXT: vpscatterqq %xmm1, (%rdi,%xmm0,2) {%k1}
597597 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
598598 ; CHECK-NEXT: vpscatterqq %xmm1, (%rdi,%xmm0,4) {%k1}
607607 define void@test_int_x86_avx512_scatterdiv4_df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3) {
608608 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_df:
609609 ; CHECK: ## BB#0:
610 ; CHECK-NEXT: kmovb %esi, %k1
610 ; CHECK-NEXT: kmovd %esi, %k1
611611 ; CHECK-NEXT: vscatterqpd %ymm1, (%rdi,%ymm0,2) {%k1}
612612 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
613613 ; CHECK-NEXT: vscatterqpd %ymm1, (%rdi,%ymm0,4) {%k1}
623623 define void@test_int_x86_avx512_scatterdiv4_di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3) {
624624 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_di:
625625 ; CHECK: ## BB#0:
626 ; CHECK-NEXT: kmovb %esi, %k1
626 ; CHECK-NEXT: kmovd %esi, %k1
627627 ; CHECK-NEXT: vpscatterqq %ymm1, (%rdi,%ymm0,2) {%k1}
628628 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
629629 ; CHECK-NEXT: vpscatterqq %ymm1, (%rdi,%ymm0,4) {%k1}
639639 define void@test_int_x86_avx512_scatterdiv4_sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3) {
640640 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_sf:
641641 ; CHECK: ## BB#0:
642 ; CHECK-NEXT: kmovb %esi, %k1
642 ; CHECK-NEXT: kmovd %esi, %k1
643643 ; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%xmm0,2) {%k1}
644644 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
645645 ; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%xmm0,4) {%k1}
654654 define void@test_int_x86_avx512_scatterdiv4_si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3) {
655655 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_si:
656656 ; CHECK: ## BB#0:
657 ; CHECK-NEXT: kmovb %esi, %k1
657 ; CHECK-NEXT: kmovd %esi, %k1
658658 ; CHECK-NEXT: kxnorw %k0, %k0, %k2
659659 ; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,2) {%k2}
660660 ; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,4) {%k1}
669669 define void@test_int_x86_avx512_scatterdiv8_sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3) {
670670 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_sf:
671671 ; CHECK: ## BB#0:
672 ; CHECK-NEXT: kmovb %esi, %k1
672 ; CHECK-NEXT: kmovd %esi, %k1
673673 ; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%ymm0,2) {%k1}
674674 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
675675 ; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%ymm0,4) {%k1}
685685 define void@test_int_x86_avx512_scatterdiv8_si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3) {
686686 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_si:
687687 ; CHECK: ## BB#0:
688 ; CHECK-NEXT: kmovb %esi, %k1
688 ; CHECK-NEXT: kmovd %esi, %k1
689689 ; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%ymm0,2) {%k1}
690690 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
691691 ; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%ymm0,4) {%k1}
701701 define void@test_int_x86_avx512_scattersiv2_df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3) {
702702 ; CHECK-LABEL: test_int_x86_avx512_scattersiv2_df:
703703 ; CHECK: ## BB#0:
704 ; CHECK-NEXT: kmovb %esi, %k1
704 ; CHECK-NEXT: kmovd %esi, %k1
705705 ; CHECK-NEXT: kxnorw %k0, %k0, %k2
706706 ; CHECK-NEXT: vscatterdpd %xmm1, (%rdi,%xmm0,2) {%k2}
707707 ; CHECK-NEXT: vscatterdpd %xmm1, (%rdi,%xmm0,4) {%k1}
716716 define void@test_int_x86_avx512_scattersiv2_di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3) {
717717 ; CHECK-LABEL: test_int_x86_avx512_scattersiv2_di:
718718 ; CHECK: ## BB#0:
719 ; CHECK-NEXT: kmovb %esi, %k1
719 ; CHECK-NEXT: kmovd %esi, %k1
720720 ; CHECK-NEXT: kxnorw %k0, %k0, %k2
721721 ; CHECK-NEXT: vpscatterdq %xmm1, (%rdi,%xmm0,2) {%k2}
722722 ; CHECK-NEXT: vpscatterdq %xmm1, (%rdi,%xmm0,4) {%k1}
731731 define void@test_int_x86_avx512_scattersiv4_df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3) {
732732 ; CHECK-LABEL: test_int_x86_avx512_scattersiv4_df:
733733 ; CHECK: ## BB#0:
734 ; CHECK-NEXT: kmovb %esi, %k1
734 ; CHECK-NEXT: kmovd %esi, %k1
735735 ; CHECK-NEXT: vscatterdpd %ymm1, (%rdi,%xmm0,2) {%k1}
736736 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
737737 ; CHECK-NEXT: vscatterdpd %ymm1, (%rdi,%xmm0,4) {%k1}
747747 define void@test_int_x86_avx512_scattersiv4_di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3) {
748748 ; CHECK-LABEL: test_int_x86_avx512_scattersiv4_di:
749749 ; CHECK: ## BB#0:
750 ; CHECK-NEXT: kmovb %esi, %k1
750 ; CHECK-NEXT: kmovd %esi, %k1
751751 ; CHECK-NEXT: kxnorw %k0, %k0, %k2
752752 ; CHECK-NEXT: vpscatterdq %ymm1, (%rdi,%xmm0,2) {%k2}
753753 ; CHECK-NEXT: vpscatterdq %ymm1, (%rdi,%xmm0,4) {%k1}
763763 define void@test_int_x86_avx512_scattersiv4_sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3) {
764764 ; CHECK-LABEL: test_int_x86_avx512_scattersiv4_sf:
765765 ; CHECK: ## BB#0:
766 ; CHECK-NEXT: kmovb %esi, %k1
766 ; CHECK-NEXT: kmovd %esi, %k1
767767 ; CHECK-NEXT: vscatterdps %xmm1, (%rdi,%xmm0,2) {%k1}
768768 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
769769 ; CHECK-NEXT: vscatterdps %xmm1, (%rdi,%xmm0,4) {%k1}
778778 define void@test_int_x86_avx512_scattersiv4_si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3) {
779779 ; CHECK-LABEL: test_int_x86_avx512_scattersiv4_si:
780780 ; CHECK: ## BB#0:
781 ; CHECK-NEXT: kmovb %esi, %k1
781 ; CHECK-NEXT: kmovd %esi, %k1
782782 ; CHECK-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,2) {%k1}
783783 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
784784 ; CHECK-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
793793 define void@test_int_x86_avx512_scattersiv8_sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3) {
794794 ; CHECK-LABEL: test_int_x86_avx512_scattersiv8_sf:
795795 ; CHECK: ## BB#0:
796 ; CHECK-NEXT: kmovb %esi, %k1
796 ; CHECK-NEXT: kmovd %esi, %k1
797797 ; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,2) {%k1}
798798 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
799799 ; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,4) {%k1}
809809 define void@test_int_x86_avx512_scattersiv8_si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3) {
810810 ; CHECK-LABEL: test_int_x86_avx512_scattersiv8_si:
811811 ; CHECK: ## BB#0:
812 ; CHECK-NEXT: kmovb %esi, %k1
812 ; CHECK-NEXT: kmovd %esi, %k1
813813 ; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
814814 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
815815 ; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
828828 ; CHECK-NEXT: kxorw %k0, %k0, %k1
829829 ; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
830830 ; CHECK-NEXT: movb $1, %al
831 ; CHECK-NEXT: kmovb %eax, %k1
831 ; CHECK-NEXT: kmovd %eax, %k1
832832 ; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
833833 ; CHECK-NEXT: movb $96, %al
834 ; CHECK-NEXT: kmovb %eax, %k1
834 ; CHECK-NEXT: kmovd %eax, %k1
835835 ; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
836836 ; CHECK-NEXT: vzeroupper
837837 ; CHECK-NEXT: retq
852852 ; CHECK-NEXT: vmovaps %zmm1, %zmm3
853853 ; CHECK-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm3 {%k1}
854854 ; CHECK-NEXT: movw $1, %ax
855 ; CHECK-NEXT: kmovw %eax, %k1
855 ; CHECK-NEXT: kmovd %eax, %k1
856856 ; CHECK-NEXT: vmovaps %zmm1, %zmm4
857857 ; CHECK-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm4 {%k1}
858858 ; CHECK-NEXT: movw $220, %ax
859 ; CHECK-NEXT: kmovw %eax, %k1
859 ; CHECK-NEXT: kmovd %eax, %k1
860860 ; CHECK-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
861861 ; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm0
862862 ; CHECK-NEXT: vaddps %zmm4, %zmm1, %zmm1
342342 ; KNL-NEXT: kshiftlw $1, %k1, %k1
343343 ; KNL-NEXT: korw %k0, %k1, %k0
344344 ; KNL-NEXT: kmovw %k0, %eax
345 ; KNL-NEXT: ## kill: %AX %AX %EAX
345346 ; KNL-NEXT: retq
346347 ;
347348 ; SKX-LABEL: test13:
351352 ; SKX-NEXT: andl $1, %eax
352353 ; SKX-NEXT: kmovw %eax, %k0
353354 ; SKX-NEXT: movw $-4, %ax
354 ; SKX-NEXT: kmovw %eax, %k1
355 ; SKX-NEXT: kmovd %eax, %k1
355356 ; SKX-NEXT: kshiftrw $1, %k1, %k1
356357 ; SKX-NEXT: kshiftlw $1, %k1, %k1
357358 ; SKX-NEXT: korw %k0, %k1, %k0
358 ; SKX-NEXT: kmovw %k0, %eax
359 ; SKX-NEXT: kmovd %k0, %eax
360 ; SKX-NEXT: ## kill: %AX %AX %EAX
359361 ; SKX-NEXT: retq
360362 %cmp_res = icmp ult i32 %a, %b
361363 %maskv = insertelement <16 x i1> , i1 %cmp_res, i32 0
432434 ; KNL-NEXT: vpslld $31, %zmm2, %zmm0
433435 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
434436 ; KNL-NEXT: kmovw %k0, %eax
437 ; KNL-NEXT: ## kill: %AX %AX %EAX
435438 ; KNL-NEXT: retq
436439 ;
437440 ; SKX-LABEL: test16:
439442 ; SKX-NEXT: movzbl (%rdi), %eax
440443 ; SKX-NEXT: andl $1, %eax
441444 ; SKX-NEXT: kmovd %eax, %k0
442 ; SKX-NEXT: kmovw %esi, %k1
445 ; SKX-NEXT: kmovd %esi, %k1
443446 ; SKX-NEXT: vpmovm2d %k1, %zmm0
444447 ; SKX-NEXT: vpmovm2d %k0, %zmm1
445448 ; SKX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,16,11,12,13,14,15]
446449 ; SKX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
447450 ; SKX-NEXT: vpmovd2m %zmm2, %k0
448 ; SKX-NEXT: kmovw %k0, %eax
451 ; SKX-NEXT: kmovd %k0, %eax
452 ; SKX-NEXT: ## kill: %AX %AX %EAX
449453 ; SKX-NEXT: vzeroupper
450454 ; SKX-NEXT: retq
451455 %x = load i1 , i1 * %addr, align 128
469473 ; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
470474 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
471475 ; KNL-NEXT: kmovw %k0, %eax
476 ; KNL-NEXT: ## kill: %AL %AL %EAX
472477 ; KNL-NEXT: retq
473478 ;
474479 ; SKX-LABEL: test17:
476481 ; SKX-NEXT: movzbl (%rdi), %eax
477482 ; SKX-NEXT: andl $1, %eax
478483 ; SKX-NEXT: kmovd %eax, %k0
479 ; SKX-NEXT: kmovb %esi, %k1
484 ; SKX-NEXT: kmovd %esi, %k1
480485 ; SKX-NEXT: vpmovm2q %k1, %zmm0
481486 ; SKX-NEXT: vpmovm2q %k0, %zmm1
482487 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,8,5,6,7]
483488 ; SKX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
484489 ; SKX-NEXT: vpmovq2m %zmm2, %k0
485 ; SKX-NEXT: kmovb %k0, %eax
490 ; SKX-NEXT: kmovd %k0, %eax
491 ; SKX-NEXT: ## kill: %AL %AL %EAX
486492 ; SKX-NEXT: vzeroupper
487493 ; SKX-NEXT: retq
488494 %x = load i1 , i1 * %addr, align 128
13351341 ; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
13361342 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
13371343 ; KNL-NEXT: kmovw %k0, %eax
1344 ; KNL-NEXT: ## kill: %AL %AL %EAX
13381345 ; KNL-NEXT: retq
13391346 ;
13401347 ; SKX-LABEL: test_iinsertelement_v4i1:
13491356 ; SKX-NEXT: vpbroadcastq %xmm1, %xmm1
13501357 ; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
13511358 ; SKX-NEXT: vpmovd2m %xmm0, %k0
1352 ; SKX-NEXT: kmovb %k0, %eax
1359 ; SKX-NEXT: kmovd %k0, %eax
1360 ; SKX-NEXT: ## kill: %AL %AL %EAX
13531361 ; SKX-NEXT: retq
13541362 %cmp_res_i1 = icmp ult i32 %a, %b
13551363 %cmp_cmp_vec = icmp ult <4 x i32> %x, %y
13801388 ; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
13811389 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
13821390 ; KNL-NEXT: kmovw %k0, %eax
1391 ; KNL-NEXT: ## kill: %AL %AL %EAX
13831392 ; KNL-NEXT: retq
13841393 ;
13851394 ; SKX-LABEL: test_iinsertelement_v2i1:
13931402 ; SKX-NEXT: kshiftrw $1, %k1, %k1
13941403 ; SKX-NEXT: kshiftlw $1, %k0, %k0
13951404 ; SKX-NEXT: korw %k0, %k1, %k0
1396 ; SKX-NEXT: kmovb %k0, %eax
1405 ; SKX-NEXT: kmovd %k0, %eax
1406 ; SKX-NEXT: ## kill: %AL %AL %EAX
13971407 ; SKX-NEXT: retq
13981408 %cmp_res_i1 = icmp ult i32 %a, %b
13991409 %cmp_cmp_vec = icmp ult <2 x i64> %x, %y
503503 ; CHECK: ## BB#0:
504504 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
505505 ; CHECK-NEXT: kmovw %k0, %eax
506 ; CHECK-NEXT: ## kill: %AX %AX %EAX
506507 ; CHECK-NEXT: retq
507508 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
508509 ret i16 %res
514515 ; CHECK-NEXT: kmovw %edi, %k1
515516 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
516517 ; CHECK-NEXT: kmovw %k0, %eax
518 ; CHECK-NEXT: ## kill: %AX %AX %EAX
517519 ; CHECK-NEXT: retq
518520 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
519521 ret i16 %res
526528 ; CHECK: ## BB#0:
527529 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
528530 ; CHECK-NEXT: kmovw %k0, %eax
531 ; CHECK-NEXT: ## kill: %AL %AL %EAX
529532 ; CHECK-NEXT: retq
530533 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
531534 ret i8 %res
537540 ; CHECK-NEXT: kmovw %edi, %k1
538541 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
539542 ; CHECK-NEXT: kmovw %k0, %eax
543 ; CHECK-NEXT: ## kill: %AL %AL %EAX
540544 ; CHECK-NEXT: retq
541545 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
542546 ret i8 %res
549553 ; CHECK: ## BB#0:
550554 ; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
551555 ; CHECK-NEXT: kmovw %k0, %eax
556 ; CHECK-NEXT: ## kill: %AX %AX %EAX
552557 ; CHECK-NEXT: retq
553558 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
554559 ret i16 %res
560565 ; CHECK-NEXT: kmovw %edi, %k1
561566 ; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
562567 ; CHECK-NEXT: kmovw %k0, %eax
568 ; CHECK-NEXT: ## kill: %AX %AX %EAX
563569 ; CHECK-NEXT: retq
564570 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
565571 ret i16 %res
572578 ; CHECK: ## BB#0:
573579 ; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
574580 ; CHECK-NEXT: kmovw %k0, %eax
581 ; CHECK-NEXT: ## kill: %AL %AL %EAX
575582 ; CHECK-NEXT: retq
576583 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
577584 ret i8 %res
583590 ; CHECK-NEXT: kmovw %edi, %k1
584591 ; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
585592 ; CHECK-NEXT: kmovw %k0, %eax
593 ; CHECK-NEXT: ## kill: %AL %AL %EAX
586594 ; CHECK-NEXT: retq
587595 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
588596 ret i8 %res
3939 ; CHECK-NEXT: kandw %k0, %k1, %k0
4040 ; CHECK-NEXT: kandw %k0, %k2, %k0
4141 ; CHECK-NEXT: kmovw %k0, %eax
42 ; CHECK-NEXT: ## kill: %AX %AX %EAX
4243 ; CHECK-NEXT: retq
4344 %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
4445 %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
5657 ; CHECK-NEXT: kandnw %k2, %k1, %k1
5758 ; CHECK-NEXT: kandnw %k0, %k1, %k0
5859 ; CHECK-NEXT: kmovw %k0, %eax
60 ; CHECK-NEXT: ## kill: %AX %AX %EAX
5961 ; CHECK-NEXT: retq
6062 %t1 = call i16 @llvm.x86.avx512.kandn.w(i16 %a0, i16 8)
6163 %t2 = call i16 @llvm.x86.avx512.kandn.w(i16 %t1, i16 %a1)
6971 ; CHECK-NEXT: kmovw %edi, %k0
7072 ; CHECK-NEXT: knotw %k0, %k0
7173 ; CHECK-NEXT: kmovw %k0, %eax
74 ; CHECK-NEXT: ## kill: %AX %AX %EAX
7275 ; CHECK-NEXT: retq
7376 %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
7477 ret i16 %res
8588 ; CHECK-NEXT: korw %k0, %k1, %k0
8689 ; CHECK-NEXT: korw %k0, %k2, %k0
8790 ; CHECK-NEXT: kmovw %k0, %eax
91 ; CHECK-NEXT: ## kill: %AX %AX %EAX
8892 ; CHECK-NEXT: retq
8993 %t1 = call i16 @llvm.x86.avx512.kor.w(i16 %a0, i16 8)
9094 %t2 = call i16 @llvm.x86.avx512.kor.w(i16 %t1, i16 %a1)
100104 ; CHECK-NEXT: kmovw %esi, %k1
101105 ; CHECK-NEXT: kunpckbw %k1, %k0, %k0
102106 ; CHECK-NEXT: kmovw %k0, %eax
107 ; CHECK-NEXT: ## kill: %AX %AX %EAX
103108 ; CHECK-NEXT: retq
104109 %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
105110 ret i16 %res
116121 ; CHECK-NEXT: kxorw %k0, %k1, %k0
117122 ; CHECK-NEXT: kxorw %k0, %k2, %k0
118123 ; CHECK-NEXT: kmovw %k0, %eax
124 ; CHECK-NEXT: ## kill: %AX %AX %EAX
119125 ; CHECK-NEXT: retq
120126 %t1 = call i16 @llvm.x86.avx512.kxnor.w(i16 %a0, i16 8)
121127 %t2 = call i16 @llvm.x86.avx512.kxnor.w(i16 %t1, i16 %a1)
133139 ; CHECK-NEXT: kxorw %k0, %k1, %k0
134140 ; CHECK-NEXT: kxorw %k0, %k2, %k0
135141 ; CHECK-NEXT: kmovw %k0, %eax
142 ; CHECK-NEXT: ## kill: %AX %AX %EAX
136143 ; CHECK-NEXT: retq
137144 %t1 = call i16 @llvm.x86.avx512.kxor.w(i16 %a0, i16 8)
138145 %t2 = call i16 @llvm.x86.avx512.kxor.w(i16 %t1, i16 %a1)
713720 ; CHECK: ## BB#0:
714721 ; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
715722 ; CHECK-NEXT: kmovw %k0, %eax
723 ; CHECK-NEXT: ## kill: %AX %AX %EAX
716724 ; CHECK-NEXT: retq
717725 %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
718726 ret i16 %res
724732 ; CHECK: ## BB#0:
725733 ; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k0
726734 ; CHECK-NEXT: kmovw %k0, %eax
735 ; CHECK-NEXT: ## kill: %AL %AL %EAX
727736 ; CHECK-NEXT: retq
728737 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
729738 ret i8 %res
791800 ; CHECK-LABEL: test_vptestmq:
792801 ; CHECK: ## BB#0:
793802 ; CHECK-NEXT: kmovw %edi, %k1
803 ; CHECK-NEXT: vptestmq %zmm1, %zmm0, %k0
804 ; CHECK-NEXT: kmovw %k0, %ecx
794805 ; CHECK-NEXT: vptestmq %zmm1, %zmm0, %k0 {%k1}
795 ; CHECK-NEXT: kmovw %k0, %ecx
796 ; CHECK-NEXT: vptestmq %zmm1, %zmm0, %k0
797806 ; CHECK-NEXT: kmovw %k0, %eax
798807 ; CHECK-NEXT: addb %cl, %al
808 ; CHECK-NEXT: ## kill: %AL %AL %EAX
799809 ; CHECK-NEXT: retq
800810 %res = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
801811 %res1 = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 %m)
808818 ; CHECK-LABEL: test_vptestmd:
809819 ; CHECK: ## BB#0:
810820 ; CHECK-NEXT: kmovw %edi, %k1
821 ; CHECK-NEXT: vptestmd %zmm1, %zmm0, %k0
822 ; CHECK-NEXT: kmovw %k0, %ecx
811823 ; CHECK-NEXT: vptestmd %zmm1, %zmm0, %k0 {%k1}
812 ; CHECK-NEXT: kmovw %k0, %ecx
813 ; CHECK-NEXT: vptestmd %zmm1, %zmm0, %k0
814824 ; CHECK-NEXT: kmovw %k0, %eax
815825 ; CHECK-NEXT: addl %ecx, %eax
816826 ; CHECK-NEXT: ## kill: %AX %AX %EAX
837847 define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
838848 ; CHECK-LABEL: test_cmp_d_512:
839849 ; CHECK: ## BB#0:
840 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k3
841 ; CHECK-NEXT: vpcmpltd %zmm1, %zmm0, %k4
842 ; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k5
843 ; CHECK-NEXT: vpcmpunordd %zmm1, %zmm0, %k6
844 ; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k7
845 ; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k2
846 ; CHECK-NEXT: vpcmpnled %zmm1, %zmm0, %k1
847 ; CHECK-NEXT: vpcmpordd %zmm1, %zmm0, %k0
848 ; CHECK-NEXT: kmovw %k4, %eax
849 ; CHECK-NEXT: kmovw %k3, %ecx
850 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
851 ; CHECK-NEXT: vpcmpltd %zmm1, %zmm0, %k1
852 ; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k2
853 ; CHECK-NEXT: vpcmpunordd %zmm1, %zmm0, %k3
854 ; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k4
855 ; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k5
856 ; CHECK-NEXT: vpcmpnled %zmm1, %zmm0, %k6
857 ; CHECK-NEXT: vpcmpordd %zmm1, %zmm0, %k7
858 ; CHECK-NEXT: kmovw %k1, %eax
859 ; CHECK-NEXT: kmovw %k0, %ecx
850860 ; CHECK-NEXT: vmovd %ecx, %xmm0
851861 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
862 ; CHECK-NEXT: kmovw %k2, %eax
863 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
864 ; CHECK-NEXT: kmovw %k3, %eax
865 ; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
866 ; CHECK-NEXT: kmovw %k4, %eax
867 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
852868 ; CHECK-NEXT: kmovw %k5, %eax
853 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
869 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
854870 ; CHECK-NEXT: kmovw %k6, %eax
855 ; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
871 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
856872 ; CHECK-NEXT: kmovw %k7, %eax
857 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
858 ; CHECK-NEXT: kmovw %k2, %eax
859 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
860 ; CHECK-NEXT: kmovw %k1, %eax
861 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
862 ; CHECK-NEXT: kmovw %k0, %eax
863873 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
864874 ; CHECK-NEXT: retq
865875 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
884894 define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
885895 ; CHECK-LABEL: test_mask_cmp_d_512:
886896 ; CHECK: ## BB#0:
887 ; CHECK-NEXT: kmovw %edi, %k3
888 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k4 {%k3}
889 ; CHECK-NEXT: vpcmpltd %zmm1, %zmm0, %k5 {%k3}
890 ; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k6 {%k3}
891 ; CHECK-NEXT: vpcmpunordd %zmm1, %zmm0, %k7 {%k3}
892 ; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 {%k3}
893 ; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k2 {%k3}
894 ; CHECK-NEXT: vpcmpnled %zmm1, %zmm0, %k1 {%k3}
895 ; CHECK-NEXT: vpcmpordd %zmm1, %zmm0, %k3 {%k3}
896 ; CHECK-NEXT: kmovw %k5, %eax
897 ; CHECK-NEXT: kmovw %k4, %ecx
897 ; CHECK-NEXT: kmovw %edi, %k1
898 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
899 ; CHECK-NEXT: vpcmpltd %zmm1, %zmm0, %k2 {%k1}
900 ; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k3 {%k1}
901 ; CHECK-NEXT: vpcmpunordd %zmm1, %zmm0, %k4 {%k1}
902 ; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k5 {%k1}
903 ; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k6 {%k1}
904 ; CHECK-NEXT: vpcmpnled %zmm1, %zmm0, %k7 {%k1}
905 ; CHECK-NEXT: vpcmpordd %zmm1, %zmm0, %k1 {%k1}
906 ; CHECK-NEXT: kmovw %k2, %eax
907 ; CHECK-NEXT: kmovw %k0, %ecx
898908 ; CHECK-NEXT: vmovd %ecx, %xmm0
899909 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
910 ; CHECK-NEXT: kmovw %k3, %eax
911 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
912 ; CHECK-NEXT: kmovw %k4, %eax
913 ; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
914 ; CHECK-NEXT: kmovw %k5, %eax
915 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
900916 ; CHECK-NEXT: kmovw %k6, %eax
901 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
917 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
902918 ; CHECK-NEXT: kmovw %k7, %eax
903 ; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
904 ; CHECK-NEXT: kmovw %k0, %eax
905 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
906 ; CHECK-NEXT: kmovw %k2, %eax
907 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
919 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
908920 ; CHECK-NEXT: kmovw %k1, %eax
909 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
910 ; CHECK-NEXT: kmovw %k3, %eax
911921 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
912922 ; CHECK-NEXT: retq
913923 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
934944 define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
935945 ; CHECK-LABEL: test_ucmp_d_512:
936946 ; CHECK: ## BB#0:
937 ; CHECK-NEXT: vpcmpequd %zmm1, %zmm0, %k3
938 ; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k4
939 ; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k5
940 ; CHECK-NEXT: vpcmpunordud %zmm1, %zmm0, %k6
941 ; CHECK-NEXT: vpcmpnequd %zmm1, %zmm0, %k7
942 ; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k2
943 ; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k1
944 ; CHECK-NEXT: vpcmpordud %zmm1, %zmm0, %k0
945 ; CHECK-NEXT: kmovw %k4, %eax
946 ; CHECK-NEXT: kmovw %k3, %ecx
947 ; CHECK-NEXT: vpcmpequd %zmm1, %zmm0, %k0
948 ; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k1
949 ; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k2
950 ; CHECK-NEXT: vpcmpunordud %zmm1, %zmm0, %k3
951 ; CHECK-NEXT: vpcmpnequd %zmm1, %zmm0, %k4
952 ; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k5
953 ; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k6
954 ; CHECK-NEXT: vpcmpordud %zmm1, %zmm0, %k7
955 ; CHECK-NEXT: kmovw %k1, %eax
956 ; CHECK-NEXT: kmovw %k0, %ecx
947957 ; CHECK-NEXT: vmovd %ecx, %xmm0
948958 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
959 ; CHECK-NEXT: kmovw %k2, %eax
960 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
961 ; CHECK-NEXT: kmovw %k3, %eax
962 ; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
963 ; CHECK-NEXT: kmovw %k4, %eax
964 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
949965 ; CHECK-NEXT: kmovw %k5, %eax
950 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
966 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
951967 ; CHECK-NEXT: kmovw %k6, %eax
952 ; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
968 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
953969 ; CHECK-NEXT: kmovw %k7, %eax
954 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
955 ; CHECK-NEXT: kmovw %k2, %eax
956 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
957 ; CHECK-NEXT: kmovw %k1, %eax
958 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
959 ; CHECK-NEXT: kmovw %k0, %eax
960970 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
961971 ; CHECK-NEXT: retq
962972 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
981991 define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
982992 ; CHECK-LABEL: test_mask_ucmp_d_512:
983993 ; CHECK: ## BB#0:
984 ; CHECK-NEXT: kmovw %edi, %k3
985 ; CHECK-NEXT: vpcmpequd %zmm1, %zmm0, %k4 {%k3}
986 ; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k5 {%k3}
987 ; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k6 {%k3}
988 ; CHECK-NEXT: vpcmpunordud %zmm1, %zmm0, %k7 {%k3}
989 ; CHECK-NEXT: vpcmpnequd %zmm1, %zmm0, %k0 {%k3}
990 ; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k2 {%k3}
991 ; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k1 {%k3}
992 ; CHECK-NEXT: vpcmpordud %zmm1, %zmm0, %k3 {%k3}
993 ; CHECK-NEXT: kmovw %k5, %eax
994 ; CHECK-NEXT: kmovw %k4, %ecx
994 ; CHECK-NEXT: kmovw %edi, %k1
995 ; CHECK-NEXT: vpcmpequd %zmm1, %zmm0, %k0 {%k1}
996 ; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k2 {%k1}
997 ; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k3 {%k1}
998 ; CHECK-NEXT: vpcmpunordud %zmm1, %zmm0, %k4 {%k1}
999 ; CHECK-NEXT: vpcmpnequd %zmm1, %zmm0, %k5 {%k1}
1000 ; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k6 {%k1}
1001 ; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k7 {%k1}
1002 ; CHECK-NEXT: vpcmpordud %zmm1, %zmm0, %k1 {%k1}
1003 ; CHECK-NEXT: kmovw %k2, %eax
1004 ; CHECK-NEXT: kmovw %k0, %ecx
9951005 ; CHECK-NEXT: vmovd %ecx, %xmm0
9961006 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
1007 ; CHECK-NEXT: kmovw %k3, %eax
1008 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
1009 ; CHECK-NEXT: kmovw %k4, %eax
1010 ; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
1011 ; CHECK-NEXT: kmovw %k5, %eax
1012 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
9971013 ; CHECK-NEXT: kmovw %k6, %eax
998 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
1014 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
9991015 ; CHECK-NEXT: kmovw %k7, %eax
1000 ; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
1001 ; CHECK-NEXT: kmovw %k0, %eax
1002 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
1003 ; CHECK-NEXT: kmovw %k2, %eax
1004 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
1016 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
10051017 ; CHECK-NEXT: kmovw %k1, %eax
1006 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
1007 ; CHECK-NEXT: kmovw %k3, %eax
10081018 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
10091019 ; CHECK-NEXT: retq
10101020 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
10311041 define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
10321042 ; CHECK-LABEL: test_cmp_q_512:
10331043 ; CHECK: ## BB#0:
1034 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k3
1035 ; CHECK-NEXT: vpcmpltq %zmm1, %zmm0, %k4
1036 ; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k5
1037 ; CHECK-NEXT: vpcmpunordq %zmm1, %zmm0, %k6
1038 ; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k7
1039 ; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k2
1040 ; CHECK-NEXT: vpcmpnleq %zmm1, %zmm0, %k1
1041 ; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k0
1042 ; CHECK-NEXT: kmovw %k4, %eax
1043 ; CHECK-NEXT: kmovw %k3, %ecx
1044 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
1045 ; CHECK-NEXT: vpcmpltq %zmm1, %zmm0, %k1
1046 ; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k2
1047 ; CHECK-NEXT: vpcmpunordq %zmm1, %zmm0, %k3
1048 ; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k4
1049 ; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k5
1050 ; CHECK-NEXT: vpcmpnleq %zmm1, %zmm0, %k6
1051 ; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k7
1052 ; CHECK-NEXT: kmovw %k1, %eax
1053 ; CHECK-NEXT: kmovw %k0, %ecx
10441054 ; CHECK-NEXT: vmovd %ecx, %xmm0
10451055 ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
1056 ; CHECK-NEXT: kmovw %k2, %eax
1057 ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
1058 ; CHECK-NEXT: kmovw %k3, %eax
1059 ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
1060 ; CHECK-NEXT: kmovw %k4, %eax
1061 ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
10461062 ; CHECK-NEXT: kmovw %k5, %eax
1047 ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
1063 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
10481064 ; CHECK-NEXT: kmovw %k6, %eax
1049 ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
1065 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
10501066 ; CHECK-NEXT: kmovw %k7, %eax
1051 ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
1052 ; CHECK-NEXT: kmovw %k2, %eax
1053 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
1054 ; CHECK-NEXT: kmovw %k1, %eax
1055 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
1056 ; CHECK-NEXT: kmovw %k0, %eax
10571067 ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
10581068 ; CHECK-NEXT: retq
10591069 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
10781088 define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
10791089 ; CHECK-LABEL: test_mask_cmp_q_512:
10801090 ; CHECK: ## BB#0:
1081 ; CHECK-NEXT: kmovw %edi, %k3
1082 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k4 {%k3}
1083 ; CHECK-NEXT: vpcmpltq %zmm1, %zmm0, %k5 {%k3}
1084 ; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k6 {%k3}
1085 ; CHECK-NEXT: vpcmpunordq %zmm1, %zmm0, %k7 {%k3}
1086 ; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k0 {%k3}
1087 ; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k2 {%k3}
1088 ; CHECK-NEXT: vpcmpnleq %zmm1, %zmm0, %k1 {%k3}
1089 ; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k3 {%k3}
1090 ; CHECK-NEXT: kmovw %k5, %eax
1091 ; CHECK-NEXT: kmovw %k4, %ecx
1091 ; CHECK-NEXT: kmovw %edi, %k1
1092 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
1093 ; CHECK-NEXT: vpcmpltq %zmm1, %zmm0, %k2 {%k1}
1094 ; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k3 {%k1}
1095 ; CHECK-NEXT: vpcmpunordq %zmm1, %zmm0, %k4 {%k1}
1096 ; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k5 {%k1}
1097 ; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k6 {%k1}
1098 ; CHECK-NEXT: vpcmpnleq %zmm1, %zmm0, %k7 {%k1}
1099 ; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k1 {%k1}
1100 ; CHECK-NEXT: kmovw %k2, %eax
1101 ; CHECK-NEXT: kmovw %k0, %ecx
10921102 ; CHECK-NEXT: vmovd %ecx, %xmm0
10931103 ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
1104 ; CHECK-NEXT: kmovw %k3, %eax
1105 ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
1106 ; CHECK-NEXT: kmovw %k4, %eax
1107 ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
1108 ; CHECK-NEXT: kmovw %k5, %eax
1109 ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
10941110 ; CHECK-NEXT: kmovw %k6, %eax
1095 ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
1111 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
10961112 ; CHECK-NEXT: kmovw %k7, %eax
1097 ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
1098 ; CHECK-NEXT: kmovw %k0, %eax
1099 ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
1100 ; CHECK-NEXT: kmovw %k2, %eax
1101 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
1113 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
11021114 ; CHECK-NEXT: kmovw %k1, %eax
1103 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
1104 ; CHECK-NEXT: kmovw %k3, %eax
11051115 ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
11061116 ; CHECK-NEXT: retq
11071117 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
11281138 define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
11291139 ; CHECK-LABEL: test_ucmp_q_512:
11301140 ; CHECK: ## BB#0:
1131 ; CHECK-NEXT: vpcmpequq %zmm1, %zmm0, %k3
1132 ; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k4
1133 ; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k5
1134 ; CHECK-NEXT: vpcmpunorduq %zmm1, %zmm0, %k6
1135 ; CHECK-NEXT: vpcmpnequq %zmm1, %zmm0, %k7
1136 ; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k2
1137 ; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1
1138 ; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k0
1139 ; CHECK-NEXT: kmovw %k4, %eax
1140 ; CHECK-NEXT: kmovw %k3, %ecx
1141 ; CHECK-NEXT: vpcmpequq %zmm1, %zmm0, %k0
1142 ; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k1
1143 ; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k2
1144 ; CHECK-NEXT: vpcmpunorduq %zmm1, %zmm0, %k3
1145 ; CHECK-NEXT: vpcmpnequq %zmm1, %zmm0, %k4
1146 ; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k5
1147 ; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k6
1148 ; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k7
1149 ; CHECK-NEXT: kmovw %k1, %eax
1150 ; CHECK-NEXT: kmovw %k0, %ecx
11411151 ; CHECK-NEXT: vmovd %ecx, %xmm0
11421152 ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
1153 ; CHECK-NEXT: kmovw %k2, %eax
1154 ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
1155 ; CHECK-NEXT: kmovw %k3, %eax
1156 ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
1157 ; CHECK-NEXT: kmovw %k4, %eax
1158 ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
11431159 ; CHECK-NEXT: kmovw %k5, %eax
1144 ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
1160 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
11451161 ; CHECK-NEXT: kmovw %k6, %eax
1146 ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
1162 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
11471163 ; CHECK-NEXT: kmovw %k7, %eax
1148 ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
1149 ; CHECK-NEXT: kmovw %k2, %eax
1150 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
1151 ; CHECK-NEXT: kmovw %k1, %eax
1152 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
1153 ; CHECK-NEXT: kmovw %k0, %eax
11541164 ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
11551165 ; CHECK-NEXT: retq
11561166 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
11751185 define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
11761186 ; CHECK-LABEL: test_mask_ucmp_q_512:
11771187 ; CHECK: ## BB#0:
1178 ; CHECK-NEXT: kmovw %edi, %k3
1179 ; CHECK-NEXT: vpcmpequq %zmm1, %zmm0, %k4 {%k3}
1180 ; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k5 {%k3}
1181 ; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k6 {%k3}
1182 ; CHECK-NEXT: vpcmpunorduq %zmm1, %zmm0, %k7 {%k3}
1183 ; CHECK-NEXT: vpcmpnequq %zmm1, %zmm0, %k0 {%k3}
1184 ; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k2 {%k3}
1185 ; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 {%k3}
1186 ; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k3 {%k3}
1187 ; CHECK-NEXT: kmovw %k5, %eax
1188 ; CHECK-NEXT: kmovw %k4, %ecx
1188 ; CHECK-NEXT: kmovw %edi, %k1
1189 ; CHECK-NEXT: vpcmpequq %zmm1, %zmm0, %k0 {%k1}
1190 ; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k2 {%k1}
1191 ; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k3 {%k1}
1192 ; CHECK-NEXT: vpcmpunorduq %zmm1, %zmm0, %k4 {%k1}
1193 ; CHECK-NEXT: vpcmpnequq %zmm1, %zmm0, %k5 {%k1}
1194 ; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k6 {%k1}
1195 ; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k7 {%k1}
1196 ; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k1 {%k1}
1197 ; CHECK-NEXT: kmovw %k2, %eax
1198 ; CHECK-NEXT: kmovw %k0, %ecx
11891199 ; CHECK-NEXT: vmovd %ecx, %xmm0
11901200 ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
1201 ; CHECK-NEXT: kmovw %k3, %eax
1202 ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
1203 ; CHECK-NEXT: kmovw %k4, %eax
1204 ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
1205 ; CHECK-NEXT: kmovw %k5, %eax
1206 ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
11911207 ; CHECK-NEXT: kmovw %k6, %eax
1192 ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
1208 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
11931209 ; CHECK-NEXT: kmovw %k7, %eax
1194 ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
1195 ; CHECK-NEXT: kmovw %k0, %eax
1196 ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
1197 ; CHECK-NEXT: kmovw %k2, %eax
1198 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
1210 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
11991211 ; CHECK-NEXT: kmovw %k1, %eax
1200 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
1201 ; CHECK-NEXT: kmovw %k3, %eax
12021212 ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
12031213 ; CHECK-NEXT: retq
12041214 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
27692779 ; CHECK: ## BB#0:
27702780 ; CHECK-NEXT: kmovw %edi, %k1
27712781 ; CHECK-NEXT: vmovapd %zmm1, %zmm3
2772 ; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm3 {%k1}
2773 ; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm1
2774 ; CHECK-NEXT: vaddpd %zmm1, %zmm3, %zmm0
2782 ; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm3
2783 ; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm1 {%k1}
2784 ; CHECK-NEXT: vaddpd %zmm3, %zmm1, %zmm0
27752785 ; CHECK-NEXT: retq
27762786 %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
27772787 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
27862796 ; CHECK: ## BB#0:
27872797 ; CHECK-NEXT: kmovw %edi, %k1
27882798 ; CHECK-NEXT: vmovaps %zmm1, %zmm3
2789 ; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm3 {%k1}
2790 ; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1
2791 ; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0
2799 ; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm3
2800 ; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1 {%k1}
2801 ; CHECK-NEXT: vaddps %zmm3, %zmm1, %zmm0
27922802 ; CHECK-NEXT: retq
27932803 %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
27942804 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
28032813 ; CHECK: ## BB#0:
28042814 ; CHECK-NEXT: kmovw %edi, %k1
28052815 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
2806 ; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm3 {%k1}
2807 ; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm1
2808 ; CHECK-NEXT: vpaddq %zmm1, %zmm3, %zmm0
2816 ; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm3
2817 ; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm1 {%k1}
2818 ; CHECK-NEXT: vpaddq %zmm3, %zmm1, %zmm0
28092819 ; CHECK-NEXT: retq
28102820 %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
28112821 %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
28582868 ; CHECK: ## BB#0:
28592869 ; CHECK-NEXT: kmovw %edi, %k1
28602870 ; CHECK-NEXT: vmovaps %zmm1, %zmm3
2861 ; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm3 {%k1} {z}
2862 ; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm1
2863 ; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0
2871 ; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm3
2872 ; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm1 {%k1} {z}
2873 ; CHECK-NEXT: vaddps %zmm3, %zmm1, %zmm0
28642874 ; CHECK-NEXT: retq
28652875 %res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
28662876 %res1 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
28762886 ; CHECK: ## BB#0:
28772887 ; CHECK-NEXT: kmovw %edi, %k1
28782888 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
2879 ; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm3 {%k1} {z}
2880 ; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm1
2881 ; CHECK-NEXT: vpaddq %zmm1, %zmm3, %zmm0
2889 ; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm3
2890 ; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm1 {%k1} {z}
2891 ; CHECK-NEXT: vpaddq %zmm3, %zmm1, %zmm0
28822892 ; CHECK-NEXT: retq
28832893 %res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
28842894 %res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
28932903 ; CHECK: ## BB#0:
28942904 ; CHECK-NEXT: kmovw %edi, %k1
28952905 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
2896 ; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm3 {%k1}
2897 ; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm1
2898 ; CHECK-NEXT: vpaddd %zmm1, %zmm3, %zmm0
2906 ; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm3
2907 ; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm1 {%k1}
2908 ; CHECK-NEXT: vpaddd %zmm3, %zmm1, %zmm0
28992909 ; CHECK-NEXT: retq
29002910 %res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
29012911 %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
29392949 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_512:
29402950 ; CHECK: ## BB#0:
29412951 ; CHECK-NEXT: kmovw %edi, %k1
2952 ; CHECK-NEXT: vpmovqb %zmm0, %xmm2 {%k1} {z}
29422953 ; CHECK-NEXT: vpmovqb %zmm0, %xmm1 {%k1}
2943 ; CHECK-NEXT: vpmovqb %zmm0, %xmm2 {%k1} {z}
29442954 ; CHECK-NEXT: vpmovqb %zmm0, %xmm0
29452955 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
29462956 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
29732983 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_512:
29742984 ; CHECK: ## BB#0:
29752985 ; CHECK-NEXT: kmovw %edi, %k1
2986 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm2 {%k1} {z}
29762987 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm1 {%k1}
2977 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm2 {%k1} {z}
29782988 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm0
29792989 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
29802990 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
30073017 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_512:
30083018 ; CHECK: ## BB#0:
30093019 ; CHECK-NEXT: kmovw %edi, %k1
3020 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm2 {%k1} {z}
30103021 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm1 {%k1}
3011 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm2 {%k1} {z}
30123022 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm0
30133023 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
30143024 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
30413051 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_512:
30423052 ; CHECK: ## BB#0:
30433053 ; CHECK-NEXT: kmovw %edi, %k1
3054 ; CHECK-NEXT: vpmovqw %zmm0, %xmm2 {%k1} {z}
30443055 ; CHECK-NEXT: vpmovqw %zmm0, %xmm1 {%k1}
3045 ; CHECK-NEXT: vpmovqw %zmm0, %xmm2 {%k1} {z}
30463056 ; CHECK-NEXT: vpmovqw %zmm0, %xmm0
30473057 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
30483058 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0
30753085 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_512:
30763086 ; CHECK: ## BB#0:
30773087 ; CHECK-NEXT: kmovw %edi, %k1
3088 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm2 {%k1} {z}
30783089 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm1 {%k1}
3079 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm2 {%k1} {z}
30803090 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm0
30813091 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
30823092 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0
31093119 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_512:
31103120 ; CHECK: ## BB#0:
31113121 ; CHECK-NEXT: kmovw %edi, %k1
3122 ; CHECK-NEXT: vpmovusqw %zmm0, %xmm2 {%k1} {z}
31123123 ; CHECK-NEXT: vpmovusqw %zmm0, %xmm1 {%k1}
3113 ; CHECK-NEXT: vpmovusqw %zmm0, %xmm2 {%k1} {z}
31143124 ; CHECK-NEXT: vpmovusqw %zmm0, %xmm0
31153125 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
31163126 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0
31433153 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_512:
31443154 ; CHECK: ## BB#0:
31453155 ; CHECK-NEXT: kmovw %edi, %k1
3156 ; CHECK-NEXT: vpmovqd %zmm0, %ymm2 {%k1} {z}
31463157 ; CHECK-NEXT: vpmovqd %zmm0, %ymm1 {%k1}
3147 ; CHECK-NEXT: vpmovqd %zmm0, %ymm2 {%k1} {z}
31483158 ; CHECK-NEXT: vpmovqd %zmm0, %ymm0
31493159 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
31503160 ; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0
31773187 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_512:
31783188 ; CHECK: ## BB#0:
31793189 ; CHECK-NEXT: kmovw %edi, %k1
3190 ; CHECK-NEXT: vpmovsqd %zmm0, %ymm2 {%k1} {z}
31803191 ; CHECK-NEXT: vpmovsqd %zmm0, %ymm1 {%k1}
3181 ; CHECK-NEXT: vpmovsqd %zmm0, %ymm2 {%k1} {z}
31823192 ; CHECK-NEXT: vpmovsqd %zmm0, %ymm0
31833193 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
31843194 ; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0
32113221 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_512:
32123222 ; CHECK: ## BB#0:
32133223 ; CHECK-NEXT: kmovw %edi, %k1
3224 ; CHECK-NEXT: vpmovusqd %zmm0, %ymm2 {%k1} {z}
32143225 ; CHECK-NEXT: vpmovusqd %zmm0, %ymm1 {%k1}
3215 ; CHECK-NEXT: vpmovusqd %zmm0, %ymm2 {%k1} {z}
32163226 ; CHECK-NEXT: vpmovusqd %zmm0, %ymm0
32173227 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
32183228 ; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0
32453255 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_512:
32463256 ; CHECK: ## BB#0:
32473257 ; CHECK-NEXT: kmovw %edi, %k1
3258 ; CHECK-NEXT: vpmovdb %zmm0, %xmm2 {%k1} {z}
32483259 ; CHECK-NEXT: vpmovdb %zmm0, %xmm1 {%k1}
3249 ; CHECK-NEXT: vpmovdb %zmm0, %xmm2 {%k1} {z}
32503260 ; CHECK-NEXT: vpmovdb %zmm0, %xmm0
32513261 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
32523262 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
32793289 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_512:
32803290 ; CHECK: ## BB#0:
32813291 ; CHECK-NEXT: kmovw %edi, %k1
3292 ; CHECK-NEXT: vpmovsdb %zmm0, %xmm2 {%k1} {z}
32823293 ; CHECK-NEXT: vpmovsdb %zmm0, %xmm1 {%k1}
3283 ; CHECK-NEXT: vpmovsdb %zmm0, %xmm2 {%k1} {z}
32843294 ; CHECK-NEXT: vpmovsdb %zmm0, %xmm0
32853295 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
32863296 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
33133323 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_512:
33143324 ; CHECK: ## BB#0:
33153325 ; CHECK-NEXT: kmovw %edi, %k1
3326 ; CHECK-NEXT: vpmovusdb %zmm0, %xmm2 {%k1} {z}
33163327 ; CHECK-NEXT: vpmovusdb %zmm0, %xmm1 {%k1}
3317 ; CHECK-NEXT: vpmovusdb %zmm0, %xmm2 {%k1} {z}
33183328 ; CHECK-NEXT: vpmovusdb %zmm0, %xmm0
33193329 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
33203330 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
33473357 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_512:
33483358 ; CHECK: ## BB#0:
33493359 ; CHECK-NEXT: kmovw %edi, %k1
3360 ; CHECK-NEXT: vpmovdw %zmm0, %ymm2 {%k1} {z}
33503361 ; CHECK-NEXT: vpmovdw %zmm0, %ymm1 {%k1}
3351 ; CHECK-NEXT: vpmovdw %zmm0, %ymm2 {%k1} {z}
33523362 ; CHECK-NEXT: vpmovdw %zmm0, %ymm0
33533363 ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0
33543364 ; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0
33813391 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_512:
33823392 ; CHECK: ## BB#0:
33833393 ; CHECK-NEXT: kmovw %edi, %k1
3394 ; CHECK-NEXT: vpmovsdw %zmm0, %ymm2 {%k1} {z}
33843395 ; CHECK-NEXT: vpmovsdw %zmm0, %ymm1 {%k1}
3385 ; CHECK-NEXT: vpmovsdw %zmm0, %ymm2 {%k1} {z}
33863396 ; CHECK-NEXT: vpmovsdw %zmm0, %ymm0
33873397 ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0
33883398 ; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0
34153425 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_512:
34163426 ; CHECK: ## BB#0:
34173427 ; CHECK-NEXT: kmovw %edi, %k1
3428 ; CHECK-NEXT: vpmovusdw %zmm0, %ymm2 {%k1} {z}
34183429 ; CHECK-NEXT: vpmovusdw %zmm0, %ymm1 {%k1}
3419 ; CHECK-NEXT: vpmovusdw %zmm0, %ymm2 {%k1} {z}
34203430 ; CHECK-NEXT: vpmovusdw %zmm0, %ymm0
34213431 ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0
34223432 ; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0
38033813 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2:
38043814 ; CHECK: ## BB#0:
38053815 ; CHECK-NEXT: kmovw %edi, %k1
3816 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm3 {%k1} {z} = zmm0[4,5,2,3],zmm1[2,3,0,1]
38063817 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
3807 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm3 {%k1} {z} = zmm0[4,5,2,3],zmm1[2,3,0,1]
38083818 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
38093819 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
38103820 ; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
40814091 ; CHECK: ## BB#0:
40824092 ; CHECK-NEXT: kmovw %edi, %k1
40834093 ; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
4084 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1}
4085 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
4086 ; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
4094 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3
4095 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1}
4096 ; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0
40874097 ; CHECK-NEXT: retq
40884098 %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
40894099 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
40984108 ; CHECK: ## BB#0:
40994109 ; CHECK-NEXT: kmovw %edi, %k1
41004110 ; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
4101 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1} {z}
4102 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
4103 ; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
4111 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3
4112 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} {z}
4113 ; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0
41044114 ; CHECK-NEXT: retq
41054115 %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
41064116 %res1 = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
41154125 ; CHECK: ## BB#0:
41164126 ; CHECK-NEXT: kmovw %edi, %k1
41174127 ; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
4118 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1}
4119 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0
4120 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
4128 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3
4129 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1}
4130 ; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0
41214131 ; CHECK-NEXT: retq
41224132 %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
41234133 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
41324142 ; CHECK: ## BB#0:
41334143 ; CHECK-NEXT: kmovw %edi, %k1
41344144 ; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
4135 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1} {z}
4136 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0
4137 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
4145 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3
4146 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} {z}
4147 ; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0
41384148 ; CHECK-NEXT: retq
41394149 %res = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
41404150 %res1 = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
43814391 ; CHECK-LABEL: test_int_x86_avx512_mask_prorv_d_512:
43824392 ; CHECK: ## BB#0:
43834393 ; CHECK-NEXT: kmovw %edi, %k1
4394 ; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm3
43844395 ; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm2 {%k1}
4385 ; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm3 {%k1} {z}
4386 ; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm0
4387 ; CHECK-NEXT: vpaddd %zmm3, %zmm2, %zmm1
4388 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
4396 ; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm0 {%k1} {z}
4397 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
4398 ; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0
43894399 ; CHECK-NEXT: retq
43904400 %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
43914401 %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3)
44014411 ; CHECK-LABEL: test_int_x86_avx512_mask_prorv_q_512:
44024412 ; CHECK: ## BB#0:
44034413 ; CHECK-NEXT: kmovw %edi, %k1
4414 ; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm3
44044415 ; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm2 {%k1}
4405 ; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm3 {%k1} {z}
4406 ; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm0
4407 ; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm1
4408 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
4416 ; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm0 {%k1} {z}
4417 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
4418 ; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0
44094419 ; CHECK-NEXT: retq
44104420 %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
44114421 %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
44614471 ; CHECK-LABEL: test_int_x86_avx512_mask_permvar_df_512:
44624472 ; CHECK: ## BB#0:
44634473 ; CHECK-NEXT: kmovw %edi, %k1
4474 ; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm3
44644475 ; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm2 {%k1}
4465 ; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm3 {%k1} {z}
4466 ; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0
4467 ; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm1
4468 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
4476 ; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z}
4477 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
4478 ; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
44694479 ; CHECK-NEXT: retq
44704480 %res = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
44714481 %res1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
44814491 ; CHECK-LABEL: test_int_x86_avx512_mask_permvar_di_512:
44824492 ; CHECK: ## BB#0:
44834493 ; CHECK-NEXT: kmovw %edi, %k1
4494 ; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm3
44844495 ; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm2 {%k1}
4485 ; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm3 {%k1} {z}
4486 ; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm0
4487 ; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm1
4488 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
4496 ; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z}
4497 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
4498 ; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0
44894499 ; CHECK-NEXT: retq
44904500 %res = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
44914501 %res1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
45014511 ; CHECK-LABEL: test_int_x86_avx512_mask_permvar_sf_512:
45024512 ; CHECK: ## BB#0:
45034513 ; CHECK-NEXT: kmovw %edi, %k1
4514 ; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm3
45044515 ; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm2 {%k1}
4505 ; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm3 {%k1} {z}
4506 ; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0
4507 ; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm1
4508 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
4516 ; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z}
4517 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
4518 ; CHECK-NEXT: vaddps %zmm3, %zmm0, %zmm0
45094519 ; CHECK-NEXT: retq
45104520 %res = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
45114521 %res1 = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
45214531 ; CHECK-LABEL: test_int_x86_avx512_mask_permvar_si_512:
45224532 ; CHECK: ## BB#0:
45234533 ; CHECK-NEXT: kmovw %edi, %k1
4534 ; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm3
45244535 ; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm2 {%k1}
4525 ; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm3 {%k1} {z}
4526 ; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0
4527 ; CHECK-NEXT: vpaddd %zmm3, %zmm2, %zmm1
4528 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
4536 ; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z}
4537 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
4538 ; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0
45294539 ; CHECK-NEXT: retq
45304540 %res = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
45314541 %res1 = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3)
46584668 ; CHECK: ## BB#0:
46594669 ; CHECK-NEXT: kmovw %edi, %k1
46604670 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
4661 ; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm3 {%k1} {z}
4671 ; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm3
46624672 ; CHECK-NEXT: vmovaps %zmm0, %zmm4
4663 ; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm4
4673 ; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm4 {%k1} {z}
46644674 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
46654675 ; CHECK-NEXT: vfixupimmps $5, {sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
4666 ; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
4667 ; CHECK-NEXT: vaddps %zmm4, %zmm0, %zmm0
4676 ; CHECK-NEXT: vaddps %zmm0, %zmm4, %zmm0
4677 ; CHECK-NEXT: vaddps %zmm3, %zmm0, %zmm0
46684678 ; CHECK-NEXT: retq
46694679 %res = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 %x4, i32 4)
46704680 %res1 = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> zeroinitializer, i32 5, i16 %x4, i32 8)
47284738 ; CHECK-LABEL: test_int_x86_avx512_ptestnm_d_512:
47294739 ; CHECK: ## BB#0:
47304740 ; CHECK-NEXT: kmovw %edi, %k1
4741 ; CHECK-NEXT: vptestnmd %zmm1, %zmm0, %k0
4742 ; CHECK-NEXT: kmovw %k0, %ecx
47314743 ; CHECK-NEXT: vptestnmd %zmm1, %zmm0, %k0 {%k1}
4732 ; CHECK-NEXT: kmovw %k0, %ecx
4733 ; CHECK-NEXT: vptestnmd %zmm1, %zmm0, %k0
47344744 ; CHECK-NEXT: kmovw %k0, %eax
47354745 ; CHECK-NEXT: addl %ecx, %eax
47364746 ; CHECK-NEXT: ## kill: %AX %AX %EAX
47474757 ; CHECK-LABEL: test_int_x86_avx512_ptestnm_q_512:
47484758 ; CHECK: ## BB#0:
47494759 ; CHECK-NEXT: kmovw %edi, %k1
4760 ; CHECK-NEXT: vptestnmq %zmm1, %zmm0, %k0
4761 ; CHECK-NEXT: kmovw %k0, %ecx
47504762 ; CHECK-NEXT: vptestnmq %zmm1, %zmm0, %k0 {%k1}
4751 ; CHECK-NEXT: kmovw %k0, %ecx
4752 ; CHECK-NEXT: vptestnmq %zmm1, %zmm0, %k0
47534763 ; CHECK-NEXT: kmovw %k0, %eax
47544764 ; CHECK-NEXT: addb %cl, %al
4765 ; CHECK-NEXT: ## kill: %AL %AL %EAX
47554766 ; CHECK-NEXT: retq
47564767 %res = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
47574768 %res1 = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8-1)
47634774 ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcastd_gpr_512:
47644775 ; CHECK: ## BB#0:
47654776 ; CHECK-NEXT: kmovw %esi, %k1
4777 ; CHECK-NEXT: vpbroadcastd %edi, %zmm1 {%k1} {z}
47664778 ; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1}
4767 ; CHECK-NEXT: vpbroadcastd %edi, %zmm1 {%k1} {z}
47684779 ; CHECK-NEXT: vpbroadcastd %edi, %zmm2
47694780 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
47704781 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
47834794 ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcastq_gpr_512:
47844795 ; CHECK: ## BB#0:
47854796 ; CHECK-NEXT: kmovw %esi, %k1
4797 ; CHECK-NEXT: vpbroadcastq %rdi, %zmm1 {%k1} {z}
47864798 ; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1}
4787 ; CHECK-NEXT: vpbroadcastq %rdi, %zmm1 {%k1} {z}
47884799 ; CHECK-NEXT: vpbroadcastq %rdi, %zmm2
47894800 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
47904801 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
298298 ;
299299 ; SKX-LABEL: masked_and_v16f32:
300300 ; SKX: ## BB#0:
301 ; SKX-NEXT: kmovw %edi, %k1
301 ; SKX-NEXT: kmovd %edi, %k1
302302 ; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1}
303303 ; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0
304304 ; SKX-NEXT: retq
323323 ;
324324 ; SKX-LABEL: masked_or_v16f32:
325325 ; SKX: ## BB#0:
326 ; SKX-NEXT: kmovw %edi, %k1
326 ; SKX-NEXT: kmovd %edi, %k1
327327 ; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1}
328328 ; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0
329329 ; SKX-NEXT: retq
348348 ;
349349 ; SKX-LABEL: masked_xor_v16f32:
350350 ; SKX: ## BB#0:
351 ; SKX-NEXT: kmovw %edi, %k1
351 ; SKX-NEXT: kmovd %edi, %k1
352352 ; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1}
353353 ; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0
354354 ; SKX-NEXT: retq
373373 ;
374374 ; SKX-LABEL: masked_and_v8f64:
375375 ; SKX: ## BB#0:
376 ; SKX-NEXT: kmovb %edi, %k1
376 ; SKX-NEXT: kmovd %edi, %k1
377377 ; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1}
378378 ; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0
379379 ; SKX-NEXT: retq
398398 ;
399399 ; SKX-LABEL: masked_or_v8f64:
400400 ; SKX: ## BB#0:
401 ; SKX-NEXT: kmovb %edi, %k1
401 ; SKX-NEXT: kmovd %edi, %k1
402402 ; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1}
403403 ; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0
404404 ; SKX-NEXT: retq
423423 ;
424424 ; SKX-LABEL: masked_xor_v8f64:
425425 ; SKX: ## BB#0:
426 ; SKX-NEXT: kmovb %edi, %k1
426 ; SKX-NEXT: kmovd %edi, %k1
427427 ; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1}
428428 ; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0
429429 ; SKX-NEXT: retq
447447 ;
448448 ; SKX-LABEL: test_mm512_mask_and_epi32:
449449 ; SKX: ## BB#0: ## %entry
450 ; SKX-NEXT: kmovw %edi, %k1
450 ; SKX-NEXT: kmovd %edi, %k1
451451 ; SKX-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1}
452452 ; SKX-NEXT: retq
453453 entry:
469469 ;
470470 ; SKX-LABEL: test_mm512_mask_or_epi32:
471471 ; SKX: ## BB#0: ## %entry
472 ; SKX-NEXT: kmovw %edi, %k1
472 ; SKX-NEXT: kmovd %edi, %k1
473473 ; SKX-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1}
474474 ; SKX-NEXT: retq
475475 entry:
491491 ;
492492 ; SKX-LABEL: test_mm512_mask_xor_epi32:
493493 ; SKX: ## BB#0: ## %entry
494 ; SKX-NEXT: kmovw %edi, %k1
494 ; SKX-NEXT: kmovd %edi, %k1
495495 ; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1}
496496 ; SKX-NEXT: retq
497497 entry:
513513 ;
514514 ; SKX-LABEL: test_mm512_mask_xor_pd:
515515 ; SKX: ## BB#0: ## %entry
516 ; SKX-NEXT: kmovb %edi, %k1
516 ; SKX-NEXT: kmovd %edi, %k1
517517 ; SKX-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1}
518518 ; SKX-NEXT: retq
519519 entry:
535535 ;
536536 ; SKX-LABEL: test_mm512_maskz_xor_pd:
537537 ; SKX: ## BB#0: ## %entry
538 ; SKX-NEXT: kmovb %edi, %k1
538 ; SKX-NEXT: kmovd %edi, %k1
539539 ; SKX-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z}
540540 ; SKX-NEXT: retq
541541 entry:
557557 ;
558558 ; SKX-LABEL: test_mm512_mask_xor_ps:
559559 ; SKX: ## BB#0: ## %entry
560 ; SKX-NEXT: kmovw %edi, %k1
560 ; SKX-NEXT: kmovd %edi, %k1
561561 ; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1}
562562 ; SKX-NEXT: retq
563563 entry:
579579 ;
580580 ; SKX-LABEL: test_mm512_maskz_xor_ps:
581581 ; SKX: ## BB#0: ## %entry
582 ; SKX-NEXT: kmovw %edi, %k1
582 ; SKX-NEXT: kmovd %edi, %k1
583583 ; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z}
584584 ; SKX-NEXT: retq
585585 entry:
601601 ;
602602 ; SKX-LABEL: test_mm512_mask_or_pd:
603603 ; SKX: ## BB#0: ## %entry
604 ; SKX-NEXT: kmovb %edi, %k1
604 ; SKX-NEXT: kmovd %edi, %k1
605605 ; SKX-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1}
606606 ; SKX-NEXT: retq
607607 entry:
623623 ;
624624 ; SKX-LABEL: test_mm512_maskz_or_pd:
625625 ; SKX: ## BB#0: ## %entry
626 ; SKX-NEXT: kmovb %edi, %k1
626 ; SKX-NEXT: kmovd %edi, %k1
627627 ; SKX-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z}
628628 ; SKX-NEXT: retq
629629 entry:
645645 ;
646646 ; SKX-LABEL: test_mm512_mask_or_ps:
647647 ; SKX: ## BB#0: ## %entry
648 ; SKX-NEXT: kmovw %edi, %k1
648 ; SKX-NEXT: kmovd %edi, %k1
649649 ; SKX-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1}
650650 ; SKX-NEXT: retq
651651 entry:
667667 ;
668668 ; SKX-LABEL: test_mm512_maskz_or_ps:
669669 ; SKX: ## BB#0: ## %entry
670 ; SKX-NEXT: kmovw %edi, %k1
670 ; SKX-NEXT: kmovd %edi, %k1
671671 ; SKX-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z}
672672 ; SKX-NEXT: retq
673673 entry:
689689 ;
690690 ; SKX-LABEL: test_mm512_mask_and_pd:
691691 ; SKX: ## BB#0: ## %entry
692 ; SKX-NEXT: kmovb %edi, %k1
692 ; SKX-NEXT: kmovd %edi, %k1
693693 ; SKX-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1}
694694 ; SKX-NEXT: retq
695695 entry:
711711 ;
712712 ; SKX-LABEL: test_mm512_maskz_and_pd:
713713 ; SKX: ## BB#0: ## %entry
714 ; SKX-NEXT: kmovb %edi, %k1
714 ; SKX-NEXT: kmovd %edi, %k1
715715 ; SKX-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z}
716716 ; SKX-NEXT: retq
717717 entry:
733733 ;
734734 ; SKX-LABEL: test_mm512_mask_and_ps:
735735 ; SKX: ## BB#0: ## %entry
736 ; SKX-NEXT: kmovw %edi, %k1
736 ; SKX-NEXT: kmovd %edi, %k1
737737 ; SKX-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1}
738738 ; SKX-NEXT: retq
739739 entry:
755755 ;
756756 ; SKX-LABEL: test_mm512_maskz_and_ps:
757757 ; SKX: ## BB#0: ## %entry
758 ; SKX-NEXT: kmovw %edi, %k1
758 ; SKX-NEXT: kmovd %edi, %k1
759759 ; SKX-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z}
760760 ; SKX-NEXT: retq
761761 entry:
777777 ;
778778 ; SKX-LABEL: test_mm512_mask_andnot_pd:
779779 ; SKX: ## BB#0: ## %entry
780 ; SKX-NEXT: kmovb %edi, %k1
780 ; SKX-NEXT: kmovd %edi, %k1
781781 ; SKX-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1}
782782 ; SKX-NEXT: retq
783783 entry:
800800 ;
801801 ; SKX-LABEL: test_mm512_maskz_andnot_pd:
802802 ; SKX: ## BB#0: ## %entry
803 ; SKX-NEXT: kmovb %edi, %k1
803 ; SKX-NEXT: kmovd %edi, %k1
804804 ; SKX-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z}
805805 ; SKX-NEXT: retq
806806 entry:
823823 ;
824824 ; SKX-LABEL: test_mm512_mask_andnot_ps:
825825 ; SKX: ## BB#0: ## %entry
826 ; SKX-NEXT: kmovw %edi, %k1
826 ; SKX-NEXT: kmovd %edi, %k1
827827 ; SKX-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1}
828828 ; SKX-NEXT: retq
829829 entry:
846846 ;
847847 ; SKX-LABEL: test_mm512_maskz_andnot_ps:
848848 ; SKX: ## BB#0: ## %entry
849 ; SKX-NEXT: kmovw %edi, %k1
849 ; SKX-NEXT: kmovd %edi, %k1
850850 ; SKX-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z}
851851 ; SKX-NEXT: retq
852852 entry:
55
66
77 define i16 @mask16(i16 %x) {
8 ; CHECK-LABEL: mask16:
9 ; CHECK: ## BB#0:
10 ; CHECK-NEXT: kmovw %edi, %k0
11 ; CHECK-NEXT: knotw %k0, %k0
12 ; CHECK-NEXT: kmovw %k0, %eax
13 ; CHECK-NEXT: retq
8 ; KNL-LABEL: mask16:
9 ; KNL: ## BB#0:
10 ; KNL-NEXT: kmovw %edi, %k0
11 ; KNL-NEXT: knotw %k0, %k0
12 ; KNL-NEXT: kmovw %k0, %eax
13 ; KNL-NEXT: ## kill: %AX %AX %EAX
14 ; KNL-NEXT: retq
15 ;
16 ; SKX-LABEL: mask16:
17 ; SKX: ## BB#0:
18 ; SKX-NEXT: kmovd %edi, %k0
19 ; SKX-NEXT: knotw %k0, %k0
20 ; SKX-NEXT: kmovd %k0, %eax
21 ; SKX-NEXT: ## kill: %AX %AX %EAX
22 ; SKX-NEXT: retq
23 ;
24 ; AVX512BW-LABEL: mask16:
25 ; AVX512BW: ## BB#0:
26 ; AVX512BW-NEXT: kmovd %edi, %k0
27 ; AVX512BW-NEXT: knotw %k0, %k0
28 ; AVX512BW-NEXT: kmovd %k0, %eax
29 ; AVX512BW-NEXT: ## kill: %AX %AX %EAX
30 ; AVX512BW-NEXT: retq
31 ;
32 ; AVX512DQ-LABEL: mask16:
33 ; AVX512DQ: ## BB#0:
34 ; AVX512DQ-NEXT: kmovw %edi, %k0
35 ; AVX512DQ-NEXT: knotw %k0, %k0
36 ; AVX512DQ-NEXT: kmovw %k0, %eax
37 ; AVX512DQ-NEXT: ## kill: %AX %AX %EAX
38 ; AVX512DQ-NEXT: retq
1439 %m0 = bitcast i16 %x to <16 x i1>
1540 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
1641 %ret = bitcast <16 x i1> %m1 to i16
1843 }
1944
2045 define i32 @mask16_zext(i16 %x) {
21 ; CHECK-LABEL: mask16_zext:
22 ; CHECK: ## BB#0:
23 ; CHECK-NEXT: kmovw %edi, %k0
24 ; CHECK-NEXT: knotw %k0, %k0
25 ; CHECK-NEXT: kmovw %k0, %eax
26 ; CHECK-NEXT: retq
46 ; KNL-LABEL: mask16_zext:
47 ; KNL: ## BB#0:
48 ; KNL-NEXT: kmovw %edi, %k0
49 ; KNL-NEXT: knotw %k0, %k0
50 ; KNL-NEXT: kmovw %k0, %eax
51 ; KNL-NEXT: retq
52 ;
53 ; SKX-LABEL: mask16_zext:
54 ; SKX: ## BB#0:
55 ; SKX-NEXT: kmovd %edi, %k0
56 ; SKX-NEXT: knotw %k0, %k0
57 ; SKX-NEXT: kmovw %k0, %eax
58 ; SKX-NEXT: retq
59 ;
60 ; AVX512BW-LABEL: mask16_zext:
61 ; AVX512BW: ## BB#0:
62 ; AVX512BW-NEXT: kmovd %edi, %k0
63 ; AVX512BW-NEXT: knotw %k0, %k0
64 ; AVX512BW-NEXT: kmovw %k0, %eax
65 ; AVX512BW-NEXT: retq
66 ;
67 ; AVX512DQ-LABEL: mask16_zext:
68 ; AVX512DQ: ## BB#0:
69 ; AVX512DQ-NEXT: kmovw %edi, %k0
70 ; AVX512DQ-NEXT: knotw %k0, %k0
71 ; AVX512DQ-NEXT: kmovw %k0, %eax
72 ; AVX512DQ-NEXT: retq
2773 %m0 = bitcast i16 %x to <16 x i1>
2874 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
2975 %m2 = bitcast <16 x i1> %m1 to i16
3783 ; KNL-NEXT: kmovw %edi, %k0
3884 ; KNL-NEXT: knotw %k0, %k0
3985 ; KNL-NEXT: kmovw %k0, %eax
86 ; KNL-NEXT: ## kill: %AL %AL %EAX
4087 ; KNL-NEXT: retq
4188 ;
4289 ; SKX-LABEL: mask8:
4390 ; SKX: ## BB#0:
44 ; SKX-NEXT: kmovb %edi, %k0
91 ; SKX-NEXT: kmovd %edi, %k0
4592 ; SKX-NEXT: knotb %k0, %k0
46 ; SKX-NEXT: kmovb %k0, %eax
93 ; SKX-NEXT: kmovd %k0, %eax
94 ; SKX-NEXT: ## kill: %AL %AL %EAX
4795 ; SKX-NEXT: retq
4896 ;
4997 ; AVX512BW-LABEL: mask8:
5098 ; AVX512BW: ## BB#0:
51 ; AVX512BW-NEXT: kmovw %edi, %k0
99 ; AVX512BW-NEXT: kmovd %edi, %k0
52100 ; AVX512BW-NEXT: knotw %k0, %k0
53 ; AVX512BW-NEXT: kmovw %k0, %eax
101 ; AVX512BW-NEXT: kmovd %k0, %eax
102 ; AVX512BW-NEXT: ## kill: %AL %AL %EAX
54103 ; AVX512BW-NEXT: retq
55104 ;
56105 ; AVX512DQ-LABEL: mask8:
57106 ; AVX512DQ: ## BB#0:
58 ; AVX512DQ-NEXT: kmovb %edi, %k0
107 ; AVX512DQ-NEXT: kmovw %edi, %k0
59108 ; AVX512DQ-NEXT: knotb %k0, %k0
60 ; AVX512DQ-NEXT: kmovb %k0, %eax
109 ; AVX512DQ-NEXT: kmovw %k0, %eax
110 ; AVX512DQ-NEXT: ## kill: %AL %AL %EAX
61111 ; AVX512DQ-NEXT: retq
62112 %m0 = bitcast i8 %x to <8 x i1>
63113 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
76126 ;
77127 ; SKX-LABEL: mask8_zext:
78128 ; SKX: ## BB#0:
79 ; SKX-NEXT: kmovb %edi, %k0
129 ; SKX-NEXT: kmovd %edi, %k0
80130 ; SKX-NEXT: knotb %k0, %k0
81131 ; SKX-NEXT: kmovb %k0, %eax
82132 ; SKX-NEXT: retq
83133 ;
84134 ; AVX512BW-LABEL: mask8_zext:
85135 ; AVX512BW: ## BB#0:
86 ; AVX512BW-NEXT: kmovw %edi, %k0
136 ; AVX512BW-NEXT: kmovd %edi, %k0
87137 ; AVX512BW-NEXT: knotw %k0, %k0
88 ; AVX512BW-NEXT: kmovw %k0, %eax
138 ; AVX512BW-NEXT: kmovd %k0, %eax
89139 ; AVX512BW-NEXT: movzbl %al, %eax
90140 ; AVX512BW-NEXT: retq
91141 ;
92142 ; AVX512DQ-LABEL: mask8_zext:
93143 ; AVX512DQ: ## BB#0:
94 ; AVX512DQ-NEXT: kmovb %edi, %k0
144 ; AVX512DQ-NEXT: kmovw %edi, %k0
95145 ; AVX512DQ-NEXT: knotb %k0, %k0
96146 ; AVX512DQ-NEXT: kmovb %k0, %eax
97147 ; AVX512DQ-NEXT: retq
176226 }
177227
178228 define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) {
179 ; CHECK-LABEL: mand16_mem:
180 ; CHECK: ## BB#0:
181 ; CHECK-NEXT: kmovw (%rdi), %k0
182 ; CHECK-NEXT: kmovw (%rsi), %k1
183 ; CHECK-NEXT: kandw %k1, %k0, %k2
184 ; CHECK-NEXT: kxorw %k1, %k0, %k0
185 ; CHECK-NEXT: korw %k0, %k2, %k0
186 ; CHECK-NEXT: kmovw %k0, %eax
187 ; CHECK-NEXT: retq
229 ; KNL-LABEL: mand16_mem:
230 ; KNL: ## BB#0:
231 ; KNL-NEXT: kmovw (%rdi), %k0
232 ; KNL-NEXT: kmovw (%rsi), %k1
233 ; KNL-NEXT: kandw %k1, %k0, %k2
234 ; KNL-NEXT: kxorw %k1, %k0, %k0
235 ; KNL-NEXT: korw %k0, %k2, %k0
236 ; KNL-NEXT: kmovw %k0, %eax
237 ; KNL-NEXT: ## kill: %AX %AX %EAX
238 ; KNL-NEXT: retq
239 ;
240 ; SKX-LABEL: mand16_mem:
241 ; SKX: ## BB#0:
242 ; SKX-NEXT: kmovw (%rdi), %k0
243 ; SKX-NEXT: kmovw (%rsi), %k1
244 ; SKX-NEXT: kandw %k1, %k0, %k2
245 ; SKX-NEXT: kxorw %k1, %k0, %k0
246 ; SKX-NEXT: korw %k0, %k2, %k0
247 ; SKX-NEXT: kmovd %k0, %eax
248 ; SKX-NEXT: ## kill: %AX %AX %EAX
249 ; SKX-NEXT: retq
250 ;
251 ; AVX512BW-LABEL: mand16_mem:
252 ; AVX512BW: ## BB#0:
253 ; AVX512BW-NEXT: kmovw (%rdi), %k0
254 ; AVX512BW-NEXT: kmovw (%rsi), %k1
255 ; AVX512BW-NEXT: kandw %k1, %k0, %k2
256 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
257 ; AVX512BW-NEXT: korw %k0, %k2, %k0
258 ; AVX512BW-NEXT: kmovd %k0, %eax
259 ; AVX512BW-NEXT: ## kill: %AX %AX %EAX
260 ; AVX512BW-NEXT: retq
261 ;
262 ; AVX512DQ-LABEL: mand16_mem:
263 ; AVX512DQ: ## BB#0:
264 ; AVX512DQ-NEXT: kmovw (%rdi), %k0
265 ; AVX512DQ-NEXT: kmovw (%rsi), %k1
266 ; AVX512DQ-NEXT: kandw %k1, %k0, %k2
267 ; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
268 ; AVX512DQ-NEXT: korw %k0, %k2, %k0
269 ; AVX512DQ-NEXT: kmovw %k0, %eax
270 ; AVX512DQ-NEXT: ## kill: %AX %AX %EAX
271 ; AVX512DQ-NEXT: retq
188272 %ma = load <16 x i1>, <16 x i1>* %x
189273 %mb = load <16 x i1>, <16 x i1>* %y
190274 %mc = and <16 x i1> %ma, %mb
200284 ; KNL-NEXT: kmovw %edi, %k0
201285 ; KNL-NEXT: kshiftrw $8, %k0, %k0
202286 ; KNL-NEXT: kmovw %k0, %eax
287 ; KNL-NEXT: ## kill: %AL %AL %EAX
203288 ; KNL-NEXT: retq
204289 ;
205290 ; SKX-LABEL: shuf_test1:
206291 ; SKX: ## BB#0:
207 ; SKX-NEXT: kmovw %edi, %k0
292 ; SKX-NEXT: kmovd %edi, %k0
208293 ; SKX-NEXT: kshiftrw $8, %k0, %k0
209 ; SKX-NEXT: kmovb %k0, %eax
294 ; SKX-NEXT: kmovd %k0, %eax
295 ; SKX-NEXT: ## kill: %AL %AL %EAX
210296 ; SKX-NEXT: retq
211297 ;
212298 ; AVX512BW-LABEL: shuf_test1:
213299 ; AVX512BW: ## BB#0:
214 ; AVX512BW-NEXT: kmovw %edi, %k0
300 ; AVX512BW-NEXT: kmovd %edi, %k0
215301 ; AVX512BW-NEXT: kshiftrw $8, %k0, %k0
216 ; AVX512BW-NEXT: kmovw %k0, %eax
302 ; AVX512BW-NEXT: kmovd %k0, %eax
303 ; AVX512BW-NEXT: ## kill: %AL %AL %EAX
217304 ; AVX512BW-NEXT: retq
218305 ;
219306 ; AVX512DQ-LABEL: shuf_test1:
220307 ; AVX512DQ: ## BB#0:
221308 ; AVX512DQ-NEXT: kmovw %edi, %k0
222309 ; AVX512DQ-NEXT: kshiftrw $8, %k0, %k0
223 ; AVX512DQ-NEXT: kmovb %k0, %eax
310 ; AVX512DQ-NEXT: kmovw %k0, %eax
311 ; AVX512DQ-NEXT: ## kill: %AL %AL %EAX
224312 ; AVX512DQ-NEXT: retq
225313 %v1 = bitcast i16 %v to <16 x i1>
226314 %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
525613 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
526614 ; SKX-NEXT: vpmovw2m %xmm0, %k0
527615 ; SKX-NEXT: movb $85, %al
528 ; SKX-NEXT: kmovb %eax, %k1
616 ; SKX-NEXT: kmovd %eax, %k1
529617 ; SKX-NEXT: korb %k1, %k0, %k0
530618 ; SKX-NEXT: ktestb %k0, %k0
531619 ; SKX-NEXT: retq
535623 ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
536624 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
537625 ; AVX512BW-NEXT: movb $85, %al
538 ; AVX512BW-NEXT: kmovw %eax, %k1
626 ; AVX512BW-NEXT: kmovd %eax, %k1
539627 ; AVX512BW-NEXT: korw %k1, %k0, %k0
540 ; AVX512BW-NEXT: kmovw %k0, %eax
628 ; AVX512BW-NEXT: kmovd %k0, %eax
541629 ; AVX512BW-NEXT: testb %al, %al
542630 ; AVX512BW-NEXT: vzeroupper
543631 ; AVX512BW-NEXT: retq
548636 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
549637 ; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
550638 ; AVX512DQ-NEXT: movb $85, %al
551 ; AVX512DQ-NEXT: kmovb %eax, %k1
639 ; AVX512DQ-NEXT: kmovw %eax, %k1
552640 ; AVX512DQ-NEXT: korb %k1, %k0, %k0
553641 ; AVX512DQ-NEXT: ktestb %k0, %k0
554642 ; AVX512DQ-NEXT: vzeroupper
798886 ; SKX-NEXT: movw $21845, %ax ## imm = 0x5555
799887 ; SKX-NEXT: movw $1, %cx
800888 ; SKX-NEXT: cmovgw %ax, %cx
801 ; SKX-NEXT: kmovw %ecx, %k0
889 ; SKX-NEXT: kmovd %ecx, %k0
802890 ; SKX-NEXT: vpmovm2b %k0, %xmm0
803891 ; SKX-NEXT: retq
804892 ;
808896 ; AVX512BW-NEXT: movw $21845, %ax ## imm = 0x5555
809897 ; AVX512BW-NEXT: movw $1, %cx
810898 ; AVX512BW-NEXT: cmovgw %ax, %cx
811 ; AVX512BW-NEXT: kmovw %ecx, %k0
899 ; AVX512BW-NEXT: kmovd %ecx, %k0
812900 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
813901 ; AVX512BW-NEXT: ## kill: %XMM0 %XMM0 %ZMM0
814902 ; AVX512BW-NEXT: vzeroupper
10981186 ;
10991187 ; SKX-LABEL: test18:
11001188 ; SKX: ## BB#0:
1101 ; SKX-NEXT: kmovb %edi, %k0
1102 ; SKX-NEXT: kmovw %esi, %k1
1189 ; SKX-NEXT: kmovd %edi, %k0
1190 ; SKX-NEXT: kmovd %esi, %k1
11031191 ; SKX-NEXT: kshiftlw $7, %k1, %k2
11041192 ; SKX-NEXT: kshiftrw $15, %k2, %k2
11051193 ; SKX-NEXT: kshiftlw $6, %k1, %k1
11191207 ;
11201208 ; AVX512BW-LABEL: test18:
11211209 ; AVX512BW: ## BB#0:
1122 ; AVX512BW-NEXT: kmovw %edi, %k1
1123 ; AVX512BW-NEXT: kmovw %esi, %k2
1210 ; AVX512BW-NEXT: kmovd %edi, %k1
1211 ; AVX512BW-NEXT: kmovd %esi, %k2
11241212 ; AVX512BW-NEXT: kshiftlw $7, %k2, %k0
11251213 ; AVX512BW-NEXT: kshiftrw $15, %k0, %k0
11261214 ; AVX512BW-NEXT: kshiftlw $6, %k2, %k2
11421230 ;
11431231 ; AVX512DQ-LABEL: test18:
11441232 ; AVX512DQ: ## BB#0:
1145 ; AVX512DQ-NEXT: kmovb %edi, %k0
1233 ; AVX512DQ-NEXT: kmovw %edi, %k0
11461234 ; AVX512DQ-NEXT: kmovw %esi, %k1
11471235 ; AVX512DQ-NEXT: kshiftlw $7, %k1, %k2
11481236 ; AVX512DQ-NEXT: kshiftrw $15, %k2, %k2
17071795 ; AVX512BW-NEXT: vcmpltpd %zmm0, %zmm1, %k1
17081796 ; AVX512BW-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
17091797 ; AVX512BW-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
1710 ; AVX512BW-NEXT: kmovw %k0, %eax
1798 ; AVX512BW-NEXT: kmovd %k0, %eax
17111799 ; AVX512BW-NEXT: testb %al, %al
17121800 ; AVX512BW-NEXT: je LBB41_2
17131801 ; AVX512BW-NEXT: ## BB#1: ## %L1
35633651 ; AVX512BW: ## BB#0:
35643652 ; AVX512BW-NEXT: vpxord %zmm1, %zmm1, %zmm1
35653653 ; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
3566 ; AVX512BW-NEXT: kmovw %k0, %eax
3654 ; AVX512BW-NEXT: kmovd %k0, %eax
35673655 ; AVX512BW-NEXT: movzbl %al, %eax
35683656 ; AVX512BW-NEXT: addl %eax, %eax
35693657 ; AVX512BW-NEXT: vzeroupper
36283716 }
36293717
36303718 define i16 @test_v16i1_add(i16 %x, i16 %y) {
3631 ; CHECK-LABEL: test_v16i1_add:
3632 ; CHECK: ## BB#0:
3633 ; CHECK-NEXT: kmovw %edi, %k0
3634 ; CHECK-NEXT: kmovw %esi, %k1
3635 ; CHECK-NEXT: kxorw %k1, %k0, %k0
3636 ; CHECK-NEXT: kmovw %k0, %eax
3637 ; CHECK-NEXT: retq
3719 ; KNL-LABEL: test_v16i1_add:
3720 ; KNL: ## BB#0:
3721 ; KNL-NEXT: kmovw %edi, %k0
3722 ; KNL-NEXT: kmovw %esi, %k1
3723 ; KNL-NEXT: kxorw %k1, %k0, %k0
3724 ; KNL-NEXT: kmovw %k0, %eax
3725 ; KNL-NEXT: ## kill: %AX %AX %EAX
3726 ; KNL-NEXT: retq
3727 ;
3728 ; SKX-LABEL: test_v16i1_add:
3729 ; SKX: ## BB#0:
3730 ; SKX-NEXT: kmovd %edi, %k0
3731 ; SKX-NEXT: kmovd %esi, %k1
3732 ; SKX-NEXT: kxorw %k1, %k0, %k0
3733 ; SKX-NEXT: kmovd %k0, %eax
3734 ; SKX-NEXT: ## kill: %AX %AX %EAX
3735 ; SKX-NEXT: retq
3736 ;
3737 ; AVX512BW-LABEL: test_v16i1_add:
3738 ; AVX512BW: ## BB#0:
3739 ; AVX512BW-NEXT: kmovd %edi, %k0
3740 ; AVX512BW-NEXT: kmovd %esi, %k1
3741 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
3742 ; AVX512BW-NEXT: kmovd %k0, %eax
3743 ; AVX512BW-NEXT: ## kill: %AX %AX %EAX
3744 ; AVX512BW-NEXT: retq
3745 ;
3746 ; AVX512DQ-LABEL: test_v16i1_add:
3747 ; AVX512DQ: ## BB#0:
3748 ; AVX512DQ-NEXT: kmovw %edi, %k0
3749 ; AVX512DQ-NEXT: kmovw %esi, %k1
3750 ; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
3751 ; AVX512DQ-NEXT: kmovw %k0, %eax
3752 ; AVX512DQ-NEXT: ## kill: %AX %AX %EAX
3753 ; AVX512DQ-NEXT: retq
36383754 %m0 = bitcast i16 %x to <16 x i1>
36393755 %m1 = bitcast i16 %y to <16 x i1>
36403756 %m2 = add <16 x i1> %m0, %m1
36433759 }
36443760
36453761 define i16 @test_v16i1_sub(i16 %x, i16 %y) {
3646 ; CHECK-LABEL: test_v16i1_sub:
3647 ; CHECK: ## BB#0:
3648 ; CHECK-NEXT: kmovw %edi, %k0
3649 ; CHECK-NEXT: kmovw %esi, %k1
3650 ; CHECK-NEXT: kxorw %k1, %k0, %k0
3651 ; CHECK-NEXT: kmovw %k0, %eax
3652 ; CHECK-NEXT: retq
3762 ; KNL-LABEL: test_v16i1_sub:
3763 ; KNL: ## BB#0:
3764 ; KNL-NEXT: kmovw %edi, %k0
3765 ; KNL-NEXT: kmovw %esi, %k1
3766 ; KNL-NEXT: kxorw %k1, %k0, %k0
3767 ; KNL-NEXT: kmovw %k0, %eax
3768 ; KNL-NEXT: ## kill: %AX %AX %EAX
3769 ; KNL-NEXT: retq
3770 ;
3771 ; SKX-LABEL: test_v16i1_sub:
3772 ; SKX: ## BB#0:
3773 ; SKX-NEXT: kmovd %edi, %k0
3774 ; SKX-NEXT: kmovd %esi, %k1
3775 ; SKX-NEXT: kxorw %k1, %k0, %k0
3776 ; SKX-NEXT: kmovd %k0, %eax
3777 ; SKX-NEXT: ## kill: %AX %AX %EAX
3778 ; SKX-NEXT: retq
3779 ;
3780 ; AVX512BW-LABEL: test_v16i1_sub:
3781 ; AVX512BW: ## BB#0:
3782 ; AVX512BW-NEXT: kmovd %edi, %k0
3783 ; AVX512BW-NEXT: kmovd %esi, %k1
3784 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
3785 ; AVX512BW-NEXT: kmovd %k0, %eax
3786 ; AVX512BW-NEXT: ## kill: %AX %AX %EAX
3787 ; AVX512BW-NEXT: retq
3788 ;
3789 ; AVX512DQ-LABEL: test_v16i1_sub:
3790 ; AVX512DQ: ## BB#0:
3791 ; AVX512DQ-NEXT: kmovw %edi, %k0
3792 ; AVX512DQ-NEXT: kmovw %esi, %k1
3793 ; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
3794 ; AVX512DQ-NEXT: kmovw %k0, %eax
3795 ; AVX512DQ-NEXT: ## kill: %AX %AX %EAX
3796 ; AVX512DQ-NEXT: retq
36533797 %m0 = bitcast i16 %x to <16 x i1>
36543798 %m1 = bitcast i16 %y to <16 x i1>
36553799 %m2 = sub <16 x i1> %m0, %m1
36583802 }
36593803
36603804 define i16 @test_v16i1_mul(i16 %x, i16 %y) {
3661 ; CHECK-LABEL: test_v16i1_mul:
3662 ; CHECK: ## BB#0:
3663 ; CHECK-NEXT: kmovw %edi, %k0
3664 ; CHECK-NEXT: kmovw %esi, %k1
3665 ; CHECK-NEXT: kandw %k1, %k0, %k0
3666 ; CHECK-NEXT: kmovw %k0, %eax
3667 ; CHECK-NEXT: retq
3805 ; KNL-LABEL: test_v16i1_mul:
3806 ; KNL: ## BB#0:
3807 ; KNL-NEXT: kmovw %edi, %k0
3808 ; KNL-NEXT: kmovw %esi, %k1
3809 ; KNL-NEXT: kandw %k1, %k0, %k0
3810 ; KNL-NEXT: kmovw %k0, %eax
3811 ; KNL-NEXT: ## kill: %AX %AX %EAX
3812 ; KNL-NEXT: retq
3813 ;
3814 ; SKX-LABEL: test_v16i1_mul:
3815 ; SKX: ## BB#0:
3816 ; SKX-NEXT: kmovd %edi, %k0
3817 ; SKX-NEXT: kmovd %esi, %k1
3818 ; SKX-NEXT: kandw %k1, %k0, %k0
3819 ; SKX-NEXT: kmovd %k0, %eax
3820 ; SKX-NEXT: ## kill: %AX %AX %EAX
3821 ; SKX-NEXT: retq
3822 ;
3823 ; AVX512BW-LABEL: test_v16i1_mul:
3824 ; AVX512BW: ## BB#0:
3825 ; AVX512BW-NEXT: kmovd %edi, %k0
3826 ; AVX512BW-NEXT: kmovd %esi, %k1
3827 ; AVX512BW-NEXT: kandw %k1, %k0, %k0
3828 ; AVX512BW-NEXT: kmovd %k0, %eax
3829 ; AVX512BW-NEXT: ## kill: %AX %AX %EAX
3830 ; AVX512BW-NEXT: retq
3831 ;
3832 ; AVX512DQ-LABEL: test_v16i1_mul:
3833 ; AVX512DQ: ## BB#0:
3834 ; AVX512DQ-NEXT: kmovw %edi, %k0
3835 ; AVX512DQ-NEXT: kmovw %esi, %k1
3836 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0
3837 ; AVX512DQ-NEXT: kmovw %k0, %eax
3838 ; AVX512DQ-NEXT: ## kill: %AX %AX %EAX
3839 ; AVX512DQ-NEXT: retq
36683840 %m0 = bitcast i16 %x to <16 x i1>
36693841 %m1 = bitcast i16 %y to <16 x i1>
36703842 %m2 = mul <16 x i1> %m0, %m1
36793851 ; KNL-NEXT: kmovw %esi, %k1
36803852 ; KNL-NEXT: kxorw %k1, %k0, %k0
36813853 ; KNL-NEXT: kmovw %k0, %eax
3854 ; KNL-NEXT: ## kill: %AL %AL %EAX
36823855 ; KNL-NEXT: retq
36833856 ;
36843857 ; SKX-LABEL: test_v8i1_add:
36853858 ; SKX: ## BB#0:
3686 ; SKX-NEXT: kmovb %edi, %k0
3687 ; SKX-NEXT: kmovb %esi, %k1
3859 ; SKX-NEXT: kmovd %edi, %k0
3860 ; SKX-NEXT: kmovd %esi, %k1
36883861 ; SKX-NEXT: kxorb %k1, %k0, %k0
3689 ; SKX-NEXT: kmovb %k0, %eax
3862 ; SKX-NEXT: kmovd %k0, %eax
3863 ; SKX-NEXT: ## kill: %AL %AL %EAX
36903864 ; SKX-NEXT: retq
36913865 ;
36923866 ; AVX512BW-LABEL: test_v8i1_add:
36933867 ; AVX512BW: ## BB#0:
3694 ; AVX512BW-NEXT: kmovw %edi, %k0
3695 ; AVX512BW-NEXT: kmovw %esi, %k1
3868 ; AVX512BW-NEXT: kmovd %edi, %k0
3869 ; AVX512BW-NEXT: kmovd %esi, %k1
36963870 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
3697 ; AVX512BW-NEXT: kmovw %k0, %eax
3871 ; AVX512BW-NEXT: kmovd %k0, %eax
3872 ; AVX512BW-NEXT: ## kill: %AL %AL %EAX
36983873 ; AVX512BW-NEXT: retq
36993874 ;
37003875 ; AVX512DQ-LABEL: test_v8i1_add:
37013876 ; AVX512DQ: ## BB#0:
3702 ; AVX512DQ-NEXT: kmovb %edi, %k0
3703 ; AVX512DQ-NEXT: kmovb %esi, %k1
3877 ; AVX512DQ-NEXT: kmovw %edi, %k0
3878 ; AVX512DQ-NEXT: kmovw %esi, %k1
37043879 ; AVX512DQ-NEXT: kxorb %k1, %k0, %k0
3705 ; AVX512DQ-NEXT: kmovb %k0, %eax
3880 ; AVX512DQ-NEXT: kmovw %k0, %eax
3881 ; AVX512DQ-NEXT: ## kill: %AL %AL %EAX
37063882 ; AVX512DQ-NEXT: retq
37073883 %m0 = bitcast i8 %x to <8 x i1>
37083884 %m1 = bitcast i8 %y to <8 x i1>
37183894 ; KNL-NEXT: kmovw %esi, %k1
37193895 ; KNL-NEXT: kxorw %k1, %k0, %k0
37203896 ; KNL-NEXT: kmovw %k0, %eax
3897 ; KNL-NEXT: ## kill: %AL %AL %EAX
37213898 ; KNL-NEXT: retq
37223899 ;
37233900 ; SKX-LABEL: test_v8i1_sub:
37243901 ; SKX: ## BB#0:
3725 ; SKX-NEXT: kmovb %edi, %k0
3726 ; SKX-NEXT: kmovb %esi, %k1
3902 ; SKX-NEXT: kmovd %edi, %k0
3903 ; SKX-NEXT: kmovd %esi, %k1
37273904 ; SKX-NEXT: kxorb %k1, %k0, %k0
3728 ; SKX-NEXT: kmovb %k0, %eax
3905 ; SKX-NEXT: kmovd %k0, %eax
3906 ; SKX-NEXT: ## kill: %AL %AL %EAX
37293907 ; SKX-NEXT: retq
37303908 ;
37313909 ; AVX512BW-LABEL: test_v8i1_sub:
37323910 ; AVX512BW: ## BB#0:
3733 ; AVX512BW-NEXT: kmovw %edi, %k0
3734 ; AVX512BW-NEXT: kmovw %esi, %k1
3911 ; AVX512BW-NEXT: kmovd %edi, %k0
3912 ; AVX512BW-NEXT: kmovd %esi, %k1
37353913 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0
3736 ; AVX512BW-NEXT: kmovw %k0, %eax
3914 ; AVX512BW-NEXT: kmovd %k0, %eax
3915 ; AVX512BW-NEXT: ## kill: %AL %AL %EAX
37373916 ; AVX512BW-NEXT: retq
37383917 ;
37393918 ; AVX512DQ-LABEL: test_v8i1_sub:
37403919 ; AVX512DQ: ## BB#0:
3741 ; AVX512DQ-NEXT: kmovb %edi, %k0
3742 ; AVX512DQ-NEXT: kmovb %esi, %k1
3920 ; AVX512DQ-NEXT: kmovw %edi, %k0
3921 ; AVX512DQ-NEXT: kmovw %esi, %k1
37433922 ; AVX512DQ-NEXT: kxorb %k1, %k0, %k0
3744 ; AVX512DQ-NEXT: kmovb %k0, %eax
3923 ; AVX512DQ-NEXT: kmovw %k0, %eax
3924 ; AVX512DQ-NEXT: ## kill: %AL %AL %EAX
37453925 ; AVX512DQ-NEXT: retq
37463926 %m0 = bitcast i8 %x to <8 x i1>
37473927 %m1 = bitcast i8 %y to <8 x i1>
37573937 ; KNL-NEXT: kmovw %esi, %k1
37583938 ; KNL-NEXT: kandw %k1, %k0, %k0
37593939 ; KNL-NEXT: kmovw %k0, %eax
3940 ; KNL-NEXT: ## kill: %AL %AL %EAX
37603941 ; KNL-NEXT: retq
37613942 ;
37623943 ; SKX-LABEL: test_v8i1_mul:
37633944 ; SKX: ## BB#0:
3764 ; SKX-NEXT: kmovb %edi, %k0
3765 ; SKX-NEXT: kmovb %esi, %k1
3945 ; SKX-NEXT: kmovd %edi, %k0
3946 ; SKX-NEXT: kmovd %esi, %k1
37663947 ; SKX-NEXT: kandb %k1, %k0, %k0
3767 ; SKX-NEXT: kmovb %k0, %eax
3948 ; SKX-NEXT: kmovd %k0, %eax
3949 ; SKX-NEXT: ## kill: %AL %AL %EAX
37683950 ; SKX-NEXT: retq
37693951 ;
37703952 ; AVX512BW-LABEL: test_v8i1_mul:
37713953 ; AVX512BW: ## BB#0:
3772 ; AVX512BW-NEXT: kmovw %edi, %k0
3773 ; AVX512BW-NEXT: kmovw %esi, %k1
3954 ; AVX512BW-NEXT: kmovd %edi, %k0
3955 ; AVX512BW-NEXT: kmovd %esi, %k1
37743956 ; AVX512BW-NEXT: kandw %k1, %k0, %k0
3775 ; AVX512BW-NEXT: kmovw %k0, %eax
3957 ; AVX512BW-NEXT: kmovd %k0, %eax
3958 ; AVX512BW-NEXT: ## kill: %AL %AL %EAX
37763959 ; AVX512BW-NEXT: retq
37773960 ;
37783961 ; AVX512DQ-LABEL: test_v8i1_mul:
37793962 ; AVX512DQ: ## BB#0:
3780 ; AVX512DQ-NEXT: kmovb %edi, %k0
3781 ; AVX512DQ-NEXT: kmovb %esi, %k1
3963 ; AVX512DQ-NEXT: kmovw %edi, %k0
3964 ; AVX512DQ-NEXT: kmovw %esi, %k1
37823965 ; AVX512DQ-NEXT: kandb %k1, %k0, %k0
3783 ; AVX512DQ-NEXT: kmovb %k0, %eax
3966 ; AVX512DQ-NEXT: kmovw %k0, %eax
3967 ; AVX512DQ-NEXT: ## kill: %AL %AL %EAX
37843968 ; AVX512DQ-NEXT: retq
37853969 %m0 = bitcast i8 %x to <8 x i1>
37863970 %m1 = bitcast i8 %y to <8 x i1>
250250 }
251251
252252 ; CHECK-LABEL: test_argv16i1:
253 ; CHECK: kmovw %edx, %k{{[0-9]+}}
254 ; CHECK: kmovw %ecx, %k{{[0-9]+}}
255 ; CHECK: kmovw %eax, %k{{[0-9]+}}
253 ; CHECK: kmovd %edx, %k{{[0-9]+}}
254 ; CHECK: kmovd %ecx, %k{{[0-9]+}}
255 ; CHECK: kmovd %eax, %k{{[0-9]+}}
256256 ; CHECK: ret{{l|q}}
257257
258258 ; Test regcall when receiving arguments of v16i1 type
300300 }
301301
302302 ; CHECK-LABEL: test_argv8i1:
303 ; CHECK: kmovw %edx, %k{{[0-9]+}}
304 ; CHECK: kmovw %ecx, %k{{[0-9]+}}
305 ; CHECK: kmovw %eax, %k{{[0-9]+}}
303 ; CHECK: kmovd %edx, %k{{[0-9]+}}
304 ; CHECK: kmovd %ecx, %k{{[0-9]+}}
305 ; CHECK: kmovd %eax, %k{{[0-9]+}}
306306 ; CHECK: ret{{l|q}}
307307
308308 ; Test regcall when receiving arguments of v8i1 type
338338
339339 ; CHECK-LABEL: caller_retv8i1:
340340 ; CHECK: call{{l|q}} {{_*}}test_retv8i1
341 ; CHECK: kmovw %eax, %k{{[0-9]+}}
341 ; CHECK: kmovd %eax, %k{{[0-9]+}}
342342 ; CHECK: ret{{l|q}}
343343
344344 ; Test regcall when processing result of v8i1 type
8989 ; CHECK-NEXT: kmovw %eax, %k1
9090 ; CHECK-NEXT: korw %k1, %k0, %k0
9191 ; CHECK-NEXT: kmovw %k0, %eax
92 ; CHECK-NEXT: ## kill: %AL %AL %EAX
9293 ; CHECK-NEXT: retq
9394 %mask = load <8 x i1> , <8 x i1>* %m
9495 %a = load <8 x i1> , <8 x i1>* %a.0
119120 ; CHECK-NEXT: kmovw %eax, %k1
120121 ; CHECK-NEXT: kandw %k1, %k0, %k0
121122 ; CHECK-NEXT: kmovw %k0, %eax
123 ; CHECK-NEXT: ## kill: %AL %AL %EAX
122124 ; CHECK-NEXT: retq
123125 %mask = load <8 x i1> , <8 x i1>* %m
124126 %a = load <8 x i1> , <8 x i1>* %a.0
136138 ; CHECK-NEXT: kandw %k0, %k1, %k0
137139 ; CHECK-NEXT: korw %k2, %k0, %k0
138140 ; CHECK-NEXT: kmovw %k0, %eax
141 ; CHECK-NEXT: ## kill: %AL %AL %EAX
139142 ; CHECK-NEXT: retq
140143 %mask = bitcast i8 %m to <8 x i1>
141144 %a = bitcast i8 %a.0 to <8 x i1>
165165 ; KNL-NEXT: vpcmpeqq %zmm3, %zmm1, %k1
166166 ; KNL-NEXT: kunpckbw %k0, %k1, %k0
167167 ; KNL-NEXT: kmovw %k0, %eax
168 ; KNL-NEXT: ## kill: %AX %AX %EAX
168169 ; KNL-NEXT: retq
169170 ;
170171 ; SKX-LABEL: test12: