llvm.org GIT mirror llvm / 1b68387
[X86] Bring consistent naming to the SSE/AVX and AVX512 PALIGNR instructions. Then add shuffle decode printing for the EVEX forms, which is made easier by having the naming structure more similar to that of other instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@272249 91177308-0d34-0410-b5e6-96231b3b80d8 — Craig Topper, 4 years ago
7 changed file(s) with 42 addition(s) and 46 deletion(s). Raw diff Collapse all Expand all
341341 ShuffleMask);
342342 break;
343343
344 case X86::PALIGNR128rr:
345 case X86::VPALIGNR128rr:
346 case X86::VPALIGNR256rr:
344 CASE_SHUF(PALIGNR, rri)
347345 Src1Name = getRegName(MI->getOperand(2).getReg());
348346 // FALL THROUGH.
349 case X86::PALIGNR128rm:
350 case X86::VPALIGNR128rm:
351 case X86::VPALIGNR256rm:
347 CASE_SHUF(PALIGNR, rmi)
352348 Src2Name = getRegName(MI->getOperand(1).getReg());
353349 DestName = getRegName(MI->getOperand(0).getReg());
354350 if (MI->getOperand(NumOperands - 1).isImm())
72237223 defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info>,
72247224 EVEX_CD8<64, CD8VF>, VEX_W;
72257225
7226 multiclass avx512_vpalign_lowering p>{
7226 multiclass avx512_vpalignr_lowering p>{
72277227 let Predicates = p in
72287228 def NAME#_.VTName#rri:
72297229 Pat<(_.VT (X86PAlignr _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
72317231 _.RC:$src1, _.RC:$src2, imm:$imm)>;
72327232 }
72337233
7234 multiclass avx512_vpalign_lowering_common:
7235 avx512_vpalign_lowering<_.info512, [HasBWI]>,
7236 avx512_vpalign_lowering<_.info128, [HasBWI, HasVLX]>,
7237 avx512_vpalign_lowering<_.info256, [HasBWI, HasVLX]>;
7238
7239 defm VPALIGN: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr" ,
7234 multiclass avx512_vpalignr_lowering_common:
7235 avx512_vpalignr_lowering<_.info512, [HasBWI]>,
7236 avx512_vpalignr_lowering<_.info128, [HasBWI, HasVLX]>,
7237 avx512_vpalignr_lowering<_.info256, [HasBWI, HasVLX]>;
7238
7239 defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr" ,
72407240 avx512vl_i8_info, avx512vl_i8_info>,
7241 avx512_vpalign_lowering_common,
7242 avx512_vpalign_lowering_common,
7243 avx512_vpalign_lowering_common,
7244 avx512_vpalign_lowering_common,
7245 avx512_vpalign_lowering_common_info>,
7241 avx512_vpalignr_lowering_common_info>,
7242 avx512_vpalignr_lowering_common,
7243 avx512_vpalignr_lowering_common,
7244 avx512_vpalignr_lowering_common,
7245 avx512_vpalignr_lowering_common,
72467246 EVEX_CD8<8, CD8VF>;
72477247
72487248 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw" ,
10281028 { X86::PADDUSBrr, X86::PADDUSBrm, TB_ALIGN_16 },
10291029 { X86::PADDUSWrr, X86::PADDUSWrm, TB_ALIGN_16 },
10301030 { X86::PADDWrr, X86::PADDWrm, TB_ALIGN_16 },
1031 { X86::PALIGNR128rr, X86::PALIGNR128rm, TB_ALIGN_16 },
1031 { X86::PALIGNRrri, X86::PALIGNRrmi, TB_ALIGN_16 },
10321032 { X86::PANDNrr, X86::PANDNrm, TB_ALIGN_16 },
10331033 { X86::PANDrr, X86::PANDrm, TB_ALIGN_16 },
10341034 { X86::PAVGBrr, X86::PAVGBrm, TB_ALIGN_16 },
13251325 { X86::VPADDUSBrr, X86::VPADDUSBrm, 0 },
13261326 { X86::VPADDUSWrr, X86::VPADDUSWrm, 0 },
13271327 { X86::VPADDWrr, X86::VPADDWrm, 0 },
1328 { X86::VPALIGNR128rr, X86::VPALIGNR128rm, 0 },
1328 { X86::VPALIGNRrri, X86::VPALIGNRrmi, 0 },
13291329 { X86::VPANDNrr, X86::VPANDNrm, 0 },
13301330 { X86::VPANDrr, X86::VPANDrm, 0 },
13311331 { X86::VPAVGBrr, X86::VPAVGBrm, 0 },
14811481 { X86::VPADDUSBYrr, X86::VPADDUSBYrm, 0 },
14821482 { X86::VPADDUSWYrr, X86::VPADDUSWYrm, 0 },
14831483 { X86::VPADDWYrr, X86::VPADDWYrm, 0 },
1484 { X86::VPALIGNR256rr, X86::VPALIGNR256rm, 0 },
1484 { X86::VPALIGNRYrri, X86::VPALIGNRYrmi, 0 },
14851485 { X86::VPANDNYrr, X86::VPANDNYrm, 0 },
14861486 { X86::VPANDYrr, X86::VPANDYrm, 0 },
14871487 { X86::VPAVGBYrr, X86::VPAVGBYrm, 0 },
56735673
56745674 multiclass ssse3_palignr {
56755675 let hasSideEffects = 0 in {
5676 def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
5676 def rri : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
56775677 (ins VR128:$src1, VR128:$src2, u8imm:$src3),
56785678 !if(Is2Addr,
56795679 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
56815681 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
56825682 [], IIC_SSE_PALIGNRR>, Sched<[WriteShuffle]>;
56835683 let mayLoad = 1 in
5684 def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
5684 def rmi : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
56855685 (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
56865686 !if(Is2Addr,
56875687 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
56935693
56945694 multiclass ssse3_palignr_y {
56955695 let hasSideEffects = 0 in {
5696 def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst),
5696 def Yrri : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst),
56975697 (ins VR256:$src1, VR256:$src2, u8imm:$src3),
56985698 !strconcat(asm,
56995699 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
57005700 []>, Sched<[WriteShuffle]>;
57015701 let mayLoad = 1 in
5702 def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst),
5702 def Yrmi : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst),
57035703 (ins VR256:$src1, i256mem:$src2, u8imm:$src3),
57045704 !strconcat(asm,
57055705 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
57085708 }
57095709
57105710 let Predicates = [HasAVX] in
5711 defm VPALIGN : ssse3_palignr<"vpalignr", 0>, VEX_4V;
5711 defm VPALIGNR : ssse3_palignr<"vpalignr", 0>, VEX_4V;
57125712 let Predicates = [HasAVX2] in
5713 defm VPALIGN : ssse3_palignr_y<"vpalignr", 0>, VEX_4V, VEX_L;
5713 defm VPALIGNR : ssse3_palignr_y<"vpalignr", 0>, VEX_4V, VEX_L;
57145714 let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
5715 defm PALIGN : ssse3_palignr<"palignr">;
5715 defm PALIGNR : ssse3_palignr<"palignr">;
57165716
57175717 let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
57185718 def : Pat<(v8i32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
5719 (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
5719 (VPALIGNRYrri VR256:$src1, VR256:$src2, imm:$imm)>;
57205720 def : Pat<(v8f32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
5721 (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
5721 (VPALIGNRYrri VR256:$src1, VR256:$src2, imm:$imm)>;
57225722 def : Pat<(v16i16 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
5723 (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
5723 (VPALIGNRYrri VR256:$src1, VR256:$src2, imm:$imm)>;
57245724 def : Pat<(v32i8 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
5725 (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
5725 (VPALIGNRYrri VR256:$src1, VR256:$src2, imm:$imm)>;
57265726 }
57275727
57285728 let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
57295729 def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
5730 (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
5730 (VPALIGNRrri VR128:$src1, VR128:$src2, imm:$imm)>;
57315731 def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
5732 (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
5732 (VPALIGNRrri VR128:$src1, VR128:$src2, imm:$imm)>;
57335733 def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
5734 (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
5734 (VPALIGNRrri VR128:$src1, VR128:$src2, imm:$imm)>;
57355735 def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
5736 (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
5736 (VPALIGNRrri VR128:$src1, VR128:$src2, imm:$imm)>;
57375737 }
57385738
57395739 let Predicates = [UseSSSE3] in {
57405740 def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
5741 (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
5741 (PALIGNRrri VR128:$src1, VR128:$src2, imm:$imm)>;
57425742 def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
5743 (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
5743 (PALIGNRrri VR128:$src1, VR128:$src2, imm:$imm)>;
57445744 def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
5745 (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
5745 (PALIGNRrri VR128:$src1, VR128:$src2, imm:$imm)>;
57465746 def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
5747 (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
5747 (PALIGNRrri VR128:$src1, VR128:$src2, imm:$imm)>;
57485748 }
57495749
57505750 //===---------------------------------------------------------------------===//
104104 ; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
105105 ; AVX512BW-NEXT: vpunpckhqdq {{.*#+}} zmm1 = zmm0[1,1,3,3,5,5,7,7]
106106 ; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
107 ; AVX512BW-NEXT: vpalignr $4, %zmm0, %zmm0, %zmm1
107 ; AVX512BW-NEXT: vpalignr {{.*#+}} zmm1 = zmm0[4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,20,21,22,23,24,25,26,27,28,29,30,31,16,17,18,19,36,37,38,39,40,41,42,43,44,45,46,47,32,33,34,35,52,53,54,55,56,57,58,59,60,61,62,63,48,49,50,51]
108108 ; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
109109 ; AVX512BW-NEXT: vmovd %xmm0, %eax
110110 ; AVX512BW-NEXT: retq
355355 ; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
356356 ; AVX512BW-NEXT: vpunpckhqdq {{.*#+}} zmm1 = zmm0[1,1,3,3,5,5,7,7]
357357 ; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
358 ; AVX512BW-NEXT: vpalignr $4, %zmm0, %zmm0, %zmm1
358 ; AVX512BW-NEXT: vpalignr {{.*#+}} zmm1 = zmm0[4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,20,21,22,23,24,25,26,27,28,29,30,31,16,17,18,19,36,37,38,39,40,41,42,43,44,45,46,47,32,33,34,35,52,53,54,55,56,57,58,59,60,61,62,63,48,49,50,51]
359359 ; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
360360 ; AVX512BW-NEXT: vmovd %xmm0, %eax
361361 ; AVX512BW-NEXT: retq
840840 ; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
841841 ; AVX512BW-NEXT: vpunpckhqdq {{.*#+}} zmm1 = zmm0[1,1,3,3,5,5,7,7]
842842 ; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
843 ; AVX512BW-NEXT: vpalignr $4, %zmm0, %zmm0, %zmm1
843 ; AVX512BW-NEXT: vpalignr {{.*#+}} zmm1 = zmm0[4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,20,21,22,23,24,25,26,27,28,29,30,31,16,17,18,19,36,37,38,39,40,41,42,43,44,45,46,47,32,33,34,35,52,53,54,55,56,57,58,59,60,61,62,63,48,49,50,51]
844844 ; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
845845 ; AVX512BW-NEXT: vmovd %xmm0, %eax
846846 ; AVX512BW-NEXT: retq
1212 ;
1313 ; AVX512BW-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
1414 ; AVX512BW: # BB#0:
15 ; AVX512BW-NEXT: vpalignr $2, %zmm0, %zmm0, %zmm0
15 ; AVX512BW-NEXT: vpalignr {{.*#+}} zmm0 = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,18,19,20,21,22,23,24,25,26,27,28,29,30,31,16,17,34,35,36,37,38,39,40,41,42,43,44,45,46,47,32,33,50,51,52,53,54,55,56,57,58,59,60,61,62,63,48,49]
1616 ; AVX512BW-NEXT: retq
1717 ;
1818 ; AVX512DQ-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
3232 ;
3333 ; AVX512BW-LABEL: shuffle_v64i8_79_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_95_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_111_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_127_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
3434 ; AVX512BW: # BB#0:
35 ; AVX512BW-NEXT: vpalignr $15, %zmm1, %zmm0, %zmm0
35 ; AVX512BW-NEXT: vpalignr {{.*#+}} zmm0 = zmm1[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zmm1[31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],zmm1[47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45,46],zmm1[63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61,62]
3636 ; AVX512BW-NEXT: retq
3737 ;
3838 ; AVX512DQ-LABEL: shuffle_v64i8_79_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_95_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_111_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_127_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
4040 ; VL_BW_DQ-NEXT: movb $1, %al
4141 ; VL_BW_DQ-NEXT: kmovb %eax, %k0
4242 ; VL_BW_DQ-NEXT: vpmovm2q %k0, %xmm1
43 ; VL_BW_DQ-NEXT: vpalignr $8, %xmm0, %xmm1, %xmm0
43 ; VL_BW_DQ-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
4444 ; VL_BW_DQ-NEXT: vpsllq $63, %xmm0, %xmm0
4545 ; VL_BW_DQ-NEXT: vptestmq %xmm0, %xmm0, %k0
4646 ; VL_BW_DQ-NEXT: vpmovm2q %k0, %xmm0