llvm.org GIT mirror llvm / 3f2b2c2
Add a bunch more X86 AVX2 instructions and their corresponding intrinsics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143529 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 8 years ago
4 changed file(s) with 584 addition(s) and 2 deletion(s). Raw diff Collapse all Expand all
15241524 llvm_v16i16_ty], [IntrNoMem]>;
15251525 }
15261526
1527 // Absolute value ops
1528 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
1529 def int_x86_avx2_pabs_b : GCCBuiltin<"__builtin_ia32_pabsb256">,
1530 Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>;
1531 def int_x86_avx2_pabs_w : GCCBuiltin<"__builtin_ia32_pabsw256">,
1532 Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>;
1533 def int_x86_avx2_pabs_d : GCCBuiltin<"__builtin_ia32_pabsd256">,
1534 Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
1535 }
1536
1537 // Horizontal arithmetic ops
1538 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
1539 def int_x86_avx2_phadd_w : GCCBuiltin<"__builtin_ia32_phaddw256">,
1540 Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1541 llvm_v16i16_ty], [IntrNoMem]>;
1542 def int_x86_avx2_phadd_d : GCCBuiltin<"__builtin_ia32_phaddd256">,
1543 Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
1544 llvm_v8i32_ty], [IntrNoMem]>;
1545 def int_x86_avx2_phadd_sw : GCCBuiltin<"__builtin_ia32_phaddsw256">,
1546 Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1547 llvm_v16i16_ty], [IntrNoMem]>;
1548 def int_x86_avx2_phsub_w : GCCBuiltin<"__builtin_ia32_phsubw256">,
1549 Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1550 llvm_v16i16_ty], [IntrNoMem]>;
1551 def int_x86_avx2_phsub_d : GCCBuiltin<"__builtin_ia32_phsubd256">,
1552 Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
1553 llvm_v8i32_ty], [IntrNoMem]>;
1554 def int_x86_avx2_phsub_sw : GCCBuiltin<"__builtin_ia32_phsubsw256">,
1555 Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1556 llvm_v16i16_ty], [IntrNoMem]>;
1557 def int_x86_avx2_pmadd_ub_sw : GCCBuiltin<"__builtin_ia32_pmaddubsw256">,
1558 Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty,
1559 llvm_v32i8_ty], [IntrNoMem]>;
1560 }
1561
1562 // Sign ops
1563 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
1564 def int_x86_avx2_psign_b : GCCBuiltin<"__builtin_ia32_psignb256">,
1565 Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
1566 llvm_v32i8_ty], [IntrNoMem]>;
1567 def int_x86_avx2_psign_w : GCCBuiltin<"__builtin_ia32_psignw256">,
1568 Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1569 llvm_v16i16_ty], [IntrNoMem]>;
1570 def int_x86_avx2_psign_d : GCCBuiltin<"__builtin_ia32_psignd256">,
1571 Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
1572 llvm_v8i32_ty], [IntrNoMem]>;
1573 }
1574
1575 // Packed multiply high with round and scale
1576 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
1577 def int_x86_avx2_pmul_hr_sw : GCCBuiltin<"__builtin_ia32_pmulhrsw256">,
1578 Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1579 llvm_v16i16_ty], [IntrNoMem, Commutative]>;
1580 }
1581
1582 // Vector sign and zero extend
1583 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
1584 def int_x86_avx2_pmovsxbd : GCCBuiltin<"__builtin_ia32_pmovsxbd256">,
1585 Intrinsic<[llvm_v8i32_ty], [llvm_v16i8_ty],
1586 [IntrNoMem]>;
1587 def int_x86_avx2_pmovsxbq : GCCBuiltin<"__builtin_ia32_pmovsxbq256">,
1588 Intrinsic<[llvm_v4i64_ty], [llvm_v16i8_ty],
1589 [IntrNoMem]>;
1590 def int_x86_avx2_pmovsxbw : GCCBuiltin<"__builtin_ia32_pmovsxbw256">,
1591 Intrinsic<[llvm_v16i16_ty], [llvm_v16i8_ty],
1592 [IntrNoMem]>;
1593 def int_x86_avx2_pmovsxdq : GCCBuiltin<"__builtin_ia32_pmovsxdq256">,
1594 Intrinsic<[llvm_v4i64_ty], [llvm_v4i32_ty],
1595 [IntrNoMem]>;
1596 def int_x86_avx2_pmovsxwd : GCCBuiltin<"__builtin_ia32_pmovsxwd256">,
1597 Intrinsic<[llvm_v8i32_ty], [llvm_v8i16_ty],
1598 [IntrNoMem]>;
1599 def int_x86_avx2_pmovsxwq : GCCBuiltin<"__builtin_ia32_pmovsxwq256">,
1600 Intrinsic<[llvm_v4i64_ty], [llvm_v8i16_ty],
1601 [IntrNoMem]>;
1602 def int_x86_avx2_pmovzxbd : GCCBuiltin<"__builtin_ia32_pmovzxbd256">,
1603 Intrinsic<[llvm_v8i32_ty], [llvm_v16i8_ty],
1604 [IntrNoMem]>;
1605 def int_x86_avx2_pmovzxbq : GCCBuiltin<"__builtin_ia32_pmovzxbq256">,
1606 Intrinsic<[llvm_v4i64_ty], [llvm_v16i8_ty],
1607 [IntrNoMem]>;
1608 def int_x86_avx2_pmovzxbw : GCCBuiltin<"__builtin_ia32_pmovzxbw256">,
1609 Intrinsic<[llvm_v16i16_ty], [llvm_v16i8_ty],
1610 [IntrNoMem]>;
1611 def int_x86_avx2_pmovzxdq : GCCBuiltin<"__builtin_ia32_pmovzxdq256">,
1612 Intrinsic<[llvm_v4i64_ty], [llvm_v4i32_ty],
1613 [IntrNoMem]>;
1614 def int_x86_avx2_pmovzxwd : GCCBuiltin<"__builtin_ia32_pmovzxwd256">,
1615 Intrinsic<[llvm_v8i32_ty], [llvm_v8i16_ty],
1616 [IntrNoMem]>;
1617 def int_x86_avx2_pmovzxwq : GCCBuiltin<"__builtin_ia32_pmovzxwq256">,
1618 Intrinsic<[llvm_v4i64_ty], [llvm_v8i16_ty],
1619 [IntrNoMem]>;
1620 }
1621
1622 // Misc.
1623 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
1624 def int_x86_avx2_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb256">,
1625 Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>;
1626 def int_x86_avx2_pshuf_b : GCCBuiltin<"__builtin_ia32_pshufb256">,
1627 Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
1628 llvm_v32i8_ty], [IntrNoMem]>;
1629 }
1630
15271631 //===----------------------------------------------------------------------===//
15281632 // MMX
15291633
275275 def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
276276
277277 // 256-bit memop pattern fragments
278 def memopv32i8 : PatFrag<(ops node:$ptr), (v32i8 (memop node:$ptr))>;
279278 def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>;
280279 def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;
281280 def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>;
282281 def memopv8i32 : PatFrag<(ops node:$ptr), (v8i32 (memop node:$ptr))>;
282 def memopv16i16 : PatFrag<(ops node:$ptr), (v16i16 (memop node:$ptr))>;
283 def memopv32i8 : PatFrag<(ops node:$ptr), (v32i8 (memop node:$ptr))>;
283284
284285 // SSSE3 uses MMX registers for some instructions. They aren't aligned on a
285286 // 16-byte boundary.
325326 def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
326327
327328 // 256-bit bitconvert pattern fragments
329 def bc_v32i8 : PatFrag<(ops node:$in), (v32i8 (bitconvert node:$in))>;
330 def bc_v16i16 : PatFrag<(ops node:$in), (v16i16 (bitconvert node:$in))>;
328331 def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>;
329332 def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>;
330333
40074007 (bc_frag (memopv2i64 addr:$src1)),
40084008 (undef))))]>;
40094009 }
4010
4011 multiclass sse2_pshuffle_y
4012 PatFrag bc_frag> {
4013 def Yri : Ii8<0x70, MRMSrcReg,
4014 (outs VR256:$dst), (ins VR256:$src1, i8imm:$src2),
4015 !strconcat(OpcodeStr,
4016 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4017 [(set VR256:$dst, (vt (pshuf_frag:$src2 VR256:$src1,
4018 (undef))))]>;
4019 def Ymi : Ii8<0x70, MRMSrcMem,
4020 (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2),
4021 !strconcat(OpcodeStr,
4022 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4023 [(set VR256:$dst, (vt (pshuf_frag:$src2
4024 (bc_frag (memopv4i64 addr:$src1)),
4025 (undef))))]>;
4026 }
40104027 } // ExeDomain = SSEPackedInt
40114028
40124029 let Predicates = [HasAVX] in {
40514068 (VPSHUFLWmi addr:$src, imm:$imm)>;
40524069 }
40534070
4071 let Predicates = [HasAVX2] in {
4072 let AddedComplexity = 5 in
4073 defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, pshufd, bc_v8i32>, TB,
4074 OpSize, VEX;
4075
4076 // SSE2 with ImmT == Imm8 and XS prefix.
4077 defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, pshufhw, bc_v16i16>, XS,
4078 VEX;
4079
4080 // SSE2 with ImmT == Imm8 and XD prefix.
4081 defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, pshuflw, bc_v16i16>, XD,
4082 VEX;
4083 }
4084
40544085 let Predicates = [HasSSE2] in {
40554086 let AddedComplexity = 5 in
40564087 defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize;
41134144 addr:$src2))))]>;
41144145 }
41154146
4147 multiclass sse2_unpack_y opc, string OpcodeStr, ValueType vt,
4148 SDNode OpNode, PatFrag bc_frag> {
4149 def Yrr : PDI
4150 (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
4151 !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4152 [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>;
4153 def Yrm : PDI
4154 (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
4155 !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4156 [(set VR256:$dst, (OpNode VR256:$src1,
4157 (bc_frag (memopv4i64 addr:$src2))))]>;
4158 }
4159
41164160 let Predicates = [HasAVX] in {
41174161 defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpcklbw,
41184162 bc_v16i8, 0>, VEX_4V;
41534197 "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
41544198 [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1,
41554199 (memopv2i64 addr:$src2))))]>, VEX_4V;
4200 }
4201
4202 let Predicates = [HasAVX2] in {
4203 defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpcklbw,
4204 bc_v32i8>, VEX_4V;
4205 defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpcklwd,
4206 bc_v16i16>, VEX_4V;
4207 defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckldq,
4208 bc_v8i32>, VEX_4V;
4209
4210 /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen
4211 /// knew to collapse (bitconvert VT to VT) into its operand.
4212 def VPUNPCKLQDQYrr : PDI<0x6C, MRMSrcReg,
4213 (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
4214 "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4215 [(set VR256:$dst, (v4i64 (X86Punpcklqdq VR256:$src1,
4216 VR256:$src2)))]>, VEX_4V;
4217 def VPUNPCKLQDQYrm : PDI<0x6C, MRMSrcMem,
4218 (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
4219 "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4220 [(set VR256:$dst, (v4i64 (X86Punpcklqdq VR256:$src1,
4221 (memopv4i64 addr:$src2))))]>, VEX_4V;
4222
4223 defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckhbw,
4224 bc_v32i8>, VEX_4V;
4225 defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckhwd,
4226 bc_v16i16>, VEX_4V;
4227 defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckhdq,
4228 bc_v8i32>, VEX_4V;
4229
4230 /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen
4231 /// knew to collapse (bitconvert VT to VT) into its operand.
4232 def VPUNPCKHQDQYrr : PDI<0x6D, MRMSrcReg,
4233 (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
4234 "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4235 [(set VR256:$dst, (v4i64 (X86Punpckhqdq VR256:$src1,
4236 VR256:$src2)))]>, VEX_4V;
4237 def VPUNPCKHQDQYrm : PDI<0x6D, MRMSrcMem,
4238 (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
4239 "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4240 [(set VR256:$dst, (v4i64 (X86Punpckhqdq VR256:$src1,
4241 (memopv4i64 addr:$src2))))]>, VEX_4V;
41564242 }
41574243
41584244 let Constraints = "$src1 = $dst" in {
42654351 [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX;
42664352 def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
42674353 "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX;
4354
// 256-bit-source byte mask extraction, gated on HasAVX2.
// - VPMOVMSKBYrr: GR32 destination; selected for int_x86_avx2_pmovmskb.
// - VPMOVMSKBYr64r: GR64 destination; its pattern list is empty, so this
//   definition contributes no selection pattern.
// NOTE(review): the asm strings read "pmovmskb" without the leading "v";
// presumably the VPDI class prepends it (the accompanying test checks for
// "vpmovmskb") -- confirm against the VPDI definition.
4355 let Predicates = [HasAVX2] in {
4356 def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
4357 "pmovmskb\t{$src, $dst|$dst, $src}",
4358 [(set GR32:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>, VEX;
4359 def VPMOVMSKBYr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
4360 "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX;
4361 }
4362
4362
42684363 def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
42694364 "pmovmskb\t{$src, $dst|$dst, $src}",
42704365 [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
50155110 (bitconvert (mem_frag128 addr:$src))))]>, OpSize;
50165111 }
50175112
5113 /// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
5114 multiclass SS3I_unop_rm_int_y opc, string OpcodeStr,
5115 PatFrag mem_frag256, Intrinsic IntId256> {
5116 def rr256 : SS38I
5117 (ins VR256:$src),
5118 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5119 [(set VR256:$dst, (IntId256 VR256:$src))]>,
5120 OpSize;
5121
5122 def rm256 : SS38I
5123 (ins i256mem:$src),
5124 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5125 [(set VR256:$dst,
5126 (IntId256
5127 (bitconvert (mem_frag256 addr:$src))))]>, OpSize;
5128 }
5129
50185130 let Predicates = [HasAVX] in {
50195131 defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8,
50205132 int_x86_ssse3_pabs_b_128>, VEX;
50225134 int_x86_ssse3_pabs_w_128>, VEX;
50235135 defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv4i32,
50245136 int_x86_ssse3_pabs_d_128>, VEX;
5137 }
5138
5139 let Predicates = [HasAVX2] in {
5140 defm VPABSB : SS3I_unop_rm_int_y<0x1C, "vpabsb", memopv32i8,
5141 int_x86_avx2_pabs_b>, VEX;
5142 defm VPABSW : SS3I_unop_rm_int_y<0x1D, "vpabsw", memopv16i16,
5143 int_x86_avx2_pabs_w>, VEX;
5144 defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd", memopv8i32,
5145 int_x86_avx2_pabs_d>, VEX;
50255146 }
50265147
50275148 defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8,
50545175 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
50555176 [(set VR128:$dst,
50565177 (IntId128 VR128:$src1,
5057 (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
5178 (bitconvert (mem_frag128 addr:$src2))))]>, OpSize;
5179 }
5180
5181 multiclass SS3I_binop_rm_int_y opc, string OpcodeStr,
5182 PatFrag mem_frag256, Intrinsic IntId256> {
5183 let isCommutable = 1 in
5184 def rr256 : SS38I
5185 (ins VR256:$src1, VR256:$src2),
5186 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5187 [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>,
5188 OpSize;
5189 def rm256 : SS38I
5190 (ins VR256:$src1, i256mem:$src2),
5191 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5192 [(set VR256:$dst,
5193 (IntId256 VR256:$src1,
5194 (bitconvert (mem_frag256 addr:$src2))))]>, OpSize;
50585195 }
50595196
50605197 let ImmT = NoImm, Predicates = [HasAVX] in {
50845221 }
50855222 defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16,
50865223 int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V;
5224 }
5225
// AVX2 256-bit forms of the SSSE3 binary integer intrinsics. Each defm passes
// the 256-bit memop fragment whose vector type matches the intrinsic's operand
// type, so the folded-load (rm256) pattern matches a full 256-bit memory operand.
5226 let ImmT = NoImm, Predicates = [HasAVX2] in {
5227 let isCommutable = 0 in {
5228 defm VPHADDW : SS3I_binop_rm_int_y<0x01, "vphaddw", memopv16i16,
5229 int_x86_avx2_phadd_w>, VEX_4V;
5230 defm VPHADDD : SS3I_binop_rm_int_y<0x02, "vphaddd", memopv8i32,
5231 int_x86_avx2_phadd_d>, VEX_4V;
5232 defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw", memopv16i16,
5233 int_x86_avx2_phadd_sw>, VEX_4V;
5234 defm VPHSUBW : SS3I_binop_rm_int_y<0x05, "vphsubw", memopv16i16,
5235 int_x86_avx2_phsub_w>, VEX_4V;
5236 defm VPHSUBD : SS3I_binop_rm_int_y<0x06, "vphsubd", memopv8i32,
5237 int_x86_avx2_phsub_d>, VEX_4V;
5238 defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw", memopv16i16,
5239 int_x86_avx2_phsub_sw>, VEX_4V;
5240 defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw", memopv32i8,
5241 int_x86_avx2_pmadd_ub_sw>, VEX_4V;
5242 defm VPSHUFB : SS3I_binop_rm_int_y<0x00, "vpshufb", memopv32i8,
5243 int_x86_avx2_pshuf_b>, VEX_4V;
// The AVX2 psign intrinsics operate on v32i8 / v16i16 / v8i32, so the memory
// forms must use the 256-bit memop fragments, not the 128-bit SSSE3 ones.
5244 defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", memopv32i8,
5245 int_x86_avx2_psign_b>, VEX_4V;
5246 defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", memopv16i16,
5247 int_x86_avx2_psign_w>, VEX_4V;
5248 defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", memopv8i32,
5249 int_x86_avx2_psign_d>, VEX_4V;
5250 }
5251 defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw", memopv16i16,
5252 int_x86_avx2_pmul_hr_sw>, VEX_4V;
50875253 }
50885254
50895255 // None of these have i8 immediate fields.
51655331 []>, OpSize;
51665332 }
51675333
5334 multiclass ssse3_palign_y {
5335 def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst),
5336 (ins VR256:$src1, VR256:$src2, i8imm:$src3),
5337 !strconcat(asm,
5338 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
5339 []>, OpSize;
5340 def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst),
5341 (ins VR256:$src1, i256mem:$src2, i8imm:$src3),
5342 !strconcat(asm,
5343 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
5344 []>, OpSize;
5345 }
5346
51685347 let Predicates = [HasAVX] in
51695348 defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
5349 let Predicates = [HasAVX2] in
5350 defm VPALIGN : ssse3_palign_y<"vpalignr", 0>, VEX_4V;
51705351 let Constraints = "$src1 = $dst", Predicates = [HasSSSE3] in
51715352 defm PALIGN : ssse3_palign<"palignr">;
51725353
52325413 [(set VR128:$dst,
52335414 (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,
52345415 OpSize;
5416 }
5417
5418 multiclass SS41I_binop_rm_int16_y opc, string OpcodeStr,
5419 Intrinsic IntId> {
5420 def Yrr : SS48I
5421 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5422 [(set VR256:$dst, (IntId VR128:$src))]>, OpSize;
5423
5424 def Yrm : SS48I
5425 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5426 [(set VR256:$dst, (IntId (load addr:$src)))]>, OpSize;
52355427 }
52365428
52375429 let Predicates = [HasAVX] in {
52495441 VEX;
52505442 }
52515443
5444 let Predicates = [HasAVX2] in {
5445 defm VPMOVSXBW : SS41I_binop_rm_int16_y<0x20, "vpmovsxbw",
5446 int_x86_avx2_pmovsxbw>, VEX;
5447 defm VPMOVSXWD : SS41I_binop_rm_int16_y<0x23, "vpmovsxwd",
5448 int_x86_avx2_pmovsxwd>, VEX;
5449 defm VPMOVSXDQ : SS41I_binop_rm_int16_y<0x25, "vpmovsxdq",
5450 int_x86_avx2_pmovsxdq>, VEX;
5451 defm VPMOVZXBW : SS41I_binop_rm_int16_y<0x30, "vpmovzxbw",
5452 int_x86_avx2_pmovzxbw>, VEX;
5453 defm VPMOVZXWD : SS41I_binop_rm_int16_y<0x33, "vpmovzxwd",
5454 int_x86_avx2_pmovzxwd>, VEX;
5455 defm VPMOVZXDQ : SS41I_binop_rm_int16_y<0x35, "vpmovzxdq",
5456 int_x86_avx2_pmovzxdq>, VEX;
5457 }
5458
52525459 defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>;
52535460 defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>;
52545461 defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>;
53325539 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
53335540 [(set VR128:$dst,
53345541 (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,
5542 OpSize;
5543 }
5544
5545 multiclass SS41I_binop_rm_int8_y opc, string OpcodeStr,
5546 Intrinsic IntId> {
5547 def Yrr : SS48I
5548 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5549 [(set VR256:$dst, (IntId VR128:$src))]>, OpSize;
5550
5551 def Yrm : SS48I
5552 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5553 [(set VR256:$dst,
5554 (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,
53355555 OpSize;
53365556 }
53375557
53465566 VEX;
53475567 }
53485568
5569 let Predicates = [HasAVX2] in {
5570 defm VPMOVSXBD : SS41I_binop_rm_int8_y<0x21, "vpmovsxbd",
5571 int_x86_avx2_pmovsxbd>, VEX;
5572 defm VPMOVSXWQ : SS41I_binop_rm_int8_y<0x24, "vpmovsxwq",
5573 int_x86_avx2_pmovsxwq>, VEX;
5574 defm VPMOVZXBD : SS41I_binop_rm_int8_y<0x31, "vpmovzxbd",
5575 int_x86_avx2_pmovzxbd>, VEX;
5576 defm VPMOVZXWQ : SS41I_binop_rm_int8_y<0x34, "vpmovzxwq",
5577 int_x86_avx2_pmovzxwq>, VEX;
5578 }
5579
53495580 defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>;
53505581 defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>;
53515582 defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>;
53905621 OpSize;
53915622 }
53925623
// SS41I_binop_rm_int4_y - variant with a VR256 destination. The register form
// applies IntId to a VR128 source; the memory form applies IntId to a 32-bit
// scalar load.
// NOTE(review): part of the template argument list on the multiclass line and
// on both "SS48I" lines appears to have been lost in extraction -- confirm
// against the original .td file.
5624 multiclass SS41I_binop_rm_int4_y opc, string OpcodeStr,
5625 Intrinsic IntId> {
5626 def Yrr : SS48I
5627 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5628 [(set VR256:$dst, (IntId VR128:$src))]>, OpSize;
5629
5630 // Expecting an i32 load, moved into a v4i32 via scalar_to_vector and bitconverted.
5631 def Yrm : SS48I
5632 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5633 [(set VR256:$dst, (IntId (bitconvert
5634 (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,
5635 OpSize;
5636 }
5637
53935638 let Predicates = [HasAVX] in {
53945639 defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>,
53955640 VEX;
53965641 defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>,
53975642 VEX;
5643 }
5644 let Predicates = [HasAVX2] in {
5645 defm VPMOVSXBQ : SS41I_binop_rm_int4_y<0x22, "vpmovsxbq",
5646 int_x86_avx2_pmovsxbq>, VEX;
5647 defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq",
5648 int_x86_avx2_pmovzxbq>, VEX;
53985649 }
53995650 defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
54005651 defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
159159 declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone
160160
161161
162 define i32 @test_x86_avx2_pmovmskb(<32 x i8> %a0) {
163 ; CHECK: vpmovmskb
164 %res = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0) ; <i32> [#uses=1]
165 ret i32 %res
166 }
167 declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone
168
169
162170 define <16 x i16> @test_x86_avx2_pmulh_w(<16 x i16> %a0, <16 x i16> %a1) {
163171 ; CHECK: vpmulhw
164172 %res = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
381389 ret <16 x i16> %res
382390 }
383391 declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone
392
393
394 define <32 x i8> @test_x86_avx2_pabs_b(<32 x i8> %a0) {
395 ; CHECK: vpabsb
396 %res = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0) ; <<32 x i8>> [#uses=1]
397 ret <32 x i8> %res
398 }
399 declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone
400
401
402 define <8 x i32> @test_x86_avx2_pabs_d(<8 x i32> %a0) {
403 ; CHECK: vpabsd
404 %res = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0) ; <<8 x i32>> [#uses=1]
405 ret <8 x i32> %res
406 }
407 declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone
408
409
410 define <16 x i16> @test_x86_avx2_pabs_w(<16 x i16> %a0) {
411 ; CHECK: vpabsw
412 %res = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0) ; <<16 x i16>> [#uses=1]
413 ret <16 x i16> %res
414 }
415 declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone
416
417
418 define <8 x i32> @test_x86_avx2_phadd_d(<8 x i32> %a0, <8 x i32> %a1) {
419 ; CHECK: vphaddd
420 %res = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
421 ret <8 x i32> %res
422 }
423 declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone
424
425
426 define <16 x i16> @test_x86_avx2_phadd_sw(<16 x i16> %a0, <16 x i16> %a1) {
427 ; CHECK: vphaddsw
428 %res = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
429 ret <16 x i16> %res
430 }
431 declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone
432
433
434 define <16 x i16> @test_x86_avx2_phadd_w(<16 x i16> %a0, <16 x i16> %a1) {
435 ; CHECK: vphaddw
436 %res = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
437 ret <16 x i16> %res
438 }
439 declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone
440
441
442 define <8 x i32> @test_x86_avx2_phsub_d(<8 x i32> %a0, <8 x i32> %a1) {
443 ; CHECK: vphsubd
444 %res = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
445 ret <8 x i32> %res
446 }
447 declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone
448
449
450 define <16 x i16> @test_x86_avx2_phsub_sw(<16 x i16> %a0, <16 x i16> %a1) {
451 ; CHECK: vphsubsw
452 %res = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
453 ret <16 x i16> %res
454 }
455 declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone
456
457
458 define <16 x i16> @test_x86_avx2_phsub_w(<16 x i16> %a0, <16 x i16> %a1) {
459 ; CHECK: vphsubw
460 %res = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
461 ret <16 x i16> %res
462 }
463 declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone
464
465
466 define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) {
467 ; CHECK: vpmaddubsw
468 %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1]
469 ret <16 x i16> %res
470 }
471 declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone
472
473
474 define <16 x i16> @test_x86_avx2_pmul_hr_sw(<16 x i16> %a0, <16 x i16> %a1) {
475 ; CHECK: vpmulhrsw
476 %res = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
477 ret <16 x i16> %res
478 }
479 declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone
480
481
482 define <32 x i8> @test_x86_avx2_pshuf_b(<32 x i8> %a0, <32 x i8> %a1) {
483 ; CHECK: vpshufb
484 %res = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
485 ret <32 x i8> %res
486 }
487 declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone
488
489
490 define <32 x i8> @test_x86_avx2_psign_b(<32 x i8> %a0, <32 x i8> %a1) {
491 ; CHECK: vpsignb
492 %res = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
493 ret <32 x i8> %res
494 }
495 declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone
496
497
498 define <8 x i32> @test_x86_avx2_psign_d(<8 x i32> %a0, <8 x i32> %a1) {
499 ; CHECK: vpsignd
500 %res = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
501 ret <8 x i32> %res
502 }
503 declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone
504
505
506 define <16 x i16> @test_x86_avx2_psign_w(<16 x i16> %a0, <16 x i16> %a1) {
507 ; CHECK: vpsignw
508 %res = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
509 ret <16 x i16> %res
510 }
511 declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone
512
513
514 define <8 x i32> @test_x86_avx2_pmovsxbd(<16 x i8> %a0) {
515 ; CHECK: vpmovsxbd
516 %res = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
517 ret <8 x i32> %res
518 }
519 declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone
520
521
522 define <4 x i64> @test_x86_avx2_pmovsxbq(<16 x i8> %a0) {
523 ; CHECK: vpmovsxbq
524 %res = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
525 ret <4 x i64> %res
526 }
527 declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone
528
529
530 define <16 x i16> @test_x86_avx2_pmovsxbw(<16 x i8> %a0) {
531 ; CHECK: vpmovsxbw
532 %res = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
533 ret <16 x i16> %res
534 }
535 declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone
536
537
538 define <4 x i64> @test_x86_avx2_pmovsxdq(<4 x i32> %a0) {
539 ; CHECK: vpmovsxdq
540 %res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
541 ret <4 x i64> %res
542 }
543 declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone
544
545
546 define <8 x i32> @test_x86_avx2_pmovsxwd(<8 x i16> %a0) {
547 ; CHECK: vpmovsxwd
548 %res = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
549 ret <8 x i32> %res
550 }
551 declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone
552
553
554 define <4 x i64> @test_x86_avx2_pmovsxwq(<8 x i16> %a0) {
555 ; CHECK: vpmovsxwq
556 %res = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
557 ret <4 x i64> %res
558 }
559 declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone
560
561
562 define <8 x i32> @test_x86_avx2_pmovzxbd(<16 x i8> %a0) {
563 ; CHECK: vpmovzxbd
564 %res = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
565 ret <8 x i32> %res
566 }
567 declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone
568
569
570 define <4 x i64> @test_x86_avx2_pmovzxbq(<16 x i8> %a0) {
571 ; CHECK: vpmovzxbq
572 %res = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
573 ret <4 x i64> %res
574 }
575 declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone
576
577
578 define <16 x i16> @test_x86_avx2_pmovzxbw(<16 x i8> %a0) {
579 ; CHECK: vpmovzxbw
580 %res = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
581 ret <16 x i16> %res
582 }
583 declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone
584
585
586 define <4 x i64> @test_x86_avx2_pmovzxdq(<4 x i32> %a0) {
587 ; CHECK: vpmovzxdq
588 %res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
589 ret <4 x i64> %res
590 }
591 declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone
592
593
594 define <8 x i32> @test_x86_avx2_pmovzxwd(<8 x i16> %a0) {
595 ; CHECK: vpmovzxwd
596 %res = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
597 ret <8 x i32> %res
598 }
599 declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone
600
601
602 define <4 x i64> @test_x86_avx2_pmovzxwq(<8 x i16> %a0) {
603 ; CHECK: vpmovzxwq
604 %res = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
605 ret <4 x i64> %res
606 }
607 declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone