llvm.org GIT mirror llvm / aac33cf
[SKX] Enabling SKX target and AVX512BW, AVX512DQ, AVX512VL features. Enabling HasAVX512{DQ,BW,VL} predicates. Adding VK2, VK4, VK32, VK64 masked register classes. Adding new types (v64i8, v32i16) to VR512. Extending calling conventions for new types (v64i8, v32i16) Patch by Zinovy Nis <zinovy.y.nis@intel.com> Reviewed by Elena Demikhovsky <elena.demikhovsky@intel.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213545 91177308-0d34-0410-b5e6-96231b3b80d8 Robert Khasanov 5 years ago
11 changed file(s) with 106 addition(s) and 24 deletion(s). Raw diff Collapse all Expand all
16651665 // Recognize only reasonable suffixes.
16661666 const char *BroadcastPrimitive =
16671667 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1668 .Case("to2", "{1to2}")
1669 .Case("to4", "{1to4}")
16681670 .Case("to8", "{1to8}")
16691671 .Case("to16", "{1to16}")
16701672 .Default(nullptr);
264264 ENUM_ENTRY(IC_EVEX_L2_W_KZ, 3, "requires EVEX_KZ, L2 and W") \
265265 ENUM_ENTRY(IC_EVEX_L2_W_XS_KZ, 4, "requires EVEX_KZ, L2, W and XS prefix") \
266266 ENUM_ENTRY(IC_EVEX_L2_W_XD_KZ, 4, "requires EVEX_KZ, L2, W and XD prefix") \
267 ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, W and OpSize")
267 ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, W and OpSize")
268268
269269 #define ENUM_ENTRY(n, r, d) n,
270270 enum InstructionContext {
452452 ENUM_ENTRY(TYPE_XMM256, "32-byte") \
453453 ENUM_ENTRY(TYPE_XMM512, "64-byte") \
454454 ENUM_ENTRY(TYPE_VK1, "1-bit") \
455 ENUM_ENTRY(TYPE_VK2, "2-bit") \
456 ENUM_ENTRY(TYPE_VK4, "4-bit") \
455457 ENUM_ENTRY(TYPE_VK8, "8-bit") \
456458 ENUM_ENTRY(TYPE_VK16, "16-bit") \
459 ENUM_ENTRY(TYPE_VK32, "32-bit") \
460 ENUM_ENTRY(TYPE_VK64, "64-bit") \
457461 ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \
458462 ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \
459463 ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \
103103 def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
104104 "Enable AVX-512 PreFetch Instructions",
105105 [FeatureAVX512]>;
106
106 def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
107 "Enable AVX-512 Doubleword and Quadword Instructions",
108 [FeatureAVX512]>;
109 def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true",
110 "Enable AVX-512 Byte and Word Instructions",
111 [FeatureAVX512]>;
112 def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true",
113 "Enable AVX-512 Vector Length eXtensions",
114 [FeatureAVX512]>;
107115 def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
108116 "Enable packed carry-less multiplication instructions",
109117 [FeatureSSE2]>;
275283 FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
276284 FeatureSlowIncDec]>;
277285
286 // SKX
287 // FIXME: define SKX model
288 def : ProcessorModel<"skx", HaswellModel,
289 [FeatureAVX512, FeatureCDI,
290 FeatureDQI, FeatureBWI, FeatureVLX,
291 FeatureCMPXCHG16B, FeatureFastUAMem, FeaturePOPCNT,
292 FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
293 FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
294 FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
295 FeatureSlowIncDec]>;
296
278297 def : Proc<"k6", [FeatureMMX]>;
279298 def : Proc<"k6-2", [Feature3DNow]>;
280299 def : Proc<"k6-3", [Feature3DNow]>;
5151 // 512-bit vectors are returned in ZMM0 and ZMM1, when they fit. ZMM2 and ZMM3
5252 // can only be used by ABI non-compliant code. This vector type is only
5353 // supported while using the AVX-512 target feature.
54 CCIfType<[v16i32, v8i64, v16f32, v8f64],
54 CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
5555 CCAssignToReg<[ZMM0,ZMM1,ZMM2,ZMM3]>>,
5656
5757 // MMX vector types are always returned in MM0. If the target doesn't have
251251 YMM4, YMM5, YMM6, YMM7]>>>>,
252252
253253 // The first 8 512-bit vector arguments are passed in ZMM registers.
254 CCIfNotVarArg<CCIfType<[v16i32, v8i64, v16f32, v8f64],
254 CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
255255 CCIfSubtarget<"hasAVX512()",
256256 CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6, ZMM7]>>>>,
257257
0 // Bitcasts between 512-bit vector types. Return the original type since
11 // no instruction is needed for the conversion
22 let Predicates = [HasAVX512] in {
3 def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
4 def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
5 def : Pat<(v8f64 (bitconvert (v32i16 VR512:$src))), (v8f64 VR512:$src)>;
6 def : Pat<(v8f64 (bitconvert (v64i8 VR512:$src))), (v8f64 VR512:$src)>;
37 def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
4 def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
5 def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
8 def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>;
69 def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
7 def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>;
10 def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
11 def : Pat<(v16f32 (bitconvert (v64i8 VR512:$src))), (v16f32 VR512:$src)>;
812 def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>;
13 def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
14 def : Pat<(v8i64 (bitconvert (v32i16 VR512:$src))), (v8i64 VR512:$src)>;
15 def : Pat<(v8i64 (bitconvert (v64i8 VR512:$src))), (v8i64 VR512:$src)>;
16 def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
917 def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
10 def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
11 def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
12 def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
13 def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
18 def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
19 def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
20 def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))), (v16i32 VR512:$src)>;
1421 def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>;
15 def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
22 def : Pat<(v32i16 (bitconvert (v8i64 VR512:$src))), (v32i16 VR512:$src)>;
23 def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
24 def : Pat<(v32i16 (bitconvert (v64i8 VR512:$src))), (v32i16 VR512:$src)>;
25 def : Pat<(v32i16 (bitconvert (v8f64 VR512:$src))), (v32i16 VR512:$src)>;
26 def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
27 def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
28 def : Pat<(v64i8 (bitconvert (v8i64 VR512:$src))), (v64i8 VR512:$src)>;
29 def : Pat<(v64i8 (bitconvert (v16i32 VR512:$src))), (v64i8 VR512:$src)>;
30 def : Pat<(v64i8 (bitconvert (v32i16 VR512:$src))), (v64i8 VR512:$src)>;
31 def : Pat<(v64i8 (bitconvert (v8f64 VR512:$src))), (v64i8 VR512:$src)>;
32 def : Pat<(v64i8 (bitconvert (v16f32 VR512:$src))), (v64i8 VR512:$src)>;
1633
1734 def : Pat<(v2i64 (bitconvert (v4i32 VR128X:$src))), (v2i64 VR128X:$src)>;
1835 def : Pat<(v2i64 (bitconvert (v8i16 VR128X:$src))), (v2i64 VR128X:$src)>;
134151 (ins VR512:$src1, i128mem:$src2, i8imm:$src3),
135152 "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
136153 []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
137
138154 }
139155
140156 let hasSideEffects = 0 in {
183183 class EVEX_B { bit hasEVEX_B = 1; }
184184 class EVEX_RC { bit hasEVEX_RC = 1; }
185185 class EVEX_V512 { bit hasEVEX_L2 = 1; bit hasVEX_L = 0; }
186 class EVEX_V256 { bit hasEVEX_L2 = 0; bit hasVEX_L = 1; }
187 class EVEX_V128 { bit hasEVEX_L2 = 0; bit hasVEX_L = 0; }
186188
187189 // Specify AVX512 8-bit compressed displacement encoding based on the vector
188190 // element size in bits (8, 16, 32, 64) and the CDisp8 form.
718718 AssemblerPredicate<"FeatureAVX512", "AVX-512 ISA">;
719719 def UseAVX : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX512()">;
720720 def UseAVX2 : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">;
721 def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">;
721 def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">;
722722 def HasCDI : Predicate<"Subtarget->hasCDI()">;
723723 def HasPFI : Predicate<"Subtarget->hasPFI()">;
724724 def HasERI : Predicate<"Subtarget->hasERI()">;
725 def HasDQI : Predicate<"Subtarget->hasDQI()">;
726 def HasBWI : Predicate<"Subtarget->hasBWI()">;
727 def HasVLX : Predicate<"Subtarget->hasVLX()">,
728 AssemblerPredicate<"FeatureVLX", "AVX-512 VLX ISA">;
725729
726730 def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
727731 def HasAES : Predicate<"Subtarget->hasAES()">;
448448 }
449449
450450 // AVX-512 vector/mask registers.
451 def VR512 : RegisterClass<"X86", [v16f32, v8f64, v16i32, v8i64], 512,
451 def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64], 512,
452452 (sequence "ZMM%u", 0, 31)>;
453453
454454 // Scalar AVX-512 floating point registers.
462462 def VR256X : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
463463 256, (sequence "YMM%u", 0, 31)>;
464464
465 // The size of the all masked registers is 16 bit because we have only one
466 // KMOVW instruction that can store this register in memory, and it writes 2 bytes
467 def VK1 : RegisterClass<"X86", [i1], 16, (sequence "K%u", 0, 7)>;
468 def VK8 : RegisterClass<"X86", [v8i1], 16, (add VK1)> {let Size = 16;}
465 // Mask registers
466 def VK1 : RegisterClass<"X86", [i1], 16, (sequence "K%u", 0, 7)> {let Size = 16;}
467 def VK2 : RegisterClass<"X86", [v2i1], 16, (add VK1)> {let Size = 16;}
468 def VK4 : RegisterClass<"X86", [v4i1], 16, (add VK2)> {let Size = 16;}
469 def VK8 : RegisterClass<"X86", [v8i1], 16, (add VK4)> {let Size = 16;}
469470 def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)> {let Size = 16;}
471 def VK32 : RegisterClass<"X86", [v32i1], 32, (add VK16)> {let Size = 32;}
472 def VK64 : RegisterClass<"X86", [v64i1], 64, (add VK32)> {let Size = 64;}
470473
471474 def VK1WM : RegisterClass<"X86", [i1], 16, (sub VK1, K0)> {let Size = 16;}
475 def VK2WM : RegisterClass<"X86", [v2i1], 16, (sub VK2, K0)> {let Size = 16;}
476 def VK4WM : RegisterClass<"X86", [v4i1], 16, (sub VK4, K0)> {let Size = 16;}
472477 def VK8WM : RegisterClass<"X86", [v8i1], 16, (sub VK8, K0)> {let Size = 16;}
473 def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)>;
474
478 def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)> {let Size = 16;}
479 def VK32WM : RegisterClass<"X86", [v32i1], 32, (add VK16WM)> {let Size = 32;}
480 def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;}
271271 HasERI = false;
272272 HasCDI = false;
273273 HasPFI = false;
274 HasDQI = false;
275 HasBWI = false;
276 HasVLX = false;
274277 HasADX = false;
275278 HasSHA = false;
276279 HasPRFCHW = false;
188188
189189 /// Processor has AVX-512 PreFetch Instructions
190190 bool HasPFI;
191
191
192192 /// Processor has AVX-512 Exponential and Reciprocal Instructions
193193 bool HasERI;
194
194
195195 /// Processor has AVX-512 Conflict Detection Instructions
196196 bool HasCDI;
197
197
198 /// Processor has AVX-512 Doubleword and Quadword instructions
199 bool HasDQI;
200
201 /// Processor has AVX-512 Byte and Word instructions
202 bool HasBWI;
203
204 /// Processor has AVX-512 Vector Length eXtensions
205 bool HasVLX;
206
198207 /// stackAlignment - The minimum alignment known to hold of the stack frame on
199208 /// entry to the function and which must be maintained by every function.
200209 unsigned stackAlignment;
348357 bool hasCDI() const { return HasCDI; }
349358 bool hasPFI() const { return HasPFI; }
350359 bool hasERI() const { return HasERI; }
360 bool hasDQI() const { return HasDQI; }
361 bool hasBWI() const { return HasBWI; }
362 bool hasVLX() const { return HasVLX; }
351363
352364 bool isAtom() const { return X86ProcFamily == IntelAtom; }
353365 bool isSLM() const { return X86ProcFamily == IntelSLM; }
974974 TYPE("VR512", TYPE_XMM512)
975975 TYPE("VK1", TYPE_VK1)
976976 TYPE("VK1WM", TYPE_VK1)
977 TYPE("VK2", TYPE_VK2)
978 TYPE("VK2WM", TYPE_VK2)
979 TYPE("VK4", TYPE_VK4)
980 TYPE("VK4WM", TYPE_VK4)
977981 TYPE("VK8", TYPE_VK8)
978982 TYPE("VK8WM", TYPE_VK8)
979983 TYPE("VK16", TYPE_VK16)
980984 TYPE("VK16WM", TYPE_VK16)
985 TYPE("VK32", TYPE_VK32)
986 TYPE("VK32WM", TYPE_VK32)
987 TYPE("VK64", TYPE_VK64)
988 TYPE("VK64WM", TYPE_VK64)
981989 TYPE("GR16_NOAX", TYPE_Rv)
982990 TYPE("GR32_NOAX", TYPE_Rv)
983991 TYPE("GR64_NOAX", TYPE_R64)
11001108 ENCODING("VR256X", ENCODING_VVVV)
11011109 ENCODING("VR512", ENCODING_VVVV)
11021110 ENCODING("VK1", ENCODING_VVVV)
1111 ENCODING("VK2", ENCODING_VVVV)
1112 ENCODING("VK4", ENCODING_VVVV)
11031113 ENCODING("VK8", ENCODING_VVVV)
11041114 ENCODING("VK16", ENCODING_VVVV)
11051115 errs() << "Unhandled VEX.vvvv register encoding " << s << "\n";
11101120 RecognizableInstr::writemaskRegisterEncodingFromString(const std::string &s,
11111121 uint8_t OpSize) {
11121122 ENCODING("VK1WM", ENCODING_WRITEMASK)
1123 ENCODING("VK2WM", ENCODING_WRITEMASK)
1124 ENCODING("VK4WM", ENCODING_WRITEMASK)
11131125 ENCODING("VK8WM", ENCODING_WRITEMASK)
11141126 ENCODING("VK16WM", ENCODING_WRITEMASK)
1127 ENCODING("VK32WM", ENCODING_WRITEMASK)
1128 ENCODING("VK64WM", ENCODING_WRITEMASK)
11151129 errs() << "Unhandled mask register encoding " << s << "\n";
11161130 llvm_unreachable("Unhandled mask register encoding");
11171131 }