llvm.org GIT mirror llvm / 633f98b
AVX-512: added VPCONFLICT instruction and intrinsics, added EVEX_KZ to tablegen git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193959 91177308-0d34-0410-b5e6-96231b3b80d8 Elena Demikhovsky 7 years ago
7 changed file(s) with 209 addition(s) and 6 deletion(s). Raw diff Collapse all Expand all
29942994 []>;
29952995 }
29962996
2997 // AVX-512 conflict detection
// For each element width (d = 16 x i32, q = 8 x i64) three intrinsics are
// declared, all with an empty property list:
//   *_512       : (vector)               -> vector
//   *_mask_512  : (vector, mask, vector) -> vector
//   *_maskz_512 : (mask, vector)         -> vector
// The mask operand is a vector of i1 with one bit per element.
2998 let TargetPrefix = "x86" in {
2999 def int_x86_avx512_conflict_d_512 : GCCBuiltin<"__builtin_ia32_conflictd512">,
3000 Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty],
3001 []>;
3002 def int_x86_avx512_conflict_d_mask_512 :
3003 GCCBuiltin<"__builtin_ia32_mask_conflictd512">,
3004 Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
3005 llvm_v16i1_ty, llvm_v16i32_ty],
3006 []>;
3007 def int_x86_avx512_conflict_d_maskz_512:
3008 GCCBuiltin<"__builtin_ia32_maskz_conflictd512">,
3009 Intrinsic<[llvm_v16i32_ty], [llvm_v16i1_ty, llvm_v16i32_ty],
3010 []>;
3011
3012 def int_x86_avx512_conflict_q_512 : GCCBuiltin<"__builtin_ia32_conflictq512">,
3013 Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty],
3014 []>;
3015 def int_x86_avx512_conflict_q_mask_512 :
3016 GCCBuiltin<"__builtin_ia32_mask_conflictq512">,
3017 Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
3018 llvm_v8i1_ty, llvm_v8i64_ty],
3019 []>;
3020 def int_x86_avx512_conflict_q_maskz_512:
3021 GCCBuiltin<"__builtin_ia32_maskz_conflictq512">,
3022 Intrinsic<[llvm_v8i64_ty], [llvm_v8i1_ty, llvm_v8i64_ty],
3023 []>;
3024 }
29973025 // Misc.
29983026 let TargetPrefix = "x86" in {
29993027 def int_x86_avx512_mskblend_ps_512 : GCCBuiltin<"__builtin_ia32_mskblendps512">,
220220 ENUM_ENTRY(IC_EVEX_L2_W_K_B, 3, "requires EVEX_B, EVEX_K, L2 and W") \
221221 ENUM_ENTRY(IC_EVEX_L2_W_XS_K_B, 4, "requires EVEX_B, EVEX_K, L2, W and XS prefix") \
222222 ENUM_ENTRY(IC_EVEX_L2_W_XD_K_B, 4, "requires EVEX_B, EVEX_K, L2, W and XD prefix") \
223 ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_K_B, 4, "requires EVEX_B, EVEX_K, L2, W and OpSize")
223 ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_K_B, 4, "requires EVEX_B, EVEX_K, L2, W and OpSize") \
224 ENUM_ENTRY(IC_EVEX_KZ_B, 1, "requires EVEX_B and EVEX_KZ prefix") \
225 ENUM_ENTRY(IC_EVEX_XS_KZ_B, 2, "requires EVEX_B, EVEX_KZ and the XS prefix") \
226 ENUM_ENTRY(IC_EVEX_XD_KZ_B, 2, "requires EVEX_B, EVEX_KZ and the XD prefix") \
227 ENUM_ENTRY(IC_EVEX_OPSIZE_KZ_B, 2, "requires EVEX_B, EVEX_KZ and the OpSize prefix") \
228 ENUM_ENTRY(IC_EVEX_W_KZ_B, 3, "requires EVEX_B, EVEX_KZ and the W prefix") \
229 ENUM_ENTRY(IC_EVEX_W_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ, W, and XS prefix") \
230 ENUM_ENTRY(IC_EVEX_W_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ, W, and XD prefix") \
231 ENUM_ENTRY(IC_EVEX_W_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, W, and OpSize") \
232 ENUM_ENTRY(IC_EVEX_L_KZ_B, 3, "requires EVEX_B, EVEX_KZ and the L prefix") \
233 ENUM_ENTRY(IC_EVEX_L_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ and the L and XS prefix")\
234 ENUM_ENTRY(IC_EVEX_L_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ and the L and XD prefix")\
235 ENUM_ENTRY(IC_EVEX_L_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L, and OpSize") \
236 ENUM_ENTRY(IC_EVEX_L_W_KZ_B, 3, "requires EVEX_B, EVEX_KZ, L and W") \
237 ENUM_ENTRY(IC_EVEX_L_W_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L, W and XS prefix") \
238 ENUM_ENTRY(IC_EVEX_L_W_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L, W and XD prefix") \
239 ENUM_ENTRY(IC_EVEX_L_W_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L, W and OpSize") \
240 ENUM_ENTRY(IC_EVEX_L2_KZ_B, 3, "requires EVEX_B, EVEX_KZ and the L2 prefix") \
241 ENUM_ENTRY(IC_EVEX_L2_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ and the L2 and XS prefix")\
242 ENUM_ENTRY(IC_EVEX_L2_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ and the L2 and XD prefix")\
243 ENUM_ENTRY(IC_EVEX_L2_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L2, and OpSize") \
244 ENUM_ENTRY(IC_EVEX_L2_W_KZ_B, 3, "requires EVEX_B, EVEX_KZ, L2 and W") \
245 ENUM_ENTRY(IC_EVEX_L2_W_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L2, W and XS prefix") \
246 ENUM_ENTRY(IC_EVEX_L2_W_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L2, W and XD prefix") \
247 ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L2, W and OpSize") \
248 ENUM_ENTRY(IC_EVEX_KZ, 1, "requires an EVEX_KZ prefix") \
249 ENUM_ENTRY(IC_EVEX_XS_KZ, 2, "requires EVEX_KZ and the XS prefix") \
250 ENUM_ENTRY(IC_EVEX_XD_KZ, 2, "requires EVEX_KZ and the XD prefix") \
251 ENUM_ENTRY(IC_EVEX_OPSIZE_KZ, 2, "requires EVEX_KZ and the OpSize prefix") \
252 ENUM_ENTRY(IC_EVEX_W_KZ, 3, "requires EVEX_KZ and the W prefix") \
253 ENUM_ENTRY(IC_EVEX_W_XS_KZ, 4, "requires EVEX_KZ, W, and XS prefix") \
254 ENUM_ENTRY(IC_EVEX_W_XD_KZ, 4, "requires EVEX_KZ, W, and XD prefix") \
255 ENUM_ENTRY(IC_EVEX_W_OPSIZE_KZ, 4, "requires EVEX_KZ, W, and OpSize") \
256 ENUM_ENTRY(IC_EVEX_L_KZ, 3, "requires EVEX_KZ and the L prefix") \
257 ENUM_ENTRY(IC_EVEX_L_XS_KZ, 4, "requires EVEX_KZ and the L and XS prefix")\
258 ENUM_ENTRY(IC_EVEX_L_XD_KZ, 4, "requires EVEX_KZ and the L and XD prefix")\
259 ENUM_ENTRY(IC_EVEX_L_OPSIZE_KZ, 4, "requires EVEX_KZ, L, and OpSize") \
260 ENUM_ENTRY(IC_EVEX_L_W_KZ, 3, "requires EVEX_KZ, L and W") \
261 ENUM_ENTRY(IC_EVEX_L_W_XS_KZ, 4, "requires EVEX_KZ, L, W and XS prefix") \
262 ENUM_ENTRY(IC_EVEX_L_W_XD_KZ, 4, "requires EVEX_KZ, L, W and XD prefix") \
263 ENUM_ENTRY(IC_EVEX_L_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L, W and OpSize") \
264 ENUM_ENTRY(IC_EVEX_L2_KZ, 3, "requires EVEX_KZ and the L2 prefix") \
265 ENUM_ENTRY(IC_EVEX_L2_XS_KZ, 4, "requires EVEX_KZ and the L2 and XS prefix")\
266 ENUM_ENTRY(IC_EVEX_L2_XD_KZ, 4, "requires EVEX_KZ and the L2 and XD prefix")\
267 ENUM_ENTRY(IC_EVEX_L2_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, and OpSize") \
268 ENUM_ENTRY(IC_EVEX_L2_W_KZ, 3, "requires EVEX_KZ, L2 and W") \
269 ENUM_ENTRY(IC_EVEX_L2_W_XS_KZ, 4, "requires EVEX_KZ, L2, W and XS prefix") \
270 ENUM_ENTRY(IC_EVEX_L2_W_XD_KZ, 4, "requires EVEX_KZ, L2, W and XD prefix") \
271 ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, W and OpSize")
224272
225273 #define ENUM_ENTRY(n, r, d) n,
226274 typedef enum {
33963396 defm VPABSQ : avx512_vpabs<0x1F, "vpabsq", VR512, i512mem>, EVEX_V512, VEX_W,
33973397 EVEX_CD8<64, CD8VF>;
33983398
// Defines the nine encodings of a conflict-detection instruction:
//   rr / rm / rmb       - unmasked: register, full-vector memory, broadcast memory
//   rrkz / rmkz / rmbkz - EVEX_KZ forms, printed with "{${mask}} {z}"
//   rrk / rmk / rmbk    - EVEX_K forms; $src1 is tied to $dst
// Int, maskInt and maskzInt are the intrinsics matched by the register and
// full-vector memory forms. The broadcast (EVEX_B) forms have empty patterns.
// scalar_mfrag is accepted but not referenced by any def below.
3399 multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
3400 RegisterClass RC, RegisterClass KRC, PatFrag memop_frag,
3401 X86MemOperand x86memop, PatFrag scalar_mfrag,
3402 X86MemOperand x86scalar_mop, string BrdcstStr,
3403 Intrinsic Int, Intrinsic maskInt, Intrinsic maskzInt> {
3404 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
3405 (ins RC:$src),
3406 !strconcat(OpcodeStr, "\t{$src, ${dst}|${dst}, $src}"),
3407 [(set RC:$dst, (Int RC:$src))]>, EVEX;
3408 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
3409 (ins x86memop:$src),
3410 !strconcat(OpcodeStr, "\t{$src, ${dst}|${dst}, $src}"),
3411 [(set RC:$dst, (Int (memop_frag addr:$src)))]>, EVEX;
3412 def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
3413 (ins x86scalar_mop:$src),
3414 !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
3415 ", ${dst}|${dst}, ${src}", BrdcstStr, "}"),
3416 []>, EVEX, EVEX_B;
3417 def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
3418 (ins KRC:$mask, RC:$src),
3419 !strconcat(OpcodeStr,
3420 "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
3421 [(set RC:$dst, (maskzInt KRC:$mask, RC:$src))]>, EVEX, EVEX_KZ;
3422 def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
3423 (ins KRC:$mask, x86memop:$src),
3424 !strconcat(OpcodeStr,
3425 "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
3426 [(set RC:$dst, (maskzInt KRC:$mask, (memop_frag addr:$src)))]>,
3427 EVEX, EVEX_KZ;
3428 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
3429 (ins KRC:$mask, x86scalar_mop:$src),
3430 !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
3431 ", ${dst} {${mask}} {z}|${dst} {${mask}} {z}, ${src}",
3432 BrdcstStr, "}"),
3433 []>, EVEX, EVEX_KZ, EVEX_B;
3434
// In the EVEX_K forms the first input is constrained to the destination register.
3435 let Constraints = "$src1 = $dst" in {
3436 def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
3437 (ins RC:$src1, KRC:$mask, RC:$src2),
3438 !strconcat(OpcodeStr,
3439 "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
3440 [(set RC:$dst, (maskInt RC:$src1, KRC:$mask, RC:$src2))]>, EVEX, EVEX_K;
3441 def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
3442 (ins RC:$src1, KRC:$mask, x86memop:$src2),
3443 !strconcat(OpcodeStr,
3444 "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
3445 [(set RC:$dst, (maskInt RC:$src1, KRC:$mask, (memop_frag addr:$src2)))]>, EVEX, EVEX_K;
3446 def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
3447 (ins RC:$src1, KRC:$mask, x86scalar_mop:$src2),
3448 !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
3449 ", ${dst} {${mask}}|${dst} {${mask}}, ${src2}", BrdcstStr, "}"),
3450 []>, EVEX, EVEX_K, EVEX_B;
3451 }
3452 }
3453
// Instantiate the conflict multiclass for 32-bit (D) and 64-bit (Q) elements.
// Both are guarded by the HasCDI predicate and use opcode 0xC4; the Q form
// additionally carries VEX_W and uses 8-lane mask/broadcast operands.
3454 let Predicates = [HasCDI] in {
3455 defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM,
3456 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
3457 int_x86_avx512_conflict_d_512,
3458 int_x86_avx512_conflict_d_mask_512,
3459 int_x86_avx512_conflict_d_maskz_512>,
3460 EVEX_V512, EVEX_CD8<32, CD8VF>;
3461
3462 defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM,
3463 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
3464 int_x86_avx512_conflict_q_512,
3465 int_x86_avx512_conflict_q_mask_512,
3466 int_x86_avx512_conflict_q_maskz_512>,
3467 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
3468 }
316316 ret <8 x i64> %res
317317 }
318318 declare <8 x i64> @llvm.x86.avx512.pmins.q(<8 x i64>, <8 x i64>) nounwind readonly
319
; Unmasked 32-bit conflict intrinsic: the CHECK line only requires that a
; vpconflictd instruction appears in the output.
320 define <16 x i32> @test_conflict_d(<16 x i32> %a) {
321 ; CHECK: vpconflictd
322 %res = call <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32> %a)
323 ret <16 x i32> %res
324 }
325 declare <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32>) nounwind readonly
326
; maskz form of the 32-bit conflict intrinsic: the i16 mask is bitcast to
; <16 x i1>, and the CHECK line expects the {%k1} {z} operand syntax.
327 define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
328 ; CHECK: vpconflictd %zmm0, %zmm0 {%k1} {z}
329 %vmask = bitcast i16 %mask to <16 x i1>
330 %res = call <16 x i32> @llvm.x86.avx512.conflict.d.maskz.512(<16 x i1> %vmask, <16 x i32> %a)
331 ret <16 x i32> %res
332 }
333 declare <16 x i32> @llvm.x86.avx512.conflict.d.maskz.512(<16 x i1>,<16 x i32>) nounwind readonly
334
; mask form of the 64-bit conflict intrinsic: %b is passed as the first vector
; operand, the i8 mask is bitcast to <8 x i1>, and %a is the last operand.
; The CHECK line expects a vpconflictq with a {%k1} mask annotation.
335 define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
336 ; CHECK: vpconflictq {{.*}} {%k1}
337 %vmask = bitcast i8 %mask to <8 x i1>
338 %res = call <8 x i64> @llvm.x86.avx512.conflict.q.mask.512(<8 x i64> %b, <8 x i1> %vmask, <8 x i64> %a)
339 ret <8 x i64> %res
340 }
341 declare <8 x i64> @llvm.x86.avx512.conflict.q.mask.512(<8 x i64>, <8 x i1>,<8 x i64>) nounwind readonly
127127 inheritsFrom(child, IC_EVEX_L_W_XD);
128128 case IC_EVEX_OPSIZE:
129129 return inheritsFrom(child, IC_EVEX_W_OPSIZE) ||
130 inheritsFrom(child, IC_EVEX_W_OPSIZE);
130 inheritsFrom(child, IC_EVEX_L_W_OPSIZE);
131131 case IC_EVEX_W:
132132 case IC_EVEX_W_XS:
133133 case IC_EVEX_W_XD:
175175 case IC_EVEX_L_XD_K:
176176 case IC_EVEX_L_OPSIZE_K:
177177 return false;
178 case IC_EVEX_W_KZ:
179 case IC_EVEX_W_XS_KZ:
180 case IC_EVEX_W_XD_KZ:
181 case IC_EVEX_W_OPSIZE_KZ:
182 return false;
183 case IC_EVEX_L_KZ:
184 case IC_EVEX_L_XS_KZ:
185 case IC_EVEX_L_XD_KZ:
186 case IC_EVEX_L_OPSIZE_KZ:
187 return false;
178188 case IC_EVEX_L_W_K:
179189 case IC_EVEX_L_W_XS_K:
180190 case IC_EVEX_L_W_XD_K:
181191 case IC_EVEX_L_W_OPSIZE_K:
192 case IC_EVEX_L_W_KZ:
193 case IC_EVEX_L_W_XS_KZ:
194 case IC_EVEX_L_W_XD_KZ:
195 case IC_EVEX_L_W_OPSIZE_KZ:
182196 return false;
183197 case IC_EVEX_L2_K:
184198 case IC_EVEX_L2_B:
186200 case IC_EVEX_L2_XD_K:
187201 case IC_EVEX_L2_OPSIZE_K:
188202 case IC_EVEX_L2_OPSIZE_B:
203 case IC_EVEX_L2_OPSIZE_K_B:
204 case IC_EVEX_L2_KZ:
205 case IC_EVEX_L2_XS_KZ:
206 case IC_EVEX_L2_XD_KZ:
207 case IC_EVEX_L2_OPSIZE_KZ:
208 case IC_EVEX_L2_OPSIZE_KZ_B:
189209 return false;
190210 case IC_EVEX_L2_W_K:
211 case IC_EVEX_L2_W_B:
191212 case IC_EVEX_L2_W_XS_K:
192213 case IC_EVEX_L2_W_XD_K:
193214 case IC_EVEX_L2_W_OPSIZE_K:
194215 case IC_EVEX_L2_W_OPSIZE_B:
216 case IC_EVEX_L2_W_OPSIZE_K_B:
217 case IC_EVEX_L2_W_KZ:
218 case IC_EVEX_L2_W_XS_KZ:
219 case IC_EVEX_L2_W_XD_KZ:
220 case IC_EVEX_L2_W_OPSIZE_KZ:
221 case IC_EVEX_L2_W_OPSIZE_KZ_B:
195222 return false;
196223 default:
197224 llvm_unreachable("Unknown instruction class");
212239
213240 #define ENUM_ENTRY(n, r, d) r,
214241 #define ENUM_ENTRY_K_B(n, r, d) ENUM_ENTRY(n, r, d) \
215 ENUM_ENTRY(n##_K_B, r, d) ENUM_ENTRY(n##_K, r, d) ENUM_ENTRY(n##_B, r, d)
242 ENUM_ENTRY(n##_K_B, r, d) ENUM_ENTRY(n##_KZ_B, r, d) \
243 ENUM_ENTRY(n##_KZ, r, d) ENUM_ENTRY(n##_K, r, d) ENUM_ENTRY(n##_B, r, d)
216244 static int ranks[IC_max] = {
217245 INSTRUCTION_CONTEXTS
218246 };
234262 llvm_unreachable("Unhandled instruction class");
235263 #define ENUM_ENTRY(n, r, d) case n: return #n; break;
236264 #define ENUM_ENTRY_K_B(n, r, d) ENUM_ENTRY(n, r, d) ENUM_ENTRY(n##_K_B, r, d)\
237 ENUM_ENTRY(n##_K, r, d) ENUM_ENTRY(n##_B, r, d)
265 ENUM_ENTRY(n##_KZ, r, d) ENUM_ENTRY(n##_K, r, d) ENUM_ENTRY(n##_B, r, d)\
266 ENUM_ENTRY(n##_KZ_B, r, d)
238267 INSTRUCTION_CONTEXTS
239268 #undef ENUM_ENTRY
240269 #undef ENUM_ENTRY_K_B
243243 HasEVEXPrefix = Rec->getValueAsBit("hasEVEXPrefix");
244244 HasEVEX_L2Prefix = Rec->getValueAsBit("hasEVEX_L2");
245245 HasEVEX_K = Rec->getValueAsBit("hasEVEX_K");
246 HasEVEX_KZ = Rec->getValueAsBit("hasEVEX_Z");
246247 HasEVEX_B = Rec->getValueAsBit("hasEVEX_B");
247248 HasLockPrefix = Rec->getValueAsBit("hasLockPrefix");
248249 IsCodeGenOnly = Rec->getValueAsBit("isCodeGenOnly");
303304 recogInstr.emitDecodePath(tables);
304305 }
305306
306 #define EVEX_KB(n) (HasEVEX_K && HasEVEX_B? n##_K_B : \
307 (HasEVEX_K? n##_K : (HasEVEX_B ? n##_B : n)))
// Appends the context suffix for base name n according to the HasEVEX_KZ,
// HasEVEX_K and HasEVEX_B flags. Tested in this order: _KZ_B, _K_B, _KZ, _K,
// _B, then plain n — so KZ wins over K, and the combined *_B forms are
// checked before the single-flag forms.
307 #define EVEX_KB(n) (HasEVEX_KZ && HasEVEX_B ? n##_KZ_B : \
308 (HasEVEX_K && HasEVEX_B ? n##_K_B : \
309 (HasEVEX_KZ ? n##_KZ : \
310 (HasEVEX_K? n##_K : (HasEVEX_B ? n##_B : n)))))
308311
309312 InstructionContext RecognizableInstr::insnContext() const {
310313 InstructionContext insnContext;
7171 bool HasEVEX_L2Prefix;
7272 /// The hasEVEX_K field from the record
7373 bool HasEVEX_K;
74 /// The hasEVEX_Z field from the record
75 bool HasEVEX_KZ;
7476 /// The hasEVEX_B field from the record
7577 bool HasEVEX_B;
7678 /// The hasLockPrefix field from the record