llvm.org GIT mirror llvm / fa7378c
AMDGPU/SI: Fix input vcc operand for VOP2b instructions Adds vcc to output string input for e32. Allows option of using e64 encoding with assembler. Also fixes these instructions not implicitly reading exec. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@247074 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 5 years ago
6 changed file(s) with 134 addition(s) and 69 deletion(s). Raw diff Collapse all Expand all
212212 bool isSSrc64() const {
213213 return isImm() || isInlineImm() ||
214214 (isReg() && isRegClass(AMDGPU::SReg_64RegClassID));
215 }
216
217 bool isSCSrc64() const {
218 return (isReg() && isRegClass(AMDGPU::SReg_64RegClassID)) || isInlineImm();
215219 }
216220
217221 bool isVCSrc32() const {
989989 // Returns the register class to use for sources of VOP3 instructions for the
990990 // given VT.
991991 class getVOP3SrcForVT<ValueType VT> {
992 RegisterOperand ret = !if(!eq(VT.Size, 64), VCSrc_64, VCSrc_32);
992 RegisterOperand ret =
993 !if(!eq(VT.Size, 64),
994 VCSrc_64,
995 !if(!eq(VT.Value, i1.Value),
996 SCSrc_64,
997 VCSrc_32
998 )
999 );
9931000 }
9941001
9951002 // Returns 1 if the source arguments have modifiers, 0 if they do not.
10691076 "$dst, "#src0#src1#src2#"$clamp"#"$omod");
10701077 }
10711078
1072
10731079 class VOPProfile <list<ValueType> _ArgVT> {
10741080
10751081 field list<ValueType> ArgVT = _ArgVT;
11311137 def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
11321138 def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
11331139
1134 class VOP2b_Profile : VOPProfile<[vt, vt, vt, untyped]> {
1140 // Write out to vcc or arbitrary SGPR.
1141 def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> {
11351142 let Asm32 = "$dst, vcc, $src0, $src1";
11361143 let Asm64 = "$dst, $sdst, $src0, $src1";
11371144 let Outs32 = (outs DstRC:$dst);
11381145 let Outs64 = (outs DstRC:$dst, SReg_64:$sdst);
11391146 }
11401147
1141 def VOP2b_I32_I1_I32_I32 : VOP2b_Profile;
1142
1143 def VOP2b_I32_I1_I32_I32_VCC : VOP2b_Profile {
1148 // Write out to vcc or arbitrary SGPR and read in from vcc or
1149 // arbitrary SGPR.
1150 def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
11441151 let Src0RC32 = VCSrc_32;
1152 let Asm32 = "$dst, vcc, $src0, $src1, vcc";
1153 let Asm64 = "$dst, $sdst, $src0, $src1, $src2";
1154 let Outs32 = (outs DstRC:$dst);
1155 let Outs64 = (outs DstRC:$dst, SReg_64:$sdst);
1156
1157 // Suppress src2 implied by type since the 32-bit encoding uses an
1158 // implicit VCC use.
1159 let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
11451160 }
11461161
11471162 // VOPC instructions are a special case because for the 32-bit
14281443 // No VI instruction. This class is for SI only.
14291444 }
14301445
1431 // XXX - Is v_div_scale_{f32|f64} only available in vop3b without
1432 // option of implicit vcc use?
1433 multiclass VOP3b_2_m
1434 list pattern, string opName, string revOp,
1435 bit HasMods = 1, bit UseFullOp = 0> {
1436 def "" : VOP3_Pseudo ,
1437 VOP2_REV;
1446 // Two operand VOP3b instruction that may have a 3rd SGPR bool operand
1447 // instead of an implicit VCC as in the VOP2b format.
1448 multiclass VOP3b_2_3_m
1449 list pattern, string opName, string revOp,
1450 bit HasMods = 1, bit useSGPRInput = 0,
1451 bit UseFullOp = 0> {
1452 def "" : VOP3_Pseudo ;
14381453
14391454 def _si : VOP3b_Real_si ,
1440 VOP3DisableFields<1, 0, HasMods>;
1455 VOP3DisableFields<1, useSGPRInput, HasMods>;
14411456
14421457 def _vi : VOP3b_Real_vi ,
1443 VOP3DisableFields<1, 0, HasMods>;
1444 }
1445
1446 multiclass VOP3b_3_m
1447 list pattern, string opName, string revOp,
1448 bit HasMods = 1, bit UseFullOp = 0> {
1449 def "" : VOP3_Pseudo ;
1450
1451
1452 def _si : VOP3b_Real_si ,
1453 VOP3DisableFields<1, 1, HasMods>;
1454
1455 def _vi : VOP3b_Real_vi ,
1456 VOP3DisableFields<1, 1, HasMods>;
1458 VOP3DisableFields<1, useSGPRInput, HasMods>;
14571459 }
14581460
14591461 multiclass VOP3_C_m
15741576 multiclass VOP2b_Helper
15751577 dag ins32, string asm32, list pat32,
15761578 dag ins64, string asm64, list pat64,
1577 string revOp, bit HasMods> {
1578
1579 defm _e32 : VOP2_m ;
1580
1581 defm _e64 : VOP3b_2_m
1582 outs64, ins64, opName#asm64, pat64, opName, revOp, HasMods
1579 string revOp, bit HasMods, bit useSGPRInput> {
1580
1581 let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
1582 defm _e32 : VOP2_m ;
1583 }
1584
1585 defm _e64 : VOP3b_2_3_m
1586 outs64, ins64, opName#asm64, pat64, opName, revOp, HasMods, useSGPRInput
15831587 >;
15841588 }
15851589
15951599 i1:$clamp, i32:$omod)),
15961600 (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
15971601 [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
1598 revOp, P.HasModifiers
1602 revOp, P.HasModifiers, !eq(P.NumSrcArgs, 3)
15991603 >;
16001604
16011605 // A VOP2 instruction that is VOP3-only on VI.
18461850
18471851 multiclass VOP3b_Helper
18481852 string opName, list pattern> :
1849 VOP3b_3_m <
1853 VOP3b_2_3_m <
18501854 op, (outs vrc:$vdst, SReg_64:$sdst),
18511855 (ins InputModsNoDefault:$src0_modifiers, arc:$src0,
18521856 InputModsNoDefault:$src1_modifiers, arc:$src1,
18531857 InputModsNoDefault:$src2_modifiers, arc:$src2,
18541858 ClampMod:$clamp, omod:$omod),
18551859 opName#" $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod", pattern,
1856 opName, opName, 1, 1
1860 opName, opName, 1, 0, 1
18571861 >;
18581862
18591863 multiclass VOP3b_64 pattern> :
15061506 defm V_MADAK_F32 : VOP2MADK , "v_madak_f32">;
15071507 } // End isCommutable = 1
15081508
1509 let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
1509 let isCommutable = 1 in {
15101510 // No patterns so that the scalar instructions are always selected.
15111511 // The scalar versions will be replaced with vector when needed later.
15121512
15211521 VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32"
15221522 >;
15231523
1524 let Uses = [VCC] in { // Carry-in comes from VCC
15251524 defm V_ADDC_U32 : VOP2bInst , "v_addc_u32",
1526 VOP2b_I32_I1_I32_I32_VCC
1525 VOP2b_I32_I1_I32_I32_I1
15271526 >;
15281527 defm V_SUBB_U32 : VOP2bInst , "v_subb_u32",
1529 VOP2b_I32_I1_I32_I32_VCC
1528 VOP2b_I32_I1_I32_I32_I1
15301529 >;
15311530 defm V_SUBBREV_U32 : VOP2bInst , "v_subbrev_u32",
1532 VOP2b_I32_I1_I32_I32_VCC, null_frag, "v_subb_u32"
1533 >;
1534
1535 } // End Uses = [VCC]
1536 } // End isCommutable = 1, Defs = [VCC]
1531 VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32"
1532 >;
1533
1534 } // End isCommutable = 1
15371535
15381536 defm V_READLANE_B32 : VOP2SI_3VI_m <
15391537 vop3 <0x001, 0x289>,
280280 let OperandType = "OPERAND_REG_INLINE_C";
281281 let ParserMatchClass = RegImmMatcher<"VCSrc64">;
282282 }
283
284 //===----------------------------------------------------------------------===//
285 // SCSrc_* Operands with an SGPR or an inline constant
286 //===----------------------------------------------------------------------===//
287
288 def SCSrc_64 : RegisterOperand<SReg_64> {
289 let OperandNamespace = "AMDGPU";
290 let OperandType = "OPERAND_REG_INLINE_C";
291 let ParserMatchClass = RegImmMatcher<"SCSrc64">;
292 }
3434 v_add_i32_e32 v1, s[0:1], v2, v3
3535 // CHECK: error: invalid operand for instruction
3636
37 v_addc_u32_e32 v1, vcc, v2, v3, s[2:3]
38 // CHECK: error: invalid operand for instruction
39
40 v_addc_u32_e32 v1, s[0:1], v2, v3, s[2:3]
41 // CHECK: error: invalid operand for instruction
42
43 v_addc_u32_e32 v1, vcc, v2, v3, -1
44 // CHECK: error: invalid operand for instruction
45
46 v_addc_u32_e32 v1, vcc, v2, v3, 123
47 // CHECK: error: invalid operand for instruction
48
49 v_addc_u32_e32 v1, vcc, v2, v3, s0
50 // CHECK: error: invalid operand for instruction
51
52 v_addc_u32_e32 v1, -1, v2, v3, s0
53 // CHECK: error: invalid operand for instruction
54
55 v_addc_u32_e64 v1, s[0:1], v2, v3, 123
56 // CHECK: error: invalid operand for instruction
57
58 v_addc_u32 v1, s[0:1], v2, v3, 123
59 // CHECK: error: invalid operand for instruction
60
3761 // TODO: Constant bus restrictions
306306 // VI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
307307 v_subrev_u32 v1, s[0:1], v2, v3
308308
309 // SICI: v_addc_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x50]
310 // VI: v_addc_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x38]
311 v_addc_u32 v1, vcc, v2, v3
312
313 // SICI: v_addc_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x02,0x00]
314 // VI: v_addc_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x02,0x00]
315 v_addc_u32 v1, s[0:1], v2, v3
316
317 // SICI: v_subb_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x52]
318 // VI: v_subb_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x3a]
319 v_subb_u32 v1, vcc, v2, v3
320
321 // SICI: v_subb_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x52,0xd2,0x02,0x07,0x02,0x00]
322 // VI: v_subb_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1d,0xd1,0x02,0x07,0x02,0x00]
323 v_subb_u32 v1, s[0:1], v2, v3
324
325 // SICI: v_subbrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x54]
326 // VI: v_subbrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c]
327 v_subbrev_u32 v1, vcc, v2, v3
328
329 // SICI: v_subbrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x54,0xd2,0x02,0x07,0x02,0x00]
330 // VI: v_subbrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1e,0xd1,0x02,0x07,0x02,0x00]
331 v_subbrev_u32 v1, s[0:1], v2, v3
309 // SICI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x50]
310 // VI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x38]
311 v_addc_u32 v1, vcc, v2, v3, vcc
312
313 // SICI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x50]
314 // VI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x38]
315 v_addc_u32_e32 v1, vcc, v2, v3, vcc
316
317
318 // SICI: v_addc_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0xaa,0x01]
319 // VI: v_addc_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0xaa,0x01]
320 v_addc_u32 v1, s[0:1], v2, v3, vcc
321
322 // SICI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x0a,0x00]
323 // VI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x0a,0x00]
324 v_addc_u32 v1, s[0:1], v2, v3, s[2:3]
325
326 // SICI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x0a,0x00]
327 // VI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x0a,0x00]
328 v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3]
329
330 // SICI: v_addc_u32_e64 v1, s[0:1], v2, v3, -1 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x06,0x03]
331 // VI: v_addc_u32_e64 v1, s[0:1], v2, v3, -1 ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x06,0x03]
332 v_addc_u32_e64 v1, s[0:1], v2, v3, -1
333
334 // SICI: v_addc_u32_e64 v1, vcc, v2, v3, -1 ; encoding: [0x01,0x6a,0x50,0xd2,0x02,0x07,0x06,0x03]
335 // VI: v_addc_u32_e64 v1, vcc, v2, v3, -1 ; encoding: [0x01,0x6a,0x1c,0xd1,0x02,0x07,0x06,0x03]
336 v_addc_u32_e64 v1, vcc, v2, v3, -1
337
338 // SICI: v_addc_u32_e64 v1, vcc, v2, v3, vcc ; encoding: [0x01,0x6a,0x50,0xd2,0x02,0x07,0xaa,0x01]
339 // VI: v_addc_u32_e64 v1, vcc, v2, v3, vcc ; encoding: [0x01,0x6a,0x1c,0xd1,0x02,0x07,0xaa,0x01]
340 v_addc_u32_e64 v1, vcc, v2, v3, vcc
341
342 // SICI: v_subb_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x52]
343 // VI: v_subb_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x3a]
344 v_subb_u32 v1, vcc, v2, v3, vcc
345
346 // SICI: v_subb_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x52,0xd2,0x02,0x07,0xaa,0x01]
347 // VI: v_subb_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x1d,0xd1,0x02,0x07,0xaa,0x01]
348 v_subb_u32 v1, s[0:1], v2, v3, vcc
349
350 // SICI: v_subbrev_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x54]
351 // VI: v_subbrev_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x3c]
352 v_subbrev_u32 v1, vcc, v2, v3, vcc
353
354 // SICI: v_subbrev_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x54,0xd2,0x02,0x07,0xaa,0x01]
355 // VI: v_subbrev_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x1e,0xd1,0x02,0x07,0xaa,0x01]
356 v_subbrev_u32 v1, s[0:1], v2, v3, vcc
332357
333358 // SICI: v_ldexp_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x56]
334359 // VI: v_ldexp_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x88,0xd2,0x02,0x07,0x02,0x00]