llvm.org GIT mirror llvm / 7049ac9
[AMDGPU][llvm-mc] v_cndmask_b32: src2 is mandatory; do not enforce VOP2 when src2 == VCC. Another step toward unifying the LLVM assembler/disassembler with sp3. In addition, CodeGen output is slightly improved, hence the changes in the CodeGen tests. Assembler/Disassembler tests updated/added. Differential Revision: http://reviews.llvm.org/D20796 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@271900 91177308-0d34-0410-b5e6-96231b3b80d8 Artem Tamazov 4 years ago
9 changed file(s) with 103 addition(s) and 19 deletion(s). Raw diff Collapse all Expand all
14741474 let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
14751475 }
14761476
1477 // Read in from vcc or arbitrary SGPR
1478 def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
1479 let Src0RC32 = VCSrc_32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above.
1480 let Asm32 = "$vdst, $src0, $src1, vcc";
1481 let Asm64 = "$vdst, $src0, $src1, $src2";
1482 let Outs32 = (outs DstRC:$vdst);
1483 let Outs64 = (outs DstRC:$vdst);
1484
1485 // Suppress src2 implied by type since the 32-bit encoding uses an
1486 // implicit VCC use.
1487 let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
1488 }
1489
14771490 class VOP3b_Profile : VOPProfile<[vt, vt, vt, vt]> {
14781491 let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
14791492 let Asm64 = "$vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod";
15151528 def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
15161529 def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
15171530 def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
1518 def VOP_CNDMASK : VOPProfile <[i32, i32, i32, untyped]> {
1519 let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
1520 let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, SSrc_64:$src2);
1521 let Asm64 = "$vdst, $src0, $src1, $src2";
1522 }
15231531
15241532 def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
15251533 def VOP_MADAK : VOPProfile <[f32, f32, f32, f32]> {
18621870 let DisableDecoder = DisableVIDecoder;
18631871 }
18641872
1873 class VOP3e_Real_si op, dag outs, dag ins, string asm, string opName,
1874 bit HasMods = 0, bit VOP3Only = 0> :
1875 VOP3Common ,
1876 VOP3e ,
1877 SIMCInstr {
1878 let AssemblerPredicates = [isSICI];
1879 let DecoderNamespace = "SICI";
1880 let DisableDecoder = DisableSIDecoder;
1881 }
1882
1883 class VOP3e_Real_vi op, dag outs, dag ins, string asm, string opName,
1884 bit HasMods = 0, bit VOP3Only = 0> :
1885 VOP3Common ,
1886 VOP3e_vi ,
1887 SIMCInstr {
1888 let AssemblerPredicates = [isVI];
1889 let DecoderNamespace = "VI";
1890 let DisableDecoder = DisableVIDecoder;
1891 }
1892
18651893 multiclass VOP3_m pattern,
18661894 string opName, int NumSrcArgs, bit HasMods = 1, bit VOP3Only = 0> {
18671895
19371965 VOP3DisableFields<1, useSrc2Input, HasMods>;
19381966
19391967 def _vi : VOP3b_Real_vi ,
1968 VOP3DisableFields<1, useSrc2Input, HasMods>;
1969 }
1970
1971 // Same as VOP3b_2_3_m but no 2nd destination (sdst), e.g. v_cndmask_b32.
1972 multiclass VOP3e_2_3_m
1973 list pattern, string opName, string revOp,
1974 bit HasMods = 1, bit useSrc2Input = 0, bit VOP3Only = 0> {
1975 def "" : VOP3_Pseudo ;
1976
1977 def _si : VOP3e_Real_si ,
1978 VOP3DisableFields<1, useSrc2Input, HasMods>;
1979
1980 def _vi : VOP3e_Real_vi ,
19401981 VOP3DisableFields<1, useSrc2Input, HasMods>;
19411982 }
19421983
20652106 [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
20662107 opName, revOp, P.HasModifiers>;
20672108 }
2109
2110 multiclass VOP2e_Helper
2111 list pat32, list pat64,
2112 string revOp, bit useSGPRInput> {
2113
2114 let SchedRW = [Write32Bit, WriteSALU] in {
2115 let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
2116 defm _e32 : VOP2_m ;
2117 }
2118
2119 defm _e64 : VOP3e_2_3_m
2120 opName, revOp, p.HasModifiers, useSGPRInput>;
2121 }
2122 }
2123
2124 multiclass VOP2eInst
2125 SDPatternOperator node = null_frag,
2126 string revOp = opName> : VOP2e_Helper <
2127 op, opName, P, [],
2128 !if(P.HasModifiers,
2129 [(set P.DstVT:$vdst,
2130 (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
2131 i1:$clamp, i32:$omod)),
2132 (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
2133 [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
2134 revOp, !eq(P.NumSrcArgs, 3)
2135 >;
20682136
20692137 multiclass VOP2b_Helper
20702138 list pat32, list pat64,
14721472 // VOP2 Instructions
14731473 //===----------------------------------------------------------------------===//
14741474
1475 multiclass V_CNDMASK {
1476 defm _e32 : VOP2_m ;
1477
1478 defm _e64 : VOP3_m <
1479 op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins64,
1480 name#!cast(VOP_CNDMASK.Asm64), [], name, 3, 0>;
1481 }
1482
1483 defm V_CNDMASK_B32 : V_CNDMASK, "v_cndmask_b32">;
1475 defm V_CNDMASK_B32 : VOP2eInst , "v_cndmask_b32",
1476 VOP2e_I32_I32_I32_I1
1477 >;
14841478
14851479 let isCommutable = 1 in {
14861480 defm V_ADD_F32 : VOP2Inst , "v_add_f32",
2424 ; SI-DAG: cndmask_b32
2525 ; SI-DAG: v_cmp_lt_f64
2626 ; SI-DAG: v_cmp_lg_f64
27 ; SI-DAG: s_and_b64
28 ; SI: v_cndmask_b32
27 ; SI-DAG: v_cndmask_b32
2928 ; SI: v_cndmask_b32
3029 ; SI: v_add_f64
3130 ; SI: s_endpgm
2424 ; GCN-DAG: v_cmp_lt_u64
2525
2626 ; GCN: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
27 ; GCN: v_cndmask_b32_e32 [[SIGN_SEL:v[0-9]+]],
27 ; GCN: v_cndmask_b32_e{{32|64}} [[SIGN_SEL:v[0-9]+]],
2828 ; GCN: {{buffer|flat}}_store_dword {{.*}}[[SIGN_SEL]]
2929 define void @v_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
3030 %tid = call i32 @llvm.amdgcn.workitem.id.x()
66
77 v_mul_i32_i24 v1, v2, 100
88 // CHECK: error: invalid operand for instruction
9
10 v_cndmask_b32 v1, v2, v3
11 // CHECK: error: too few operands for instruction
912
1013 //===----------------------------------------------------------------------===//
1114 // _e32 checks
1720
1821 // sgpr src1
1922 v_mul_i32_i24_e32 v1, v2, s3
23 // CHECK: error: invalid operand for instruction
24
25 v_cndmask_b32_e32 v1, v2, v3, s[0:1]
2026 // CHECK: error: invalid operand for instruction
2127
2228 //===----------------------------------------------------------------------===//
9797 // Instructions
9898 //===----------------------------------------------------------------------===//
9999
100 // GCN: v_cndmask_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x00]
101 v_cndmask_b32 v1, v2, v3
100 // GCN: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x00]
101 v_cndmask_b32 v1, v2, v3, vcc
102
103 // GCN: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x00]
104 v_cndmask_b32_e32 v1, v2, v3, vcc
102105
103106 // SICI: v_readlane_b32 s1, v2, s3 ; encoding: [0x02,0x07,0x02,0x02]
104107 // VI: v_readlane_b32 s1, v2, s3 ; encoding: [0x01,0x00,0x89,0xd2,0x02,0x07,0x00,0x00]
201201 // SICI: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd2,0x03,0x0b,0x12,0x00]
202202 // VI: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd1,0x03,0x0b,0x12,0x00]
203203
204 v_cndmask_b32_e64 v1, v3, v5, s[4:5]
205 // SICI: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd2,0x03,0x0b,0x12,0x00]
206 // VI: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd1,0x03,0x0b,0x12,0x00]
207
208 v_cndmask_b32_e64 v1, v3, v5, vcc
209 // SICI: v_cndmask_b32_e64 v1, v3, v5, vcc ; encoding: [0x01,0x00,0x00,0xd2,0x03,0x0b,0xaa,0x01]
210 // VI: v_cndmask_b32_e64 v1, v3, v5, vcc ; encoding: [0x01,0x00,0x00,0xd1,0x03,0x0b,0xaa,0x01]
211
204212 //TODO: readlane, writelane
205213
206214 v_add_f32 v1, v3, s5
0 # RUN: llvm-mc -arch=amdgcn -mcpu=tonga -disassemble -show-encoding < %s | FileCheck %s -check-prefix=VI
1
2 # VI: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x00]
3 0x02 0x07 0x02 0x00
14
25 # VI: v_readlane_b32 s1, v2, s3 ; encoding: [0x01,0x00,0x89,0xd2,0x02,0x07,0x00,0x00]
36 0x01 0x00 0x89 0xd2 0x02 0x07 0x00 0x00
110110 # VI: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd1,0x03,0x0b,0x12,0x00]
111111 0x01 0x00 0x00 0xd1 0x03 0x0b 0x12 0x00
112112
113 # VI: v_cndmask_b32_e64 v1, v3, v5, vcc ; encoding: [0x01,0x00,0x00,0xd1,0x03,0x0b,0xaa,0x01]
114 0x01 0x00 0x00 0xd1 0x03 0x0b 0xaa 0x01
115
113116 # VI: v_add_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x01,0xd1,0x03,0x0b,0x00,0x00]
114117 0x01 0x00 0x01 0xd1 0x03 0x0b 0x00 0x00
115118