llvm.org GIT mirror llvm / 63cffd6
[AMDGPU] Assembler: change v_madmk operands to have the same order as mad. The constant is now at source operand 1 (previously at 2). This is also how it is in the legacy AMD sp3 assembler. Update tests. Differential Revision: http://reviews.llvm.org/D17984 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@263212 91177308-0d34-0410-b5e6-96231b3b80d8 Nikolay Haustov 4 years ago
6 changed file(s) with 29 addition(s) and 38 deletion(s). Raw diff Collapse all Expand all
10551055 if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))
10561056 return false;
10571057
1058 // We need to do some weird looking operand shuffling since the madmk
1059 // operands are out of the normal expected order with the multiplied
1060 // constant as the last operand.
1061 //
1062 // v_mad_f32 src0, src1, src2 -> v_madmk_f32 src0 * src2K + src1
1063 // src0 -> src2 K
1064 // src1 -> src0
1065 // src2 -> src1
1058 // We need to swap operands 0 and 1 since the madmk constant is at operand 1.
10661059
10671060 const int64_t Imm = DefMI->getOperand(1).getImm();
10681061
10771070
10781071 unsigned Src1Reg = Src1->getReg();
10791072 unsigned Src1SubReg = Src1->getSubReg();
1080 unsigned Src2Reg = Src2->getReg();
1081 unsigned Src2SubReg = Src2->getSubReg();
10821073 Src0->setReg(Src1Reg);
10831074 Src0->setSubReg(Src1SubReg);
10841075 Src0->setIsKill(Src1->isKill());
1085
1086 Src1->setReg(Src2Reg);
1087 Src1->setSubReg(Src2SubReg);
1088 Src1->setIsKill(Src2->isKill());
10891076
10901077 if (Opc == AMDGPU::V_MAC_F32_e64) {
10911078 UseMI->untieRegOperand(
10921079 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
10931080 }
10941081
1095 Src2->ChangeToImmediate(Imm);
1082 Src1->ChangeToImmediate(Imm);
10961083
10971084 removeModOperands(*UseMI);
10981085 UseMI->setDesc(get(AMDGPU::V_MADMK_F32));
15591559 }
15601560
15611561 def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
1562 def VOP_MADK : VOPProfile <[f32, f32, f32, f32]> {
1563 field dag Ins = (ins VCSrc_32:$src0, VGPR_32:$vsrc1, u32imm:$src2);
1564 field string Asm = "$vdst, $src0, $vsrc1, $src2";
1562 def VOP_MADAK : VOPProfile <[f32, f32, f32, f32]> {
1563 field dag Ins32 = (ins VCSrc_32:$src0, VGPR_32:$vsrc1, u32imm:$k);
1564 field string Asm32 = "$vdst, $src0, $vsrc1, $k";
1565 }
1566 def VOP_MADMK : VOPProfile <[f32, f32, f32, f32]> {
1567 field dag Ins32 = (ins VCSrc_32:$src0, u32imm:$k, VGPR_32:$vsrc1);
1568 field string Asm32 = "$vdst, $src0, $k, $vsrc1";
15651569 }
15661570 def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> {
15671571 let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
20972101 revOp
20982102 >;
20992103
2100 multiclass VOP2MADK pattern = []> {
2101
2102 def "" : VOP2_Pseudo ;
2104 multiclass VOP2MADK pattern = []> {
2105
2106 def "" : VOP2_Pseudo ;
21032107
21042108 let isCodeGenOnly = 0 in {
2105 def _si : VOP2Common
2106 !strconcat(opName, VOP_MADK.Asm), []>,
2109 def _si : VOP2Common ,
2110 !strconcat(opName, P.Asm32), []>,
21072111 SIMCInstr ,
21082112 VOP2_MADKe {
21092113 let AssemblerPredicates = [isSICI];
21112115 let DisableDecoder = DisableSIDecoder;
21122116 }
21132117
2114 def _vi : VOP2Common
2115 !strconcat(opName, VOP_MADK.Asm), []>,
2118 def _vi : VOP2Common ,
2119 !strconcat(opName, P.Asm32), []>,
21162120 SIMCInstr ,
21172121 VOP2_MADKe {
21182122 let AssemblerPredicates = [isVI];
15371537 }
15381538 } // End isCommutable = 1
15391539
1540 defm V_MADMK_F32 : VOP2MADK , "v_madmk_f32">;
1540 defm V_MADMK_F32 : VOP2MADK , "v_madmk_f32", VOP_MADMK>;
15411541
15421542 let isCommutable = 1 in {
1543 defm V_MADAK_F32 : VOP2MADK , "v_madak_f32">;
1543 defm V_MADAK_F32 : VOP2MADK , "v_madak_f32", VOP_MADAK>;
15441544 } // End isCommutable = 1
15451545
15461546 let isCommutable = 1 in {
5353 defm V_MUL_F16 : VOP2Inst , "v_mul_f16", VOP_F16_F16_F16>;
5454 defm V_MAC_F16 : VOP2Inst , "v_mac_f16", VOP_F16_F16_F16>;
5555 } // End isCommutable = 1
56 defm V_MADMK_F16 : VOP2MADK , "v_madmk_f16">;
56 defm V_MADMK_F16 : VOP2MADK , "v_madmk_f16", VOP_MADMK>;
5757 let isCommutable = 1 in {
58 defm V_MADAK_F16 : VOP2MADK , "v_madak_f16">;
58 defm V_MADAK_F16 : VOP2MADK , "v_madak_f16", VOP_MADAK>;
5959 defm V_ADD_U16 : VOP2Inst , "v_add_u16", VOP_I16_I16_I16>;
6060 defm V_SUB_U16 : VOP2Inst , "v_sub_u16" , VOP_I16_I16_I16>;
6161 defm V_SUBREV_U16 : VOP2Inst , "v_subrev_u16", VOP_I16_I16_I16>;
66 ; GCN-LABEL: {{^}}madmk_f32:
77 ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
88 ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
9 ; GCN: v_madmk_f32_e32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
9 ; GCN: v_madmk_f32_e32 {{v[0-9]+}}, [[VA]], 0x41200000, [[VB]]
1010 define void @madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
1111 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
1212 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
181181
182182 ; SI-LABEL: {{^}}kill_madmk_verifier_error:
183183 ; SI: s_xor_b64
184 ; SI: v_madmk_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, 0x472aee8c
184 ; SI: v_madmk_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, 0x472aee8c, {{v[0-9]+}}
185185 ; SI: s_or_b64
186186 define void @kill_madmk_verifier_error() nounwind {
187187 bb:
230230 // VI: v_mac_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2c]
231231 v_mac_f32 v1, v2, v3
232232
233 // SICI: v_madmk_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x40,0x00,0x00,0x80,0x42]
234 // VI: v_madmk_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x2e,0x00,0x00,0x80,0x42]
235 v_madmk_f32 v1, v2, v3, 64.0
233 // SICI: v_madmk_f32_e32 v1, v2, 0x42800000, v3 ; encoding: [0x02,0x07,0x02,0x40,0x00,0x00,0x80,0x42]
234 // VI: v_madmk_f32_e32 v1, v2, 0x42800000, v3 ; encoding: [0x02,0x07,0x02,0x2e,0x00,0x00,0x80,0x42]
235 v_madmk_f32 v1, v2, 64.0, v3
236236
237237 // SICI: v_madak_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x42,0x00,0x00,0x80,0x42]
238 // VI: v_madak_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x30,0x00,0x00,0x80,0x42]
238 // VI: v_madak_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x30,0x00,0x00,0x80,0x42]
239239 v_madak_f32 v1, v2, v3, 64.0
240240
241241 // SICI: v_bcnt_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x44]
409409 v_mac_f16 v1, v2, v3
410410
411411 // NOSICI: error: instruction not supported on this GPU
412 // NOSICI: v_madmk_f16 v1, v2, v3, 64.0
413 // VI: v_madmk_f16_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x48,0x00,0x00,0x80,0x42]
414 v_madmk_f16 v1, v2, v3, 64.0
412 // NOSICI: v_madmk_f16 v1, v2, 64.0, v3
413 // VI: v_madmk_f16_e32 v1, v2, 0x42800000, v3 ; encoding: [0x02,0x07,0x02,0x48,0x00,0x00,0x80,0x42]
414 v_madmk_f16 v1, v2, 64.0, v3
415415
416416 // NOSICI: error: instruction not supported on this GPU
417417 // NOSICI: v_madak_f16 v1, v2, v3, 64.0