llvm.org GIT mirror llvm / e492308
R600/SI: add commuting of rev instructions Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Michel Dänzer <michel.daenzer@amd.com> Tested-by: Michel Dänzer <michel.daenzer@amd.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178127 91177308-0d34-0410-b5e6-96231b3b80d8 Christian Konig 7 years ago
8 changed file(s) with 96 addition(s) and 43 deletion(s). Raw diff Collapse all Expand all
543543 unsigned NumDefs = Desc->getNumDefs();
544544 unsigned NumOps = Desc->getNumOperands();
545545
546 // Commuted opcode if available
547 int OpcodeRev = Desc->isCommutable() ? TII->commuteOpcode(Opcode) : -1;
548 const MCInstrDesc *DescRev = OpcodeRev == -1 ? 0 : &TII->get(OpcodeRev);
549
550 assert(!DescRev || DescRev->getNumDefs() == NumDefs);
551 assert(!DescRev || DescRev->getNumOperands() == NumOps);
552
546553 // e64 version if available, -1 otherwise
547554 int OpcodeE64 = AMDGPU::getVOPe64(Opcode);
548555 const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? 0 : &TII->get(OpcodeE64);
604611 continue;
605612 }
606613
607 if (i == 1 && Desc->isCommutable() &&
608 fitsRegClass(DAG, Ops[0], RegClass)) {
614 if (i == 1 && DescRev && fitsRegClass(DAG, Ops[0], RegClass)) {
609615
610616 unsigned OtherRegClass = Desc->OpInfo[NumDefs].RegClass;
611617 assert(isVSrc(OtherRegClass) || isSSrc(OtherRegClass));
619625 SDValue Tmp = Ops[1];
620626 Ops[1] = Ops[0];
621627 Ops[0] = Tmp;
628
629 Desc = DescRev;
630 DescRev = 0;
622631 continue;
623632 }
624633 }
654663 for (unsigned i = NumOps - NumDefs, e = Node->getNumOperands(); i < e; ++i)
655664 Ops.push_back(Node->getOperand(i));
656665
657 // Either create a complete new or update the current instruction
658 if (Promote2e64)
659 return DAG.getMachineNode(OpcodeE64, Node->getDebugLoc(),
660 Node->getVTList(), Ops.data(), Ops.size());
661 else
662 return DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
663 }
666 // Create a complete new instruction
667 return DAG.getMachineNode(Desc->Opcode, Node->getDebugLoc(),
668 Node->getVTList(), Ops.data(), Ops.size());
669 }
157157 }
158158 }
159159
160 unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
161 // Looks Opcode up in both commute tables and returns its counterpart; returns Opcode itself when neither table has an entry.
162 int NewOpc;
163 // Both lookups below treat a result of -1 as "no entry for this opcode".
164 // Try to map original to commuted opcode
165 if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1)
166 return NewOpc;
167
168 // Try to map commuted to original opcode
169 if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1)
170 return NewOpc;
171 // No mapping in either direction: hand the opcode back unchanged.
172 return Opcode;
173 }
174
160175 MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
161176 bool NewMI) const {
162177
164179 !MI->getOperand(2).isReg())
165180 return 0;
166181
167 return TargetInstrInfo::commuteInstruction(MI, NewMI);
182 MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
183
184 if (MI)
185 MI->setDesc(get(commuteOpcode(MI->getOpcode())));
186
187 return MI;
168188 }
169189
170190 MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg,
3333 MachineBasicBlock::iterator MI, DebugLoc DL,
3434 unsigned DestReg, unsigned SrcReg,
3535 bool KillSrc) const;
36
37 unsigned commuteOpcode(unsigned Opcode) const;
3638
3739 virtual MachineInstr *commuteInstruction(MachineInstr *MI,
3840 bool NewMI=false) const;
7577 namespace AMDGPU {
7678
7779 int getVOPe64(uint16_t Opcode);
80 int getCommuteRev(uint16_t Opcode);
81 int getCommuteOrig(uint16_t Opcode);
7882
7983 } // End namespace AMDGPU
8084
135135
136136 class VOP {
137137 string OpName = opName;
138 }
139
140 class VOP2_REV {
141 string RevOp = revOp;
142 bit IsOrig = isOrig;
138143 }
139144
140145 multiclass VOP1_Helper op, RegisterClass drc, RegisterClass src,
165170 : VOP1_Helper ;
166171
167172 multiclass VOP2_Helper op, RegisterClass vrc, RegisterClass arc,
168 string opName, list pattern> {
173 string opName, list pattern, string revOp> {
169174 def _e32 : VOP2 <
170175 op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1),
171176 opName#"_e32 $dst, $src0, $src1", pattern
172 >, VOP ;
177 >, VOP , VOP2_REV;
173178
174179 def _e64 : VOP3 <
175180 {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
178183 i32imm:$abs, i32imm:$clamp,
179184 i32imm:$omod, i32imm:$neg),
180185 opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", []
181 >, VOP {
186 >, VOP , VOP2_REV {
182187 let SRC2 = SIOperand.ZERO;
183188 }
184189 }
185190
186 multiclass VOP2_32 op, string opName, list pattern>
187 : VOP2_Helper ;
188
189 multiclass VOP2_64 op, string opName, list pattern>
190 : VOP2_Helper ;
191
192 multiclass VOP2b_32 op, string opName, list pattern> {
191 multiclass VOP2_32 op, string opName, list pattern,
192 string revOp = opName>
193 : VOP2_Helper ;
194
195 multiclass VOP2_64 op, string opName, list pattern,
196 string revOp = opName>
197 : VOP2_Helper ;
198
199 multiclass VOP2b_32 op, string opName, list pattern,
200 string revOp = opName> {
193201
194202 def _e32 : VOP2 <
195203 op, (outs VReg_32:$dst), (ins VSrc_32:$src0, VReg_32:$src1),
196204 opName#"_e32 $dst, $src0, $src1", pattern
197 >, VOP ;
205 >, VOP , VOP2_REV;
198206
199207 def _e64 : VOP3b <
200208 {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
203211 i32imm:$abs, i32imm:$clamp,
204212 i32imm:$omod, i32imm:$neg),
205213 opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", []
206 >, VOP {
214 >, VOP , VOP2_REV {
207215 let SRC2 = SIOperand.ZERO;
208216 /* the VOP2 variant puts the carry out into VCC, the VOP3 variant
209217 can write it into any SGPR. We currently don't use the carry out,
326334 let ValueCols = [["8"]];
327335 }
328336
337 // Maps an original opcode to its commuted version
338 def getCommuteRev : InstrMapping {
339 let FilterClass = "VOP2_REV";
340 let RowFields = ["RevOp"];
341 let ColFields = ["IsOrig"];
342 let KeyCol = ["1"];
343 let ValueCols = [["0"]];
344 }
345
346 // Maps a commuted opcode to its original version
347 def getCommuteOrig : InstrMapping {
348 let FilterClass = "VOP2_REV";
349 let RowFields = ["RevOp"];
350 let ColFields = ["IsOrig"];
351 let KeyCol = ["0"];
352 let ValueCols = [["1"]];
353 }
354
329355 include "SIInstructions.td"
803803 defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32",
804804 [(set VReg_32:$dst, (fadd VSrc_32:$src0, VReg_32:$src1))]
805805 >;
806 } // End isCommutable = 1
807806
808807 defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32",
809808 [(set VReg_32:$dst, (fsub VSrc_32:$src0, VReg_32:$src1))]
810809 >;
811
812 defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", []>;
810 defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", [], "V_SUB_F32">;
811 } // End isCommutable = 1
812
813813 defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>;
814814
815815 let isCommutable = 1 in {
847847 defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>;
848848 defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>;
849849
850 } // End isCommutable = 1
851
852850 defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32",
853851 [(set VReg_32:$dst, (srl VSrc_32:$src0, (i32 VReg_32:$src1)))]
854852 >;
855 defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", []>;
853 defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">;
854
856855 defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32",
857856 [(set VReg_32:$dst, (sra VSrc_32:$src0, (i32 VReg_32:$src1)))]
858857 >;
859 defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", []>;
858 defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">;
859
860860 defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
861861 [(set VReg_32:$dst, (shl VSrc_32:$src0, (i32 VReg_32:$src1)))]
862862 >;
863 defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>;
864
865 let isCommutable = 1 in {
863 defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">;
866864
867865 defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
868866 [(set VReg_32:$dst, (and VSrc_32:$src0, VReg_32:$src1))]
883881 //defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>;
884882 //defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>;
885883 //defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
886 let Defs = [VCC] in { // Carry-out goes to VCC
887
888 let isCommutable = 1 in {
884
885 let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
889886 defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32",
890887 [(set VReg_32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
891888 >;
892 } // End isCommutable = 1
893889
894890 defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32",
895891 [(set VReg_32:$dst, (sub (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
896892 >;
897
898 defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", []>;
893 defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">;
894
899895 let Uses = [VCC] in { // Carry-out comes from VCC
900896 defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>;
901897 defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", []>;
902 defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", []>;
898 defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], "V_SUBB_U32">;
903899 } // End Uses = [VCC]
904 } // End Defs = [VCC]
900 } // End isCommutable = 1, Defs = [VCC]
901
905902 defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
906903 ////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>;
907904 ////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
0 ;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
11
2 ;CHECK: V_LSHL_B32_e64 VGPR0, VGPR0, 1, 0, 0, 0, 0
2 ;CHECK: V_LSHLREV_B32_e32 VGPR0, 1, VGPR0
33
44 define void @test(i32 %p) {
55 %i = mul i32 %p, 2
0 ;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
11
2 ;CHECK: V_LSHR_B32_e64 VGPR0, VGPR0, 1, 0, 0, 0, 0
2 ;CHECK: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0
33
44 define void @test(i32 %p) {
55 %i = udiv i32 %p, 2
11
22 ;CHECK: V_MOV_B32_e32 VGPR1, -1431655765
33 ;CHECK-NEXT: V_MUL_HI_U32 VGPR0, VGPR0, VGPR1, 0, 0, 0, 0, 0
4 ;CHECK-NEXT: V_LSHR_B32_e64 VGPR0, VGPR0, 1, 0, 0, 0, 0
4 ;CHECK-NEXT: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0
55
66 define void @test(i32 %p) {
77 %i = udiv i32 %p, 3