llvm.org GIT mirror llvm / bd7c634
R600: Control Flow support for pre EG gen git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179020 91177308-0d34-0410-b5e6-96231b3b80d8 Vincent Lejeune 7 years ago
3 changed file(s) with 242 addition(s) and 74 deletion(s). Raw diff Collapse all Expand all
265265 Emit(Inst, OS);
266266 break;
267267 }
268 case AMDGPU::CF_TC:
269 case AMDGPU::CF_VC:
270 case AMDGPU::CF_CALL_FS:
268 case AMDGPU::CF_TC_EG:
269 case AMDGPU::CF_VC_EG:
270 case AMDGPU::CF_CALL_FS_EG:
271 case AMDGPU::CF_TC_R600:
272 case AMDGPU::CF_VC_R600:
273 case AMDGPU::CF_CALL_FS_R600:
271274 return;
272 case AMDGPU::WHILE_LOOP:
273 case AMDGPU::END_LOOP:
274 case AMDGPU::LOOP_BREAK:
275 case AMDGPU::CF_CONTINUE:
276 case AMDGPU::CF_JUMP:
277 case AMDGPU::CF_ELSE:
278 case AMDGPU::POP: {
275 case AMDGPU::WHILE_LOOP_EG:
276 case AMDGPU::END_LOOP_EG:
277 case AMDGPU::LOOP_BREAK_EG:
278 case AMDGPU::CF_CONTINUE_EG:
279 case AMDGPU::CF_JUMP_EG:
280 case AMDGPU::CF_ELSE_EG:
281 case AMDGPU::POP_EG:
282 case AMDGPU::WHILE_LOOP_R600:
283 case AMDGPU::END_LOOP_R600:
284 case AMDGPU::LOOP_BREAK_R600:
285 case AMDGPU::CF_CONTINUE_R600:
286 case AMDGPU::CF_JUMP_R600:
287 case AMDGPU::CF_ELSE_R600:
288 case AMDGPU::POP_R600: {
279289 uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
280290 EmitByte(INSTR_NATIVE, OS);
281291 Emit(Inst, OS);
2929 class R600ControlFlowFinalizer : public MachineFunctionPass {
3030
3131 private:
// Generation-independent control-flow instruction kinds. getHWInstrDesc()
// translates each enumerator into the matching *_R600 or *_EG hardware opcode.
32 enum ControlFlowInstruction {
33 CF_TC,
34 CF_CALL_FS,
35 CF_WHILE_LOOP,
36 CF_END_LOOP,
37 CF_LOOP_BREAK,
// Resolved to CF_CONTINUE_R600 / CF_CONTINUE_EG (the opcode names drop "LOOP_").
38 CF_LOOP_CONTINUE,
39 CF_JUMP,
40 CF_ELSE,
41 CF_POP
42 };
43
3244 static char ID;
3345 const R600InstrInfo *TII;
3446 unsigned MaxFetchInst;
47 const AMDGPUSubtarget &ST;
3548
3649 bool isFetch(const MachineInstr *MI) const {
3750 switch (MI->getOpcode()) {
6982 }
7083 }
7184
// Maps a generation-independent ControlFlowInstruction onto the instruction
// description (MCInstrDesc) of the matching hardware opcode, looked up via TII.
//
// When ST.device()->getGeneration() compares <= AMDGPUDeviceInfo::HD4XXX the
// *_R600 opcodes are selected; every other generation gets the *_EG opcodes.
//
// NOTE(review): neither switch has a default label and no return statement
// follows them, so a CFI value outside the enumerators handled below would run
// off the end of this non-void function. Consider terminating the function
// with an explicit unreachable marker — confirm the project's convention.
85 const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
86 if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX) {
// Generations up to and including HD4XXX: use the *_R600 opcodes.
87 switch (CFI) {
88 case CF_TC:
89 return TII->get(AMDGPU::CF_TC_R600);
90 case CF_CALL_FS:
91 return TII->get(AMDGPU::CF_CALL_FS_R600);
92 case CF_WHILE_LOOP:
93 return TII->get(AMDGPU::WHILE_LOOP_R600);
94 case CF_END_LOOP:
95 return TII->get(AMDGPU::END_LOOP_R600);
96 case CF_LOOP_BREAK:
97 return TII->get(AMDGPU::LOOP_BREAK_R600);
98 case CF_LOOP_CONTINUE:
99 return TII->get(AMDGPU::CF_CONTINUE_R600);
100 case CF_JUMP:
101 return TII->get(AMDGPU::CF_JUMP_R600);
102 case CF_ELSE:
103 return TII->get(AMDGPU::CF_ELSE_R600);
104 case CF_POP:
105 return TII->get(AMDGPU::POP_R600);
106 }
107 } else {
// All remaining generations: use the *_EG opcodes.
108 switch (CFI) {
109 case CF_TC:
110 return TII->get(AMDGPU::CF_TC_EG);
111 case CF_CALL_FS:
112 return TII->get(AMDGPU::CF_CALL_FS_EG);
113 case CF_WHILE_LOOP:
114 return TII->get(AMDGPU::WHILE_LOOP_EG);
115 case CF_END_LOOP:
116 return TII->get(AMDGPU::END_LOOP_EG);
117 case CF_LOOP_BREAK:
118 return TII->get(AMDGPU::LOOP_BREAK_EG);
119 case CF_LOOP_CONTINUE:
120 return TII->get(AMDGPU::CF_CONTINUE_EG);
121 case CF_JUMP:
122 return TII->get(AMDGPU::CF_JUMP_EG);
123 case CF_ELSE:
124 return TII->get(AMDGPU::CF_ELSE_EG);
125 case CF_POP:
126 return TII->get(AMDGPU::POP_EG);
127 }
128 }
129 }
130
72131 MachineBasicBlock::iterator
73132 MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
74133 unsigned CfAddress) const {
84143 break;
85144 }
86145 BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
87 TII->get(AMDGPU::CF_TC))
146 getHWInstrDesc(CF_TC))
88147 .addImm(CfAddress) // ADDR
89148 .addImm(AluInstCount); // COUNT
90149 return I;
103162
104163 public:
105164 R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
106 TII (static_cast(tm.getInstrInfo())) {
165 TII (static_cast(tm.getInstrInfo())),
166 ST(tm.getSubtarget()) {
107167 const AMDGPUSubtarget &ST = tm.getSubtarget();
108168 if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX)
109169 MaxFetchInst = 8;
123183 R600MachineFunctionInfo *MFI = MF.getInfo();
124184 if (MFI->ShaderType == 1) {
125185 BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
126 TII->get(AMDGPU::CF_CALL_FS));
186 getHWInstrDesc(CF_CALL_FS));
127187 CfCount++;
128188 }
129189 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
153213 CurrentStack++;
154214 MaxStack = std::max(MaxStack, CurrentStack);
155215 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
156 TII->get(AMDGPU::WHILE_LOOP))
216 getHWInstrDesc(CF_WHILE_LOOP))
157217 .addImm(2);
158218 std::pair > Pair(CfCount,
159219 std::set());
169229 LoopStack.back();
170230 LoopStack.pop_back();
171231 CounterPropagateAddr(Pair.second, CfCount);
172 BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::END_LOOP))
232 BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
173233 .addImm(Pair.first + 1);
174234 MI->eraseFromParent();
175235 CfCount++;
177237 }
178238 case AMDGPU::IF_PREDICATE_SET: {
179239 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
180 TII->get(AMDGPU::CF_JUMP))
240 getHWInstrDesc(CF_JUMP))
181241 .addImm(0)
182242 .addImm(0);
183243 IfThenElseStack.push_back(MIb);
191251 IfThenElseStack.pop_back();
192252 CounterPropagateAddr(JumpInst, CfCount);
193253 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
194 TII->get(AMDGPU::CF_ELSE))
254 getHWInstrDesc(CF_ELSE))
195255 .addImm(0)
196256 .addImm(1);
197257 DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
206266 IfThenElseStack.pop_back();
207267 CounterPropagateAddr(IfOrElseInst, CfCount + 1);
208268 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
209 TII->get(AMDGPU::POP))
269 getHWInstrDesc(CF_POP))
210270 .addImm(CfCount + 1)
211271 .addImm(1);
212272 DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
217277 case AMDGPU::PREDICATED_BREAK: {
218278 CurrentStack--;
219279 CfCount += 3;
220 BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_JUMP))
280 BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_JUMP))
221281 .addImm(CfCount)
222282 .addImm(1);
223283 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
224 TII->get(AMDGPU::LOOP_BREAK))
284 getHWInstrDesc(CF_LOOP_BREAK))
225285 .addImm(0);
226 BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::POP))
286 BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_POP))
227287 .addImm(CfCount)
228288 .addImm(1);
229289 LoopStack.back().second.insert(MIb);
232292 }
233293 case AMDGPU::CONTINUE: {
234294 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
235 TII->get(AMDGPU::CF_CONTINUE))
295 getHWInstrDesc(CF_LOOP_CONTINUE))
236296 .addImm(0);
237297 LoopStack.back().second.insert(MIb);
238298 MI->eraseFromParent();
822822 let Inst{63-32} = Word1;
823823 }
824824
825 class CF_WORD0 {
// Low 32-bit word of the CF_CLAUSE_R600 encoding (it becomes Inst{31-0} there).
// The whole word is taken up by the 32-bit ADDR field.
825 class CF_WORD0_R600 {
826 field bits<32> Word0;
827
828 bits<32> ADDR;
829
830 let Word0 = ADDR;
831 }
832
// High 32-bit word of the CF_CLAUSE_R600 encoding (it becomes Inst{63-32}
// there). Declares the individual bit fields and packs them into Word1.
833 class CF_WORD1_R600 {
834 field bits<32> Word1;
835
// Field declarations; the widths add up to 31 bits.
836 bits<3> POP_COUNT;
837 bits<5> CF_CONST;
838 bits<2> COND;
839 bits<3> COUNT;
840 bits<6> CALL_COUNT;
841 bits<1> COUNT_3;
842 bits<1> END_OF_PROGRAM;
843 bits<1> VALID_PIXEL_MODE;
844 bits<7> CF_INST;
845 bits<1> WHOLE_QUAD_MODE;
846 bits<1> BARRIER;
847
// Bit layout of Word1. Bit 20 is not assigned by this class.
848 let Word1{2-0} = POP_COUNT;
849 let Word1{7-3} = CF_CONST;
850 let Word1{9-8} = COND;
851 let Word1{12-10} = COUNT;
852 let Word1{18-13} = CALL_COUNT;
853 let Word1{19} = COUNT_3;
854 let Word1{21} = END_OF_PROGRAM;
855 let Word1{22} = VALID_PIXEL_MODE;
856 let Word1{29-23} = CF_INST;
857 let Word1{30} = WHOLE_QUAD_MODE;
858 let Word1{31} = BARRIER;
859 }
860
// Base class for the CF_*_R600 instruction definitions. It has no outputs,
// takes its inputs and asm string from the template arguments, and combines
// CF_WORD0_R600 / CF_WORD1_R600 into one 64-bit encoding.
//
// `inst` is stored in CF_INST; BARRIER is fixed to 1 and the other fields set
// below are fixed to 0. ADDR, COUNT and POP_COUNT are not set here.
//
// NOTE(review): the opening of the template parameter list (before `inst`)
// appears to have been lost in extraction — confirm against the original file.
861 class CF_CLAUSE_R600 inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
862 ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 {
863 field bits<64> Inst;
864
865 let CF_INST = inst;
866 let BARRIER = 1;
867 let CF_CONST = 0;
868 let VALID_PIXEL_MODE = 0;
869 let COND = 0;
870 let CALL_COUNT = 0;
871 let COUNT_3 = 0;
872 let END_OF_PROGRAM = 0;
873 let WHOLE_QUAD_MODE = 0;
874
// Word0 occupies the low half of the encoding, Word1 the high half.
875 let Inst{31-0} = Word0;
876 let Inst{63-32} = Word1;
877 }
878
879 class CF_WORD0_EG {
826880 field bits<32> Word0;
827881
828882 bits<24> ADDR;
832886 let Word0{26-24} = JUMPTABLE_SEL;
833887 }
834888
835 class CF_WORD1 {
889 class CF_WORD1_EG {
836890 field bits<32> Word1;
837891
838892 bits<3> POP_COUNT;
852906 let Word1{31} = BARRIER;
853907 }
854908
855 class CF_CLAUSE inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
856 ins, AsmPrint, [] >, CF_WORD0, CF_WORD1 {
909 class CF_CLAUSE_EG inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
910 ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG {
857911 field bits<64> Inst;
858912
859913 let CF_INST = inst;
865919
866920 let Inst{31-0} = Word0;
867921 let Inst{63-32} = Word1;
868 }
869
// Generation-agnostic CF definitions built on CF_CLAUSE. Each one uses the
// same first template argument, operand list and asm string as its
// CF_*_R600 / CF_*_EG counterpart shown further down in this view.
870 def CF_TC : CF_CLAUSE<1, (ins i32imm:$ADDR, i32imm:$COUNT),
871 "TEX $COUNT @$ADDR"> {
872 let POP_COUNT = 0;
873 }
874
875 def CF_VC : CF_CLAUSE<2, (ins i32imm:$ADDR, i32imm:$COUNT),
876 "VTX $COUNT @$ADDR"> {
877 let POP_COUNT = 0;
878 }
879
880 def WHILE_LOOP : CF_CLAUSE<6, (ins i32imm:$ADDR), "LOOP_START_DX10 @$ADDR"> {
881 let POP_COUNT = 0;
882 let COUNT = 0;
883 }
884
885 def END_LOOP : CF_CLAUSE<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
886 let POP_COUNT = 0;
887 let COUNT = 0;
888 }
889
890 def LOOP_BREAK : CF_CLAUSE<9, (ins i32imm:$ADDR), "LOOP_BREAK @$ADDR"> {
891 let POP_COUNT = 0;
892 let COUNT = 0;
893 }
894
895 def CF_CONTINUE : CF_CLAUSE<8, (ins i32imm:$ADDR), "CONTINUE @$ADDR"> {
896 let POP_COUNT = 0;
897 let COUNT = 0;
898 }
899
// JUMP, ELSE and POP take POP_COUNT as an operand instead of fixing it to 0.
900 def CF_JUMP : CF_CLAUSE<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "JUMP @$ADDR POP:$POP_COUNT"> {
901 let COUNT = 0;
902 }
903
904 def CF_ELSE : CF_CLAUSE<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "ELSE @$ADDR POP:$POP_COUNT"> {
905 let COUNT = 0;
906 }
907
// CALL_FS has no operands; ADDR, COUNT and POP_COUNT are all fixed to 0.
908 def CF_CALL_FS : CF_CLAUSE<19, (ins), "CALL_FS"> {
909 let ADDR = 0;
910 let COUNT = 0;
911 let POP_COUNT = 0;
912 }
913
914 def POP : CF_CLAUSE<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "POP @$ADDR POP:$POP_COUNT"> {
915 let COUNT = 0;
916922 }
917923
918924 def CF_ALU : ALU_CLAUSE<8, "ALU">;
14321438 let Word1{31} = 1; // BARRIER
14331439 }
14341440 defm : SteamOutputExportPattern;
1441
// CF instruction definitions using the CF_CLAUSE_R600 encoding. The first
// template argument is the value stored in CF_INST; ADDR, COUNT and POP_COUNT
// are either instruction operands or fixed with `let`.

// CF_INST 1: ADDR and COUNT are operands; POP_COUNT is fixed to 0.
1442 def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$COUNT),
1443 "TEX $COUNT @$ADDR"> {
1444 let POP_COUNT = 0;
1445 }
// CF_INST 2: ADDR and COUNT are operands; POP_COUNT is fixed to 0.
1446 def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$COUNT),
1447 "VTX $COUNT @$ADDR"> {
1448 let POP_COUNT = 0;
1449 }
// CF_INST 6: ADDR is the only operand; POP_COUNT and COUNT are fixed to 0.
1450 def WHILE_LOOP_R600 : CF_CLAUSE_R600<6, (ins i32imm:$ADDR),
1451 "LOOP_START_DX10 @$ADDR"> {
1452 let POP_COUNT = 0;
1453 let COUNT = 0;
1454 }
// CF_INST 5: ADDR is the only operand; POP_COUNT and COUNT are fixed to 0.
1455 def END_LOOP_R600 : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
1456 let POP_COUNT = 0;
1457 let COUNT = 0;
1458 }
// CF_INST 9: ADDR is the only operand; POP_COUNT and COUNT are fixed to 0.
1459 def LOOP_BREAK_R600 : CF_CLAUSE_R600<9, (ins i32imm:$ADDR),
1460 "LOOP_BREAK @$ADDR"> {
1461 let POP_COUNT = 0;
1462 let COUNT = 0;
1463 }
// CF_INST 8: ADDR is the only operand; POP_COUNT and COUNT are fixed to 0.
1464 def CF_CONTINUE_R600 : CF_CLAUSE_R600<8, (ins i32imm:$ADDR),
1465 "CONTINUE @$ADDR"> {
1466 let POP_COUNT = 0;
1467 let COUNT = 0;
1468 }
// CF_INST 10: ADDR and POP_COUNT are operands; COUNT is fixed to 0.
1469 def CF_JUMP_R600 : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
1470 "JUMP @$ADDR POP:$POP_COUNT"> {
1471 let COUNT = 0;
1472 }
// CF_INST 13: ADDR and POP_COUNT are operands; COUNT is fixed to 0.
1473 def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
1474 "ELSE @$ADDR POP:$POP_COUNT"> {
1475 let COUNT = 0;
1476 }
// CF_INST 19: no operands; ADDR, COUNT and POP_COUNT are all fixed to 0.
1477 def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> {
1478 let ADDR = 0;
1479 let COUNT = 0;
1480 let POP_COUNT = 0;
1481 }
// CF_INST 14: ADDR and POP_COUNT are operands; COUNT is fixed to 0.
1482 def POP_R600 : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
1483 "POP @$ADDR POP:$POP_COUNT"> {
1484 let COUNT = 0;
1485 }
1486
14351487 }
14361488
14371489 // Helper pattern for normalizing inputs to trigonometric instructions for R700+
15871639 let Word1{31} = 1; // BARRIER
15881640 }
15891641 defm : SteamOutputExportPattern;
1642
// CF instruction definitions using the CF_CLAUSE_EG encoding. The first
// template argument is the value stored in CF_INST; ADDR, COUNT and POP_COUNT
// are either instruction operands or fixed with `let`.

// CF_INST 1: ADDR and COUNT are operands; POP_COUNT is fixed to 0.
1643 def CF_TC_EG : CF_CLAUSE_EG<1, (ins i32imm:$ADDR, i32imm:$COUNT),
1644 "TEX $COUNT @$ADDR"> {
1645 let POP_COUNT = 0;
1646 }
// CF_INST 2: ADDR and COUNT are operands; POP_COUNT is fixed to 0.
1647 def CF_VC_EG : CF_CLAUSE_EG<2, (ins i32imm:$ADDR, i32imm:$COUNT),
1648 "VTX $COUNT @$ADDR"> {
1649 let POP_COUNT = 0;
1650 }
// CF_INST 6: ADDR is the only operand; POP_COUNT and COUNT are fixed to 0.
1651 def WHILE_LOOP_EG : CF_CLAUSE_EG<6, (ins i32imm:$ADDR),
1652 "LOOP_START_DX10 @$ADDR"> {
1653 let POP_COUNT = 0;
1654 let COUNT = 0;
1655 }
// CF_INST 5: ADDR is the only operand; POP_COUNT and COUNT are fixed to 0.
1656 def END_LOOP_EG : CF_CLAUSE_EG<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
1657 let POP_COUNT = 0;
1658 let COUNT = 0;
1659 }
// CF_INST 9: ADDR is the only operand; POP_COUNT and COUNT are fixed to 0.
1660 def LOOP_BREAK_EG : CF_CLAUSE_EG<9, (ins i32imm:$ADDR),
1661 "LOOP_BREAK @$ADDR"> {
1662 let POP_COUNT = 0;
1663 let COUNT = 0;
1664 }
// CF_INST 8: ADDR is the only operand; POP_COUNT and COUNT are fixed to 0.
1665 def CF_CONTINUE_EG : CF_CLAUSE_EG<8, (ins i32imm:$ADDR),
1666 "CONTINUE @$ADDR"> {
1667 let POP_COUNT = 0;
1668 let COUNT = 0;
1669 }
// CF_INST 10: ADDR and POP_COUNT are operands; COUNT is fixed to 0.
1670 def CF_JUMP_EG : CF_CLAUSE_EG<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
1671 "JUMP @$ADDR POP:$POP_COUNT"> {
1672 let COUNT = 0;
1673 }
// CF_INST 13: ADDR and POP_COUNT are operands; COUNT is fixed to 0.
1674 def CF_ELSE_EG : CF_CLAUSE_EG<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
1675 "ELSE @$ADDR POP:$POP_COUNT"> {
1676 let COUNT = 0;
1677 }
// CF_INST 19: no operands; ADDR, COUNT and POP_COUNT are all fixed to 0.
1678 def CF_CALL_FS_EG : CF_CLAUSE_EG<19, (ins), "CALL_FS"> {
1679 let ADDR = 0;
1680 let COUNT = 0;
1681 let POP_COUNT = 0;
1682 }
// CF_INST 14: ADDR and POP_COUNT are operands; COUNT is fixed to 0.
1683 def POP_EG : CF_CLAUSE_EG<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
1684 "POP @$ADDR POP:$POP_COUNT"> {
1685 let COUNT = 0;
1686 }
1687
15901688
15911689 //===----------------------------------------------------------------------===//
15921690 // Memory read/write instructions