llvm.org GIT mirror llvm / 5c0c884
R600: Refactor stack size calculation reviewed-by: Vincent Lejeune <vljn at ovi.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199840 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 6 years ago
4 changed file(s) with 159 addition(s) and 38 deletion(s). Raw diff Collapse all Expand all
7878 AMDGPUSubtarget::getWavefrontSize() const {
7979 return WavefrontSize;
8080 }
81 unsigned
82 AMDGPUSubtarget::getStackEntrySize() const {
83 assert(getGeneration() <= NORTHERN_ISLANDS);
84 switch(getWavefrontSize()) {
85 case 16:
86 return 8;
87 case 32:
88 if (hasCaymanISA())
89 return 4;
90 else
91 return 8;
92 case 64:
93 return 4;
94 default:
95 llvm_unreachable("Illegal wavefront size.");
96 }
97 }
8198 bool
8299 AMDGPUSubtarget::isTargetELF() const {
83100 return false;
6969 bool IsIRStructurizerEnabled() const;
7070 bool isIfCvtEnabled() const;
7171 unsigned getWavefrontSize() const;
72 unsigned getStackEntrySize() const;
7273
7374 virtual bool enableMachineScheduler() const {
7475 return getGeneration() <= NORTHERN_ISLANDS;
2626 using namespace llvm;
2727
2828 namespace {
29
30 struct CFStack {
31
32 enum StackItem {
33 ENTRY = 0,
34 SUB_ENTRY = 1,
35 FIRST_NON_WQM_PUSH = 2,
36 FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
37 };
38
39 const AMDGPUSubtarget &ST;
40 std::vector BranchStack;
41 std::vector LoopStack;
42 unsigned MaxStackSize;
43 unsigned CurrentEntries;
44 unsigned CurrentSubEntries;
45
46 CFStack(const AMDGPUSubtarget &st, unsigned ShaderType) : ST(st),
47 // We need to reserve a stack entry for CALL_FS in vertex shaders.
48 MaxStackSize(ShaderType == ShaderType::VERTEX ? 1 : 0),
49 CurrentEntries(0), CurrentSubEntries(0) { }
50
51 unsigned getLoopDepth();
52 bool branchStackContains(CFStack::StackItem);
53 bool requiresWorkAroundForInst(unsigned Opcode);
54 unsigned getSubEntrySize(CFStack::StackItem Item);
55 void updateMaxStackSize();
56 void pushBranch(unsigned Opcode, bool isWQM = false);
57 void pushLoop();
58 void popBranch();
59 void popLoop();
60 };
61
62 unsigned CFStack::getLoopDepth() {
63 return LoopStack.size();
64 }
65
66 bool CFStack::branchStackContains(CFStack::StackItem Item) {
67 for (std::vector::const_iterator I = BranchStack.begin(),
68 E = BranchStack.end(); I != E; ++I) {
69 if (*I == Item)
70 return true;
71 }
72 return false;
73 }
74
75 unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
76 switch(Item) {
77 default:
78 return 0;
79 case CFStack::FIRST_NON_WQM_PUSH:
80 assert(!ST.hasCaymanISA());
81 if (ST.getGeneration() <= AMDGPUSubtarget::R700) {
82 // +1 For the push operation.
83 // +2 Extra space required.
84 return 3;
85 } else {
86 // Some documentation says that this is not necessary on Evergreen,
87 // but experimentation has show that we need to allocate 1 extra
88 // sub-entry for the first non-WQM push.
89 // +1 For the push operation.
90 // +1 Extra space required.
91 return 2;
92 }
93 case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
94 assert(ST.getGeneration() >= AMDGPUSubtarget::EVERGREEN);
95 // +1 For the push operation.
96 // +1 Extra space required.
97 return 2;
98 case CFStack::SUB_ENTRY:
99 return 1;
100 }
101 }
102
103 void CFStack::updateMaxStackSize() {
104 unsigned CurrentStackSize = CurrentEntries +
105 (RoundUpToAlignment(CurrentSubEntries, 4) / 4);
106 MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
107 }
108
109 void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
110 CFStack::StackItem Item = CFStack::ENTRY;
111 switch(Opcode) {
112 case AMDGPU::CF_PUSH_EG:
113 case AMDGPU::CF_ALU_PUSH_BEFORE:
114 if (!isWQM) {
115 if (!ST.hasCaymanISA() && !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
116 Item = CFStack::FIRST_NON_WQM_PUSH; // May not be required on Evergreen/NI
117 // See comment in
118 // CFStack::getSubEntrySize()
119 else if (CurrentEntries > 0 &&
120 ST.getGeneration() > AMDGPUSubtarget::EVERGREEN &&
121 !ST.hasCaymanISA() &&
122 !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
123 Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
124 else
125 Item = CFStack::SUB_ENTRY;
126 } else
127 Item = CFStack::ENTRY;
128 break;
129 }
130 BranchStack.push_back(Item);
131 if (Item == CFStack::ENTRY)
132 CurrentEntries++;
133 else
134 CurrentSubEntries += getSubEntrySize(Item);
135 updateMaxStackSize();
136 }
137
138 void CFStack::pushLoop() {
139 LoopStack.push_back(CFStack::ENTRY);
140 CurrentEntries++;
141 updateMaxStackSize();
142 }
143
144 void CFStack::popBranch() {
145 CFStack::StackItem Top = BranchStack.back();
146 if (Top == CFStack::ENTRY)
147 CurrentEntries--;
148 else
149 CurrentSubEntries-= getSubEntrySize(Top);
150 BranchStack.pop_back();
151 }
152
153 void CFStack::popLoop() {
154 CurrentEntries--;
155 LoopStack.pop_back();
156 }
29157
30158 class R600ControlFlowFinalizer : public MachineFunctionPass {
31159
299427 }
300428 }
301429
302 unsigned getHWStackSize(unsigned StackSubEntry, bool hasPush) const {
303 switch (ST.getGeneration()) {
304 case AMDGPUSubtarget::R600:
305 case AMDGPUSubtarget::R700:
306 if (hasPush)
307 StackSubEntry += 2;
308 break;
309 case AMDGPUSubtarget::EVERGREEN:
310 if (hasPush)
311 StackSubEntry ++;
312 case AMDGPUSubtarget::NORTHERN_ISLANDS:
313 StackSubEntry += 2;
314 break;
315 default: llvm_unreachable("Not a VLIW4/VLIW5 GPU");
316 }
317 return (StackSubEntry + 3)/4; // Need ceil value of StackSubEntry/4
318 }
319
320430 public:
321431 R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
322432 TII (0), TRI(0),
328438 virtual bool runOnMachineFunction(MachineFunction &MF) {
329439 TII=static_cast(MF.getTarget().getInstrInfo());
330440 TRI=static_cast(MF.getTarget().getRegisterInfo());
331
332 unsigned MaxStack = 0;
333 unsigned CurrentStack = 0;
334 unsigned CurrentLoopDepth = 0;
335 bool HasPush = false;
441 R600MachineFunctionInfo *MFI = MF.getInfo();
442
443 CFStack CFStack(ST, MFI->ShaderType);
336444 for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
337445 ++MB) {
338446 MachineBasicBlock &MBB = *MB;
339447 unsigned CfCount = 0;
340448 std::vector > > LoopStack;
341449 std::vector IfThenElseStack;
342 R600MachineFunctionInfo *MFI = MF.getInfo();
343450 if (MFI->ShaderType == 1) {
344451 BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
345452 getHWInstrDesc(CF_CALL_FS));
346453 CfCount++;
347 MaxStack = 1;
348454 }
349455 std::vector FetchClauses, AluClauses;
350456 std::vector LastAlu(1);
367473 I++;
368474 switch (MI->getOpcode()) {
369475 case AMDGPU::CF_ALU_PUSH_BEFORE:
370 CurrentStack++;
371 MaxStack = std::max(MaxStack, CurrentStack);
372 HasPush = true;
373 if (ST.hasCaymanISA() && CurrentLoopDepth > 1) {
476 if (ST.hasCaymanISA() && CFStack.getLoopDepth() > 1) {
374477 BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG))
375478 .addImm(CfCount + 1)
376479 .addImm(1);
377480 MI->setDesc(TII->get(AMDGPU::CF_ALU));
378481 CfCount++;
379 }
482 CFStack.pushBranch(AMDGPU::CF_PUSH_EG);
483 } else
484 CFStack.pushBranch(AMDGPU::CF_ALU_PUSH_BEFORE);
485
380486 case AMDGPU::CF_ALU:
381487 I = MI;
382488 AluClauses.push_back(MakeALUClause(MBB, I));
384490 CfCount++;
385491 break;
386492 case AMDGPU::WHILELOOP: {
387 CurrentStack+=4;
388 CurrentLoopDepth++;
389 MaxStack = std::max(MaxStack, CurrentStack);
493 CFStack.pushLoop();
390494 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
391495 getHWInstrDesc(CF_WHILE_LOOP))
392496 .addImm(1);
399503 break;
400504 }
401505 case AMDGPU::ENDLOOP: {
402 CurrentStack-=4;
403 CurrentLoopDepth--;
506 CFStack.popLoop();
404507 std::pair > Pair =
405508 LoopStack.back();
406509 LoopStack.pop_back();
438541 break;
439542 }
440543 case AMDGPU::ENDIF: {
441 CurrentStack--;
544 CFStack.popBranch();
442545 if (LastAlu.back()) {
443546 ToPopAfter.push_back(LastAlu.back());
444547 } else {
513616 .addImm(Alu->getOperand(8).getImm());
514617 Alu->eraseFromParent();
515618 }
516 MFI->StackSize = getHWStackSize(MaxStack, HasPush);
619 MFI->StackSize = CFStack.MaxStackSize;
517620 }
518621
519622 return false;
55
66 ; CONFIG-CHECK: .section .AMDGPU.config
77 ; CONFIG-CHECK-NEXT: .long 166100
8 ; CONFIG-CHECK-NEXT: .long 258
8 ; CONFIG-CHECK-NEXT: .long 2
99 ; CONFIG-CHECK-NEXT: .long 165900
1010 ; CONFIG-CHECK-NEXT: .long 0
1111 define void @test(float addrspace(1)* %out, i32 %p) {