llvm.org GIT mirror llvm / 8a4f11e
Revert "R600: Add work-around for the CF stack entry HW bug" This reverts commit 35b8331cad6eb512a2506adbc394201181da94ba. The -debug-only flag for llc doesn't appear to be available in all build configurations. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199845 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 6 years ago
6 changed file(s) with 7 addition(s) and 288 deletion(s). Raw diff Collapse all Expand all
6262 "true",
6363 "Use Cayman ISA">;
6464
65 def FeatureCFALUBug : SubtargetFeature<"cfalubug",
66 "CFALUBug",
67 "true",
68 "GPU has CF_ALU bug">;
69
7065 class SubtargetFeatureFetchLimit :
7166 SubtargetFeature <"fetch"#Value,
7267 "TexVTXClauseSize",
3838 EnableIRStructurizer = true;
3939 EnableIfCvt = true;
4040 WavefrontSize = 0;
41 CFALUBug = false;
4241 ParseSubtargetFeatures(GPU, FS);
4342 DevName = GPU;
4443 }
9796 }
9897 }
9998 bool
100 AMDGPUSubtarget::hasCFAluBug() const {
101 assert(getGeneration() <= NORTHERN_ISLANDS);
102 return CFALUBug;
103 }
104 bool
10599 AMDGPUSubtarget::isTargetELF() const {
106100 return false;
107101 }
5151 bool EnableIRStructurizer;
5252 bool EnableIfCvt;
5353 unsigned WavefrontSize;
54 bool CFALUBug;
5554
5655 InstrItineraryData InstrItins;
5756
7170 bool isIfCvtEnabled() const;
7271 unsigned getWavefrontSize() const;
7372 unsigned getStackEntrySize() const;
74 bool hasCFAluBug() const;
7573
7674 virtual bool enableMachineScheduler() const {
7775 return getGeneration() <= NORTHERN_ISLANDS;
4545 //===----------------------------------------------------------------------===//
4646
4747 def : Proc<"cedar", R600_VLIW5_Itin,
48 [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize32,
49 FeatureCFALUBug]>;
48 [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize32]>;
5049
5150 def : Proc<"redwood", R600_VLIW5_Itin,
52 [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64,
53 FeatureCFALUBug]>;
51 [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64]>;
5452
5553 def : Proc<"sumo", R600_VLIW5_Itin,
56 [FeatureEvergreen, FeatureWavefrontSize64, FeatureCFALUBug]>;
54 [FeatureEvergreen, FeatureWavefrontSize64]>;
5755
5856 def : Proc<"juniper", R600_VLIW5_Itin,
5957 [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64]>;
6765 //===----------------------------------------------------------------------===//
6866
6967 def : Proc<"barts", R600_VLIW5_Itin,
70 [FeatureNorthernIslands, FeatureVertexCache, FeatureCFALUBug]>;
68 [FeatureNorthernIslands, FeatureVertexCache]>;
7169
7270 def : Proc<"turks", R600_VLIW5_Itin,
73 [FeatureNorthernIslands, FeatureVertexCache, FeatureCFALUBug]>;
71 [FeatureNorthernIslands, FeatureVertexCache]>;
7472
7573 def : Proc<"caicos", R600_VLIW5_Itin,
76 [FeatureNorthernIslands, FeatureCFALUBug]>;
74 [FeatureNorthernIslands]>;
7775
7876 def : Proc<"cayman", R600_VLIW4_Itin,
7977 [FeatureNorthernIslands, FeatureFP64, FeatureCaymanISA]>;
7070 return true;
7171 }
7272 return false;
73 }
74
75 bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
76 if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST.hasCaymanISA() &&
77 getLoopDepth() > 1)
78 return true;
79
80 if (!ST.hasCFAluBug())
81 return false;
82
83 switch(Opcode) {
84 default: return false;
85 case AMDGPU::CF_ALU_PUSH_BEFORE:
86 case AMDGPU::CF_ALU_ELSE_AFTER:
87 case AMDGPU::CF_ALU_BREAK:
88 case AMDGPU::CF_ALU_CONTINUE:
89 if (CurrentSubEntries == 0)
90 return false;
91 if (ST.getWavefrontSize() == 64) {
92 // We are being conservative here. We only require this work-around if
93 // CurrentSubEntries > 3 &&
94 // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
95 //
96 // We have to be conservative, because we don't know for certain that
97 // our stack allocation algorithm for Evergreen/NI is correct. Applying this
98 // work-around when CurrentSubEntries > 3 allows us to over-allocate stack
99 // resources without any problems.
100 return CurrentSubEntries > 3;
101 } else {
102 assert(ST.getWavefrontSize() == 32);
103 // We are being conservative here. We only require the work-around if
104 // CurrentSubEntries > 7 &&
105 // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
106 // See the comment on the wavefront size == 64 case for why we are
107 // being conservative.
108 return CurrentSubEntries > 7;
109 }
110 }
11173 }
11274
11375 unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
509471 if (MI->getOpcode() == AMDGPU::CF_ALU)
510472 LastAlu.back() = MI;
511473 I++;
512 bool RequiresWorkAround =
513 CFStack.requiresWorkAroundForInst(MI->getOpcode());
514474 switch (MI->getOpcode()) {
515475 case AMDGPU::CF_ALU_PUSH_BEFORE:
516 if (RequiresWorkAround) {
517 DEBUG(dbgs() << "Applying bug work-around for ALU_PUSH_BEFORE\n");
476 if (ST.hasCaymanISA() && CFStack.getLoopDepth() > 1) {
518477 BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG))
519478 .addImm(CfCount + 1)
520479 .addImm(1);
+0
-225
test/CodeGen/R600/cf-stack-bug.ll less more
0 ; RUN: llc -march=r600 -mcpu=redwood -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
1 ; RUN: llc -march=r600 -mcpu=sumo -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
2 ; RUN: llc -march=r600 -mcpu=barts -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
3 ; RUN: llc -march=r600 -mcpu=turks -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
4 ; RUN: llc -march=r600 -mcpu=caicos -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
5 ; RUN: llc -march=r600 -mcpu=cedar -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG32 --check-prefix=FUNC
6 ; RUN: llc -march=r600 -mcpu=juniper -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=NOBUG --check-prefix=FUNC
7 ; RUN: llc -march=r600 -mcpu=cypress -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=NOBUG --check-prefix=FUNC
8 ; RUN: llc -march=r600 -mcpu=cayman -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=NOBUG --check-prefix=FUNC
9
10 ; We are currently allocating 2 extra sub-entries on Evergreen / NI for
11 ; non-WQM push instructions. If we change this to 1, then we will need to
12 ; add one level of depth to each of these tests.
13
14 ; BUG64-NOT: Applying bug work-around
15 ; BUG32-NOT: Applying bug work-around
16 ; NOBUG-NOT: Applying bug work-around
17 ; FUNC-LABEL: @nested3
18 define void @nested3(i32 addrspace(1)* %out, i32 %cond) {
19 entry:
20 %0 = icmp sgt i32 %cond, 0
21 br i1 %0, label %if.1, label %end
22
23 if.1:
24 %1 = icmp sgt i32 %cond, 10
25 br i1 %1, label %if.2, label %if.store.1
26
27 if.store.1:
28 store i32 1, i32 addrspace(1)* %out
29 br label %end
30
31 if.2:
32 %2 = icmp sgt i32 %cond, 20
33 br i1 %2, label %if.3, label %if.2.store
34
35 if.2.store:
36 store i32 2, i32 addrspace(1)* %out
37 br label %end
38
39 if.3:
40 store i32 3, i32 addrspace(1)* %out
41 br label %end
42
43 end:
44 ret void
45 }
46
47 ; BUG64: Applying bug work-around
48 ; BUG32-NOT: Applying bug work-around
49 ; NOBUG-NOT: Applying bug work-around
50 ; FUNC-LABEL: @nested4
51 define void @nested4(i32 addrspace(1)* %out, i32 %cond) {
52 entry:
53 %0 = icmp sgt i32 %cond, 0
54 br i1 %0, label %if.1, label %end
55
56 if.1:
57 %1 = icmp sgt i32 %cond, 10
58 br i1 %1, label %if.2, label %if.1.store
59
60 if.1.store:
61 store i32 1, i32 addrspace(1)* %out
62 br label %end
63
64 if.2:
65 %2 = icmp sgt i32 %cond, 20
66 br i1 %2, label %if.3, label %if.2.store
67
68 if.2.store:
69 store i32 2, i32 addrspace(1)* %out
70 br label %end
71
72 if.3:
73 %3 = icmp sgt i32 %cond, 30
74 br i1 %3, label %if.4, label %if.3.store
75
76 if.3.store:
77 store i32 3, i32 addrspace(1)* %out
78 br label %end
79
80 if.4:
81 store i32 4, i32 addrspace(1)* %out
82 br label %end
83
84 end:
85 ret void
86 }
87
88 ; BUG64: Applying bug work-around
89 ; BUG32-NOT: Applying bug work-around
90 ; NOBUG-NOT: Applying bug work-around
91 ; FUNC-LABEL: @nested7
92 define void @nested7(i32 addrspace(1)* %out, i32 %cond) {
93 entry:
94 %0 = icmp sgt i32 %cond, 0
95 br i1 %0, label %if.1, label %end
96
97 if.1:
98 %1 = icmp sgt i32 %cond, 10
99 br i1 %1, label %if.2, label %if.1.store
100
101 if.1.store:
102 store i32 1, i32 addrspace(1)* %out
103 br label %end
104
105 if.2:
106 %2 = icmp sgt i32 %cond, 20
107 br i1 %2, label %if.3, label %if.2.store
108
109 if.2.store:
110 store i32 2, i32 addrspace(1)* %out
111 br label %end
112
113 if.3:
114 %3 = icmp sgt i32 %cond, 30
115 br i1 %3, label %if.4, label %if.3.store
116
117 if.3.store:
118 store i32 3, i32 addrspace(1)* %out
119 br label %end
120
121 if.4:
122 %4 = icmp sgt i32 %cond, 40
123 br i1 %4, label %if.5, label %if.4.store
124
125 if.4.store:
126 store i32 4, i32 addrspace(1)* %out
127 br label %end
128
129 if.5:
130 %5 = icmp sgt i32 %cond, 50
131 br i1 %5, label %if.6, label %if.5.store
132
133 if.5.store:
134 store i32 5, i32 addrspace(1)* %out
135 br label %end
136
137 if.6:
138 %6 = icmp sgt i32 %cond, 60
139 br i1 %6, label %if.7, label %if.6.store
140
141 if.6.store:
142 store i32 6, i32 addrspace(1)* %out
143 br label %end
144
145 if.7:
146 store i32 7, i32 addrspace(1)* %out
147 br label %end
148
149 end:
150 ret void
151 }
152
153 ; BUG64: Applying bug work-around
154 ; BUG32: Applying bug work-around
155 ; NOBUG-NOT: Applying bug work-around
156 ; FUNC-LABEL: @nested8
157 define void @nested8(i32 addrspace(1)* %out, i32 %cond) {
158 entry:
159 %0 = icmp sgt i32 %cond, 0
160 br i1 %0, label %if.1, label %end
161
162 if.1:
163 %1 = icmp sgt i32 %cond, 10
164 br i1 %1, label %if.2, label %if.1.store
165
166 if.1.store:
167 store i32 1, i32 addrspace(1)* %out
168 br label %end
169
170 if.2:
171 %2 = icmp sgt i32 %cond, 20
172 br i1 %2, label %if.3, label %if.2.store
173
174 if.2.store:
175 store i32 2, i32 addrspace(1)* %out
176 br label %end
177
178 if.3:
179 %3 = icmp sgt i32 %cond, 30
180 br i1 %3, label %if.4, label %if.3.store
181
182 if.3.store:
183 store i32 3, i32 addrspace(1)* %out
184 br label %end
185
186 if.4:
187 %4 = icmp sgt i32 %cond, 40
188 br i1 %4, label %if.5, label %if.4.store
189
190 if.4.store:
191 store i32 4, i32 addrspace(1)* %out
192 br label %end
193
194 if.5:
195 %5 = icmp sgt i32 %cond, 50
196 br i1 %5, label %if.6, label %if.5.store
197
198 if.5.store:
199 store i32 5, i32 addrspace(1)* %out
200 br label %end
201
202 if.6:
203 %6 = icmp sgt i32 %cond, 60
204 br i1 %6, label %if.7, label %if.6.store
205
206 if.6.store:
207 store i32 6, i32 addrspace(1)* %out
208 br label %end
209
210 if.7:
211 %7 = icmp sgt i32 %cond, 70
212 br i1 %7, label %if.8, label %if.7.store
213
214 if.7.store:
215 store i32 7, i32 addrspace(1)* %out
216 br label %end
217
218 if.8:
219 store i32 8, i32 addrspace(1)* %out
220 br label %end
221
222 end:
223 ret void
224 }