llvm.org GIT mirror llvm / efa1355
R600: Add work-around for the CF stack entry HW bug. The CF stack can be corrupted if you use CF_ALU_PUSH_BEFORE, CF_ALU_ELSE_AFTER, CF_ALU_BREAK, or CF_ALU_CONTINUE when the number of sub-entries on the stack is greater than or equal to the stack entry size and sub-entries modulo 4 is either 0 or 3 (on cedar the bug is present when the number of sub-entries modulo 8 is either 7 or 0). We choose to be conservative and always apply the work-around when the number of sub-entries is greater than or equal to the stack entry size, so that we can safely over-allocate the stack when we are unsure of the stack allocation rules. reviewed-by: Vincent Lejeune <vljn at ovi.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199842 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 6 years ago
6 changed file(s) with 288 addition(s) and 7 deletion(s). Raw diff Collapse all Expand all
6262 "true",
6363 "Use Cayman ISA">;
6464
// Subtarget feature selected with the feature string "cfalubug". When enabled
// it assigns "true" to the CFALUBug attribute; the human-readable description
// shown for the feature is "GPU has CF_ALU bug".
65 def FeatureCFALUBug : SubtargetFeature<"cfalubug",
66 "CFALUBug",
67 "true",
68 "GPU has CF_ALU bug">;
69
6570 class SubtargetFeatureFetchLimit :
6671 SubtargetFeature <"fetch"#Value,
6772 "TexVTXClauseSize",
3838 EnableIRStructurizer = true;
3939 EnableIfCvt = true;
4040 WavefrontSize = 0;
41 CFALUBug = false;
4142 ParseSubtargetFeatures(GPU, FS);
4243 DevName = GPU;
4344 }
9697 }
9798 }
/// Return the value of the CFALUBug flag for this subtarget.
///
/// Callers are expected to query this only for generations up to and
/// including NORTHERN_ISLANDS; the assert below enforces that precondition
/// in builds where assertions are enabled.
9899 bool
100 AMDGPUSubtarget::hasCFAluBug() const {
101 assert(getGeneration() <= NORTHERN_ISLANDS);
102 return CFALUBug;
103 }
/// Always returns false. NOTE(review): going by its name this is the
/// "is the target ELF?" query, which this subtarget answers negatively
/// unconditionally - confirm against callers.
104 bool
99105 AMDGPUSubtarget::isTargetELF() const {
100106 return false;
101107 }
5151 bool EnableIRStructurizer;
5252 bool EnableIfCvt;
5353 unsigned WavefrontSize;
54 bool CFALUBug;
5455
5556 InstrItineraryData InstrItins;
5657
7071 bool isIfCvtEnabled() const;
7172 unsigned getWavefrontSize() const;
7273 unsigned getStackEntrySize() const;
74 bool hasCFAluBug() const;
7375
7476 virtual bool enableMachineScheduler() const {
7577 return getGeneration() <= NORTHERN_ISLANDS;
4545 //===----------------------------------------------------------------------===//
4646
4747 def : Proc<"cedar", R600_VLIW5_Itin,
48 [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize32]>;
48 [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize32,
49 FeatureCFALUBug]>;
4950
5051 def : Proc<"redwood", R600_VLIW5_Itin,
51 [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64]>;
52 [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64,
53 FeatureCFALUBug]>;
5254
5355 def : Proc<"sumo", R600_VLIW5_Itin,
54 [FeatureEvergreen, FeatureWavefrontSize64]>;
56 [FeatureEvergreen, FeatureWavefrontSize64, FeatureCFALUBug]>;
5557
5658 def : Proc<"juniper", R600_VLIW5_Itin,
5759 [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64]>;
6567 //===----------------------------------------------------------------------===//
6668
6769 def : Proc<"barts", R600_VLIW5_Itin,
68 [FeatureNorthernIslands, FeatureVertexCache]>;
70 [FeatureNorthernIslands, FeatureVertexCache, FeatureCFALUBug]>;
6971
7072 def : Proc<"turks", R600_VLIW5_Itin,
71 [FeatureNorthernIslands, FeatureVertexCache]>;
73 [FeatureNorthernIslands, FeatureVertexCache, FeatureCFALUBug]>;
7274
7375 def : Proc<"caicos", R600_VLIW5_Itin,
74 [FeatureNorthernIslands]>;
76 [FeatureNorthernIslands, FeatureCFALUBug]>;
7577
7678 def : Proc<"cayman", R600_VLIW4_Itin,
7779 [FeatureNorthernIslands, FeatureFP64, FeatureCaymanISA]>;
7070 return true;
7171 }
7272 return false;
73 }
74
// Decide whether the control-flow instruction Opcode has to be emitted with
// the stack work-around, given the current state of this CFStack.
//
// Returns true when either:
//  * Opcode is CF_ALU_PUSH_BEFORE on a subtarget with the Cayman ISA and the
//    current loop depth is greater than 1, or
//  * the subtarget reports the CF_ALU bug, Opcode is one of
//    CF_ALU_PUSH_BEFORE, CF_ALU_ELSE_AFTER, CF_ALU_BREAK or CF_ALU_CONTINUE,
//    and CurrentSubEntries exceeds a wavefront-size-dependent threshold
//    (3 for a wavefront size of 64, 7 for a wavefront size of 32).
// Returns false in every other case.
75 bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
// This check is made before the hasCFAluBug() test, so it applies
// independently of that feature flag.
76 if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST.hasCaymanISA() &&
77 getLoopDepth() > 1)
78 return true;
79
// Everything below only matters on subtargets that report the CF_ALU bug.
80 if (!ST.hasCFAluBug())
81 return false;
82
83 switch(Opcode) {
// Any opcode other than the four listed below never needs the work-around.
84 default: return false;
85 case AMDGPU::CF_ALU_PUSH_BEFORE:
86 case AMDGPU::CF_ALU_ELSE_AFTER:
87 case AMDGPU::CF_ALU_BREAK:
88 case AMDGPU::CF_ALU_CONTINUE:
// With no sub-entries on the stack there is nothing to work around.
89 if (CurrentSubEntries == 0)
90 return false;
91 if (ST.getWavefrontSize() == 64) {
92 // We are being conservative here. We only require this work-around if
93 // CurrentSubEntries > 3 &&
94 // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
95 //
96 // We have to be conservative, because we don't know for certain that
97 // our stack allocation algorithm for Evergreen/NI is correct. Applying this
98 // work-around when CurrentSubEntries > 3 allows us to over-allocate stack
99 // resources without any problems.
100 return CurrentSubEntries > 3;
101 } else {
// The only other wavefront size expected here is 32.
102 assert(ST.getWavefrontSize() == 32);
103 // We are being conservative here. We only require the work-around if
104 // CurrentSubEntries > 7 &&
105 // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
106 // See the comment on the wavefront size == 64 case for why we are
107 // being conservative.
108 return CurrentSubEntries > 7;
109 }
110 }
73111 }
74112
75113 unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
471509 if (MI->getOpcode() == AMDGPU::CF_ALU)
472510 LastAlu.back() = MI;
473511 I++;
512 bool RequiresWorkAround =
513 CFStack.requiresWorkAroundForInst(MI->getOpcode());
474514 switch (MI->getOpcode()) {
475515 case AMDGPU::CF_ALU_PUSH_BEFORE:
476 if (ST.hasCaymanISA() && CFStack.getLoopDepth() > 1) {
516 if (RequiresWorkAround) {
517 DEBUG(dbgs() << "Applying bug work-around for ALU_PUSH_BEFORE\n");
477518 BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG))
478519 .addImm(CfCount + 1)
479520 .addImm(1);
0 ; RUN: llc -march=r600 -mcpu=redwood -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
1 ; RUN: llc -march=r600 -mcpu=sumo -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
2 ; RUN: llc -march=r600 -mcpu=barts -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
3 ; RUN: llc -march=r600 -mcpu=turks -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
4 ; RUN: llc -march=r600 -mcpu=caicos -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
5 ; RUN: llc -march=r600 -mcpu=cedar -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG32 --check-prefix=FUNC
6 ; RUN: llc -march=r600 -mcpu=juniper -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=NOBUG --check-prefix=FUNC
7 ; RUN: llc -march=r600 -mcpu=cypress -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=NOBUG --check-prefix=FUNC
8 ; RUN: llc -march=r600 -mcpu=cayman -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=NOBUG --check-prefix=FUNC
9
10 ; We are currently allocating 2 extra sub-entries on Evergreen / NI for
11 ; non-WQM push instructions. If we change this to 1, then we will need to
12 ; add one level of depth to each of these tests.
13
14 ; BUG64-NOT: Applying bug work-around
15 ; BUG32-NOT: Applying bug work-around
16 ; NOBUG-NOT: Applying bug work-around
17 ; FUNC-LABEL: @nested3
; Three levels of nested conditional branches on %cond (compared against 0,
; 10 and 20). The entry check falls straight through to %end when it fails;
; each deeper level stores its own depth (1, 2 or 3) to %out before jumping
; to %end.
18 define void @nested3(i32 addrspace(1)* %out, i32 %cond) {
19 entry:
20 %0 = icmp sgt i32 %cond, 0
21 br i1 %0, label %if.1, label %end
22
23 if.1:
24 %1 = icmp sgt i32 %cond, 10
25 br i1 %1, label %if.2, label %if.store.1
26
27 if.store.1:
28 store i32 1, i32 addrspace(1)* %out
29 br label %end
30
31 if.2:
32 %2 = icmp sgt i32 %cond, 20
33 br i1 %2, label %if.3, label %if.2.store
34
35 if.2.store:
36 store i32 2, i32 addrspace(1)* %out
37 br label %end
38
39 if.3:
40 store i32 3, i32 addrspace(1)* %out
41 br label %end
42
43 end:
44 ret void
45 }
46
47 ; BUG64: Applying bug work-around
48 ; BUG32-NOT: Applying bug work-around
49 ; NOBUG-NOT: Applying bug work-around
50 ; FUNC-LABEL: @nested4
; Four levels of nested conditional branches on %cond (compared against 0,
; 10, 20 and 30). The entry check falls straight through to %end when it
; fails; each deeper level stores its own depth (1 to 4) to %out before
; jumping to %end.
51 define void @nested4(i32 addrspace(1)* %out, i32 %cond) {
52 entry:
53 %0 = icmp sgt i32 %cond, 0
54 br i1 %0, label %if.1, label %end
55
56 if.1:
57 %1 = icmp sgt i32 %cond, 10
58 br i1 %1, label %if.2, label %if.1.store
59
60 if.1.store:
61 store i32 1, i32 addrspace(1)* %out
62 br label %end
63
64 if.2:
65 %2 = icmp sgt i32 %cond, 20
66 br i1 %2, label %if.3, label %if.2.store
67
68 if.2.store:
69 store i32 2, i32 addrspace(1)* %out
70 br label %end
71
72 if.3:
73 %3 = icmp sgt i32 %cond, 30
74 br i1 %3, label %if.4, label %if.3.store
75
76 if.3.store:
77 store i32 3, i32 addrspace(1)* %out
78 br label %end
79
80 if.4:
81 store i32 4, i32 addrspace(1)* %out
82 br label %end
83
84 end:
85 ret void
86 }
87
88 ; BUG64: Applying bug work-around
89 ; BUG32-NOT: Applying bug work-around
90 ; NOBUG-NOT: Applying bug work-around
91 ; FUNC-LABEL: @nested7
; Seven levels of nested conditional branches on %cond (compared against 0,
; 10, 20, 30, 40, 50 and 60). The entry check falls straight through to %end
; when it fails; each deeper level stores its own depth (1 to 7) to %out
; before jumping to %end.
92 define void @nested7(i32 addrspace(1)* %out, i32 %cond) {
93 entry:
94 %0 = icmp sgt i32 %cond, 0
95 br i1 %0, label %if.1, label %end
96
97 if.1:
98 %1 = icmp sgt i32 %cond, 10
99 br i1 %1, label %if.2, label %if.1.store
100
101 if.1.store:
102 store i32 1, i32 addrspace(1)* %out
103 br label %end
104
105 if.2:
106 %2 = icmp sgt i32 %cond, 20
107 br i1 %2, label %if.3, label %if.2.store
108
109 if.2.store:
110 store i32 2, i32 addrspace(1)* %out
111 br label %end
112
113 if.3:
114 %3 = icmp sgt i32 %cond, 30
115 br i1 %3, label %if.4, label %if.3.store
116
117 if.3.store:
118 store i32 3, i32 addrspace(1)* %out
119 br label %end
120
121 if.4:
122 %4 = icmp sgt i32 %cond, 40
123 br i1 %4, label %if.5, label %if.4.store
124
125 if.4.store:
126 store i32 4, i32 addrspace(1)* %out
127 br label %end
128
129 if.5:
130 %5 = icmp sgt i32 %cond, 50
131 br i1 %5, label %if.6, label %if.5.store
132
133 if.5.store:
134 store i32 5, i32 addrspace(1)* %out
135 br label %end
136
137 if.6:
138 %6 = icmp sgt i32 %cond, 60
139 br i1 %6, label %if.7, label %if.6.store
140
141 if.6.store:
142 store i32 6, i32 addrspace(1)* %out
143 br label %end
144
145 if.7:
146 store i32 7, i32 addrspace(1)* %out
147 br label %end
148
149 end:
150 ret void
151 }
152
153 ; BUG64: Applying bug work-around
154 ; BUG32: Applying bug work-around
155 ; NOBUG-NOT: Applying bug work-around
156 ; FUNC-LABEL: @nested8
; Eight levels of nested conditional branches on %cond (compared against 0,
; 10, 20, 30, 40, 50, 60 and 70). The entry check falls straight through to
; %end when it fails; each deeper level stores its own depth (1 to 8) to %out
; before jumping to %end.
157 define void @nested8(i32 addrspace(1)* %out, i32 %cond) {
158 entry:
159 %0 = icmp sgt i32 %cond, 0
160 br i1 %0, label %if.1, label %end
161
162 if.1:
163 %1 = icmp sgt i32 %cond, 10
164 br i1 %1, label %if.2, label %if.1.store
165
166 if.1.store:
167 store i32 1, i32 addrspace(1)* %out
168 br label %end
169
170 if.2:
171 %2 = icmp sgt i32 %cond, 20
172 br i1 %2, label %if.3, label %if.2.store
173
174 if.2.store:
175 store i32 2, i32 addrspace(1)* %out
176 br label %end
177
178 if.3:
179 %3 = icmp sgt i32 %cond, 30
180 br i1 %3, label %if.4, label %if.3.store
181
182 if.3.store:
183 store i32 3, i32 addrspace(1)* %out
184 br label %end
185
186 if.4:
187 %4 = icmp sgt i32 %cond, 40
188 br i1 %4, label %if.5, label %if.4.store
189
190 if.4.store:
191 store i32 4, i32 addrspace(1)* %out
192 br label %end
193
194 if.5:
195 %5 = icmp sgt i32 %cond, 50
196 br i1 %5, label %if.6, label %if.5.store
197
198 if.5.store:
199 store i32 5, i32 addrspace(1)* %out
200 br label %end
201
202 if.6:
203 %6 = icmp sgt i32 %cond, 60
204 br i1 %6, label %if.7, label %if.6.store
205
206 if.6.store:
207 store i32 6, i32 addrspace(1)* %out
208 br label %end
209
210 if.7:
211 %7 = icmp sgt i32 %cond, 70
212 br i1 %7, label %if.8, label %if.7.store
213
214 if.7.store:
215 store i32 7, i32 addrspace(1)* %out
216 br label %end
217
218 if.8:
219 store i32 8, i32 addrspace(1)* %out
220 br label %end
221
222 end:
223 ret void
224 }