llvm.org GIT mirror llvm / fdc698b
AMDGPU: Erase redundant redefs of m0 in SIFoldOperands Only handle simple intra-block redefs of m0 to the same value. This avoids interference from redefs of m0 in SILoadStoreOptimizer. I was initially teaching that pass to ignore redefs of m0, but having them not exist beforehand is much simpler. This is in preparation for deleting the current special m0 handling in SIFixSGPRCopies to allow the register coalescer to handle the difficult cases. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375449 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 1 year, 1 month ago
2 changed file(s) with 387 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
13481348
13491349 for (MachineBasicBlock *MBB : depth_first(&MF)) {
13501350 MachineBasicBlock::iterator I, Next;
1351
1352 MachineOperand *CurrentKnownM0Val = nullptr;
13511353 for (I = MBB->begin(); I != MBB->end(); I = Next) {
13521354 Next = std::next(I);
13531355 MachineInstr &MI = *I;
13601362 if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
13611363 !tryFoldOMod(MI))
13621364 tryFoldClamp(MI);
1365
1366 // Saw an unknown clobber of m0, so we no longer know what it is.
1367 if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI))
1368 CurrentKnownM0Val = nullptr;
1369 continue;
1370 }
1371
1372 // Specially track simple redefs of m0 to the same value in a block, so we
1373 // can erase the later ones.
1374 if (MI.getOperand(0).getReg() == AMDGPU::M0) {
1375 MachineOperand &NewM0Val = MI.getOperand(1);
1376 if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) {
1377 MI.eraseFromParent();
1378 continue;
1379 }
1380
1381 // We aren't tracking other physical registers
1382 CurrentKnownM0Val = (NewM0Val.isReg() && NewM0Val.getReg().isPhysical()) ?
1383 nullptr : &NewM0Val;
13631384 continue;
13641385 }
13651386
0 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
1 # RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck -check-prefix=GCN %s
2
3 --- |
4 define amdgpu_kernel void @redef_m0_same_copy() { ret void }
5 define amdgpu_kernel void @multi_redef_m0_same_copy() { ret void }
6 define amdgpu_kernel void @redef_m0_different_copy() { ret void }
7 define amdgpu_kernel void @redef_m0_mixed_copy0() { ret void }
8 define amdgpu_kernel void @redef_m0_mixed_copy1() { ret void }
9 define amdgpu_kernel void @redef_m0_same_mov_imm() { ret void }
10 define amdgpu_kernel void @redef_m0_different_inst0() { ret void }
11 define amdgpu_kernel void @redef_m0_different_inst1() { ret void }
12 define amdgpu_kernel void @redef_m0_mixed_read_m0() { ret void }
13 define amdgpu_kernel void @redef_m0_same_copy_call() { ret void }
14 define amdgpu_kernel void @redef_m0_same_copy_multi_block() { ret void }
15 define amdgpu_kernel void @redef_m0_copy_self() { ret void }
16 define amdgpu_kernel void @redef_m0_copy_physreg() { ret void }
17
18 declare void @func()
19 ...
20
21 ---
22 name: redef_m0_same_copy
23 tracksRegLiveness: true
24 machineFunctionInfo:
25 isEntryFunction: true
26 body: |
27 bb.0:
28 liveins: $vgpr0, $sgpr0
29
30 ; GCN-LABEL: name: redef_m0_same_copy
31 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
32 ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
33 ; GCN: $m0 = COPY [[COPY1]]
34 ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4)
35 ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4)
36 %0:vgpr_32 = COPY $vgpr0
37 %1:sgpr_32 = COPY $sgpr0
38 $m0 = COPY %1
39 %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4)
40 $m0 = COPY %1
41 %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4)
42
43 ...
44
45 ---
46 name: multi_redef_m0_same_copy
47 tracksRegLiveness: true
48 machineFunctionInfo:
49 isEntryFunction: true
50 body: |
51 bb.0:
52 liveins: $vgpr0, $sgpr0
53
54 ; GCN-LABEL: name: multi_redef_m0_same_copy
55 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
56 ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
57 ; GCN: $m0 = COPY [[COPY1]]
58 ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4)
59 ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4)
60 %0:vgpr_32 = COPY $vgpr0
61 %1:sgpr_32 = COPY $sgpr0
62 $m0 = COPY %1
63 %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4)
64 $m0 = COPY %1
65 $m0 = COPY %1
66 %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4)
67
68 ...
69
70 ---
71 name: redef_m0_different_copy
72 tracksRegLiveness: true
73 machineFunctionInfo:
74 isEntryFunction: true
75 body: |
76 bb.0:
77 liveins: $vgpr0, $sgpr0, $sgpr1
78
79 ; GCN-LABEL: name: redef_m0_different_copy
80 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
81 ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
82 ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1
83 ; GCN: $m0 = COPY [[COPY1]]
84 ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4)
85 ; GCN: $m0 = COPY [[COPY2]]
86 ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4)
87 %0:vgpr_32 = COPY $vgpr0
88 %1:sgpr_32 = COPY $sgpr0
89 %2:sgpr_32 = COPY $sgpr1
90 $m0 = COPY %1
91 %3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4)
92 $m0 = COPY %2
93 %4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4)
94
95 ...
96
97 ---
98 name: redef_m0_mixed_copy0
99 tracksRegLiveness: true
100 machineFunctionInfo:
101 isEntryFunction: true
102 body: |
103 bb.0:
104 liveins: $vgpr0, $sgpr0, $sgpr1
105
106 ; GCN-LABEL: name: redef_m0_mixed_copy0
107 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
108 ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
109 ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1
110 ; GCN: $m0 = COPY [[COPY1]]
111 ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4)
112 ; GCN: $m0 = COPY [[COPY2]]
113 ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4)
114 %0:vgpr_32 = COPY $vgpr0
115 %1:sgpr_32 = COPY $sgpr0
116 %2:sgpr_32 = COPY $sgpr1
117 $m0 = COPY %1
118 %3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4)
119 $m0 = COPY %1
120 $m0 = COPY %2
121 %4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4)
122
123 ...
124
125 ---
126 name: redef_m0_mixed_copy1
127 tracksRegLiveness: true
128
129 machineFunctionInfo:
130 isEntryFunction: true
131 body: |
132 bb.0:
133 liveins: $vgpr0, $sgpr0, $sgpr1
134
135 ; GCN-LABEL: name: redef_m0_mixed_copy1
136 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
137 ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
138 ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1
139 ; GCN: $m0 = COPY [[COPY1]]
140 ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4)
141 ; GCN: $m0 = COPY [[COPY2]]
142 ; GCN: $m0 = COPY [[COPY1]]
143 ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4)
144 %0:vgpr_32 = COPY $vgpr0
145 %1:sgpr_32 = COPY $sgpr0
146 %2:sgpr_32 = COPY $sgpr1
147 $m0 = COPY %1
148 %3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4)
149 $m0 = COPY %2
150 $m0 = COPY %1
151 %4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4)
152
153 ...
154
155 ---
156 name: redef_m0_same_mov_imm
157 tracksRegLiveness: true
158 machineFunctionInfo:
159 isEntryFunction: true
160 body: |
161 bb.0:
162 liveins: $vgpr0, $sgpr0
163
164 ; GCN-LABEL: name: redef_m0_same_mov_imm
165 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
166 ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
167 ; GCN: $m0 = S_MOV_B32 -1
168 ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4)
169 ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4)
170 %0:vgpr_32 = COPY $vgpr0
171 %1:sgpr_32 = COPY $sgpr0
172 $m0 = S_MOV_B32 -1
173 %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4)
174 $m0 = S_MOV_B32 -1
175 %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4)
176
177 ...
178
179 ---
180 name: redef_m0_different_inst0
181 tracksRegLiveness: true
182 machineFunctionInfo:
183 isEntryFunction: true
184 body: |
185 bb.0:
186 liveins: $vgpr0, $sgpr0
187
188 ; GCN-LABEL: name: redef_m0_different_inst0
189 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
190 ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
191 ; GCN: $m0 = COPY [[COPY1]]
192 ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4)
193 ; GCN: $m0 = IMPLICIT_DEF
194 ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4)
195 %0:vgpr_32 = COPY $vgpr0
196 %1:sgpr_32 = COPY $sgpr0
197 $m0 = COPY %1
198 %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4)
199 $m0 = IMPLICIT_DEF
200 %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4)
201
202 ...
203
204 ---
205 name: redef_m0_different_inst1
206 tracksRegLiveness: true
207 machineFunctionInfo:
208 isEntryFunction: true
209 body: |
210 bb.0:
211 liveins: $vgpr0, $sgpr0
212
213 ; GCN-LABEL: name: redef_m0_different_inst1
214 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
215 ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
216 ; GCN: $m0 = COPY [[COPY1]]
217 ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4)
218 ; GCN: S_NOP 0, implicit-def $m0
219 ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4)
220 %0:vgpr_32 = COPY $vgpr0
221 %1:sgpr_32 = COPY $sgpr0
222 $m0 = COPY %1
223 %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4)
224 S_NOP 0, implicit-def $m0
225 %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4)
226
227 ...
228
229 ---
230 name: redef_m0_mixed_read_m0
231 tracksRegLiveness: true
232 machineFunctionInfo:
233 isEntryFunction: true
234 body: |
235 bb.0:
236 liveins: $vgpr0, $sgpr0, $sgpr1
237
238 ; GCN-LABEL: name: redef_m0_mixed_read_m0
239 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
240 ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
241 ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1
242 ; GCN: $m0 = COPY [[COPY1]]
243 ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4)
244 ; GCN: $m0 = COPY [[COPY2]]
245 ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4)
246 ; GCN: [[DS_READ_B32_2:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 128, 0, implicit $m0, implicit $exec :: (load 4)
247 %0:vgpr_32 = COPY $vgpr0
248 %1:sgpr_32 = COPY $sgpr0
249 %2:sgpr_32 = COPY $sgpr1
250 $m0 = COPY %1
251 %3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4)
252 $m0 = COPY %2
253 %4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4)
254 $m0 = COPY %2
255 %5:vgpr_32 = DS_READ_B32 %0, 128, 0, implicit $m0, implicit $exec :: (load 4)
256 ...
257
258 ---
259 name: redef_m0_same_copy_call
260 tracksRegLiveness: true
261 machineFunctionInfo:
262 isEntryFunction: true
263 body: |
264 bb.0:
265 liveins: $vgpr0, $sgpr0
266
267 ; GCN-LABEL: name: redef_m0_same_copy_call
268 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
269 ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
270 ; GCN: $m0 = COPY [[COPY1]]
271 ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4)
272 ; GCN: dead $sgpr30_sgpr31 = SI_CALL undef $sgpr6_sgpr7, @func, csr_amdgpu_highregs
273 ; GCN: $m0 = COPY [[COPY1]]
274 ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4)
275 %0:vgpr_32 = COPY $vgpr0
276 %1:sgpr_32 = COPY $sgpr0
277 $m0 = COPY %1
278 %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4)
279 dead $sgpr30_sgpr31 = SI_CALL undef $sgpr6_sgpr7, @func, csr_amdgpu_highregs
280 $m0 = COPY %1
281 %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4)
282
283 ...
284
285 ---
286 name: redef_m0_same_copy_multi_block
287 tracksRegLiveness: true
288 machineFunctionInfo:
289 isEntryFunction: true
290 body: |
291 ; GCN-LABEL: name: redef_m0_same_copy_multi_block
292 ; GCN: bb.0:
293 ; GCN: successors: %bb.1(0x80000000)
294 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
295 ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
296 ; GCN: $m0 = COPY [[COPY1]]
297 ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4)
298 ; GCN: bb.1:
299 ; GCN: $m0 = COPY [[COPY1]]
300 ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4)
301 bb.0:
302 liveins: $vgpr0, $sgpr0
303
304 %0:vgpr_32 = COPY $vgpr0
305 %1:sgpr_32 = COPY $sgpr0
306 $m0 = COPY %1
307 %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4)
308
309 bb.1:
310 $m0 = COPY %1
311 %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4)
312
313 ...
314
315 ---
316 name: redef_m0_copy_self
317 tracksRegLiveness: true
318 machineFunctionInfo:
319 isEntryFunction: true
320 body: |
321 bb.0:
322 liveins: $vgpr0, $sgpr0
323
324 ; GCN-LABEL: name: redef_m0_copy_self
325 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
326 ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
327 ; GCN: $m0 = COPY [[COPY1]]
328 ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4)
329 ; GCN: $m0 = COPY $m0
330 ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4)
331 %0:vgpr_32 = COPY $vgpr0
332 %1:sgpr_32 = COPY $sgpr0
333 $m0 = COPY %1
334 %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4)
335 $m0 = COPY $m0
336 %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4)
337
338 ...
339
340 ---
341 name: redef_m0_copy_physreg
342 tracksRegLiveness: true
343 machineFunctionInfo:
344 isEntryFunction: true
345 body: |
346 bb.0:
347 liveins: $vgpr0, $sgpr0
348
349 ; GCN-LABEL: name: redef_m0_copy_physreg
350 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
351 ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
352 ; GCN: $m0 = COPY $sgpr0
353 ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4)
354 ; GCN: $sgpr0 = S_MOV_B32 0
355 ; GCN: $m0 = COPY $sgpr0
356 ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4)
357 %0:vgpr_32 = COPY $vgpr0
358 %1:sgpr_32 = COPY $sgpr0
359 $m0 = COPY $sgpr0
360 %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4)
361 $sgpr0 = S_MOV_B32 0
362 $m0 = COPY $sgpr0
363 %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4)
364
365 ...