llvm.org GIT mirror llvm / 8c7e984
AMDGPU: Fix breaking VOP3 v_add_i32s This was shrinking the instruction even though the carry output register was a virtual register, not known VCC. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291716 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 3 years ago
2 changed file(s) with 316 addition(s) and 1 deletion(s). Raw diff Collapse all Expand all
8383 // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
8484 // a special case for it. It can only be shrunk if the third operand
8585 // is vcc. We should handle this the same way we handle vopc, by adding
86 // a register allocation hint pre-regalloc and then do the shrining
86 // a register allocation hint pre-regalloc and then do the shrinking
8787 // post-regalloc.
8888 if (Src2) {
8989 switch (MI.getOpcode()) {
455455 continue;
456456 }
457457
458 // Check for the bool flag output for instructions like V_ADD_I32_e64.
459 const MachineOperand *SDst = TII->getNamedOperand(MI,
460 AMDGPU::OpName::sdst);
461 if (SDst && SDst->getReg() != AMDGPU::VCC) {
462 if (TargetRegisterInfo::isVirtualRegister(SDst->getReg()))
463 MRI.setRegAllocationHint(SDst->getReg(), 0, AMDGPU::VCC);
464
465 continue;
466 }
467
458468 // We can shrink this instruction
459469 DEBUG(dbgs() << "Shrinking " << MI);
460470
0 # RUN: llc -verify-machineinstrs -march=amdgcn -run-pass si-shrink-instructions -o - %s | FileCheck -check-prefix=GCN %s
1 # Check that add with carry out isn't incorrectly reduced to e32 when
2 # the carry out is a virtual register.
3
4 # TODO: We should run this test until the end of codegen to make sure
5 # that the post-RA run does manage to shrink it, but right now the
6 # resume crashes
7
# The LLVM IR module below exists only to provide the named pointers
# (%ir.a.ptr, %ir.b.ptr, %ir.out.gep) that the machine memory operands in the
# MIR bodies refer to; the machine functions themselves are hand-written.
8 --- |
9 define void @shrink_add_vop3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
10 %tid = call i32 @llvm.amdgcn.workitem.id.x()
11 %tid.ext = sext i32 %tid to i64
12 %a.ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
13 %b.ptr = getelementptr i32, i32 addrspace(1)* %a.ptr, i32 1
14 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 %tid.ext
15 %a = load volatile i32, i32 addrspace(1)* %a.ptr
16 %b = load volatile i32, i32 addrspace(1)* %b.ptr
17 %result = add i32 %a, %b
18 store volatile i32 %result, i32 addrspace(1)* %out.gep
19 ret void
20 }
21
22 define void @shrink_sub_vop3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
23 %tid = call i32 @llvm.amdgcn.workitem.id.x()
24 %tid.ext = sext i32 %tid to i64
25 %a.ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
26 %b.ptr = getelementptr i32, i32 addrspace(1)* %a.ptr, i32 1
27 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 %tid.ext
28 %a = load volatile i32, i32 addrspace(1)* %a.ptr
29 %b = load volatile i32, i32 addrspace(1)* %b.ptr
30 %result = sub i32 %a, %b
31 store volatile i32 %result, i32 addrspace(1)* %out.gep
32 ret void
33 }
34
# NOTE(review): the subrev IR body is identical to shrink_sub's (`sub %a, %b`);
# the reversed operand order only exists at the MIR level (V_SUBREV_I32_e64).
35 define void @shrink_subrev_vop3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
36 %tid = call i32 @llvm.amdgcn.workitem.id.x()
37 %tid.ext = sext i32 %tid to i64
38 %a.ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
39 %b.ptr = getelementptr i32, i32 addrspace(1)* %a.ptr, i32 1
40 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 %tid.ext
41 %a = load volatile i32, i32 addrspace(1)* %a.ptr
42 %b = load volatile i32, i32 addrspace(1)* %b.ptr
43 %result = sub i32 %a, %b
44 store volatile i32 %result, i32 addrspace(1)* %out.gep
45 ret void
46 }
47
48 declare i32 @llvm.amdgcn.workitem.id.x() #1
49
50 attributes #0 = { nounwind }
51 attributes #1 = { nounwind readnone }
52
53 ...
54 ---
55 # GCN-LABEL: name: shrink_add_vop3{{$}}
56 # GCN: %29, %9 = V_ADD_I32_e64 %19, %17, implicit %exec
57 # GCN: %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit %exec
# The carry-out of the e64 add is the virtual register %9 (class sreg_64,
# see `registers:` below), not the physical VCC. The CHECK lines above pin
# the fixed behavior: the shrink pass must keep the V_ADD_I32_e64 form
# (a shrink to e32 would force the carry into VCC) and the %9 use in
# V_CNDMASK_B32_e64 must survive.
58 name: shrink_add_vop3
59 alignment: 0
60 exposesReturnsTwice: false
61 legalized: false
62 regBankSelected: false
63 selected: false
64 tracksRegLiveness: true
65 registers:
66 - { id: 0, class: sgpr_64 }
67 - { id: 1, class: sreg_32_xm0 }
68 - { id: 2, class: sgpr_32 }
69 - { id: 3, class: vgpr_32 }
70 - { id: 4, class: sreg_64_xexec }
71 - { id: 5, class: sreg_64_xexec }
72 - { id: 6, class: sreg_32 }
73 - { id: 7, class: sreg_32 }
74 - { id: 8, class: sreg_32_xm0 }
75 - { id: 9, class: sreg_64 }
76 - { id: 10, class: sreg_32_xm0 }
77 - { id: 11, class: sreg_32_xm0 }
78 - { id: 12, class: sgpr_64 }
79 - { id: 13, class: sgpr_128 }
80 - { id: 14, class: sreg_32_xm0 }
81 - { id: 15, class: sreg_64 }
82 - { id: 16, class: sgpr_128 }
83 - { id: 17, class: vgpr_32 }
84 - { id: 18, class: vreg_64 }
85 - { id: 19, class: vgpr_32 }
86 - { id: 20, class: vreg_64 }
87 - { id: 21, class: sreg_32_xm0 }
88 - { id: 22, class: sreg_32 }
89 - { id: 23, class: sreg_32 }
90 - { id: 24, class: vgpr_32 }
91 - { id: 25, class: vreg_64 }
92 - { id: 26, class: vgpr_32 }
93 - { id: 27, class: vreg_64 }
94 - { id: 28, class: vreg_64 }
95 - { id: 29, class: vgpr_32 }
96 liveins:
97 - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
98 - { reg: '%vgpr0', virtual-reg: '%3' }
99 frameInfo:
100 isFrameAddressTaken: false
101 isReturnAddressTaken: false
102 hasStackMap: false
103 hasPatchPoint: false
104 stackSize: 0
105 offsetAdjustment: 0
106 maxAlignment: 0
107 adjustsStack: false
108 hasCalls: false
109 maxCallFrameSize: 0
110 hasOpaqueSPAdjustment: false
111 hasVAStart: false
112 hasMustTailInVarArgFunc: false
# Body: two volatile buffer loads feed V_ADD_I32_e64 whose sdst (%9) is
# consumed by V_CNDMASK_B32_e64, keeping the virtual carry register live.
113 body: |
114 bb.0 (%ir-block.0):
115 liveins: %sgpr0_sgpr1, %vgpr0
116
117 %3 = COPY %vgpr0
118 %0 = COPY %sgpr0_sgpr1
119 %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
120 %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
121 %26 = V_ASHRREV_I32_e32 31, %3, implicit %exec
122 %27 = REG_SEQUENCE %3, 1, %26, 2
123 %10 = S_MOV_B32 61440
124 %11 = S_MOV_B32 0
125 %12 = REG_SEQUENCE killed %11, 1, killed %10, 2
126 %13 = REG_SEQUENCE killed %5, 17, %12, 18
127 %28 = V_LSHL_B64 killed %27, 2, implicit %exec
128 %16 = REG_SEQUENCE killed %4, 17, %12, 18
129 %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.a.ptr)
130 %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.b.ptr)
131 %29, %9 = V_ADD_I32_e64 %19, %17, implicit %exec
132 %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit %exec
133 BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out.gep)
134 S_ENDPGM
135
136 ...
137 ---
138 # GCN-LABEL: name: shrink_sub_vop3{{$}}
139 # GCN: %29, %9 = V_SUB_I32_e64 %19, %17, implicit %exec
140 # GCN: %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit %exec
141
# Same scenario as shrink_add_vop3 but with V_SUB_I32_e64: the borrow-out %9
# is a virtual sreg_64, so the shrink pass must leave the e64 form intact.
142 name: shrink_sub_vop3
143 alignment: 0
144 exposesReturnsTwice: false
145 legalized: false
146 regBankSelected: false
147 selected: false
148 tracksRegLiveness: true
149 registers:
150 - { id: 0, class: sgpr_64 }
151 - { id: 1, class: sreg_32_xm0 }
152 - { id: 2, class: sgpr_32 }
153 - { id: 3, class: vgpr_32 }
154 - { id: 4, class: sreg_64_xexec }
155 - { id: 5, class: sreg_64_xexec }
156 - { id: 6, class: sreg_32 }
157 - { id: 7, class: sreg_32 }
158 - { id: 8, class: sreg_32_xm0 }
159 - { id: 9, class: sreg_64 }
160 - { id: 10, class: sreg_32_xm0 }
161 - { id: 11, class: sreg_32_xm0 }
162 - { id: 12, class: sgpr_64 }
163 - { id: 13, class: sgpr_128 }
164 - { id: 14, class: sreg_32_xm0 }
165 - { id: 15, class: sreg_64 }
166 - { id: 16, class: sgpr_128 }
167 - { id: 17, class: vgpr_32 }
168 - { id: 18, class: vreg_64 }
169 - { id: 19, class: vgpr_32 }
170 - { id: 20, class: vreg_64 }
171 - { id: 21, class: sreg_32_xm0 }
172 - { id: 22, class: sreg_32 }
173 - { id: 23, class: sreg_32 }
174 - { id: 24, class: vgpr_32 }
175 - { id: 25, class: vreg_64 }
176 - { id: 26, class: vgpr_32 }
177 - { id: 27, class: vreg_64 }
178 - { id: 28, class: vreg_64 }
179 - { id: 29, class: vgpr_32 }
180 liveins:
181 - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
182 - { reg: '%vgpr0', virtual-reg: '%3' }
183 frameInfo:
184 isFrameAddressTaken: false
185 isReturnAddressTaken: false
186 hasStackMap: false
187 hasPatchPoint: false
188 stackSize: 0
189 offsetAdjustment: 0
190 maxAlignment: 0
191 adjustsStack: false
192 hasCalls: false
193 maxCallFrameSize: 0
194 hasOpaqueSPAdjustment: false
195 hasVAStart: false
196 hasMustTailInVarArgFunc: false
197 body: |
198 bb.0 (%ir-block.0):
199 liveins: %sgpr0_sgpr1, %vgpr0
200
201 %3 = COPY %vgpr0
202 %0 = COPY %sgpr0_sgpr1
203 %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
204 %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
205 %26 = V_ASHRREV_I32_e32 31, %3, implicit %exec
206 %27 = REG_SEQUENCE %3, 1, %26, 2
207 %10 = S_MOV_B32 61440
208 %11 = S_MOV_B32 0
209 %12 = REG_SEQUENCE killed %11, 1, killed %10, 2
210 %13 = REG_SEQUENCE killed %5, 17, %12, 18
211 %28 = V_LSHL_B64 killed %27, 2, implicit %exec
212 %16 = REG_SEQUENCE killed %4, 17, %12, 18
213 %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.a.ptr)
214 %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.b.ptr)
215 %29, %9 = V_SUB_I32_e64 %19, %17, implicit %exec
216 %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit %exec
217 BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out.gep)
218 S_ENDPGM
219
220 ...
221 ---
222 # GCN-LABEL: name: shrink_subrev_vop3{{$}}
223 # GCN: %29, %9 = V_SUBREV_I32_e64 %19, %17, implicit %exec
224 # GCN: %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit %exec
225
# Same scenario again with V_SUBREV_I32_e64 (reversed-operand subtract):
# the borrow-out %9 is a virtual sreg_64, so no e32 shrink is allowed.
# NOTE(review): unlike the add/sub cases this one stores the arithmetic
# result %29 rather than the cndmask %24 — looks intentional (it still keeps
# %9 alive via the cndmask), but confirm against the upstream test.
226 name: shrink_subrev_vop3
227 alignment: 0
228 exposesReturnsTwice: false
229 legalized: false
230 regBankSelected: false
231 selected: false
232 tracksRegLiveness: true
233 registers:
234 - { id: 0, class: sgpr_64 }
235 - { id: 1, class: sreg_32_xm0 }
236 - { id: 2, class: sgpr_32 }
237 - { id: 3, class: vgpr_32 }
238 - { id: 4, class: sreg_64_xexec }
239 - { id: 5, class: sreg_64_xexec }
240 - { id: 6, class: sreg_32 }
241 - { id: 7, class: sreg_32 }
242 - { id: 8, class: sreg_32_xm0 }
243 - { id: 9, class: sreg_64 }
244 - { id: 10, class: sreg_32_xm0 }
245 - { id: 11, class: sreg_32_xm0 }
246 - { id: 12, class: sgpr_64 }
247 - { id: 13, class: sgpr_128 }
248 - { id: 14, class: sreg_32_xm0 }
249 - { id: 15, class: sreg_64 }
250 - { id: 16, class: sgpr_128 }
251 - { id: 17, class: vgpr_32 }
252 - { id: 18, class: vreg_64 }
253 - { id: 19, class: vgpr_32 }
254 - { id: 20, class: vreg_64 }
255 - { id: 21, class: sreg_32_xm0 }
256 - { id: 22, class: sreg_32 }
257 - { id: 23, class: sreg_32 }
258 - { id: 24, class: vgpr_32 }
259 - { id: 25, class: vreg_64 }
260 - { id: 26, class: vgpr_32 }
261 - { id: 27, class: vreg_64 }
262 - { id: 28, class: vreg_64 }
263 - { id: 29, class: vgpr_32 }
264 liveins:
265 - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
266 - { reg: '%vgpr0', virtual-reg: '%3' }
267 frameInfo:
268 isFrameAddressTaken: false
269 isReturnAddressTaken: false
270 hasStackMap: false
271 hasPatchPoint: false
272 stackSize: 0
273 offsetAdjustment: 0
274 maxAlignment: 0
275 adjustsStack: false
276 hasCalls: false
277 maxCallFrameSize: 0
278 hasOpaqueSPAdjustment: false
279 hasVAStart: false
280 hasMustTailInVarArgFunc: false
281 body: |
282 bb.0 (%ir-block.0):
283 liveins: %sgpr0_sgpr1, %vgpr0
284
285 %3 = COPY %vgpr0
286 %0 = COPY %sgpr0_sgpr1
287 %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
288 %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
289 %26 = V_ASHRREV_I32_e32 31, %3, implicit %exec
290 %27 = REG_SEQUENCE %3, 1, %26, 2
291 %10 = S_MOV_B32 61440
292 %11 = S_MOV_B32 0
293 %12 = REG_SEQUENCE killed %11, 1, killed %10, 2
294 %13 = REG_SEQUENCE killed %5, 17, %12, 18
295 %28 = V_LSHL_B64 killed %27, 2, implicit %exec
296 %16 = REG_SEQUENCE killed %4, 17, %12, 18
297 %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.a.ptr)
298 %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.b.ptr)
299 %29, %9 = V_SUBREV_I32_e64 %19, %17, implicit %exec
300 %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit %exec
301 BUFFER_STORE_DWORD_ADDR64 %29, %28, killed %16, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out.gep)
302 S_ENDPGM
303
304 ...