llvm.org GIT mirror: llvm @ bec9628

AMDGPU: Support shrinking add with FI in SIFoldOperands

Avoids test regression in a future patch

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@359898 91177308-0d34-0410-b5e6-96231b3b80d8

Author: Matt Arsenault

2 changed files with 59 additions and 58 deletions.
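The functional change is in the SIFoldOperands pass: the operand being folded into a V_ADD_I32_e64, V_SUB_I32_e64, or V_SUBREV_I32_e64 may now be a frame index, not just an immediate, when deciding whether the instruction can be shrunk to its 32-bit _e32 form (which requires $vcc to be dead at that point). Below is a minimal MIR input sketch of the affected pattern, modeled on the test updates further down; the function name, stack-object size, and llc invocation are illustrative assumptions, not part of the commit.

# Assumed invocation:
#   llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=si-fold-operands -o - shrink-add-fi-example.mir
---
name:            shrink_add_with_fi_example
tracksRegLiveness: true
stack:
  - { id: 0, size: 64, alignment: 4 }
body:             |
  bb.0:
    liveins: $vgpr0

    ; %0 materializes a frame index; the add's carry-out %3 is never used,
    ; so after this patch the V_ADD_I32_e64 below can be shrunk to V_ADD_I32_e32.
    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %1:vgpr_32 = COPY $vgpr0
    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec
    S_ENDPGM 0, implicit %2
...

Before this change the V_ADD_I32_e64 in such a function was left untouched; afterwards it is shrunk to V_ADD_I32_e32, as the updated GCN check lines below show.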
@@ -215,52 +215,54 @@
         break;
       }
     }
-
-    if (Fold.needsShrink()) {
-      MachineBasicBlock *MBB = MI->getParent();
-      auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
-      if (Liveness != MachineBasicBlock::LQR_Dead)
-        return false;
-
-      MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
-      int Op32 = Fold.getShrinkOpcode();
-      MachineOperand &Dst0 = MI->getOperand(0);
-      MachineOperand &Dst1 = MI->getOperand(1);
-      assert(Dst0.isDef() && Dst1.isDef());
-
-      bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
-
-      const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
-      unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
-
-      MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
-
-      if (HaveNonDbgCarryUse) {
-        BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
-          .addReg(AMDGPU::VCC, RegState::Kill);
-      }
-
-      // Keep the old instruction around to avoid breaking iterators, but
-      // replace it with a dummy instruction to remove uses.
-      //
-      // FIXME: We should not invert how this pass looks at operands to avoid
-      // this. Should track set of foldable movs instead of looking for uses
-      // when looking at a use.
-      Dst0.setReg(NewReg0);
-      for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
-        MI->RemoveOperand(I);
-      MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF));
-
-      if (Fold.isCommuted())
-        TII.commuteInstruction(*Inst32, false);
-      return true;
-    }
-
+  }
+
+  if ((Fold.isImm() || Fold.isFI()) && Fold.needsShrink()) {
+    MachineBasicBlock *MBB = MI->getParent();
+    auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
+    if (Liveness != MachineBasicBlock::LQR_Dead)
+      return false;
+
+    MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+    int Op32 = Fold.getShrinkOpcode();
+    MachineOperand &Dst0 = MI->getOperand(0);
+    MachineOperand &Dst1 = MI->getOperand(1);
+    assert(Dst0.isDef() && Dst1.isDef());
+
+    bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
+
+    const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
+    unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
+
+    MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
+
+    if (HaveNonDbgCarryUse) {
+      BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
+        .addReg(AMDGPU::VCC, RegState::Kill);
+    }
+
+    // Keep the old instruction around to avoid breaking iterators, but
+    // replace it with a dummy instruction to remove uses.
+    //
+    // FIXME: We should not invert how this pass looks at operands to avoid
+    // this. Should track set of foldable movs instead of looking for uses
+    // when looking at a use.
+    Dst0.setReg(NewReg0);
+    for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
+      MI->RemoveOperand(I);
+    MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF));
+
+    if (Fold.isCommuted())
+      TII.commuteInstruction(*Inst32, false);
+    return true;
+  }
+
+  assert(!Fold.needsShrink() && "not handled");
+
+  if (Fold.isImm()) {
     Old.ChangeToImmediate(Fold.ImmToFold);
     return true;
   }
-
-  assert(!Fold.needsShrink() && "not handled");
 
   if (Fold.isFI()) {
     Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
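The relocated block above also handles the case where the 64-bit add's carry-out has non-debug uses: the shrunk _e32 instruction implicitly defines $vcc, the old carry register is recreated by the guarded COPY from $vcc, and the original instruction is turned into an IMPLICIT_DEF of a fresh register so iterators over it stay valid. A minimal input sketch of that carry-out-used case, with the same caveats as before (names, stack-object size, and invocation are illustrative assumptions):

---
name:            shrink_add_fi_carry_out_used
tracksRegLiveness: true
stack:
  - { id: 0, size: 64, alignment: 4 }
body:             |
  bb.0:
    liveins: $vgpr0

    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %1:vgpr_32 = COPY $vgpr0
    ; Both %2 and the carry-out %3 are used, so HaveNonDbgCarryUse is true in
    ; the block above: if $vcc is dead here, the add is rebuilt as
    ; V_ADD_I32_e32 (implicit-def $vcc) and %3 is recreated by a COPY from $vcc.
    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec
    S_ENDPGM 0, implicit %2, implicit %3
...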
@@ -362,7 +364,7 @@
     if ((Opc == AMDGPU::V_ADD_I32_e64 ||
          Opc == AMDGPU::V_SUB_I32_e64 ||
          Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME
-        OpToFold->isImm()) {
+        (OpToFold->isImm() || OpToFold->isFI())) {
       MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
 
       // Verify the other operand is a VGPR, otherwise we would violate the
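This special case, extended here to frame indexes, goes on to verify that the other source operand is a VGPR; as the comment notes, pairing a literal or frame index with an SGPR source would violate the constant bus restriction. In that case no shrink is attempted and the add keeps its V_ADD_I32_e64 form, which is what the SGPR tests further below verify. A minimal input sketch of that rejected case, again with illustrative names and invocation:

---
name:            no_shrink_sgpr_fi_add
tracksRegLiveness: true
stack:
  - { id: 0, size: 64, alignment: 4 }
body:             |
  bb.0:
    liveins: $sgpr0

    ; The non-frame-index source is an SGPR, so the fold/shrink is not
    ; attempted and the add keeps its V_ADD_I32_e64 form.
    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %1:sreg_32_xm0 = COPY $sgpr0
    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec
    S_ENDPGM 0, implicit %2
...

The remaining hunks are the regenerated GCN FileCheck lines in the accompanying MIR test.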
@@ -15,8 +15,8 @@
     ; GCN: liveins: $vgpr0
     ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
     ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[COPY]], 0, implicit $exec
-    ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]]
+    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_MOV_B32_e32_]], [[COPY]], implicit-def $vcc, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
     %1:vgpr_32 = COPY $vgpr0
     %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec
@@ -39,8 +39,8 @@
     ; GCN: liveins: $vgpr0
     ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
-    ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]]
+    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[COPY]], [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
     %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec
@@ -63,7 +63,7 @@
     ; GCN: liveins: $sgpr0
     ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
     ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
-    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[COPY]], 0, implicit $exec
+    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
     ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]]
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
     %1:sreg_32_xm0 = COPY $sgpr0
@@ -87,7 +87,7 @@
     ; GCN: liveins: $sgpr0
     ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
     ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[COPY]], 0, implicit $exec
     ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]]
     %0:sreg_32_xm0 = COPY $sgpr0
     %1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
@@ -111,8 +111,8 @@
     ; GCN: liveins: $vgpr0
     ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0
     ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[COPY]], 0, implicit $exec
-    ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]]
+    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[COPY]], implicit-def $vcc, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]
     %0:sreg_32_xm0 = S_MOV_B32 %stack.0
     %1:vgpr_32 = COPY $vgpr0
     %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec
@@ -135,8 +135,8 @@
     ; GCN: liveins: $vgpr0
     ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0
-    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[COPY]], [[S_MOV_B32_]], 0, implicit $exec
-    ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]]
+    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[COPY]], implicit-def $vcc, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]
     %0:vgpr_32 = COPY $vgpr0
     %1:sreg_32_xm0 = S_MOV_B32 %stack.0
     %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec
@@ -156,8 +156,8 @@
 
     ; GCN-LABEL: name: shrink_vgpr_imm_fi_vgpr_v_add_i32_e64_no_carry_out_use
     ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[V_MOV_B32_e32_]], 16, 0, implicit $exec
-    ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]]
+    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 16, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
     %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
     %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec
@@ -198,8 +198,7 @@
 
     ; GCN-LABEL: name: shrink_vgpr_k_fi_vgpr_v_add_i32_e64_no_carry_out_use
     ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1234, implicit $exec
-    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], implicit-def $vcc, implicit $exec
+    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 1234, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec
     ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
     %1:vgpr_32 = V_MOV_B32_e32 1234, implicit $exec