AMDGPU/GlobalISel: Implement select() for G_FCONSTANT

Summary: Also clean up G_CONSTANT selection.

Reviewers: arsenm, nhaehnle

Subscribers: kzhuravl, wdng, yaxunl, rovka, kristof.beyls, dstuttard, tpr, t-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D46170

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@332379 91177308-0d34-0410-b5e6-96231b3b80d8

Author: Tom Stellard
3 changed files with 116 additions and 23 deletions.
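The heart of the change is visible in the first hunk below: G_FCONSTANT carries an FPImm operand, and the selector rewrites it (and CImm, for G_CONSTANT) into a plain integer Imm by reinterpreting the IEEE-754 bit pattern via APFloat::bitcastToAPInt(). A minimal standalone sketch of that bit-cast, using std::memcpy in place of LLVM's APFloat (illustration only, not the patch's code), reproduces the exact immediates the new MIR test checks for:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Reinterpret an IEEE-754 float/double as raw integer bits -- the same
// transformation APFloat::bitcastToAPInt() performs in the selector.
static uint32_t bitsOfFloat(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits)); // well-defined type pun
  return Bits;
}

static uint64_t bitsOfDouble(double D) {
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));
  return Bits;
}

int main() {
  // float 1.0 -> 0x3F800000 = 1065353216, the S_MOV_B32/V_MOV_B32_e32
  // immediate the new test expects.
  std::printf("float 1.0  -> %u\n", bitsOfFloat(1.0f));

  // double 1.0 -> 0x3FF0000000000000; the 64-bit path splits this into
  // lo = 0 and hi = 1072693248, matching the REG_SEQUENCE checks.
  uint64_t D = bitsOfDouble(1.0);
  std::printf("double 1.0 -> lo=%u hi=%u\n", (uint32_t)D, (uint32_t)(D >> 32));
  return 0;
}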
@@ -203,36 +203,67 @@
   MachineBasicBlock *BB = I.getParent();
   MachineFunction *MF = BB->getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
+  MachineOperand &ImmOp = I.getOperand(1);
+
+  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
+  if (ImmOp.isFPImm()) {
+    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
+    ImmOp.ChangeToImmediate(Imm.getZExtValue());
+  } else if (ImmOp.isCImm()) {
+    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
+  }
+
   unsigned DstReg = I.getOperand(0).getReg();
-  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
-
+  unsigned Size;
+  bool IsSgpr;
+  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
+  if (RB) {
+    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
+    Size = MRI.getType(DstReg).getSizeInBits();
+  } else {
+    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
+    IsSgpr = TRI.isSGPRClass(RC);
+    Size = RC->MC->getPhysRegSize() * 8;
+  }
+
+  if (Size != 32 && Size != 64)
+    return false;
+
+  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
   if (Size == 32) {
-    I.setDesc(TII.get(AMDGPU::S_MOV_B32));
+    I.setDesc(TII.get(Opcode));
+    I.addImplicitDefUseOperands(*MF);
     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
   }
 
-  assert(Size == 64);
-
   DebugLoc DL = I.getDebugLoc();
-  unsigned LoReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
-  unsigned HiReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
-  const APInt &Imm = I.getOperand(1).getCImm()->getValue();
-
-  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), LoReg)
+  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
+                                           &AMDGPU::VGPR_32RegClass;
+  unsigned LoReg = MRI.createVirtualRegister(RC);
+  unsigned HiReg = MRI.createVirtualRegister(RC);
+  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());
+
+  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
     .addImm(Imm.trunc(32).getZExtValue());
 
-  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg)
+  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
    .addImm(Imm.ashr(32).getZExtValue());
 
-  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
-    .addReg(LoReg)
-    .addImm(AMDGPU::sub0)
-    .addReg(HiReg)
-    .addImm(AMDGPU::sub1);
+  const MachineInstr *RS =
+      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
+          .addReg(LoReg)
+          .addImm(AMDGPU::sub0)
+          .addReg(HiReg)
+          .addImm(AMDGPU::sub1);
+
   // We can't call constrainSelectedInstRegOperands here, because it doesn't
   // work for target independent opcodes
   I.eraseFromParent();
-  return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
+  const TargetRegisterClass *DstRC =
+      TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
+  if (!DstRC)
+    return true;
+  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
 }
 
 static bool isConstant(const MachineInstr &MI) {
@@ -483,6 +514,7 @@
   case TargetOpcode::G_BITCAST:
     return selectCOPY(I);
   case TargetOpcode::G_CONSTANT:
+  case TargetOpcode::G_FCONSTANT:
     return selectG_CONSTANT(I);
   case TargetOpcode::G_GEP:
     return selectG_GEP(I);
@@ -0,0 +1,61 @@
+# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
+
+--- |
+  define amdgpu_kernel void @constant(i32 addrspace(1)* %global0, i64 addrspace(1)* %global1) {ret void}
+...
+---
+
+name: constant
+legalized: true
+regBankSelected: true
+
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GCN-LABEL: name: constant
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = COPY $vgpr2_vgpr3
+
+    ; GCN: %{{[0-9]+}}:sreg_32 = S_MOV_B32 1
+    %2:sreg_32(s32) = G_CONSTANT i32 1
+
+    ; GCN: [[LO0:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0
+    ; GCN: [[HI0:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1
+    ; GCN: %{{[0-9]+}}:sreg_64_xexec = REG_SEQUENCE [[LO0]], %subreg.sub0, [[HI0]], %subreg.sub1
+    %3:sgpr(s64) = G_CONSTANT i64 4294967296
+
+    ; GCN: %{{[0-9]+}}:sreg_32 = S_MOV_B32 1065353216
+    %4:sgpr(s32) = G_FCONSTANT float 1.0
+
+    ; GCN: [[LO1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0
+    ; GCN: [[HI1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1072693248
+    ; GCN: %{{[0-9]+}}:sreg_64_xexec = REG_SEQUENCE [[LO1]], %subreg.sub0, [[HI1]], %subreg.sub1
+    %5:sgpr(s64) = G_FCONSTANT double 1.0
+
+    ; GCN: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_e32 1
+    %6:vgpr(s32) = G_CONSTANT i32 1
+
+    ; GCN: [[LO2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0
+    ; GCN: [[HI2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1
+    ; GCN: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE [[LO2]], %subreg.sub0, [[HI2]], %subreg.sub1
+    %7:vgpr(s64) = G_CONSTANT i64 4294967296
+
+    ; GCN: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_e32 1065353216
+    %8:vgpr(s32) = G_FCONSTANT float 1.0
+
+    ; GCN: [[LO3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0
+    ; GCN: [[HI3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1072693248
+    ; GCN: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE [[LO3]], %subreg.sub0, [[HI3]], %subreg.sub1
+    %9:vgpr(s64) = G_FCONSTANT double 1.0
+
+    G_STORE %2, %0 :: (volatile store 4 into %ir.global0)
+    G_STORE %4, %0 :: (volatile store 4 into %ir.global0)
+    G_STORE %6, %0 :: (volatile store 4 into %ir.global0)
+    G_STORE %8, %0 :: (volatile store 4 into %ir.global0)
+    G_STORE %3, %1 :: (volatile store 8 into %ir.global1)
+    G_STORE %5, %1 :: (volatile store 8 into %ir.global1)
+    G_STORE %7, %1 :: (volatile store 8 into %ir.global1)
+    G_STORE %9, %1 :: (volatile store 8 into %ir.global1)
+...
+---
@@ -41,8 +41,8 @@
 # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262144, 0
 
 # Max immediate for CI
-# SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292
-# SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 3
+# SIVI: [[K_LO:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4294967292
+# SIVI: [[K_HI:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 3
 # SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
 # SIVI-DAG: [[K_SUB0:%[0-9]+]]:sgpr_32 = COPY [[K]].sub0
 # SIVI-DAG: [[PTR_LO:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub0
@@ -55,8 +55,8 @@
 # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0
 
 # Immediate overflow for CI
-# GCN: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-# GCN: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 4
+# GCN: [[K_LO:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0
+# GCN: [[K_HI:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4
 # GCN: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
 # GCN-DAG: [[K_SUB0:%[0-9]+]]:sgpr_32 = COPY [[K]].sub0
 # GCN-DAG: [[PTR_LO:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub0
@@ -73,8 +73,8 @@
 # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741823, 0
 
 # Overflow 32-bit byte offset
-# SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-# SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+# SIVI: [[K_LO:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0
+# SIVI: [[K_HI:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1
 # SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
 # SIVI-DAG: [[K_SUB0:%[0-9]+]]:sgpr_32 = COPY [[K]].sub0
 # SIVI-DAG: [[PTR_LO:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub0