Revert "AMDGPU/GlobalISel: Add support for simple shaders" This reverts commit r293503. Revert while I investigate some of the buildbot failures. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@293509 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 2 years ago
22 changed file(s) with 12 addition(s) and 1498 deletion(s).
561561 include "AMDGPUInstrInfo.td"
562562 include "AMDGPUIntrinsics.td"
563563 include "AMDGPURegisterInfo.td"
564 include "AMDGPURegisterBanks.td"
565564 include "AMDGPUInstructions.td"
566565 include "AMDGPUCallingConv.td"
1313 //===----------------------------------------------------------------------===//
1414
1515 #include "AMDGPUCallLowering.h"
16 #include "AMDGPU.h"
1716 #include "AMDGPUISelLowering.h"
18 #include "AMDGPUSubtarget.h"
19 #include "SIISelLowering.h"
20 #include "SIRegisterInfo.h"
21 #include "SIMachineFunctionInfo.h"
22 #include "llvm/CodeGen/CallingConvLower.h"
17
2318 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
2419 #include "llvm/CodeGen/MachineInstrBuilder.h"
2520
3429 }
3530
3631 bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
37 const Value *Val, unsigned VReg) const {
38 MIRBuilder.buildInstr(AMDGPU::S_ENDPGM);
32 const Value *Val, unsigned VReg) const {
3933 return true;
40 }
41
42 unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
43 Type *ParamTy,
44 unsigned Offset) const {
45
46 MachineFunction &MF = MIRBuilder.getMF();
47 const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
48 MachineRegisterInfo &MRI = MF.getRegInfo();
49 const Function &F = *MF.getFunction();
50 const DataLayout &DL = F.getParent()->getDataLayout();
51 PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
52 LLT PtrType(*PtrTy, DL);
53 unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
54 unsigned KernArgSegmentPtr =
55 TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
56 unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
57
58 unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
59 MIRBuilder.buildConstant(OffsetReg, Offset);
60
61 MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);
62
63 return DstReg;
64 }
65
66 void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
67 Type *ParamTy, unsigned Offset,
68 unsigned DstReg) const {
69 MachineFunction &MF = MIRBuilder.getMF();
70 const Function &F = *MF.getFunction();
71 const DataLayout &DL = F.getParent()->getDataLayout();
72 PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
73 MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
74 unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
75 unsigned Align = DL.getABITypeAlignment(ParamTy);
76 unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
77
78 MachineMemOperand *MMO =
79 MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
80 MachineMemOperand::MONonTemporal |
81 MachineMemOperand::MOInvariant,
82 TypeSize, Align);
83
84 MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
8534 }
8635
8736 bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
8837 const Function &F,
8938 ArrayRef<unsigned> VRegs) const {
90
91 MachineFunction &MF = MIRBuilder.getMF();
92 const SISubtarget *Subtarget = static_cast<const SISubtarget *>(&MF.getSubtarget());
93 MachineRegisterInfo &MRI = MF.getRegInfo();
94 SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
95 const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
96 const DataLayout &DL = F.getParent()->getDataLayout();
97
98 SmallVector<CCValAssign, 16> ArgLocs;
99 CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
100
101 // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
102 if (Info->hasPrivateSegmentBuffer()) {
103 unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
104 MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
105 CCInfo.AllocateReg(PrivateSegmentBufferReg);
106 }
107
108 if (Info->hasDispatchPtr()) {
109 unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
110 // FIXME: Need to add reg as live-in
111 CCInfo.AllocateReg(DispatchPtrReg);
112 }
113
114 if (Info->hasQueuePtr()) {
115 unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
116 // FIXME: Need to add reg as live-in
117 CCInfo.AllocateReg(QueuePtrReg);
118 }
119
120 if (Info->hasKernargSegmentPtr()) {
121 unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
122 const LLT P2 = LLT::pointer(2, 64);
123 unsigned VReg = MRI.createGenericVirtualRegister(P2);
124 MRI.addLiveIn(InputPtrReg, VReg);
125 MIRBuilder.getMBB().addLiveIn(InputPtrReg);
126 MIRBuilder.buildCopy(VReg, InputPtrReg);
127 CCInfo.AllocateReg(InputPtrReg);
128 }
129
130 if (Info->hasDispatchID()) {
131 unsigned DispatchIDReg = Info->addDispatchID(*TRI);
132 // FIXME: Need to add reg as live-in
133 CCInfo.AllocateReg(DispatchIDReg);
134 }
135
136 if (Info->hasFlatScratchInit()) {
137 unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
138 // FIXME: Need to add reg as live-in
139 CCInfo.AllocateReg(FlatScratchInitReg);
140 }
141
142 unsigned NumArgs = F.arg_size();
143 Function::const_arg_iterator CurOrigArg = F.arg_begin();
144 const AMDGPUTargetLowering &TLI = *getTLI();
145 for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
146 CurOrigArg->getType()->dump();
147 MVT ValVT = TLI.getValueType(DL, CurOrigArg->getType()).getSimpleVT();
148 ISD::ArgFlagsTy Flags;
149 Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
150 CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
151 /*IsVarArg=*/false);
152 bool Res =
153 AssignFn(i, ValVT, ValVT, CCValAssign::Full, Flags, CCInfo);
154 assert(!Res && "Call operand has unhandled type");
155 (void)Res;
156 }
157
158 Function::const_arg_iterator Arg = F.arg_begin();
159 for (unsigned i = 0; i != NumArgs; ++i, ++Arg) {
160 // FIXME: We should be getting DebugInfo from the arguments somehow.
161 CCValAssign &VA = ArgLocs[i];
162 lowerParameter(MIRBuilder, Arg->getType(),
163 VA.getLocMemOffset() +
164 Subtarget->getExplicitKernelArgOffset(MF), VRegs[i]);
165 }
166
39 // TODO: Implement once there are generic loads/stores.
16740 return true;
16841 }
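Taken together, the two removed helpers above implement one pattern: each kernel argument is loaded from the kernarg segment by building a constant byte offset, a G_GEP off the preloaded KERNARG_SEGMENT_PTR, and an invariant G_LOAD in the constant address space. Below is a condensed sketch of that flow, using only the MachineIRBuilder calls visible in the diff (buildConstant, buildGEP, buildLoad); the helper name loadKernArg is made up for illustration.

    // Sketch only: condensed form of lowerParameterPtr + lowerParameter above.
    static void loadKernArg(MachineIRBuilder &B, Type *ParamTy,
                            unsigned KernArgSegmentVReg, unsigned Offset,
                            unsigned DstReg) {
      MachineFunction &MF = B.getMF();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      const DataLayout &DL = MF.getFunction()->getParent()->getDataLayout();

      // Byte offset of this argument within the kernarg segment.
      unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
      B.buildConstant(OffsetReg, Offset);

      // Pointer into the constant address space: kernarg base + offset.
      PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
      unsigned PtrReg = MRI.createGenericVirtualRegister(LLT(*PtrTy, DL));
      B.buildGEP(PtrReg, KernArgSegmentVReg, OffsetReg);

      // Kernarg loads never alias stores, so they are marked invariant.
      MachineMemOperand *MMO = MF.getMachineMemOperand(
          MachinePointerInfo(UndefValue::get(PtrTy)),
          MachineMemOperand::MOLoad | MachineMemOperand::MONonTemporal |
              MachineMemOperand::MOInvariant,
          DL.getTypeStoreSize(ParamTy), DL.getABITypeAlignment(ParamTy));
      B.buildLoad(DstReg, PtrReg, *MMO);
    }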
2121 class AMDGPUTargetLowering;
2222
2323 class AMDGPUCallLowering: public CallLowering {
24
25 unsigned lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy,
26 unsigned Offset) const;
27
28 void lowerParameter(MachineIRBuilder &MIRBuilder, Type *ParamTy,
29 unsigned Offset, unsigned DstReg) const;
30
3124 public:
3225 AMDGPUCallLowering(const AMDGPUTargetLowering &TLI);
3326
3427 bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
3528 unsigned VReg) const override;
3629 bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
3730 ArrayRef<unsigned> VRegs) const override;
38 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
3931 };
4032 } // End of namespace llvm;
4133 #endif
+0
-62
lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
None //===- AMDGPUGenRegisterBankInfo.def -----------------------------*- C++ -*-==//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file defines all the static objects used by AMDGPURegisterBankInfo.
10 /// \todo This should be generated by TableGen.
11 //===----------------------------------------------------------------------===//
12
13 #ifndef LLVM_BUILD_GLOBAL_ISEL
14 #error "You shouldn't build this"
15 #endif
16
17 namespace llvm {
18 namespace AMDGPU {
19
20 enum PartialMappingIdx {
21 None = -1,
22 PM_SGPR32 = 0,
23 PM_SGPR64 = 1,
24 PM_VGPR32 = 2,
25 PM_VGPR64 = 3
26 };
27
28 const RegisterBankInfo::PartialMapping PartMappings[] {
29 // StartIdx, Length, RegBank
30 {0, 32, SGPRRegBank},
31 {0, 64, SGPRRegBank},
32 {0, 32, VGPRRegBank},
33 {0, 64, VGPRRegBank}
34 };
35
36 const RegisterBankInfo::ValueMapping ValMappings[] {
37 // SGPR 32-bit
38 {&PartMappings[0], 1},
39 // SGPR 64-bit
40 {&PartMappings[1], 1},
41 // VGPR 32-bit
42 {&PartMappings[2], 1},
43 // VGPR 64-bit
44 {&PartMappings[3], 1}
45 };
46
47 enum ValueMappingIdx {
48 SGPRStartIdx = 0,
49 VGPRStartIdx = 2
50 };
51
52 const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID,
53 unsigned Size) {
54 assert(Size % 32 == 0);
55 unsigned Idx = BankID == AMDGPU::SGPRRegBankID ? SGPRStartIdx : VGPRStartIdx;
56 Idx += (Size / 32) - 1;
57 return &ValMappings[Idx];
58 }
59
60 } // End AMDGPU namespace.
61 } // End llvm namespace.
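A quick illustration of the index arithmetic in getValueMapping (not part of the diff): entries are grouped by bank and then by the number of 32-bit pieces, so Idx = start-of-bank + (Size / 32) - 1.

    assert(getValueMapping(AMDGPU::SGPRRegBankID, 32) == &ValMappings[0]);
    assert(getValueMapping(AMDGPU::SGPRRegBankID, 64) == &ValMappings[1]);
    assert(getValueMapping(AMDGPU::VGPRRegBankID, 32) == &ValMappings[2]);
    assert(getValueMapping(AMDGPU::VGPRRegBankID, 64) == &ValMappings[3]);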
1414
1515 #include "AMDGPUISelLowering.h"
1616 #include "AMDGPU.h"
17 #include "AMDGPUCallLowering.h"
1817 #include "AMDGPUFrameLowering.h"
1918 #include "AMDGPUIntrinsicInfo.h"
2019 #include "AMDGPURegisterInfo.h"
669668 //===---------------------------------------------------------------------===//
670669 // TargetLowering Callbacks
671670 //===---------------------------------------------------------------------===//
672
673 CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC,
674 bool IsVarArg) const {
675 return CC_AMDGPU;
676 }
677671
678672 /// The SelectionDAGBuilder will automatically promote function arguments
679673 /// with illegal types. However, this does not work for the AMDGPU targets
+0
-418
lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
None //===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the InstructionSelector class for
10 /// AMDGPU.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13
14 #include "AMDGPUInstructionSelector.h"
15 #include "AMDGPUInstrInfo.h"
16 #include "AMDGPURegisterBankInfo.h"
17 #include "AMDGPURegisterInfo.h"
18 #include "AMDGPUSubtarget.h"
19 #include "llvm/CodeGen/MachineBasicBlock.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/MachineInstr.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/IR/Type.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/raw_ostream.h"
27
28 #define DEBUG_TYPE "amdgpu-isel"
29
30 using namespace llvm;
31
32 AMDGPUInstructionSelector::AMDGPUInstructionSelector(
33 const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI)
34 : InstructionSelector(), TII(*STI.getInstrInfo()),
35 TRI(*STI.getRegisterInfo()), RBI(RBI) {}
36
37 MachineOperand
38 AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
39 unsigned SubIdx) const {
40
41 MachineInstr *MI = MO.getParent();
42 MachineBasicBlock *BB = MO.getParent()->getParent();
43 MachineFunction *MF = BB->getParent();
44 MachineRegisterInfo &MRI = MF->getRegInfo();
45 unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
46
47 if (MO.isReg()) {
48 unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
49 unsigned Reg = MO.getReg();
50 BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
51 .addReg(Reg, 0, ComposedSubIdx);
52
53 return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
54 MO.isKill(), MO.isDead(), MO.isUndef(),
55 MO.isEarlyClobber(), 0, MO.isDebug(),
56 MO.isInternalRead());
57 }
58
59 assert(MO.isImm());
60
61 APInt Imm(64, MO.getImm());
62
63 switch (SubIdx) {
64 default:
65 llvm_unreachable("do not know to split immediate with this sub index.");
66 case AMDGPU::sub0:
67 return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
68 case AMDGPU::sub1:
69 return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
70 }
71 }
72
73 bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
74 MachineBasicBlock *BB = I.getParent();
75 MachineFunction *MF = BB->getParent();
76 MachineRegisterInfo &MRI = MF->getRegInfo();
77 unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
78 unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
79 unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
80
81 if (Size != 64)
82 return false;
83
84 DebugLoc DL = I.getDebugLoc();
85
86 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
87 .add(getSubOperand64(I.getOperand(1), AMDGPU::sub0))
88 .add(getSubOperand64(I.getOperand(2), AMDGPU::sub0));
89
90 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
91 .add(getSubOperand64(I.getOperand(1), AMDGPU::sub1))
92 .add(getSubOperand64(I.getOperand(2), AMDGPU::sub1));
93
94 BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
95 .addReg(DstLo)
96 .addImm(AMDGPU::sub0)
97 .addReg(DstHi)
98 .addImm(AMDGPU::sub1);
99
100 for (MachineOperand &MO : I.explicit_operands()) {
101 if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
102 continue;
103 RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
104 }
105
106 I.eraseFromParent();
107 return true;
108 }
109
110 bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
111 return selectG_ADD(I);
112 }
113
114 bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
115 MachineBasicBlock *BB = I.getParent();
116 DebugLoc DL = I.getDebugLoc();
117
118 // FIXME: Select store instruction based on address space
119 MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(AMDGPU::FLAT_STORE_DWORD))
120 .add(I.getOperand(1))
121 .add(I.getOperand(0))
122 .addImm(0)
123 .addImm(0)
124 .addImm(0);
125
126 // Now that we selected an opcode, we need to constrain the register
127 // operands to use appropriate classes.
128 bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
129
130 I.eraseFromParent();
131 return Ret;
132 }
133
134 bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
135 MachineBasicBlock *BB = I.getParent();
136 MachineFunction *MF = BB->getParent();
137 MachineRegisterInfo &MRI = MF->getRegInfo();
138 unsigned DstReg = I.getOperand(0).getReg();
139 unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
140
141 if (Size == 32) {
142 I.setDesc(TII.get(AMDGPU::S_MOV_B32));
143 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
144 }
145
146 assert(Size == 64);
147
148 DebugLoc DL = I.getDebugLoc();
149 unsigned LoReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
150 unsigned HiReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
151 const APInt &Imm = I.getOperand(1).getCImm()->getValue();
152
153 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), LoReg)
154 .addImm(Imm.trunc(32).getZExtValue());
155
156 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg)
157 .addImm(Imm.ashr(32).getZExtValue());
158
159 BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
160 .addReg(LoReg)
161 .addImm(AMDGPU::sub0)
162 .addReg(HiReg)
163 .addImm(AMDGPU::sub1);
164 // We can't call constrainSelectedInstRegOperands here, because it doesn't
165 // work for target independent opcodes
166 I.eraseFromParent();
167 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
168 }
169
170 static bool isConstant(const MachineInstr &MI) {
171 return MI.getOpcode() == TargetOpcode::G_CONSTANT;
172 }
173
174 void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
175 const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {
176
177 const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());
178
179 assert(PtrMI);
180
181 if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
182 return;
183
184 GEPInfo GEPInfo(*PtrMI);
185
186 for (unsigned i = 1, e = 3; i < e; ++i) {
187 const MachineOperand &GEPOp = PtrMI->getOperand(i);
188 const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
189 assert(OpDef);
190 if (isConstant(*OpDef)) {
191 // FIXME: Is it possible to have multiple Imm parts? Maybe if we
192 // are lacking other optimizations.
193 assert(GEPInfo.Imm == 0);
194 GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
195 continue;
196 }
197 const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
198 if (OpBank->getID() == AMDGPU::SGPRRegBankID)
199 GEPInfo.SgprParts.push_back(GEPOp.getReg());
200 else
201 GEPInfo.VgprParts.push_back(GEPOp.getReg());
202 }
203
204 AddrInfo.push_back(GEPInfo);
205 getAddrModeInfo(*PtrMI, MRI, AddrInfo);
206 }
207
208 static bool isInstrUniform(const MachineInstr &MI) {
209 if (!MI.hasOneMemOperand())
210 return false;
211
212 const MachineMemOperand *MMO = *MI.memoperands_begin();
213 const Value *Ptr = MMO->getValue();
214
215 // UndefValue means this is a load of a kernel input. These are uniform.
216 // Sometimes LDS instructions have constant pointers.
217 // If Ptr is null, then that means this mem operand contains a
218 // PseudoSourceValue like GOT.
219 if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
220 isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
221 return true;
222
223 const Instruction *I = dyn_cast<Instruction>(Ptr);
224 return I && I->getMetadata("amdgpu.uniform");
225 }
226
227 static unsigned getSmrdOpcode(unsigned BaseOpcode, unsigned LoadSize) {
228
229 if (LoadSize == 32)
230 return BaseOpcode;
231
232 switch (BaseOpcode) {
233 case AMDGPU::S_LOAD_DWORD_IMM:
234 switch (LoadSize) {
235 case 64:
236 return AMDGPU::S_LOAD_DWORDX2_IMM;
237 case 128:
238 return AMDGPU::S_LOAD_DWORDX4_IMM;
239 case 256:
240 return AMDGPU::S_LOAD_DWORDX8_IMM;
241 case 512:
242 return AMDGPU::S_LOAD_DWORDX16_IMM;
243 }
244 break;
245 case AMDGPU::S_LOAD_DWORD_IMM_ci:
246 switch (LoadSize) {
247 case 64:
248 return AMDGPU::S_LOAD_DWORDX2_IMM_ci;
249 case 128:
250 return AMDGPU::S_LOAD_DWORDX4_IMM_ci;
251 case 256:
252 return AMDGPU::S_LOAD_DWORDX8_IMM_ci;
253 case 512:
254 return AMDGPU::S_LOAD_DWORDX16_IMM_ci;
255 }
256 break;
257 case AMDGPU::S_LOAD_DWORD_SGPR:
258 switch (LoadSize) {
259 case 64:
260 return AMDGPU::S_LOAD_DWORDX2_SGPR;
261 case 128:
262 return AMDGPU::S_LOAD_DWORDX4_SGPR;
263 case 256:
264 return AMDGPU::S_LOAD_DWORDX8_SGPR;
265 case 512:
266 return AMDGPU::S_LOAD_DWORDX16_SGPR;
267 }
268 break;
269 }
270 llvm_unreachable("Invalid base smrd opcode or size");
271 }
272
273 bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
274 for (const GEPInfo &GEPInfo : AddrInfo) {
275 if (!GEPInfo.VgprParts.empty())
276 return true;
277 }
278 return false;
279 }
280
281 bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
282 ArrayRef<GEPInfo> AddrInfo) const {
283
284 if (!I.hasOneMemOperand())
285 return false;
286
287 if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS)
288 return false;
289
290 if (!isInstrUniform(I))
291 return false;
292
293 if (hasVgprParts(AddrInfo))
294 return false;
295
296 MachineBasicBlock *BB = I.getParent();
297 MachineFunction *MF = BB->getParent();
298 const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
299 MachineRegisterInfo &MRI = MF->getRegInfo();
300 unsigned DstReg = I.getOperand(0).getReg();
301 const DebugLoc &DL = I.getDebugLoc();
302 unsigned Opcode;
303 unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
304
305 if (!AddrInfo.empty() && AddrInfo[0].SgprParts.size() == 1) {
306
307 const GEPInfo &GEPInfo = AddrInfo[0];
308
309 unsigned PtrReg = GEPInfo.SgprParts[0];
310 int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(Subtarget, GEPInfo.Imm);
311 if (AMDGPU::isLegalSMRDImmOffset(Subtarget, GEPInfo.Imm)) {
312 Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
313
314 MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
315 .addReg(PtrReg)
316 .addImm(EncodedImm)
317 .addImm(0); // glc
318 return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
319 }
320
321 if (Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS &&
322 isUInt<32>(EncodedImm)) {
323 Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM_ci, LoadSize);
324 MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
325 .addReg(PtrReg)
326 .addImm(EncodedImm)
327 .addImm(0); // glc
328 return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
329 }
330
331 if (isUInt<32>(GEPInfo.Imm)) {
332 Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_SGPR, LoadSize);
333 unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
334 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), OffsetReg)
335 .addImm(GEPInfo.Imm);
336
337 MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
338 .addReg(PtrReg)
339 .addReg(OffsetReg)
340 .addImm(0); // glc
341 return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
342 }
343 }
344
345 unsigned PtrReg = I.getOperand(1).getReg();
346 Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
347 MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
348 .addReg(PtrReg)
349 .addImm(0)
350 .addImm(0); // glc
351 return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
352 }
353
354
355 bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
356 MachineBasicBlock *BB = I.getParent();
357 MachineFunction *MF = BB->getParent();
358 MachineRegisterInfo &MRI = MF->getRegInfo();
359 DebugLoc DL = I.getDebugLoc();
360 unsigned DstReg = I.getOperand(0).getReg();
361 unsigned PtrReg = I.getOperand(1).getReg();
362 unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
363 unsigned Opcode;
364
365 SmallVector<GEPInfo, 4> AddrInfo;
366
367 getAddrModeInfo(I, MRI, AddrInfo);
368
369 if (selectSMRD(I, AddrInfo)) {
370 I.eraseFromParent();
371 return true;
372 }
373
374 switch (LoadSize) {
375 default:
376 llvm_unreachable("Load size not supported\n");
377 case 32:
378 Opcode = AMDGPU::FLAT_LOAD_DWORD;
379 break;
380 case 64:
381 Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
382 break;
383 }
384
385 MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
386 .add(I.getOperand(0))
387 .addReg(PtrReg)
388 .addImm(0)
389 .addImm(0)
390 .addImm(0);
391
392 bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
393 I.eraseFromParent();
394 return Ret;
395 }
396
397 bool AMDGPUInstructionSelector::select(MachineInstr &I) const {
398
399 if (!isPreISelGenericOpcode(I.getOpcode()))
400 return true;
401
402 switch (I.getOpcode()) {
403 default:
404 break;
405 case TargetOpcode::G_ADD:
406 return selectG_ADD(I);
407 case TargetOpcode::G_CONSTANT:
408 return selectG_CONSTANT(I);
409 case TargetOpcode::G_GEP:
410 return selectG_GEP(I);
411 case TargetOpcode::G_LOAD:
412 return selectG_LOAD(I);
413 case TargetOpcode::G_STORE:
414 return selectG_STORE(I);
415 }
416 return false;
417 }
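For reference, getSmrdOpcode above only widens the DWORD form of a base opcode to the load size; the base form (IMM, IMM_ci, or SGPR) is chosen first by the offset analysis in selectSMRD. A couple of worked cases, purely illustrative:

    assert(getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, 32) == AMDGPU::S_LOAD_DWORD_IMM);
    assert(getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, 128) == AMDGPU::S_LOAD_DWORDX4_IMM);
    assert(getSmrdOpcode(AMDGPU::S_LOAD_DWORD_SGPR, 64) == AMDGPU::S_LOAD_DWORDX2_SGPR);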
+0
-65
lib/Target/AMDGPU/AMDGPUInstructionSelector.h
None //===- AMDGPUInstructionSelector --------------------------------*- C++ -*-==//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file declares the targeting of the InstructionSelector class for
10 /// AMDGPU.
11 //===----------------------------------------------------------------------===//
12
13 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
14 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
15
16 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/SmallVector.h"
19
20 namespace llvm {
21
22 class AMDGPUInstrInfo;
23 class AMDGPURegisterBankInfo;
24 class MachineInstr;
25 class MachineOperand;
26 class MachineRegisterInfo;
27 class SIInstrInfo;
28 class SIRegisterInfo;
29 class SISubtarget;
30
31 class AMDGPUInstructionSelector : public InstructionSelector {
32 public:
33 AMDGPUInstructionSelector(const SISubtarget &STI,
34 const AMDGPURegisterBankInfo &RBI);
35
36 bool select(MachineInstr &I) const override;
37
38 private:
39 struct GEPInfo {
40 const MachineInstr &GEP;
41 SmallVector<unsigned, 2> SgprParts;
42 SmallVector<unsigned, 2> VgprParts;
43 int64_t Imm;
44 GEPInfo(const MachineInstr &GEP) : GEP(GEP), Imm(0) { }
45 };
46
47 MachineOperand getSubOperand64(MachineOperand &MO, unsigned SubIdx) const;
48 bool selectG_CONSTANT(MachineInstr &I) const;
49 bool selectG_ADD(MachineInstr &I) const;
50 bool selectG_GEP(MachineInstr &I) const;
51 bool hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const;
52 void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI,
53 SmallVectorImpl<GEPInfo> &AddrInfo) const;
54 bool selectSMRD(MachineInstr &I, ArrayRef<GEPInfo> AddrInfo) const;
55 bool selectG_LOAD(MachineInstr &I) const;
56 bool selectG_STORE(MachineInstr &I) const;
57
58 const SIInstrInfo &TII;
59 const SIRegisterInfo &TRI;
60 const AMDGPURegisterBankInfo &RBI;
61 };
62
63 } // End llvm namespace.
64 #endif
+0
-62
lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
None //===- AMDGPULegalizerInfo.cpp -----------------------------------*- C++ -*-==//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the MachineLegalizer class for
10 /// AMDGPU.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13
14 #include "AMDGPULegalizerInfo.h"
15 #include "llvm/CodeGen/ValueTypes.h"
16 #include "llvm/IR/Type.h"
17 #include "llvm/IR/DerivedTypes.h"
18 #include "llvm/Target/TargetOpcodes.h"
19 #include "llvm/Support/Debug.h"
20
21 using namespace llvm;
22
23 #ifndef LLVM_BUILD_GLOBAL_ISEL
24 #error "You shouldn't build this"
25 #endif
26
27 AMDGPULegalizerInfo::AMDGPULegalizerInfo() {
28 using namespace TargetOpcode;
29
30 const LLT S32 = LLT::scalar(32);
31 const LLT S64 = LLT::scalar(64);
32 const LLT P1 = LLT::pointer(1, 64);
33 const LLT P2 = LLT::pointer(2, 64);
34
35 setAction({G_CONSTANT, S64}, Legal);
36
37 setAction({G_GEP, P1}, Legal);
38 setAction({G_GEP, P2}, Legal);
39 setAction({G_GEP, 1, S64}, Legal);
40
41 setAction({G_LOAD, P1}, Legal);
42 setAction({G_LOAD, P2}, Legal);
43 setAction({G_LOAD, S32}, Legal);
44 setAction({G_LOAD, 1, P1}, Legal);
45 setAction({G_LOAD, 1, P2}, Legal);
46
47 setAction({G_STORE, S32}, Legal);
48 setAction({G_STORE, 1, P1}, Legal);
49
50 // FIXME: When RegBankSelect inserts copies, it will only create new
51 // registers with scalar types. This means we can end up with
52 // G_LOAD/G_STORE/G_GEP instruction with scalar types for their pointer
53 // operands. In assert builds, the instruction selector will assert
54 // if it sees a generic instruction which isn't legal, so we need to
55 // tell it that scalar types are legal for pointer operands
56 setAction({G_GEP, S64}, Legal);
57 setAction({G_LOAD, 1, S64}, Legal);
58 setAction({G_STORE, 1, S64}, Legal);
59
60 computeTables();
61 }
+0
-30
lib/Target/AMDGPU/AMDGPULegalizerInfo.h
None //===- AMDGPULegalizerInfo ---------------------------------------*- C++ -*-==//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file declares the targeting of the MachineLegalizer class for
10 /// AMDGPU.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13
14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINELEGALIZER_H
15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINELEGALIZER_H
16
17 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
18
19 namespace llvm {
20
21 class LLVMContext;
22
23 /// This class provides the information for legalizing operations on AMDGPU.
24 class AMDGPULegalizerInfo : public LegalizerInfo {
25 public:
26 AMDGPULegalizerInfo();
27 };
28 } // End llvm namespace.
29 #endif
+0
-228
lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
None //===- AMDGPURegisterBankInfo.cpp -------------------------------*- C++ -*-==//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the RegisterBankInfo class for
10 /// AMDGPU.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13
14 #include "AMDGPURegisterBankInfo.h"
15 #include "AMDGPUInstrInfo.h"
16 #include "SIRegisterInfo.h"
17 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
18 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/Target/TargetRegisterInfo.h"
21 #include "llvm/Target/TargetSubtargetInfo.h"
22
23 #define GET_TARGET_REGBANK_IMPL
24 #include "AMDGPUGenRegisterBank.inc"
25
26 // This file will be TableGen'ed at some point.
27 #include "AMDGPUGenRegisterBankInfo.def"
28
29 using namespace llvm;
30
31 #ifndef LLVM_BUILD_GLOBAL_ISEL
32 #error "You shouldn't build this"
33 #endif
34
35 AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
36 : AMDGPUGenRegisterBankInfo(),
37 TRI(static_cast<const SIRegisterInfo *>(&TRI)) {
38
39 // HACK: Until this is fully tablegen'd
40 static bool AlreadyInit = false;
41 if (AlreadyInit)
42 return;
43
44 AlreadyInit = true;
45
46 const RegisterBank &RBSGPR = getRegBank(AMDGPU::SGPRRegBankID);
47 assert(&RBSGPR == &AMDGPU::SGPRRegBank);
48
49 const RegisterBank &RBVGPR = getRegBank(AMDGPU::VGPRRegBankID);
50 assert(&RBVGPR == &AMDGPU::VGPRRegBank);
51
52 }
53
54 unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &A,
55 const RegisterBank &B,
56 unsigned Size) const {
57 return RegisterBankInfo::copyCost(A, B, Size);
58 }
59
60 const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass(
61 const TargetRegisterClass &RC) const {
62
63 if (TRI->isSGPRClass(&RC))
64 return getRegBank(AMDGPU::SGPRRegBankID);
65
66 return getRegBank(AMDGPU::VGPRRegBankID);
67 }
68
69 RegisterBankInfo::InstructionMappings
70 AMDGPURegisterBankInfo::getInstrAlternativeMappings(
71 const MachineInstr &MI) const {
72
73 const MachineFunction &MF = *MI.getParent()->getParent();
74 const MachineRegisterInfo &MRI = MF.getRegInfo();
75
76 unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
77
78 InstructionMappings AltMappings;
79 switch (MI.getOpcode()) {
80 case TargetOpcode::G_LOAD: {
81 // FIXME: Should we be hard coding the size for these mappings?
82 InstructionMapping SSMapping(1, 1,
83 getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
84 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
85 2); // Num Operands
86 AltMappings.emplace_back(std::move(SSMapping));
87
88 InstructionMapping VVMapping(2, 1,
89 getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
90 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}),
91 2); // Num Operands
92 AltMappings.emplace_back(std::move(VVMapping));
93
94 // FIXME: Should this be the pointer-size (64-bits) or the size of the
95 // register that will hold the buffer resource (128-bits)?
96 InstructionMapping VSMapping(3, 1,
97 getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
98 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
99 2); // Num Operands
100 AltMappings.emplace_back(std::move(VSMapping));
101
102 return AltMappings;
103
104 }
105 default:
106 break;
107 }
108 return RegisterBankInfo::getInstrAlternativeMappings(MI);
109 }
110
111 void AMDGPURegisterBankInfo::applyMappingImpl(
112 const OperandsMapper &OpdMapper) const {
113 return applyDefaultMapping(OpdMapper);
114 }
115
116 static bool isInstrUniform(const MachineInstr &MI) {
117 if (!MI.hasOneMemOperand())
118 return false;
119
120 const MachineMemOperand *MMO = *MI.memoperands_begin();
121 return AMDGPU::isUniformMMO(MMO);
122 }
123
124 RegisterBankInfo::InstructionMapping
125 AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
126
127 const MachineFunction &MF = *MI.getParent()->getParent();
128 const MachineRegisterInfo &MRI = MF.getRegInfo();
129 RegisterBankInfo::InstructionMapping Mapping =
130 InstructionMapping{1, 1, nullptr, MI.getNumOperands()};
131 SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
132 unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
133 unsigned PtrSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
134
135 const ValueMapping *ValMapping;
136 const ValueMapping *PtrMapping;
137
138 if (isInstrUniform(MI)) {
139 // We have a uniform instruction so we want to use an SMRD load
140 ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
141 PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
142 } else {
143 ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
144 // FIXME: What would happen if we used SGPRRegBankID here?
145 PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
146 }
147
148 OpdsMapping[0] = ValMapping;
149 OpdsMapping[1] = PtrMapping;
150 Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
151 return Mapping;
152
153 // FIXME: Do we want to add a mapping for FLAT load, or should we just
154 // handle that during instruction selection?
155 }
156
157 RegisterBankInfo::InstructionMapping
158 AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
159 RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI);
160
161 if (Mapping.isValid())
162 return Mapping;
163
164 const MachineFunction &MF = *MI.getParent()->getParent();
165 const MachineRegisterInfo &MRI = MF.getRegInfo();
166 Mapping = InstructionMapping{1, 1, nullptr, MI.getNumOperands()};
167 SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
168
169 switch (MI.getOpcode()) {
170 default: break;
171 case AMDGPU::G_CONSTANT: {
172 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
173 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
174 Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
175 return Mapping;
176 }
177 case AMDGPU::G_GEP: {
178 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
179 if (!MI.getOperand(i).isReg())
180 continue;
181
182 unsigned Size = MRI.getType(MI.getOperand(i).getReg()).getSizeInBits();
183 OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
184 }
185 Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
186 return Mapping;
187 }
188 case AMDGPU::G_STORE: {
189 assert(MI.getOperand(0).isReg());
190 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
191 // FIXME: We need to specify a different reg bank once scalar stores
192 // are supported.
193 const ValueMapping *ValMapping =
194 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
195 // FIXME: Depending on the type of store, the pointer could be in
196 // the SGPR Reg bank.
197 // FIXME: Pointer size should be based on the address space.
198 const ValueMapping *PtrMapping =
199 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
200
201 OpdsMapping[0] = ValMapping;
202 OpdsMapping[1] = PtrMapping;
203 Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
204 return Mapping;
205 }
206
207 case AMDGPU::G_LOAD:
208 return getInstrMappingForLoad(MI);
209 }
210
211 unsigned BankID = AMDGPU::SGPRRegBankID;
212
213 Mapping = InstructionMapping{1, 1, nullptr, MI.getNumOperands()};
214 unsigned Size = 0;
215 for (unsigned Idx = 0; Idx < MI.getNumOperands(); ++Idx) {
216 // If the operand is not a register default to the size of the previous
217 // operand.
218 // FIXME: Can't we pull the types from the MachineInstr rather than the
219 // operands.
220 if (MI.getOperand(Idx).isReg())
221 Size = getSizeInBits(MI.getOperand(Idx).getReg(), MRI, *TRI);
222 OpdsMapping.push_back(AMDGPU::getValueMapping(BankID, Size));
223 }
224 Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
225
226 return Mapping;
227 }
+0
-65
lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
None //===- AMDGPURegisterBankInfo -----------------------------------*- C++ -*-==//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file declares the targeting of the RegisterBankInfo class for AMDGPU.
10 /// \todo This should be generated by TableGen.
11 //===----------------------------------------------------------------------===//
12
13 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H
14 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H
15
16 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
17
18 namespace llvm {
19
20 class SIRegisterInfo;
21 class TargetRegisterInfo;
22
23 namespace AMDGPU {
24 enum {
25 SGPRRegBankID = 0,
26 VGPRRegBankID = 1,
27 NumRegisterBanks
28 };
29 } // End AMDGPU namespace.
30
31 /// This class provides the information for the target register banks.
32 class AMDGPUGenRegisterBankInfo : public RegisterBankInfo {
33
34 protected:
35
36 #define GET_TARGET_REGBANK_CLASS
37 #include "AMDGPUGenRegisterBank.inc"
38
39 };
40 class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
41 const SIRegisterInfo *TRI;
42
43 /// See RegisterBankInfo::applyMapping.
44 void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
45
46 RegisterBankInfo::InstructionMapping
47 getInstrMappingForLoad(const MachineInstr &MI) const;
48
49 public:
50 AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI);
51
52 unsigned copyCost(const RegisterBank &A, const RegisterBank &B,
53 unsigned Size) const override;
54
55 const RegisterBank &
56 getRegBankFromRegClass(const TargetRegisterClass &RC) const override;
57
58 InstructionMappings
59 getInstrAlternativeMappings(const MachineInstr &MI) const override;
60
61 InstructionMapping getInstrMapping(const MachineInstr &MI) const override;
62 };
63 } // End llvm namespace.
64 #endif
+0
-16
lib/Target/AMDGPU/AMDGPURegisterBanks.td
None //=- AMDGPURegisterBank.td - Describe the AMDGPU Banks -------*- tablegen -*-=//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8
9 def SGPRRegBank : RegisterBank<"SGPR",
10 [SReg_32, SReg_64, SReg_128, SReg_256, SReg_512]
11 >;
12
13 def VGPRRegBank : RegisterBank<"VGPR",
14 [VGPR_32, VReg_64, VReg_96, VReg_128, VReg_256, VReg_512]
15 >;
516516 return GISel->getCallLowering();
517517 }
518518
519 const InstructionSelector *getInstructionSelector() const override {
520 assert(GISel && "Access to GlobalISel APIs not set");
521 return GISel->getInstructionSelector();
522 }
523
524 const LegalizerInfo *getLegalizerInfo() const {
525 assert(GISel && "Access to GlobalISel APIs not set");
526 return GISel->getLegalizerInfo();
527 }
528
529 const RegisterBankInfo *getRegBankInfo() const override {
530 assert(GISel && "Access to GlobalISel APIs not set");
531 return GISel->getRegBankInfo();
532 }
533
534519 const SIRegisterInfo *getRegisterInfo() const override {
535520 return &InstrInfo.getRegisterInfo();
536521 }
1515 #include "AMDGPUTargetMachine.h"
1616 #include "AMDGPU.h"
1717 #include "AMDGPUCallLowering.h"
18 #include "AMDGPUInstructionSelector.h"
19 #include "AMDGPULegalizerInfo.h"
20 #include "AMDGPURegisterBankInfo.h"
2118 #include "AMDGPUTargetObjectFile.h"
2219 #include "AMDGPUTargetTransformInfo.h"
2320 #include "GCNSchedStrategy.h"
2421 #include "R600MachineScheduler.h"
2522 #include "SIMachineScheduler.h"
26 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/Triple.h"
27 #include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
2728 #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
28 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
29 #include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
29 #include "llvm/CodeGen/MachineScheduler.h"
3030 #include "llvm/CodeGen/Passes.h"
3131 #include "llvm/CodeGen/TargetPassConfig.h"
3232 #include "llvm/Support/TargetRegistry.h"
255255
256256 struct SIGISelActualAccessor : public GISelAccessor {
257257 std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
258 std::unique_ptr<InstructionSelector> InstSelector;
259 std::unique_ptr<LegalizerInfo> Legalizer;
260 std::unique_ptr<RegisterBankInfo> RegBankInfo;
261258 const AMDGPUCallLowering *getCallLowering() const override {
262259 return CallLoweringInfo.get();
263 }
264 const InstructionSelector *getInstructionSelector() const override {
265 return InstSelector.get();
266 }
267 const LegalizerInfo *getLegalizerInfo() const override {
268 return Legalizer.get();
269 }
270 const RegisterBankInfo *getRegBankInfo() const override {
271 return RegBankInfo.get();
272260 }
273261 };
274262
303291 SIGISelActualAccessor *GISel = new SIGISelActualAccessor();
304292 GISel->CallLoweringInfo.reset(
305293 new AMDGPUCallLowering(*I->getTargetLowering()));
306 GISel->Legalizer.reset(new AMDGPULegalizerInfo());
307
308 GISel->RegBankInfo.reset(new AMDGPURegisterBankInfo(*I->getRegisterInfo()));
309 GISel->InstSelector.reset(new AMDGPUInstructionSelector(*I,
310 *static_cast<AMDGPURegisterBankInfo *>(GISel->RegBankInfo.get())));
311294 #endif
312295
313296 I->setGISelAccessor(*GISel);
608591 }
609592
610593 bool GCNPassConfig::addLegalizeMachineIR() {
611 addPass(new Legalizer());
612594 return false;
613595 }
614596
615597 bool GCNPassConfig::addRegBankSelect() {
616 addPass(new RegBankSelect());
617598 return false;
618599 }
619600
620601 bool GCNPassConfig::addGlobalInstructionSelect() {
621 addPass(new InstructionSelect());
622 return false;
623 }
624
602 return false;
603 }
625604 #endif
626605
627606 void GCNPassConfig::addPreRegAlloc() {
1111 tablegen(LLVM AMDGPUGenAsmMatcher.inc -gen-asm-matcher)
1212 tablegen(LLVM AMDGPUGenDisassemblerTables.inc -gen-disassembler)
1313 tablegen(LLVM AMDGPUGenMCPseudoLowering.inc -gen-pseudo-lowering)
14 if(LLVM_BUILD_GLOBAL_ISEL)
15 tablegen(LLVM AMDGPUGenRegisterBank.inc -gen-register-bank)
16 endif()
1714 add_public_tablegen_target(AMDGPUCommonTableGen)
1815
1916 # List of all GlobalISel files.
2017 set(GLOBAL_ISEL_FILES
2118 AMDGPUCallLowering.cpp
22 AMDGPUInstructionSelector.cpp
23 AMDGPULegalizerInfo.cpp
24 AMDGPURegisterBankInfo.cpp
2519 )
2620
2721 # Add GlobalISel files to the dependencies if the user wants to build it.
99 #include "AMDGPU.h"
1010 #include "SIDefines.h"
1111 #include "llvm/CodeGen/MachineMemOperand.h"
12 #include "llvm/IR/LLVMContext.h"
1213 #include "llvm/IR/Constants.h"
1314 #include "llvm/IR/Function.h"
1415 #include "llvm/IR/GlobalValue.h"
15 #include "llvm/IR/LLVMContext.h"
1616 #include "llvm/MC/MCContext.h"
1717 #include "llvm/MC/MCInstrInfo.h"
1818 #include "llvm/MC/MCRegisterInfo.h"
+0
-25
test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
None # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
1 # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
2
3 --- |
4 define void @global_addrspace(i32 addrspace(1)* %global0) { ret void }
5 ...
6 ---
7
8 name: global_addrspace
9 legalized: true
10 regBankSelected: true
11
12 # GCN: global_addrspace
13 # GCN: [[PTR:%[0-9]+]] = COPY %vgpr0_vgpr1
14 # GCN: FLAT_LOAD_DWORD [[PTR]], 0, 0, 0
15
16 body: |
17 bb.0:
18 liveins: %vgpr0_vgpr1
19
20 %0:vgpr(p1) = COPY %vgpr0_vgpr1
21 %1:vgpr(s32) = G_LOAD %0 :: (load 4 from %ir.global0)
22
23 ...
24 ---
+0
-141
test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
None # RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN,SI,SICI,SIVI
1 # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN,CI,SICI
2 # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN,VI,SIVI
3
4 --- |
5 define void @smrd_imm(i32 addrspace(2)* %const0) { ret void }
6 ...
7 ---
8
9 name: smrd_imm
10 legalized: true
11 regBankSelected: true
12
13 # GCN: body:
14 # GCN: [[PTR:%[0-9]+]] = COPY %sgpr0_sgpr1
15
16 # Immediate offset:
17 # SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0
18 # VI: S_LOAD_DWORD_IMM [[PTR]], 4, 0
19
20 # Max immediate offset for SI
21 # SICI: S_LOAD_DWORD_IMM [[PTR]], 255, 0
22 # VI: S_LOAD_DWORD_IMM [[PTR]], 1020, 0
23
24 # Immediate overflow for SI
25 # FIXME: The immediate gets selected twice, once into the
26 # S_LOAD_DWORD instruction and once just as a normal constant.
27 # SI: S_MOV_B32 1024
28 # SI: [[K1024:%[0-9]+]] = S_MOV_B32 1024
29 # SI: S_LOAD_DWORD_SGPR [[PTR]], [[K1024]], 0
30 # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 256, 0
31 # VI: S_LOAD_DWORD_IMM [[PTR]], 1024, 0
32
33 # Max immediate offset for VI
34 # SI: S_MOV_B32 1048572
35 # SI: [[K1048572:%[0-9]+]] = S_MOV_B32 1048572
36 # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262143
37 # VI: S_LOAD_DWORD_IMM [[PTR]], 1048572
38
39 #
40 # Immediate overflow for VI
41 # FIXME: The immediate gets selected twice, once into the
42 # S_LOAD_DWORD instruction and once just as a normal constant.
43 # SIVI: S_MOV_B32 1048576
44 # SIVI: [[K1048576:%[0-9]+]] = S_MOV_B32 1048576
45 # SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K1048576]], 0
46 # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262144, 0
47
48 # Max immediate for CI
49 # SIVI: [[K_LO:%[0-9]+]] = S_MOV_B32 4294967292
50 # SIVI: [[K_HI:%[0-9]+]] = S_MOV_B32 3
51 # SIVI: [[K:%[0-9]+]] = REG_SEQUENCE [[K_LO]], 1, [[K_HI]], 2
52 # SIVI: [[K_SUB0:%[0-9]+]] = COPY [[K]].sub0
53 # SIVI: [[PTR_LO:%[0-9]+]] = COPY [[PTR]].sub0
54 # SIVI: [[ADD_PTR_LO:%[0-9]+]] = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
55 # SIVI: [[K_SUB1:%[0-9]+]] = COPY [[K]].sub1
56 # SIVI: [[PTR_HI:%[0-9]+]] = COPY [[PTR]].sub1
57 # SIVI: [[ADD_PTR_HI:%[0-9]+]] = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
58 # SIVI: [[ADD_PTR:%[0-9]+]] = REG_SEQUENCE [[ADD_PTR_LO]], 1, [[ADD_PTR_HI]], 2
59 # SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
60 # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0
61
62 # Immediate overflow for CI
63 # GCN: [[K_LO:%[0-9]+]] = S_MOV_B32 0
64 # GCN: [[K_HI:%[0-9]+]] = S_MOV_B32 4
65 # GCN: [[K:%[0-9]+]] = REG_SEQUENCE [[K_LO]], 1, [[K_HI]], 2
66 # GCN: [[K_SUB0:%[0-9]+]] = COPY [[K]].sub0
67 # GCN: [[PTR_LO:%[0-9]+]] = COPY [[PTR]].sub0
68 # GCN: [[ADD_PTR_LO:%[0-9]+]] = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
69 # GCN: [[K_SUB1:%[0-9]+]] = COPY [[K]].sub1
70 # GCN: [[PTR_HI:%[0-9]+]] = COPY [[PTR]].sub1
71 # GCN: [[ADD_PTR_HI:%[0-9]+]] = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
72 # GCN: [[ADD_PTR:%[0-9]+]] = REG_SEQUENCE [[ADD_PTR_LO]], 1, [[ADD_PTR_HI]], 2
73 # GCN: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
74
75 # Max 32-bit byte offset
76 # FIXME: The immediate gets selected twice, once into the
77 # S_LOAD_DWORD instruction and once just as a normal constant.
78 # SIVI: S_MOV_B32 4294967292
79 # SIVI: [[K4294967292:%[0-9]+]] = S_MOV_B32 4294967292
80 # SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K4294967292]], 0
81 # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741823, 0
82
83 # Overflow 32-bit byte offset
84 # SIVI: [[K_LO:%[0-9]+]] = S_MOV_B32 0
85 # SIVI: [[K_HI:%[0-9]+]] = S_MOV_B32 1
86 # SIVI: [[K:%[0-9]+]] = REG_SEQUENCE [[K_LO]], 1, [[K_HI]], 2
87 # SIVI: [[K_SUB0:%[0-9]+]] = COPY [[K]].sub0
88 # SIVI: [[PTR_LO:%[0-9]+]] = COPY [[PTR]].sub0
89 # SIVI: [[ADD_PTR_LO:%[0-9]+]] = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
90 # SIVI: [[K_SUB1:%[0-9]+]] = COPY [[K]].sub1
91 # SIVI: [[PTR_HI:%[0-9]+]] = COPY [[PTR]].sub1
92 # SIVI: [[ADD_PTR_HI:%[0-9]+]] = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
93 # SIVI: [[ADD_PTR:%[0-9]+]] = REG_SEQUENCE [[ADD_PTR_LO]], 1, [[ADD_PTR_HI]], 2
94 # SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
95 # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741824, 0
96
97 body: |
98 bb.0:
99 liveins: %sgpr0_sgpr1
100
101 %0:sgpr(p2) = COPY %sgpr0_sgpr1
102
103 %1:sgpr(s64) = G_CONSTANT i64 4
104 %2:sgpr(p2) = G_GEP %0, %1
105 %3:sgpr(s32) = G_LOAD %2 :: (load 4 from %ir.const0)
106
107 %4:sgpr(s64) = G_CONSTANT i64 1020
108 %5:sgpr(p2) = G_GEP %0, %4
109 %6:sgpr(s32) = G_LOAD %5 :: (load 4 from %ir.const0)
110
111 %7:sgpr(s64) = G_CONSTANT i64 1024
112 %8:sgpr(p2) = G_GEP %0, %7
113 %9:sgpr(s32) = G_LOAD %8 :: (load 4 from %ir.const0)
114
115 %10:sgpr(s64) = G_CONSTANT i64 1048572
116 %11:sgpr(p2) = G_GEP %0, %10
117 %12:sgpr(s32) = G_LOAD %11 :: (load 4 from %ir.const0)
118
119 %13:sgpr(s64) = G_CONSTANT i64 1048576
120 %14:sgpr(p2) = G_GEP %0, %13
121 %15:sgpr(s32) = G_LOAD %14 :: (load 4 from %ir.const0)
122
123 %16:sgpr(s64) = G_CONSTANT i64 17179869180
124 %17:sgpr(p2) = G_GEP %0, %16
125 %18:sgpr(s32) = G_LOAD %17 :: (load 4 from %ir.const0)
126
127 %19:sgpr(s64) = G_CONSTANT i64 17179869184
128 %20:sgpr(p2) = G_GEP %0, %19
129 %21:sgpr(s32) = G_LOAD %20 :: (load 4 from %ir.const0)
130
131 %22:sgpr(s64) = G_CONSTANT i64 4294967292
132 %23:sgpr(p2) = G_GEP %0, %22
133 %24:sgpr(s32) = G_LOAD %23 :: (load 4 from %ir.const0)
134
135 %25:sgpr(s64) = G_CONSTANT i64 4294967296
136 %26:sgpr(p2) = G_GEP %0, %25
137 %27:sgpr(s32) = G_LOAD %26 :: (load 4 from %ir.const0)
138
139 ...
140 ---
+0
-27
test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
None # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
1 # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
2
3 --- |
4 define void @global_addrspace(i32 addrspace(1)* %global0) { ret void }
5 ...
6 ---
7
8 name: global_addrspace
9 legalized: true
10 regBankSelected: true
11
12 # GCN: global_addrspace
13 # GCN: [[PTR:%[0-9]+]] = COPY %vgpr0_vgpr1
14 # GCN: [[VAL:%[0-9]+]] = COPY %vgpr2
15 # GCN: FLAT_STORE_DWORD [[PTR]], [[VAL]], 0, 0, 0
16
17 body: |
18 bb.0:
19 liveins: %vgpr0_vgpr1, %vgpr2
20
21 %0:vgpr(p1) = COPY %vgpr0_vgpr1
22 %1:vgpr(s32) = COPY %vgpr2
23 G_STORE %1, %0 :: (store 4 into %ir.global0)
24
25 ...
26 ---
+0
-67
test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
None # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=regbankselect -global-isel %s -verify-machineinstrs -o - | FileCheck %s
1
2 --- |
3 define void @load_constant(i32 addrspace(2)* %ptr0) { ret void }
4 define void @load_global_uniform(i32 addrspace(1)* %ptr1) {
5 %tmp0 = load i32, i32 addrspace(1)* %ptr1
6 ret void
7 }
8 define void @load_global_non_uniform(i32 addrspace(1)* %ptr2) {
9 %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0
10 %tmp1 = getelementptr i32, i32 addrspace(1)* %ptr2, i32 %tmp0
11 %tmp2 = load i32, i32 addrspace(1)* %tmp1
12 ret void
13 }
14 declare i32 @llvm.amdgcn.workitem.id.x() #0
15 attributes #0 = { nounwind readnone }
16 ...
17
18 ---
19 name : load_constant
20 legalized: true
21
22 # CHECK-LABEL: name: load_constant
23 # CHECK: registers:
24 # CHECK: - { id: 0, class: sgpr }
25 # CHECK: - { id: 1, class: sgpr }
26
27 body: |
28 bb.0:
29 liveins: %sgpr0_sgpr1
30 %0:_(p2) = COPY %sgpr0_sgpr1
31 %1:_(s32) = G_LOAD %0 :: (load 4 from %ir.ptr0)
32 ...
33
34 ---
35 name: load_global_uniform
36 legalized: true
37
38 # CHECK-LABEL: name: load_global_uniform
39 # CHECK: registers:
40 # CHECK: - { id: 0, class: sgpr }
41 # CHECK: - { id: 1, class: sgpr }
42
43 body: |
44 bb.0:
45 liveins: %sgpr0_sgpr1
46 %0:_(p1) = COPY %sgpr0_sgpr1
47 %1:_(s32) = G_LOAD %0 :: (load 4 from %ir.ptr1)
48 ...
49
50 ---
51 name: load_global_non_uniform
52 legalized: true
53
54 # CHECK-LABEL: name: load_global_non_uniform
55 # CHECK: registers:
56 # CHECK: - { id: 0, class: sgpr }
57 # CHECK: - { id: 1, class: vgpr }
58 # CHECK: - { id: 2, class: vgpr }
59
60
61 body: |
62 bb.0:
63 liveins: %sgpr0_sgpr1
64 %0:_(p1) = COPY %sgpr0_sgpr1
65 %1:_(s32) = G_LOAD %0 :: (load 4 from %ir.tmp1)
66 ...
+0
-9
test/CodeGen/AMDGPU/GlobalISel/shader-epilogs.ll
None ; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=GCN %s
1
2 ; GCN-LABEL: vs_epilog
3 ; GCN: s_endpgm
4
5 define amdgpu_vs void @vs_epilog() {
6 main_body:
7 ret void
8 }
+0
-87
test/CodeGen/AMDGPU/GlobalISel/smrd.ll
None ; FIXME: Need to add support for mubuf stores to enable this on SI.
1 ; XUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=SIVI %s
2 ; RUN: llc < %s -march=amdgcn -mcpu=bonaire -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=CI --check-prefix=GCN %s
3 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=SIVI %s
4
5 ; SMRD load with an immediate offset.
6 ; GCN-LABEL: {{^}}smrd0:
7 ; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
8 ; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
9 define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
10 entry:
11 %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 1
12 %1 = load i32, i32 addrspace(2)* %0
13 store i32 %1, i32 addrspace(1)* %out
14 ret void
15 }
16
17 ; SMRD load with the largest possible immediate offset.
18 ; GCN-LABEL: {{^}}smrd1:
19 ; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff,0x{{[0-9]+[137]}}
20 ; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
21 define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
22 entry:
23 %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 255
24 %1 = load i32, i32 addrspace(2)* %0
25 store i32 %1, i32 addrspace(1)* %out
26 ret void
27 }
28
29 ; SMRD load with an offset greater than the largest possible immediate.
30 ; GCN-LABEL: {{^}}smrd2:
31 ; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
32 ; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
33 ; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
34 ; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
35 ; GCN: s_endpgm
36 define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
37 entry:
38 %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 256
39 %1 = load i32, i32 addrspace(2)* %0
40 store i32 %1, i32 addrspace(1)* %out
41 ret void
42 }
43
44 ; SMRD load with a 64-bit offset
45 ; GCN-LABEL: {{^}}smrd3:
46 ; FIXME: There are too many copies here because we don't fold immediates
47 ; through REG_SEQUENCE
48 ; XSI: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0xb ; encoding: [0x0b
49 ; TODO: Add VI checks
50 ; XGCN: s_endpgm
51 define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
52 entry:
53 %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
54 %1 = load i32, i32 addrspace(2)* %0
55 store i32 %1, i32 addrspace(1)* %out
56 ret void
57 }
58
59 ; SMRD load with the largest possible immediate offset on VI
60 ; GCN-LABEL: {{^}}smrd4:
61 ; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc
62 ; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
63 ; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
64 ; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
65 define void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
66 entry:
67 %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262143
68 %1 = load i32, i32 addrspace(2)* %0
69 store i32 %1, i32 addrspace(1)* %out
70 ret void
71 }
72
73 ; SMRD load with an offset greater than the largest possible immediate on VI
74 ; GCN-LABEL: {{^}}smrd5:
75 ; SIVI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000
76 ; SIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
77 ; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
78 ; GCN: s_endpgm
79 define void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
80 entry:
81 %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262144
82 %1 = load i32, i32 addrspace(2)* %0
83 store i32 %1, i32 addrspace(1)* %out
84 ret void
85 }
86
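The constants in these checks follow from how each generation encodes the SMRD immediate: SI takes a dword offset with an 8-bit range, CI a dword offset with a 32-bit range, and VI a byte offset with a 20-bit range. A hedged sketch of that rule (the helper name and generation enum are made up for illustration; the in-tree logic lives in isLegalSMRDImmOffset/getSMRDEncodedOffset used by the selector above):

    // Sketch only: which byte offsets fit the SMRD immediate per generation.
    enum Gen { SI, CI, VI };
    static bool fitsSMRDImm(Gen G, uint64_t ByteOffset) {
      switch (G) {
      case SI: return ByteOffset % 4 == 0 && ByteOffset / 4 <= 0xff;       // smrd1: 1020 bytes -> 0xff
      case CI: return ByteOffset % 4 == 0 && ByteOffset / 4 <= 0xffffffff; // dword offset, 32-bit range
      case VI: return ByteOffset <= 0xffffc;                               // smrd4: 1048572 -> 0xffffc
      }
      return false;
    }

This matches the cases above: smrd2's 1024-byte offset (256 dwords) overflows SI's 8-bit field but fits CI and VI, and smrd5's 1048576 bytes overflows VI's field, forcing the offset into a register on SI and VI.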