llvm.org GIT mirror llvm / c0b0c67
R600: Support for indirect addressing v4

Only implemented for R600 so far. SI is missing implementations of a few callbacks used by the Indirect Addressing pass and needs code to handle frame indices.

At the moment R600 only supports array sizes of 16 dwords or less. Register packing of vector types is currently disabled, which means that a vec4 is stored in T0_X, T1_X, T2_X, T3_X, rather than T0_XYZW. In order to correctly pack registers in all cases, we will need to implement an analysis pass for R600 that determines the correct vector width for each array.

v2:
  - Add support for i8 zext load from stack.
  - Coding style fixes

v3:
  - Don't reserve registers for indirect addressing when it isn't being used.
  - Fix bug caused by LLVM limiting the number of SubRegIndex declarations.

v4:
  - Fix 64-bit defines

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174525 91177308-0d34-0410-b5e6-96231b3b80d8

Tom Stellard, 7 years ago
31 changed file(s) with 1165 addition(s) and 116 deletion(s).
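For context, indirect addressing comes into play when a kernel indexes a private (per-thread) array with a value that is not known at compile time: the array cannot be promoted to individual registers, so it is kept in the register file and addressed through the AR register. A hypothetical illustration of the kind of source that exercises this patch (not taken from the commit or its tests):

// Illustrative only: a private array indexed by a runtime value is lowered
// through frame indices and the RegisterLoad/RegisterStore pseudos added
// below, which the new pass then turns into AR-relative MOVs (or plain
// COPYs when the index turns out to be constant).
int select_element(int idx) {
  int stack[4] = {10, 20, 30, 40};  // lives in the private address space
  return stack[idx & 3];            // dynamic index -> indirect read
}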
3535 // Passes common to R600 and SI
3636 Pass *createAMDGPUStructurizeCFGPass();
3737 FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
38 FunctionPass* createAMDGPUIndirectAddressingPass(TargetMachine &tm);
3839
3940 } // End namespace llvm
4041
0 //===----------------------- AMDGPUFrameLowering.cpp ----------------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 // Interface to describe a layout of a stack frame on a AMDIL target machine
10 //
11 //===----------------------------------------------------------------------===//
12 #include "AMDGPUFrameLowering.h"
13 #include "AMDGPURegisterInfo.h"
14 #include "R600MachineFunctionInfo.h"
15 #include "llvm/CodeGen/MachineFrameInfo.h"
16 #include "llvm/CodeGen/MachineRegisterInfo.h"
17 #include "llvm/IR/Instructions.h"
18
19 using namespace llvm;
20 AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
21 int LAO, unsigned TransAl)
22 : TargetFrameLowering(D, StackAl, LAO, TransAl) { }
23
24 AMDGPUFrameLowering::~AMDGPUFrameLowering() { }
25
26 unsigned AMDGPUFrameLowering::getStackWidth(const MachineFunction &MF) const {
27
28 // XXX: Hardcoding to 1 for now.
29 //
30 // I think the StackWidth should stored as metadata associated with the
31 // MachineFunction. This metadata can either be added by a frontend, or
32 // calculated by a R600 specific LLVM IR pass.
33 //
34 // The StackWidth determines how stack objects are laid out in memory.
35 // For a vector stack variable, like: int4 stack[2], the data will be stored
36 // in the following ways depending on the StackWidth.
37 //
38 // StackWidth = 1:
39 //
40 // T0.X = stack[0].x
41 // T1.X = stack[0].y
42 // T2.X = stack[0].z
43 // T3.X = stack[0].w
44 // T4.X = stack[1].x
45 // T5.X = stack[1].y
46 // T6.X = stack[1].z
47 // T7.X = stack[1].w
48 //
49 // StackWidth = 2:
50 //
51 // T0.X = stack[0].x
52 // T0.Y = stack[0].y
53 // T1.X = stack[0].z
54 // T1.Y = stack[0].w
55 // T2.X = stack[1].x
56 // T2.Y = stack[1].y
57 // T3.X = stack[1].z
58 // T3.Y = stack[1].w
59 //
60 // StackWidth = 4:
61 // T0.X = stack[0].x
62 // T0.Y = stack[0].y
63 // T0.Z = stack[0].z
64 // T0.W = stack[0].w
65 // T1.X = stack[1].x
66 // T1.Y = stack[1].y
67 // T1.Z = stack[1].z
68 // T1.W = stack[1].w
69 return 1;
70 }
71
72 /// \returns The number of registers allocated for \p FI.
73 int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
74 int FI) const {
75 const MachineFrameInfo *MFI = MF.getFrameInfo();
76 unsigned Offset = 0;
77 int UpperBound = FI == -1 ? MFI->getNumObjects() : FI;
78
79 for (int i = MFI->getObjectIndexBegin(); i < UpperBound; ++i) {
80 const AllocaInst *Alloca = MFI->getObjectAllocation(i);
81 unsigned ArrayElements;
82 const Type *AllocaType = Alloca->getAllocatedType();
83 const Type *ElementType;
84
85 if (AllocaType->isArrayTy()) {
86 ArrayElements = AllocaType->getArrayNumElements();
87 ElementType = AllocaType->getArrayElementType();
88 } else {
89 ArrayElements = 1;
90 ElementType = AllocaType;
91 }
92
93 unsigned VectorElements;
94 if (ElementType->isVectorTy()) {
95 VectorElements = ElementType->getVectorNumElements();
96 } else {
97 VectorElements = 1;
98 }
99
100 Offset += (VectorElements / getStackWidth(MF)) * ArrayElements;
101 }
102 return Offset;
103 }
104
105 const TargetFrameLowering::SpillSlot *
106 AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
107 NumEntries = 0;
108 return 0;
109 }
110 void
111 AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const {
112 }
113 void
114 AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF,
115 MachineBasicBlock &MBB) const {
116 }
117
118 bool
119 AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const {
120 return false;
121 }
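As an aside, the offset computation in getFrameIndexOffset above simply counts how many registers every stack object in front of \p FI occupies at the current stack width (and, when called with FI == -1, the size of the whole frame). A minimal standalone sketch of the same arithmetic, with a hypothetical plain struct standing in for the MachineFrameInfo/AllocaInst queries and StackWidth assumed to be 1 as in getStackWidth:

#include <vector>

// Hypothetical stand-in for one stack object: an array of vector elements.
struct StackObject {
  unsigned ArrayElements;   // 1 for a non-array alloca
  unsigned VectorElements;  // 1 for a scalar element type
};

// Each object consumes (VectorElements / StackWidth) * ArrayElements registers.
unsigned frameIndexOffset(const std::vector<StackObject> &Objects, int FI,
                          unsigned StackWidth = 1) {
  unsigned Offset = 0;
  int UpperBound = FI == -1 ? (int)Objects.size() : FI;
  for (int i = 0; i < UpperBound; ++i)
    Offset += (Objects[i].VectorElements / StackWidth) * Objects[i].ArrayElements;
  return Offset;
}

// Example: int4 a[2] followed by int b[3] -> b starts at register index 8,
// i.e. frameIndexOffset({{2, 4}, {3, 1}}, 1) == 8.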
0 //===--------------------- AMDGPUFrameLowering.h ----------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief Interface to describe a layout of a stack frame on a AMDIL target
11 /// machine.
12 //
13 //===----------------------------------------------------------------------===//
14 #ifndef AMDILFRAME_LOWERING_H
15 #define AMDILFRAME_LOWERING_H
16
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/Target/TargetFrameLowering.h"
19
20 namespace llvm {
21
22 /// \brief Information about the stack frame layout on the AMDGPU targets.
23 ///
24 /// It holds the direction of the stack growth, the known stack alignment on
25 /// entry to each function, and the offset to the locals area.
26 /// See TargetFrameInfo for more comments.
27 class AMDGPUFrameLowering : public TargetFrameLowering {
28 public:
29 AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO,
30 unsigned TransAl = 1);
31 virtual ~AMDGPUFrameLowering();
32
33 /// \returns The number of 32-bit sub-registers that are used when storing
34 /// values to the stack.
35 virtual unsigned getStackWidth(const MachineFunction &MF) const;
36 virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
37 virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const;
38 virtual void emitPrologue(MachineFunction &MF) const;
39 virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
40 virtual bool hasFP(const MachineFunction &MF) const;
41 };
42 } // namespace llvm
43 #endif // AMDILFRAME_LOWERING_H
411411 NODE_NAME_CASE(URECIP)
412412 NODE_NAME_CASE(EXPORT)
413413 NODE_NAME_CASE(CONST_ADDRESS)
414 }
415 }
414 NODE_NAME_CASE(REGISTER_LOAD)
415 NODE_NAME_CASE(REGISTER_STORE)
416 }
417 }
121121 URECIP,
122122 EXPORT,
123123 CONST_ADDRESS,
124 REGISTER_LOAD,
125 REGISTER_STORE,
124126 LAST_AMDGPU_ISD_NUMBER
125127 };
126128
0 //===-- AMDGPUIndirectAddressing.cpp - Indirect Adressing Support ---------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 ///
11 /// Instructions can use indirect addressing to index the register file as if it
12 /// were memory. This pass lowers RegisterLoad and RegisterStore instructions
13 /// to either a COPY or a MOV that uses indirect addressing.
14 //
15 //===----------------------------------------------------------------------===//
16
17 #include "AMDGPU.h"
18 #include "R600InstrInfo.h"
19 #include "R600MachineFunctionInfo.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/Support/Debug.h"
25
26 using namespace llvm;
27
28 namespace {
29
30 class AMDGPUIndirectAddressingPass : public MachineFunctionPass {
31
32 private:
33 static char ID;
34 const AMDGPUInstrInfo *TII;
35
36 bool regHasExplicitDef(MachineRegisterInfo &MRI, unsigned Reg) const;
37
38 public:
39 AMDGPUIndirectAddressingPass(TargetMachine &tm) :
40 MachineFunctionPass(ID),
41 TII(static_cast<const AMDGPUInstrInfo*>(tm.getInstrInfo()))
42 { }
43
44 virtual bool runOnMachineFunction(MachineFunction &MF);
45
46 const char *getPassName() const { return "R600 Handle indirect addressing"; }
47
48 };
49
50 } // End anonymous namespace
51
52 char AMDGPUIndirectAddressingPass::ID = 0;
53
54 FunctionPass *llvm::createAMDGPUIndirectAddressingPass(TargetMachine &tm) {
55 return new AMDGPUIndirectAddressingPass(tm);
56 }
57
58 bool AMDGPUIndirectAddressingPass::runOnMachineFunction(MachineFunction &MF) {
59 MachineRegisterInfo &MRI = MF.getRegInfo();
60
61 int IndirectBegin = TII->getIndirectIndexBegin(MF);
62 int IndirectEnd = TII->getIndirectIndexEnd(MF);
63
64 if (IndirectBegin == -1) {
65 // No indirect addressing, we can skip this pass
66 assert(IndirectEnd == -1);
67 return false;
68 }
69
70 // The map keeps track of the indirect address that is represented by
71 // each virtual register. The key is the register and the value is the
72 // indirect address it uses.
73 std::map<unsigned, unsigned> RegisterAddressMap;
74
75 // First pass - Lower all of the RegisterStore instructions and track which
76 // registers are live.
77 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
78 BB != BB_E; ++BB) {
79 // This map keeps track of the current live indirect registers.
80 // The key is the address and the value is the register
81 std::map<unsigned, unsigned> LiveAddressRegisterMap;
82 MachineBasicBlock &MBB = *BB;
83
84 for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
85 I != MBB.end(); I = Next) {
86 Next = llvm::next(I);
87 MachineInstr &MI = *I;
88
89 if (!TII->isRegisterStore(MI)) {
90 continue;
91 }
92
93 // Lower RegisterStore
94
95 unsigned RegIndex = MI.getOperand(2).getImm();
96 unsigned Channel = MI.getOperand(3).getImm();
97 unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
98 const TargetRegisterClass *IndirectStoreRegClass =
99 TII->getIndirectAddrStoreRegClass(MI.getOperand(0).getReg());
100
101 if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
102 // Direct register access.
103 unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
104
105 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY), DstReg)
106 .addOperand(MI.getOperand(0));
107
108 RegisterAddressMap[DstReg] = Address;
109 LiveAddressRegisterMap[Address] = DstReg;
110 } else {
111 // Indirect register access.
112 MachineInstrBuilder MOV = TII->buildIndirectWrite(BB, I,
113 MI.getOperand(0).getReg(), // Value
114 Address,
115 MI.getOperand(1).getReg()); // Offset
116 for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
117 unsigned Addr = TII->calculateIndirectAddress(i, Channel);
118 unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
119 MOV.addReg(DstReg, RegState::Define | RegState::Implicit);
120 RegisterAddressMap[DstReg] = Addr;
121 LiveAddressRegisterMap[Addr] = DstReg;
122 }
123 }
124 MI.eraseFromParent();
125 }
126
127 // Update the live-ins of the successor blocks
128 for (MachineBasicBlock::succ_iterator Succ = MBB.succ_begin(),
129 SuccEnd = MBB.succ_end();
130 SuccEnd != Succ; ++Succ) {
131 std::map<unsigned, unsigned>::const_iterator Key, KeyEnd;
132 for (Key = LiveAddressRegisterMap.begin(),
133 KeyEnd = LiveAddressRegisterMap.end(); KeyEnd != Key; ++Key) {
134 (*Succ)->addLiveIn(Key->second);
135 }
136 }
137 }
138
139 // Second pass - Lower the RegisterLoad instructions
140 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
141 BB != BB_E; ++BB) {
142 // Key is the address and the value is the register
143 std::map<unsigned, unsigned> LiveAddressRegisterMap;
144 MachineBasicBlock &MBB = *BB;
145
146 MachineBasicBlock::livein_iterator LI = MBB.livein_begin();
147 while (LI != MBB.livein_end()) {
148 std::vector<unsigned> PhiRegisters;
149
150 // Make sure this live in is used for indirect addressing
151 if (RegisterAddressMap.find(*LI) == RegisterAddressMap.end()) {
152 ++LI;
153 continue;
154 }
155
156 unsigned Address = RegisterAddressMap[*LI];
157 LiveAddressRegisterMap[Address] = *LI;
158 PhiRegisters.push_back(*LI);
159
160 // Check if there are other live in registers which map to the same
161 // indirect address.
162 for (MachineBasicBlock::livein_iterator LJ = llvm::next(LI),
163 LE = MBB.livein_end();
164 LJ != LE; ++LJ) {
165 unsigned Reg = *LJ;
166 if (RegisterAddressMap.find(Reg) == RegisterAddressMap.end()) {
167 continue;
168 }
169
170 if (RegisterAddressMap[Reg] == Address) {
171 if (!regHasExplicitDef(MRI, Reg)) {
172 continue;
173 }
174 PhiRegisters.push_back(Reg);
175 }
176 }
177
178 if (PhiRegisters.size() == 1) {
179 // We don't need to insert a Phi instruction, so we can just add the
180 // registers to the live list for the block.
181 LiveAddressRegisterMap[Address] = *LI;
182 MBB.removeLiveIn(*LI);
183 } else {
184 // We need to insert a PHI, because we have the same address being
185 // written in multiple predecessor blocks.
186 const TargetRegisterClass *PhiDstClass =
187 TII->getIndirectAddrStoreRegClass(*(PhiRegisters.begin()));
188 unsigned PhiDstReg = MRI.createVirtualRegister(PhiDstClass);
189 MachineInstrBuilder Phi = BuildMI(MBB, MBB.begin(),
190 MBB.findDebugLoc(MBB.begin()),
191 TII->get(AMDGPU::PHI), PhiDstReg);
192
193 for (std::vector<unsigned>::const_iterator RI = PhiRegisters.begin(),
194 RE = PhiRegisters.end();
195 RI != RE; ++RI) {
196 unsigned Reg = *RI;
197 MachineInstr *DefInst = MRI.getVRegDef(Reg);
198 assert(DefInst);
199 MachineBasicBlock *RegBlock = DefInst->getParent();
200 Phi.addReg(Reg);
201 Phi.addMBB(RegBlock);
202 MBB.removeLiveIn(Reg);
203 }
204 RegisterAddressMap[PhiDstReg] = Address;
205 LiveAddressRegisterMap[Address] = PhiDstReg;
206 }
207 LI = MBB.livein_begin();
208 }
209
210 for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
211 I != MBB.end(); I = Next) {
212 Next = llvm::next(I);
213 MachineInstr &MI = *I;
214
215 if (!TII->isRegisterLoad(MI)) {
216 if (MI.getOpcode() == AMDGPU::PHI) {
217 continue;
218 }
219 // Check for indirect register defs
220 for (unsigned OpIdx = 0, NumOperands = MI.getNumOperands();
221 OpIdx < NumOperands; ++OpIdx) {
222 MachineOperand &MO = MI.getOperand(OpIdx);
223 if (MO.isReg() && MO.isDef() &&
224 RegisterAddressMap.find(MO.getReg()) != RegisterAddressMap.end()) {
225 unsigned Reg = MO.getReg();
226 unsigned LiveAddress = RegisterAddressMap[Reg];
227 // Chain the live-ins
228 if (LiveAddressRegisterMap.find(LiveAddress) !=
229 LiveAddressRegisterMap.end()) {
230 MI.addOperand(MachineOperand::CreateReg(
231 LiveAddressRegisterMap[LiveAddress],
232 false, // isDef
233 true, // isImp
234 true)); // isKill
235 }
236 LiveAddressRegisterMap[LiveAddress] = Reg;
237 }
238 }
239 continue;
240 }
241
242 const TargetRegisterClass *SuperIndirectRegClass =
243 TII->getSuperIndirectRegClass();
244 const TargetRegisterClass *IndirectLoadRegClass =
245 TII->getIndirectAddrLoadRegClass();
246 unsigned IndirectReg = MRI.createVirtualRegister(SuperIndirectRegClass);
247
248 unsigned RegIndex = MI.getOperand(2).getImm();
249 unsigned Channel = MI.getOperand(3).getImm();
250 unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
251
252 if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
253 // Direct register access
254 unsigned Reg = LiveAddressRegisterMap[Address];
255 unsigned AddrReg = IndirectLoadRegClass->getRegister(Address);
256
257 if (regHasExplicitDef(MRI, Reg)) {
258 // If the register we are reading from has an explicit def, then that
259 // means it was written via a direct register access (i.e. COPY
260 // or other instruction that doesn't use indirect addressing). In
261 // this case we know where the value has been stored, so we can just
262 // issue a copy.
263 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
264 MI.getOperand(0).getReg())
265 .addReg(Reg);
266 } else {
267 // If the register we are reading has an implicit def, then that
268 // means it was written by an indirect register access (i.e. an
269 // instruction that uses indirect addressing).
270 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
271 MI.getOperand(0).getReg())
272 .addReg(AddrReg);
273 }
274 } else {
275 // Indirect register access
276
277 // Note on REG_SEQUENCE instructions: You can't actually use the register
278 // it defines unless you have an instruction that takes the defined
279 // register class as an operand.
280
281 MachineInstrBuilder Sequence = BuildMI(MBB, I, MBB.findDebugLoc(I),
282 TII->get(AMDGPU::REG_SEQUENCE),
283 IndirectReg);
284 for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
285 unsigned Addr = TII->calculateIndirectAddress(i, Channel);
286 if (LiveAddressRegisterMap.find(Addr) == LiveAddressRegisterMap.end()) {
287 continue;
288 }
289 unsigned Reg = LiveAddressRegisterMap[Addr];
290
291 // We only need to use REG_SEQUENCE for explicit defs, since the
292 // register coalescer won't do anything with the implicit defs.
293 MachineInstr *DefInstr = MRI.getVRegDef(Reg);
294 if (!DefInstr->getOperand(0).isReg() ||
295 DefInstr->getOperand(0).getReg() != Reg) {
296 continue;
297 }
298
299 // Insert a REG_SEQUENCE instruction to force the register allocator
300 // to allocate the virtual register to the correct physical register.
301 Sequence.addReg(LiveAddressRegisterMap[Addr]);
302 Sequence.addImm(TII->getRegisterInfo().getIndirectSubReg(Addr));
303 }
304 MachineInstrBuilder Mov = TII->buildIndirectRead(BB, I,
305 MI.getOperand(0).getReg(), // Value
306 Address,
307 MI.getOperand(1).getReg()); // Offset
308
309
310
311 Mov.addReg(IndirectReg, RegState::Implicit | RegState::Kill);
312
313 }
314 MI.eraseFromParent();
315 }
316 }
317 return false;
318 }
319
320 bool AMDGPUIndirectAddressingPass::regHasExplicitDef(MachineRegisterInfo &MRI,
321 unsigned Reg) const {
322 MachineInstr *DefInstr = MRI.getVRegDef(Reg);
323 return DefInstr && DefInstr->getOperand(0).isReg() &&
324 DefInstr->getOperand(0).getReg() == Reg;
325 }
233233 // TODO: Implement this function
234234 return true;
235235 }
236
236
237 bool AMDGPUInstrInfo::isRegisterStore(const MachineInstr &MI) const {
238 return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_STORE;
239 }
240
241 bool AMDGPUInstrInfo::isRegisterLoad(const MachineInstr &MI) const {
242 return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_LOAD;
243 }
244
245
237246 void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
238247 DebugLoc DL) const {
239248 MachineRegisterInfo &MRI = MF.getRegInfo();
3939 class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
4040 private:
4141 const AMDGPURegisterInfo RI;
42 TargetMachine &TM;
4342 bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
4443 MachineBasicBlock &MBB) const;
44 protected:
45 TargetMachine &TM;
4546 public:
4647 explicit AMDGPUInstrInfo(TargetMachine &tm);
4748
129130 bool isAExtLoadInst(llvm::MachineInstr *MI) const;
130131 bool isStoreInst(llvm::MachineInstr *MI) const;
131132 bool isTruncStoreInst(llvm::MachineInstr *MI) const;
133 bool isRegisterStore(const MachineInstr &MI) const;
134 bool isRegisterLoad(const MachineInstr &MI) const;
135
136 //===---------------------------------------------------------------------===//
137 // Pure virtual functions to be implemented by sub-classes.
138 //===---------------------------------------------------------------------===//
132139
133140 virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg,
134141 int64_t Imm) const = 0;
135142 virtual unsigned getIEQOpcode() const = 0;
136143 virtual bool isMov(unsigned opcode) const = 0;
137144
145 /// \returns the smallest register index that will be accessed by an indirect
146 /// read or write or -1 if indirect addressing is not used by this program.
147 virtual int getIndirectIndexBegin(const MachineFunction &MF) const = 0;
148
149 /// \returns the largest register index that will be accessed by an indirect
150 /// read or write or -1 if indirect addressing is not used by this program.
151 virtual int getIndirectIndexEnd(const MachineFunction &MF) const = 0;
152
153 /// \brief Calculate the "Indirect Address" for the given \p RegIndex and
154 /// \p Channel
155 ///
156 /// We model indirect addressing using a virtual address space that can be
157 /// accessed with loads and stores. The "Indirect Address" is the memory
158 /// address in this virtual address space that maps to the given \p RegIndex
159 /// and \p Channel.
160 virtual unsigned calculateIndirectAddress(unsigned RegIndex,
161 unsigned Channel) const = 0;
162
163 /// \returns The register class to be used for storing values to an
164 /// "Indirect Address" .
165 virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
166 unsigned SourceReg) const = 0;
167
168 /// \returns The register class to be used for loading values from
169 /// an "Indirect Address" .
170 virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const = 0;
171
172 /// \brief Build instruction(s) for an indirect register write.
173 ///
174 /// \returns The instruction that performs the indirect register write
175 virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
176 MachineBasicBlock::iterator I,
177 unsigned ValueReg, unsigned Address,
178 unsigned OffsetReg) const = 0;
179
180 /// \brief Build instruction(s) for an indirect register read.
181 ///
182 /// \returns The instruction that performs the indirect register read
183 virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
184 MachineBasicBlock::iterator I,
185 unsigned ValueReg, unsigned Address,
186 unsigned OffsetReg) const = 0;
187
188 /// \returns the register class whose sub registers are the set of all
189 /// possible registers that can be used for indirect addressing.
190 virtual const TargetRegisterClass *getSuperIndirectRegClass() const = 0;
191
192
138193 /// \brief Convert the AMDIL MachineInstr to a supported ISA
139194 /// MachineInstr
140195 virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
144199
145200 } // End llvm namespace
146201
202 #define AMDGPU_FLAG_REGISTER_LOAD (UINT64_C(1) << 63)
203 #define AMDGPU_FLAG_REGISTER_STORE (UINT64_C(1) << 62)
204
147205 #endif // AMDGPUINSTRINFO_H
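The two flag macros above are the C++ side of the TSFlags encoding introduced in AMDGPUInstructions.td further down: the TableGen fields isRegisterLoad and isRegisterStore are placed in TSFlags bits 63 and 62, and AMDGPUInstrInfo::isRegisterLoad/isRegisterStore mask them back out. A small sketch of the same encoding, independent of the LLVM classes (InstrDesc here is a hypothetical stand-in for MCInstrDesc):

#include <cstdint>

// Same bit assignments as AMDGPU_FLAG_REGISTER_LOAD / _STORE above.
constexpr uint64_t FlagRegisterLoad  = UINT64_C(1) << 63; // TSFlags{63} = isRegisterLoad
constexpr uint64_t FlagRegisterStore = UINT64_C(1) << 62; // TSFlags{62} = isRegisterStore

struct InstrDesc { uint64_t TSFlags; };

bool isRegisterLoad(const InstrDesc &D)  { return (D.TSFlags & FlagRegisterLoad) != 0; }
bool isRegisterStore(const InstrDesc &D) { return (D.TSFlags & FlagRegisterStore) != 0; }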
7171 def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
7272
7373 def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>;
74
75 def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",
76 SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
77 [SDNPHasChain, SDNPMayLoad]>;
78
79 def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE",
80 SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
81 [SDNPHasChain, SDNPMayStore]>;
1212 //===----------------------------------------------------------------------===//
1313
1414 class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
15 field bits<16> AMDILOp = 0;
16 field bits<3> Gen = 0;
15 field bit isRegisterLoad = 0;
16 field bit isRegisterStore = 0;
1717
1818 let Namespace = "AMDGPU";
1919 let OutOperandList = outs;
2121 let AsmString = asm;
2222 let Pattern = pattern;
2323 let Itinerary = NullALU;
24 let TSFlags{42-40} = Gen;
25 let TSFlags{63-48} = AMDILOp;
24
25 let TSFlags{63} = isRegisterLoad;
26 let TSFlags{62} = isRegisterStore;
2627 }
2728
2829 class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
100101 [{return N->isExactlyValue(1.0);}]
101102 >;
102103
103 let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in {
104 let isCodeGenOnly = 1, isPseudo = 1 in {
105
106 let usesCustomInserter = 1 in {
104107
105108 class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
106109 (outs rc:$dst),
130133 [(int_AMDGPU_shader_type imm:$type)]
131134 >;
132135
133 } // End isCodeGenOnly = 1, isPseudo = 1, hasCustomInserter = 1
136 } // usesCustomInserter = 1
137
138 multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
139 ComplexPattern addrPat> {
140 def RegisterLoad : AMDGPUShaderInst <
141 (outs dstClass:$dst),
142 (ins addrClass:$addr, i32imm:$chan),
143 "RegisterLoad $dst, $addr",
144 [(set (i32 dstClass:$dst), (AMDGPUregister_load addrPat:$addr,
145 (i32 timm:$chan)))]
146 > {
147 let isRegisterLoad = 1;
148 }
149
150 def RegisterStore : AMDGPUShaderInst <
151 (outs),
152 (ins dstClass:$val, addrClass:$addr, i32imm:$chan),
153 "RegisterStore $val, $addr",
154 [(AMDGPUregister_store (i32 dstClass:$val), addrPat:$addr, (i32 timm:$chan))]
155 > {
156 let isRegisterStore = 1;
157 }
158 }
159
160 } // End isCodeGenOnly = 1, isPseudo = 1
134161
135162 /* Generic helper patterns for intrinsics */
136163 /* -------------------------------------- */
4747 return 0;
4848 }
4949
50 unsigned AMDGPURegisterInfo::getIndirectSubReg(unsigned IndirectIndex) const {
51
52 switch(IndirectIndex) {
53 case 0: return AMDGPU::indirect_0;
54 case 1: return AMDGPU::indirect_1;
55 case 2: return AMDGPU::indirect_2;
56 case 3: return AMDGPU::indirect_3;
57 case 4: return AMDGPU::indirect_4;
58 case 5: return AMDGPU::indirect_5;
59 case 6: return AMDGPU::indirect_6;
60 case 7: return AMDGPU::indirect_7;
61 case 8: return AMDGPU::indirect_8;
62 case 9: return AMDGPU::indirect_9;
63 case 10: return AMDGPU::indirect_10;
64 case 11: return AMDGPU::indirect_11;
65 case 12: return AMDGPU::indirect_12;
66 case 13: return AMDGPU::indirect_13;
67 case 14: return AMDGPU::indirect_14;
68 case 15: return AMDGPU::indirect_15;
69 default: llvm_unreachable("indirect index out of range");
70 }
71 }
72
5073 #define GET_REGINFO_TARGET_DESC
5174 #include "AMDGPUGenRegisterInfo.inc"
5656 RegScavenger *RS) const;
5757 unsigned getFrameRegister(const MachineFunction &MF) const;
5858
59 unsigned getIndirectSubReg(unsigned IndirectIndex) const;
60
5961 };
6062
6163 } // End namespace llvm
1515 def sel_y : SubRegIndex;
1616 def sel_z : SubRegIndex;
1717 def sel_w : SubRegIndex;
18
19
20 foreach Index = 0-15 in {
21 def indirect_#Index : SubRegIndex;
22 }
23
24 def INDIRECT_BASE_ADDR : Register <"INDIRECT_BASE_ADDR">;
25
1826 }
1927
2028 include "R600RegisterInfo.td"
101101 bool AMDGPUPassConfig::addInstSelector() {
102102 addPass(createAMDGPUPeepholeOpt(*TM));
103103 addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
104
105 const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
106 if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
107 // The callbacks this pass uses are not implemented yet on SI.
108 addPass(createAMDGPUIndirectAddressingPass(*TM));
109 }
104110 return false;
105111 }
106112
1414 #ifndef AMDGPU_TARGET_MACHINE_H
1515 #define AMDGPU_TARGET_MACHINE_H
1616
17 #include "AMDGPUFrameLowering.h"
1718 #include "AMDGPUInstrInfo.h"
1819 #include "AMDGPUSubtarget.h"
19 #include "AMDILFrameLowering.h"
2020 #include "AMDILIntrinsicInfo.h"
2121 #include "R600ISelLowering.h"
2222 #include "llvm/ADT/OwningPtr.h"
+0 -47 lib/Target/R600/AMDILFrameLowering.cpp
0 //===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief Interface to describe a layout of a stack frame on a AMDGPU target
11 /// machine.
12 //
13 //===----------------------------------------------------------------------===//
14 #include "AMDILFrameLowering.h"
15 #include "llvm/CodeGen/MachineFrameInfo.h"
16
17 using namespace llvm;
18 AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
19 int LAO, unsigned TransAl)
20 : TargetFrameLowering(D, StackAl, LAO, TransAl) {
21 }
22
23 AMDGPUFrameLowering::~AMDGPUFrameLowering() {
24 }
25
26 int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
27 int FI) const {
28 const MachineFrameInfo *MFI = MF.getFrameInfo();
29 return MFI->getObjectOffset(FI);
30 }
31
32 const TargetFrameLowering::SpillSlot *
33 AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
34 NumEntries = 0;
35 return 0;
36 }
37 void
38 AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const {
39 }
40 void
41 AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
42 }
43 bool
44 AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const {
45 return false;
46 }
+0 -40 lib/Target/R600/AMDILFrameLowering.h
0 //===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief Interface to describe a layout of a stack frame on a AMDIL target
11 /// machine.
12 //
13 //===----------------------------------------------------------------------===//
14 #ifndef AMDILFRAME_LOWERING_H
15 #define AMDILFRAME_LOWERING_H
16
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/Target/TargetFrameLowering.h"
19
20 namespace llvm {
21
22 /// \brief Information about the stack frame layout on the AMDGPU targets.
23 ///
24 /// It holds the direction of the stack growth, the known stack alignment on
25 /// entry to each function, and the offset to the locals area.
26 /// See TargetFrameInfo for more comments.
27 class AMDGPUFrameLowering : public TargetFrameLowering {
28 public:
29 AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO,
30 unsigned TransAl = 1);
31 virtual ~AMDGPUFrameLowering();
32 virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
33 virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const;
34 virtual void emitPrologue(MachineFunction &MF) const;
35 virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
36 virtual bool hasFP(const MachineFunction &MF) const;
37 };
38 } // namespace llvm
39 #endif // AMDILFRAME_LOWERING_H
7474 bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
7575 bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
7676 bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
77 bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
7778
7879 // Include the pieces autogenerated from the target description.
7980 #include "AMDGPUGenDAGISel.inc"
160161 }
161162 switch (Opc) {
162163 default: break;
163 case ISD::FrameIndex: {
164 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
165 unsigned int FI = FIN->getIndex();
166 EVT OpVT = N->getValueType(0);
167 unsigned int NewOpc = AMDGPU::COPY;
168 SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
169 return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI);
170 }
171 break;
172 }
173164 case ISD::ConstantFP:
174165 case ISD::Constant: {
175166 const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
612603
613604 return true;
614605 }
606
607 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
608 SDValue &Offset) {
609 ConstantSDNode *C;
610
611 if ((C = dyn_cast<ConstantSDNode>(Addr))) {
612 Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
613 Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
614 } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
615 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
616 Base = Addr.getOperand(0);
617 Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
618 } else {
619 Base = Addr;
620 Offset = CurDAG->getTargetConstant(0, MVT::i32);
621 }
622
623 return true;
624 }
1616 AMDILDevice.cpp
1717 AMDILDeviceInfo.cpp
1818 AMDILEvergreenDevice.cpp
19 AMDILFrameLowering.cpp
2019 AMDILIntrinsicInfo.cpp
2120 AMDILISelDAGToDAG.cpp
2221 AMDILISelLowering.cpp
2423 AMDILPeepholeOptimizer.cpp
2524 AMDILSIDevice.cpp
2625 AMDGPUAsmPrinter.cpp
26 AMDGPUFrameLowering.cpp
27 AMDGPUIndirectAddressing.cpp
2728 AMDGPUMCInstLower.cpp
2829 AMDGPUSubtarget.cpp
2930 AMDGPUStructurizeCFG.cpp
104104
105105 void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo,
106106 raw_ostream &O) {
107 const MCOperand &Op = MI->getOperand(OpNo);
108 if (Op.getImm() != 0) {
109 O << " + " << Op.getImm();
110 }
107 printIfSet(MI, OpNo, O, "+");
111108 }
112109
113110 void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo,
4848 #define HW_REG_MASK 0x1ff
4949 #define HW_CHAN_SHIFT 9
5050
51 #define GET_REG_CHAN(reg) ((reg) >> HW_CHAN_SHIFT)
52 #define GET_REG_INDEX(reg) ((reg) & HW_REG_MASK)
53
5154 namespace R600Operands {
5255 enum Ops {
5356 DST,
1515 #include "R600Defines.h"
1616 #include "R600InstrInfo.h"
1717 #include "R600MachineFunctionInfo.h"
18 #include "llvm/CodeGen/MachineFrameInfo.h"
1819 #include "llvm/CodeGen/MachineInstrBuilder.h"
1920 #include "llvm/CodeGen/MachineRegisterInfo.h"
2021 #include "llvm/CodeGen/SelectionDAG.h"
7071 setOperationAction(ISD::SELECT, MVT::i32, Custom);
7172 setOperationAction(ISD::SELECT, MVT::f32, Custom);
7273
74 // Legalize loads and stores to the private address space.
75 setOperationAction(ISD::LOAD, MVT::i32, Custom);
76 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
77 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
78 setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
79 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
80 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
81 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom);
82 setOperationAction(ISD::STORE, MVT::i8, Custom);
7383 setOperationAction(ISD::STORE, MVT::i32, Custom);
84 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
7485 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
7586
7687 setOperationAction(ISD::LOAD, MVT::i32, Custom);
7788 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
89 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
90
7891 setTargetDAGCombine(ISD::FP_ROUND);
7992 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
8093
349362 case ISD::STORE: return LowerSTORE(Op, DAG);
350363 case ISD::LOAD: return LowerLOAD(Op, DAG);
351364 case ISD::FPOW: return LowerFPOW(Op, DAG);
365 case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
352366 case ISD::INTRINSIC_VOID: {
353367 SDValue Chain = Op.getOperand(0);
354368 unsigned IntrinsicID =
484498 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
485499 return;
486500 }
501 case ISD::STORE:
502 SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
503 Results.push_back(SDValue(Node, 0));
504 return;
487505 }
488506 }
489507
549567 DAG.getConstant(ByteOffset, MVT::i32), // PTR
550568 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
551569 false, false, false, 0);
570 }
571
572 SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
573
574 MachineFunction &MF = DAG.getMachineFunction();
575 const AMDGPUFrameLowering *TFL =
576 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
577
578 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
579 assert(FIN);
580
581 unsigned FrameIndex = FIN->getIndex();
582 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
583 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
552584 }
553585
554586 SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
765797 return Cond;
766798 }
767799
800 /// LLVM generates byte-addressed pointers. For indirect addressing, we need to
801 /// convert these pointers to a register index. Each register holds
802 /// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
803 /// \p StackWidth, which tells us how many of the 4 sub-registers will be used
804 /// for indirect addressing.
805 SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
806 unsigned StackWidth,
807 SelectionDAG &DAG) const {
808 unsigned SRLPad;
809 switch(StackWidth) {
810 case 1:
811 SRLPad = 2;
812 break;
813 case 2:
814 SRLPad = 3;
815 break;
816 case 4:
817 SRLPad = 4;
818 break;
819 default: llvm_unreachable("Invalid stack width");
820 }
821
822 return DAG.getNode(ISD::SRL, Ptr.getDebugLoc(), Ptr.getValueType(), Ptr,
823 DAG.getConstant(SRLPad, MVT::i32));
824 }
825
826 void R600TargetLowering::getStackAddress(unsigned StackWidth,
827 unsigned ElemIdx,
828 unsigned &Channel,
829 unsigned &PtrIncr) const {
830 switch (StackWidth) {
831 default:
832 case 1:
833 Channel = 0;
834 if (ElemIdx > 0) {
835 PtrIncr = 1;
836 } else {
837 PtrIncr = 0;
838 }
839 break;
840 case 2:
841 Channel = ElemIdx % 2;
842 if (ElemIdx == 2) {
843 PtrIncr = 1;
844 } else {
845 PtrIncr = 0;
846 }
847 break;
848 case 4:
849 Channel = ElemIdx;
850 PtrIncr = 0;
851 break;
852 }
853 }
854
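Putting stackPtrToRegIndex and getStackAddress together: the byte address produced by LowerFrameIndex is shifted down so that one register row (StackWidth 32-bit channels) corresponds to one index, and the element index then picks the channel and any extra pointer increment. A worked example under the hard-coded StackWidth of 1, using ordinary integers in place of SDValues:

#include <cstdio>

// With int4 stack[2] and StackWidth = 1, stack[1].y sits at byte offset
// (4 + 1) * 4 = 20; shifting by SRLPad = 2 gives register index 5, which is
// T5.X in the StackWidth = 1 layout shown in AMDGPUFrameLowering.cpp.
int main() {
  unsigned ByteOffset = 20;  // stack[1].y
  unsigned SRLPad = 2;       // StackWidth 1 -> one 4-byte channel per row
  std::printf("register index %u, channel 0\n", ByteOffset >> SRLPad);
  return 0;
}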
768855 SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
769856 DebugLoc DL = Op.getDebugLoc();
770857 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
786873 }
787874 return Chain;
788875 }
789 return SDValue();
876
877 EVT ValueVT = Value.getValueType();
878
879 if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
880 return SDValue();
881 }
882
883 // Lowering for indirect addressing
884
885 const MachineFunction &MF = DAG.getMachineFunction();
886 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
887 getTargetMachine().getFrameLowering());
888 unsigned StackWidth = TFL->getStackWidth(MF);
889
890 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
891
892 if (ValueVT.isVector()) {
893 unsigned NumElemVT = ValueVT.getVectorNumElements();
894 EVT ElemVT = ValueVT.getVectorElementType();
895 SDValue Stores[4];
896
897 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
898 "vector width in load");
899
900 for (unsigned i = 0; i < NumElemVT; ++i) {
901 unsigned Channel, PtrIncr;
902 getStackAddress(StackWidth, i, Channel, PtrIncr);
903 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
904 DAG.getConstant(PtrIncr, MVT::i32));
905 SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
906 Value, DAG.getConstant(i, MVT::i32));
907
908 Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
909 Chain, Elem, Ptr,
910 DAG.getTargetConstant(Channel, MVT::i32));
911 }
912 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
913 } else {
914 if (ValueVT == MVT::i8) {
915 Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
916 }
917 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
918 DAG.getTargetConstant(0, MVT::i32)); // Channel
919 }
920
921 return Chain;
790922 }
791923
792924 // return (512 + (kc_bank << 12)
8751007 return DAG.getMergeValues(MergedValues, 2, DL);
8761008 }
8771009
878 return SDValue();
1010 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1011 return SDValue();
1012 }
1013
1014 // Lowering for indirect addressing
1015 const MachineFunction &MF = DAG.getMachineFunction();
1016 const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
1017 getTargetMachine().getFrameLowering());
1018 unsigned StackWidth = TFL->getStackWidth(MF);
1019
1020 Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
1021
1022 if (VT.isVector()) {
1023 unsigned NumElemVT = VT.getVectorNumElements();
1024 EVT ElemVT = VT.getVectorElementType();
1025 SDValue Loads[4];
1026
1027 assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
1028 "vector width in load");
1029
1030 for (unsigned i = 0; i < NumElemVT; ++i) {
1031 unsigned Channel, PtrIncr;
1032 getStackAddress(StackWidth, i, Channel, PtrIncr);
1033 Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
1034 DAG.getConstant(PtrIncr, MVT::i32));
1035 Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
1036 Chain, Ptr,
1037 DAG.getTargetConstant(Channel, MVT::i32),
1038 Op.getOperand(2));
1039 }
1040 for (unsigned i = NumElemVT; i < 4; ++i) {
1041 Loads[i] = DAG.getUNDEF(ElemVT);
1042 }
1043 EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
1044 LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
1045 } else {
1046 LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
1047 Chain, Ptr,
1048 DAG.getTargetConstant(0, MVT::i32), // Channel
1049 Op.getOperand(2));
1050 }
1051
1052 SDValue Ops[2];
1053 Ops[0] = LoweredLoad;
1054 Ops[1] = Chain;
1055
1056 return DAG.getMergeValues(Ops, 2, DL);
8791057 }
8801058
8811059 SDValue R600TargetLowering::LowerFPOW(SDValue Op,
6363 SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
6464 SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
6565 SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
66
66 SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
67
68 SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth,
69 SelectionDAG &DAG) const;
70 void getStackAddress(unsigned StackWidth, unsigned ElemIdx,
71 unsigned &Channel, unsigned &PtrIncr) const;
6772 bool isZero(SDValue Op) const;
6873 };
6974
1515 #include "AMDGPUSubtarget.h"
1616 #include "AMDGPUTargetMachine.h"
1717 #include "R600Defines.h"
18 #include "R600MachineFunctionInfo.h"
1819 #include "R600RegisterInfo.h"
1920 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
2023
2124 #define GET_INSTRINFO_CTOR
2225 #include "AMDGPUGenDFAPacketizer.inc"
463466 *PredCost = 2;
464467 return 2;
465468 }
469
470 int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
471 const MachineRegisterInfo &MRI = MF.getRegInfo();
472 const MachineFrameInfo *MFI = MF.getFrameInfo();
473 int Offset = 0;
474
475 if (MFI->getNumObjects() == 0) {
476 return -1;
477 }
478
479 if (MRI.livein_empty()) {
480 return 0;
481 }
482
483 for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
484 LE = MRI.livein_end();
485 LI != LE; ++LI) {
486 Offset = std::max(Offset,
487 GET_REG_INDEX(RI.getEncodingValue(LI->first)));
488 }
489
490 return Offset + 1;
491 }
492
493 int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
494 int Offset = 0;
495 const MachineFrameInfo *MFI = MF.getFrameInfo();
496
497 // Variable sized objects are not supported
498 assert(!MFI->hasVarSizedObjects());
499
500 if (MFI->getNumObjects() == 0) {
501 return -1;
502 }
503
504 Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);
505
506 return getIndirectIndexBegin(MF) + Offset;
507 }
508
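getIndirectIndexBegin and getIndirectIndexEnd bound the block of register indices the indirect-addressing code may touch: the range starts just above the highest register index already claimed by a function live-in, and it extends by the whole frame size in registers, which is what getFrameIndexOffset(MF, -1) returns. A standalone sketch of that arithmetic, with hypothetical integer inputs in place of the MachineFunction queries (the no-stack-objects case, where both functions return -1, is left out):

#include <algorithm>
#include <vector>

int indirectIndexBegin(const std::vector<int> &LiveInRegIndices) {
  if (LiveInRegIndices.empty())
    return 0;
  return *std::max_element(LiveInRegIndices.begin(), LiveInRegIndices.end()) + 1;
}

int indirectIndexEnd(const std::vector<int> &LiveInRegIndices,
                     int FrameSizeInRegs) {
  return indirectIndexBegin(LiveInRegIndices) + FrameSizeInRegs;
}

// Example: live-ins occupy indices 0..2 and the frame needs 5 registers, so
// indirectIndexBegin({0, 1, 2}) == 3, indirectIndexEnd({0, 1, 2}, 5) == 8,
// and getIndirectReservedRegs would reserve the T3..T8 rows (plus their
// X channels at the current stack width).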
509 std::vector<unsigned> R600InstrInfo::getIndirectReservedRegs(
510 const MachineFunction &MF) const {
511 const AMDGPUFrameLowering *TFL =
512 static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering());
513 std::vector<unsigned> Regs;
514
515 unsigned StackWidth = TFL->getStackWidth(MF);
516 int End = getIndirectIndexEnd(MF);
517
518 if (End == -1) {
519 return Regs;
520 }
521
522 for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
523 unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
524 Regs.push_back(SuperReg);
525 for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
526 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
527 Regs.push_back(Reg);
528 }
529 }
530 return Regs;
531 }
532
533 unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
534 unsigned Channel) const {
535 // XXX: Remove when we support a stack width > 2
536 assert(Channel == 0);
537 return RegIndex;
538 }
539
540 const TargetRegisterClass * R600InstrInfo::getIndirectAddrStoreRegClass(
541 unsigned SourceReg) const {
542 return &AMDGPU::R600_TReg32RegClass;
543 }
544
545 const TargetRegisterClass *R600InstrInfo::getIndirectAddrLoadRegClass() const {
546 return &AMDGPU::TRegMemRegClass;
547 }
548
549 MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
550 MachineBasicBlock::iterator I,
551 unsigned ValueReg, unsigned Address,
552 unsigned OffsetReg) const {
553 unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
554 MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
555 AMDGPU::AR_X, OffsetReg);
556 setImmOperand(MOVA, R600Operands::WRITE, 0);
557
558 MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
559 AddrReg, ValueReg)
560 .addReg(AMDGPU::AR_X, RegState::Implicit);
561 setImmOperand(Mov, R600Operands::DST_REL, 1);
562 return Mov;
563 }
564
565 MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
566 MachineBasicBlock::iterator I,
567 unsigned ValueReg, unsigned Address,
568 unsigned OffsetReg) const {
569 unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
570 MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
571 AMDGPU::AR_X,
572 OffsetReg);
573 setImmOperand(MOVA, R600Operands::WRITE, 0);
574 MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
575 ValueReg,
576 AddrReg)
577 .addReg(AMDGPU::AR_X, RegState::Implicit);
578 setImmOperand(Mov, R600Operands::SRC0_REL, 1);
579
580 return Mov;
581 }
582
583 const TargetRegisterClass *R600InstrInfo::getSuperIndirectRegClass() const {
584 return &AMDGPU::IndirectRegRegClass;
585 }
586
466587
467588 MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
468589 MachineBasicBlock::iterator I,
112112 virtual int getInstrLatency(const InstrItineraryData *ItinData,
113113 SDNode *Node) const { return 1;}
114114
115 /// \returns a list of all the registers that may be accessed using indirect
116 /// addressing.
117 std::vector<unsigned> getIndirectReservedRegs(const MachineFunction &MF) const;
118
119 virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
120
121 virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
122
123
124 virtual unsigned calculateIndirectAddress(unsigned RegIndex,
125 unsigned Channel) const;
126
127 virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
128 unsigned SourceReg) const;
129
130 virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
131
132 virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
133 MachineBasicBlock::iterator I,
134 unsigned ValueReg, unsigned Address,
135 unsigned OffsetReg) const;
136
137 virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
138 MachineBasicBlock::iterator I,
139 unsigned ValueReg, unsigned Address,
140 unsigned OffsetReg) const;
141
142 virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
143
144
145 /// buildDefaultInstruction - This function returns a MachineInstr with
146 /// all the instruction modifiers initialized to their default values.
115147 /// You can use this function to avoid manually specifying each instruction
116148 /// modifier operand when building a new instruction.
117149 ///
9090 // default to 0.
9191 def LAST : InstFlag<"printLast", 1>;
9292
93 def FRAMEri : Operand<iPTR> {
94 let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
95 }
96
9397 def ADDRParam : ComplexPattern;
9498 def ADDRDWord : ComplexPattern;
9599 def ADDRVTX_READ : ComplexPattern;
96100 def ADDRGA_CONST_OFFSET : ComplexPattern;
97101 def ADDRGA_VAR_OFFSET : ComplexPattern;
102 def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
98103
99104 class R600ALU_Word0 {
100105 field bits<32> Word0;
12191224 defm DOT4_eg : DOT4_Common<0xBE>;
12201225 defm CUBE_eg : CUBE_Common<0xC0>;
12211226
1227 let hasSideEffects = 1 in {
1228 def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", []>;
1229 }
1230
12221231 def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common;
12231232
12241233 def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
14681477 >;
14691478
14701479 }
1480
1481 //===----------------------------------------------------------------------===//
1482 // Register loads and stores - for indirect addressing
1483 //===----------------------------------------------------------------------===//
1484
1485 defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
14711486
14721487 let Predicates = [isCayman] in {
14731488
1212 #ifndef R600MACHINEFUNCTIONINFO_H
1313 #define R600MACHINEFUNCTIONINFO_H
1414
15 #include "llvm/ADT/BitVector.h"
1516 #include "llvm/CodeGen/MachineFunction.h"
1617 #include "llvm/CodeGen/SelectionDAG.h"
1718 #include <vector>
2324 public:
2425 R600MachineFunctionInfo(const MachineFunction &MF);
2526 SmallVector LiveOuts;
27 std::vector<unsigned> IndirectRegs;
2628 SDNode *Outputs[16];
2729 };
2830
1414 #include "R600RegisterInfo.h"
1515 #include "AMDGPUTargetMachine.h"
1616 #include "R600Defines.h"
17 #include "R600InstrInfo.h"
1718 #include "R600MachineFunctionInfo.h"
1819
1920 using namespace llvm;
4243 Reserved.set(AMDGPU::PRED_SEL_ZERO);
4344 Reserved.set(AMDGPU::PRED_SEL_ONE);
4445
46 for (TargetRegisterClass::iterator I = AMDGPU::R600_AddrRegClass.begin(),
47 E = AMDGPU::R600_AddrRegClass.end(); I != E; ++I) {
48 Reserved.set(*I);
49 }
50
51 const R600InstrInfo *RII = static_cast<const R600InstrInfo *>(&TII);
52 std::vector<unsigned> IndirectRegs = RII->getIndirectReservedRegs(MF);
53 for (std::vector<unsigned>::iterator I = IndirectRegs.begin(),
54 E = IndirectRegs.end();
55 I != E; ++I) {
56 Reserved.set(*I);
57 }
4558 return Reserved;
4659 }
4760
7689 case 3: return AMDGPU::sel_w;
7790 }
7891 }
92
2626 foreach Chan = [ "X", "Y", "Z", "W" ] in {
2727 // 32-bit Temporary Registers
2828 def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>;
29
30 // Indirect addressing offset registers
31 def Addr#Index#_#Chan : R600RegWithChan <"T("#Index#" + AR.x)."#Chan,
32 Index, Chan>;
33 def TRegMem#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index,
34 Chan>;
2935 }
3036 // 128-bit Temporary Registers
3137 def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW",
5662 def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
5763 def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
5864 def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>;
65 def AR_X : R600Reg<"AR.x", 0>;
5966
6067 def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
6168 (add (sequence "ArrayBase%u", 448, 464))>;
6471 def ALU_CONST : R600Reg<"CBuf", 0>;
6572 // interpolation param reference, SRCx_SEL contains index
6673 def ALU_PARAM : R600Reg<"Param", 0>;
74
75 let isAllocatable = 0 in {
76
77 // XXX: Only use the X channel, until we support wider stack widths
78 def R600_Addr : RegisterClass <"AMDGPU", [i32], 127, (add (sequence "Addr%u_X", 0, 127))>;
79
80 } // End isAllocatable = 0
6781
6882 def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
6983 (add (sequence "T%u_X", 0, 127))>;
8498 def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
8599 R600_TReg32,
86100 R600_ArrayBase,
101 R600_Addr,
87102 ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF,
88103 ALU_CONST, ALU_PARAM
89104 )>;
98113 (add (sequence "T%u_XYZW", 0, 127))> {
99114 let CopyCost = -1;
100115 }
116
117 //===----------------------------------------------------------------------===//
118 // Register classes for indirect addressing
119 //===----------------------------------------------------------------------===//
120
121 // Super register for all the Indirect Registers. This register class is used
122 // by the REG_SEQUENCE instruction to specify the registers to use for direct
123 // reads / writes which may be written / read by an indirect address.
124 class IndirectSuper <string n, list<Register> subregs> :
125 RegisterWithSubRegs<n, subregs> {
126 let Namespace = "AMDGPU";
127 let SubRegIndices =
128 [indirect_0,indirect_1,indirect_2,indirect_3,indirect_4,indirect_5,indirect_6,
129 indirect_7,indirect_8,indirect_9,indirect_10,indirect_11,indirect_12,
130 indirect_13,indirect_14,indirect_15];
131 }
132
133 def IndirectSuperReg : IndirectSuper<"Indirect",
134 [TRegMem0_X, TRegMem1_X, TRegMem2_X, TRegMem3_X, TRegMem4_X, TRegMem5_X,
135 TRegMem6_X, TRegMem7_X, TRegMem8_X, TRegMem9_X, TRegMem10_X, TRegMem11_X,
136 TRegMem12_X, TRegMem13_X, TRegMem14_X, TRegMem15_X]
137 >;
138
139 def IndirectReg : RegisterClass<"AMDGPU", [f32, i32], 32, (add IndirectSuperReg)>;
140
141 // This register class defines the registers that are the storage units for
142 // the "Indirect Addressing" pseudo memory space.
143 // XXX: Only use the X channel, until we support wider stack widths
144 def TRegMem : RegisterClass<"AMDGPU", [f32, i32], 32,
145 (add (sequence "TRegMem%u_X", 0, 16))
146 >;
8686 SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
8787 return RC != &AMDGPU::EXECRegRegClass;
8888 }
89
90 //===----------------------------------------------------------------------===//
91 // Indirect addressing callbacks
92 //===----------------------------------------------------------------------===//
93
94 unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
95 unsigned Channel) const {
96 assert(Channel == 0);
97 return RegIndex;
98 }
99
100
101 int SIInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
102 llvm_unreachable("Unimplemented");
103 }
104
105 int SIInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
106 llvm_unreachable("Unimplemented");
107 }
108
109 const TargetRegisterClass *SIInstrInfo::getIndirectAddrStoreRegClass(
110 unsigned SourceReg) const {
111 llvm_unreachable("Unimplemented");
112 }
113
114 const TargetRegisterClass *SIInstrInfo::getIndirectAddrLoadRegClass() const {
115 llvm_unreachable("Unimplemented");
116 }
117
118 MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
119 MachineBasicBlock *MBB,
120 MachineBasicBlock::iterator I,
121 unsigned ValueReg,
122 unsigned Address, unsigned OffsetReg) const {
123 llvm_unreachable("Unimplemented");
124 }
125
126 MachineInstrBuilder SIInstrInfo::buildIndirectRead(
127 MachineBasicBlock *MBB,
128 MachineBasicBlock::iterator I,
129 unsigned ValueReg,
130 unsigned Address, unsigned OffsetReg) const {
131 llvm_unreachable("Unimplemented");
132 }
133
134 const TargetRegisterClass *SIInstrInfo::getSuperIndirectRegClass() const {
135 llvm_unreachable("Unimplemented");
136 }
4747 virtual bool isMov(unsigned Opcode) const;
4848
4949 virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
50
51 virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
52
53 virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
54
55 virtual unsigned calculateIndirectAddress(unsigned RegIndex,
56 unsigned Channel) const;
57
58 virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
59 unsigned SourceReg) const;
60
61 virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
62
63 virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
64 MachineBasicBlock::iterator I,
65 unsigned ValueReg,
66 unsigned Address,
67 unsigned OffsetReg) const;
68
69 virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
70 MachineBasicBlock::iterator I,
71 unsigned ValueReg,
72 unsigned Address,
73 unsigned OffsetReg) const;
74
75 virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
5076 };
5177
5278 } // End namespace llvm