llvm.org GIT mirror: llvm / a2b4eb6

R600/SI: Add support for private address space load/store

Private address space is emulated using the register file with MOVRELS and MOVRELD instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194626 91177308-0d34-0410-b5e6-96231b3b80d8

Tom Stellard, 6 years ago
29 changed files with 525 additions and 250 deletions.
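For context, here is a minimal sketch of the kind of kernel this change handles (a hypothetical example, not one of the tests added below): a dynamically indexed alloca lives in the private address space, its accesses are lowered through AMDGPUISD::REGISTER_LOAD/REGISTER_STORE to SI_RegisterLoad/SI_RegisterStorePseudo, and those are finally expanded into V_MOVRELS_B32/V_MOVRELD_B32 operations over the register file.

; Hypothetical kernel, assuming the same llc pipeline as the tests below
; (-march=r600 -mcpu=SI); the dynamic index %idx forces indirect addressing.
define void @private_sketch(i32 addrspace(1)* %out, i32 %idx) {
entry:
  %buf = alloca [4 x i32], align 4
  %p = getelementptr [4 x i32]* %buf, i32 0, i32 %idx
  store i32 42, i32* %p                  ; emulated with a V_MOVRELD_B32 register write
  %v = load i32* %p                      ; emulated with a V_MOVRELS_B32 register read
  store i32 %v, i32 addrspace(1)* %out
  ret void
}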
308308 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
309309 SDLoc(N), N->getValueType(0), Ops);
310310 }
311 case AMDGPUISD::REGISTER_LOAD: {
312 const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
313 if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
314 break;
315 SDValue Addr, Offset;
316
317 SelectADDRIndirect(N->getOperand(1), Addr, Offset);
318 const SDValue Ops[] = {
319 Addr,
320 Offset,
321 CurDAG->getTargetConstant(0, MVT::i32),
322 N->getOperand(0),
323 };
324 return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, SDLoc(N),
325 CurDAG->getVTList(MVT::i32, MVT::i64, MVT::Other),
326 Ops);
327 }
328 case AMDGPUISD::REGISTER_STORE: {
329 const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
330 if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
331 break;
332 SDValue Addr, Offset;
333 SelectADDRIndirect(N->getOperand(2), Addr, Offset);
334 const SDValue Ops[] = {
335 N->getOperand(1),
336 Addr,
337 Offset,
338 CurDAG->getTargetConstant(0, MVT::i32),
339 N->getOperand(0),
340 };
341 return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, SDLoc(N),
342 CurDAG->getVTList(MVT::Other),
343 Ops);
344 }
311345 }
312346 return SelectCode(N);
313347 }
1414
1515 #include "AMDGPUISelLowering.h"
1616 #include "AMDGPU.h"
17 #include "AMDGPUFrameLowering.h"
1718 #include "AMDGPURegisterInfo.h"
1819 #include "AMDGPUSubtarget.h"
1920 #include "AMDILIntrinsicInfo.h"
249250 // AMDGPU DAG lowering
250251 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
251252 case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
253 case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
252254 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
253 case ISD::STORE: return LowerSTORE(Op, DAG);
254255 case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
255256 case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
256257 }
325326 &Args[0], Args.size());
326327 }
327328
329 SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op,
330 SelectionDAG &DAG) const {
331
332 MachineFunction &MF = DAG.getMachineFunction();
333 const AMDGPUFrameLowering *TFL =
334 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
335
336 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
337 assert(FIN);
338
339 unsigned FrameIndex = FIN->getIndex();
340 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
341 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF),
342 Op.getValueType());
343 }
328344
329345 SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
330346 SelectionDAG &DAG) const {
562578 }
563579
564580 StoreSDNode *Store = cast<StoreSDNode>(Op);
565 if (Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
581 if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
582 Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
566583 Store->getValue().getValueType().isVector()) {
567584 return SplitVectorStore(Op, DAG);
568585 }
2727 void ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
2828 SmallVectorImpl<SDValue> &Args,
2929 unsigned Start, unsigned Count) const;
30 SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
3031 SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
3132 SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
3233 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
119119
120120 bool AMDGPUInstrInfo::expandPostRAPseudo (MachineBasicBlock::iterator MI) const {
121121 MachineBasicBlock *MBB = MI->getParent();
122 int OffsetOpIdx =
123 AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::addr);
124 // addr is a custom operand with multiple MI operands, and only the
125 // first MI operand is given a name.
126 int RegOpIdx = OffsetOpIdx + 1;
127 int ChanOpIdx =
128 AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::chan);
122129
123130 if (isRegisterLoad(*MI)) {
124 unsigned RegIndex = MI->getOperand(2).getImm();
125 unsigned Channel = MI->getOperand(3).getImm();
131 int DstOpIdx =
132 AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
133 unsigned RegIndex = MI->getOperand(RegOpIdx).getImm();
134 unsigned Channel = MI->getOperand(ChanOpIdx).getImm();
126135 unsigned Address = calculateIndirectAddress(RegIndex, Channel);
127 unsigned OffsetReg = MI->getOperand(1).getReg();
136 unsigned OffsetReg = MI->getOperand(OffsetOpIdx).getReg();
128137 if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
129 buildMovInstr(MBB, MI, MI->getOperand(0).getReg(),
138 buildMovInstr(MBB, MI, MI->getOperand(DstOpIdx).getReg(),
130139 getIndirectAddrRegClass()->getRegister(Address));
131140 } else {
132 buildIndirectRead(MBB, MI, MI->getOperand(0).getReg(),
141 buildIndirectRead(MBB, MI, MI->getOperand(DstOpIdx).getReg(),
133142 Address, OffsetReg);
134143 }
135144 } else if (isRegisterStore(*MI)) {
136 unsigned RegIndex = MI->getOperand(2).getImm();
137 unsigned Channel = MI->getOperand(3).getImm();
145 int ValOpIdx =
146 AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::val);
148 unsigned RegIndex = MI->getOperand(RegOpIdx).getImm();
149 unsigned Channel = MI->getOperand(ChanOpIdx).getImm();
138150 unsigned Address = calculateIndirectAddress(RegIndex, Channel);
139 unsigned OffsetReg = MI->getOperand(1).getReg();
151 unsigned OffsetReg = MI->getOperand(OffsetOpIdx).getReg();
140152 if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
141153 buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address),
142 MI->getOperand(0).getReg());
154 MI->getOperand(ValOpIdx).getReg());
143155 } else {
144 buildIndirectWrite(MBB, MI, MI->getOperand(0).getReg(),
145 calculateIndirectAddress(RegIndex, Channel),
146 OffsetReg);
156 buildIndirectWrite(MBB, MI, MI->getOperand(ValOpIdx).getReg(),
157 calculateIndirectAddress(RegIndex, Channel),
158 OffsetReg);
147159 }
148160 } else {
149161 return false;
259271 return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_LOAD;
260272 }
261273
274 int AMDGPUInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
275 const MachineRegisterInfo &MRI = MF.getRegInfo();
276 const MachineFrameInfo *MFI = MF.getFrameInfo();
277 int Offset = -1;
278
279 if (MFI->getNumObjects() == 0) {
280 return -1;
281 }
282
283 if (MRI.livein_empty()) {
284 return 0;
285 }
286
287 const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass();
288 for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
289 LE = MRI.livein_end();
290 LI != LE; ++LI) {
291 unsigned Reg = LI->first;
292 if (TargetRegisterInfo::isVirtualRegister(Reg) ||
293 !IndirectRC->contains(Reg))
294 continue;
295
296 unsigned RegIndex;
297 unsigned RegEnd;
298 for (RegIndex = 0, RegEnd = IndirectRC->getNumRegs(); RegIndex != RegEnd;
299 ++RegIndex) {
300 if (IndirectRC->getRegister(RegIndex) == Reg)
301 break;
302 }
303 Offset = std::max(Offset, (int)RegIndex);
304 }
305
306 return Offset + 1;
307 }
308
309 int AMDGPUInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
310 int Offset = 0;
311 const MachineFrameInfo *MFI = MF.getFrameInfo();
312
313 // Variable sized objects are not supported
314 assert(!MFI->hasVarSizedObjects());
315
316 if (MFI->getNumObjects() == 0) {
317 return -1;
318 }
319
320 Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);
321
322 return getIndirectIndexBegin(MF) + Offset;
323 }
324
262325
263326 void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
264327 DebugLoc DL) const {
9898 MachineInstr *MI,
9999 const SmallVectorImpl<unsigned> &Ops,
100100 MachineInstr *LoadMI) const;
101 /// \returns the smallest register index that will be accessed by an indirect
102 /// read or write or -1 if indirect addressing is not used by this program.
103 virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
104
105 /// \returns the largest register index that will be accessed by an indirect
106 /// read or write or -1 if indirect addressing is not used by this program.
107 virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
108
101109 public:
102110 bool canFoldMemoryOperand(const MachineInstr *MI,
103111 const SmallVectorImpl<unsigned> &Ops) const;
143151 virtual unsigned getIEQOpcode() const = 0;
144152 virtual bool isMov(unsigned opcode) const = 0;
145153
146 /// \returns the smallest register index that will be accessed by an indirect
147 /// read or write or -1 if indirect addressing is not used by this program.
148 virtual int getIndirectIndexBegin(const MachineFunction &MF) const = 0;
149
150 /// \returns the largest register index that will be accessed by an indirect
151 /// read or write or -1 if indirect addressing is not used by this program.
152 virtual int getIndirectIndexEnd(const MachineFunction &MF) const = 0;
153
154154 /// \brief Calculate the "Indirect Address" for the given \p RegIndex and
155155 /// \p Channel
156156 ///
3434 }
3535
3636 def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
37 def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
3738
3839 def COND_EQ : PatLeaf <
3940 (cond),
276277
277278 multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
278279 ComplexPattern addrPat> {
280 let UseNamedOperandTable = 1 in {
281
279282 def RegisterLoad : AMDGPUShaderInst <
280283 (outs dstClass:$dst),
281284 (ins addrClass:$addr, i32imm:$chan),
294297 let isRegisterStore = 1;
295298 }
296299 }
300 }
297301
298302 } // End isCodeGenOnly = 1, isPseudo = 1
299303
4949 assert(!"Unimplemented"); return NULL;
5050 }
5151
52 virtual unsigned getHWRegIndex(unsigned Reg) const {
53 assert(!"Unimplemented"); return 0;
54 }
55
5256 /// \returns the sub reg enum value for the given \p Channel
5357 /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
5458 unsigned getSubRegFromChannel(unsigned Channel) const;
518518 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
519519 case ISD::STORE: return LowerSTORE(Op, DAG);
520520 case ISD::LOAD: return LowerLOAD(Op, DAG);
521 case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
522521 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
523522 case ISD::INTRINSIC_VOID: {
524523 SDValue Chain = Op.getOperand(0);
842841 false, false, false, 0);
843842 }
844843
845 SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
846
847 MachineFunction &MF = DAG.getMachineFunction();
848 const AMDGPUFrameLowering *TFL =
849 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
850
851 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
852 assert(FIN);
853
854 unsigned FrameIndex = FIN->getIndex();
855 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
856 return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
857 }
858
859844 bool R600TargetLowering::isZero(SDValue Op) const {
860845 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
861846 return Cst->isNullValue();
5858 SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
5959 SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
6060 SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
61 SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
6261 SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
6362
6463 SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth,
10231023 return 2;
10241024 }
10251025
1026 int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
1027 const MachineRegisterInfo &MRI = MF.getRegInfo();
1028 const MachineFrameInfo *MFI = MF.getFrameInfo();
1029 int Offset = 0;
1030
1031 if (MFI->getNumObjects() == 0) {
1032 return -1;
1033 }
1034
1035 if (MRI.livein_empty()) {
1036 return 0;
1037 }
1038
1039 for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
1040 LE = MRI.livein_end();
1041 LI != LE; ++LI) {
1042 Offset = std::max(Offset,
1043 GET_REG_INDEX(RI.getEncodingValue(LI->first)));
1044 }
1045
1046 return Offset + 1;
1047 }
1048
1049 int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
1050 int Offset = 0;
1051 const MachineFrameInfo *MFI = MF.getFrameInfo();
1052
1053 // Variable sized objects are not supported
1054 assert(!MFI->hasVarSizedObjects());
1055
1056 if (MFI->getNumObjects() == 0) {
1057 return -1;
1058 }
1059
1060 Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);
1061
1062 return getIndirectIndexBegin(MF) + Offset;
1063 }
1064
1065 std::vector<unsigned> R600InstrInfo::getIndirectReservedRegs(
1026 void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
10661027 const MachineFunction &MF) const {
10671028 const AMDGPUFrameLowering *TFL =
10681029 static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering());
1069 std::vector<unsigned> Regs;
10701030
10711031 unsigned StackWidth = TFL->getStackWidth(MF);
10721032 int End = getIndirectIndexEnd(MF);
10731033
1074 if (End == -1) {
1075 return Regs;
1076 }
1034 if (End == -1)
1035 return;
10771036
10781037 for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
10791038 unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
1080 Regs.push_back(SuperReg);
1039 Reserved.set(SuperReg);
10811040 for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
10821041 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
1083 Regs.push_back(Reg);
1084 }
1085 }
1086 return Regs;
1042 Reserved.set(Reg);
1043 }
1044 }
10871045 }
10881046
10891047 unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
192192 virtual int getInstrLatency(const InstrItineraryData *ItinData,
193193 SDNode *Node) const { return 1;}
194194
195 /// \returns a list of all the registers that may be accessed using indirect
196 /// addressing.
197 std::vector<unsigned> getIndirectReservedRegs(const MachineFunction &MF) const;
198
199 virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
200
201 virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
202
195 /// \brief Reserve the registers that may be accessed using indirect addressing.
196 void reserveIndirectRegisters(BitVector &Reserved,
197 const MachineFunction &MF) const;
203198
204199 virtual unsigned calculateIndirectAddress(unsigned RegIndex,
205200 unsigned Channel) const;
7474 def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
7575 def ADDRGA_CONST_OFFSET : ComplexPattern;
7676 def ADDRGA_VAR_OFFSET : ComplexPattern;
77 def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
7877
7978
8079 def R600_Pred : PredicateOperand
2727 BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
2828 BitVector Reserved(getNumRegs());
2929
30 const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo());
31
3032 Reserved.set(AMDGPU::ZERO);
3133 Reserved.set(AMDGPU::HALF);
3234 Reserved.set(AMDGPU::ONE);
4749 Reserved.set(*I);
4850 }
4951
50 const R600InstrInfo *RII =
51 static_cast<const R600InstrInfo*>(TM.getInstrInfo());
52 std::vector<unsigned> IndirectRegs = RII->getIndirectReservedRegs(MF);
53 for (std::vector<unsigned>::iterator I = IndirectRegs.begin(),
54 E = IndirectRegs.end();
55 I != E; ++I) {
56 Reserved.set(*I);
57 }
52 TII->reserveIndirectRegisters(Reserved, MF);
53
5854 return Reserved;
5955 }
6056
7268 return this->getEncodingValue(reg) >> HW_CHAN_SHIFT;
7369 }
7470
71 unsigned R600RegisterInfo::getHWRegIndex(unsigned Reg) const {
72 return GET_REG_INDEX(getEncodingValue(Reg));
73 }
74
7575 const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
7676 MVT VT) const {
7777 switch(VT.SimpleTy) {
3838 /// \brief get the HW encoding for a register's channel.
3939 unsigned getHWRegChan(unsigned reg) const;
4040
41 virtual unsigned getHWRegIndex(unsigned Reg) const;
42
4143 /// \brief get the register class of the specified type to use in the
4244 /// CFGStructurizer
4345 virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
7474 setOperationAction(ISD::STORE, MVT::v8i32, Custom);
7575 setOperationAction(ISD::STORE, MVT::v16i32, Custom);
7676
77 // We need to custom lower loads/stores from private memory
78 setOperationAction(ISD::LOAD, MVT::i32, Custom);
79 setOperationAction(ISD::LOAD, MVT::i64, Custom);
80 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
81 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
82
83 setOperationAction(ISD::STORE, MVT::i32, Custom);
84 setOperationAction(ISD::STORE, MVT::i64, Custom);
85 setOperationAction(ISD::STORE, MVT::i128, Custom);
86 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
87 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
88
89
7790 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
7891 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
7992
94107 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
95108
96109 setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
110 setLoadExtAction(ISD::EXTLOAD, MVT::i32, Expand);
97111 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, Expand);
98112 setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, Expand);
99113
105119 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
106120
107121 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
122 setOperationAction(ISD::FrameIndex, MVT::i64, Custom);
108123
109124 setTargetDAGCombine(ISD::SELECT_CC);
110125
121136 bool *IsFast) const {
122137 // XXX: This depends on the address space and also we may want to revisit
123138 // the alignment values we specify in the DataLayout.
139 if (!VT.isSimple() || VT == MVT::Other)
140 return false;
124141 return VT.bitsGT(MVT::i32);
125142 }
126143
349366 MI->eraseFromParent();
350367 break;
351368 }
369 case AMDGPU::SI_RegisterStorePseudo: {
370 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
371 const SIInstrInfo *TII =
372 static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
373 unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
374 MachineInstrBuilder MIB =
375 BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::SI_RegisterStore),
376 Reg);
377 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
378 MIB.addOperand(MI->getOperand(i));
379
380 MI->eraseFromParent();
381 }
352382 }
353383 return BB;
354384 }
394424 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
395425 case ISD::LOAD: {
396426 LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
397 if (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
427 if ((Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
428 Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
398429 Op.getValueType().isVector()) {
399430 SDValue MergedValues[2] = {
400431 SplitVectorLoad(Op, DAG),
402433 };
403434 return DAG.getMergeValues(MergedValues, 2, SDLoc(Op));
404435 } else {
405 return SDValue();
406 }
407 }
408 case ISD::STORE: {
409 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op);
410 if (Store->getValue().getValueType().isVector() &&
411 Store->getValue().getValueType().getVectorNumElements() >= 8)
412 return SplitVectorStore(Op, DAG);
413 else
414 return AMDGPUTargetLowering::LowerOperation(Op, DAG);
436 return LowerLOAD(Op, DAG);
437 }
415438 }
416439
417440 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
418441 case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
442 case ISD::STORE: return LowerSTORE(Op, DAG);
419443 case ISD::ANY_EXTEND: // Fall-through
420444 case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG);
421445 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
627651 return Chain;
628652 }
629653
654 SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
655 SDLoc DL(Op);
656 LoadSDNode *Load = cast<LoadSDNode>(Op);
657
658 if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
659 return SDValue();
660
661 SDValue TruncPtr = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
662 Load->getBasePtr(), DAG.getConstant(0, MVT::i32));
663 SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, TruncPtr,
664 DAG.getConstant(2, MVT::i32));
665
666 SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
667 Load->getChain(), Ptr,
668 DAG.getTargetConstant(0, MVT::i32),
669 Op.getOperand(2));
670 SDValue MergedValues[2] = {
671 Ret,
672 Load->getChain()
673 };
674 return DAG.getMergeValues(MergedValues, 2, DL);
675
676 }
677
630678 SDValue SITargetLowering::ResourceDescriptorToi128(SDValue Op,
631679 SelectionDAG &DAG) const {
632680
683731
684732 return DAG.getNode(ISD::BUILD_PAIR, DL, VT, Op.getOperand(0), Hi);
685733 }
734
735 SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
736 SDLoc DL(Op);
737 StoreSDNode *Store = cast<StoreSDNode>(Op);
738 EVT VT = Store->getMemoryVT();
739
740 SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
741 if (Ret.getNode())
742 return Ret;
743
744 if (VT.isVector() && VT.getVectorNumElements() >= 8)
745 return SplitVectorStore(Op, DAG);
746
747 if (Store->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
748 return SDValue();
749
750 SDValue TruncPtr = DAG.getZExtOrTrunc(Store->getBasePtr(), DL, MVT::i32);
751 SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, TruncPtr,
752 DAG.getConstant(2, MVT::i32));
753 SDValue Chain = Store->getChain();
754 SmallVector<SDValue, 8> Values;
755
756 if (VT == MVT::i64) {
757 for (unsigned i = 0; i < 2; ++i) {
758 Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
759 Store->getValue(), DAG.getConstant(i, MVT::i32)));
760 }
761 } else if (VT == MVT::i128) {
762 for (unsigned i = 0; i < 2; ++i) {
763 for (unsigned j = 0; j < 2; ++j) {
764 Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
765 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64,
766 Store->getValue(), DAG.getConstant(i, MVT::i32)),
767 DAG.getConstant(j, MVT::i32)));
768 }
769 }
770 } else {
771 Values.push_back(Store->getValue());
772 }
773
774 for (unsigned i = 0; i < Values.size(); ++i) {
775 SDValue PartPtr = DAG.getNode(ISD::ADD, DL, MVT::i32,
776 Ptr, DAG.getConstant(i, MVT::i32));
777 Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
778 Chain, Values[i], PartPtr,
779 DAG.getTargetConstant(0, MVT::i32));
780 }
781 return Chain;
782 }
783
686784
687785 SDValue SITargetLowering::LowerZERO_EXTEND(SDValue Op,
688786 SelectionDAG &DAG) const {
2424 SDValue Chain, unsigned Offset) const;
2525 SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op,
2626 SelectionDAG &DAG) const;
27 SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
2728 SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
2829 SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
30 SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
2931 SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
3032 SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
3133
185185
186186 RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
187187
188 if (!Op.isReg())
188 if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
189189 return std::make_pair(0, 0);
190190
191191 unsigned Reg = Op.getReg();
229229 MachineBasicBlock::iterator I,
230230 unsigned DstReg,
231231 unsigned SrcReg) const {
232 llvm_unreachable("Not Implemented");
232 return BuildMI(*MBB, I, MBB->findDebugLoc(I), get(AMDGPU::V_MOV_B32_e32),
233 DstReg).addReg(SrcReg);
233234 }
234235
235236 bool SIInstrInfo::isMov(unsigned Opcode) const {
602603 return RegIndex;
603604 }
604605
605
606 int SIInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
607 llvm_unreachable("Unimplemented");
608 }
609
610 int SIInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
611 llvm_unreachable("Unimplemented");
612 }
613
614606 const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
615 llvm_unreachable("Unimplemented");
607 return &AMDGPU::VReg_32RegClass;
616608 }
617609
618610 MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
620612 MachineBasicBlock::iterator I,
621613 unsigned ValueReg,
622614 unsigned Address, unsigned OffsetReg) const {
623 llvm_unreachable("Unimplemented");
615 const DebugLoc &DL = MBB->findDebugLoc(I);
616 unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
617 getIndirectIndexBegin(*MBB->getParent()));
618
619 return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1))
620 .addReg(IndirectBaseReg, RegState::Define)
621 .addOperand(I->getOperand(0))
622 .addReg(IndirectBaseReg)
623 .addReg(OffsetReg)
624 .addImm(0)
625 .addReg(ValueReg);
624626 }
625627
626628 MachineInstrBuilder SIInstrInfo::buildIndirectRead(
628630 MachineBasicBlock::iterator I,
629631 unsigned ValueReg,
630632 unsigned Address, unsigned OffsetReg) const {
631 llvm_unreachable("Unimplemented");
632 }
633 const DebugLoc &DL = MBB->findDebugLoc(I);
634 unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
635 getIndirectIndexBegin(*MBB->getParent()));
636
637 return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC))
638 .addOperand(I->getOperand(0))
639 .addOperand(I->getOperand(1))
640 .addReg(IndirectBaseReg)
641 .addReg(OffsetReg)
642 .addImm(0);
643
644 }
645
646 void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
647 const MachineFunction &MF) const {
648 int End = getIndirectIndexEnd(MF);
649 int Begin = getIndirectIndexBegin(MF);
650
651 if (End == -1)
652 return;
653
654
655 for (int Index = Begin; Index <= End; ++Index)
656 Reserved.set(AMDGPU::VReg_32RegClass.getRegister(Index));
657
658 for (int Index = std::max(0, Begin - 1); Index <= End; ++Index)
659 Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index));
660
661 for (int Index = std::max(0, Begin - 2); Index <= End; ++Index)
662 Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index));
663
664 for (int Index = std::max(0, Begin - 3); Index <= End; ++Index)
665 Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index));
666
667 for (int Index = std::max(0, Begin - 7); Index <= End; ++Index)
668 Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index));
669
670 for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
671 Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
672 }
2323 class SIInstrInfo : public AMDGPUInstrInfo {
2424 private:
2525 const SIRegisterInfo RI;
26
27 MachineInstrBuilder buildIndirectIndexLoop(MachineBasicBlock &MBB,
28 MachineBasicBlock::iterator I,
29 unsigned OffsetVGPR,
30 unsigned MovRelOp,
31 unsigned Dst,
32 unsigned Src0) const;
33 // If you add or remove instructions from this function, you will
2634
2735 public:
2836 explicit SIInstrInfo(AMDGPUTargetMachine &tm);
5765
5866 virtual bool verifyInstruction(const MachineInstr *MI,
5967 StringRef &ErrInfo) const;
60 virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
61
62 virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
6368
6469 bool isSALUInstr(const MachineInstr &MI) const;
6570 unsigned getVALUOp(const MachineInstr &MI) const;
113118 unsigned ValueReg,
114119 unsigned Address,
115120 unsigned OffsetReg) const;
116 };
121 void reserveIndirectRegisters(BitVector &Reserved,
122 const MachineFunction &MF) const;
123
124 void LoadM0(MachineInstr *MoveRel, MachineBasicBlock::iterator I,
125 unsigned SavReg, unsigned IndexReg) const;
126 };
117127
118128 namespace AMDGPU {
119129
120120 return false;
121121 }]>;
122122
123 def FRAMEri64 : Operand {
124 let MIOperandInfo = (ops SReg_32:$ptr, i32imm:$index);
125 }
126
123127 //===----------------------------------------------------------------------===//
124128 // SI assembler operands
125129 //===----------------------------------------------------------------------===//
12921292
12931293 let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
12941294
1295 //defm SI_ : RegisterLoadStore ;
1296
1297 let UseNamedOperandTable = 1 in {
1298
1299 def SI_RegisterLoad : AMDGPUShaderInst <
1300 (outs VReg_32:$dst, SReg_64:$temp),
1301 (ins FRAMEri64:$addr, i32imm:$chan),
1302 "", []
1303 > {
1304 let isRegisterLoad = 1;
1305 let mayLoad = 1;
1306 }
1307
1308 class SIRegStore <dag outs> : AMDGPUShaderInst <
1309 outs,
1310 (ins VReg_32:$val, FRAMEri64:$addr, i32imm:$chan),
1311 "", []
1312 > {
1313 let isRegisterStore = 1;
1314 let mayStore = 1;
1315 }
1316
1317 let usesCustomInserter = 1 in {
1318 def SI_RegisterStorePseudo : SIRegStore<(outs)>;
1319 } // End usesCustomInserter = 1
1320 def SI_RegisterStore : SIRegStore<(outs SReg_64:$temp)>;
1321
1322
1323 } // End UseNamedOperandTable = 1
1324
12951325 def SI_INDIRECT_SRC : InstSI <
12961326 (outs VReg_32:$dst, SReg_64:$temp),
12971327 (ins unknown:$src, VSrc_32:$idx, i32imm:$off),
13081338 let Constraints = "$src = $dst";
13091339 }
13101340
1341 def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST<VReg_32>;
13111342 def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
13121343 def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
13131344 def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
19872018 (V_CMP_U_F32_e64 $src0, $src1)
19882019 >;
19892020
1990 //============================================================================//
2021 //===----------------------------------------------------------------------===//
19912022 // Miscellaneous Patterns
19922023 //===----------------------------------------------------------------------===//
19932024
19962027 (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
19972028 (i32 (EXTRACT_SUBREG $x, sub0)), sub0),
19982029 (i32 (EXTRACT_SUBREG $x, sub1)), sub1)
2030 >;
2031
2032 def : Pat <
2033 (i32 (trunc i64:$a)),
2034 (EXTRACT_SUBREG $a, sub0)
19992035 >;
20002036
20012037 def : Pat <
376376 unsigned Dst = MI.getOperand(0).getReg();
377377 unsigned Vec = MI.getOperand(2).getReg();
378378 unsigned Off = MI.getOperand(4).getImm();
379
380 MachineInstr *MovRel =
379 unsigned SubReg = TRI->getSubReg(Vec, AMDGPU::sub0);
380 if (!SubReg)
381 SubReg = Vec;
382
383 MachineInstr *MovRel =
381384 BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
382 .addReg(TRI->getSubReg(Vec, AMDGPU::sub0) + Off)
385 .addReg(SubReg + Off)
383386 .addReg(AMDGPU::M0, RegState::Implicit)
384387 .addReg(Vec, RegState::Implicit);
385388
394397 unsigned Dst = MI.getOperand(0).getReg();
395398 unsigned Off = MI.getOperand(4).getImm();
396399 unsigned Val = MI.getOperand(5).getReg();
400 unsigned SubReg = TRI->getSubReg(Dst, AMDGPU::sub0);
401 if (!SubReg)
402 SubReg = Dst;
397403
398404 MachineInstr *MovRel =
399405 BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
400 .addReg(TRI->getSubReg(Dst, AMDGPU::sub0) + Off, RegState::Define)
406 .addReg(SubReg + Off, RegState::Define)
401407 .addReg(Val)
402408 .addReg(AMDGPU::M0, RegState::Implicit)
403409 .addReg(Dst, RegState::Implicit);
476482 IndirectSrc(MI);
477483 break;
478484
485 case AMDGPU::SI_INDIRECT_DST_V1:
479486 case AMDGPU::SI_INDIRECT_DST_V2:
480487 case AMDGPU::SI_INDIRECT_DST_V4:
481488 case AMDGPU::SI_INDIRECT_DST_V8:
1414
1515 #include "SIRegisterInfo.h"
1616 #include "AMDGPUTargetMachine.h"
17 #include "SIInstrInfo.h"
1718
1819 using namespace llvm;
1920
2526 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
2627 BitVector Reserved(getNumRegs());
2728 Reserved.set(AMDGPU::EXEC);
29 Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
30 const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(TM.getInstrInfo());
31 TII->reserveIndirectRegisters(Reserved, MF);
2832 return Reserved;
2933 }
3034
4852 default:
4953 case MVT::i32: return &AMDGPU::VReg_32RegClass;
5054 }
55 }
56
57 unsigned SIRegisterInfo::getHWRegIndex(unsigned Reg) const {
58 return getEncodingValue(Reg);
5159 }
5260
5361 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
4141 /// CFGStructurizer
4242 virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
4343
44 virtual unsigned getHWRegIndex(unsigned Reg) const;
45
4446 /// \brief Return the 'base' register class for this register.
4547 /// e.g. SGPR0 => SReg_32, VGPR => VReg_32 SGPR0_SGPR1 -> SReg_32, etc.
4648 const TargetRegisterClass *getPhysRegClass(unsigned Reg) const;
test/CodeGen/R600/indirect-addressing.ll (deleted: 0 additions, 104 deletions)
0 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
1
2 ; This test checks that uses and defs of the AR register happen in the same
3 ; instruction clause.
4
5 ; CHECK: @mova_same_clause
6 ; CHECK: MOVA_INT
7 ; CHECK-NOT: ALU clause
8 ; CHECK: 0 + AR.x
9 ; CHECK: MOVA_INT
10 ; CHECK-NOT: ALU clause
11 ; CHECK: 0 + AR.x
12
13 define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
14 entry:
15 %stack = alloca [5 x i32], align 4
16 %0 = load i32 addrspace(1)* %in, align 4
17 %arrayidx1 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 %0
18 store i32 4, i32* %arrayidx1, align 4
19 %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %in, i32 1
20 %1 = load i32 addrspace(1)* %arrayidx2, align 4
21 %arrayidx3 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 %1
22 store i32 5, i32* %arrayidx3, align 4
23 %arrayidx10 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 0
24 %2 = load i32* %arrayidx10, align 4
25 store i32 %2, i32 addrspace(1)* %out, align 4
26 %arrayidx12 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 1
27 %3 = load i32* %arrayidx12
28 %arrayidx13 = getelementptr inbounds i32 addrspace(1)* %out, i32 1
29 store i32 %3, i32 addrspace(1)* %arrayidx13
30 ret void
31 }
32
33 ; This test checks that the stack offset is calculated correctly for structs.
34 ; All register loads/stores should be optimized away, so there shouldn't be
35 ; any MOVA instructions.
36 ;
37 ; XXX: This generated code has unnecessary MOVs, we should be able to optimize
38 ; this.
39
40 ; CHECK: @multiple_structs
41 ; CHECK-NOT: MOVA_INT
42
43 %struct.point = type { i32, i32 }
44
45 define void @multiple_structs(i32 addrspace(1)* %out) {
46 entry:
47 %a = alloca %struct.point
48 %b = alloca %struct.point
49 %a.x.ptr = getelementptr %struct.point* %a, i32 0, i32 0
50 %a.y.ptr = getelementptr %struct.point* %a, i32 0, i32 1
51 %b.x.ptr = getelementptr %struct.point* %b, i32 0, i32 0
52 %b.y.ptr = getelementptr %struct.point* %b, i32 0, i32 1
53 store i32 0, i32* %a.x.ptr
54 store i32 1, i32* %a.y.ptr
55 store i32 2, i32* %b.x.ptr
56 store i32 3, i32* %b.y.ptr
57 %a.indirect.ptr = getelementptr %struct.point* %a, i32 0, i32 0
58 %b.indirect.ptr = getelementptr %struct.point* %b, i32 0, i32 0
59 %a.indirect = load i32* %a.indirect.ptr
60 %b.indirect = load i32* %b.indirect.ptr
61 %0 = add i32 %a.indirect, %b.indirect
62 store i32 %0, i32 addrspace(1)* %out
63 ret void
64 }
65
66 ; Test direct access of a private array inside a loop. The private array
67 ; loads and stores should be lowered to copies, so there shouldn't be any
68 ; MOVA instructions.
69
70 ; CHECK: @direct_loop
71 ; CHECK-NOT: MOVA_INT
72
73 define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
74 entry:
75 %prv_array_const = alloca [2 x i32]
76 %prv_array = alloca [2 x i32]
77 %a = load i32 addrspace(1)* %in
78 %b_src_ptr = getelementptr i32 addrspace(1)* %in, i32 1
79 %b = load i32 addrspace(1)* %b_src_ptr
80 %a_dst_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 0
81 store i32 %a, i32* %a_dst_ptr
82 %b_dst_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 1
83 store i32 %b, i32* %b_dst_ptr
84 br label %for.body
85
86 for.body:
87 %inc = phi i32 [0, %entry], [%count, %for.body]
88 %x_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 0
89 %x = load i32* %x_ptr
90 %y_ptr = getelementptr [2 x i32]* %prv_array, i32 0, i32 0
91 %y = load i32* %y_ptr
92 %xy = add i32 %x, %y
93 store i32 %xy, i32* %y_ptr
94 %count = add i32 %inc, 1
95 %done = icmp eq i32 %count, 4095
96 br i1 %done, label %for.end, label %for.body
97
98 for.end:
99 %value_ptr = getelementptr [2 x i32]* %prv_array, i32 0, i32 0
100 %value = load i32* %value_ptr
101 store i32 %value, i32 addrspace(1)* %out
102 ret void
103 }
298298 ; R600-CHECK: 31
299299 ; SI-CHECK-LABEL: @load_i64_sext
300300 ; SI-CHECK: BUFFER_LOAD_DWORDX2 [[VAL:v\[[0-9]:[0-9]\]]]
301 ; SI-CHECK: V_LSHL_B64 [[LSHL:v\[[0-9]:[0-9]\]]], [[VAL]], 32
302 ; SI-CHECK: V_ASHR_I64 v{{\[[0-9]:[0-9]\]}}, [[LSHL]], 32
303301
304302 define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
305303 entry:
0 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
1 ; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK
2
3 ; This test checks that uses and defs of the AR register happen in the same
4 ; instruction clause.
5
6 ; R600-CHECK-LABEL: @mova_same_clause
7 ; R600-CHECK: MOVA_INT
8 ; R600-CHECK-NOT: ALU clause
9 ; R600-CHECK: 0 + AR.x
10 ; R600-CHECK: MOVA_INT
11 ; R600-CHECK-NOT: ALU clause
12 ; R600-CHECK: 0 + AR.x
13
14 ; SI-CHECK-LABEL: @mova_same_clause
15 ; SI-CHECK: V_READFIRSTLANE
16 ; SI-CHECK: V_MOVRELD
17 ; SI-CHECK: S_CBRANCH
18 ; SI-CHECK: V_READFIRSTLANE
19 ; SI-CHECK: V_MOVRELD
20 ; SI-CHECK: S_CBRANCH
21 define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
22 entry:
23 %stack = alloca [5 x i32], align 4
24 %0 = load i32 addrspace(1)* %in, align 4
25 %arrayidx1 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 %0
26 store i32 4, i32* %arrayidx1, align 4
27 %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %in, i32 1
28 %1 = load i32 addrspace(1)* %arrayidx2, align 4
29 %arrayidx3 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 %1
30 store i32 5, i32* %arrayidx3, align 4
31 %arrayidx10 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 0
32 %2 = load i32* %arrayidx10, align 4
33 store i32 %2, i32 addrspace(1)* %out, align 4
34 %arrayidx12 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 1
35 %3 = load i32* %arrayidx12
36 %arrayidx13 = getelementptr inbounds i32 addrspace(1)* %out, i32 1
37 store i32 %3, i32 addrspace(1)* %arrayidx13
38 ret void
39 }
40
41 ; This test checks that the stack offset is calculated correctly for structs.
42 ; All register loads/stores should be optimized away, so there shouldn't be
43 ; any MOVA instructions.
44 ;
45 ; XXX: This generated code has unnecessary MOVs, we should be able to optimize
46 ; this.
47
48 ; R600-CHECK-LABEL: @multiple_structs
49 ; R600-CHECK-NOT: MOVA_INT
50 ; SI-CHECK-LABEL: @multiple_structs
51 ; SI-CHECK-NOT: V_MOVREL
52 %struct.point = type { i32, i32 }
53
54 define void @multiple_structs(i32 addrspace(1)* %out) {
55 entry:
56 %a = alloca %struct.point
57 %b = alloca %struct.point
58 %a.x.ptr = getelementptr %struct.point* %a, i32 0, i32 0
59 %a.y.ptr = getelementptr %struct.point* %a, i32 0, i32 1
60 %b.x.ptr = getelementptr %struct.point* %b, i32 0, i32 0
61 %b.y.ptr = getelementptr %struct.point* %b, i32 0, i32 1
62 store i32 0, i32* %a.x.ptr
63 store i32 1, i32* %a.y.ptr
64 store i32 2, i32* %b.x.ptr
65 store i32 3, i32* %b.y.ptr
66 %a.indirect.ptr = getelementptr %struct.point* %a, i32 0, i32 0
67 %b.indirect.ptr = getelementptr %struct.point* %b, i32 0, i32 0
68 %a.indirect = load i32* %a.indirect.ptr
69 %b.indirect = load i32* %b.indirect.ptr
70 %0 = add i32 %a.indirect, %b.indirect
71 store i32 %0, i32 addrspace(1)* %out
72 ret void
73 }
74
75 ; Test direct access of a private array inside a loop. The private array
76 ; loads and stores should be lowered to copies, so there shouldn't be any
77 ; MOVA instructions.
78
79 ; R600-CHECK-LABEL: @direct_loop
80 ; R600-CHECK-NOT: MOVA_INT
81 ; SI-CHECK-LABEL: @direct_loop
82 ; SI-CHECK-NOT: V_MOVREL
83
84 define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
85 entry:
86 %prv_array_const = alloca [2 x i32]
87 %prv_array = alloca [2 x i32]
88 %a = load i32 addrspace(1)* %in
89 %b_src_ptr = getelementptr i32 addrspace(1)* %in, i32 1
90 %b = load i32 addrspace(1)* %b_src_ptr
91 %a_dst_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 0
92 store i32 %a, i32* %a_dst_ptr
93 %b_dst_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 1
94 store i32 %b, i32* %b_dst_ptr
95 br label %for.body
96
97 for.body:
98 %inc = phi i32 [0, %entry], [%count, %for.body]
99 %x_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 0
100 %x = load i32* %x_ptr
101 %y_ptr = getelementptr [2 x i32]* %prv_array, i32 0, i32 0
102 %y = load i32* %y_ptr
103 %xy = add i32 %x, %y
104 store i32 %xy, i32* %y_ptr
105 %count = add i32 %inc, 1
106 %done = icmp eq i32 %count, 4095
107 br i1 %done, label %for.end, label %for.body
108
109 for.end:
110 %value_ptr = getelementptr [2 x i32]* %prv_array, i32 0, i32 0
111 %value = load i32* %value_ptr
112 store i32 %value, i32 addrspace(1)* %out
113 ret void
114 }
4242 ;EG-CHECK: ASHR
4343
4444 ;SI-CHECK-LABEL: @ashr_i64
45 ;SI-CHECK: V_ASHR_I64
45 ;SI-CHECK: S_ASHR_I64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8
4646 define void @ashr_i64(i64 addrspace(1)* %out, i32 %in) {
4747 entry:
4848 %0 = sext i32 %in to i64
0 ; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
11
22 ; SI-LABEL: @unaligned_load_store_i32:
3 ; SI: V_ADD_I32_e64 [[REG:v[0-9]+]]
4 ; DS_READ_U8 {{v[0-9]+}}, 0, [[REG]]
3 ; DS_READ_U32 {{v[0-9]+}}, 0, [[REG]]
54 define void @unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
65 %v = load i32 addrspace(3)* %p, align 1
76 store i32 %v, i32 addrspace(3)* %r, align 1
98 }
109
1110 ; SI-LABEL: @unaligned_load_store_v4i32:
12 ; SI: V_ADD_I32_e64 [[REG:v[0-9]+]]
13 ; DS_READ_U8 {{v[0-9]+}}, 0, [[REG]]
11 ; DS_READ_U32 {{v[0-9]+}}, 0, [[REG]]
1412 define void @unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind {
1513 %v = load <4 x i32> addrspace(3)* %p, align 1
1614 store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1