llvm.org GIT mirror llvm / 9c3e0ed
R600: Add support for global addresses with constant initializers git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199825 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 6 years ago
4 changed file(s) with 152 addition(s) and 18 deletion(s). Raw diff Collapse all Expand all
2020 #include "AMDILIntrinsicInfo.h"
2121 #include "R600MachineFunctionInfo.h"
2222 #include "SIMachineFunctionInfo.h"
23 #include "llvm/Analysis/ValueTracking.h"
2324 #include "llvm/CodeGen/CallingConvLower.h"
2425 #include "llvm/CodeGen/MachineFunction.h"
2526 #include "llvm/CodeGen/MachineRegisterInfo.h"
275276 return Op;
276277 }
277278
279 SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
280 const GlobalValue *GV,
281 const SDValue &InitPtr,
282 SDValue Chain,
283 SelectionDAG &DAG) const {
284 const DataLayout *TD = getTargetMachine().getDataLayout();
285 SDLoc DL(InitPtr);
286 if (const ConstantInt *CI = dyn_cast(Init)) {
287 EVT VT = EVT::getEVT(CI->getType());
288 PointerType *PtrTy = PointerType::get(CI->getType(), 0);
289 return DAG.getStore(Chain, DL, DAG.getConstant(*CI, VT), InitPtr,
290 MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
291 TD->getPrefTypeAlignment(CI->getType()));
292 } else if (const ConstantFP *CFP = dyn_cast(Init)) {
293 EVT VT = EVT::getEVT(CFP->getType());
294 PointerType *PtrTy = PointerType::get(CFP->getType(), 0);
295 return DAG.getStore(Chain, DL, DAG.getConstantFP(*CFP, VT), InitPtr,
296 MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
297 TD->getPrefTypeAlignment(CFP->getType()));
298 } else if (Init->getType()->isAggregateType()) {
299 EVT PtrVT = InitPtr.getValueType();
300 unsigned NumElements = Init->getType()->getArrayNumElements();
301 SmallVector Chains;
302 for (unsigned i = 0; i < NumElements; ++i) {
303 SDValue Offset = DAG.getConstant(i * TD->getTypeAllocSize(
304 Init->getType()->getArrayElementType()), PtrVT);
305 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset);
306 Chains.push_back(LowerConstantInitializer(Init->getAggregateElement(i),
307 GV, Ptr, Chain, DAG));
308 }
309 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0],
310 Chains.size());
311 } else {
312 Init->dump();
313 llvm_unreachable("Unhandled constant initializer");
314 }
315 }
316
278317 SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
279318 SDValue Op,
280319 SelectionDAG &DAG) const {
281320
282321 const DataLayout *TD = getTargetMachine().getDataLayout();
283322 GlobalAddressSDNode *G = cast(Op);
284
285 assert(G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS);
286 // XXX: What does the value of G->getOffset() mean?
287 assert(G->getOffset() == 0 &&
323 const GlobalValue *GV = G->getGlobal();
324
325 switch (G->getAddressSpace()) {
326 default: llvm_unreachable("Global Address lowering not implemented for this "
327 "address space");
328 case AMDGPUAS::LOCAL_ADDRESS: {
329 // XXX: What does the value of G->getOffset() mean?
330 assert(G->getOffset() == 0 &&
288331 "Do not know what to do with an non-zero offset");
289332
290 const GlobalValue *GV = G->getGlobal();
291
292 unsigned Offset;
293 if (MFI->LocalMemoryObjects.count(GV) == 0) {
294 uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
295 Offset = MFI->LDSSize;
296 MFI->LocalMemoryObjects[GV] = Offset;
297 // XXX: Account for alignment?
298 MFI->LDSSize += Size;
299 } else {
300 Offset = MFI->LocalMemoryObjects[GV];
301 }
302
303 return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace()));
333 unsigned Offset;
334 if (MFI->LocalMemoryObjects.count(GV) == 0) {
335 uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
336 Offset = MFI->LDSSize;
337 MFI->LocalMemoryObjects[GV] = Offset;
338 // XXX: Account for alignment?
339 MFI->LDSSize += Size;
340 } else {
341 Offset = MFI->LocalMemoryObjects[GV];
342 }
343
344 return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace()));
345 }
346 case AMDGPUAS::CONSTANT_ADDRESS: {
347 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
348 Type *EltType = GV->getType()->getElementType();
349 unsigned Size = TD->getTypeAllocSize(EltType);
350 unsigned Alignment = TD->getPrefTypeAlignment(EltType);
351
352 const GlobalVariable *Var = dyn_cast(GV);
353 const Constant *Init = Var->getInitializer();
354 int FI = FrameInfo->CreateStackObject(Size, Alignment, false);
355 SDValue InitPtr = DAG.getFrameIndex(FI,
356 getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
357 SmallVector WorkList;
358
359 for (SDNode::use_iterator I = DAG.getEntryNode()->use_begin(),
360 E = DAG.getEntryNode()->use_end(); I != E; ++I) {
361 if (I->getOpcode() != AMDGPUISD::REGISTER_LOAD && I->getOpcode() != ISD::LOAD)
362 continue;
363 WorkList.push_back(*I);
364 }
365 SDValue Chain = LowerConstantInitializer(Init, GV, InitPtr, DAG.getEntryNode(), DAG);
366 for (SmallVector::iterator I = WorkList.begin(),
367 E = WorkList.end(); I != E; ++I) {
368 SmallVector Ops;
369 Ops.push_back(Chain);
370 for (unsigned i = 1; i < (*I)->getNumOperands(); ++i) {
371 Ops.push_back((*I)->getOperand(i));
372 }
373 DAG.UpdateNodeOperands(*I, &Ops[0], Ops.size());
374 }
375 return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op),
376 getPointerTy(AMDGPUAS::CONSTANT_ADDRESS));
377 }
378 }
304379 }
305380
306381 void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
593668 LoadSDNode *Load = cast(Op);
594669 ISD::LoadExtType ExtType = Load->getExtensionType();
595670
671 // Lower loads of global variables in the constant address space.
672 if (Load->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
673 isa(GetUnderlyingObject(Load->getPointerInfo().V))) {
674
675 SDValue Ptr = DAG.getZExtOrTrunc(Load->getBasePtr(), DL,
676 getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
677 Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
678 DAG.getConstant(2, MVT::i32));
679 return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
680 Load->getChain(), Ptr,
681 DAG.getTargetConstant(0, MVT::i32), Op.getOperand(2));
682 }
683
596684 if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS ||
597685 ExtType == ISD::NON_EXTLOAD || Load->getMemoryVT().bitsGE(MVT::i32))
598686 return SDValue();
2727 void ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
2828 SmallVectorImpl &Args,
2929 unsigned Start, unsigned Count) const;
30 SDValue LowerConstantInitializer(const Constant* Init, const GlobalValue *GV,
31 const SDValue &InitPtr,
32 SDValue Chain,
33 SelectionDAG &DAG) const;
3034 SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
3135 SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
3236 SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
142142 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
143143
144144 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
145 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
145146 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
146147
147148 setTargetDAGCombine(ISD::SELECT_CC);
0 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600 --check-prefix=FUNC
1
2 ; XXX: Test on SI once 64-bit adds are supported.
3
4 @float_gv = internal addrspace(2) unnamed_addr constant [5 x float] [float 0.0, float 1.0, float 2.0, float 3.0, float 4.0], align 4
5
6 ; FUNC-LABEL: @float
7
8 ; R600-DAG: MOV {{\** *}}T2.X
9 ; R600-DAG: MOV {{\** *}}T3.X
10 ; R600-DAG: MOV {{\** *}}T4.X
11 ; R600-DAG: MOV {{\** *}}T5.X
12 ; R600-DAG: MOV {{\** *}}T6.X
13 ; R600: MOVA_INT
14
; Load element %index of @float_gv and store it to %out.
define void @float(float addrspace(1)* %out, i32 %index) {
entry:
  %elt.ptr = getelementptr inbounds [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index
  %elt = load float addrspace(2)* %elt.ptr
  store float %elt, float addrspace(1)* %out
  ret void
}
22
23 @i32_gv = internal addrspace(2) unnamed_addr constant [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4], align 4
24
25 ; FUNC-LABEL: @i32
26
27 ; R600-DAG: MOV {{\** *}}T2.X
28 ; R600-DAG: MOV {{\** *}}T3.X
29 ; R600-DAG: MOV {{\** *}}T4.X
30 ; R600-DAG: MOV {{\** *}}T5.X
31 ; R600-DAG: MOV {{\** *}}T6.X
32 ; R600: MOVA_INT
33
; Load element %index of @i32_gv and store it to %out.
define void @i32(i32 addrspace(1)* %out, i32 %index) {
entry:
  %elt.ptr = getelementptr inbounds [5 x i32] addrspace(2)* @i32_gv, i32 0, i32 %index
  %elt = load i32 addrspace(2)* %elt.ptr
  store i32 %elt, i32 addrspace(1)* %out
  ret void
}