llvm.org GIT mirror — llvm @ ab3be33
[AMDGPU] Get address space mapping by target triple environment

Since we introduced the target triple environments amdgiz and amdgizcl, the address space values are no longer fixed enums; we have to determine them from the target triple. The basic idea is to use a struct, AMDGPUAS, to represent the address space values. Address space values that do not depend on the target triple are static const members, so they occupy no extra memory and are equivalent to compile-time constants. Since the struct is lightweight and cheap to build, it can be created on the fly at the point of use, or added as a member of a pass and initialized at the beginning of the run* function.

Differential Revision: https://reviews.llvm.org/D31284

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298846 91177308-0d34-0410-b5e6-96231b3b80d8

Yaxun Liu, 3 years ago
39 changed files with 456 additions and 300 deletions.
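The design described above is easiest to see in isolation. Below is a minimal, hypothetical sketch of the pattern (not the committed header): triple-dependent values are plain members filled in by a factory, while triple-independent values are static constants with no per-instance storage. The concrete numbers follow the layouts asserted in the alias-analysis hunk further down.

```cpp
#include <string>

// Simplified stand-in for the real AMDGPUAS struct defined in this commit.
struct AMDGPUASExample {
  // Triple-dependent values: set at runtime by the factory below.
  unsigned PRIVATE_ADDRESS;
  unsigned FLAT_ADDRESS;
  // Triple-independent values: compile-time constants, no extra memory.
  static const unsigned GLOBAL_ADDRESS = 1;
  static const unsigned LOCAL_ADDRESS = 3;
};

// Hypothetical factory mirroring getAMDGPUAS(Triple): the amdgiz/amdgizcl
// environments make the generic (flat) address space 0 and private 5;
// otherwise private is 0 and flat is 4.
AMDGPUASExample getASExample(const std::string &Env) {
  AMDGPUASExample AS;
  if (Env == "amdgiz" || Env == "amdgizcl") {
    AS.FLAT_ADDRESS = 0;
    AS.PRIVATE_ADDRESS = 5;
  } else {
    AS.PRIVATE_ADDRESS = 0;
    AS.FLAT_ADDRESS = 4;
  }
  return AS;
}
```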
2222 class Target;
2323 class TargetMachine;
2424 class PassRegistry;
25 class Module;
2526
2627 // R600 Passes
2728 FunctionPass *createR600VectorRegMerger(TargetMachine &tm);
149150 /// however on the GPU, each address space points to
150151 /// a separate piece of memory that is unique from other
151152 /// memory locations.
152 namespace AMDGPUAS {
153 enum AddressSpaces : unsigned {
154 PRIVATE_ADDRESS = 0, ///< Address space for private memory.
155 GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
156 CONSTANT_ADDRESS = 2, ///< Address space for constant memory (VTX2)
157 LOCAL_ADDRESS = 3, ///< Address space for local memory.
158 FLAT_ADDRESS = 4, ///< Address space for flat memory.
159 REGION_ADDRESS = 5, ///< Address space for region memory.
160 PARAM_D_ADDRESS = 6, ///< Address space for direct addressable parameter memory (CONST0)
161 PARAM_I_ADDRESS = 7, ///< Address space for indirect addressable parameter memory (VTX1)
153 struct AMDGPUAS {
154 // The following address space values depend on the triple environment.
155 unsigned PRIVATE_ADDRESS; ///< Address space for private memory.
156 unsigned CONSTANT_ADDRESS; ///< Address space for constant memory (VTX2)
157 unsigned FLAT_ADDRESS; ///< Address space for flat memory.
158 unsigned REGION_ADDRESS; ///< Address space for region memory.
159
160 // The maximum value for the flat, generic, local, private, constant and region address spaces.
161 const static unsigned MAX_COMMON_ADDRESS = 5;
162
163 const static unsigned GLOBAL_ADDRESS = 1; ///< Address space for global memory (RAT0, VTX0).
164 const static unsigned LOCAL_ADDRESS = 3; ///< Address space for local memory.
165 const static unsigned PARAM_D_ADDRESS = 6; ///< Address space for direct addressable parameter memory (CONST0)
166 const static unsigned PARAM_I_ADDRESS = 7; ///< Address space for indirect addressable parameter memory (VTX1)
162167
163168 // Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on this
164169 // order to be able to dynamically index a constant buffer, for example:
165170 //
166171 // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx
167172
168 CONSTANT_BUFFER_0 = 8,
169 CONSTANT_BUFFER_1 = 9,
170 CONSTANT_BUFFER_2 = 10,
171 CONSTANT_BUFFER_3 = 11,
172 CONSTANT_BUFFER_4 = 12,
173 CONSTANT_BUFFER_5 = 13,
174 CONSTANT_BUFFER_6 = 14,
175 CONSTANT_BUFFER_7 = 15,
176 CONSTANT_BUFFER_8 = 16,
177 CONSTANT_BUFFER_9 = 17,
178 CONSTANT_BUFFER_10 = 18,
179 CONSTANT_BUFFER_11 = 19,
180 CONSTANT_BUFFER_12 = 20,
181 CONSTANT_BUFFER_13 = 21,
182 CONSTANT_BUFFER_14 = 22,
183 CONSTANT_BUFFER_15 = 23,
173 const static unsigned CONSTANT_BUFFER_0 = 8;
174 const static unsigned CONSTANT_BUFFER_1 = 9;
175 const static unsigned CONSTANT_BUFFER_2 = 10;
176 const static unsigned CONSTANT_BUFFER_3 = 11;
177 const static unsigned CONSTANT_BUFFER_4 = 12;
178 const static unsigned CONSTANT_BUFFER_5 = 13;
179 const static unsigned CONSTANT_BUFFER_6 = 14;
180 const static unsigned CONSTANT_BUFFER_7 = 15;
181 const static unsigned CONSTANT_BUFFER_8 = 16;
182 const static unsigned CONSTANT_BUFFER_9 = 17;
183 const static unsigned CONSTANT_BUFFER_10 = 18;
184 const static unsigned CONSTANT_BUFFER_11 = 19;
185 const static unsigned CONSTANT_BUFFER_12 = 20;
186 const static unsigned CONSTANT_BUFFER_13 = 21;
187 const static unsigned CONSTANT_BUFFER_14 = 22;
188 const static unsigned CONSTANT_BUFFER_15 = 23;
184189
185190 // Some places use this if the address space can't be determined.
186 UNKNOWN_ADDRESS_SPACE = ~0u
191 const static unsigned UNKNOWN_ADDRESS_SPACE = ~0u;
187192 };
188193
189 } // namespace AMDGPUAS
194 namespace llvm {
195 namespace AMDGPU {
196 AMDGPUAS getAMDGPUAS(const Module &M);
197 AMDGPUAS getAMDGPUAS(const TargetMachine &TM);
198 AMDGPUAS getAMDGPUAS(Triple T);
199 } // namespace AMDGPU
200 } // namespace llvm
190201
191202 #endif
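The header only declares the mapping; per the commit message, users either build the struct on the fly or cache it in a pass. A hedged sketch of the cached-member style follows (MyPassSketch is hypothetical; getAMDGPUAS is the overload declared above, assumed to live in this backend header):

```cpp
// Hypothetical pass skeleton showing the "member initialized at the start of
// run*" style from the commit message.
#include "AMDGPU.h"
#include "llvm/IR/Module.h"
using namespace llvm;

struct MyPassSketch {
  AMDGPUAS AS; // lightweight, cheap to copy and to rebuild per run

  bool runOnModuleSketch(const Module &M) {
    AS = AMDGPU::getAMDGPUAS(M); // resolve the mapping for this module's triple
    // ... later, compare address spaces via AS.FLAT_ADDRESS, AS.LOCAL_ADDRESS, ...
    return false;
  }
};
```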
3636 AU.setPreservesAll();
3737 }
3838
39 // Must match the table in getAliasResult.
40 AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(AMDGPUAS AS_) : AS(AS_) {
41 // These arrays are indexed by the address space values
42 // 0 through 5.
43 static const AliasResult ASAliasRulesPrivIsZero[6][6] = {
44 /* Private Global Constant Group Flat Region*/
45 /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias},
46 /* Global */ {NoAlias , MayAlias, NoAlias , NoAlias , MayAlias, NoAlias},
47 /* Constant */ {NoAlias , NoAlias , MayAlias, NoAlias , MayAlias, NoAlias},
48 /* Group */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias, NoAlias},
49 /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
50 /* Region */ {NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, MayAlias}
51 };
52 static const AliasResult ASAliasRulesGenIsZero[6][6] = {
53 /* Flat Global Region Group Constant Private */
54 /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
55 /* Global */ {MayAlias, MayAlias, NoAlias , NoAlias , NoAlias , NoAlias},
56 /* Region */ {NoAlias , NoAlias , MayAlias, NoAlias, NoAlias , MayAlias},
57 /* Group */ {MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , NoAlias},
58 /* Constant */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias},
59 /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias}
60 };
61 assert(AS.MAX_COMMON_ADDRESS <= 5);
62 if (AS.FLAT_ADDRESS == 0) {
63 assert(AS.GLOBAL_ADDRESS == 1 &&
64 AS.REGION_ADDRESS == 2 &&
65 AS.LOCAL_ADDRESS == 3 &&
66 AS.CONSTANT_ADDRESS == 4 &&
67 AS.PRIVATE_ADDRESS == 5);
68 ASAliasRules = &ASAliasRulesGenIsZero;
69 } else {
70 assert(AS.PRIVATE_ADDRESS == 0 &&
71 AS.GLOBAL_ADDRESS == 1 &&
72 AS.CONSTANT_ADDRESS == 2 &&
73 AS.LOCAL_ADDRESS == 3 &&
74 AS.FLAT_ADDRESS == 4 &&
75 AS.REGION_ADDRESS == 5);
76 ASAliasRules = &ASAliasRulesPrivIsZero;
77 }
78 }
79
80 AliasResult AMDGPUAAResult::ASAliasRulesTy::getAliasResult(unsigned AS1,
81 unsigned AS2) const {
82 if (AS1 > AS.MAX_COMMON_ADDRESS || AS2 > AS.MAX_COMMON_ADDRESS)
83 report_fatal_error("Pointer address space out of range");
84 return (*ASAliasRules)[AS1][AS2];
85 }
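The constructor above picks one of two 6x6 tables depending on whether the triple makes the flat address space 0, and getAliasResult is then a plain table lookup. A standalone model of the private-is-zero table, with two worked queries:

```cpp
#include <cassert>

// Mirrors ASAliasRulesPrivIsZero above; row/column order is the layout the
// constructor asserts: Private=0, Global=1, Constant=2, Local=3, Flat=4,
// Region=5.
enum Res { NoA, MayA };
static const Res Rules[6][6] = {
  {MayA, NoA , NoA , NoA , MayA, NoA },  // Private
  {NoA , MayA, NoA , NoA , MayA, NoA },  // Global
  {NoA , NoA , MayA, NoA , MayA, NoA },  // Constant
  {NoA , NoA , NoA , MayA, MayA, NoA },  // Local (group)
  {MayA, MayA, MayA, MayA, MayA, MayA},  // Flat
  {NoA , NoA , NoA , NoA , MayA, MayA},  // Region
};

int main() {
  assert(Rules[0][4] == MayA); // private vs. flat: flat can reach private memory
  assert(Rules[0][1] == NoA);  // private vs. global: disjoint segments
}
```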
86
3987 AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
4088 const MemoryLocation &LocB) {
41 // This array is indexed by the AMDGPUAS::AddressSpaces
42 // enum elements PRIVATE_ADDRESS ... to FLAT_ADDRESS
43 // see "llvm/Transforms/AMDSPIRUtils.h"
44 static const AliasResult ASAliasRules[5][5] = {
45 /* Private Global Constant Group Flat */
46 /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias},
47 /* Global */ {NoAlias , MayAlias, NoAlias , NoAlias , MayAlias},
48 /* Constant */ {NoAlias , NoAlias , MayAlias, NoAlias , MayAlias},
49 /* Group */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias},
50 /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias}
51 };
5289 unsigned asA = LocA.Ptr->getType()->getPointerAddressSpace();
5390 unsigned asB = LocB.Ptr->getType()->getPointerAddressSpace();
54 if (asA > AMDGPUAS::AddressSpaces::FLAT_ADDRESS ||
55 asB > AMDGPUAS::AddressSpaces::FLAT_ADDRESS)
56 report_fatal_error("Pointer address space out of range");
5791
58 AliasResult Result = ASAliasRules[asA][asB];
92 AliasResult Result = ASAliasRules.getAliasResult(asA, asB);
5993 if (Result == NoAlias) return Result;
6094
6195 if (isa(LocA.Ptr) && isa(LocB.Ptr)) {
74108 bool OrLocal) {
75109 const Value *Base = GetUnderlyingObject(Loc.Ptr, DL);
76110
77 if (Base->getType()->getPointerAddressSpace() ==
78 AMDGPUAS::AddressSpaces::CONSTANT_ADDRESS) {
111 if (Base->getType()->getPointerAddressSpace() == AS.CONSTANT_ADDRESS) {
79112 return true;
80113 }
81114
1212 #ifndef LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
1313 #define LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
1414
15 #include "AMDGPU.h"
1516 #include "llvm/Analysis/AliasAnalysis.h"
1617 #include "llvm/IR/Function.h"
1718 #include "llvm/IR/Module.h"
2425 friend AAResultBase;
2526
2627 const DataLayout &DL;
28 AMDGPUAS AS;
2729
2830 public:
29 explicit AMDGPUAAResult(const DataLayout &DL) : AAResultBase(), DL(DL) {}
31 explicit AMDGPUAAResult(const DataLayout &DL, Triple T) : AAResultBase(),
32 DL(DL), AS(AMDGPU::getAMDGPUAS(T)), ASAliasRules(AS) {}
3033 AMDGPUAAResult(AMDGPUAAResult &&Arg)
31 : AAResultBase(std::move(Arg)), DL(Arg.DL){}
34 : AAResultBase(std::move(Arg)), DL(Arg.DL), AS(Arg.AS),
35 ASAliasRules(Arg.ASAliasRules){}
3236
3337 /// Handle invalidation events from the new pass manager.
3438 ///
4145 private:
4246 bool Aliases(const MDNode *A, const MDNode *B) const;
4347 bool PathAliases(const MDNode *A, const MDNode *B) const;
48
49 class ASAliasRulesTy {
50 public:
51 ASAliasRulesTy(AMDGPUAS AS_);
52 AliasResult getAliasResult(unsigned AS1, unsigned AS2) const;
53 private:
54 AMDGPUAS AS;
55 const AliasResult (*ASAliasRules)[6][6];
56 } ASAliasRules;
4457 };
4558
4659 /// Analysis pass providing a never-invalidated alias analysis result.
5265 typedef AMDGPUAAResult Result;
5366
5467 AMDGPUAAResult run(Function &F, AnalysisManager<Function> &AM) {
55 return AMDGPUAAResult(F.getParent()->getDataLayout());
68 return AMDGPUAAResult(F.getParent()->getDataLayout(),
69 Triple(F.getParent()->getTargetTriple()));
5670 }
5771 };
5872
7185 const AMDGPUAAResult &getResult() const { return *Result; }
7286
7387 bool doInitialization(Module &M) override {
74 Result.reset(new AMDGPUAAResult(M.getDataLayout()));
88 Result.reset(new AMDGPUAAResult(M.getDataLayout(),
89 Triple(M.getTargetTriple())));
7590 return false;
7691 }
7792 bool doFinalization(Module &M) override {
2727 class AMDGPUAnnotateKernelFeatures : public ModulePass {
2828 private:
2929 const TargetMachine *TM;
30 static bool hasAddrSpaceCast(const Function &F);
30 AMDGPUAS AS;
31 static bool hasAddrSpaceCast(const Function &F, AMDGPUAS AS);
3132
3233 void addAttrToCallers(Function *Intrin, StringRef AttrName);
3334 bool addAttrsForIntrinsics(Module &M, ArrayRef);
4748 ModulePass::getAnalysisUsage(AU);
4849 }
4950
50 static bool visitConstantExpr(const ConstantExpr *CE);
51 static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
5152 static bool visitConstantExprsRecursively(
5253 const Constant *EntryC,
53 SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
54 SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
55 AMDGPUAS AS);
5456 };
5557
5658 }
6466
6567
6668 // The queue ptr is only needed when casting to flat, not from it.
67 static bool castRequiresQueuePtr(unsigned SrcAS) {
68 return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
69 }
70
71 static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
72 return castRequiresQueuePtr(ASC->getSrcAddressSpace());
73 }
74
75 bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
69 static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
70 return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
71 }
72
73 static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
74 const AMDGPUAS &AS) {
75 return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS);
76 }
77
78 bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
79 AMDGPUAS AS) {
7680 if (CE->getOpcode() == Instruction::AddrSpaceCast) {
7781 unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
78 return castRequiresQueuePtr(SrcAS);
82 return castRequiresQueuePtr(SrcAS, AS);
7983 }
8084
8185 return false;
8387
8488 bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
8589 const Constant *EntryC,
86 SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {
90 SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
91 AMDGPUAS AS) {
8792
8893 if (!ConstantExprVisited.insert(EntryC).second)
8994 return false;
96101
97102 // Check this constant expression.
98103 if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
99 if (visitConstantExpr(CE))
104 if (visitConstantExpr(CE, AS))
100105 return true;
101106 }
102107
117122 }
118123
119124 // Return true if an addrspacecast is used that requires the queue ptr.
120 bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) {
125 bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F,
126 AMDGPUAS AS) {
121127 SmallPtrSet<const Constant *, 8> ConstantExprVisited;
122128
123129 for (const BasicBlock &BB : F) {
124130 for (const Instruction &I : BB) {
125131 if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
126 if (castRequiresQueuePtr(ASC))
132 if (castRequiresQueuePtr(ASC, AS))
127133 return true;
128134 }
129135
132138 if (!OpC)
133139 continue;
134140
135 if (visitConstantExprsRecursively(OpC, ConstantExprVisited))
141 if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS))
136142 return true;
137143 }
138144 }
172178
173179 bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
174180 Triple TT(M.getTargetTriple());
181 AS = AMDGPU::getAMDGPUAS(M);
175182
176183 static const StringRef IntrinsicToAttr[][2] = {
177184 // .x omitted
215222
216223 bool HasApertureRegs =
217224 TM && TM->getSubtarget<AMDGPUSubtarget>(F).hasApertureRegs();
218 if (!HasApertureRegs && hasAddrSpaceCast(F))
225 if (!HasApertureRegs && hasAddrSpaceCast(F, AS))
219226 F.addFnAttr("amdgpu-queue-ptr");
220227 }
221228 }
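A compact model of the rule this pass implements may help: only casts from local or private into flat need the queue pointer, and only on subtargets without aperture registers. A sketch with the subtarget check folded in (address space numbers assume the default layout):

```cpp
// Standalone model of the "amdgpu-queue-ptr" decision above. In the real pass
// the aperture-register check happens in runOnModule and the source-address-
// space check in castRequiresQueuePtr.
bool castNeedsQueuePtr(unsigned SrcAS, bool HasApertureRegs) {
  const unsigned LOCAL_ADDRESS = 3, PRIVATE_ADDRESS = 0; // default layout
  return !HasApertureRegs &&
         (SrcAS == LOCAL_ADDRESS || SrcAS == PRIVATE_ADDRESS);
}
```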
3636 LoopInfo *LI;
3737 DenseMap noClobberClones;
3838 bool isKernelFunc;
39 AMDGPUAS AMDGPUASI;
3940
4041 public:
4142 static char ID;
129130 Value *Ptr = I.getPointerOperand();
130131 if (!DA->isUniform(Ptr))
131132 return;
132 auto isGlobalLoad = [](LoadInst &Load)->bool {
133 return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
133 auto isGlobalLoad = [&](LoadInst &Load)->bool {
134 return Load.getPointerAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
134135 };
135136 // We're tracking up to the Function boundaries
136137 // We cannot go beyond because of FunctionPass restrictions
165166 }
166167
167168 bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
169 AMDGPUASI = AMDGPU::getAMDGPUAS(M);
168170 return false;
169171 }
170172
1616 //
1717
1818 #include "AMDGPUAsmPrinter.h"
19 #include "AMDGPUTargetMachine.h"
1920 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
2021 #include "InstPrinter/AMDGPUInstPrinter.h"
2122 #include "Utils/AMDGPUBaseInfo.h"
9192
9293 AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
9394 std::unique_ptr<MCStreamer> Streamer)
94 : AsmPrinter(TM, std::move(Streamer)) {}
95 : AsmPrinter(TM, std::move(Streamer)) {
96 AMDGPUASI = static_cast<AMDGPUTargetMachine *>(&TM)->getAMDGPUAS();
97 }
9598
9699 StringRef AMDGPUAsmPrinter::getPassName() const {
97100 return "AMDGPU Assembly Printer";
173176 void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
174177
175178 // Group segment variables aren't emitted in HSA.
176 if (AMDGPU::isGroupSegment(GV))
179 if (AMDGPU::isGroupSegment(GV, AMDGPUASI))
177180 return;
178181
179182 AsmPrinter::EmitGlobalVariable(GV);
1515 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
1616
1717 #include "AMDKernelCodeT.h"
18 #include "AMDGPU.h"
1819 #include "llvm/ADT/StringRef.h"
1920 #include "llvm/CodeGen/AsmPrinter.h"
2021 #include <vector>
149150 protected:
150151 std::vector<std::string> DisasmLines, HexLines;
151152 size_t DisasmLineMaxLen;
153 AMDGPUAS AMDGPUASI;
152154 };
153155
154156 } // end namespace llvm
3030 #endif
3131
3232 AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
33 : CallLowering(&TLI) {
33 : CallLowering(&TLI), AMDGPUASI(TLI.getAMDGPUAS()) {
3434 }
3535
3636 bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
4848 MachineRegisterInfo &MRI = MF.getRegInfo();
4949 const Function &F = *MF.getFunction();
5050 const DataLayout &DL = F.getParent()->getDataLayout();
51 PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
51 PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
5252 LLT PtrType = getLLTForType(*PtrTy, DL);
5353 unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
5454 unsigned KernArgSegmentPtr =
6969 MachineFunction &MF = MIRBuilder.getMF();
7070 const Function &F = *MF.getFunction();
7171 const DataLayout &DL = F.getParent()->getDataLayout();
72 PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
72 PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
7373 MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
7474 unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
7575 unsigned Align = DL.getABITypeAlignment(ParamTy);
1414 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUCALLLOWERING_H
1515 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUCALLLOWERING_H
1616
17 #include "AMDGPU.h"
1718 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
1819
1920 namespace llvm {
2122 class AMDGPUTargetLowering;
2223
2324 class AMDGPUCallLowering: public CallLowering {
25 AMDGPUAS AMDGPUASI;
2426
2527 unsigned lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy,
2628 unsigned Offset) const;
6666 // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
6767 // make the right decision when generating code for different targets.
6868 const AMDGPUSubtarget *Subtarget;
69 AMDGPUAS AMDGPUASI;
6970
7071 public:
7172 explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel)
72 : SelectionDAGISel(TM, OptLevel) {}
73 : SelectionDAGISel(TM, OptLevel) {
74 AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
75 }
7376 ~AMDGPUDAGToDAGISel() override = default;
7477
7578 bool runOnMachineFunction(MachineFunction &MF) override;
268271
269272 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
270273 if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
271 cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
274 cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS)
272275 return N;
273276
274277 const SITargetLowering& Lowering =
585588 if (!N->readMem())
586589 return false;
587590 if (CbId == -1)
588 return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
589
590 return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
591 return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
592
593 return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
591594 }
592595
593596 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
15351538 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
15361539 MemSDNode *Mem = cast<MemSDNode>(N);
15371540 unsigned AS = Mem->getAddressSpace();
1538 if (AS == AMDGPUAS::FLAT_ADDRESS) {
1541 if (AS == AMDGPUASI.FLAT_ADDRESS) {
15391542 SelectCode(N);
15401543 return;
15411544 }
5858 AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
5959 const AMDGPUSubtarget &STI)
6060 : TargetLowering(TM), Subtarget(&STI) {
61 AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
6162 // Lower floating point store/load to integer store/load to reduce the number
6263 // of patterns in tablegen.
6364 setOperationAction(ISD::LOAD, MVT::f32, Promote);
966967 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
967968 const GlobalValue *GV = G->getGlobal();
968969
969 switch (G->getAddressSpace()) {
970 case AMDGPUAS::LOCAL_ADDRESS: {
970 if (G->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS) {
971971 // XXX: What does the value of G->getOffset() mean?
972972 assert(G->getOffset() == 0 &&
973973 "Do not know what to do with an non-zero offset");
974974
975975 // TODO: We could emit code to handle the initialization somewhere.
976 if (hasDefinedInitializer(GV))
977 break;
978
979 unsigned Offset = MFI->allocateLDSGlobal(DL, *GV);
980 return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
981 }
976 if (!hasDefinedInitializer(GV)) {
977 unsigned Offset = MFI->allocateLDSGlobal(DL, *GV);
978 return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
979 }
982980 }
983981
984982 const Function &Fn = *DAG.getMachineFunction().getFunction();
1515 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
1616 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
1717
18 #include "AMDGPU.h"
1819 #include "llvm/Target/TargetLowering.h"
1920
2021 namespace llvm {
3334
3435 protected:
3536 const AMDGPUSubtarget *Subtarget;
37 AMDGPUAS AMDGPUASI;
3638
3739 SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
3840 SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
223225 /// type of implicit parameter.
224226 uint32_t getImplicitParameterOffset(const AMDGPUMachineFunction *MFI,
225227 const ImplicitParameter Param) const;
228
229 AMDGPUAS getAMDGPUAS() const {
230 return AMDGPUASI;
231 }
226232 };
227233
228234 namespace AMDGPUISD {
2929 void AMDGPUInstrInfo::anchor() {}
3030
3131 AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
32 : AMDGPUGenInstrInfo(-1, -1), ST(ST) {}
32 : AMDGPUGenInstrInfo(-1, -1), ST(ST), AMDGPUASI(ST.getAMDGPUAS()) {}
3333
3434 // FIXME: This behaves strangely. If, for example, you have 32 load + stores,
3535 // the first 16 loads will be interleaved with the stores, and the next 16 will
1515 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H
1616 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H
1717
18 #include "AMDGPU.h"
1819 #include "llvm/Target/TargetInstrInfo.h"
1920 #include "Utils/AMDGPUBaseInfo.h"
2021
3435 const AMDGPUSubtarget &ST;
3536
3637 virtual void anchor();
38 protected:
39 AMDGPUAS AMDGPUASI;
3740
3841 public:
3942 explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st);
3232 AMDGPUInstructionSelector::AMDGPUInstructionSelector(
3333 const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI)
3434 : InstructionSelector(), TII(*STI.getInstrInfo()),
35 TRI(*STI.getRegisterInfo()), RBI(RBI) {}
35 TRI(*STI.getRegisterInfo()), RBI(RBI), AMDGPUASI(STI.getAMDGPUAS()) {}
3636
3737 MachineOperand
3838 AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
290290 if (!I.hasOneMemOperand())
291291 return false;
292292
293 if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS)
293 if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS)
294294 return false;
295295
296296 if (!isInstrUniform(I))
1313 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
1414 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
1515
16 #include "AMDGPU.h"
1617 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
1718 #include "llvm/ADT/ArrayRef.h"
1819 #include "llvm/ADT/SmallVector.h"
3435 const AMDGPURegisterBankInfo &RBI);
3536
3637 bool select(MachineInstr &I) const override;
37
3838 private:
3939 struct GEPInfo {
4040 const MachineInstr &GEP;
5858 const SIInstrInfo &TII;
5959 const SIRegisterInfo &TRI;
6060 const AMDGPURegisterBankInfo &RBI;
61 protected:
62 AMDGPUAS AMDGPUASI;
6163 };
6264
6365 } // End llvm namespace.
203203 //===----------------------------------------------------------------------===//
204204
205205 class PrivateMemOp : PatFrag
206 return cast(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
206 return cast(N)->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS;
207207 }]>;
208208
209209 class PrivateLoad : PrivateMemOp <
221221 def store_private : PrivateStore ;
222222
223223 class GlobalMemOp : PatFrag
224 return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
224 return cast(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
225225 }]>;
226226
227227 // Global address space loads
241241
242242
243243 class ConstantMemOp : PatFrag
244 return cast(N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
244 return cast(N)->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
245245 }]>;
246246
247247 // Constant address space loads
252252 def constant_load : ConstantLoad;
253253
254254 class LocalMemOp : PatFrag
255 return cast(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
255 return cast(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
256256 }]>;
257257
258258 // Local address space loads
265265 >;
266266
267267 class FlatMemOp : PatFrag
268 return cast(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
268 return cast(N)->getAddressSpace() == AMDGPUASI.FLAT_ADDRESS;
269269 }]>;
270270
271271 class FlatLoad : FlatMemOp <
347347 class local_binary_atomic_op :
348348 PatFrag<(ops node:$ptr, node:$value),
349349 (atomic_op node:$ptr, node:$value), [{
350 return cast(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
350 return cast(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
351351 }]>;
352352
353353
365365
366366 def mskor_global : PatFrag<(ops node:$val, node:$ptr),
367367 (AMDGPUstore_mskor node:$val, node:$ptr), [{
368 return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
368 return cast(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
369369 }]>;
370370
371371 multiclass AtomicCmpSwapLocal {
375375 (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
376376 AtomicSDNode *AN = cast<AtomicSDNode>(N);
377377 return AN->getMemoryVT() == MVT::i32 &&
378 AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
378 AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
379379 }]>;
380380
381381 def _64_local : PatFrag<
383383 (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
384384 AtomicSDNode *AN = cast<AtomicSDNode>(N);
385385 return AN->getMemoryVT() == MVT::i64 &&
386 AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
386 AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
387387 }]>;
388388 }
389389
393393 def "" : PatFrag<
394394 (ops node:$ptr, node:$value),
395395 (atomic_op node:$ptr, node:$value),
396 [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
396 [{return cast(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;
397397
398398 def _noret : PatFrag<
399399 (ops node:$ptr, node:$value),
400400 (atomic_op node:$ptr, node:$value),
401 [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
401 [{return cast(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
402402
403403 def _ret : PatFrag<
404404 (ops node:$ptr, node:$value),
405405 (atomic_op node:$ptr, node:$value),
406 [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
406 [{return cast(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
407407 }
408408
409409 defm atomic_swap_global : global_binary_atomic_op;
421421 def AMDGPUatomic_cmp_swap_global : PatFrag<
422422 (ops node:$ptr, node:$value),
423423 (AMDGPUatomic_cmp_swap node:$ptr, node:$value),
424 [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
424 [{return cast(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;
425425
426426 def atomic_cmp_swap_global : PatFrag<
427427 (ops node:$ptr, node:$cmp, node:$value),
428428 (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
429 [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
429 [{return cast(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;
430430
431431 def atomic_cmp_swap_global_noret : PatFrag<
432432 (ops node:$ptr, node:$cmp, node:$value),
433433 (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
434 [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
434 [{return cast(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
435435
436436 def atomic_cmp_swap_global_ret : PatFrag<
437437 (ops node:$ptr, node:$cmp, node:$value),
438438 (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
439 [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
439 [{return cast(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
440440
441441 //===----------------------------------------------------------------------===//
442442 // Misc Pattern Fragments
7171 Module *Mod = nullptr;
7272 const DataLayout *DL = nullptr;
7373 MDNode *MaxWorkGroupSizeRange = nullptr;
74 AMDGPUAS AS;
7475
7576 // FIXME: This should be per-kernel.
7677 uint32_t LocalMemLimit = 0;
153154 const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
154155 if (!ST.isPromoteAllocaEnabled())
155156 return false;
157 AS = AMDGPU::getAMDGPUAS(*F.getParent());
156158
157159 FunctionType *FTy = F.getFunctionType();
158160
161163 // we cannot use local memory in the pass.
162164 for (Type *ParamTy : FTy->params()) {
163165 PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
164 if (PtrTy && PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
166 if (PtrTy && PtrTy->getAddressSpace() == AS.LOCAL_ADDRESS) {
165167 LocalMemLimit = 0;
166168 DEBUG(dbgs() << "Function has local memory argument. Promoting to "
167169 "local memory disabled.\n");
178180 // Check how much local memory is being used by global objects
179181 CurrentLocalMemUsage = 0;
180182 for (GlobalVariable &GV : Mod->globals()) {
181 if (GV.getType()->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
183 if (GV.getType()->getAddressSpace() != AS.LOCAL_ADDRESS)
182184 continue;
183185
184186 for (const User *U : GV.users()) {
316318
317319 Type *I32Ty = Type::getInt32Ty(Mod->getContext());
318320 Value *CastDispatchPtr = Builder.CreateBitCast(
319 DispatchPtr, PointerType::get(I32Ty, AMDGPUAS::CONSTANT_ADDRESS));
321 DispatchPtr, PointerType::get(I32Ty, AS.CONSTANT_ADDRESS));
320322
321323 // We could do a single 64-bit load here, but it's likely that the basic
322324 // 32-bit and extract sequence is already present, and it is probably easier
412414 }
413415 }
414416
415 static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
417 static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
416418 ArrayType *AllocaTy = dyn_cast<ArrayType>(Alloca->getAllocatedType());
417419
418420 DEBUG(dbgs() << "Alloca candidate for vectorization\n");
467469 IRBuilder<> Builder(Inst);
468470 switch (Inst->getOpcode()) {
469471 case Instruction::Load: {
470 Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
472 Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
471473 Value *Ptr = Inst->getOperand(0);
472474 Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
473475
479481 break;
480482 }
481483 case Instruction::Store: {
482 Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
484 Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
483485
484486 Value *Ptr = Inst->getOperand(1);
485487 Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
672674
673675 DEBUG(dbgs() << "Trying to promote " << I << '\n');
674676
675 if (tryPromoteAllocaToVector(&I)) {
677 if (tryPromoteAllocaToVector(&I, AS)) {
676678 DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");
677679 return;
678680 }
733735 Twine(F->getName()) + Twine('.') + I.getName(),
734736 nullptr,
735737 GlobalVariable::NotThreadLocal,
736 AMDGPUAS::LOCAL_ADDRESS);
738 AS.LOCAL_ADDRESS);
737739 GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
738740 GV->setAlignment(I.getAlignment());
739741
766768 if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
767769 Value *Src0 = CI->getOperand(0);
768770 Type *EltTy = Src0->getType()->getPointerElementType();
769 PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
771 PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);
770772
771773 if (isa<ConstantPointerNull>(CI->getOperand(0)))
772774 CI->setOperand(0, ConstantPointerNull::get(NewTy));
783785 continue;
784786
785787 Type *EltTy = V->getType()->getPointerElementType();
786 PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
788 PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);
787789
788790 // FIXME: It doesn't really make sense to try to do this for all
789791 // instructions.
851853 Type *SrcTy = Src->getType()->getPointerElementType();
852854 Function *ObjectSize = Intrinsic::getDeclaration(Mod,
853855 Intrinsic::objectsize,
854 { Intr->getType(), PointerType::get(SrcTy, AMDGPUAS::LOCAL_ADDRESS) }
856 { Intr->getType(), PointerType::get(SrcTy, AS.LOCAL_ADDRESS) }
855857 );
856858
857859 CallInst *NewCall = Builder.CreateCall(
134134
135135 FeatureDisable(false),
136136 InstrItins(getInstrItineraryForCPU(GPU)) {
137 AS = AMDGPU::getAMDGPUAS(TT);
137138 initializeSubtargetDependencies(TT, GPU, FS);
138139 }
139140
156156
157157 InstrItineraryData InstrItins;
158158 SelectionDAGTargetInfo TSInfo;
159 AMDGPUAS AS;
159160
160161 public:
161162 AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
211212
212213 unsigned getMaxPrivateElementSize() const {
213214 return MaxPrivateElementSize;
215 }
216
217 AMDGPUAS getAMDGPUAS() const {
218 return AS;
214219 }
215220
216221 bool has16BitInsts() const {
239239 : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
240240 FS, Options, getEffectiveRelocModel(RM), CM, OptLevel),
241241 TLOF(createTLOF(getTargetTriple())) {
242 AS = AMDGPU::getAMDGPUAS(TT);
242243 initAsmInfo();
243244 }
244245
808809 TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
809810 return new GCNPassConfig(this, PM);
810811 }
812
3434 protected:
3535 std::unique_ptr<TargetLoweringObjectFile> TLOF;
3636 AMDGPUIntrinsicInfo IntrinsicInfo;
37 AMDGPUAS AS;
3738
3839 StringRef getGPUName(const Function &F) const;
3940 StringRef getFeatureString(const Function &F) const;
5657 TargetLoweringObjectFile *getObjFileLowering() const override {
5758 return TLOF.get();
5859 }
60 AMDGPUAS getAMDGPUAS() const {
61 return AS;
62 }
5963
6064 void adjustPassManager(PassManagerBuilder &) override;
6165 /// Get the integer value of a null pointer in the given address space.
6266 uint64_t getNullPointerValue(unsigned AddrSpace) const {
63 switch(AddrSpace) {
64 case AMDGPUAS::LOCAL_ADDRESS:
65 case AMDGPUAS::REGION_ADDRESS:
67 if (AddrSpace == AS.LOCAL_ADDRESS || AddrSpace == AS.REGION_ADDRESS)
6668 return -1;
67 default:
68 return 0;
69 }
69 return 0;
7070 }
7171
7272 };
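A worked example of getNullPointerValue() above: for the local and region segments the null pointer is the all-ones value, and for every other address space it is 0. A standalone sketch under the default layout:

```cpp
#include <cassert>
#include <cstdint>

// Model of getNullPointerValue() above; LOCAL=3 and REGION=5 assume the
// default (private-is-zero) layout.
uint64_t nullPointerValue(unsigned AS) {
  const unsigned LOCAL = 3, REGION = 5;
  return (AS == LOCAL || AS == REGION) ? uint64_t(-1) : 0;
}

int main() {
  assert(uint32_t(nullPointerValue(3)) == 0xFFFFFFFFu); // local null as i32
  assert(nullPointerValue(1) == 0);                     // global null
}
```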
66 //
77 //===----------------------------------------------------------------------===//
88
9 #include "AMDGPUTargetMachine.h"
910 #include "AMDGPUTargetObjectFile.h"
1011 #include "AMDGPU.h"
1112 #include "llvm/MC/MCContext.h"
2122
2223 MCSection *AMDGPUTargetObjectFile::SelectSectionForGlobal(
2324 const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
24 if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GO) &&
25 auto AS = static_cast<const AMDGPUTargetMachine *>(&TM)->getAMDGPUAS();
26 if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GO, AS) &&
2527 AMDGPU::shouldEmitConstantsToTextSection(TM.getTargetTriple()))
2628 return TextSection;
2729
1515 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H
1616 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H
1717
18 #include "AMDGPU.h"
1819 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
1920 #include "llvm/Target/TargetMachine.h"
2021
4747 const DataLayout &DL = BB->getModule()->getDataLayout();
4848 for (const Instruction &I : *BB) {
4949 const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
50 if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
50 if (!GEP || GEP->getAddressSpace() != ST->getAMDGPUAS().PRIVATE_ADDRESS)
5151 continue;
5252
5353 const Value *Ptr = GEP->getPointerOperand();
107107 }
108108
109109 unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
110 switch (AddrSpace) {
111 case AMDGPUAS::GLOBAL_ADDRESS:
112 case AMDGPUAS::CONSTANT_ADDRESS:
113 case AMDGPUAS::FLAT_ADDRESS:
110 AMDGPUAS AS = ST->getAMDGPUAS();
111 if (AddrSpace == AS.GLOBAL_ADDRESS ||
112 AddrSpace == AS.CONSTANT_ADDRESS ||
113 AddrSpace == AS.FLAT_ADDRESS)
114114 return 128;
115 case AMDGPUAS::LOCAL_ADDRESS:
116 case AMDGPUAS::REGION_ADDRESS:
115 if (AddrSpace == AS.LOCAL_ADDRESS ||
116 AddrSpace == AS.REGION_ADDRESS)
117117 return 64;
118 case AMDGPUAS::PRIVATE_ADDRESS:
118 if (AddrSpace == AS.PRIVATE_ADDRESS)
119119 return 8 * ST->getMaxPrivateElementSize();
120 default:
121 if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS &&
122 (AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
123 AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
124 (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
125 AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
126 return 128;
127 llvm_unreachable("unhandled address space");
128 }
120
121 if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS &&
122 (AddrSpace == AS.PARAM_D_ADDRESS ||
123 AddrSpace == AS.PARAM_I_ADDRESS ||
124 (AddrSpace >= AS.CONSTANT_BUFFER_0 &&
125 AddrSpace <= AS.CONSTANT_BUFFER_15)))
126 return 128;
127 llvm_unreachable("unhandled address space");
129128 }
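The function above maps each address space to the widest vector register a load or store may use. A standalone model under the default layout:

```cpp
#include <cassert>

// Model of getLoadStoreVecRegBitWidth() above; address space numbers assume
// the default layout, and MaxPrivElemSize stands in for
// ST->getMaxPrivateElementSize().
unsigned vecRegBits(unsigned AS, unsigned MaxPrivElemSize) {
  const unsigned PRIVATE = 0, GLOBAL = 1, CONSTANT = 2, LOCAL = 3, FLAT = 4,
                 REGION = 5;
  if (AS == GLOBAL || AS == CONSTANT || AS == FLAT)
    return 128; // full 128-bit vector accesses
  if (AS == LOCAL || AS == REGION)
    return 64;  // local/region capped at 64 bits
  assert(AS == PRIVATE);
  return 8 * MaxPrivElemSize; // e.g. 32 bits when the max element is 4 bytes
}
```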
130129
131130 bool AMDGPUTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
134133 // We allow vectorization of flat stores, even though we may need to decompose
135134 // them later if they may access private memory. We don't have enough context
136135 // here, and legalization can handle it.
137 if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) {
136 if (AddrSpace == ST->getAMDGPUAS().PRIVATE_ADDRESS) {
138137 return (Alignment >= 4 || ST->hasUnalignedScratchAccess()) &&
139138 ChainSizeInBytes <= ST->getMaxPrivateElementSize();
140139 }
361360 // All other loads are not divergent, because if threads issue loads with the
362361 // same arguments, they will always get the same result.
363362 if (const LoadInst *Load = dyn_cast<LoadInst>(V))
364 return Load->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
363 return Load->getPointerAddressSpace() == ST->getAMDGPUAS().PRIVATE_ADDRESS;
365364
366365 // Atomics are divergent because they are executed sequentially: when an
367366 // atomic operation refers to the same address in each thread, then each
109109 if (IsGraphicsShader)
110110 return -1;
111111 return ST->hasFlatAddressSpace() ?
112 AMDGPUAS::FLAT_ADDRESS : AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
112 ST->getAMDGPUAS().FLAT_ADDRESS : ST->getAMDGPUAS().UNKNOWN_ADDRESS_SPACE;
113113 }
114114
115115 unsigned getVectorSplitCost() { return 0; }
2020 class MubufLoad : PatFrag <
2121 (ops node:$ptr), (op node:$ptr), [{
2222 auto const AS = cast(N)->getAddressSpace();
23 return AS == AMDGPUAS::GLOBAL_ADDRESS ||
24 AS == AMDGPUAS::CONSTANT_ADDRESS;
23 return AS == AMDGPUASI.GLOBAL_ADDRESS ||
24 AS == AMDGPUASI.CONSTANT_ADDRESS;
2525 }]>;
2626
2727 def mubuf_load : MubufLoad ;
135135 class flat_binary_atomic_op : PatFrag<
136136 (ops node:$ptr, node:$value),
137137 (atomic_op node:$ptr, node:$value),
138 [{return cast(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]
138 [{return cast(N)->getAddressSpace() == AMDGPUASI.FLAT_ADDRESS;}]
139139 >;
140140
141141 def atomic_cmp_swap_flat : flat_binary_atomic_op;
283283 class flat_ld : PatFrag<(ops node:$ptr),
284284 (ld node:$ptr), [{
285285 auto const AS = cast(N)->getAddressSpace();
286 return AS == AMDGPUAS::FLAT_ADDRESS ||
287 AS == AMDGPUAS::GLOBAL_ADDRESS ||
288 AS == AMDGPUAS::CONSTANT_ADDRESS;
286 return AS == AMDGPUASI.FLAT_ADDRESS ||
287 AS == AMDGPUASI.GLOBAL_ADDRESS ||
288 AS == AMDGPUASI.CONSTANT_ADDRESS;
289289 }]>;
290290
291291 class flat_st : PatFrag<(ops node:$val, node:$ptr),
292292 (st node:$val, node:$ptr), [{
293293 auto const AS = cast(N)->getAddressSpace();
294 return AS == AMDGPUAS::FLAT_ADDRESS ||
295 AS == AMDGPUAS::GLOBAL_ADDRESS;
294 return AS == AMDGPUASI.FLAT_ADDRESS ||
295 AS == AMDGPUASI.GLOBAL_ADDRESS;
296296 }]>;
297297
298298 def atomic_flat_load : flat_ld ;
263263
264264 AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer(
265265 unsigned AddressSpace) const {
266 switch (AddressSpace) {
267 case AMDGPUAS::PRIVATE_ADDRESS:
266 if (AddressSpace == AMDGPUASI.PRIVATE_ADDRESS)
268267 return AddressSpaceQualifier::Private;
269 case AMDGPUAS::GLOBAL_ADDRESS:
268 if (AddressSpace == AMDGPUASI.GLOBAL_ADDRESS)
270269 return AddressSpaceQualifier::Global;
271 case AMDGPUAS::CONSTANT_ADDRESS:
270 if (AddressSpace == AMDGPUASI.CONSTANT_ADDRESS)
272271 return AddressSpaceQualifier::Constant;
273 case AMDGPUAS::LOCAL_ADDRESS:
272 if (AddressSpace == AMDGPUASI.LOCAL_ADDRESS)
274273 return AddressSpaceQualifier::Local;
275 case AMDGPUAS::FLAT_ADDRESS:
274 if (AddressSpace == AMDGPUASI.FLAT_ADDRESS)
276275 return AddressSpaceQualifier::Generic;
277 case AMDGPUAS::REGION_ADDRESS:
276 if (AddressSpace == AMDGPUASI.REGION_ADDRESS)
278277 return AddressSpaceQualifier::Region;
279 }
280278
281279 llvm_unreachable("Unknown address space qualifier");
282280 }
303301 "image3d_t", ValueKind::Image)
304302 .Default(isa<PointerType>(Ty) ?
305303 (Ty->getPointerAddressSpace() ==
306 AMDGPUAS::LOCAL_ADDRESS ?
304 AMDGPUASI.LOCAL_ADDRESS ?
307305 ValueKind::DynamicSharedPointer :
308306 ValueKind::GlobalBuffer) :
309307 ValueKind::ByValue);
459457 return;
460458
461459 auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
462 AMDGPUAS::GLOBAL_ADDRESS);
460 AMDGPUASI.GLOBAL_ADDRESS);
463461 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
464462 }
465463
512510
513511 if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
514512 auto ElTy = PtrTy->getElementType();
515 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ElTy->isSized())
513 if (PtrTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS && ElTy->isSized())
516514 Arg.mPointeeAlign = DL.getABITypeAlignment(ElTy);
517515 }
518516
575573 }
576574
577575 void MetadataStreamer::begin(const Module &Mod) {
576 AMDGPUASI = getAMDGPUAS(Mod);
578577 emitVersion();
579578 emitPrintf(Mod);
580579 }
1515 #ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H
1616 #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H
1717
18 #include "AMDGPU.h"
1819 #include "AMDGPUCodeObjectMetadata.h"
1920 #include "AMDKernelCodeT.h"
2021 #include "llvm/ADT/StringRef.h"
3536 class MetadataStreamer final {
3637 private:
3738 Metadata CodeObjectMetadata;
39 AMDGPUAS AMDGPUASI;
3840
3941 void dump(StringRef YamlString) const;
4042
556556 }
557557
558558 case Intrinsic::r600_implicitarg_ptr: {
559 MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
559 MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUASI.PARAM_I_ADDRESS);
560560 uint32_t ByteOffset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
561561 return DAG.getConstant(ByteOffset, DL, PtrVT);
562562 }
706706 SDValue Op,
707707 SelectionDAG &DAG) const {
708708 GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
709 if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
709 if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS)
710710 return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
711711
712712 const DataLayout &DL = DAG.getDataLayout();
713713 const GlobalValue *GV = GSD->getGlobal();
714 MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
714 MVT ConstPtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS);
715715
716716 SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
717717 return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
868868 unsigned DwordOffset) const {
869869 unsigned ByteOffset = DwordOffset * 4;
870870 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
871 AMDGPUAS::CONSTANT_BUFFER_0);
871 AMDGPUASI.CONSTANT_BUFFER_0);
872872
873873 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
874874 assert(isInt<16>(ByteOffset));
11061106 //TODO: Who creates the i8 stores?
11071107 assert(Store->isTruncatingStore()
11081108 || Store->getValue().getValueType() == MVT::i8);
1109 assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
1109 assert(Store->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS);
11101110
11111111 SDValue Mask;
11121112 if (Store->getMemoryVT() == MVT::i8) {
12041204 SDLoc DL(Op);
12051205
12061206 // Neither LOCAL nor PRIVATE can do vectors at the moment
1207 if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
1207 if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS) &&
12081208 VT.isVector()) {
1209 if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && StoreNode->isTruncatingStore()) {
1209 if ((AS == AMDGPUASI.PRIVATE_ADDRESS) &&
1210 StoreNode->isTruncatingStore()) {
12101211 // Add an extra level of chain to isolate this vector
12111212 SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
12121213 // TODO: can the chain be replaced without creating a new store?
12291230 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
12301231 DAG.getConstant(2, DL, PtrVT));
12311232
1232 if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
1233 if (AS == AMDGPUASI.GLOBAL_ADDRESS) {
12331234 // It is beneficial to create MSKOR here instead of combiner to avoid
12341235 // artificial dependencies introduced by RMW
12351236 if (StoreNode->isTruncatingStore()) {
12821283 }
12831284
12841285 // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
1285 if (AS != AMDGPUAS::PRIVATE_ADDRESS)
1286 if (AS != AMDGPUASI.PRIVATE_ADDRESS)
12861287 return SDValue();
12871288
12881289 if (MemVT.bitsLT(MVT::i32))
13011302
13021303 // Returns 512 + (kc_bank << 12).
13031304 static int
1304 ConstantAddressBlock(unsigned AddressSpace) {
1305 ConstantAddressBlock(unsigned AddressSpace, AMDGPUAS AMDGPUASI) {
13051306 switch (AddressSpace) {
1306 case AMDGPUAS::CONSTANT_BUFFER_0:
1307 case AMDGPUASI.CONSTANT_BUFFER_0:
13071308 return 512;
1308 case AMDGPUAS::CONSTANT_BUFFER_1:
1309 case AMDGPUASI.CONSTANT_BUFFER_1:
13091310 return 512 + 4096;
1310 case AMDGPUAS::CONSTANT_BUFFER_2:
1311 case AMDGPUASI.CONSTANT_BUFFER_2:
13111312 return 512 + 4096 * 2;
1312 case AMDGPUAS::CONSTANT_BUFFER_3:
1313 case AMDGPUASI.CONSTANT_BUFFER_3:
13131314 return 512 + 4096 * 3;
1314 case AMDGPUAS::CONSTANT_BUFFER_4:
1315 case AMDGPUASI.CONSTANT_BUFFER_4:
13151316 return 512 + 4096 * 4;
1316 case AMDGPUAS::CONSTANT_BUFFER_5:
1317 case AMDGPUASI.CONSTANT_BUFFER_5:
13171318 return 512 + 4096 * 5;
1318 case AMDGPUAS::CONSTANT_BUFFER_6:
1319 case AMDGPUASI.CONSTANT_BUFFER_6:
13191320 return 512 + 4096 * 6;
1320 case AMDGPUAS::CONSTANT_BUFFER_7:
1321 case AMDGPUASI.CONSTANT_BUFFER_7:
13211322 return 512 + 4096 * 7;
1322 case AMDGPUAS::CONSTANT_BUFFER_8:
1323 case AMDGPUASI.CONSTANT_BUFFER_8:
13231324 return 512 + 4096 * 8;
1324 case AMDGPUAS::CONSTANT_BUFFER_9:
1325 case AMDGPUASI.CONSTANT_BUFFER_9:
13251326 return 512 + 4096 * 9;
1326 case AMDGPUAS::CONSTANT_BUFFER_10:
1327 case AMDGPUASI.CONSTANT_BUFFER_10:
13271328 return 512 + 4096 * 10;
1328 case AMDGPUAS::CONSTANT_BUFFER_11:
1329 case AMDGPUASI.CONSTANT_BUFFER_11:
13291330 return 512 + 4096 * 11;
1330 case AMDGPUAS::CONSTANT_BUFFER_12:
1331 case AMDGPUASI.CONSTANT_BUFFER_12:
13311332 return 512 + 4096 * 12;
1332 case AMDGPUAS::CONSTANT_BUFFER_13:
1333 case AMDGPUASI.CONSTANT_BUFFER_13:
13331334 return 512 + 4096 * 13;
1334 case AMDGPUAS::CONSTANT_BUFFER_14:
1335 case AMDGPUASI.CONSTANT_BUFFER_14:
13351336 return 512 + 4096 * 14;
1336 case AMDGPUAS::CONSTANT_BUFFER_15:
1337 case AMDGPUASI.CONSTANT_BUFFER_15:
13371338 return 512 + 4096 * 15;
13381339 default:
13391340 return -1;
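The case list computes a simple affine formula. A compact, standalone equivalent, relying on the CONSTANT_BUFFER_* values staying contiguous (the header comment earlier forbids re-ordering them):

```cpp
// Equivalent of ConstantAddressBlock() above: with CONSTANT_BUFFER_0 == 8,
// constant buffer k starts at 512 + (k << 12).
int constantAddressBlock(unsigned AS) {
  const unsigned CB0 = 8, CB15 = 23; // values from the AMDGPUAS struct earlier
  if (AS >= CB0 && AS <= CB15)
    return 512 + 4096 * (AS - CB0);
  return -1; // not a constant buffer address space
}
```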
14011402 EVT MemVT = LoadNode->getMemoryVT();
14021403 ISD::LoadExtType ExtType = LoadNode->getExtensionType();
14031404
1404 if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
1405 if (AS == AMDGPUASI.PRIVATE_ADDRESS &&
14051406 ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
14061407 return lowerPrivateExtLoad(Op, DAG);
14071408 }
14111412 SDValue Chain = LoadNode->getChain();
14121413 SDValue Ptr = LoadNode->getBasePtr();
14131414
1414 if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
1415 LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
1415 if ((LoadNode->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS ||
1416 LoadNode->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS) &&
14161417 VT.isVector()) {
14171418 return scalarizeVectorLoad(LoadNode, DAG);
14181419 }
14191420
1420 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
1421 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace(),
1422 AMDGPUASI);
14211423 if (ConstantBlock > -1 &&
14221424 ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
14231425 (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
14491451 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
14501452 DAG.getConstant(4, DL, MVT::i32)),
14511453 DAG.getConstant(LoadNode->getAddressSpace() -
1452 AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
1454 AMDGPUASI.CONSTANT_BUFFER_0, DL, MVT::i32)
14531455 );
14541456 }
14551457
14851487 return DAG.getMergeValues(MergedValues, DL);
14861488 }
14871489
1488 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1490 if (LoadNode->getAddressSpace() != AMDGPUASI.PRIVATE_ADDRESS) {
14891491 return SDValue();
14901492 }
14911493
15621564 }
15631565
15641566 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
1565 AMDGPUAS::CONSTANT_BUFFER_0);
1567 AMDGPUASI.CONSTANT_BUFFER_0);
15661568
15671569 // i64 isn't a legal type, so the register type used ends up as i32, which
15681570 // isn't expected here. It attempts to create this sextload, but it ends up
315315 class LoadParamFrag : PatFrag <
316316 (ops node:$ptr), (load_type node:$ptr),
317317 [{ return isConstantLoad(cast(N), 0) ||
318 (cast(N)->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS); }]
318 (cast(N)->getAddressSpace() == AMDGPUASI.PARAM_I_ADDRESS); }]
319319 >;
320320
321321 def vtx_id3_az_extloadi8 : LoadParamFrag;
325325 class LoadVtxId1 : PatFrag <
326326 (ops node:$ptr), (load node:$ptr), [{
327327 const MemSDNode *LD = cast<MemSDNode>(N);
328 return LD->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
329 (LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
328 return LD->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
329 (LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
330330 !isa(GetUnderlyingObject(
331331 LD->getMemOperand()->getValue(), CurDAG->getDataLayout())));
332332 }]>;
338338 class LoadVtxId2 : PatFrag <
339339 (ops node:$ptr), (load node:$ptr), [{
340340 const MemSDNode *LD = cast<MemSDNode>(N);
341 return LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
341 return LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
342342 isa(GetUnderlyingObject(
343343 LD->getMemOperand()->getValue(), CurDAG->getDataLayout()));
344344 }]>;
201201 // Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was
202202 // specified.
203203 const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
204 auto AMDGPUASI = ST.getAMDGPUAS();
204205 if (ST.debuggerEmitPrologue())
205206 emitDebuggerPrologue(MF, MBB);
206207
339340
340341 PointerType *PtrTy =
341342 PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()),
342 AMDGPUAS::CONSTANT_ADDRESS);
343 AMDGPUASI.CONSTANT_ADDRESS);
343344 MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
344345 auto MMO = MF.getMachineMemOperand(PtrInfo,
345346 MachineMemOperand::MOLoad |
596596 if (AM.BaseGV)
597597 return false;
598598
599 switch (AS) {
600 case AMDGPUAS::GLOBAL_ADDRESS:
599 if (AS == AMDGPUASI.GLOBAL_ADDRESS) {
601600 if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
602601 // Assume that we will use FLAT for all global memory accesses
603602 // on VI.
612611 }
613612
614613 return isLegalMUBUFAddressingMode(AM);
615
616 case AMDGPUAS::CONSTANT_ADDRESS:
614 } else if (AS == AMDGPUASI.CONSTANT_ADDRESS) {
617615 // If the offset isn't a multiple of 4, it probably isn't going to be
618616 // correctly aligned.
619617 // FIXME: Can we get the real alignment here?
651649
652650 return false;
653651
654 case AMDGPUAS::PRIVATE_ADDRESS:
652 } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
655653 return isLegalMUBUFAddressingMode(AM);
656
657 case AMDGPUAS::LOCAL_ADDRESS:
658 case AMDGPUAS::REGION_ADDRESS:
654 } else if (AS == AMDGPUASI.LOCAL_ADDRESS ||
655 AS == AMDGPUASI.REGION_ADDRESS) {
659656 // Basic, single offset DS instructions allow a 16-bit unsigned immediate
660657 // field.
661658 // XXX - If doing a 4-byte aligned 8-byte type access, we effectively have
670667 return true;
671668
672669 return false;
673
674 case AMDGPUAS::FLAT_ADDRESS:
675 case AMDGPUAS::UNKNOWN_ADDRESS_SPACE:
670 } else if (AS == AMDGPUASI.FLAT_ADDRESS ||
671 AS == AMDGPUASI.UNKNOWN_ADDRESS_SPACE) {
676672 // For an unknown address space, this usually means that this is for some
677673 // reason being used for pure arithmetic, and not based on some addressing
678674 // computation. We don't have instructions that compute pointers with any
679675 // addressing modes, so treat them as having no offset like flat
680676 // instructions.
681677 return isLegalFlatAddressingMode(AM);
682
683 default:
678 } else {
684679 llvm_unreachable("unhandled address space");
685680 }
686681 }
701696 return false;
702697 }
703698
704 if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
705 AddrSpace == AMDGPUAS::REGION_ADDRESS) {
699 if (AddrSpace == AMDGPUASI.LOCAL_ADDRESS ||
700 AddrSpace == AMDGPUASI.REGION_ADDRESS) {
706701 // ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
707702 // aligned, 8 byte access in a single operation using ds_read2/write2_b32
708703 // with adjacent offsets.
717712 // will access scratch. If we had access to the IR function, then we
718713 // could determine if any private memory was used in the function.
719714 if (!Subtarget->hasUnalignedScratchAccess() &&
720 (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
721 AddrSpace == AMDGPUAS::FLAT_ADDRESS)) {
715 (AddrSpace == AMDGPUASI.PRIVATE_ADDRESS ||
716 AddrSpace == AMDGPUASI.FLAT_ADDRESS)) {
722717 return false;
723718 }
724719
726721 // If we have a uniform constant load, it still requires using a slow
727722 // buffer instruction if unaligned.
728723 if (IsFast) {
729 *IsFast = (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS) ?
724 *IsFast = (AddrSpace == AMDGPUASI.CONSTANT_ADDRESS) ?
730725 (Align % 4 == 0) : true;
731726 }
732727
766761 return MVT::Other;
767762 }
768763
769 static bool isFlatGlobalAddrSpace(unsigned AS) {
770 return AS == AMDGPUAS::GLOBAL_ADDRESS ||
771 AS == AMDGPUAS::FLAT_ADDRESS ||
772 AS == AMDGPUAS::CONSTANT_ADDRESS;
764 static bool isFlatGlobalAddrSpace(unsigned AS, AMDGPUAS AMDGPUASI) {
765 return AS == AMDGPUASI.GLOBAL_ADDRESS ||
766 AS == AMDGPUASI.FLAT_ADDRESS ||
767 AS == AMDGPUASI.CONSTANT_ADDRESS;
773768 }
774769
775770 bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
776771 unsigned DestAS) const {
777 return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS);
772 return isFlatGlobalAddrSpace(SrcAS, AMDGPUASI) &&
773 isFlatGlobalAddrSpace(DestAS, AMDGPUASI);
778774 }
779775
780776 bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const {
788784 unsigned DestAS) const {
789785 // Flat -> private/local is a simple truncate.
790786 // Flat -> global is no-op
791 if (SrcAS == AMDGPUAS::FLAT_ADDRESS)
787 if (SrcAS == AMDGPUASI.FLAT_ADDRESS)
792788 return true;
793789
794790 return isNoopAddrSpaceCast(SrcAS, DestAS);
849845 unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
850846
851847 MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
852 MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
848 MVT PtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS);
853849 SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
854850 MRI.getLiveInVirtReg(InputPtrReg), PtrVT);
855851 return DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
862858 const ISD::InputArg *Arg) const {
863859 const DataLayout &DL = DAG.getDataLayout();
864860 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
865 PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
861 PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS);
866862 MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
867863
868864 unsigned Align = DL.getABITypeAlignment(Ty);
10721068 auto *ParamTy =
10731069 dyn_cast(FType->getParamType(Ins[i].getOrigArgIndex()));
10741070 if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS &&
1075 ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
1071 ParamTy && ParamTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS) {
10761072 // On SI local pointers are just offsets into LDS, so they are always
10771073 // less than 16-bits. On CI and newer they could potentially be
10781074 // real pointers, so we can't guarantee their size.
22052201
22062202 bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
22072203 const Triple &TT = getTargetMachine().getTargetTriple();
2208 return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
2204 return GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
22092205 AMDGPU::shouldEmitConstantsToTextSection(TT);
22102206 }
22112207
22122208 bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {
2213 return (GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
2214 GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) &&
2209 return (GV->getType()->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
2210 GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) &&
22152211 !shouldEmitFixup(GV) &&
22162212 !getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
22172213 }
23502346 SelectionDAG &DAG) const {
23512347
23522348 if (Subtarget->hasApertureRegs()) { // Read from Aperture Registers directly.
2353 unsigned RegNo = (AS == AMDGPUAS::LOCAL_ADDRESS) ? AMDGPU::SRC_SHARED_BASE :
2349 unsigned RegNo = (AS == AMDGPUASI.LOCAL_ADDRESS) ? AMDGPU::SRC_SHARED_BASE :
23542350 AMDGPU::SRC_PRIVATE_BASE;
23552351 return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, RegNo, MVT::i32);
23562352 }
23662362
23672363 // Offset into amd_queue_t for group_segment_aperture_base_hi /
23682364 // private_segment_aperture_base_hi.
2369 uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;
2365 uint32_t StructOffset = (AS == AMDGPUASI.LOCAL_ADDRESS) ? 0x40 : 0x44;
23702366
23712367 SDValue Ptr = DAG.getNode(ISD::ADD, SL, MVT::i64, QueuePtr,
23722368 DAG.getConstant(StructOffset, SL, MVT::i64));
23752371 // TODO: We should use the value from the IR intrinsic call, but it might not
23762372 // be available and how do we get it?
23772373 Value *V = UndefValue::get(PointerType::get(Type::getInt8Ty(*DAG.getContext()),
2378 AMDGPUAS::CONSTANT_ADDRESS));
2374 AMDGPUASI.CONSTANT_ADDRESS));
23792375
23802376 MachinePointerInfo PtrInfo(V, StructOffset);
23812377 return DAG.getLoad(MVT::i32, SL, QueuePtr.getValue(1), Ptr, PtrInfo,
23962392 static_cast<const AMDGPUTargetMachine &>(getTargetMachine());
23972393
23982394 // flat -> local/private
2399 if (ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS) {
2395 if (ASC->getSrcAddressSpace() == AMDGPUASI.FLAT_ADDRESS) {
24002396 unsigned DestAS = ASC->getDestAddressSpace();
2401 if (DestAS == AMDGPUAS::LOCAL_ADDRESS || DestAS == AMDGPUAS::PRIVATE_ADDRESS) {
2397
2398 if (DestAS == AMDGPUASI.LOCAL_ADDRESS ||
2399 DestAS == AMDGPUASI.PRIVATE_ADDRESS) {
24022400 unsigned NullVal = TM.getNullPointerValue(DestAS);
24032401 SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
24042402 SDValue NonNull = DAG.getSetCC(SL, MVT::i1, Src, FlatNullPtr, ISD::SETNE);
24102408 }
24112409
24122410 // local/private -> flat
2413 if (ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) {
2411 if (ASC->getDestAddressSpace() == AMDGPUASI.FLAT_ADDRESS) {
24142412 unsigned SrcAS = ASC->getSrcAddressSpace();
2415 if (SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS) {
2413
2414 if (SrcAS == AMDGPUASI.LOCAL_ADDRESS ||
2415 SrcAS == AMDGPUASI.PRIVATE_ADDRESS) {
24162416 unsigned NullVal = TM.getNullPointerValue(SrcAS);
24172417 SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
24182418
25122512 bool
25132513 SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
25142514 // We can fold offsets for anything that doesn't require a GOT relocation.
2515 return (GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
2516 GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) &&
2515 return (GA->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
2516 GA->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) &&
25172517 !shouldEmitGOTReloc(GA->getGlobal());
25182518 }
25192519
25642564 SelectionDAG &DAG) const {
25652565 GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
25662566
2567 if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
2568 GSD->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS)
2567 if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS &&
2568 GSD->getAddressSpace() != AMDGPUASI.GLOBAL_ADDRESS)
25692569 return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
25702570
25712571 SDLoc DL(GSD);
25822582 SIInstrInfo::MO_GOTPCREL32);
25832583
25842584 Type *Ty = PtrVT.getTypeForEVT(*DAG.getContext());
2585 PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
2585 PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS);
25862586 const DataLayout &DataLayout = DAG.getDataLayout();
25872587 unsigned Align = DataLayout.getABITypeAlignment(PtrTy);
25882588 // FIXME: Use a PseudoSourceValue once those can be assigned an address space.
32283228 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
32293229 // If there is a possibility that flat instructions access scratch memory
32303230 // then we need to use the same legalization rules we use for private.
3231 if (AS == AMDGPUAS::FLAT_ADDRESS)
3231 if (AS == AMDGPUASI.FLAT_ADDRESS)
32323232 AS = MFI->hasFlatScratchInit() ?
3233 AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
3233 AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS;
32343234
32353235 unsigned NumElements = MemVT.getVectorNumElements();
3236 switch (AS) {
3237 case AMDGPUAS::CONSTANT_ADDRESS:
3236 if (AS == AMDGPUASI.CONSTANT_ADDRESS) {
32383237 if (isMemOpUniform(Load))
32393238 return SDValue();
32403239 // Non-uniform loads will be selected to MUBUF instructions, so they
32413240 // have the same legalization requirements as global and private
32423241 // loads.
32433242 //
3244 LLVM_FALLTHROUGH;
3245 case AMDGPUAS::GLOBAL_ADDRESS:
3243 }
3244 if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS) {
32463245 if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) &&
32473246 isMemOpHasNoClobberedMemOperand(Load))
32483247 return SDValue();
32503249 // have the same legalization requirements as global and private
32513250 // loads.
32523251 //
3253 LLVM_FALLTHROUGH;
3254 case AMDGPUAS::FLAT_ADDRESS:
3252 }
3253 if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS ||
3254 AS == AMDGPUASI.FLAT_ADDRESS) {
32553255 if (NumElements > 4)
32563256 return SplitVectorLoad(Op, DAG);
32573257 // v4 loads are supported for private and global memory.
32583258 return SDValue();
3259 case AMDGPUAS::PRIVATE_ADDRESS:
3259 }
3260 if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
32603261 // Depending on the setting of the private_element_size field in the
32613262 // resource descriptor, we can only make private accesses up to a certain
32623263 // size.
32753276 default:
32763277 llvm_unreachable("unsupported private_element_size");
32773278 }
3278 case AMDGPUAS::LOCAL_ADDRESS:
3279 } else if (AS == AMDGPUASI.LOCAL_ADDRESS) {
32793280 if (NumElements > 2)
32803281 return SplitVectorLoad(Op, DAG);
32813282
32843285
32853286 // If properly aligned, splitting might let us use ds_read_b64.
32863287 return SplitVectorLoad(Op, DAG);
3287 default:
3288 return SDValue();
3289 }
3288 }
3289 return SDValue();
32903290 }
32913291
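
The switch statements over address spaces had to become if-chains because case labels require integral constant expressions, and the triple-dependent members of AMDGPUAS are now ordinary runtime values. A minimal, self-contained sketch of the constraint (names are hypothetical; values mirror the struct above):

    struct ASMap {
      unsigned FLAT_ADDRESS;                   // runtime value, depends on the triple
      static const unsigned LOCAL_ADDRESS = 3; // triple-independent constant
    };

    unsigned classify(unsigned AS, ASMap M) {
      // switch (AS) { case M.FLAT_ADDRESS: ... } would be ill-formed:
      // a case label must be an integral constant expression.
      if (AS == M.FLAT_ADDRESS)
        return 0;
      switch (AS) {
      case ASMap::LOCAL_ADDRESS: // static const members still make valid labels
        return 1;
      default:
        return 2;
      }
    }

The same rewrite applies to LowerSTORE below, which drops its switch for the identical reason.
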
32923292 SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
36553655 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
36563656 // If there is a possibility that flat instructions access scratch memory
36573657 // then we need to use the same legalization rules we use for private.
3658 if (AS == AMDGPUAS::FLAT_ADDRESS)
3658 if (AS == AMDGPUASI.FLAT_ADDRESS)
36593659 AS = MFI->hasFlatScratchInit() ?
3660 AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
3660 AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS;
36613661
36623662 unsigned NumElements = VT.getVectorNumElements();
3663 switch (AS) {
3664 case AMDGPUAS::GLOBAL_ADDRESS:
3665 case AMDGPUAS::FLAT_ADDRESS:
3663 if (AS == AMDGPUASI.GLOBAL_ADDRESS ||
3664 AS == AMDGPUASI.FLAT_ADDRESS) {
36663665 if (NumElements > 4)
36673666 return SplitVectorStore(Op, DAG);
36683667 return SDValue();
3669 case AMDGPUAS::PRIVATE_ADDRESS: {
3668 } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
36703669 switch (Subtarget->getMaxPrivateElementSize()) {
36713670 case 4:
36723671 return scalarizeVectorStore(Store, DAG);
36813680 default:
36823681 llvm_unreachable("unsupported private_element_size");
36833682 }
3684 }
3685 case AMDGPUAS::LOCAL_ADDRESS: {
3683 } else if (AS == AMDGPUASI.LOCAL_ADDRESS) {
36863684 if (NumElements > 2)
36873685 return SplitVectorStore(Op, DAG);
36883686
36913689
36923690 // If properly aligned, splitting might let us use ds_write_b64.
36933691 return SplitVectorStore(Op, DAG);
3694 }
3695 default:
3692 } else {
36963693 llvm_unreachable("unhandled address space");
36973694 }
36983695 }
37233720 unsigned AS = AtomicNode->getAddressSpace();
37243721
37253722 // No custom lowering required for local address space
3726 if (!isFlatGlobalAddrSpace(AS))
3723 if (!isFlatGlobalAddrSpace(AS, AMDGPUASI))
37273724 return Op;
37283725
37293726 // Non-local address space requires custom lowering for atomic compare
37803777 /// the immediate offsets of a memory instruction for the given address space.
37813778 static bool canFoldOffset(unsigned OffsetSize, unsigned AS,
37823779 const SISubtarget &STI) {
3783 switch (AS) {
3784 case AMDGPUAS::GLOBAL_ADDRESS:
3780 auto AMDGPUASI = STI.getAMDGPUAS();
3781 if (AS == AMDGPUASI.GLOBAL_ADDRESS) {
37863783 // MUBUF instructions have a 12-bit offset in bytes.
37863783 return isUInt<12>(OffsetSize);
3787 case AMDGPUAS::CONSTANT_ADDRESS:
3784 }
3785 if (AS == AMDGPUASI.CONSTANT_ADDRESS) {
37883786 // SMRD instructions have an 8-bit offset in dwords on SI and
37893787 // a 20-bit offset in bytes on VI.
37903788 if (STI.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
37913789 return isUInt<20>(OffsetSize);
37923790 else
37933791 return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
3794 case AMDGPUAS::LOCAL_ADDRESS:
3795 case AMDGPUAS::REGION_ADDRESS:
3792 }
3793 if (AS == AMDGPUASI.LOCAL_ADDRESS ||
3794 AS == AMDGPUASI.REGION_ADDRESS) {
37963795 // The single offset versions have a 16-bit offset in bytes.
37973796 return isUInt<16>(OffsetSize);
3798 case AMDGPUAS::PRIVATE_ADDRESS:
3797 }
37993798 // Indirect register addressing does not use any offsets.
3800 default:
3801 return false;
3802 }
3799 return false;
38033800 }
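
The offset widths in canFoldOffset can be checked by hand: an SI SMRD offset is 8 bits in dwords, so the largest foldable byte offset is 255 * 4 = 1020, while VI's 20-bit byte offset allows up to 2^20 - 1 = 1048575. A small standalone sketch of the SI rule (the helper is hypothetical, mirroring the branch above):

    #include <cassert>

    // SI SMRD: offset must be dword-aligned and the dword count must fit in 8 bits.
    static bool foldableSMRDOffsetSI(unsigned OffsetSize) {
      return (OffsetSize % 4 == 0) && (OffsetSize / 4 <= 255);
    }

    int main() {
      assert(foldableSMRDOffsetSI(1020));  // 255 dwords: fits
      assert(!foldableSMRDOffsetSI(1024)); // 256 dwords: needs 9 bits
      assert(!foldableSMRDOffsetSI(6));    // not dword-aligned
    }
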
38043801
38053802 // (shl (add x, c1), c2) -> add (shl x, c2), (shl c1, c2)
38573854
38583855 // TODO: We could also do this for multiplies.
38593856 unsigned AS = N->getAddressSpace();
3860 if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUAS::PRIVATE_ADDRESS) {
3857 if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUASI.PRIVATE_ADDRESS) {
38613858 SDValue NewPtr = performSHLPtrCombine(Ptr.getNode(), AS, DCI);
38623859 if (NewPtr) {
38633860 SmallVector NewOps(N->op_begin(), N->op_end());
37463746 return AMDGPU::NoRegister;
37473747
37483748 assert(!MI.memoperands_empty() &&
3749 (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS);
3749 (*MI.memoperands_begin())->getAddrSpace() == AMDGPUASI.PRIVATE_ADDRESS);
37503750
37513751 FrameIndex = Addr->getIndex();
37523752 return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
38533853 return true;
38543854
38553855 for (const MachineMemOperand *MMO : MI.memoperands()) {
3856 if (MMO->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS)
3856 if (MMO->getAddrSpace() == AMDGPUASI.FLAT_ADDRESS)
38573857 return true;
38583858 }
38593859 return false;
106106 >;
107107
108108 def si_ld_local : PatFrag <(ops node:$ptr), (SIld_local node:$ptr), [{
109 return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
109 return cast(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
110110 }]>;
111111
112112 def si_load_local : PatFrag <(ops node:$ptr), (si_ld_local node:$ptr), [{
143143
144144 def si_st_local : PatFrag <
145145 (ops node:$val, node:$ptr), (SIst_local node:$val, node:$ptr), [{
146 return cast(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
146 return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
147147 }]>;
148148
149149 def si_store_local : PatFrag <
225225 def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
226226 auto Ld = cast<LoadSDNode>(N);
227227 return Ld->getAlignment() >= 4 &&
228 ((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
228 ((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
229229 static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N)) ||
230 (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
230 (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS &&
231231 static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N) &&
232232 static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
233233 }]>;
1818 #include "llvm/IR/GlobalValue.h"
1919 #include "llvm/IR/Instruction.h"
2020 #include "llvm/IR/LLVMContext.h"
21 #include "llvm/IR/Module.h"
2122 #include "llvm/MC/MCContext.h"
2223 #include "llvm/MC/MCInstrDesc.h"
2324 #include "llvm/MC/MCRegisterInfo.h"
353354 ELF::SHF_AMDGPU_HSA_AGENT);
354355 }
355356
356 bool isGroupSegment(const GlobalValue *GV) {
357 return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
358 }
359
360 bool isGlobalSegment(const GlobalValue *GV) {
361 return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
362 }
363
364 bool isReadOnlySegment(const GlobalValue *GV) {
365 return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
357 bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) {
358 return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS;
359 }
360
361 bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS) {
362 return GV->getType()->getAddressSpace() == AS.GLOBAL_ADDRESS;
363 }
364
365 bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS) {
366 return GV->getType()->getAddressSpace() == AS.CONSTANT_ADDRESS;
366367 }
367368
368369 bool shouldEmitConstantsToTextSection(const Triple &TT) {
735736 return isSI(ST) || isCI(ST) ? isUInt<8>(EncodedOffset) :
736737 isUInt<20>(EncodedOffset);
737738 }
738
739739 } // end namespace AMDGPU
740
740741 } // end namespace llvm
742
743 const unsigned AMDGPUAS::MAX_COMMON_ADDRESS;
744 const unsigned AMDGPUAS::GLOBAL_ADDRESS;
745 const unsigned AMDGPUAS::LOCAL_ADDRESS;
746 const unsigned AMDGPUAS::PARAM_D_ADDRESS;
747 const unsigned AMDGPUAS::PARAM_I_ADDRESS;
748 const unsigned AMDGPUAS::CONSTANT_BUFFER_0;
749 const unsigned AMDGPUAS::CONSTANT_BUFFER_1;
750 const unsigned AMDGPUAS::CONSTANT_BUFFER_2;
751 const unsigned AMDGPUAS::CONSTANT_BUFFER_3;
752 const unsigned AMDGPUAS::CONSTANT_BUFFER_4;
753 const unsigned AMDGPUAS::CONSTANT_BUFFER_5;
754 const unsigned AMDGPUAS::CONSTANT_BUFFER_6;
755 const unsigned AMDGPUAS::CONSTANT_BUFFER_7;
756 const unsigned AMDGPUAS::CONSTANT_BUFFER_8;
757 const unsigned AMDGPUAS::CONSTANT_BUFFER_9;
758 const unsigned AMDGPUAS::CONSTANT_BUFFER_10;
759 const unsigned AMDGPUAS::CONSTANT_BUFFER_11;
760 const unsigned AMDGPUAS::CONSTANT_BUFFER_12;
761 const unsigned AMDGPUAS::CONSTANT_BUFFER_13;
762 const unsigned AMDGPUAS::CONSTANT_BUFFER_14;
763 const unsigned AMDGPUAS::CONSTANT_BUFFER_15;
764 const unsigned AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
765
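
These out-of-line definitions are required (pre-C++17) whenever a static const data member that is initialized in-class gets odr-used, for example bound to a reference or passed by const reference. A minimal illustration of the rule:

    struct S {
      static const unsigned K = 3; // declaration with in-class initializer
    };
    const unsigned S::K;           // definition: gives K storage for odr-use

    const unsigned &R = S::K;      // odr-use: binds a reference to K's address
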
766 namespace llvm {
767 namespace AMDGPU {
768
769 AMDGPUAS getAMDGPUAS(Triple T) {
770 auto Env = T.getEnvironmentName();
771 AMDGPUAS AS;
772 if (Env == "amdgiz" || Env == "amdgizcl") {
773 AS.FLAT_ADDRESS = 0;
774 AS.CONSTANT_ADDRESS = 4;
775 AS.PRIVATE_ADDRESS = 5;
776 AS.REGION_ADDRESS = 2;
777 }
778 else {
779 AS.FLAT_ADDRESS = 4;
780 AS.CONSTANT_ADDRESS = 2;
781 AS.PRIVATE_ADDRESS = 0;
782 AS.REGION_ADDRESS = 5;
783 }
784 return AS;
785 }
786
787 AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
788 return getAMDGPUAS(M.getTargetTriple());
789 }
790
791 AMDGPUAS getAMDGPUAS(const Module &M) {
792 return getAMDGPUAS(Triple(M.getTargetTriple()));
793 }
794 } // namespace AMDGPU
795 } // namespace llvm
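
As the commit message suggests, a pass can build the mapping once at the start of its run* function. A hedged usage sketch (the pass and the loop body are hypothetical; getAMDGPUAS and AMDGPUAS come from this patch):

    // Assumes the usual LLVM includes plus "AMDGPU.h"/"Utils/AMDGPUBaseInfo.h".
    bool MyAMDGPUPass::runOnFunction(Function &F) {
      // Build the mapping once per run, from the module's target triple.
      AMDGPUAS ASI = AMDGPU::getAMDGPUAS(*F.getParent());
      unsigned NumFlatArgs = 0;
      for (Argument &A : F.args())
        if (auto *PT = dyn_cast<PointerType>(A.getType()))
          if (PT->getAddressSpace() == ASI.FLAT_ADDRESS) // 0 under amdgiz, 4 otherwise
            ++NumFlatArgs;
      return false; // analysis only; nothing modified
    }
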
99 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
1010 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
1111
12 #include "AMDGPU.h"
1213 #include "AMDKernelCodeT.h"
1314 #include "SIDefines.h"
1415 #include "llvm/ADT/StringRef.h"
159160
160161 MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx);
161162
162 bool isGroupSegment(const GlobalValue *GV);
163 bool isGlobalSegment(const GlobalValue *GV);
164 bool isReadOnlySegment(const GlobalValue *GV);
163 bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS);
164 bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS);
165 bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS);
165166
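
Callers of these predicates now supply the mapping themselves. A brief sketch of an updated call site (TM and GV are assumed to be in scope; obtaining them is outside this patch):

    AMDGPUAS ASI = AMDGPU::getAMDGPUAS(TM); // TM: the current TargetMachine
    if (AMDGPU::isGroupSegment(GV, ASI)) {
      // GV lives in LDS (group) memory; LOCAL_ADDRESS remains the
      // triple-independent constant 3.
    }
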
166167 /// \returns True if constants should be emitted to .text section for given
167168 /// target triple \p TT, false otherwise.