llvm.org GIT mirror llvm / b479681
AMDGPU: Move ISA version and EF_AMDGPU_MACH_* determination into TargetParser. Also switch from target features to the CPU string when determining the ISA version. This fixes an issue where the wrong ISA version was emitted in the object code when the features of a particular CPU were altered (e.g. gfx902 without xnack used to result in gfx900). Differential Revision: https://reviews.llvm.org/D51890 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@341982 91177308-0d34-0410-b5e6-96231b3b80d8 Konstantin Zhuravlyov 1 year, 5 months ago
15 changed file(s) with 456 addition(s) and 436 deletion(s). Raw diff Collapse all Expand all
319319 GK_AMDGCN_LAST = GK_GFX906,
320320 };
321321
322 /// Instruction set architecture version.
/// Plain aggregate of three unsigned components. getIsaVersion() fills it with
/// brace initialization in {Major, Minor, Stepping} order; for example it maps
/// gfx906 to {9, 0, 6}.
323 struct IsaVersion {
324 unsigned Major; ///< First component (9 for gfx906).
325 unsigned Minor; ///< Second component (0 for gfx906, 1 for gfx810).
326 unsigned Stepping; ///< Third component (6 for gfx906).
327 };
328
322329 // This isn't comprehensive for now, just things that are needed from the
323330 // frontend driver.
324331 enum ArchFeatureKind : uint32_t {
334341 FEATURE_FAST_DENORMAL_F32 = 1 << 5
335342 };
336343
337 GPUKind parseArchAMDGCN(StringRef CPU);
338 GPUKind parseArchR600(StringRef CPU);
339344 StringRef getArchNameAMDGCN(GPUKind AK);
340345 StringRef getArchNameR600(GPUKind AK);
341346 StringRef getCanonicalArchName(StringRef Arch);
347 GPUKind parseArchAMDGCN(StringRef CPU);
348 GPUKind parseArchR600(StringRef CPU);
342349 unsigned getArchAttrAMDGCN(GPUKind AK);
343350 unsigned getArchAttrR600(GPUKind AK);
344351
345352 void fillValidArchListAMDGCN(SmallVectorImpl &Values);
346353 void fillValidArchListR600(SmallVectorImpl &Values);
347354
348 }
355 StringRef getArchNameFromElfMach(unsigned ElfMach);
356 unsigned getElfMach(StringRef GPU);
357 IsaVersion getIsaVersion(StringRef GPU);
358
359 } // namespace AMDGPU
349360
350361 } // namespace llvm
351362
1616 #include "llvm/ADT/ArrayRef.h"
1717 #include "llvm/ADT/StringSwitch.h"
1818 #include "llvm/ADT/Twine.h"
19 #include "llvm/BinaryFormat/ELF.h"
1920 #include
2021
2122 using namespace llvm;
2223 using namespace ARM;
2324 using namespace AArch64;
25 using namespace AMDGPU;
2426
2527 namespace {
2628
946948 TT.isOSWindows();
947949 }
948950
951 namespace {
952
949953 struct GPUInfo {
950954 StringLiteral Name;
951955 StringLiteral CanonicalName;
953957 unsigned Features;
954958 };
955959
956 using namespace AMDGPU;
957 static constexpr GPUInfo R600GPUs[26] = {
958 // Name Canonical Kind Features
959 // Name
960 //
960 constexpr GPUInfo R600GPUs[26] = {
961 // Name Canonical Kind Features
962 // Name
961963 {{"r600"}, {"r600"}, GK_R600, FEATURE_NONE },
962964 {{"rv630"}, {"r600"}, GK_R600, FEATURE_NONE },
963965 {{"rv635"}, {"r600"}, GK_R600, FEATURE_NONE },
988990
989991 // This table should be sorted by the value of GPUKind
990992 // Don't bother listing the implicitly true features
991 static constexpr GPUInfo AMDGCNGPUs[32] = {
992 // Name Canonical Kind Features
993 // Name
993 constexpr GPUInfo AMDGCNGPUs[32] = {
994 // Name Canonical Kind Features
995 // Name
994996 {{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
995997 {{"tahiti"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
996998 {{"gfx601"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
10251027 {{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
10261028 };
10271029
1028 static const GPUInfo *getArchEntry(AMDGPU::GPUKind AK,
1029 ArrayRef Table) {
1030 const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef Table) {
10301031 GPUInfo Search = { {""}, {""}, AK, AMDGPU::FEATURE_NONE };
10311032
10321033 auto I = std::lower_bound(Table.begin(), Table.end(), Search,
10391040 return I;
10401041 }
10411042
1043 } // namespace
1044
10421045 StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) {
10431046 if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
10441047 return Entry->CanonicalName;
10911094 for (const auto C : R600GPUs)
10921095 Values.push_back(C.Name);
10931096 }
1097
1098 StringRef AMDGPU::getArchNameFromElfMach(unsigned ElfMach) {
1099 AMDGPU::GPUKind AK;
1100
1101 switch (ElfMach) {
1102 case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
1103 case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
1104 case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
1105 case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
1106 case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
1107 case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
1108 case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
1109 case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
1110 case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
1111 case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
1112 case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
1113 case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
1114 case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
1115 case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
1116 case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
1117 case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
1118 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
1119 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
1120 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
1121 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
1122 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
1123 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
1124 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
1125 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
1126 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
1127 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
1128 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
1129 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
1130 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
1131 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
1132 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
1133 case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
1134 }
1135
1136 StringRef GPUName = getArchNameAMDGCN(AK);
1137 if (GPUName != "")
1138 return GPUName;
1139 return getArchNameR600(AK);
1140 }
1141
1142 unsigned AMDGPU::getElfMach(StringRef GPU) {
1143 AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
1144 if (AK == AMDGPU::GPUKind::GK_NONE)
1145 AK = parseArchR600(GPU);
1146
1147 switch (AK) {
1148 case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600;
1149 case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630;
1150 case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880;
1151 case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670;
1152 case GK_RV710: return ELF::EF_AMDGPU_MACH_R600_RV710;
1153 case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730;
1154 case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770;
1155 case GK_CEDAR: return ELF::EF_AMDGPU_MACH_R600_CEDAR;
1156 case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS;
1157 case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER;
1158 case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD;
1159 case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO;
1160 case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS;
1161 case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS;
1162 case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN;
1163 case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS;
1164 case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
1165 case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
1166 case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
1167 case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
1168 case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
1169 case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
1170 case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
1171 case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
1172 case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
1173 case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
1174 case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
1175 case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
1176 case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
1177 case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
1178 case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
1179 case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
1180 }
1181
1182 llvm_unreachable("unknown GPU");
1183 }
1184
1185 AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
1186 if (GPU == "generic")
1187 return {7, 0, 0};
1188
1189 AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
1190 if (AK == AMDGPU::GPUKind::GK_NONE)
1191 return {0, 0, 0};
1192
1193 switch (AK) {
1194 case GK_GFX600: return {6, 0, 0};
1195 case GK_GFX601: return {6, 0, 1};
1196 case GK_GFX700: return {7, 0, 0};
1197 case GK_GFX701: return {7, 0, 1};
1198 case GK_GFX702: return {7, 0, 2};
1199 case GK_GFX703: return {7, 0, 3};
1200 case GK_GFX704: return {7, 0, 4};
1201 case GK_GFX801: return {8, 0, 1};
1202 case GK_GFX802: return {8, 0, 2};
1203 case GK_GFX803: return {8, 0, 3};
1204 case GK_GFX810: return {8, 1, 0};
1205 case GK_GFX900: return {9, 0, 0};
1206 case GK_GFX902: return {9, 0, 2};
1207 case GK_GFX904: return {9, 0, 4};
1208 case GK_GFX906: return {9, 0, 6};
1209 default: return {0, 0, 0};
1210 }
1211 }
3939 #include "llvm/MC/MCStreamer.h"
4040 #include "llvm/Support/AMDGPUMetadata.h"
4141 #include "llvm/Support/MathExtras.h"
42 #include "llvm/Support/TargetParser.h"
4243 #include "llvm/Support/TargetRegistry.h"
4344 #include "llvm/Target/TargetLoweringObjectFile.h"
4445
133134 getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
134135
135136 // HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2.
136 IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(getSTI()->getFeatureBits());
137 IsaVersion Version = getIsaVersion(getSTI()->getCPU());
137138 getTargetStreamer()->EmitDirectiveHSACodeObjectISA(
138 ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU");
139 Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
139140 }
140141
141142 void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
239240 *getSTI(), KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
240241 CurrentProgramInfo.NumVGPRsForWavesPerEU,
241242 CurrentProgramInfo.NumSGPRsForWavesPerEU -
242 IsaInfo::getNumExtraSGPRs(getSTI()->getFeatureBits(),
243 IsaInfo::getNumExtraSGPRs(getSTI(),
243244 CurrentProgramInfo.VCCUsed,
244245 CurrentProgramInfo.FlatUsed),
245246 CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
560561
561562 int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs(
562563 const GCNSubtarget &ST) const {
563 return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(),
564 return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(&ST,
564565 UsesVCC, UsesFlatScratch);
565566 }
566567
757758
758759 // 48 SGPRs - vcc, - flat_scr, -xnack
759760 int MaxSGPRGuess =
760 47 - IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(), true,
761 47 - IsaInfo::getNumExtraSGPRs(getSTI(), true,
761762 ST.hasFlatAddressSpace());
762763 MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
763764 MaxVGPR = std::max(MaxVGPR, 23);
822823 // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
823824 // unified.
824825 unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
825 STM.getFeatureBits(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
826 getSTI(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
826827
827828 // Check the addressable register limit before we add ExtraSGPRs.
828829 if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
904905 }
905906
906907 ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks(
907 STM.getFeatureBits(), ProgInfo.NumSGPRsForWavesPerEU);
908 getSTI(), ProgInfo.NumSGPRsForWavesPerEU);
908909 ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks(
909 STM.getFeatureBits(), ProgInfo.NumVGPRsForWavesPerEU);
910 getSTI(), ProgInfo.NumVGPRsForWavesPerEU);
910911
911912 // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
912913 // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
11361137 const SIMachineFunctionInfo *MFI = MF.getInfo();
11371138 const GCNSubtarget &STM = MF.getSubtarget();
11381139
1139 AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits());
1140 AMDGPU::initDefaultAMDKernelCodeT(Out, getSTI());
11401141
11411142 Out.compute_pgm_resource_registers =
11421143 CurrentProgramInfo.ComputePGMRSrc1 |
123123 return *this;
124124 }
125125
126 AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT,
127 const FeatureBitset &FeatureBits) :
126 AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) :
128127 TargetTriple(TT),
129 SubtargetFeatureBits(FeatureBits),
130128 Has16BitInsts(false),
131129 HasMadMixInsts(false),
132130 FP32Denormals(false),
143141 { }
144142
145143 GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
146 const GCNTargetMachine &TM) :
144 const GCNTargetMachine &TM) :
147145 AMDGPUGenSubtargetInfo(TT, GPU, FS),
148 AMDGPUSubtarget(TT, getFeatureBits()),
146 AMDGPUSubtarget(TT),
149147 TargetTriple(TT),
150148 Gen(SOUTHERN_ISLANDS),
151149 IsaVersion(ISAVersion0_0_0),
447445 R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
448446 const TargetMachine &TM) :
449447 R600GenSubtargetInfo(TT, GPU, FS),
450 AMDGPUSubtarget(TT, getFeatureBits()),
448 AMDGPUSubtarget(TT),
451449 InstrInfo(*this),
452450 FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
453451 FMA(false),
6262 Triple TargetTriple;
6363
6464 protected:
65 const FeatureBitset &SubtargetFeatureBits;
6665 bool Has16BitInsts;
6766 bool HasMadMixInsts;
6867 bool FP32Denormals;
7877 unsigned WavefrontSize;
7978
8079 public:
81 AMDGPUSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
80 AMDGPUSubtarget(const Triple &TT);
8281
8382 static const AMDGPUSubtarget &get(const MachineFunction &MF);
8483 static const AMDGPUSubtarget &get(const TargetMachine &TM,
202201
203202 /// \returns Maximum number of work groups per compute unit supported by the
204203 /// subtarget and limited by given \p FlatWorkGroupSize.
205 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
206 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits,
207 FlatWorkGroupSize);
208 }
204 virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
209205
210206 /// \returns Minimum flat work group size supported by the subtarget.
211 unsigned getMinFlatWorkGroupSize() const {
212 return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits);
213 }
207 virtual unsigned getMinFlatWorkGroupSize() const = 0;
214208
215209 /// \returns Maximum flat work group size supported by the subtarget.
216 unsigned getMaxFlatWorkGroupSize() const {
217 return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits);
218 }
210 virtual unsigned getMaxFlatWorkGroupSize() const = 0;
219211
220212 /// \returns Maximum number of waves per execution unit supported by the
221213 /// subtarget and limited by given \p FlatWorkGroupSize.
222 unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
223 return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits,
224 FlatWorkGroupSize);
225 }
214 virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0;
226215
227216 /// \returns Minimum number of waves per execution unit supported by the
228217 /// subtarget.
229 unsigned getMinWavesPerEU() const {
230 return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits);
231 }
218 virtual unsigned getMinWavesPerEU() const = 0;
232219
233220 unsigned getMaxWavesPerEU() const { return 10; }
234221
707694 /// \returns Number of execution units per compute unit supported by the
708695 /// subtarget.
709696 unsigned getEUsPerCU() const {
710 return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits());
697 return AMDGPU::IsaInfo::getEUsPerCU(this);
711698 }
712699
713700 /// \returns Maximum number of waves per compute unit supported by the
714701 /// subtarget without any kind of limitation.
715702 unsigned getMaxWavesPerCU() const {
716 return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits());
703 return AMDGPU::IsaInfo::getMaxWavesPerCU(this);
717704 }
718705
719706 /// \returns Maximum number of waves per compute unit supported by the
720707 /// subtarget and limited by given \p FlatWorkGroupSize.
721708 unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
722 return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(),
723 FlatWorkGroupSize);
709 return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
724710 }
725711
726712 /// \returns Maximum number of waves per execution unit supported by the
732718 /// \returns Number of waves per work group supported by the subtarget and
733719 /// limited by given \p FlatWorkGroupSize.
734720 unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
735 return AMDGPU::IsaInfo::getWavesPerWorkGroup(
736 MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);
721 return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize);
737722 }
738723
739724 // static wrappers
852837
853838 /// \returns SGPR allocation granularity supported by the subtarget.
854839 unsigned getSGPRAllocGranule() const {
855 return AMDGPU::IsaInfo::getSGPRAllocGranule(
856 MCSubtargetInfo::getFeatureBits());
840 return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
857841 }
858842
859843 /// \returns SGPR encoding granularity supported by the subtarget.
860844 unsigned getSGPREncodingGranule() const {
861 return AMDGPU::IsaInfo::getSGPREncodingGranule(
862 MCSubtargetInfo::getFeatureBits());
845 return AMDGPU::IsaInfo::getSGPREncodingGranule(this);
863846 }
864847
865848 /// \returns Total number of SGPRs supported by the subtarget.
866849 unsigned getTotalNumSGPRs() const {
867 return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits());
850 return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
868851 }
869852
870853 /// \returns Addressable number of SGPRs supported by the subtarget.
871854 unsigned getAddressableNumSGPRs() const {
872 return AMDGPU::IsaInfo::getAddressableNumSGPRs(
873 MCSubtargetInfo::getFeatureBits());
855 return AMDGPU::IsaInfo::getAddressableNumSGPRs(this);
874856 }
875857
876858 /// \returns Minimum number of SGPRs that meets the given number of waves per
877859 /// execution unit requirement supported by the subtarget.
878860 unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
879 return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(),
880 WavesPerEU);
861 return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
881862 }
882863
883864 /// \returns Maximum number of SGPRs that meets the given number of waves per
884865 /// execution unit requirement supported by the subtarget.
885866 unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
886 return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(),
887 WavesPerEU, Addressable);
867 return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
888868 }
889869
890870 /// \returns Reserved number of SGPRs for given function \p MF.
902882
903883 /// \returns VGPR allocation granularity supported by the subtarget.
904884 unsigned getVGPRAllocGranule() const {
905 return AMDGPU::IsaInfo::getVGPRAllocGranule(
906 MCSubtargetInfo::getFeatureBits());
885 return AMDGPU::IsaInfo::getVGPRAllocGranule(this);
907886 }
908887
909888 /// \returns VGPR encoding granularity supported by the subtarget.
910889 unsigned getVGPREncodingGranule() const {
911 return AMDGPU::IsaInfo::getVGPREncodingGranule(
912 MCSubtargetInfo::getFeatureBits());
890 return AMDGPU::IsaInfo::getVGPREncodingGranule(this);
913891 }
914892
915893 /// \returns Total number of VGPRs supported by the subtarget.
916894 unsigned getTotalNumVGPRs() const {
917 return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits());
895 return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
918896 }
919897
920898 /// \returns Addressable number of VGPRs supported by the subtarget.
921899 unsigned getAddressableNumVGPRs() const {
922 return AMDGPU::IsaInfo::getAddressableNumVGPRs(
923 MCSubtargetInfo::getFeatureBits());
900 return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
924901 }
925902
926903 /// \returns Minimum number of VGPRs that meets given number of waves per
927904 /// execution unit requirement supported by the subtarget.
928905 unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
929 return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(),
930 WavesPerEU);
906 return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
931907 }
932908
933909 /// \returns Maximum number of VGPRs that meets given number of waves per
934910 /// execution unit requirement supported by the subtarget.
935911 unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
936 return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(),
937 WavesPerEU);
912 return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
938913 }
939914
940915 /// \returns Maximum number of VGPRs that meets number of waves per execution
950925 void getPostRAMutations(
951926 std::vector> &Mutations)
952927 const override;
928
// The five getters below implement the limit queries that this header declares
// as pure virtual (`= 0`) earlier on (presumably on AMDGPUSubtarget - confirm).
// Each one forwards to the AMDGPU::IsaInfo helper of the same name, passing
// `this` as the subtarget.
929 /// \returns Maximum number of work groups per compute unit supported by the
930 /// subtarget and limited by given \p FlatWorkGroupSize.
931 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
932 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
933 }
934
935 /// \returns Minimum flat work group size supported by the subtarget.
936 unsigned getMinFlatWorkGroupSize() const override {
937 return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
938 }
939
940 /// \returns Maximum flat work group size supported by the subtarget.
941 unsigned getMaxFlatWorkGroupSize() const override {
942 return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
943 }
944
945 /// \returns Maximum number of waves per execution unit supported by the
946 /// subtarget and limited by given \p FlatWorkGroupSize.
947 unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
948 return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
949 }
950
951 /// \returns Minimum number of waves per execution unit supported by the
952 /// subtarget.
953 unsigned getMinWavesPerEU() const override {
954 return AMDGPU::IsaInfo::getMinWavesPerEU(this);
955 }
953956 };
954957
955958 class R600Subtarget final : public R600GenSubtargetInfo,
10601063 bool enableSubRegLiveness() const override {
10611064 return true;
10621065 }
1066
// R600Subtarget's implementations of the limit queries that this header
// declares as pure virtual (`= 0`) earlier on. Each one forwards to the
// AMDGPU::IsaInfo helper of the same name, passing `this` as the subtarget.
1067 /// \returns Maximum number of work groups per compute unit supported by the
1068 /// subtarget and limited by given \p FlatWorkGroupSize.
1069 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1070 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1071 }
1072
1073 /// \returns Minimum flat work group size supported by the subtarget.
1074 unsigned getMinFlatWorkGroupSize() const override {
1075 return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
1076 }
1077
1078 /// \returns Maximum flat work group size supported by the subtarget.
1079 unsigned getMaxFlatWorkGroupSize() const override {
1080 return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
1081 }
1082
1083 /// \returns Maximum number of waves per execution unit supported by the
1084 /// subtarget and limited by given \p FlatWorkGroupSize.
1085 unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
1086 return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
1087 }
1088
1089 /// \returns Minimum number of waves per execution unit supported by the
1090 /// subtarget.
1091 unsigned getMinWavesPerEU() const override {
1092 return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1093 }
10631094 };
10641095
10651096 } // end namespace llvm
4848 #include "llvm/Support/MachineValueType.h"
4949 #include "llvm/Support/MathExtras.h"
5050 #include "llvm/Support/SMLoc.h"
51 #include "llvm/Support/TargetParser.h"
5152 #include "llvm/Support/TargetRegistry.h"
5253 #include "llvm/Support/raw_ostream.h"
5354 #include
916917 // Currently there is none suitable machinery in the core llvm-mc for this.
917918 // MCSymbol::isRedefinable is intended for another purpose, and
918919 // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
919 AMDGPU::IsaInfo::IsaVersion ISA =
920 AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
920 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
921921 MCContext &Ctx = getContext();
922922 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
923923 MCSymbol *Sym =
18251825 unsigned DwordRegIndex,
18261826 unsigned RegWidth) {
18271827 // Symbols are only defined for GCN targets
1828 if (AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()).Major < 6)
1828 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
18291829 return true;
18301830
18311831 auto SymbolName = getGprCountSymbolName(RegKind);
26362636 unsigned &SGPRBlocks) {
26372637 // TODO(scott.linder): These calculations are duplicated from
26382638 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
2639 IsaInfo::IsaVersion Version = IsaInfo::getIsaVersion(Features);
2639 IsaVersion Version = getIsaVersion(getSTI().getCPU());
26402640
26412641 unsigned NumVGPRs = NextFreeVGPR;
26422642 unsigned NumSGPRs = NextFreeSGPR;
2643 unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(Features);
2643 unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI());
26442644
26452645 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
26462646 NumSGPRs > MaxAddressableNumSGPRs)
26472647 return OutOfRangeError(SGPRRange);
26482648
26492649 NumSGPRs +=
2650 IsaInfo::getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, XNACKUsed);
2650 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
26512651
26522652 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
26532653 NumSGPRs > MaxAddressableNumSGPRs)
26562656 if (Features.test(FeatureSGPRInitBug))
26572657 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
26582658
2659 VGPRBlocks = IsaInfo::getNumVGPRBlocks(Features, NumVGPRs);
2660 SGPRBlocks = IsaInfo::getNumSGPRBlocks(Features, NumSGPRs);
2659 VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
2660 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
26612661
26622662 return false;
26632663 }
26772677
26782678 StringSet<> Seen;
26792679
2680 IsaInfo::IsaVersion IVersion =
2681 IsaInfo::getIsaVersion(getSTI().getFeatureBits());
2680 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
26822681
26832682 SMRange VGPRRange;
26842683 uint64_t NextFreeVGPR = 0;
29372936 // If this directive has no arguments, then use the ISA version for the
29382937 // targeted GPU.
29392938 if (getLexer().is(AsmToken::EndOfStatement)) {
2940 AMDGPU::IsaInfo::IsaVersion ISA =
2941 AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
2939 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
29422940 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
29432941 ISA.Stepping,
29442942 "AMD", "AMDGPU");
30002998
30012999 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
30023000 amd_kernel_code_t Header;
3003 AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits());
3001 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
30043002
30053003 while (true) {
30063004 // Lex EndOfStatement. This is in a while loop, because lexing a comment
36783676
36793677 static bool
36803678 encodeCnt(
3681 const AMDGPU::IsaInfo::IsaVersion ISA,
3679 const AMDGPU::IsaVersion ISA,
36823680 int64_t &IntVal,
36833681 int64_t CntVal,
36843682 bool Saturate,
3685 unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned),
3686 unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned))
3683 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
3684 unsigned (*decode)(const IsaVersion &Version, unsigned))
36873685 {
36883686 bool Failed = false;
36893687
37143712 if (getParser().parseAbsoluteExpression(CntVal))
37153713 return true;
37163714
3717 AMDGPU::IsaInfo::IsaVersion ISA =
3718 AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
3715 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
37193716
37203717 bool Failed = true;
37213718 bool Sat = CntName.endswith("_sat");
37503747
37513748 OperandMatchResultTy
37523749 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
3753 AMDGPU::IsaInfo::IsaVersion ISA =
3754 AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
3750 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
37553751 int64_t Waitcnt = getWaitcntBitMask(ISA);
37563752 SMLoc S = Parser.getTok().getLoc();
37573753
11541154 void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
11551155 const MCSubtargetInfo &STI,
11561156 raw_ostream &O) {
1157 AMDGPU::IsaInfo::IsaVersion ISA =
1158 AMDGPU::IsaInfo::getIsaVersion(STI.getFeatureBits());
1157 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI.getCPU());
11591158
11601159 unsigned SImm16 = MI->getOperand(OpNo).getImm();
11611160 unsigned Vmcnt, Expcnt, Lgkmcnt;
2626 #include "llvm/MC/MCObjectFileInfo.h"
2727 #include "llvm/MC/MCSectionELF.h"
2828 #include "llvm/Support/FormattedStream.h"
29 #include "llvm/Support/TargetParser.h"
2930
3031 namespace llvm {
3132 #include "AMDGPUPTNote.h"
3738 //===----------------------------------------------------------------------===//
3839 // AMDGPUTargetStreamer
3940 //===----------------------------------------------------------------------===//
40
41 static const struct {
42 const char *Name;
43 unsigned Mach;
44 } MachTable[] = {
45 // Radeon HD 2000/3000 Series (R600).
46 { "r600", ELF::EF_AMDGPU_MACH_R600_R600 },
47 { "r630", ELF::EF_AMDGPU_MACH_R600_R630 },
48 { "rs880", ELF::EF_AMDGPU_MACH_R600_RS880 },
49 { "rv670", ELF::EF_AMDGPU_MACH_R600_RV670 },
50 // Radeon HD 4000 Series (R700).
51 { "rv710", ELF::EF_AMDGPU_MACH_R600_RV710 },
52 { "rv730", ELF::EF_AMDGPU_MACH_R600_RV730 },
53 { "rv770", ELF::EF_AMDGPU_MACH_R600_RV770 },
54 // Radeon HD 5000 Series (Evergreen).
55 { "cedar", ELF::EF_AMDGPU_MACH_R600_CEDAR },
56 { "cypress", ELF::EF_AMDGPU_MACH_R600_CYPRESS },
57 { "juniper", ELF::EF_AMDGPU_MACH_R600_JUNIPER },
58 { "redwood", ELF::EF_AMDGPU_MACH_R600_REDWOOD },
59 { "sumo", ELF::EF_AMDGPU_MACH_R600_SUMO },
60 // Radeon HD 6000 Series (Northern Islands).
61 { "barts", ELF::EF_AMDGPU_MACH_R600_BARTS },
62 { "caicos", ELF::EF_AMDGPU_MACH_R600_CAICOS },
63 { "cayman", ELF::EF_AMDGPU_MACH_R600_CAYMAN },
64 { "turks", ELF::EF_AMDGPU_MACH_R600_TURKS },
65 // AMDGCN GFX6.
66 { "gfx600", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
67 { "tahiti", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
68 { "gfx601", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
69 { "hainan", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
70 { "oland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
71 { "pitcairn", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
72 { "verde", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
73 // AMDGCN GFX7.
74 { "gfx700", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
75 { "kaveri", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
76 { "gfx701", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
77 { "hawaii", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
78 { "gfx702", ELF::EF_AMDGPU_MACH_AMDGCN_GFX702 },
79 { "gfx703", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
80 { "kabini", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
81 { "mullins", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
82 { "gfx704", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
83 { "bonaire", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
84 // AMDGCN GFX8.
85 { "gfx801", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
86 { "carrizo", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
87 { "gfx802", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
88 { "iceland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
89 { "tonga", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
90 { "gfx803", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
91 { "fiji", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
92 { "polaris10", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
93 { "polaris11", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
94 { "gfx810", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
95 { "stoney", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
96 // AMDGCN GFX9.
97 { "gfx900", ELF::EF_AMDGPU_MACH_AMDGCN_GFX900 },
98 { "gfx902", ELF::EF_AMDGPU_MACH_AMDGCN_GFX902 },
99 { "gfx904", ELF::EF_AMDGPU_MACH_AMDGCN_GFX904 },
100 { "gfx906", ELF::EF_AMDGPU_MACH_AMDGCN_GFX906 },
101 // Not specified processor.
102 { nullptr, ELF::EF_AMDGPU_MACH_NONE }
103 };
104
// Pre-change helper, shown here as deleted lines (each line carries only its
// old line number). It maps a GPU name to an EF_AMDGPU_MACH_* value. The
// e_flags call site further down this diff switches from
// getMACH(STI.getCPU()) to getElfMach(STI.getCPU()).
105 unsigned AMDGPUTargetStreamer::getMACH(StringRef GPU) const {
106 auto Entry = MachTable;
// Linear scan: stop at the first entry whose Name equals GPU, or at the entry
// whose Name is null, whichever comes first.
107 for (; Entry->Name && GPU != Entry->Name; ++Entry)
108 ;
// When no name matched, Entry is the null-named entry and its Mach is what
// gets returned. Presumably that is the { nullptr, EF_AMDGPU_MACH_NONE } row
// closing the table just above - confirm that table is MachTable.
109 return Entry->Mach;
110 }
111
// Pre-change helper, shown here as deleted lines (each line carries only its
// old line number). Reverse lookup of getMACH: maps an EF_AMDGPU_MACH_* value
// back to a GPU name from MachTable.
112 const char *AMDGPUTargetStreamer::getMachName(unsigned Mach) {
113 auto Entry = MachTable;
// Linear scan: stop at the first entry whose Mach equals the argument, or at
// the entry whose Name is null. Because the scan stops at the first match,
// a Mach value shared by several rows resolves to the earliest row's name.
114 for (; Entry->Name && Mach != Entry->Mach; ++Entry)
115 ;
// If nothing matched, Entry is the null-named entry, so the result is a null
// pointer; callers have to be prepared for that.
116 return Entry->Name;
117 }
11841
11942 bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) {
12043 HSAMD::Metadata HSAMetadata;
204127 bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) {
205128 amdhsa::kernel_descriptor_t DefaultKD = getDefaultAmdhsaKernelDescriptor();
206129
207 IsaInfo::IsaVersion IVersion = IsaInfo::getIsaVersion(STI.getFeatureBits());
130 IsaVersion IVersion = getIsaVersion(STI.getCPU());
208131
209132 OS << "\t.amdhsa_kernel " << KernelName << '\n';
210133
341264 unsigned EFlags = MCA.getELFHeaderEFlags();
342265
343266 EFlags &= ~ELF::EF_AMDGPU_MACH;
344 EFlags |= getMACH(STI.getCPU());
267 EFlags |= getElfMach(STI.getCPU());
345268
346269 EFlags &= ~ELF::EF_AMDGPU_XNACK;
347270 if (AMDGPU::hasXNACK(STI))
3030 protected:
3131 MCContext &getContext() const { return Streamer.getContext(); }
3232
33 /// \returns Equivalent EF_AMDGPU_MACH_* value for given \p GPU name.
34 unsigned getMACH(StringRef GPU) const;
35
3633 public:
37 /// \returns Equivalent GPU name for an EF_AMDGPU_MACH_* value.
38 static const char *getMachName(unsigned Mach);
39
4034 AMDGPUTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
4135
4236 virtual void EmitDirectiveAMDGCNTarget(StringRef Target) = 0;
368368 const SIRegisterInfo *TRI = nullptr;
369369 const MachineRegisterInfo *MRI = nullptr;
370370 const MachineLoopInfo *MLI = nullptr;
371 AMDGPU::IsaInfo::IsaVersion IV;
371 AMDGPU::IsaVersion IV;
372372
373373 DenseSet BlockVisitedSet;
374374 DenseSet TrackedWaitcntSet;
18401840 TRI = &TII->getRegisterInfo();
18411841 MRI = &MF.getRegInfo();
18421842 MLI = &getAnalysis();
1843 IV = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits());
1843 IV = AMDGPU::getIsaVersion(ST->getCPU());
18441844 const SIMachineFunctionInfo *MFI = MF.getInfo();
18451845
18461846 ForceEmitZeroWaitcnts = ForceEmitZeroFlag;
252252 /// Instruction info.
253253 const SIInstrInfo *TII = nullptr;
254254
255 IsaInfo::IsaVersion IV;
255 IsaVersion IV;
256256
257257 SICacheControl(const GCNSubtarget &ST);
258258
604604
// Diff hunk for the SICacheControl constructor. Lines with two fused numbers
// are unchanged context; the two lines numbered 607 are the removed and the
// added form of the same statement.
// The constructor stores the subtarget's instruction info in TII and the ISA
// version in IV.
605605 SICacheControl::SICacheControl(const GCNSubtarget &ST) {
606606 TII = ST.getInstrInfo();
// Removed form: ISA version looked up from the subtarget feature bits.
607 IV = IsaInfo::getIsaVersion(ST.getFeatureBits());
// Added form: ISA version looked up from the subtarget CPU name.
607 IV = getIsaVersion(ST.getCPU());
608608 }
609609
610610 /* static */
136136
137137 namespace IsaInfo {
138138
// Pre-change implementation, shown here as deleted lines (each line carries
// only its old line number). It derives the {Major, Minor, Stepping} ISA
// version from subtarget feature bits. The other hunks of this diff replace
// its callers with getIsaVersion(<CPU name>); the commit description gives as
// motivation that gfx902 without xnack used to be reported as gfx900.
//
// The tests run top to bottom and the first set bit wins, so the order of the
// checks is part of the behaviour.
139 IsaVersion getIsaVersion(const FeatureBitset &Features) {
140 // GCN GFX6 (Southern Islands (SI)).
141 if (Features.test(FeatureISAVersion6_0_0))
142 return {6, 0, 0};
143 if (Features.test(FeatureISAVersion6_0_1))
144 return {6, 0, 1};
145
146 // GCN GFX7 (Sea Islands (CI)).
147 if (Features.test(FeatureISAVersion7_0_0))
148 return {7, 0, 0};
149 if (Features.test(FeatureISAVersion7_0_1))
150 return {7, 0, 1};
151 if (Features.test(FeatureISAVersion7_0_2))
152 return {7, 0, 2};
153 if (Features.test(FeatureISAVersion7_0_3))
154 return {7, 0, 3};
155 if (Features.test(FeatureISAVersion7_0_4))
156 return {7, 0, 4};
// Generation-level fallback: Sea Islands without a specific ISA version bit
// is reported as 7.0.0.
157 if (Features.test(FeatureSeaIslands))
158 return {7, 0, 0};
159
160 // GCN GFX8 (Volcanic Islands (VI)).
161 if (Features.test(FeatureISAVersion8_0_1))
162 return {8, 0, 1};
163 if (Features.test(FeatureISAVersion8_0_2))
164 return {8, 0, 2};
165 if (Features.test(FeatureISAVersion8_0_3))
166 return {8, 0, 3};
167 if (Features.test(FeatureISAVersion8_1_0))
168 return {8, 1, 0};
// Generation-level fallback: Volcanic Islands without a specific ISA version
// bit is reported as 8.0.0.
169 if (Features.test(FeatureVolcanicIslands))
170 return {8, 0, 0};
171
172 // GCN GFX9.
173 if (Features.test(FeatureISAVersion9_0_0))
174 return {9, 0, 0};
175 if (Features.test(FeatureISAVersion9_0_2))
176 return {9, 0, 2};
177 if (Features.test(FeatureISAVersion9_0_4))
178 return {9, 0, 4};
179 if (Features.test(FeatureISAVersion9_0_6))
180 return {9, 0, 6};
// Generation-level fallback: GFX9 without a specific ISA version bit is
// reported as 9.0.0.
181 if (Features.test(FeatureGFX9))
182 return {9, 0, 0};
183
// Southern Islands without a specific ISA version bit yields the all-zero
// version rather than a 6.x value.
184 if (Features.test(FeatureSouthernIslands))
185 return {0, 0, 0};
// Nothing matched at all: default to 7.0.0.
186 return {7, 0, 0};
187 }
188
189139 void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
190140 auto TargetTriple = STI->getTargetTriple();
191 auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());
141 auto Version = getIsaVersion(STI->getCPU());
192142
193143 Stream << TargetTriple.getArchName() << '-'
194144 << TargetTriple.getVendorName() << '-'
195145 << TargetTriple.getOSName() << '-'
196146 << TargetTriple.getEnvironmentName() << '-'
197147 << "gfx"
198 << ISAVersion.Major
199 << ISAVersion.Minor
200 << ISAVersion.Stepping;
148 << Version.Major
149 << Version.Minor
150 << Version.Stepping;
201151
202152 if (hasXNACK(*STI))
203153 Stream << "+xnack";
209159 return STI->getFeatureBits().test(FeatureCodeObjectV3);
210160 }
211161
// Diff hunk for getWavefrontSize. Lines with one leading number belong to only
// one side of the change (the FeatureBitset signature is the removed side, the
// MCSubtargetInfo signature is the added side); lines with two fused numbers
// are unchanged context.
// Both versions return 16 or 32 when the matching FeatureWavefrontSize* bit is
// set, tested in that order, and 64 otherwise. The added version reads the
// bits through STI->getFeatureBits() instead of taking them directly.
212 unsigned getWavefrontSize(const FeatureBitset &Features) {
213 if (Features.test(FeatureWavefrontSize16))
162 unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
163 if (STI->getFeatureBits().test(FeatureWavefrontSize16))
214164 return 16;
215 if (Features.test(FeatureWavefrontSize32))
165 if (STI->getFeatureBits().test(FeatureWavefrontSize32))
216166 return 32;
217167
// Neither size bit is set.
218168 return 64;
219169 }
220170
// Diff hunk for getLocalMemorySize. Lines with one leading number belong to
// only one side of the change (FeatureBitset signature removed, MCSubtargetInfo
// signature added); lines with two fused numbers are unchanged context.
// Both versions return 32768 or 65536 when the matching FeatureLocalMemorySize*
// bit is set, tested in that order. The header in this diff documents the
// result as a size in bytes.
221 unsigned getLocalMemorySize(const FeatureBitset &Features) {
222 if (Features.test(FeatureLocalMemorySize32768))
171 unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
172 if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
223173 return 32768;
224 if (Features.test(FeatureLocalMemorySize65536))
174 if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
225175 return 65536;
226176
// Neither size bit is set: report 0.
227177 return 0;
228178 }
229179
230 unsigned getEUsPerCU(const FeatureBitset &Features) {
180 unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
231181 return 4;
232182 }
233183
// Diff hunk for getMaxWorkGroupsPerCU. Lines with one leading number belong to
// only one side of the change (FeatureBitset signature removed, MCSubtargetInfo
// signature added); lines with two fused numbers are unchanged context.
// Returns the work-group limit per compute unit for the given flat work group
// size: 8 when FeatureGCN is not set, otherwise a value derived from the
// number of waves one work group occupies.
234 unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
184 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
235185 unsigned FlatWorkGroupSize) {
236 if (!Features.test(FeatureGCN))
186 if (!STI->getFeatureBits().test(FeatureGCN))
237187 return 8;
// N is the number of waves per work group.
238 unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
188 unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
// A single-wave work group gets the full 40; it is special-cased so the
// cap of 16 applied below does not kick in.
239189 if (N == 1)
240190 return 40;
// NOTE(review): if FlatWorkGroupSize is 0, getWavesPerWorkGroup appears to
// yield 0 (assuming alignTo(0, x) is 0), which would make this a division by
// zero - confirm callers never pass 0 or add a guard.
241191 N = 40 / N;
242192 return std::min(N, 16u);
243193 }
244194
245 unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
246 return getMaxWavesPerEU() * getEUsPerCU(Features);
247 }
248
249 unsigned getMaxWavesPerCU(const FeatureBitset &Features,
195 unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
196 return getMaxWavesPerEU() * getEUsPerCU(STI);
197 }
198
199 unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
250200 unsigned FlatWorkGroupSize) {
251 return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
252 }
253
254 unsigned getMinWavesPerEU(const FeatureBitset &Features) {
201 return getWavesPerWorkGroup(STI, FlatWorkGroupSize);
202 }
203
204 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
255205 return 1;
256206 }
257207
260210 return 10;
261211 }
262212
263 unsigned getMaxWavesPerEU(const FeatureBitset &Features,
213 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
264214 unsigned FlatWorkGroupSize) {
265 return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
266 getEUsPerCU(Features)) / getEUsPerCU(Features);
267 }
268
269 unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
215 return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize),
216 getEUsPerCU(STI)) / getEUsPerCU(STI);
217 }
218
219 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
270220 return 1;
271221 }
272222
273 unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
223 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
274224 return 2048;
275225 }
276226
// Diff hunk covering getWavesPerWorkGroup and getSGPRAllocGranule. Lines with
// one leading number belong to only one side of the change; lines with two
// fused numbers are unchanged context. The diff tool grouped the two functions
// into one run, so the removed lines (old numbers 277-284) come first and the
// added lines (new numbers 227-234) follow; read each side by its own numbers.
//
// getWavesPerWorkGroup: FlatWorkGroupSize passed through alignTo() with the
// wavefront size and then divided by the wavefront size - presumably a
// round-up division, assuming alignTo rounds up to a multiple of its second
// argument.
277 unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
227 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
278228 unsigned FlatWorkGroupSize) {
279 return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
280 getWavefrontSize(Features);
281 }
282
283 unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
284 IsaVersion Version = getIsaVersion(Features);
229 return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) /
230 getWavefrontSize(STI);
231 }
232
// getSGPRAllocGranule: 16 when the ISA major version is 8 or higher, 8
// otherwise. The added side obtains the version from the CPU name instead of
// from the feature bits.
233 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
234 IsaVersion Version = getIsaVersion(STI->getCPU());
285235 if (Version.Major >= 8)
286236 return 16;
287237 return 8;
288238 }
289239
290 unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
240 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
291241 return 8;
292242 }
293243
// Diff hunk for getTotalNumSGPRs. Lines with one leading number belong to only
// one side of the change (FeatureBitset signature removed, MCSubtargetInfo
// signature added); lines with two fused numbers are unchanged context.
// Returns 800 when the ISA major version is 8 or higher and 512 otherwise. The
// added side obtains the version from the CPU name instead of the feature
// bits.
294 unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
295 IsaVersion Version = getIsaVersion(Features);
244 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
245 IsaVersion Version = getIsaVersion(STI->getCPU());
296246 if (Version.Major >= 8)
297247 return 800;
298248 return 512;
299249 }
300250
// Diff hunk for getAddressableNumSGPRs. Lines with one leading number belong
// to only one side of the change (FeatureBitset signature removed,
// MCSubtargetInfo signature added); lines with two fused numbers are unchanged
// context.
301 unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
302 if (Features.test(FeatureSGPRInitBug))
251 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
// The SGPR-init-bug feature takes priority over the version-based answer and
// pins the result to a fixed constant.
252 if (STI->getFeatureBits().test(FeatureSGPRInitBug))
303253 return FIXED_NUM_SGPRS_FOR_INIT_BUG;
304254
// Otherwise the result depends only on the ISA major version: 102 for major
// version 8 and up, 104 below that. Note that the feature test above still
// uses feature bits on the added side, while the version now comes from the
// CPU name.
305 IsaVersion Version = getIsaVersion(Features);
255 IsaVersion Version = getIsaVersion(STI->getCPU());
306256 if (Version.Major >= 8)
307257 return 102;
308258 return 104;
309259 }
310260
311 unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
261 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
312262 assert(WavesPerEU != 0);
313263
314264 if (WavesPerEU >= getMaxWavesPerEU())
315265 return 0;
316266
317 unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1);
318 if (Features.test(FeatureTrapHandler))
267 unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
268 if (STI->getFeatureBits().test(FeatureTrapHandler))
319269 MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
320 MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(Features)) + 1;
321 return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
322 }
323
324 unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
270 MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
271 return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
272 }
273
274 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
325275 bool Addressable) {
326276 assert(WavesPerEU != 0);
327277
328 IsaVersion Version = getIsaVersion(Features);
329 unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
278 IsaVersion Version = getIsaVersion(STI->getCPU());
279 unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
330280 if (Version.Major >= 8 && !Addressable)
331281 AddressableNumSGPRs = 112;
332 unsigned MaxNumSGPRs = getTotalNumSGPRs(Features) / WavesPerEU;
333 if (Features.test(FeatureTrapHandler))
282 unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
283 if (STI->getFeatureBits().test(FeatureTrapHandler))
334284 MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
335 MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(Features));
285 MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
336286 return std::min(MaxNumSGPRs, AddressableNumSGPRs);
337287 }
338288
339 unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
289 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
340290 bool FlatScrUsed, bool XNACKUsed) {
341291 unsigned ExtraSGPRs = 0;
342292 if (VCCUsed)
343293 ExtraSGPRs = 2;
344294
345 IsaVersion Version = getIsaVersion(Features);
295 IsaVersion Version = getIsaVersion(STI->getCPU());
346296 if (Version.Major < 8) {
347297 if (FlatScrUsed)
348298 ExtraSGPRs = 4;
357307 return ExtraSGPRs;
358308 }
359309
360 unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
310 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
361311 bool FlatScrUsed) {
362 return getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed,
363 Features[AMDGPU::FeatureXNACK]);
364 }
365
366 unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs) {
367 NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(Features));
312 return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
313 STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
314 }
315
316 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
317 NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
368318 // SGPRBlocks is actual number of SGPR blocks minus 1.
369 return NumSGPRs / getSGPREncodingGranule(Features) - 1;
370 }
371
372 unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
319 return NumSGPRs / getSGPREncodingGranule(STI) - 1;
320 }
321
322 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) {
373323 return 4;
374324 }
375325
376 unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
377 return getVGPRAllocGranule(Features);
378 }
379
380 unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
326 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI) {
327 return getVGPRAllocGranule(STI);
328 }
329
330 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
381331 return 256;
382332 }
383333
384 unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
385 return getTotalNumVGPRs(Features);
386 }
387
388 unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
334 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
335 return getTotalNumVGPRs(STI);
336 }
337
338 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
389339 assert(WavesPerEU != 0);
390340
391341 if (WavesPerEU >= getMaxWavesPerEU())
392342 return 0;
393343 unsigned MinNumVGPRs =
394 alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
395 getVGPRAllocGranule(Features)) + 1;
396 return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
397 }
398
399 unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
344 alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
345 getVGPRAllocGranule(STI)) + 1;
346 return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
347 }
348
349 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
400350 assert(WavesPerEU != 0);
401351
402 unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
403 getVGPRAllocGranule(Features));
404 unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
352 unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
353 getVGPRAllocGranule(STI));
354 unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
405355 return std::min(MaxNumVGPRs, AddressableNumVGPRs);
406356 }
407357
408 unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs) {
409 NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(Features));
358 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) {
359 NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(STI));
410360 // VGPRBlocks is actual number of VGPR blocks minus 1.
411 return NumVGPRs / getVGPREncodingGranule(Features) - 1;
361 return NumVGPRs / getVGPREncodingGranule(STI) - 1;
412362 }
413363
414364 } // end namespace IsaInfo
415365
416366 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
417 const FeatureBitset &Features) {
418 IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);
367 const MCSubtargetInfo *STI) {
368 IsaVersion Version = getIsaVersion(STI->getCPU());
419369
420370 memset(&Header, 0, sizeof(Header));
421371
422372 Header.amd_kernel_code_version_major = 1;
423373 Header.amd_kernel_code_version_minor = 2;
424374 Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
425 Header.amd_machine_version_major = ISA.Major;
426 Header.amd_machine_version_minor = ISA.Minor;
427 Header.amd_machine_version_stepping = ISA.Stepping;
375 Header.amd_machine_version_major = Version.Major;
376 Header.amd_machine_version_minor = Version.Minor;
377 Header.amd_machine_version_stepping = Version.Stepping;
428378 Header.kernel_code_entry_byte_offset = sizeof(Header);
429379 // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
430380 Header.wavefront_size = 6;
512462 return Ints;
513463 }
514464
515 unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
465 unsigned getVmcntBitMask(const IsaVersion &Version) {
516466 unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
517467 if (Version.Major < 9)
518468 return VmcntLo;
521471 return VmcntLo | VmcntHi;
522472 }
523473
524 unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
474 unsigned getExpcntBitMask(const IsaVersion &Version) {
525475 return (1 << getExpcntBitWidth()) - 1;
526476 }
527477
528 unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
478 unsigned getLgkmcntBitMask(const IsaVersion &Version) {
529479 return (1 << getLgkmcntBitWidth()) - 1;
530480 }
531481
532 unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
482 unsigned getWaitcntBitMask(const IsaVersion &Version) {
533483 unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
534484 unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
535485 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
541491 return Waitcnt | VmcntHi;
542492 }
543493
544 unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
494 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
545495 unsigned VmcntLo =
546496 unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
547497 if (Version.Major < 9)
553503 return VmcntLo | VmcntHi;
554504 }
555505
556 unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
506 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
557507 return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
558508 }
559509
560 unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
510 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
561511 return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
562512 }
563513
564 void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
514 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
565515 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
566516 Vmcnt = decodeVmcnt(Version, Waitcnt);
567517 Expcnt = decodeExpcnt(Version, Waitcnt);
568518 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
569519 }
570520
571 unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
521 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
572522 unsigned Vmcnt) {
573523 Waitcnt =
574524 packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
579529 return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
580530 }
581531
582 unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
532 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
583533 unsigned Expcnt) {
584534 return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
585535 }
586536
587 unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
537 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
588538 unsigned Lgkmcnt) {
589539 return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
590540 }
591541
592 unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
542 unsigned encodeWaitcnt(const IsaVersion &Version,
593543 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
594544 unsigned Waitcnt = getWaitcntBitMask(Version);
595545 Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
1818 #include "llvm/Support/AMDHSAKernelDescriptor.h"
1919 #include "llvm/Support/Compiler.h"
2020 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/TargetParser.h"
2122 #include
2223 #include
2324 #include
5556 TRAP_NUM_SGPRS = 16
5657 };
5758
58 /// Instruction set architecture version.
59 struct IsaVersion {
60 unsigned Major;
61 unsigned Minor;
62 unsigned Stepping;
63 };
64
65 /// \returns Isa version for given subtarget \p Features.
66 IsaVersion getIsaVersion(const FeatureBitset &Features);
67
6859 /// Streams isa version string for given subtarget \p STI into \p Stream.
6960 void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
7061
7263 /// false otherwise.
7364 bool hasCodeObjectV3(const MCSubtargetInfo *STI);
7465
75 /// \returns Wavefront size for given subtarget \p Features.
76 unsigned getWavefrontSize(const FeatureBitset &Features);
77
78 /// \returns Local memory size in bytes for given subtarget \p Features.
79 unsigned getLocalMemorySize(const FeatureBitset &Features);
66 /// \returns Wavefront size for given subtarget \p STI.
67 unsigned getWavefrontSize(const MCSubtargetInfo *STI);
68
69 /// \returns Local memory size in bytes for given subtarget \p STI.
70 unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
8071
8172 /// \returns Number of execution units per compute unit for given subtarget \p
82 /// Features.
83 unsigned getEUsPerCU(const FeatureBitset &Features);
73 /// STI.
74 unsigned getEUsPerCU(const MCSubtargetInfo *STI);
8475
8576 /// \returns Maximum number of work groups per compute unit for given subtarget
86 /// \p Features and limited by given \p FlatWorkGroupSize.
87 unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
77 /// \p STI and limited by given \p FlatWorkGroupSize.
78 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
8879 unsigned FlatWorkGroupSize);
8980
9081 /// \returns Maximum number of waves per compute unit for given subtarget \p
91 /// Features without any kind of limitation.
92 unsigned getMaxWavesPerCU(const FeatureBitset &Features);
82 /// STI without any kind of limitation.
83 unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI);
9384
9485 /// \returns Maximum number of waves per compute unit for given subtarget \p
95 /// Features and limited by given \p FlatWorkGroupSize.
96 unsigned getMaxWavesPerCU(const FeatureBitset &Features,
86 /// STI and limited by given \p FlatWorkGroupSize.
87 unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
9788 unsigned FlatWorkGroupSize);
9889
9990 /// \returns Minimum number of waves per execution unit for given subtarget \p
100 /// Features.
101 unsigned getMinWavesPerEU(const FeatureBitset &Features);
91 /// STI.
92 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
10293
10394 /// \returns Maximum number of waves per execution unit for given subtarget \p
104 /// Features without any kind of limitation.
95 /// STI without any kind of limitation.
// NOTE(review): this overload takes no arguments, so neither the removed
// "\p Features" nor the added "\p STI" wording refers to an actual parameter;
// the doc comment was carried along by the mechanical rename and could simply
// drop the "for given subtarget" part.
10596 unsigned getMaxWavesPerEU();
10697
10798 /// \returns Maximum number of waves per execution unit for given subtarget \p
108 /// Features and limited by given \p FlatWorkGroupSize.
109 unsigned getMaxWavesPerEU(const FeatureBitset &Features,
99 /// STI and limited by given \p FlatWorkGroupSize.
100 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
110101 unsigned FlatWorkGroupSize);
111102
112 /// \returns Minimum flat work group size for given subtarget \p Features.
113 unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features);
114
115 /// \returns Maximum flat work group size for given subtarget \p Features.
116 unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features);
117
118 /// \returns Number of waves per work group for given subtarget \p Features and
103 /// \returns Minimum flat work group size for given subtarget \p STI.
104 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
105
106 /// \returns Maximum flat work group size for given subtarget \p STI.
107 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
108
109 /// \returns Number of waves per work group for given subtarget \p STI and
119110 /// limited by given \p FlatWorkGroupSize.
120 unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
111 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
121112 unsigned FlatWorkGroupSize);
122113
123 /// \returns SGPR allocation granularity for given subtarget \p Features.
124 unsigned getSGPRAllocGranule(const FeatureBitset &Features);
125
126 /// \returns SGPR encoding granularity for given subtarget \p Features.
127 unsigned getSGPREncodingGranule(const FeatureBitset &Features);
128
129 /// \returns Total number of SGPRs for given subtarget \p Features.
130 unsigned getTotalNumSGPRs(const FeatureBitset &Features);
131
132 /// \returns Addressable number of SGPRs for given subtarget \p Features.
133 unsigned getAddressableNumSGPRs(const FeatureBitset &Features);
114 /// \returns SGPR allocation granularity for given subtarget \p STI.
115 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
116
117 /// \returns SGPR encoding granularity for given subtarget \p STI.
118 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
119
120 /// \returns Total number of SGPRs for given subtarget \p STI.
121 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
122
123 /// \returns Addressable number of SGPRs for given subtarget \p STI.
124 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
134125
135126 /// \returns Minimum number of SGPRs that meets the given number of waves per
136 /// execution unit requirement for given subtarget \p Features.
137 unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
127 /// execution unit requirement for given subtarget \p STI.
128 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
138129
139130 /// \returns Maximum number of SGPRs that meets the given number of waves per
140 /// execution unit requirement for given subtarget \p Features.
141 unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
131 /// execution unit requirement for given subtarget \p STI.
132 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
142133 bool Addressable);
143134
144135 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
145 /// Features when the given special registers are used.
146 unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
136 /// STI when the given special registers are used.
137 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
147138 bool FlatScrUsed, bool XNACKUsed);
148139
149140 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
150 /// Features when the given special registers are used. XNACK is inferred from
151 /// \p Features.
152 unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
141 /// STI when the given special registers are used. XNACK is inferred from
142 /// \p STI.
143 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
153144 bool FlatScrUsed);
154145
155 /// \returns Number of SGPR blocks needed for given subtarget \p Features when
146 /// \returns Number of SGPR blocks needed for given subtarget \p STI when
156147 /// \p NumSGPRs are used. \p NumSGPRs should already include any special
157148 /// register counts.
158 unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);
159
160 /// \returns VGPR allocation granularity for given subtarget \p Features.
161 unsigned getVGPRAllocGranule(const FeatureBitset &Features);
162
163 /// \returns VGPR encoding granularity for given subtarget \p Features.
164 unsigned getVGPREncodingGranule(const FeatureBitset &Features);
165
166 /// \returns Total number of VGPRs for given subtarget \p Features.
167 unsigned getTotalNumVGPRs(const FeatureBitset &Features);
168
169 /// \returns Addressable number of VGPRs for given subtarget \p Features.
170 unsigned getAddressableNumVGPRs(const FeatureBitset &Features);
149 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
150
151 /// \returns VGPR allocation granularity for given subtarget \p STI.
152 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI);
153
154 /// \returns VGPR encoding granularity for given subtarget \p STI.
155 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI);
156
157 /// \returns Total number of VGPRs for given subtarget \p STI.
158 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
159
160 /// \returns Addressable number of VGPRs for given subtarget \p STI.
161 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
171162
172163 /// \returns Minimum number of VGPRs that meets given number of waves per
173 /// execution unit requirement for given subtarget \p Features.
174 unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
164 /// execution unit requirement for given subtarget \p STI.
165 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
175166
176167 /// \returns Maximum number of VGPRs that meets given number of waves per
177 /// execution unit requirement for given subtarget \p Features.
178 unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
179
180 /// \returns Number of VGPR blocks needed for given subtarget \p Features when
168 /// execution unit requirement for given subtarget \p STI.
169 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
170
171 /// \returns Number of VGPR blocks needed for given subtarget \p STI when
181172 /// \p NumVGPRs are used.
// NOTE(review): both the removed and the added declaration name the second
// parameter NumSGPRs, while the doc comment above and the definition in this
// diff call it NumVGPRs. Parameter names in a declaration do not affect
// callers, so this looks like a copy-paste slip worth renaming - confirm.
182 unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);
173 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
183174
184175 } // end namespace IsaInfo
185176
232223 int getMCOpcode(uint16_t Opcode, unsigned Gen);
233224
234225 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
235 const FeatureBitset &Features);
226 const MCSubtargetInfo *STI);
236227
237228 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();
238229
267258 bool OnlyFirstRequired = false);
268259
269260 /// \returns Vmcnt bit mask for given isa \p Version.
270 unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version);
261 unsigned getVmcntBitMask(const IsaVersion &Version);
271262
272263 /// \returns Expcnt bit mask for given isa \p Version.
273 unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version);
264 unsigned getExpcntBitMask(const IsaVersion &Version);
274265
275266 /// \returns Lgkmcnt bit mask for given isa \p Version.
276 unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version);
267 unsigned getLgkmcntBitMask(const IsaVersion &Version);
277268
278269 /// \returns Waitcnt bit mask for given isa \p Version.
279 unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version);
270 unsigned getWaitcntBitMask(const IsaVersion &Version);
280271
281272 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
282 unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
273 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
283274
284275 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
285 unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
276 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
286277
287278 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
288 unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
279 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
289280
290281 /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
291282 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
296287 /// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only)
297288 /// \p Expcnt = \p Waitcnt[6:4]
298289 /// \p Lgkmcnt = \p Waitcnt[11:8]
299 void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
290 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
300291 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
301292
302293 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
303 unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
294 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
304295 unsigned Vmcnt);
305296
306297 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
307 unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
298 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
308299 unsigned Expcnt);
309300
310301 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
311 unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
302 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
312303 unsigned Lgkmcnt);
313304
314305 /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
323314 ///
324315 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
325316 /// isa \p Version.
326 unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
317 unsigned encodeWaitcnt(const IsaVersion &Version,
327318 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
328319
329320 unsigned getInitialPSInputAddr(const Function &F);
0 ; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=-xnack < %s | FileCheck %s
1
2 ; CHECK: .hsa_code_object_isa 9,0,2,"AMD","AMDGPU"
3 define amdgpu_kernel void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind {
4 store float 0.0, float addrspace(1)* %out0
5 ret void
6 }
7
11 // RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=-mad-mix-insts -show-encoding %s | FileCheck --check-prefix=GFX9 %s
22
33 .hsa_code_object_isa
4 // GFX8: .hsa_code_object_isa 8,0,0,"AMD","AMDGPU"
4 // GFX8: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
55 // GFX9: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"