llvm.org GIT mirror llvm / 867f487
Revert "AMDGPU: Move isa version and EF_AMDGPU_MACH_* determination into TargetParser." This reverts commit r341982. The change introduced a layering violation. Reverting to unbreak our integrate. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@342023 91177308-0d34-0410-b5e6-96231b3b80d8 Ilya Biryukov 1 year, 5 months ago
15 changed file(s) with 436 addition(s) and 456 deletion(s). Raw diff Collapse all Expand all
319319 GK_AMDGCN_LAST = GK_GFX906,
320320 };
321321
322 /// Instruction set architecture version.
323 struct IsaVersion {
324 unsigned Major;
325 unsigned Minor;
326 unsigned Stepping;
327 };
328
329322 // This isn't comprehensive for now, just things that are needed from the
330323 // frontend driver.
331324 enum ArchFeatureKind : uint32_t {
341334 FEATURE_FAST_DENORMAL_F32 = 1 << 5
342335 };
343336
337 GPUKind parseArchAMDGCN(StringRef CPU);
338 GPUKind parseArchR600(StringRef CPU);
344339 StringRef getArchNameAMDGCN(GPUKind AK);
345340 StringRef getArchNameR600(GPUKind AK);
346341 StringRef getCanonicalArchName(StringRef Arch);
347 GPUKind parseArchAMDGCN(StringRef CPU);
348 GPUKind parseArchR600(StringRef CPU);
349342 unsigned getArchAttrAMDGCN(GPUKind AK);
350343 unsigned getArchAttrR600(GPUKind AK);
351344
352345 void fillValidArchListAMDGCN(SmallVectorImpl<StringRef> &Values);
353346 void fillValidArchListR600(SmallVectorImpl<StringRef> &Values);
354347
355 StringRef getArchNameFromElfMach(unsigned ElfMach);
356 unsigned getElfMach(StringRef GPU);
357 IsaVersion getIsaVersion(StringRef GPU);
358
359 } // namespace AMDGPU
348 }
360349
361350 } // namespace llvm
362351
1616 #include "llvm/ADT/ArrayRef.h"
1717 #include "llvm/ADT/StringSwitch.h"
1818 #include "llvm/ADT/Twine.h"
19 #include "llvm/BinaryFormat/ELF.h"
2019 #include
2120
2221 using namespace llvm;
2322 using namespace ARM;
2423 using namespace AArch64;
25 using namespace AMDGPU;
2624
2725 namespace {
2826
948946 TT.isOSWindows();
949947 }
950948
951 namespace {
952
953949 struct GPUInfo {
954950 StringLiteral Name;
955951 StringLiteral CanonicalName;
957953 unsigned Features;
958954 };
959955
960 constexpr GPUInfo R600GPUs[26] = {
961 // Name Canonical Kind Features
962 // Name
956 using namespace AMDGPU;
957 static constexpr GPUInfo R600GPUs[26] = {
958 // Name Canonical Kind Features
959 // Name
960 //
963961 {{"r600"}, {"r600"}, GK_R600, FEATURE_NONE },
964962 {{"rv630"}, {"r600"}, GK_R600, FEATURE_NONE },
965963 {{"rv635"}, {"r600"}, GK_R600, FEATURE_NONE },
990988
991989 // This table should be sorted by the value of GPUKind
992990 // Don't bother listing the implicitly true features
993 constexpr GPUInfo AMDGCNGPUs[32] = {
994 // Name Canonical Kind Features
995 // Name
991 static constexpr GPUInfo AMDGCNGPUs[32] = {
992 // Name Canonical Kind Features
993 // Name
996994 {{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
997995 {{"tahiti"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
998996 {{"gfx601"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
10271025 {{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
10281026 };
10291027
1030 const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
1028 static const GPUInfo *getArchEntry(AMDGPU::GPUKind AK,
1029 ArrayRef<GPUInfo> Table) {
10311030 GPUInfo Search = { {""}, {""}, AK, AMDGPU::FEATURE_NONE };
10321031
10331032 auto I = std::lower_bound(Table.begin(), Table.end(), Search,
10401039 return I;
10411040 }
10421041
1043 } // namespace
1044
10451042 StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) {
10461043 if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
10471044 return Entry->CanonicalName;
10941091 for (const auto C : R600GPUs)
10951092 Values.push_back(C.Name);
10961093 }
1097
1098 StringRef AMDGPU::getArchNameFromElfMach(unsigned ElfMach) {
1099 AMDGPU::GPUKind AK;
1100
1101 switch (ElfMach) {
1102 case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
1103 case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
1104 case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
1105 case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
1106 case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
1107 case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
1108 case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
1109 case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
1110 case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
1111 case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
1112 case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
1113 case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
1114 case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
1115 case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
1116 case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
1117 case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
1118 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
1119 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
1120 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
1121 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
1122 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
1123 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
1124 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
1125 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
1126 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
1127 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
1128 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
1129 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
1130 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
1131 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
1132 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
1133 case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
1134 }
1135
1136 StringRef GPUName = getArchNameAMDGCN(AK);
1137 if (GPUName != "")
1138 return GPUName;
1139 return getArchNameR600(AK);
1140 }
1141
1142 unsigned AMDGPU::getElfMach(StringRef GPU) {
1143 AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
1144 if (AK == AMDGPU::GPUKind::GK_NONE)
1145 AK = parseArchR600(GPU);
1146
1147 switch (AK) {
1148 case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600;
1149 case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630;
1150 case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880;
1151 case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670;
1152 case GK_RV710: return ELF::EF_AMDGPU_MACH_R600_RV710;
1153 case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730;
1154 case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770;
1155 case GK_CEDAR: return ELF::EF_AMDGPU_MACH_R600_CEDAR;
1156 case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS;
1157 case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER;
1158 case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD;
1159 case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO;
1160 case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS;
1161 case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS;
1162 case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN;
1163 case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS;
1164 case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
1165 case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
1166 case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
1167 case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
1168 case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
1169 case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
1170 case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
1171 case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
1172 case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
1173 case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
1174 case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
1175 case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
1176 case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
1177 case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
1178 case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
1179 case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
1180 }
1181
1182 llvm_unreachable("unknown GPU");
1183 }
1184
1185 AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
1186 if (GPU == "generic")
1187 return {7, 0, 0};
1188
1189 AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
1190 if (AK == AMDGPU::GPUKind::GK_NONE)
1191 return {0, 0, 0};
1192
1193 switch (AK) {
1194 case GK_GFX600: return {6, 0, 0};
1195 case GK_GFX601: return {6, 0, 1};
1196 case GK_GFX700: return {7, 0, 0};
1197 case GK_GFX701: return {7, 0, 1};
1198 case GK_GFX702: return {7, 0, 2};
1199 case GK_GFX703: return {7, 0, 3};
1200 case GK_GFX704: return {7, 0, 4};
1201 case GK_GFX801: return {8, 0, 1};
1202 case GK_GFX802: return {8, 0, 2};
1203 case GK_GFX803: return {8, 0, 3};
1204 case GK_GFX810: return {8, 1, 0};
1205 case GK_GFX900: return {9, 0, 0};
1206 case GK_GFX902: return {9, 0, 2};
1207 case GK_GFX904: return {9, 0, 4};
1208 case GK_GFX906: return {9, 0, 6};
1209 default: return {0, 0, 0};
1210 }
1211 }
3939 #include "llvm/MC/MCStreamer.h"
4040 #include "llvm/Support/AMDGPUMetadata.h"
4141 #include "llvm/Support/MathExtras.h"
42 #include "llvm/Support/TargetParser.h"
4342 #include "llvm/Support/TargetRegistry.h"
4443 #include "llvm/Target/TargetLoweringObjectFile.h"
4544
134133 getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
135134
136135 // HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2.
137 IsaVersion Version = getIsaVersion(getSTI()->getCPU());
136 IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(getSTI()->getFeatureBits());
138137 getTargetStreamer()->EmitDirectiveHSACodeObjectISA(
139 Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
138 ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU");
140139 }
141140
142141 void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
240239 *getSTI(), KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
241240 CurrentProgramInfo.NumVGPRsForWavesPerEU,
242241 CurrentProgramInfo.NumSGPRsForWavesPerEU -
243 IsaInfo::getNumExtraSGPRs(getSTI(),
242 IsaInfo::getNumExtraSGPRs(getSTI()->getFeatureBits(),
244243 CurrentProgramInfo.VCCUsed,
245244 CurrentProgramInfo.FlatUsed),
246245 CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
561560
562561 int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs(
563562 const GCNSubtarget &ST) const {
564 return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(&ST,
563 return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(),
565564 UsesVCC, UsesFlatScratch);
566565 }
567566
758757
759758 // 48 SGPRs - vcc, - flat_scr, -xnack
760759 int MaxSGPRGuess =
761 47 - IsaInfo::getNumExtraSGPRs(getSTI(), true,
760 47 - IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(), true,
762761 ST.hasFlatAddressSpace());
763762 MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
764763 MaxVGPR = std::max(MaxVGPR, 23);
823822 // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
824823 // unified.
825824 unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
826 getSTI(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
825 STM.getFeatureBits(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
827826
828827 // Check the addressable register limit before we add ExtraSGPRs.
829828 if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
905904 }
906905
907906 ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks(
908 getSTI(), ProgInfo.NumSGPRsForWavesPerEU);
907 STM.getFeatureBits(), ProgInfo.NumSGPRsForWavesPerEU);
909908 ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks(
910 getSTI(), ProgInfo.NumVGPRsForWavesPerEU);
909 STM.getFeatureBits(), ProgInfo.NumVGPRsForWavesPerEU);
911910
912911 // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
913912 // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
11371136 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
11381137 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
11391138
1140 AMDGPU::initDefaultAMDKernelCodeT(Out, getSTI());
1139 AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits());
11411140
11421141 Out.compute_pgm_resource_registers =
11431142 CurrentProgramInfo.ComputePGMRSrc1 |
123123 return *this;
124124 }
125125
126 AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) :
126 AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT,
127 const FeatureBitset &FeatureBits) :
127128 TargetTriple(TT),
129 SubtargetFeatureBits(FeatureBits),
128130 Has16BitInsts(false),
129131 HasMadMixInsts(false),
130132 FP32Denormals(false),
141143 { }
142144
143145 GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
144 const GCNTargetMachine &TM) :
146 const GCNTargetMachine &TM) :
145147 AMDGPUGenSubtargetInfo(TT, GPU, FS),
146 AMDGPUSubtarget(TT),
148 AMDGPUSubtarget(TT, getFeatureBits()),
147149 TargetTriple(TT),
148150 Gen(SOUTHERN_ISLANDS),
149151 IsaVersion(ISAVersion0_0_0),
445447 R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
446448 const TargetMachine &TM) :
447449 R600GenSubtargetInfo(TT, GPU, FS),
448 AMDGPUSubtarget(TT),
450 AMDGPUSubtarget(TT, getFeatureBits()),
449451 InstrInfo(*this),
450452 FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
451453 FMA(false),
6262 Triple TargetTriple;
6363
6464 protected:
65 const FeatureBitset &SubtargetFeatureBits;
6566 bool Has16BitInsts;
6667 bool HasMadMixInsts;
6768 bool FP32Denormals;
7778 unsigned WavefrontSize;
7879
7980 public:
80 AMDGPUSubtarget(const Triple &TT);
81 AMDGPUSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
8182
8283 static const AMDGPUSubtarget &get(const MachineFunction &MF);
8384 static const AMDGPUSubtarget &get(const TargetMachine &TM,
201202
202203 /// \returns Maximum number of work groups per compute unit supported by the
203204 /// subtarget and limited by given \p FlatWorkGroupSize.
204 virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
205 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
206 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits,
207 FlatWorkGroupSize);
208 }
205209
206210 /// \returns Minimum flat work group size supported by the subtarget.
207 virtual unsigned getMinFlatWorkGroupSize() const = 0;
211 unsigned getMinFlatWorkGroupSize() const {
212 return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits);
213 }
208214
209215 /// \returns Maximum flat work group size supported by the subtarget.
210 virtual unsigned getMaxFlatWorkGroupSize() const = 0;
216 unsigned getMaxFlatWorkGroupSize() const {
217 return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits);
218 }
211219
212220 /// \returns Maximum number of waves per execution unit supported by the
213221 /// subtarget and limited by given \p FlatWorkGroupSize.
214 virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0;
222 unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
223 return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits,
224 FlatWorkGroupSize);
225 }
215226
216227 /// \returns Minimum number of waves per execution unit supported by the
217228 /// subtarget.
218 virtual unsigned getMinWavesPerEU() const = 0;
229 unsigned getMinWavesPerEU() const {
230 return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits);
231 }
219232
220233 unsigned getMaxWavesPerEU() const { return 10; }
221234
694707 /// \returns Number of execution units per compute unit supported by the
695708 /// subtarget.
696709 unsigned getEUsPerCU() const {
697 return AMDGPU::IsaInfo::getEUsPerCU(this);
710 return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits());
698711 }
699712
700713 /// \returns Maximum number of waves per compute unit supported by the
701714 /// subtarget without any kind of limitation.
702715 unsigned getMaxWavesPerCU() const {
703 return AMDGPU::IsaInfo::getMaxWavesPerCU(this);
716 return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits());
704717 }
705718
706719 /// \returns Maximum number of waves per compute unit supported by the
707720 /// subtarget and limited by given \p FlatWorkGroupSize.
708721 unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
709 return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
722 return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(),
723 FlatWorkGroupSize);
710724 }
711725
712726 /// \returns Maximum number of waves per execution unit supported by the
718732 /// \returns Number of waves per work group supported by the subtarget and
719733 /// limited by given \p FlatWorkGroupSize.
720734 unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
721 return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize);
735 return AMDGPU::IsaInfo::getWavesPerWorkGroup(
736 MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);
722737 }
723738
724739 // static wrappers
837852
838853 /// \returns SGPR allocation granularity supported by the subtarget.
839854 unsigned getSGPRAllocGranule() const {
840 return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
855 return AMDGPU::IsaInfo::getSGPRAllocGranule(
856 MCSubtargetInfo::getFeatureBits());
841857 }
842858
843859 /// \returns SGPR encoding granularity supported by the subtarget.
844860 unsigned getSGPREncodingGranule() const {
845 return AMDGPU::IsaInfo::getSGPREncodingGranule(this);
861 return AMDGPU::IsaInfo::getSGPREncodingGranule(
862 MCSubtargetInfo::getFeatureBits());
846863 }
847864
848865 /// \returns Total number of SGPRs supported by the subtarget.
849866 unsigned getTotalNumSGPRs() const {
850 return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
867 return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits());
851868 }
852869
853870 /// \returns Addressable number of SGPRs supported by the subtarget.
854871 unsigned getAddressableNumSGPRs() const {
855 return AMDGPU::IsaInfo::getAddressableNumSGPRs(this);
872 return AMDGPU::IsaInfo::getAddressableNumSGPRs(
873 MCSubtargetInfo::getFeatureBits());
856874 }
857875
858876 /// \returns Minimum number of SGPRs that meets the given number of waves per
859877 /// execution unit requirement supported by the subtarget.
860878 unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
861 return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
879 return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(),
880 WavesPerEU);
862881 }
863882
864883 /// \returns Maximum number of SGPRs that meets the given number of waves per
865884 /// execution unit requirement supported by the subtarget.
866885 unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
867 return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
886 return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(),
887 WavesPerEU, Addressable);
868888 }
869889
870890 /// \returns Reserved number of SGPRs for given function \p MF.
882902
883903 /// \returns VGPR allocation granularity supported by the subtarget.
884904 unsigned getVGPRAllocGranule() const {
885 return AMDGPU::IsaInfo::getVGPRAllocGranule(this);
905 return AMDGPU::IsaInfo::getVGPRAllocGranule(
906 MCSubtargetInfo::getFeatureBits());
886907 }
887908
888909 /// \returns VGPR encoding granularity supported by the subtarget.
889910 unsigned getVGPREncodingGranule() const {
890 return AMDGPU::IsaInfo::getVGPREncodingGranule(this);
911 return AMDGPU::IsaInfo::getVGPREncodingGranule(
912 MCSubtargetInfo::getFeatureBits());
891913 }
892914
893915 /// \returns Total number of VGPRs supported by the subtarget.
894916 unsigned getTotalNumVGPRs() const {
895 return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
917 return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits());
896918 }
897919
898920 /// \returns Addressable number of VGPRs supported by the subtarget.
899921 unsigned getAddressableNumVGPRs() const {
900 return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
922 return AMDGPU::IsaInfo::getAddressableNumVGPRs(
923 MCSubtargetInfo::getFeatureBits());
901924 }
902925
903926 /// \returns Minimum number of VGPRs that meets given number of waves per
904927 /// execution unit requirement supported by the subtarget.
905928 unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
906 return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
929 return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(),
930 WavesPerEU);
907931 }
908932
909933 /// \returns Maximum number of VGPRs that meets given number of waves per
910934 /// execution unit requirement supported by the subtarget.
911935 unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
912 return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
936 return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(),
937 WavesPerEU);
913938 }
914939
915940 /// \returns Maximum number of VGPRs that meets number of waves per execution
925950 void getPostRAMutations(
926951 std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
927952 const override;
928
929 /// \returns Maximum number of work groups per compute unit supported by the
930 /// subtarget and limited by given \p FlatWorkGroupSize.
931 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
932 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
933 }
934
935 /// \returns Minimum flat work group size supported by the subtarget.
936 unsigned getMinFlatWorkGroupSize() const override {
937 return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
938 }
939
940 /// \returns Maximum flat work group size supported by the subtarget.
941 unsigned getMaxFlatWorkGroupSize() const override {
942 return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
943 }
944
945 /// \returns Maximum number of waves per execution unit supported by the
946 /// subtarget and limited by given \p FlatWorkGroupSize.
947 unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
948 return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
949 }
950
951 /// \returns Minimum number of waves per execution unit supported by the
952 /// subtarget.
953 unsigned getMinWavesPerEU() const override {
954 return AMDGPU::IsaInfo::getMinWavesPerEU(this);
955 }
956953 };
957954
958955 class R600Subtarget final : public R600GenSubtargetInfo,
10631060 bool enableSubRegLiveness() const override {
10641061 return true;
10651062 }
1066
1067 /// \returns Maximum number of work groups per compute unit supported by the
1068 /// subtarget and limited by given \p FlatWorkGroupSize.
1069 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1070 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1071 }
1072
1073 /// \returns Minimum flat work group size supported by the subtarget.
1074 unsigned getMinFlatWorkGroupSize() const override {
1075 return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
1076 }
1077
1078 /// \returns Maximum flat work group size supported by the subtarget.
1079 unsigned getMaxFlatWorkGroupSize() const override {
1080 return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
1081 }
1082
1083 /// \returns Maximum number of waves per execution unit supported by the
1084 /// subtarget and limited by given \p FlatWorkGroupSize.
1085 unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
1086 return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
1087 }
1088
1089 /// \returns Minimum number of waves per execution unit supported by the
1090 /// subtarget.
1091 unsigned getMinWavesPerEU() const override {
1092 return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1093 }
10941063 };
10951064
10961065 } // end namespace llvm
4848 #include "llvm/Support/MachineValueType.h"
4949 #include "llvm/Support/MathExtras.h"
5050 #include "llvm/Support/SMLoc.h"
51 #include "llvm/Support/TargetParser.h"
5251 #include "llvm/Support/TargetRegistry.h"
5352 #include "llvm/Support/raw_ostream.h"
5453 #include
917916 // Currently there is none suitable machinery in the core llvm-mc for this.
918917 // MCSymbol::isRedefinable is intended for another purpose, and
919918 // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
920 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
919 AMDGPU::IsaInfo::IsaVersion ISA =
920 AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
921921 MCContext &Ctx = getContext();
922922 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
923923 MCSymbol *Sym =
18251825 unsigned DwordRegIndex,
18261826 unsigned RegWidth) {
18271827 // Symbols are only defined for GCN targets
1828 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
1828 if (AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()).Major < 6)
18291829 return true;
18301830
18311831 auto SymbolName = getGprCountSymbolName(RegKind);
26362636 unsigned &SGPRBlocks) {
26372637 // TODO(scott.linder): These calculations are duplicated from
26382638 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
2639 IsaVersion Version = getIsaVersion(getSTI().getCPU());
2639 IsaInfo::IsaVersion Version = IsaInfo::getIsaVersion(Features);
26402640
26412641 unsigned NumVGPRs = NextFreeVGPR;
26422642 unsigned NumSGPRs = NextFreeSGPR;
2643 unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI());
2643 unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(Features);
26442644
26452645 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
26462646 NumSGPRs > MaxAddressableNumSGPRs)
26472647 return OutOfRangeError(SGPRRange);
26482648
26492649 NumSGPRs +=
2650 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
2650 IsaInfo::getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, XNACKUsed);
26512651
26522652 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
26532653 NumSGPRs > MaxAddressableNumSGPRs)
26562656 if (Features.test(FeatureSGPRInitBug))
26572657 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
26582658
2659 VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
2660 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
2659 VGPRBlocks = IsaInfo::getNumVGPRBlocks(Features, NumVGPRs);
2660 SGPRBlocks = IsaInfo::getNumSGPRBlocks(Features, NumSGPRs);
26612661
26622662 return false;
26632663 }
26772677
26782678 StringSet<> Seen;
26792679
2680 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
2680 IsaInfo::IsaVersion IVersion =
2681 IsaInfo::getIsaVersion(getSTI().getFeatureBits());
26812682
26822683 SMRange VGPRRange;
26832684 uint64_t NextFreeVGPR = 0;
29362937 // If this directive has no arguments, then use the ISA version for the
29372938 // targeted GPU.
29382939 if (getLexer().is(AsmToken::EndOfStatement)) {
2939 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
2940 AMDGPU::IsaInfo::IsaVersion ISA =
2941 AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
29402942 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
29412943 ISA.Stepping,
29422944 "AMD", "AMDGPU");
29983000
29993001 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
30003002 amd_kernel_code_t Header;
3001 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3003 AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits());
30023004
30033005 while (true) {
30043006 // Lex EndOfStatement. This is in a while loop, because lexing a comment
36763678
36773679 static bool
36783680 encodeCnt(
3679 const AMDGPU::IsaVersion ISA,
3681 const AMDGPU::IsaInfo::IsaVersion ISA,
36803682 int64_t &IntVal,
36813683 int64_t CntVal,
36823684 bool Saturate,
3683 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
3684 unsigned (*decode)(const IsaVersion &Version, unsigned))
3685 unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned),
3686 unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned))
36853687 {
36863688 bool Failed = false;
36873689
37123714 if (getParser().parseAbsoluteExpression(CntVal))
37133715 return true;
37143716
3715 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3717 AMDGPU::IsaInfo::IsaVersion ISA =
3718 AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
37163719
37173720 bool Failed = true;
37183721 bool Sat = CntName.endswith("_sat");
37473750
37483751 OperandMatchResultTy
37493752 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
3750 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3753 AMDGPU::IsaInfo::IsaVersion ISA =
3754 AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
37513755 int64_t Waitcnt = getWaitcntBitMask(ISA);
37523756 SMLoc S = Parser.getTok().getLoc();
37533757
11541154 void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
11551155 const MCSubtargetInfo &STI,
11561156 raw_ostream &O) {
1157 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI.getCPU());
1157 AMDGPU::IsaInfo::IsaVersion ISA =
1158 AMDGPU::IsaInfo::getIsaVersion(STI.getFeatureBits());
11581159
11591160 unsigned SImm16 = MI->getOperand(OpNo).getImm();
11601161 unsigned Vmcnt, Expcnt, Lgkmcnt;
2626 #include "llvm/MC/MCObjectFileInfo.h"
2727 #include "llvm/MC/MCSectionELF.h"
2828 #include "llvm/Support/FormattedStream.h"
29 #include "llvm/Support/TargetParser.h"
3029
3130 namespace llvm {
3231 #include "AMDGPUPTNote.h"
3837 //===----------------------------------------------------------------------===//
3938 // AMDGPUTargetStreamer
4039 //===----------------------------------------------------------------------===//
40
41 static const struct {
42 const char *Name;
43 unsigned Mach;
44 } MachTable[] = {
45 // Radeon HD 2000/3000 Series (R600).
46 { "r600", ELF::EF_AMDGPU_MACH_R600_R600 },
47 { "r630", ELF::EF_AMDGPU_MACH_R600_R630 },
48 { "rs880", ELF::EF_AMDGPU_MACH_R600_RS880 },
49 { "rv670", ELF::EF_AMDGPU_MACH_R600_RV670 },
50 // Radeon HD 4000 Series (R700).
51 { "rv710", ELF::EF_AMDGPU_MACH_R600_RV710 },
52 { "rv730", ELF::EF_AMDGPU_MACH_R600_RV730 },
53 { "rv770", ELF::EF_AMDGPU_MACH_R600_RV770 },
54 // Radeon HD 5000 Series (Evergreen).
55 { "cedar", ELF::EF_AMDGPU_MACH_R600_CEDAR },
56 { "cypress", ELF::EF_AMDGPU_MACH_R600_CYPRESS },
57 { "juniper", ELF::EF_AMDGPU_MACH_R600_JUNIPER },
58 { "redwood", ELF::EF_AMDGPU_MACH_R600_REDWOOD },
59 { "sumo", ELF::EF_AMDGPU_MACH_R600_SUMO },
60 // Radeon HD 6000 Series (Northern Islands).
61 { "barts", ELF::EF_AMDGPU_MACH_R600_BARTS },
62 { "caicos", ELF::EF_AMDGPU_MACH_R600_CAICOS },
63 { "cayman", ELF::EF_AMDGPU_MACH_R600_CAYMAN },
64 { "turks", ELF::EF_AMDGPU_MACH_R600_TURKS },
65 // AMDGCN GFX6.
66 { "gfx600", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
67 { "tahiti", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
68 { "gfx601", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
69 { "hainan", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
70 { "oland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
71 { "pitcairn", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
72 { "verde", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
73 // AMDGCN GFX7.
74 { "gfx700", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
75 { "kaveri", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
76 { "gfx701", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
77 { "hawaii", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
78 { "gfx702", ELF::EF_AMDGPU_MACH_AMDGCN_GFX702 },
79 { "gfx703", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
80 { "kabini", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
81 { "mullins", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
82 { "gfx704", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
83 { "bonaire", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
84 // AMDGCN GFX8.
85 { "gfx801", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
86 { "carrizo", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
87 { "gfx802", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
88 { "iceland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
89 { "tonga", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
90 { "gfx803", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
91 { "fiji", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
92 { "polaris10", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
93 { "polaris11", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
94 { "gfx810", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
95 { "stoney", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
96 // AMDGCN GFX9.
97 { "gfx900", ELF::EF_AMDGPU_MACH_AMDGCN_GFX900 },
98 { "gfx902", ELF::EF_AMDGPU_MACH_AMDGCN_GFX902 },
99 { "gfx904", ELF::EF_AMDGPU_MACH_AMDGCN_GFX904 },
100 { "gfx906", ELF::EF_AMDGPU_MACH_AMDGCN_GFX906 },
101 // Not specified processor.
102 { nullptr, ELF::EF_AMDGPU_MACH_NONE }
103 };
104
105 unsigned AMDGPUTargetStreamer::getMACH(StringRef GPU) const {
106 auto Entry = MachTable;
107 for (; Entry->Name && GPU != Entry->Name; ++Entry)
108 ;
109 return Entry->Mach;
110 }
111
112 const char *AMDGPUTargetStreamer::getMachName(unsigned Mach) {
113 auto Entry = MachTable;
114 for (; Entry->Name && Mach != Entry->Mach; ++Entry)
115 ;
116 return Entry->Name;
117 }
41118
42119 bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) {
43120 HSAMD::Metadata HSAMetadata;
127204 bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) {
128205 amdhsa::kernel_descriptor_t DefaultKD = getDefaultAmdhsaKernelDescriptor();
129206
130 IsaVersion IVersion = getIsaVersion(STI.getCPU());
207 IsaInfo::IsaVersion IVersion = IsaInfo::getIsaVersion(STI.getFeatureBits());
131208
132209 OS << "\t.amdhsa_kernel " << KernelName << '\n';
133210
264341 unsigned EFlags = MCA.getELFHeaderEFlags();
265342
266343 EFlags &= ~ELF::EF_AMDGPU_MACH;
267 EFlags |= getElfMach(STI.getCPU());
344 EFlags |= getMACH(STI.getCPU());
268345
269346 EFlags &= ~ELF::EF_AMDGPU_XNACK;
270347 if (AMDGPU::hasXNACK(STI))
3030 protected:
3131 MCContext &getContext() const { return Streamer.getContext(); }
3232
33 /// \returns Equivalent EF_AMDGPU_MACH_* value for given \p GPU name.
34 unsigned getMACH(StringRef GPU) const;
35
3336 public:
37 /// \returns Equivalent GPU name for an EF_AMDGPU_MACH_* value.
38 static const char *getMachName(unsigned Mach);
39
3440 AMDGPUTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
3541
3642 virtual void EmitDirectiveAMDGCNTarget(StringRef Target) = 0;
368368 const SIRegisterInfo *TRI = nullptr;
369369 const MachineRegisterInfo *MRI = nullptr;
370370 const MachineLoopInfo *MLI = nullptr;
371 AMDGPU::IsaVersion IV;
371 AMDGPU::IsaInfo::IsaVersion IV;
372372
373373 DenseSet BlockVisitedSet;
374374 DenseSet TrackedWaitcntSet;
18401840 TRI = &TII->getRegisterInfo();
18411841 MRI = &MF.getRegInfo();
18421842 MLI = &getAnalysis();
1843 IV = AMDGPU::getIsaVersion(ST->getCPU());
1843 IV = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits());
18441844 const SIMachineFunctionInfo *MFI = MF.getInfo();
18451845
18461846 ForceEmitZeroWaitcnts = ForceEmitZeroFlag;
252252 /// Instruction info.
253253 const SIInstrInfo *TII = nullptr;
254254
255 IsaVersion IV;
255 IsaInfo::IsaVersion IV;
256256
257257 SICacheControl(const GCNSubtarget &ST);
258258
604604
605605 SICacheControl::SICacheControl(const GCNSubtarget &ST) {
606606 TII = ST.getInstrInfo();
607 IV = getIsaVersion(ST.getCPU());
607 IV = IsaInfo::getIsaVersion(ST.getFeatureBits());
608608 }
609609
610610 /* static */
136136
137137 namespace IsaInfo {
138138
139 IsaVersion getIsaVersion(const FeatureBitset &Features) {
140 // GCN GFX6 (Southern Islands (SI)).
141 if (Features.test(FeatureISAVersion6_0_0))
142 return {6, 0, 0};
143 if (Features.test(FeatureISAVersion6_0_1))
144 return {6, 0, 1};
145
146 // GCN GFX7 (Sea Islands (CI)).
147 if (Features.test(FeatureISAVersion7_0_0))
148 return {7, 0, 0};
149 if (Features.test(FeatureISAVersion7_0_1))
150 return {7, 0, 1};
151 if (Features.test(FeatureISAVersion7_0_2))
152 return {7, 0, 2};
153 if (Features.test(FeatureISAVersion7_0_3))
154 return {7, 0, 3};
155 if (Features.test(FeatureISAVersion7_0_4))
156 return {7, 0, 4};
157 if (Features.test(FeatureSeaIslands))
158 return {7, 0, 0};
159
160 // GCN GFX8 (Volcanic Islands (VI)).
161 if (Features.test(FeatureISAVersion8_0_1))
162 return {8, 0, 1};
163 if (Features.test(FeatureISAVersion8_0_2))
164 return {8, 0, 2};
165 if (Features.test(FeatureISAVersion8_0_3))
166 return {8, 0, 3};
167 if (Features.test(FeatureISAVersion8_1_0))
168 return {8, 1, 0};
169 if (Features.test(FeatureVolcanicIslands))
170 return {8, 0, 0};
171
172 // GCN GFX9.
173 if (Features.test(FeatureISAVersion9_0_0))
174 return {9, 0, 0};
175 if (Features.test(FeatureISAVersion9_0_2))
176 return {9, 0, 2};
177 if (Features.test(FeatureISAVersion9_0_4))
178 return {9, 0, 4};
179 if (Features.test(FeatureISAVersion9_0_6))
180 return {9, 0, 6};
181 if (Features.test(FeatureGFX9))
182 return {9, 0, 0};
183
184 if (Features.test(FeatureSouthernIslands))
185 return {0, 0, 0};
186 return {7, 0, 0};
187 }
188
139189 void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
140190 auto TargetTriple = STI->getTargetTriple();
141 auto Version = getIsaVersion(STI->getCPU());
191 auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());
142192
143193 Stream << TargetTriple.getArchName() << '-'
144194 << TargetTriple.getVendorName() << '-'
145195 << TargetTriple.getOSName() << '-'
146196 << TargetTriple.getEnvironmentName() << '-'
147197 << "gfx"
148 << Version.Major
149 << Version.Minor
150 << Version.Stepping;
198 << ISAVersion.Major
199 << ISAVersion.Minor
200 << ISAVersion.Stepping;
151201
152202 if (hasXNACK(*STI))
153203 Stream << "+xnack";
159209 return STI->getFeatureBits().test(FeatureCodeObjectV3);
160210 }
161211
162 unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
163 if (STI->getFeatureBits().test(FeatureWavefrontSize16))
212 unsigned getWavefrontSize(const FeatureBitset &Features) {
213 if (Features.test(FeatureWavefrontSize16))
164214 return 16;
165 if (STI->getFeatureBits().test(FeatureWavefrontSize32))
215 if (Features.test(FeatureWavefrontSize32))
166216 return 32;
167217
168218 return 64;
169219 }
170220
171 unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
172 if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
221 unsigned getLocalMemorySize(const FeatureBitset &Features) {
222 if (Features.test(FeatureLocalMemorySize32768))
173223 return 32768;
174 if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
224 if (Features.test(FeatureLocalMemorySize65536))
175225 return 65536;
176226
177227 return 0;
178228 }
179229
180 unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
230 unsigned getEUsPerCU(const FeatureBitset &Features) {
181231 return 4;
182232 }
183233
184 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
234 unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
185235 unsigned FlatWorkGroupSize) {
186 if (!STI->getFeatureBits().test(FeatureGCN))
236 if (!Features.test(FeatureGCN))
187237 return 8;
188 unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
238 unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
189239 if (N == 1)
190240 return 40;
191241 N = 40 / N;
192242 return std::min(N, 16u);
193243 }
194244
195 unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
196 return getMaxWavesPerEU() * getEUsPerCU(STI);
197 }
198
199 unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
245 unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
246 return getMaxWavesPerEU() * getEUsPerCU(Features);
247 }
248
249 unsigned getMaxWavesPerCU(const FeatureBitset &Features,
200250 unsigned FlatWorkGroupSize) {
201 return getWavesPerWorkGroup(STI, FlatWorkGroupSize);
202 }
203
204 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
251 return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
252 }
253
254 unsigned getMinWavesPerEU(const FeatureBitset &Features) {
205255 return 1;
206256 }
207257
210260 return 10;
211261 }
212262
213 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
263 unsigned getMaxWavesPerEU(const FeatureBitset &Features,
214264 unsigned FlatWorkGroupSize) {
215 return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize),
216 getEUsPerCU(STI)) / getEUsPerCU(STI);
217 }
218
219 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
265 return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
266 getEUsPerCU(Features)) / getEUsPerCU(Features);
267 }
268
269 unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
220270 return 1;
221271 }
222272
223 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
273 unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
224274 return 2048;
225275 }
226276
227 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
277 unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
228278 unsigned FlatWorkGroupSize) {
229 return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) /
230 getWavefrontSize(STI);
231 }
232
233 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
234 IsaVersion Version = getIsaVersion(STI->getCPU());
279 return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
280 getWavefrontSize(Features);
281 }
282
283 unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
284 IsaVersion Version = getIsaVersion(Features);
235285 if (Version.Major >= 8)
236286 return 16;
237287 return 8;
238288 }
239289
240 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
290 unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
241291 return 8;
242292 }
243293
244 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
245 IsaVersion Version = getIsaVersion(STI->getCPU());
294 unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
295 IsaVersion Version = getIsaVersion(Features);
246296 if (Version.Major >= 8)
247297 return 800;
248298 return 512;
249299 }
250300
251 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
252 if (STI->getFeatureBits().test(FeatureSGPRInitBug))
301 unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
302 if (Features.test(FeatureSGPRInitBug))
253303 return FIXED_NUM_SGPRS_FOR_INIT_BUG;
254304
255 IsaVersion Version = getIsaVersion(STI->getCPU());
305 IsaVersion Version = getIsaVersion(Features);
256306 if (Version.Major >= 8)
257307 return 102;
258308 return 104;
259309 }
260310
261 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
311 unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
262312 assert(WavesPerEU != 0);
263313
264314 if (WavesPerEU >= getMaxWavesPerEU())
265315 return 0;
266316
267 unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
268 if (STI->getFeatureBits().test(FeatureTrapHandler))
317 unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1);
318 if (Features.test(FeatureTrapHandler))
269319 MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
270 MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
271 return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
272 }
273
274 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
320 MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(Features)) + 1;
321 return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
322 }
323
324 unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
275325 bool Addressable) {
276326 assert(WavesPerEU != 0);
277327
278 IsaVersion Version = getIsaVersion(STI->getCPU());
279 unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
328 IsaVersion Version = getIsaVersion(Features);
329 unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
280330 if (Version.Major >= 8 && !Addressable)
281331 AddressableNumSGPRs = 112;
282 unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
283 if (STI->getFeatureBits().test(FeatureTrapHandler))
332 unsigned MaxNumSGPRs = getTotalNumSGPRs(Features) / WavesPerEU;
333 if (Features.test(FeatureTrapHandler))
284334 MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
285 MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
335 MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(Features));
286336 return std::min(MaxNumSGPRs, AddressableNumSGPRs);
287337 }
288338
289 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
339 unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
290340 bool FlatScrUsed, bool XNACKUsed) {
291341 unsigned ExtraSGPRs = 0;
292342 if (VCCUsed)
293343 ExtraSGPRs = 2;
294344
295 IsaVersion Version = getIsaVersion(STI->getCPU());
345 IsaVersion Version = getIsaVersion(Features);
296346 if (Version.Major < 8) {
297347 if (FlatScrUsed)
298348 ExtraSGPRs = 4;
307357 return ExtraSGPRs;
308358 }
309359
310 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
360 unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
311361 bool FlatScrUsed) {
312 return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
313 STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
314 }
315
316 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
317 NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
362 return getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed,
363 Features[AMDGPU::FeatureXNACK]);
364 }
365
366 unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs) {
367 NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(Features));
318368 // SGPRBlocks is actual number of SGPR blocks minus 1.
319 return NumSGPRs / getSGPREncodingGranule(STI) - 1;
320 }
321
322 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) {
369 return NumSGPRs / getSGPREncodingGranule(Features) - 1;
370 }
371
372 unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
323373 return 4;
324374 }
325375
326 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI) {
327 return getVGPRAllocGranule(STI);
328 }
329
330 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
376 unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
377 return getVGPRAllocGranule(Features);
378 }
379
380 unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
331381 return 256;
332382 }
333383
334 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
335 return getTotalNumVGPRs(STI);
336 }
337
338 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
384 unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
385 return getTotalNumVGPRs(Features);
386 }
387
388 unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
339389 assert(WavesPerEU != 0);
340390
341391 if (WavesPerEU >= getMaxWavesPerEU())
342392 return 0;
343393 unsigned MinNumVGPRs =
344 alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
345 getVGPRAllocGranule(STI)) + 1;
346 return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
347 }
348
349 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
394 alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
395 getVGPRAllocGranule(Features)) + 1;
396 return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
397 }
398
399 unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
350400 assert(WavesPerEU != 0);
351401
352 unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
353 getVGPRAllocGranule(STI));
354 unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
402 unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
403 getVGPRAllocGranule(Features));
404 unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
355405 return std::min(MaxNumVGPRs, AddressableNumVGPRs);
356406 }
357407
358 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) {
359 NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(STI));
408 unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs) {
409 NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(Features));
360410 // VGPRBlocks is actual number of VGPR blocks minus 1.
361 return NumVGPRs / getVGPREncodingGranule(STI) - 1;
411 return NumVGPRs / getVGPREncodingGranule(Features) - 1;
362412 }
363413
364414 } // end namespace IsaInfo
365415
366416 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
367 const MCSubtargetInfo *STI) {
368 IsaVersion Version = getIsaVersion(STI->getCPU());
417 const FeatureBitset &Features) {
418 IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);
369419
370420 memset(&Header, 0, sizeof(Header));
371421
372422 Header.amd_kernel_code_version_major = 1;
373423 Header.amd_kernel_code_version_minor = 2;
374424 Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
375 Header.amd_machine_version_major = Version.Major;
376 Header.amd_machine_version_minor = Version.Minor;
377 Header.amd_machine_version_stepping = Version.Stepping;
425 Header.amd_machine_version_major = ISA.Major;
426 Header.amd_machine_version_minor = ISA.Minor;
427 Header.amd_machine_version_stepping = ISA.Stepping;
378428 Header.kernel_code_entry_byte_offset = sizeof(Header);
379429 // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
380430 Header.wavefront_size = 6;
462512 return Ints;
463513 }
464514
465 unsigned getVmcntBitMask(const IsaVersion &Version) {
515 unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
466516 unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
467517 if (Version.Major < 9)
468518 return VmcntLo;
471521 return VmcntLo | VmcntHi;
472522 }
473523
474 unsigned getExpcntBitMask(const IsaVersion &Version) {
524 unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
475525 return (1 << getExpcntBitWidth()) - 1;
476526 }
477527
478 unsigned getLgkmcntBitMask(const IsaVersion &Version) {
528 unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
479529 return (1 << getLgkmcntBitWidth()) - 1;
480530 }
481531
482 unsigned getWaitcntBitMask(const IsaVersion &Version) {
532 unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
483533 unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
484534 unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
485535 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
491541 return Waitcnt | VmcntHi;
492542 }
493543
494 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
544 unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
495545 unsigned VmcntLo =
496546 unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
497547 if (Version.Major < 9)
503553 return VmcntLo | VmcntHi;
504554 }
505555
506 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
556 unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
507557 return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
508558 }
509559
510 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
560 unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
511561 return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
512562 }
513563
514 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
564 void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
515565 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
516566 Vmcnt = decodeVmcnt(Version, Waitcnt);
517567 Expcnt = decodeExpcnt(Version, Waitcnt);
518568 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
519569 }
520570
521 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
571 unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
522572 unsigned Vmcnt) {
523573 Waitcnt =
524574 packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
529579 return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
530580 }
531581
532 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
582 unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
533583 unsigned Expcnt) {
534584 return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
535585 }
536586
537 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
587 unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
538588 unsigned Lgkmcnt) {
539589 return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
540590 }
541591
542 unsigned encodeWaitcnt(const IsaVersion &Version,
592 unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
543593 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
544594 unsigned Waitcnt = getWaitcntBitMask(Version);
545595 Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
1818 #include "llvm/Support/AMDHSAKernelDescriptor.h"
1919 #include "llvm/Support/Compiler.h"
2020 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/TargetParser.h"
2221 #include
2322 #include
2423 #include
5655 TRAP_NUM_SGPRS = 16
5756 };
5857
58 /// Instruction set architecture version.
59 struct IsaVersion {
60 unsigned Major;
61 unsigned Minor;
62 unsigned Stepping;
63 };
64
65 /// \returns Isa version for given subtarget \p Features.
66 IsaVersion getIsaVersion(const FeatureBitset &Features);
67
5968 /// Streams isa version string for given subtarget \p STI into \p Stream.
6069 void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
6170
6372 /// false otherwise.
6473 bool hasCodeObjectV3(const MCSubtargetInfo *STI);
6574
66 /// \returns Wavefront size for given subtarget \p STI.
67 unsigned getWavefrontSize(const MCSubtargetInfo *STI);
68
69 /// \returns Local memory size in bytes for given subtarget \p STI.
70 unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
75 /// \returns Wavefront size for given subtarget \p Features.
76 unsigned getWavefrontSize(const FeatureBitset &Features);
77
78 /// \returns Local memory size in bytes for given subtarget \p Features.
79 unsigned getLocalMemorySize(const FeatureBitset &Features);
7180
7281 /// \returns Number of execution units per compute unit for given subtarget \p
73 /// STI.
74 unsigned getEUsPerCU(const MCSubtargetInfo *STI);
82 /// Features.
83 unsigned getEUsPerCU(const FeatureBitset &Features);
7584
7685 /// \returns Maximum number of work groups per compute unit for given subtarget
77 /// \p STI and limited by given \p FlatWorkGroupSize.
78 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
86 /// \p Features and limited by given \p FlatWorkGroupSize.
87 unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
7988 unsigned FlatWorkGroupSize);
8089
8190 /// \returns Maximum number of waves per compute unit for given subtarget \p
82 /// STI without any kind of limitation.
83 unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI);
91 /// Features without any kind of limitation.
92 unsigned getMaxWavesPerCU(const FeatureBitset &Features);
8493
8594 /// \returns Maximum number of waves per compute unit for given subtarget \p
86 /// STI and limited by given \p FlatWorkGroupSize.
87 unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
95 /// Features and limited by given \p FlatWorkGroupSize.
96 unsigned getMaxWavesPerCU(const FeatureBitset &Features,
8897 unsigned FlatWorkGroupSize);
8998
9099 /// \returns Minimum number of waves per execution unit for given subtarget \p
91 /// STI.
92 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
100 /// Features.
101 unsigned getMinWavesPerEU(const FeatureBitset &Features);
93102
94103 /// \returns Maximum number of waves per execution unit for given subtarget \p
95 /// STI without any kind of limitation.
104 /// Features without any kind of limitation.
96105 unsigned getMaxWavesPerEU();
97106
98107 /// \returns Maximum number of waves per execution unit for given subtarget \p
99 /// STI and limited by given \p FlatWorkGroupSize.
100 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
108 /// Features and limited by given \p FlatWorkGroupSize.
109 unsigned getMaxWavesPerEU(const FeatureBitset &Features,
101110 unsigned FlatWorkGroupSize);
102111
103 /// \returns Minimum flat work group size for given subtarget \p STI.
104 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
105
106 /// \returns Maximum flat work group size for given subtarget \p STI.
107 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
108
109 /// \returns Number of waves per work group for given subtarget \p STI and
112 /// \returns Minimum flat work group size for given subtarget \p Features.
113 unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features);
114
115 /// \returns Maximum flat work group size for given subtarget \p Features.
116 unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features);
117
118 /// \returns Number of waves per work group for given subtarget \p Features and
110119 /// limited by given \p FlatWorkGroupSize.
111 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
120 unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
112121 unsigned FlatWorkGroupSize);
113122
114 /// \returns SGPR allocation granularity for given subtarget \p STI.
115 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
116
117 /// \returns SGPR encoding granularity for given subtarget \p STI.
118 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
119
120 /// \returns Total number of SGPRs for given subtarget \p STI.
121 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
122
123 /// \returns Addressable number of SGPRs for given subtarget \p STI.
124 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
123 /// \returns SGPR allocation granularity for given subtarget \p Features.
124 unsigned getSGPRAllocGranule(const FeatureBitset &Features);
125
126 /// \returns SGPR encoding granularity for given subtarget \p Features.
127 unsigned getSGPREncodingGranule(const FeatureBitset &Features);
128
129 /// \returns Total number of SGPRs for given subtarget \p Features.
130 unsigned getTotalNumSGPRs(const FeatureBitset &Features);
131
132 /// \returns Addressable number of SGPRs for given subtarget \p Features.
133 unsigned getAddressableNumSGPRs(const FeatureBitset &Features);
125134
126135 /// \returns Minimum number of SGPRs that meets the given number of waves per
127 /// execution unit requirement for given subtarget \p STI.
128 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
136 /// execution unit requirement for given subtarget \p Features.
137 unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
129138
130139 /// \returns Maximum number of SGPRs that meets the given number of waves per
131 /// execution unit requirement for given subtarget \p STI.
132 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
140 /// execution unit requirement for given subtarget \p Features.
141 unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
133142 bool Addressable);
134143
135144 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
136 /// STI when the given special registers are used.
137 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
145 /// Features when the given special registers are used.
146 unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
138147 bool FlatScrUsed, bool XNACKUsed);
139148
140149 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
141 /// STI when the given special registers are used. XNACK is inferred from
142 /// \p STI.
143 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
150 /// Features when the given special registers are used. XNACK is inferred from
151 /// \p Features.
152 unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
144153 bool FlatScrUsed);
145154
146 /// \returns Number of SGPR blocks needed for given subtarget \p STI when
155 /// \returns Number of SGPR blocks needed for given subtarget \p Features when
147156 /// \p NumSGPRs are used. \p NumSGPRs should already include any special
148157 /// register counts.
149 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
150
151 /// \returns VGPR allocation granularity for given subtarget \p STI.
152 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI);
153
154 /// \returns VGPR encoding granularity for given subtarget \p STI.
155 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI);
156
157 /// \returns Total number of VGPRs for given subtarget \p STI.
158 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
159
160 /// \returns Addressable number of VGPRs for given subtarget \p STI.
161 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
158 unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);
159
160 /// \returns VGPR allocation granularity for given subtarget \p Features.
161 unsigned getVGPRAllocGranule(const FeatureBitset &Features);
162
163 /// \returns VGPR encoding granularity for given subtarget \p Features.
164 unsigned getVGPREncodingGranule(const FeatureBitset &Features);
165
166 /// \returns Total number of VGPRs for given subtarget \p Features.
167 unsigned getTotalNumVGPRs(const FeatureBitset &Features);
168
169 /// \returns Addressable number of VGPRs for given subtarget \p Features.
170 unsigned getAddressableNumVGPRs(const FeatureBitset &Features);
162171
163172 /// \returns Minimum number of VGPRs that meets given number of waves per
164 /// execution unit requirement for given subtarget \p STI.
165 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
173 /// execution unit requirement for given subtarget \p Features.
174 unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
166175
167176 /// \returns Maximum number of VGPRs that meets given number of waves per
168 /// execution unit requirement for given subtarget \p STI.
169 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
170
171 /// \returns Number of VGPR blocks needed for given subtarget \p STI when
177 /// execution unit requirement for given subtarget \p Features.
178 unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
179
180 /// \returns Number of VGPR blocks needed for given subtarget \p Features when
172181 /// \p NumVGPRs are used.
173 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
182 unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);
174183
175184 } // end namespace IsaInfo
176185
223232 int getMCOpcode(uint16_t Opcode, unsigned Gen);
224233
225234 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
226 const MCSubtargetInfo *STI);
235 const FeatureBitset &Features);
227236
228237 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();
229238
258267 bool OnlyFirstRequired = false);
259268
260269 /// \returns Vmcnt bit mask for given isa \p Version.
261 unsigned getVmcntBitMask(const IsaVersion &Version);
270 unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version);
262271
263272 /// \returns Expcnt bit mask for given isa \p Version.
264 unsigned getExpcntBitMask(const IsaVersion &Version);
273 unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version);
265274
266275 /// \returns Lgkmcnt bit mask for given isa \p Version.
267 unsigned getLgkmcntBitMask(const IsaVersion &Version);
276 unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version);
268277
269278 /// \returns Waitcnt bit mask for given isa \p Version.
270 unsigned getWaitcntBitMask(const IsaVersion &Version);
279 unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version);
271280
272281 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
273 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
282 unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
274283
275284 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
276 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
285 unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
277286
278287 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
279 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
288 unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
280289
281290 /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
282291 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
287296 /// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only)
288297 /// \p Expcnt = \p Waitcnt[6:4]
289298 /// \p Lgkmcnt = \p Waitcnt[11:8]
290 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
299 void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
291300 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
292301
293302 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
294 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
303 unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
295304 unsigned Vmcnt);
296305
297306 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
298 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
307 unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
299308 unsigned Expcnt);
300309
301310 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
302 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
311 unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
303312 unsigned Lgkmcnt);
304313
305314 /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
314323 ///
315324 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
316325 /// isa \p Version.
317 unsigned encodeWaitcnt(const IsaVersion &Version,
326 unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
318327 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
319328
320329 unsigned getInitialPSInputAddr(const Function &F);
+0
-8
test/CodeGen/AMDGPU/gfx902-without-xnack.ll less more
None ; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=-xnack < %s | FileCheck %s
1
2 ; CHECK: .hsa_code_object_isa 9,0,2,"AMD","AMDGPU"
3 define amdgpu_kernel void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind {
4 store float 0.0, float addrspace(1)* %out0
5 ret void
6 }
7
11 // RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=-mad-mix-insts -show-encoding %s | FileCheck --check-prefix=GFX9 %s
22
33 .hsa_code_object_isa
4 // GFX8: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
4 // GFX8: .hsa_code_object_isa 8,0,0,"AMD","AMDGPU"
55 // GFX9: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"