llvm.org GIT mirror llvm / 017228c
[AMDGPU] Add target information that is required by tools to metadata Differential Revision: https://reviews.llvm.org/D28760#fb670e28 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294449 91177308-0d34-0410-b5e6-96231b3b80d8 Konstantin Zhuravlyov 3 years ago
17 changed file(s) with 641 addition(s) and 281 deletion(s). Raw diff Collapse all Expand all
108108 TS->EmitDirectiveHSACodeObjectVersion(2, 1);
109109
110110 const MCSubtargetInfo *STI = TM.getMCSubtargetInfo();
111 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits());
111 AMDGPU::IsaInfo::IsaVersion ISA =
112 AMDGPU::IsaInfo::getIsaVersion(STI->getFeatureBits());
112113 TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
113114 "AMD", "AMDGPU");
114115
115116 // Emit runtime metadata.
116 TS->EmitRuntimeMetadata(M);
117 TS->EmitRuntimeMetadata(STI->getFeatureBits(), M);
117118 }
118119
119120 bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
484485 DiagnosticInfoResourceLimit Diag(*MF.getFunction(),
485486 "addressable scalar registers",
486487 MaxSGPR + 1, DS_Error,
487 DK_ResourceLimit, MaxAddressableNumSGPRs);
488 DK_ResourceLimit,
489 MaxAddressableNumSGPRs);
488490 Ctx.diagnose(Diag);
489491 MaxSGPR = MaxAddressableNumSGPRs - 1;
490492 }
508510
509511 if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ||
510512 STM.hasSGPRInitBug()) {
511 unsigned MaxNumSGPRs = STM.getAddressableNumSGPRs();
512 if (ProgInfo.NumSGPR > MaxNumSGPRs) {
513 // This can happen due to a compiler bug or when using inline asm to use the
514 // registers which are usually reserved for vcc etc.
515
513 unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
514 if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
515 // This can happen due to a compiler bug or when using inline asm to use
516 // the registers which are usually reserved for vcc etc.
516517 LLVMContext &Ctx = MF.getFunction()->getContext();
517518 DiagnosticInfoResourceLimit Diag(*MF.getFunction(),
518519 "scalar registers",
519520 ProgInfo.NumSGPR, DS_Error,
520 DK_ResourceLimit, MaxNumSGPRs);
521 DK_ResourceLimit,
522 MaxAddressableNumSGPRs);
521523 Ctx.diagnose(Diag);
522 ProgInfo.NumSGPR = MaxNumSGPRs;
523 ProgInfo.NumSGPRsForWavesPerEU = MaxNumSGPRs;
524 ProgInfo.NumSGPR = MaxAddressableNumSGPRs;
525 ProgInfo.NumSGPRsForWavesPerEU = MaxAddressableNumSGPRs;
524526 }
525527 }
526528
527529 if (STM.hasSGPRInitBug()) {
528 ProgInfo.NumSGPR = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
529 ProgInfo.NumSGPRsForWavesPerEU = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
530 ProgInfo.NumSGPR =
531 AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
532 ProgInfo.NumSGPRsForWavesPerEU =
533 AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
530534 }
531535
532536 if (MFI->NumUserSGPRs > STM.getMaxNumUserSGPRs()) {
553557 STM.getVGPREncodingGranule());
554558 ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / STM.getVGPREncodingGranule() - 1;
555559
556 // Record first reserved register and reserved register count fields, and
557 // update max register counts if "amdgpu-debugger-reserve-regs" attribute was
558 // requested.
560 // Record first reserved VGPR and number of reserved VGPRs.
559561 ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0;
560562 ProgInfo.ReservedVGPRCount = STM.getReservedNumVGPRs(MF);
561563
4040
4141 // Version and revision of runtime metadata
4242 const unsigned char MDVersion = 2;
43 const unsigned char MDRevision = 0;
43 const unsigned char MDRevision = 1;
4444
4545 // Name of keys for runtime metadata.
4646 namespace KeyName {
4747
48 const char MDVersion[] = "amd.MDVersion"; // Runtime metadata version
49 const char Language[] = "amd.Language"; // Language
50 const char LanguageVersion[] = "amd.LanguageVersion"; // Language version
51 const char Kernels[] = "amd.Kernels"; // Kernels
52 const char KernelName[] = "amd.KernelName"; // Kernel name
53 const char Args[] = "amd.Args"; // Kernel arguments
54 const char ArgSize[] = "amd.ArgSize"; // Kernel arg size
55 const char ArgAlign[] = "amd.ArgAlign"; // Kernel arg alignment
56 const char ArgTypeName[] = "amd.ArgTypeName"; // Kernel type name
57 const char ArgName[] = "amd.ArgName"; // Kernel name
58 const char ArgKind[] = "amd.ArgKind"; // Kernel argument kind
59 const char ArgValueType[] = "amd.ArgValueType"; // Kernel argument value type
60 const char ArgAddrQual[] = "amd.ArgAddrQual"; // Kernel argument address qualifier
61 const char ArgAccQual[] = "amd.ArgAccQual"; // Kernel argument access qualifier
62 const char ArgIsConst[] = "amd.ArgIsConst"; // Kernel argument is const qualified
63 const char ArgIsRestrict[] = "amd.ArgIsRestrict"; // Kernel argument is restrict qualified
64 const char ArgIsVolatile[] = "amd.ArgIsVolatile"; // Kernel argument is volatile qualified
65 const char ArgIsPipe[] = "amd.ArgIsPipe"; // Kernel argument is pipe qualified
66 const char ReqdWorkGroupSize[] = "amd.ReqdWorkGroupSize"; // Required work group size
67 const char WorkGroupSizeHint[] = "amd.WorkGroupSizeHint"; // Work group size hint
68 const char VecTypeHint[] = "amd.VecTypeHint"; // Vector type hint
69 const char KernelIndex[] = "amd.KernelIndex"; // Kernel index for device enqueue
70 const char NoPartialWorkGroups[] = "amd.NoPartialWorkGroups"; // No partial work groups
71 const char PrintfInfo[] = "amd.PrintfInfo"; // Prinf function call information
72 const char ArgActualAcc[] = "amd.ArgActualAcc"; // The actual kernel argument access qualifier
73 const char ArgPointeeAlign[] = "amd.ArgPointeeAlign"; // Alignment of pointee type
48 // Runtime metadata version
49 const char MDVersion[] = "amd.MDVersion";
50
51 // Instruction set architecture information
52 const char IsaInfo[] = "amd.IsaInfo";
53 // Wavefront size
54 const char IsaInfoWavefrontSize[] = "amd.IsaInfoWavefrontSize";
55 // Local memory size in bytes
56 const char IsaInfoLocalMemorySize[] = "amd.IsaInfoLocalMemorySize";
57 // Number of execution units per compute unit
58 const char IsaInfoEUsPerCU[] = "amd.IsaInfoEUsPerCU";
59 // Maximum number of waves per execution unit
60 const char IsaInfoMaxWavesPerEU[] = "amd.IsaInfoMaxWavesPerEU";
61 // Maximum flat work group size
62 const char IsaInfoMaxFlatWorkGroupSize[] = "amd.IsaInfoMaxFlatWorkGroupSize";
63 // SGPR allocation granularity
64 const char IsaInfoSGPRAllocGranule[] = "amd.IsaInfoSGPRAllocGranule";
65 // Total number of SGPRs
66 const char IsaInfoTotalNumSGPRs[] = "amd.IsaInfoTotalNumSGPRs";
67 // Addressable number of SGPRs
68 const char IsaInfoAddressableNumSGPRs[] = "amd.IsaInfoAddressableNumSGPRs";
69 // VGPR allocation granularity
70 const char IsaInfoVGPRAllocGranule[] = "amd.IsaInfoVGPRAllocGranule";
71 // Total number of VGPRs
72 const char IsaInfoTotalNumVGPRs[] = "amd.IsaInfoTotalNumVGPRs";
73 // Addressable number of VGPRs
74 const char IsaInfoAddressableNumVGPRs[] = "amd.IsaInfoAddressableNumVGPRs";
75
76 // Language
77 const char Language[] = "amd.Language";
78 // Language version
79 const char LanguageVersion[] = "amd.LanguageVersion";
80
81 // Kernels
82 const char Kernels[] = "amd.Kernels";
83 // Kernel name
84 const char KernelName[] = "amd.KernelName";
85 // Kernel arguments
86 const char Args[] = "amd.Args";
87 // Kernel argument size in bytes
88 const char ArgSize[] = "amd.ArgSize";
89 // Kernel argument alignment
90 const char ArgAlign[] = "amd.ArgAlign";
91 // Kernel argument type name
92 const char ArgTypeName[] = "amd.ArgTypeName";
93 // Kernel argument name
94 const char ArgName[] = "amd.ArgName";
95 // Kernel argument kind
96 const char ArgKind[] = "amd.ArgKind";
97 // Kernel argument value type
98 const char ArgValueType[] = "amd.ArgValueType";
99 // Kernel argument address qualifier
100 const char ArgAddrQual[] = "amd.ArgAddrQual";
101 // Kernel argument access qualifier
102 const char ArgAccQual[] = "amd.ArgAccQual";
103 // Kernel argument is const qualified
104 const char ArgIsConst[] = "amd.ArgIsConst";
105 // Kernel argument is restrict qualified
106 const char ArgIsRestrict[] = "amd.ArgIsRestrict";
107 // Kernel argument is volatile qualified
108 const char ArgIsVolatile[] = "amd.ArgIsVolatile";
109 // Kernel argument is pipe qualified
110 const char ArgIsPipe[] = "amd.ArgIsPipe";
111 // Required work group size
112 const char ReqdWorkGroupSize[] = "amd.ReqdWorkGroupSize";
113 // Work group size hint
114 const char WorkGroupSizeHint[] = "amd.WorkGroupSizeHint";
115 // Vector type hint
116 const char VecTypeHint[] = "amd.VecTypeHint";
117 // Kernel index for device enqueue
118 const char KernelIndex[] = "amd.KernelIndex";
119 // No partial work groups
120 const char NoPartialWorkGroups[] = "amd.NoPartialWorkGroups";
121 // Printf function call information
122 const char PrintfInfo[] = "amd.PrintfInfo";
123 // The actual kernel argument access qualifier
124 const char ArgActualAcc[] = "amd.ArgActualAcc";
125 // Alignment of pointee type
126 const char ArgPointeeAlign[] = "amd.ArgPointeeAlign";
74127
75128 } // end namespace KeyName
76129
174227
175228 } // end namespace Kernel
176229
230 namespace IsaInfo {
231
232 /// \brief In-memory representation of instruction set architecture
233 /// information.
234 struct Metadata {
235 /// \brief Wavefront size.
236 unsigned WavefrontSize = 0;
237 /// \brief Local memory size in bytes.
238 unsigned LocalMemorySize = 0;
239 /// \brief Number of execution units per compute unit.
240 unsigned EUsPerCU = 0;
241 /// \brief Maximum number of waves per execution unit.
242 unsigned MaxWavesPerEU = 0;
243 /// \brief Maximum flat work group size.
244 unsigned MaxFlatWorkGroupSize = 0;
245 /// \brief SGPR allocation granularity.
246 unsigned SGPRAllocGranule = 0;
247 /// \brief Total number of SGPRs.
248 unsigned TotalNumSGPRs = 0;
249 /// \brief Addressable number of SGPRs.
250 unsigned AddressableNumSGPRs = 0;
251 /// \brief VGPR allocation granularity.
252 unsigned VGPRAllocGranule = 0;
253 /// \brief Total number of VGPRs.
254 unsigned TotalNumVGPRs = 0;
255 /// \brief Addressable number of VGPRs.
256 unsigned AddressableNumVGPRs = 0;
257
258 Metadata() = default;
259 };
260
261 } // end namespace IsaInfo
262
177263 namespace Program {
178264
179265 // In-memory representation of program information.
180266 struct Metadata {
181267 std::vector MDVersionSeq;
268 IsaInfo::Metadata IsaInfo;
182269 std::vector PrintfInfo;
183270 std::vector Kernels;
184271
322322 return 1;
323323 }
324324
325 unsigned SISubtarget::getMinNumSGPRs(unsigned WavesPerEU) const {
326 if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
327 switch (WavesPerEU) {
328 case 0: return 0;
329 case 10: return 0;
330 case 9: return 0;
331 case 8: return 81;
332 default: return 97;
333 }
334 } else {
335 switch (WavesPerEU) {
336 case 0: return 0;
337 case 10: return 0;
338 case 9: return 49;
339 case 8: return 57;
340 case 7: return 65;
341 case 6: return 73;
342 case 5: return 81;
343 default: return 97;
344 }
345 }
346 }
347
348 unsigned SISubtarget::getMaxNumSGPRs(unsigned WavesPerEU,
349 bool Addressable) const {
350 if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
351 switch (WavesPerEU) {
352 case 0: return 80;
353 case 10: return 80;
354 case 9: return 80;
355 case 8: return 96;
356 default: return Addressable ? getAddressableNumSGPRs() : 112;
357 }
358 } else {
359 switch (WavesPerEU) {
360 case 0: return 48;
361 case 10: return 48;
362 case 9: return 56;
363 case 8: return 64;
364 case 7: return 72;
365 case 6: return 80;
366 case 5: return 96;
367 default: return getAddressableNumSGPRs();
368 }
369 }
370 }
371
372325 unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
373326 const SIMachineFunctionInfo &MFI = *MF.getInfo();
374327 if (MFI.hasFlatScratchInit()) {
427380 }
428381
429382 if (hasSGPRInitBug())
430 MaxNumSGPRs = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
383 MaxNumSGPRs = AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
431384
432385 return std::min(MaxNumSGPRs - getReservedNumSGPRs(MF),
433386 MaxAddressableNumSGPRs);
434 }
435
436 unsigned SISubtarget::getMinNumVGPRs(unsigned WavesPerEU) const {
437 switch (WavesPerEU) {
438 case 0: return 0;
439 case 10: return 0;
440 case 9: return 25;
441 case 8: return 29;
442 case 7: return 33;
443 case 6: return 37;
444 case 5: return 41;
445 case 4: return 49;
446 case 3: return 65;
447 case 2: return 85;
448 default: return 129;
449 }
450 }
451
452 unsigned SISubtarget::getMaxNumVGPRs(unsigned WavesPerEU) const {
453 switch (WavesPerEU) {
454 case 0: return 24;
455 case 10: return 24;
456 case 9: return 28;
457 case 8: return 32;
458 case 7: return 36;
459 case 6: return 40;
460 case 5: return 48;
461 case 4: return 64;
462 case 3: return 84;
463 case 2: return 128;
464 default: return getTotalNumVGPRs();
465 }
466387 }
467388
468389 unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
364364 return true;
365365 }
366366
367 void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;}
368 bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal;}
369
367370 /// \returns Number of execution units per compute unit supported by the
368371 /// subtarget.
369372 unsigned getEUsPerCU() const {
370 return 4;
373 return AMDGPU::IsaInfo::getEUsPerCU(getFeatureBits());
371374 }
372375
373376 /// \returns Maximum number of work groups per compute unit supported by the
374 /// subtarget and limited by given flat work group size.
377 /// subtarget and limited by given \p FlatWorkGroupSize.
375378 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
376 if (getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
377 return 8;
378 return getWavesPerWorkGroup(FlatWorkGroupSize) == 1 ? 40 : 16;
379 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(getFeatureBits(),
380 FlatWorkGroupSize);
379381 }
380382
381383 /// \returns Maximum number of waves per compute unit supported by the
382384 /// subtarget without any kind of limitation.
383385 unsigned getMaxWavesPerCU() const {
384 return getMaxWavesPerEU() * getEUsPerCU();
386 return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits());
385387 }
386388
387389 /// \returns Maximum number of waves per compute unit supported by the
388 /// subtarget and limited by given flat work group size.
390 /// subtarget and limited by given \p FlatWorkGroupSize.
389391 unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
390 return getWavesPerWorkGroup(FlatWorkGroupSize);
392 return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits(),
393 FlatWorkGroupSize);
391394 }
392395
393396 /// \returns Minimum number of waves per execution unit supported by the
394397 /// subtarget.
395398 unsigned getMinWavesPerEU() const {
396 return 1;
399 return AMDGPU::IsaInfo::getMinWavesPerEU(getFeatureBits());
397400 }
398401
399402 /// \returns Maximum number of waves per execution unit supported by the
400403 /// subtarget without any kind of limitation.
401404 unsigned getMaxWavesPerEU() const {
402 if (getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
403 return 8;
404 // FIXME: Need to take scratch memory into account.
405 return 10;
405 return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits());
406406 }
407407
408408 /// \returns Maximum number of waves per execution unit supported by the
409 /// subtarget and limited by given flat work group size.
409 /// subtarget and limited by given \p FlatWorkGroupSize.
410410 unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
411 return alignTo(getMaxWavesPerCU(FlatWorkGroupSize), getEUsPerCU()) /
412 getEUsPerCU();
411 return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits(),
412 FlatWorkGroupSize);
413413 }
414414
415415 /// \returns Minimum flat work group size supported by the subtarget.
416416 unsigned getMinFlatWorkGroupSize() const {
417 return 1;
417 return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(getFeatureBits());
418418 }
419419
420420 /// \returns Maximum flat work group size supported by the subtarget.
421421 unsigned getMaxFlatWorkGroupSize() const {
422 return 2048;
423 }
424
425 /// \returns Number of waves per work group given the flat work group size.
422 return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(getFeatureBits());
423 }
424
425 /// \returns Number of waves per work group supported by the subtarget and
426 /// limited by given \p FlatWorkGroupSize.
426427 unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
427 return alignTo(FlatWorkGroupSize, getWavefrontSize()) / getWavefrontSize();
428 }
429
430 void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;}
431 bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal;}
428 return AMDGPU::IsaInfo::getWavesPerWorkGroup(getFeatureBits(),
429 FlatWorkGroupSize);
430 }
432431
433432 /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
434433 /// for function \p F, or minimum/maximum flat work group sizes explicitly
491490 };
492491
493492 class SISubtarget final : public AMDGPUSubtarget {
494 public:
495 enum {
496 // The closed Vulkan driver sets 96, which limits the wave count to 8 but
497 // doesn't spill SGPRs as much as when 80 is set.
498 FIXED_SGPR_COUNT_FOR_INIT_BUG = 96
499 };
500
501493 private:
502494 SIInstrInfo InstrInfo;
503495 SIFrameLowering FrameLowering;
643635
644636 /// \returns SGPR allocation granularity supported by the subtarget.
645637 unsigned getSGPRAllocGranule() const {
646 if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
647 return 16;
648 return 8;
638 return AMDGPU::IsaInfo::getSGPRAllocGranule(getFeatureBits());
649639 }
650640
651641 /// \returns SGPR encoding granularity supported by the subtarget.
652642 unsigned getSGPREncodingGranule() const {
653 return 8;
643 return AMDGPU::IsaInfo::getSGPREncodingGranule(getFeatureBits());
654644 }
655645
656646 /// \returns Total number of SGPRs supported by the subtarget.
657647 unsigned getTotalNumSGPRs() const {
658 if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
659 return 800;
660 return 512;
648 return AMDGPU::IsaInfo::getTotalNumSGPRs(getFeatureBits());
661649 }
662650
663651 /// \returns Addressable number of SGPRs supported by the subtarget.
664652 unsigned getAddressableNumSGPRs() const {
665 if (hasSGPRInitBug())
666 return SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
667 if (getGeneration() >= VOLCANIC_ISLANDS)
668 return 102;
669 return 104;
653 return AMDGPU::IsaInfo::getAddressableNumSGPRs(getFeatureBits());
670654 }
671655
672656 /// \returns Minimum number of SGPRs that meets the given number of waves per
673657 /// execution unit requirement supported by the subtarget.
674 unsigned getMinNumSGPRs(unsigned WavesPerEU) const;
658 unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
659 return AMDGPU::IsaInfo::getMinNumSGPRs(getFeatureBits(), WavesPerEU);
660 }
675661
676662 /// \returns Maximum number of SGPRs that meets the given number of waves per
677663 /// execution unit requirement supported by the subtarget.
678 unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const;
664 unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
665 return AMDGPU::IsaInfo::getMaxNumSGPRs(getFeatureBits(), WavesPerEU,
666 Addressable);
667 }
679668
680669 /// \returns Reserved number of SGPRs for given function \p MF.
681670 unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
692681
693682 /// \returns VGPR allocation granularity supported by the subtarget.
694683 unsigned getVGPRAllocGranule() const {
695 return 4;
684 return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());;
696685 }
697686
698687 /// \returns VGPR encoding granularity supported by the subtarget.
699688 unsigned getVGPREncodingGranule() const {
700 return getVGPRAllocGranule();
689 return AMDGPU::IsaInfo::getVGPREncodingGranule(getFeatureBits());
701690 }
702691
703692 /// \returns Total number of VGPRs supported by the subtarget.
704693 unsigned getTotalNumVGPRs() const {
705 return 256;
694 return AMDGPU::IsaInfo::getTotalNumVGPRs(getFeatureBits());
706695 }
707696
708697 /// \returns Addressable number of VGPRs supported by the subtarget.
709698 unsigned getAddressableNumVGPRs() const {
710 return getTotalNumVGPRs();
699 return AMDGPU::IsaInfo::getAddressableNumVGPRs(getFeatureBits());
711700 }
712701
713702 /// \returns Minimum number of VGPRs that meets given number of waves per
714703 /// execution unit requirement supported by the subtarget.
715 unsigned getMinNumVGPRs(unsigned WavesPerEU) const;
704 unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
705 return AMDGPU::IsaInfo::getMinNumVGPRs(getFeatureBits(), WavesPerEU);
706 }
716707
717708 /// \returns Maximum number of VGPRs that meets given number of waves per
718709 /// execution unit requirement supported by the subtarget.
719 unsigned getMaxNumVGPRs(unsigned WavesPerEU) const;
710 unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
711 return AMDGPU::IsaInfo::getMaxNumVGPRs(getFeatureBits(), WavesPerEU);
712 }
720713
721714 /// \returns Reserved number of VGPRs for given function \p MF.
722715 unsigned getReservedNumVGPRs(const MachineFunction &MF) const {
800800 // Currently there is no suitable machinery in the core llvm-mc for this.
801801 // MCSymbol::isRedefinable is intended for another purpose, and
802802 // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
803 AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(getSTI().getFeatureBits());
803 AMDGPU::IsaInfo::IsaVersion ISA =
804 AMDGPU::IsaInfo::getIsaVersion(getSTI().getFeatureBits());
804805 MCContext &Ctx = getContext();
805 MCSymbol *Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
806 Sym->setVariableValue(MCConstantExpr::create(Isa.Major, Ctx));
806 MCSymbol *Sym =
807 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
808 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
807809 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
808 Sym->setVariableValue(MCConstantExpr::create(Isa.Minor, Ctx));
810 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
809811 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
810 Sym->setVariableValue(MCConstantExpr::create(Isa.Stepping, Ctx));
812 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
811813 }
812814 KernelScope.initialize(getContext());
813815 }
18661868 // If this directive has no arguments, then use the ISA version for the
18671869 // targeted GPU.
18681870 if (getLexer().is(AsmToken::EndOfStatement)) {
1869 AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(getSTI().getFeatureBits());
1870 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Isa.Major, Isa.Minor,
1871 Isa.Stepping,
1871 AMDGPU::IsaInfo::IsaVersion ISA =
1872 AMDGPU::IsaInfo::getIsaVersion(getSTI().getFeatureBits());
1873 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
1874 ISA.Stepping,
18721875 "AMD", "AMDGPU");
18731876 return false;
18741877 }
24542457 if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma))
24552458 Parser.Lex();
24562459
2457 IsaVersion IV = getIsaVersion(getSTI().getFeatureBits());
2460 AMDGPU::IsaInfo::IsaVersion ISA =
2461 AMDGPU::IsaInfo::getIsaVersion(getSTI().getFeatureBits());
24582462 if (CntName == "vmcnt")
2459 IntVal = encodeVmcnt(IV, IntVal, CntVal);
2463 IntVal = encodeVmcnt(ISA, IntVal, CntVal);
24602464 else if (CntName == "expcnt")
2461 IntVal = encodeExpcnt(IV, IntVal, CntVal);
2465 IntVal = encodeExpcnt(ISA, IntVal, CntVal);
24622466 else if (CntName == "lgkmcnt")
2463 IntVal = encodeLgkmcnt(IV, IntVal, CntVal);
2467 IntVal = encodeLgkmcnt(ISA, IntVal, CntVal);
24642468 else
24652469 return true;
24662470
24692473
24702474 OperandMatchResultTy
24712475 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
2472 IsaVersion IV = getIsaVersion(getSTI().getFeatureBits());
2473 int64_t Waitcnt = getWaitcntBitMask(IV);
2476 AMDGPU::IsaInfo::IsaVersion ISA =
2477 AMDGPU::IsaInfo::getIsaVersion(getSTI().getFeatureBits());
2478 int64_t Waitcnt = getWaitcntBitMask(ISA);
24742479 SMLoc S = Parser.getTok().getLoc();
24752480
24762481 switch(getLexer().getKind()) {
10561056 void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
10571057 const MCSubtargetInfo &STI,
10581058 raw_ostream &O) {
1059 IsaVersion IV = getIsaVersion(STI.getFeatureBits());
1059 AMDGPU::IsaInfo::IsaVersion ISA =
1060 AMDGPU::IsaInfo::getIsaVersion(STI.getFeatureBits());
10601061
10611062 unsigned SImm16 = MI->getOperand(OpNo).getImm();
10621063 unsigned Vmcnt, Expcnt, Lgkmcnt;
1063 decodeWaitcnt(IV, SImm16, Vmcnt, Expcnt, Lgkmcnt);
1064 decodeWaitcnt(ISA, SImm16, Vmcnt, Expcnt, Lgkmcnt);
10641065
10651066 bool NeedSpace = false;
10661067
1067 if (Vmcnt != getVmcntBitMask(IV)) {
1068 if (Vmcnt != getVmcntBitMask(ISA)) {
10681069 O << "vmcnt(" << Vmcnt << ')';
10691070 NeedSpace = true;
10701071 }
10711072
1072 if (Expcnt != getExpcntBitMask(IV)) {
1073 if (Expcnt != getExpcntBitMask(ISA)) {
10731074 if (NeedSpace)
10741075 O << ' ';
10751076 O << "expcnt(" << Expcnt << ')';
10761077 NeedSpace = true;
10771078 }
10781079
1079 if (Lgkmcnt != getLgkmcntBitMask(IV)) {
1080 if (Lgkmcnt != getLgkmcntBitMask(ISA)) {
10801081 if (NeedSpace)
10811082 O << ' ';
10821083 O << "lgkmcnt(" << Lgkmcnt << ')';
1515 #include "AMDGPU.h"
1616 #include "AMDGPURuntimeMetadata.h"
1717 #include "MCTargetDesc/AMDGPURuntimeMD.h"
18 #include "Utils/AMDGPUBaseInfo.h"
1819 #include "llvm/ADT/SmallVector.h"
1920 #include "llvm/ADT/StringRef.h"
2021 #include "llvm/ADT/StringSwitch.h"
9192 static const bool flow = true;
9293 };
9394
95 template <> struct MappingTraits {
96 static void mapping(IO &YamlIO, IsaInfo::Metadata &I) {
97 YamlIO.mapRequired(KeyName::IsaInfoWavefrontSize, I.WavefrontSize);
98 YamlIO.mapRequired(KeyName::IsaInfoLocalMemorySize, I.LocalMemorySize);
99 YamlIO.mapRequired(KeyName::IsaInfoEUsPerCU, I.EUsPerCU);
100 YamlIO.mapRequired(KeyName::IsaInfoMaxWavesPerEU, I.MaxWavesPerEU);
101 YamlIO.mapRequired(KeyName::IsaInfoMaxFlatWorkGroupSize,
102 I.MaxFlatWorkGroupSize);
103 YamlIO.mapRequired(KeyName::IsaInfoSGPRAllocGranule, I.SGPRAllocGranule);
104 YamlIO.mapRequired(KeyName::IsaInfoTotalNumSGPRs, I.TotalNumSGPRs);
105 YamlIO.mapRequired(KeyName::IsaInfoAddressableNumSGPRs,
106 I.AddressableNumSGPRs);
107 YamlIO.mapRequired(KeyName::IsaInfoVGPRAllocGranule, I.VGPRAllocGranule);
108 YamlIO.mapRequired(KeyName::IsaInfoTotalNumVGPRs, I.TotalNumVGPRs);
109 YamlIO.mapRequired(KeyName::IsaInfoAddressableNumVGPRs,
110 I.AddressableNumVGPRs);
111 }
112 static const bool flow = true;
113 };
114
94115 template <> struct MappingTraits {
95116 static void mapping(IO &YamlIO, Program::Metadata &Prog) {
96117 YamlIO.mapRequired(KeyName::MDVersion, Prog.MDVersionSeq);
118 YamlIO.mapRequired(KeyName::IsaInfo, Prog.IsaInfo);
97119 YamlIO.mapOptional(KeyName::PrintfInfo, Prog.PrintfInfo);
98120 YamlIO.mapOptional(KeyName::Kernels, Prog.Kernels);
99121 }
382404 }
383405 }
384406
385 std::string llvm::getRuntimeMDYAMLString(Module &M) {
407 std::string llvm::getRuntimeMDYAMLString(const FeatureBitset &Features,
408 const Module &M) {
386409 Program::Metadata Prog;
387410 Prog.MDVersionSeq.push_back(MDVersion);
388411 Prog.MDVersionSeq.push_back(MDRevision);
412 Prog.IsaInfo.WavefrontSize = AMDGPU::IsaInfo::getWavefrontSize(Features);
413 Prog.IsaInfo.LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(Features);
414 Prog.IsaInfo.EUsPerCU = AMDGPU::IsaInfo::getEUsPerCU(Features);
415 Prog.IsaInfo.MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(Features);
416 Prog.IsaInfo.MaxFlatWorkGroupSize =
417 AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(Features);
418 Prog.IsaInfo.SGPRAllocGranule =
419 AMDGPU::IsaInfo::getSGPRAllocGranule(Features);
420 Prog.IsaInfo.TotalNumSGPRs = AMDGPU::IsaInfo::getTotalNumSGPRs(Features);
421 Prog.IsaInfo.AddressableNumSGPRs =
422 AMDGPU::IsaInfo::getAddressableNumSGPRs(Features);
423 Prog.IsaInfo.VGPRAllocGranule =
424 AMDGPU::IsaInfo::getVGPRAllocGranule(Features);
425 Prog.IsaInfo.TotalNumVGPRs = AMDGPU::IsaInfo::getTotalNumVGPRs(Features);
426 Prog.IsaInfo.AddressableNumVGPRs =
427 AMDGPU::IsaInfo::getAddressableNumVGPRs(Features);
389428
390429 // Set PrintfInfo.
391430 if (auto MD = M.getNamedMetadata("llvm.printf.fmts")) {
1616 #include
1717
1818 namespace llvm {
19 class FeatureBitset;
1920 class Module;
2021
2122 // Get runtime metadata as YAML string.
22 std::string getRuntimeMDYAMLString(Module &M);
23 std::string getRuntimeMDYAMLString(const FeatureBitset &Features,
24 const Module &M);
2325
2426 }
2527 #endif
9292 OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n';
9393 }
9494
95 void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(Module &M) {
95 void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(const FeatureBitset &Features,
96 const Module &M) {
9697 OS << "\t.amdgpu_runtime_metadata\n";
97 OS << getRuntimeMDYAMLString(M);
98 OS << getRuntimeMDYAMLString(Features, M);
9899 OS << "\n\t.end_amdgpu_runtime_metadata\n";
99100 }
100101
235236 );
236237 }
237238
238 void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(Module &M) {
239 EmitRuntimeMetadata(getRuntimeMDYAMLString(M));
240 }
239 void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(const FeatureBitset &Features,
240 const Module &M) {
241 EmitRuntimeMetadata(getRuntimeMDYAMLString(Features, M));
242 }
1616 #include "AMDGPUPTNote.h"
1717
1818 class DataLayout;
19 class FeatureBitset;
1920 class Function;
2021 class MCELFStreamer;
2122 class MCSymbol;
4546
4647 virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0;
4748
48 virtual void EmitRuntimeMetadata(Module &M) = 0;
49 virtual void EmitRuntimeMetadata(const FeatureBitset &Features,
50 const Module &M) = 0;
4951
5052 virtual void EmitRuntimeMetadata(StringRef Metadata) = 0;
5153 };
6971
7072 void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
7173
72 void EmitRuntimeMetadata(Module &M) override;
74 void EmitRuntimeMetadata(const FeatureBitset &Features,
75 const Module &M) override;
7376
7477 void EmitRuntimeMetadata(StringRef Metadata) override;
7578 };
100103
101104 void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
102105
103 void EmitRuntimeMetadata(Module &M) override;
106 void EmitRuntimeMetadata(const FeatureBitset &Features,
107 const Module &M) override;
104108
105109 void EmitRuntimeMetadata(StringRef Metadata) override;
106110 };
4646 #define DEBUG_TYPE "si-insert-waits"
4747
4848 using namespace llvm;
49 using namespace llvm::AMDGPU;
5049
5150 namespace {
5251
7574 const SIInstrInfo *TII = nullptr;
7675 const SIRegisterInfo *TRI = nullptr;
7776 const MachineRegisterInfo *MRI;
78 IsaVersion IV;
77 AMDGPU::IsaInfo::IsaVersion ISA;
7978
8079 /// \brief Constant zero value
8180 static const Counters ZeroCounts;
426425
427426 // Build the wait instruction
428427 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
429 .addImm(encodeWaitcnt(IV,
430 Counts.Named.VM,
431 Counts.Named.EXP,
432 Counts.Named.LGKM));
428 .addImm(AMDGPU::encodeWaitcnt(ISA,
429 Counts.Named.VM,
430 Counts.Named.EXP,
431 Counts.Named.LGKM));
433432
434433 LastOpcodeType = OTHER;
435434 LastInstWritesM0 = false;
457456 unsigned Imm = I->getOperand(0).getImm();
458457 Counters Counts, WaitOn;
459458
460 Counts.Named.VM = decodeVmcnt(IV, Imm);
461 Counts.Named.EXP = decodeExpcnt(IV, Imm);
462 Counts.Named.LGKM = decodeLgkmcnt(IV, Imm);
459 Counts.Named.VM = AMDGPU::decodeVmcnt(ISA, Imm);
460 Counts.Named.EXP = AMDGPU::decodeExpcnt(ISA, Imm);
461 Counts.Named.LGKM = AMDGPU::decodeLgkmcnt(ISA, Imm);
463462
464463 for (unsigned i = 0; i < 3; ++i) {
465464 if (Counts.Array[i] <= LastIssued.Array[i])
533532 TII = ST->getInstrInfo();
534533 TRI = &TII->getRegisterInfo();
535534 MRI = &MF.getRegInfo();
536 IV = getIsaVersion(ST->getFeatureBits());
535 ISA = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits());
537536 const SIMachineFunctionInfo *MFI = MF.getInfo();
538537
539 HardwareLimits.Named.VM = getVmcntBitMask(IV);
540 HardwareLimits.Named.EXP = getExpcntBitMask(IV);
541 HardwareLimits.Named.LGKM = getLgkmcntBitMask(IV);
538 HardwareLimits.Named.VM = AMDGPU::getVmcntBitMask(ISA);
539 HardwareLimits.Named.EXP = AMDGPU::getExpcntBitMask(ISA);
540 HardwareLimits.Named.LGKM = AMDGPU::getLgkmcntBitMask(ISA);
542541
543542 WaitedOn = ZeroCounts;
544543 DelayedWaitOn = ZeroCounts;
7575 /// \returns Lgkmcnt bit width.
7676 unsigned getLgkmcntBitWidth() { return 4; }
7777
78 } // anonymous namespace
78 } // namespace anonymous
7979
8080 namespace llvm {
8181 namespace AMDGPU {
8282
83 namespace IsaInfo {
84
8385 IsaVersion getIsaVersion(const FeatureBitset &Features) {
84
86 // CI.
8587 if (Features.test(FeatureISAVersion7_0_0))
8688 return {7, 0, 0};
87
8889 if (Features.test(FeatureISAVersion7_0_1))
8990 return {7, 0, 1};
90
9191 if (Features.test(FeatureISAVersion7_0_2))
9292 return {7, 0, 2};
9393
94 // VI.
9495 if (Features.test(FeatureISAVersion8_0_0))
9596 return {8, 0, 0};
96
9797 if (Features.test(FeatureISAVersion8_0_1))
9898 return {8, 0, 1};
99
10099 if (Features.test(FeatureISAVersion8_0_2))
101100 return {8, 0, 2};
102
103101 if (Features.test(FeatureISAVersion8_0_3))
104102 return {8, 0, 3};
105
106103 if (Features.test(FeatureISAVersion8_0_4))
107104 return {8, 0, 4};
108
109105 if (Features.test(FeatureISAVersion8_1_0))
110106 return {8, 1, 0};
111107
112 return {0, 0, 0};
113 }
108 if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
109 return {0, 0, 0};
110 return {7, 0, 0};
111 }
112
113 unsigned getWavefrontSize(const FeatureBitset &Features) {
114 if (Features.test(FeatureWavefrontSize16))
115 return 16;
116 if (Features.test(FeatureWavefrontSize32))
117 return 32;
118
119 return 64;
120 }
121
122 unsigned getLocalMemorySize(const FeatureBitset &Features) {
123 if (Features.test(FeatureLocalMemorySize32768))
124 return 32768;
125 if (Features.test(FeatureLocalMemorySize65536))
126 return 65536;
127
128 return 0;
129 }
130
131 unsigned getEUsPerCU(const FeatureBitset &Features) {
132 return 4;
133 }
134
135 unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
136 unsigned FlatWorkGroupSize) {
137 if (!Features.test(FeatureGCN))
138 return 8;
139 return getWavesPerWorkGroup(Features, FlatWorkGroupSize) == 1 ? 40 : 16;
140 }
141
142 unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
143 return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
144 }
145
146 unsigned getMaxWavesPerCU(const FeatureBitset &Features,
147 unsigned FlatWorkGroupSize) {
148 return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
149 }
150
151 unsigned getMinWavesPerEU(const FeatureBitset &Features) {
152 return 1;
153 }
154
155 unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
156 if (!Features.test(FeatureGCN))
157 return 8;
158 // FIXME: Need to take scratch memory into account.
159 return 10;
160 }
161
162 unsigned getMaxWavesPerEU(const FeatureBitset &Features,
163 unsigned FlatWorkGroupSize) {
164 return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
165 getEUsPerCU(Features)) / getEUsPerCU(Features);
166 }
167
168 unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
169 return 1;
170 }
171
172 unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
173 return 2048;
174 }
175
176 unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
177 unsigned FlatWorkGroupSize) {
178 return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
179 getWavefrontSize(Features);
180 }
181
182 unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
183 IsaVersion Version = getIsaVersion(Features);
184 if (Version.Major >= 8)
185 return 16;
186 return 8;
187 }
188
189 unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
190 return 8;
191 }
192
193 unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
194 IsaVersion Version = getIsaVersion(Features);
195 if (Version.Major >= 8)
196 return 800;
197 return 512;
198 }
199
200 unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
201 if (Features.test(FeatureSGPRInitBug))
202 return FIXED_NUM_SGPRS_FOR_INIT_BUG;
203
204 IsaVersion Version = getIsaVersion(Features);
205 if (Version.Major >= 8)
206 return 102;
207 return 104;
208 }
209
210 unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
211 IsaVersion Version = getIsaVersion(Features);
212 if (Version.Major >= 8) {
213 switch (WavesPerEU) {
214 case 0: return 0;
215 case 10: return 0;
216 case 9: return 0;
217 case 8: return 81;
218 default: return 97;
219 }
220 } else {
221 switch (WavesPerEU) {
222 case 0: return 0;
223 case 10: return 0;
224 case 9: return 49;
225 case 8: return 57;
226 case 7: return 65;
227 case 6: return 73;
228 case 5: return 81;
229 default: return 97;
230 }
231 }
232 }
233
234 unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
235 bool Addressable) {
236 IsaVersion Version = getIsaVersion(Features);
237 if (Version.Major >= 8) {
238 switch (WavesPerEU) {
239 case 0: return 80;
240 case 10: return 80;
241 case 9: return 80;
242 case 8: return 96;
243 default: return Addressable ? getAddressableNumSGPRs(Features) : 112;
244 }
245 } else {
246 switch (WavesPerEU) {
247 case 0: return 48;
248 case 10: return 48;
249 case 9: return 56;
250 case 8: return 64;
251 case 7: return 72;
252 case 6: return 80;
253 case 5: return 96;
254 default: return getAddressableNumSGPRs(Features);
255 }
256 }
257 }
258
259 unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
260 return 4;
261 }
262
263 unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
264 return getVGPRAllocGranule(Features);
265 }
266
267 unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
268 return 256;
269 }
270
271 unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
272 return getTotalNumVGPRs(Features);
273 }
274
275 unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
276 switch (WavesPerEU) {
277 case 0: return 0;
278 case 10: return 0;
279 case 9: return 25;
280 case 8: return 29;
281 case 7: return 33;
282 case 6: return 37;
283 case 5: return 41;
284 case 4: return 49;
285 case 3: return 65;
286 case 2: return 85;
287 default: return 129;
288 }
289 }
290
291 unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
292 switch (WavesPerEU) {
293 case 0: return 24;
294 case 10: return 24;
295 case 9: return 28;
296 case 8: return 32;
297 case 7: return 36;
298 case 6: return 40;
299 case 5: return 48;
300 case 4: return 64;
301 case 3: return 84;
302 case 2: return 128;
303 default: return getTotalNumVGPRs(Features);
304 }
305 }
306
307 } // namespace IsaInfo
114308
115309 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
116310 const FeatureBitset &Features) {
117
118 IsaVersion ISA = getIsaVersion(Features);
311 IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);
119312
120313 memset(&Header, 0, sizeof(Header));
121314
223416 return Ints;
224417 }
225418
226 unsigned getWaitcntBitMask(IsaVersion Version) {
419 unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
420 return (1 << getVmcntBitWidth()) - 1;
421 }
422
423 unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
424 return (1 << getExpcntBitWidth()) - 1;
425 }
426
427 unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
428 return (1 << getLgkmcntBitWidth()) - 1;
429 }
430
431 unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
227432 unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth());
228433 unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
229434 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
230435 return Vmcnt | Expcnt | Lgkmcnt;
231436 }
232437
233 unsigned getVmcntBitMask(IsaVersion Version) {
234 return (1 << getVmcntBitWidth()) - 1;
235 }
236
237 unsigned getExpcntBitMask(IsaVersion Version) {
238 return (1 << getExpcntBitWidth()) - 1;
239 }
240
241 unsigned getLgkmcntBitMask(IsaVersion Version) {
242 return (1 << getLgkmcntBitWidth()) - 1;
243 }
244
245 unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt) {
438 unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
246439 return unpackBits(Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
247440 }
248441
249 unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt) {
442 unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
250443 return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
251444 }
252445
253 unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt) {
446 unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
254447 return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
255448 }
256449
257 void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt,
450 void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
258451 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
259452 Vmcnt = decodeVmcnt(Version, Waitcnt);
260453 Expcnt = decodeExpcnt(Version, Waitcnt);
261454 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
262455 }
263456
264 unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt) {
457 unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
458 unsigned Vmcnt) {
265459 return packBits(Vmcnt, Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
266460 }
267461
268 unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt) {
462 unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
463 unsigned Expcnt) {
269464 return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
270465 }
271466
272 unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt) {
467 unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
468 unsigned Lgkmcnt) {
273469 return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
274470 }
275471
276 unsigned encodeWaitcnt(IsaVersion Version,
472 unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
277473 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
278474 unsigned Waitcnt = getWaitcntBitMask(Version);
279475 Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
3333
3434 namespace AMDGPU {
3535
36 LLVM_READONLY
37 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
38
36 namespace IsaInfo {
37
38 enum {
39 // The closed Vulkan driver sets 96, which limits the wave count to 8 but
40 // doesn't spill SGPRs as much as when 80 is set.
41 FIXED_NUM_SGPRS_FOR_INIT_BUG = 96
42 };
43
44 /// \brief Instruction set architecture version.
3945 struct IsaVersion {
4046 unsigned Major;
4147 unsigned Minor;
4248 unsigned Stepping;
4349 };
4450
51 /// \returns Isa version for given subtarget \p Features.
4552 IsaVersion getIsaVersion(const FeatureBitset &Features);
53
54 /// \returns Wavefront size for given subtarget \p Features.
55 unsigned getWavefrontSize(const FeatureBitset &Features);
56
57 /// \returns Local memory size in bytes for given subtarget \p Features.
58 unsigned getLocalMemorySize(const FeatureBitset &Features);
59
60 /// \returns Number of execution units per compute unit for given subtarget \p
61 /// Features.
62 unsigned getEUsPerCU(const FeatureBitset &Features);
63
64 /// \returns Maximum number of work groups per compute unit for given subtarget
65 /// \p Features and limited by given \p FlatWorkGroupSize.
66 unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
67 unsigned FlatWorkGroupSize);
68
69 /// \returns Maximum number of waves per compute unit for given subtarget \p
70 /// Features without any kind of limitation.
71 unsigned getMaxWavesPerCU(const FeatureBitset &Features);
72
73 /// \returns Maximum number of waves per compute unit for given subtarget \p
74 /// Features and limited by given \p FlatWorkGroupSize.
75 unsigned getMaxWavesPerCU(const FeatureBitset &Features,
76 unsigned FlatWorkGroupSize);
77
78 /// \returns Minimum number of waves per execution unit for given subtarget \p
79 /// Features.
80 unsigned getMinWavesPerEU(const FeatureBitset &Features);
81
82 /// \returns Maximum number of waves per execution unit for given subtarget \p
83 /// Features without any kind of limitation.
84 unsigned getMaxWavesPerEU(const FeatureBitset &Features);
85
86 /// \returns Maximum number of waves per execution unit for given subtarget \p
87 /// Features and limited by given \p FlatWorkGroupSize.
88 unsigned getMaxWavesPerEU(const FeatureBitset &Features,
89 unsigned FlatWorkGroupSize);
90
91 /// \returns Minimum flat work group size for given subtarget \p Features.
92 unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features);
93
94 /// \returns Maximum flat work group size for given subtarget \p Features.
95 unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features);
96
97 /// \returns Number of waves per work group for given subtarget \p Features and
98 /// limited by given \p FlatWorkGroupSize.
99 unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
100 unsigned FlatWorkGroupSize);
101
102 /// \returns SGPR allocation granularity for given subtarget \p Features.
103 unsigned getSGPRAllocGranule(const FeatureBitset &Features);
104
105 /// \returns SGPR encoding granularity for given subtarget \p Features.
106 unsigned getSGPREncodingGranule(const FeatureBitset &Features);
107
108 /// \returns Total number of SGPRs for given subtarget \p Features.
109 unsigned getTotalNumSGPRs(const FeatureBitset &Features);
110
111 /// \returns Addressable number of SGPRs for given subtarget \p Features.
112 unsigned getAddressableNumSGPRs(const FeatureBitset &Features);
113
114 /// \returns Minimum number of SGPRs that meets the given number of waves per
115 /// execution unit requirement for given subtarget \p Features.
116 unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
117
118 /// \returns Maximum number of SGPRs that meets the given number of waves per
119 /// execution unit requirement for given subtarget \p Features.
120 unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
121 bool Addressable);
122
123 /// \returns VGPR allocation granularity for given subtarget \p Features.
124 unsigned getVGPRAllocGranule(const FeatureBitset &Features);
125
126 /// \returns VGPR encoding granularity for given subtarget \p Features.
127 unsigned getVGPREncodingGranule(const FeatureBitset &Features);
128
129 /// \returns Total number of VGPRs for given subtarget \p Features.
130 unsigned getTotalNumVGPRs(const FeatureBitset &Features);
131
132 /// \returns Addressable number of VGPRs for given subtarget \p Features.
133 unsigned getAddressableNumVGPRs(const FeatureBitset &Features);
134
135 /// \returns Minimum number of VGPRs that meets given number of waves per
136 /// execution unit requirement for given subtarget \p Features.
137 unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
138
139 /// \returns Maximum number of VGPRs that meets given number of waves per
140 /// execution unit requirement for given subtarget \p Features.
141 unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
142
143 } // namespace IsaInfo
144
145 LLVM_READONLY
146 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
147
46148 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
47149 const FeatureBitset &Features);
48150 MCSection *getHSATextSection(MCContext &Ctx);
83185 std::pair Default,
84186 bool OnlyFirstRequired = false);
85187
188 /// \returns Vmcnt bit mask for given isa \p Version.
189 unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version);
190
191 /// \returns Expcnt bit mask for given isa \p Version.
192 unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version);
193
194 /// \returns Lgkmcnt bit mask for given isa \p Version.
195 unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version);
196
86197 /// \returns Waitcnt bit mask for given isa \p Version.
87 unsigned getWaitcntBitMask(IsaVersion Version);
88
89 /// \returns Vmcnt bit mask for given isa \p Version.
90 unsigned getVmcntBitMask(IsaVersion Version);
91
92 /// \returns Expcnt bit mask for given isa \p Version.
93 unsigned getExpcntBitMask(IsaVersion Version);
94
95 /// \returns Lgkmcnt bit mask for given isa \p Version.
96 unsigned getLgkmcntBitMask(IsaVersion Version);
198 unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version);
97199
98200 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
99 unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt);
201 unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
100202
101203 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
102 unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt);
204 unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
103205
104206 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
105 unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt);
207 unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
106208
107209 /// \brief Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
108210 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
112214 /// \p Vmcnt = \p Waitcnt[3:0]
113215 /// \p Expcnt = \p Waitcnt[6:4]
114216 /// \p Lgkmcnt = \p Waitcnt[11:8]
115 void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt,
217 void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
116218 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
117219
118220 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
119 unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt);
221 unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
222 unsigned Vmcnt);
120223
121224 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
122 unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt);
225 unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
226 unsigned Expcnt);
123227
124228 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
125 unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt);
229 unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
230 unsigned Lgkmcnt);
126231
127232 /// \brief Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
128233 /// \p Version.
134239 ///
135240 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
136241 /// isa \p Version.
137 unsigned encodeWaitcnt(IsaVersion Version,
242 unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
138243 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
139244
140245 unsigned getInitialPSInputAddr(const Function &F);
0 ; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s
11 ; check llc does not crash for invalid opencl version metadata
22
3 ; CHECK: { amd.MDVersion: [ 2, 0 ] }
3 ; CHECK: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 } }
44
55 !opencl.ocl.version = !{}
0 ; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s
11 ; check llc does not crash for invalid opencl version metadata
22
3 ; CHECK: { amd.MDVersion: [ 2, 0 ] }
3 ; CHECK: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 } }
44
55 !opencl.ocl.version = !{!0}
66 !0 = !{}
0 ; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s
11 ; check llc does not crash for invalid opencl version metadata
22
3 ; CHECK: { amd.MDVersion: [ 2, 0 ] }
3 ; CHECK: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 } }
44
55 !opencl.ocl.version = !{!0}
66 !0 = !{i32 1}
None ; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata -elf-output-style=GNU -notes | FileCheck %s --check-prefix=NOTES
0 ; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata -elf-output-style=GNU -notes | FileCheck %s --check-prefix=NOTES --check-prefix=SI
1 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata -elf-output-style=GNU -notes | FileCheck %s --check-prefix=NOTES --check-prefix=VI
12 ; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -amdgpu-dump-rtmd -amdgpu-check-rtmd-parser %s -o - 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=PARSER %s
23
34 %struct.A = type { i8, float }
1011 %opencl.clk_event_t = type opaque
1112
1213 ; CHECK: ---
13 ; CHECK-NEXT: { amd.MDVersion: [ 2, 0 ], amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
14
15 ; CHECK-NEXT: - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
14 ; SI: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
15 ; VI: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 16, amd.IsaInfoTotalNumSGPRs: 800, amd.IsaInfoAddressableNumSGPRs: 102, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
16
17 ; CHECK: - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
1618 ; CHECK-NEXT: - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 }
1719 ; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
1820 ; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
344346 ; NOTES-NEXT: Owner Data size Description
345347 ; NOTES-NEXT: AMD 0x00000008 Unknown note type: (0x00000001)
346348 ; NOTES-NEXT: AMD 0x0000001b Unknown note type: (0x00000003)
347 ; NOTES-NEXT: AMD 0x00005196 Unknown note type: (0x00000008)
349
350 ; SI: AMD 0x0000530d Unknown note type: (0x00000008)
351 ; VI: AMD 0x0000530e Unknown note type: (0x00000008)
348352
349353 !llvm.printf.fmts = !{!100, !101}
350354