llvm.org GIT mirror llvm / d8dc65b
[AMDGPU][llvm-mc] Predefined symbols to access register counts (.kernel.{v|s}gpr_count) The feature allows for conditional assembly, filling the entries of .amd_kernel_code_t etc. Symbols are defined with value 0 at the beginning of each kernel scope. After each register usage, the respective symbol is set to: value = max( value, ( register index + 1 ) ) Thus, at the end of a scope the value represents the count of used registers. Kernel scopes begin at a .amdgpu_hsa_kernel directive and end at the next .amdgpu_hsa_kernel (or EOF, whichever comes first). There is also a dummy scope that spans from the beginning of the source file until the first .amdgpu_hsa_kernel. Test added. Differential Revision: https://reviews.llvm.org/D27859 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@290608 91177308-0d34-0410-b5e6-96231b3b80d8 Artem Tamazov 3 years ago
4 changed file(s) with 162 addition(s) and 54 deletion(s). Raw diff Collapse all Expand all
660660 // AsmParser
661661 //===----------------------------------------------------------------------===//
662662
663 // Holds info related to the current kernel, e.g. count of SGPRs used.
664 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
665 // .amdgpu_hsa_kernel or at EOF.
666 class KernelScopeInfo {
667 int SgprIndexUnusedMin;
668 int VgprIndexUnusedMin;
669 MCContext *Ctx;
670
671 void usesSgprAt(int i) {
672 if (i >= SgprIndexUnusedMin) {
673 SgprIndexUnusedMin = ++i;
674 if (Ctx) {
675 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
676 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
677 }
678 }
679 }
680 void usesVgprAt(int i) {
681 if (i >= VgprIndexUnusedMin) {
682 VgprIndexUnusedMin = ++i;
683 if (Ctx) {
684 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
685 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
686 }
687 }
688 }
689 public:
690 KernelScopeInfo() : SgprIndexUnusedMin(-1), VgprIndexUnusedMin(-1), Ctx(nullptr)
691 {}
692 void initialize(MCContext &Context) {
693 Ctx = &Context;
694 usesSgprAt(SgprIndexUnusedMin = -1);
695 usesVgprAt(VgprIndexUnusedMin = -1);
696 }
697 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
698 switch (RegKind) {
699 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
700 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
701 default: break;
702 }
703 }
704 };
705
663706 class AMDGPUAsmParser : public MCTargetAsmParser {
664707 const MCInstrInfo &MII;
665708 MCAsmParser &Parser;
667710 unsigned ForcedEncodingSize;
668711 bool ForcedDPP;
669712 bool ForcedSDWA;
713 KernelScopeInfo KernelScope;
670714
671715 /// @name Auto-generated Match Functions
672716 /// {
692736 bool ParseSectionDirectiveHSADataGlobalProgram();
693737 bool ParseSectionDirectiveHSARodataReadonlyAgent();
694738 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, RegisterKind RegKind, unsigned Reg1, unsigned RegNum);
695 bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth);
739 bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth, unsigned *DwordRegIndex);
696740 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, bool IsAtomic, bool IsAtomicReturn);
697741
698742 public:
730774 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
731775 Sym->setVariableValue(MCConstantExpr::create(Isa.Stepping, Ctx));
732776 }
777 KernelScope.initialize(getContext());
733778 }
734779
735780 bool isSI() const {
12391284 }
12401285 }
12411286
1242 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth)
1287 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth, unsigned *DwordRegIndex)
12431288 {
1289 if (DwordRegIndex) { *DwordRegIndex = 0; }
12441290 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
12451291 if (getLexer().is(AsmToken::Identifier)) {
12461292 StringRef RegName = Parser.getTok().getString();
13001346 } else if (getLexer().is(AsmToken::LBrac)) {
13011347 // List of consecutive registers: [s0,s1,s2,s3]
13021348 Parser.Lex();
1303 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
1349 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
13041350 return false;
13051351 if (RegWidth != 1)
13061352 return false;
13121358 } else if (getLexer().is(AsmToken::RBrac)) {
13131359 Parser.Lex();
13141360 break;
1315 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1)) {
1361 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
13161362 if (RegWidth1 != 1) {
13171363 return false;
13181364 }
13401386 {
13411387 unsigned Size = 1;
13421388 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
1343 // SGPR and TTMP registers must be are aligned. Max required alignment is 4 dwords.
1389 // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
13441390 Size = std::min(RegWidth, 4u);
13451391 }
13461392 if (RegNum % Size != 0)
13471393 return false;
1394 if (DwordRegIndex) { *DwordRegIndex = RegNum; }
13481395 RegNum = RegNum / Size;
13491396 int RCID = getRegClass(RegKind, RegWidth);
13501397 if (RCID == -1)
13701417 SMLoc StartLoc = Tok.getLoc();
13711418 SMLoc EndLoc = Tok.getEndLoc();
13721419 RegisterKind RegKind;
1373 unsigned Reg, RegNum, RegWidth;
1374
1375 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
1420 unsigned Reg, RegNum, RegWidth, DwordRegIndex;
1421
1422 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
13761423 return nullptr;
13771424 }
1425 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
13781426 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
13791427 }
13801428
18411889 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
18421890 ELF::STT_AMDGPU_HSA_KERNEL);
18431891 Lex();
1892 KernelScope.initialize(getContext());
18441893 return false;
18451894 }
18461895
0 // RUN: llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s
1
2 .byte .kernel.sgpr_count
3 // CHECK: .byte 0
4 .byte .kernel.vgpr_count
5 // CHECK: .byte 0
6 v_mov_b32_e32 v5, s8
7 s_endpgm
8 .byte .kernel.sgpr_count
9 // CHECK: .byte 9
10 .byte .kernel.vgpr_count
11 // CHECK: .byte 6
12
13 .amdgpu_hsa_kernel K1
14 K1:
15 .byte .kernel.sgpr_count
16 // CHECK: .byte 0
17 .byte .kernel.vgpr_count
18 // CHECK: .byte 0
19 v_mov_b32_e32 v1, s86
20 s_endpgm
21 .byte .kernel.sgpr_count
22 // CHECK: .byte 87
23 .byte .kernel.vgpr_count
24 // CHECK: .byte 2
25
26 .amdgpu_hsa_kernel K2
27 .byte .kernel.sgpr_count
28 // CHECK: .byte 0
29 .byte .kernel.vgpr_count
30 // CHECK: .byte 0
31 K2:
32 s_load_dwordx8 s[16:23], s[0:1], 0x0
33 v_mov_b32_e32 v0, v0
34 s_endpgm
35 .byte .kernel.sgpr_count
36 // CHECK: .byte 24
37 .byte .kernel.vgpr_count
38 // CHECK: .byte 1
39
40 .text
41 .amdgpu_hsa_kernel K3
42 K3:
43 A = .kernel.vgpr_count
44 v_mov_b32_e32 v[A], s0
45 B = .kernel.vgpr_count
46 v_mov_b32_e32 v[B], s0
47 v_mov_b32_e32 v[B], v[A]
48 C = .kernel.vgpr_count
49 v_mov_b32_e32 v[C], v[A]
50 D = .kernel.sgpr_count + 3 // align
51 E = D + 4
52 s_load_dwordx4 s[D:D+3], s[E:E+1], 0x0
53 s_endpgm
54
55 .byte .kernel.sgpr_count
56 // CHECK: .byte 10
57 .byte .kernel.vgpr_count
58 // CHECK: .byte 3
0 // RUN: llvm-mc -arch=amdgcn -mcpu=SI %s | FileCheck %s --check-prefix=SI
1 // RUN: llvm-mc -arch=amdgcn -mcpu=bonaire %s | FileCheck %s --check-prefix=BONAIRE
2 // RUN: llvm-mc -arch=amdgcn -mcpu=hawaii %s | FileCheck %s --check-prefix=HAWAII
3 // RUN: llvm-mc -arch=amdgcn -mcpu=kabini %s | FileCheck %s --check-prefix=KABINI
4 // RUN: llvm-mc -arch=amdgcn -mcpu=iceland %s | FileCheck %s --check-prefix=ICELAND
5 // RUN: llvm-mc -arch=amdgcn -mcpu=carrizo %s | FileCheck %s --check-prefix=CARRIZO
6 // RUN: llvm-mc -arch=amdgcn -mcpu=tonga %s | FileCheck %s --check-prefix=TONGA
7 // RUN: llvm-mc -arch=amdgcn -mcpu=fiji %s | FileCheck %s --check-prefix=FIJI
8 // RUN: llvm-mc -arch=amdgcn -mcpu=gfx804 %s | FileCheck %s --check-prefix=GFX804
9 // RUN: llvm-mc -arch=amdgcn -mcpu=stoney %s | FileCheck %s --check-prefix=STONEY
10
11 .byte .option.machine_version_major
12 // SI: .byte 0
13 // BONAIRE: .byte 7
14 // HAWAII: .byte 7
15 // KABINI: .byte 7
16 // ICELAND: .byte 8
17 // CARRIZO: .byte 8
18 // TONGA: .byte 8
19 // FIJI: .byte 8
20 // GFX804: .byte 8
21 // STONEY: .byte 8
22
23 .byte .option.machine_version_minor
24 // SI: .byte 0
25 // BONAIRE: .byte 0
26 // HAWAII: .byte 0
27 // KABINI: .byte 0
28 // ICELAND: .byte 0
29 // CARRIZO: .byte 0
30 // TONGA: .byte 0
31 // FIJI: .byte 0
32 // GFX804: .byte 0
33 // STONEY: .byte 1
34
35 .byte .option.machine_version_stepping
36 // SI: .byte 0
37 // BONAIRE: .byte 0
38 // HAWAII: .byte 1
39 // KABINI: .byte 2
40 // ICELAND: .byte 0
41 // CARRIZO: .byte 1
42 // TONGA: .byte 2
43 // FIJI: .byte 3
44 // GFX804: .byte 4
45 // STONEY: .byte 0
+0
-46
test/MC/AMDGPU/symbol_special.s less more
None // RUN: llvm-mc -arch=amdgcn -mcpu=SI %s | FileCheck %s --check-prefix=SI
1 // RUN: llvm-mc -arch=amdgcn -mcpu=bonaire %s | FileCheck %s --check-prefix=BONAIRE
2 // RUN: llvm-mc -arch=amdgcn -mcpu=hawaii %s | FileCheck %s --check-prefix=HAWAII
3 // RUN: llvm-mc -arch=amdgcn -mcpu=kabini %s | FileCheck %s --check-prefix=KABINI
4 // RUN: llvm-mc -arch=amdgcn -mcpu=iceland %s | FileCheck %s --check-prefix=ICELAND
5 // RUN: llvm-mc -arch=amdgcn -mcpu=carrizo %s | FileCheck %s --check-prefix=CARRIZO
6 // RUN: llvm-mc -arch=amdgcn -mcpu=tonga %s | FileCheck %s --check-prefix=TONGA
7 // RUN: llvm-mc -arch=amdgcn -mcpu=fiji %s | FileCheck %s --check-prefix=FIJI
8 // RUN: llvm-mc -arch=amdgcn -mcpu=gfx804 %s | FileCheck %s --check-prefix=GFX804
9 // RUN: llvm-mc -arch=amdgcn -mcpu=stoney %s | FileCheck %s --check-prefix=STONEY
10
11 .byte .option.machine_version_major
12 // SI: .byte 0
13 // BONAIRE: .byte 7
14 // HAWAII: .byte 7
15 // KABINI: .byte 7
16 // ICELAND: .byte 8
17 // CARRIZO: .byte 8
18 // TONGA: .byte 8
19 // FIJI: .byte 8
20 // GFX804: .byte 8
21 // STONEY: .byte 8
22
23 .byte .option.machine_version_minor
24 // SI: .byte 0
25 // BONAIRE: .byte 0
26 // HAWAII: .byte 0
27 // KABINI: .byte 0
28 // ICELAND: .byte 0
29 // CARRIZO: .byte 0
30 // TONGA: .byte 0
31 // FIJI: .byte 0
32 // GFX804: .byte 0
33 // STONEY: .byte 1
34
35 .byte .option.machine_version_stepping
36 // SI: .byte 0
37 // BONAIRE: .byte 0
38 // HAWAII: .byte 1
39 // KABINI: .byte 2
40 // ICELAND: .byte 0
41 // CARRIZO: .byte 1
42 // TONGA: .byte 2
43 // FIJI: .byte 3
44 // GFX804: .byte 4
45 // STONEY: .byte 0