llvm.org GIT mirror llvm / c08c654
[AMDGPU] gfx1010 s_code_end generation. Also add some missing metadata in the streamer. Differential Revision: https://reviews.llvm.org/D61531 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@359937 91177308-0d34-0410-b5e6-96231b3b80d8 Stanislav Mekhanoshin 1 year, 5 months ago
5 changed file(s) with 125 addition(s) and 2 deletion(s). Raw diff Collapse all Expand all
294294
295295 bool AMDGPUAsmPrinter::doFinalization(Module &M) {
296296 CallGraphResourceInfo.clear();
297
298 if (AMDGPU::isGFX10(*getGlobalSTI())) {
299 OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
300 getTargetStreamer()->EmitCodeEnd();
301 }
302
297303 return AsmPrinter::doFinalization(M);
298304 }
299305
927933 1ULL << ScratchAlignShift) >>
928934 ScratchAlignShift;
929935
936 if (getIsaVersion(getGlobalSTI()->getCPU()).Major >= 10) {
937 ProgInfo.WgpMode = STM.isCuModeEnabled() ? 0 : 1;
938 ProgInfo.MemOrdered = 1;
939 }
940
930941 ProgInfo.ComputePGMRSrc1 =
931942 S_00B848_VGPRS(ProgInfo.VGPRBlocks) |
932943 S_00B848_SGPRS(ProgInfo.SGPRBlocks) |
935946 S_00B848_PRIV(ProgInfo.Priv) |
936947 S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp) |
937948 S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
938 S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
949 S_00B848_IEEE_MODE(ProgInfo.IEEEMode) |
950 S_00B848_WGP_MODE(ProgInfo.WgpMode) |
951 S_00B848_MEM_ORDERED(ProgInfo.MemOrdered);
939952
940953 // 0 = X, 1 = XY, 2 = XYZ
941954 unsigned TIDIGCompCnt = 0;
10761089 Out.compute_pgm_resource_registers =
10771090 CurrentProgramInfo.ComputePGMRSrc1 |
10781091 (CurrentProgramInfo.ComputePGMRSrc2 << 32);
1079 Out.code_properties = AMD_CODE_PROPERTY_IS_PTR64;
1092 Out.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
10801093
10811094 if (CurrentProgramInfo.DynamicCallStack)
10821095 Out.code_properties |= AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK;
231231 OS << '\t' << V3::AssemblerDirectiveBegin << '\n';
232232 OS << StrOS.str() << '\n';
233233 OS << '\t' << V3::AssemblerDirectiveEnd << '\n';
234 return true;
235 }
236
237 bool AMDGPUTargetAsmStreamer::EmitCodeEnd() {
238 const uint32_t Encoded_s_code_end = 0xbf9f0000;
239 OS << "\t.p2alignl 6, " << Encoded_s_code_end << '\n';
240 OS << "\t.fill 32, 4, " << Encoded_s_code_end << '\n';
234241 return true;
235242 }
236243
551558 return true;
552559 }
553560
561 bool AMDGPUTargetELFStreamer::EmitCodeEnd() {
562 const uint32_t Encoded_s_code_end = 0xbf9f0000;
563
564 MCStreamer &OS = getStreamer();
565 OS.PushSection();
566 OS.EmitValueToAlignment(64, Encoded_s_code_end, 4);
567 for (unsigned I = 0; I < 32; ++I)
568 OS.EmitIntValue(Encoded_s_code_end, 4);
569 OS.PopSection();
570 return true;
571 }
572
554573 void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
555574 const MCSubtargetInfo &STI, StringRef KernelName,
556575 const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
7373 /// \returns True on success, false on failure.
7474 virtual bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) = 0;
7575
76 /// \returns True on success, false on failure.
77 virtual bool EmitCodeEnd() = 0;
78
7679 virtual void EmitAmdhsaKernelDescriptor(
7780 const MCSubtargetInfo &STI, StringRef KernelName,
7881 const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
111114
112115 /// \returns True on success, false on failure.
113116 bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override;
117
118 /// \returns True on success, false on failure.
119 bool EmitCodeEnd() override;
114120
115121 void EmitAmdhsaKernelDescriptor(
116122 const MCSubtargetInfo &STI, StringRef KernelName,
154160 /// \returns True on success, false on failure.
155161 bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override;
156162
163 /// \returns True on success, false on failure.
164 bool EmitCodeEnd() override;
165
157166 void EmitAmdhsaKernelDescriptor(
158167 const MCSubtargetInfo &STI, StringRef KernelName,
159168 const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
2727 uint32_t DX10Clamp = 0;
2828 uint32_t DebugMode = 0;
2929 uint32_t IEEEMode = 0;
30 uint32_t WgpMode = 0; // GFX10+
31 uint32_t MemOrdered = 0; // GFX10+
3032 uint64_t ScratchSize = 0;
3133
3234 uint64_t ComputePGMRSrc1 = 0;
0 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10,GFX10-ASM %s
1 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -filetype=obj < %s | llvm-objdump -arch=amdgcn -mcpu=gfx1010 -disassemble - | FileCheck -check-prefixes=GCN,GCN-OBJ,GFX10,GFX10-OBJ %s
2
3 ; GCN: a_kernel1:
4 ; GCN-NEXT: s_endpgm
5 ; GCN-ASM-NEXT: [[END_LABEL1:\.Lfunc_end.*]]:
6 ; GCN-ASM-NEXT: .size a_kernel1, [[END_LABEL1]]-a_kernel1
7 ; GCN-ASM: .section .AMDGPU.config
8
9 ; GCN-OBJ-NEXT: s_nop 0
10
11 define amdgpu_kernel void @a_kernel1() {
12 ret void
13 }
14
15 ; GCN: a_kernel2:
16 ; GCN-NEXT: s_endpgm
17 ; GCN-ASM-NEXT: [[END_LABEL2:\.Lfunc_end.*]]:
18 ; GCN-ASM-NEXT: .size a_kernel2, [[END_LABEL2]]-a_kernel2
19 ; GCN-ASM: .section .AMDGPU.config
20
21 ; GCN-OBJ-NEXT: {{^$}}
22
23 define amdgpu_kernel void @a_kernel2() {
24 ret void
25 }
26
27 ; GCN-ASM: .text
28 ; GCN-ASM-NEXT: .globl a_function
29 ; GCN-ASM-NEXT: .p2align 2
30 ; GCN-ASM-NEXT: .type a_function,@function
31
32 ; GCN-NEXT: a_function:
33 ; GCN: s_setpc_b64
34 ; GCN-ASM-NEXT: [[END_LABEL3:\.Lfunc_end.*]]:
35 ; GCN-ASM-NEXT: .size a_function, [[END_LABEL3]]-a_function
36 ; GFX10-ASM: .p2alignl 6, 3214868480
37 ; GFX10-ASM-NEXT: .fill 32, 4, 3214868480
38
39 ; GFX10-OBJ-NEXT: s_code_end
40
41 ; GFX10-OBJ: s_code_end // 000000000140:
42 ; GFX10-OBJ-NEXT: s_code_end
43 ; GFX10-OBJ-NEXT: s_code_end
44 ; GFX10-OBJ-NEXT: s_code_end
45 ; GFX10-OBJ-NEXT: s_code_end
46 ; GFX10-OBJ-NEXT: s_code_end
47 ; GFX10-OBJ-NEXT: s_code_end
48 ; GFX10-OBJ-NEXT: s_code_end
49
50 ; GFX10-OBJ-NEXT: s_code_end
51 ; GFX10-OBJ-NEXT: s_code_end
52 ; GFX10-OBJ-NEXT: s_code_end
53 ; GFX10-OBJ-NEXT: s_code_end
54 ; GFX10-OBJ-NEXT: s_code_end
55 ; GFX10-OBJ-NEXT: s_code_end
56 ; GFX10-OBJ-NEXT: s_code_end
57 ; GFX10-OBJ-NEXT: s_code_end
58
59 ; GFX10-OBJ-NEXT: s_code_end
60 ; GFX10-OBJ-NEXT: s_code_end
61 ; GFX10-OBJ-NEXT: s_code_end
62 ; GFX10-OBJ-NEXT: s_code_end
63 ; GFX10-OBJ-NEXT: s_code_end
64 ; GFX10-OBJ-NEXT: s_code_end
65 ; GFX10-OBJ-NEXT: s_code_end
66 ; GFX10-OBJ-NEXT: s_code_end
67
68 ; GFX10-OBJ-NEXT: s_code_end
69 ; GFX10-OBJ-NEXT: s_code_end
70 ; GFX10-OBJ-NEXT: s_code_end
71 ; GFX10-OBJ-NEXT: s_code_end
72 ; GFX10-OBJ-NEXT: s_code_end
73 ; GFX10-OBJ-NEXT: s_code_end
74 ; GFX10-OBJ-NEXT: s_code_end
75 ; GFX10-OBJ-NEXT: s_code_end
76
77 define void @a_function() {
78 ret void
79 }