llvm.org GIT mirror llvm / 8ae3c1c
[AMDGPU] Emit MessagePack HSA Metadata for v3 code object Continue to present HSA metadata as YAML in ASM and when output by tools (e.g. llvm-readobj), but encode it in Messagepack in the code object. Differential Revision: https://reviews.llvm.org/D48179 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@348963 91177308-0d34-0410-b5e6-96231b3b80d8 Scott Linder 11 months ago
31 changed file(s) with 3661 addition(s) and 157 deletion(s). Raw diff Collapse all Expand all
0 //===- AMDGPUMetadataVerifier.h - MsgPack Types -----------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This is a verifier for AMDGPU HSA metadata, which can verify both
11 /// well-typed metadata and untyped metadata. When verifying in the non-strict
12 /// mode, untyped metadata is coerced into the correct type if possible.
13 //
14 //===----------------------------------------------------------------------===//
15
16 #ifndef LLVM_BINARYFORMAT_AMDGPUMETADATAVERIFIER_H
17 #define LLVM_BINARYFORMAT_AMDGPUMETADATAVERIFIER_H
18
19 #include "llvm/BinaryFormat/MsgPackTypes.h"
20
21 namespace llvm {
22 namespace AMDGPU {
23 namespace HSAMD {
24 namespace V3 {
25
26 /// Verifier for AMDGPU HSA metadata.
27 ///
28 /// Operates in two modes:
29 ///
30 /// In strict mode, metadata must already be well-typed.
31 ///
32 /// In non-strict mode, metadata is coerced into expected types when possible.
33 class MetadataVerifier {
34 bool Strict;
35
36 bool verifyScalar(msgpack::Node &Node, msgpack::ScalarNode::ScalarKind SKind,
37 function_ref verifyValue = {});
38 bool verifyInteger(msgpack::Node &Node);
39 bool verifyArray(msgpack::Node &Node,
40 function_ref verifyNode,
41 Optional Size = None);
42 bool verifyEntry(msgpack::MapNode &MapNode, StringRef Key, bool Required,
43 function_ref verifyNode);
44 bool
45 verifyScalarEntry(msgpack::MapNode &MapNode, StringRef Key, bool Required,
46 msgpack::ScalarNode::ScalarKind SKind,
47 function_ref verifyValue = {});
48 bool verifyIntegerEntry(msgpack::MapNode &MapNode, StringRef Key,
49 bool Required);
50 bool verifyKernelArgs(msgpack::Node &Node);
51 bool verifyKernel(msgpack::Node &Node);
52
53 public:
54 /// Construct a MetadataVerifier, specifying whether it will operate in \p
55 /// Strict mode.
56 MetadataVerifier(bool Strict) : Strict(Strict) {}
57
58 /// Verify given HSA metadata.
59 ///
60 /// \returns True when successful, false when metadata is invalid.
61 bool verify(msgpack::Node &HSAMetadataRoot);
62 };
63
64 } // end namespace V3
65 } // end namespace HSAMD
66 } // end namespace AMDGPU
67 } // end namespace llvm
68
69 #endif // LLVM_BINARYFORMAT_AMDGPUMETADATAVERIFIER_H
13601360 GNU_PROPERTY_X86_FEATURE_1_SHSTK = 1 << 1
13611361 };
13621362
1363 // AMDGPU specific notes.
1363 // AMD specific notes. (Code Object V2)
13641364 enum {
13651365 // Note types with values between 0 and 9 (inclusive) are reserved.
13661366 NT_AMD_AMDGPU_HSA_METADATA = 10,
13671367 NT_AMD_AMDGPU_ISA = 11,
13681368 NT_AMD_AMDGPU_PAL_METADATA = 12
1369 };
1370
1371 // AMDGPU specific notes. (Code Object V3)
1372 enum {
1373 // Note types with values between 0 and 31 (inclusive) are reserved.
1374 NT_AMDGPU_METADATA = 32
13691375 };
13701376
13711377 enum {
430430 /// Converts \p HSAMetadata to \p String.
431431 std::error_code toString(Metadata HSAMetadata, std::string &String);
432432
433 //===----------------------------------------------------------------------===//
434 // HSA metadata for v3 code object.
435 //===----------------------------------------------------------------------===//
436 namespace V3 {
437 /// HSA metadata major version.
438 constexpr uint32_t VersionMajor = 1;
439 /// HSA metadata minor version.
440 constexpr uint32_t VersionMinor = 0;
441
442 /// HSA metadata beginning assembler directive.
443 constexpr char AssemblerDirectiveBegin[] = ".amdgpu_metadata";
444 /// HSA metadata ending assembler directive.
445 constexpr char AssemblerDirectiveEnd[] = ".end_amdgpu_metadata";
446 } // end namespace V3
447
433448 } // end namespace HSAMD
434449
435450 //===----------------------------------------------------------------------===//
0 //===- AMDGPUMetadataVerifier.cpp - MsgPack Types ---------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Implements a verifier for AMDGPU HSA metadata.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
15 #include "llvm/Support/AMDGPUMetadata.h"
16
17 namespace llvm {
18 namespace AMDGPU {
19 namespace HSAMD {
20 namespace V3 {
21
22 bool MetadataVerifier::verifyScalar(
23 msgpack::Node &Node, msgpack::ScalarNode::ScalarKind SKind,
24 function_ref verifyValue) {
25 auto ScalarPtr = dyn_cast(&Node);
26 if (!ScalarPtr)
27 return false;
28 auto &Scalar = *ScalarPtr;
29 // Do not output extraneous tags for types we know from the spec.
30 Scalar.IgnoreTag = true;
31 if (Scalar.getScalarKind() != SKind) {
32 if (Strict)
33 return false;
34 // If we are not strict, we interpret string values as "implicitly typed"
35 // and attempt to coerce them to the expected type here.
36 if (Scalar.getScalarKind() != msgpack::ScalarNode::SK_String)
37 return false;
38 std::string StringValue = Scalar.getString();
39 Scalar.setScalarKind(SKind);
40 if (Scalar.inputYAML(StringValue) != StringRef())
41 return false;
42 }
43 if (verifyValue)
44 return verifyValue(Scalar);
45 return true;
46 }
47
48 bool MetadataVerifier::verifyInteger(msgpack::Node &Node) {
49 if (!verifyScalar(Node, msgpack::ScalarNode::SK_UInt))
50 if (!verifyScalar(Node, msgpack::ScalarNode::SK_Int))
51 return false;
52 return true;
53 }
54
55 bool MetadataVerifier::verifyArray(
56 msgpack::Node &Node, function_ref verifyNode,
57 Optional Size) {
58 auto ArrayPtr = dyn_cast(&Node);
59 if (!ArrayPtr)
60 return false;
61 auto &Array = *ArrayPtr;
62 if (Size && Array.size() != *Size)
63 return false;
64 for (auto &Item : Array)
65 if (!verifyNode(*Item.get()))
66 return false;
67
68 return true;
69 }
70
71 bool MetadataVerifier::verifyEntry(
72 msgpack::MapNode &MapNode, StringRef Key, bool Required,
73 function_ref verifyNode) {
74 auto Entry = MapNode.find(Key);
75 if (Entry == MapNode.end())
76 return !Required;
77 return verifyNode(*Entry->second.get());
78 }
79
80 bool MetadataVerifier::verifyScalarEntry(
81 msgpack::MapNode &MapNode, StringRef Key, bool Required,
82 msgpack::ScalarNode::ScalarKind SKind,
83 function_ref verifyValue) {
84 return verifyEntry(MapNode, Key, Required, [=](msgpack::Node &Node) {
85 return verifyScalar(Node, SKind, verifyValue);
86 });
87 }
88
89 bool MetadataVerifier::verifyIntegerEntry(msgpack::MapNode &MapNode,
90 StringRef Key, bool Required) {
91 return verifyEntry(MapNode, Key, Required, [this](msgpack::Node &Node) {
92 return verifyInteger(Node);
93 });
94 }
95
96 bool MetadataVerifier::verifyKernelArgs(msgpack::Node &Node) {
97 auto ArgsMapPtr = dyn_cast(&Node);
98 if (!ArgsMapPtr)
99 return false;
100 auto &ArgsMap = *ArgsMapPtr;
101
102 if (!verifyScalarEntry(ArgsMap, ".name", false,
103 msgpack::ScalarNode::SK_String))
104 return false;
105 if (!verifyScalarEntry(ArgsMap, ".type_name", false,
106 msgpack::ScalarNode::SK_String))
107 return false;
108 if (!verifyIntegerEntry(ArgsMap, ".size", true))
109 return false;
110 if (!verifyIntegerEntry(ArgsMap, ".offset", true))
111 return false;
112 if (!verifyScalarEntry(ArgsMap, ".value_kind", true,
113 msgpack::ScalarNode::SK_String,
114 [](msgpack::ScalarNode &SNode) {
115 return StringSwitch(SNode.getString())
116 .Case("by_value", true)
117 .Case("global_buffer", true)
118 .Case("dynamic_shared_pointer", true)
119 .Case("sampler", true)
120 .Case("image", true)
121 .Case("pipe", true)
122 .Case("queue", true)
123 .Case("hidden_global_offset_x", true)
124 .Case("hidden_global_offset_y", true)
125 .Case("hidden_global_offset_z", true)
126 .Case("hidden_none", true)
127 .Case("hidden_printf_buffer", true)
128 .Case("hidden_default_queue", true)
129 .Case("hidden_completion_action", true)
130 .Default(false);
131 }))
132 return false;
133 if (!verifyScalarEntry(ArgsMap, ".value_type", true,
134 msgpack::ScalarNode::SK_String,
135 [](msgpack::ScalarNode &SNode) {
136 return StringSwitch(SNode.getString())
137 .Case("struct", true)
138 .Case("i8", true)
139 .Case("u8", true)
140 .Case("i16", true)
141 .Case("u16", true)
142 .Case("f16", true)
143 .Case("i32", true)
144 .Case("u32", true)
145 .Case("f32", true)
146 .Case("i64", true)
147 .Case("u64", true)
148 .Case("f64", true)
149 .Default(false);
150 }))
151 return false;
152 if (!verifyIntegerEntry(ArgsMap, ".pointee_align", false))
153 return false;
154 if (!verifyScalarEntry(ArgsMap, ".address_space", false,
155 msgpack::ScalarNode::SK_String,
156 [](msgpack::ScalarNode &SNode) {
157 return StringSwitch(SNode.getString())
158 .Case("private", true)
159 .Case("global", true)
160 .Case("constant", true)
161 .Case("local", true)
162 .Case("generic", true)
163 .Case("region", true)
164 .Default(false);
165 }))
166 return false;
167 if (!verifyScalarEntry(ArgsMap, ".access", false,
168 msgpack::ScalarNode::SK_String,
169 [](msgpack::ScalarNode &SNode) {
170 return StringSwitch(SNode.getString())
171 .Case("read_only", true)
172 .Case("write_only", true)
173 .Case("read_write", true)
174 .Default(false);
175 }))
176 return false;
177 if (!verifyScalarEntry(ArgsMap, ".actual_access", false,
178 msgpack::ScalarNode::SK_String,
179 [](msgpack::ScalarNode &SNode) {
180 return StringSwitch(SNode.getString())
181 .Case("read_only", true)
182 .Case("write_only", true)
183 .Case("read_write", true)
184 .Default(false);
185 }))
186 return false;
187 if (!verifyScalarEntry(ArgsMap, ".is_const", false,
188 msgpack::ScalarNode::SK_Boolean))
189 return false;
190 if (!verifyScalarEntry(ArgsMap, ".is_restrict", false,
191 msgpack::ScalarNode::SK_Boolean))
192 return false;
193 if (!verifyScalarEntry(ArgsMap, ".is_volatile", false,
194 msgpack::ScalarNode::SK_Boolean))
195 return false;
196 if (!verifyScalarEntry(ArgsMap, ".is_pipe", false,
197 msgpack::ScalarNode::SK_Boolean))
198 return false;
199
200 return true;
201 }
202
203 bool MetadataVerifier::verifyKernel(msgpack::Node &Node) {
204 auto KernelMapPtr = dyn_cast(&Node);
205 if (!KernelMapPtr)
206 return false;
207 auto &KernelMap = *KernelMapPtr;
208
209 if (!verifyScalarEntry(KernelMap, ".name", true,
210 msgpack::ScalarNode::SK_String))
211 return false;
212 if (!verifyScalarEntry(KernelMap, ".symbol", true,
213 msgpack::ScalarNode::SK_String))
214 return false;
215 if (!verifyScalarEntry(KernelMap, ".language", false,
216 msgpack::ScalarNode::SK_String,
217 [](msgpack::ScalarNode &SNode) {
218 return StringSwitch(SNode.getString())
219 .Case("OpenCL C", true)
220 .Case("OpenCL C++", true)
221 .Case("HCC", true)
222 .Case("HIP", true)
223 .Case("OpenMP", true)
224 .Case("Assembler", true)
225 .Default(false);
226 }))
227 return false;
228 if (!verifyEntry(
229 KernelMap, ".language_version", false, [this](msgpack::Node &Node) {
230 return verifyArray(
231 Node,
232 [this](msgpack::Node &Node) { return verifyInteger(Node); }, 2);
233 }))
234 return false;
235 if (!verifyEntry(KernelMap, ".args", false, [this](msgpack::Node &Node) {
236 return verifyArray(Node, [this](msgpack::Node &Node) {
237 return verifyKernelArgs(Node);
238 });
239 }))
240 return false;
241 if (!verifyEntry(KernelMap, ".reqd_workgroup_size", false,
242 [this](msgpack::Node &Node) {
243 return verifyArray(Node,
244 [this](msgpack::Node &Node) {
245 return verifyInteger(Node);
246 },
247 3);
248 }))
249 return false;
250 if (!verifyEntry(KernelMap, ".workgroup_size_hint", false,
251 [this](msgpack::Node &Node) {
252 return verifyArray(Node,
253 [this](msgpack::Node &Node) {
254 return verifyInteger(Node);
255 },
256 3);
257 }))
258 return false;
259 if (!verifyScalarEntry(KernelMap, ".vec_type_hint", false,
260 msgpack::ScalarNode::SK_String))
261 return false;
262 if (!verifyScalarEntry(KernelMap, ".device_enqueue_symbol", false,
263 msgpack::ScalarNode::SK_String))
264 return false;
265 if (!verifyIntegerEntry(KernelMap, ".kernarg_segment_size", true))
266 return false;
267 if (!verifyIntegerEntry(KernelMap, ".group_segment_fixed_size", true))
268 return false;
269 if (!verifyIntegerEntry(KernelMap, ".private_segment_fixed_size", true))
270 return false;
271 if (!verifyIntegerEntry(KernelMap, ".kernarg_segment_align", true))
272 return false;
273 if (!verifyIntegerEntry(KernelMap, ".wavefront_size", true))
274 return false;
275 if (!verifyIntegerEntry(KernelMap, ".sgpr_count", true))
276 return false;
277 if (!verifyIntegerEntry(KernelMap, ".vgpr_count", true))
278 return false;
279 if (!verifyIntegerEntry(KernelMap, ".max_flat_workgroup_size", true))
280 return false;
281 if (!verifyIntegerEntry(KernelMap, ".sgpr_spill_count", false))
282 return false;
283 if (!verifyIntegerEntry(KernelMap, ".vgpr_spill_count", false))
284 return false;
285
286 return true;
287 }
288
289 bool MetadataVerifier::verify(msgpack::Node &HSAMetadataRoot) {
290 auto RootMapPtr = dyn_cast(&HSAMetadataRoot);
291 if (!RootMapPtr)
292 return false;
293 auto &RootMap = *RootMapPtr;
294
295 if (!verifyEntry(
296 RootMap, "amdhsa.version", true, [this](msgpack::Node &Node) {
297 return verifyArray(
298 Node,
299 [this](msgpack::Node &Node) { return verifyInteger(Node); }, 2);
300 }))
301 return false;
302 if (!verifyEntry(
303 RootMap, "amdhsa.printf", false, [this](msgpack::Node &Node) {
304 return verifyArray(Node, [this](msgpack::Node &Node) {
305 return verifyScalar(Node, msgpack::ScalarNode::SK_String);
306 });
307 }))
308 return false;
309 if (!verifyEntry(RootMap, "amdhsa.kernels", true,
310 [this](msgpack::Node &Node) {
311 return verifyArray(Node, [this](msgpack::Node &Node) {
312 return verifyKernel(Node);
313 });
314 }))
315 return false;
316
317 return true;
318 }
319
320 } // end namespace V3
321 } // end namespace HSAMD
322 } // end namespace AMDGPU
323 } // end namespace llvm
0 add_llvm_library(LLVMBinaryFormat
1 AMDGPUMetadataVerifier.cpp
12 Dwarf.cpp
23 Magic.cpp
34 MsgPackReader.cpp
4545
4646 using namespace llvm;
4747 using namespace llvm::AMDGPU;
48 using namespace llvm::AMDGPU::HSAMD;
4849
4950 // TODO: This should get the default rounding mode from the kernel. We just set
5051 // the default here, but this could change if the OpenCL rounding mode pragmas
9899 AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
99100 std::unique_ptr Streamer)
100101 : AsmPrinter(TM, std::move(Streamer)) {
102 if (IsaInfo::hasCodeObjectV3(getSTI()))
103 HSAMetadataStream.reset(new MetadataStreamerV3());
104 else
105 HSAMetadataStream.reset(new MetadataStreamerV2());
101106 }
102107
103108 StringRef AMDGPUAsmPrinter::getPassName() const {
121126 IsaInfo::streamIsaVersion(getSTI(), ExpectedTargetOS);
122127
123128 getTargetStreamer()->EmitDirectiveAMDGCNTarget(ExpectedTarget);
124
125 if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
126 return;
127129 }
128130
129131 if (TM.getTargetTriple().getOS() != Triple::AMDHSA &&
131133 return;
132134
133135 if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
134 HSAMetadataStream.begin(M);
136 HSAMetadataStream->begin(M);
135137
136138 if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
137139 readPALMetadata(M);
140
141 if (IsaInfo::hasCodeObjectV3(getSTI()))
142 return;
138143
139144 // HSA emits NT_AMDGPU_HSA_CODE_OBJECT_VERSION for code objects v2.
140145 if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
147152 }
148153
149154 void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
150 // TODO: Add metadata to code object v3.
151 if (IsaInfo::hasCodeObjectV3(getSTI()) &&
152 TM.getTargetTriple().getOS() == Triple::AMDHSA)
153 return;
154
155155 // Following code requires TargetStreamer to be present.
156156 if (!getTargetStreamer())
157157 return;
158158
159 // Emit ISA Version (NT_AMD_AMDGPU_ISA).
160 std::string ISAVersionString;
161 raw_string_ostream ISAVersionStream(ISAVersionString);
162 IsaInfo::streamIsaVersion(getSTI(), ISAVersionStream);
163 getTargetStreamer()->EmitISAVersion(ISAVersionStream.str());
159 if (!IsaInfo::hasCodeObjectV3(getSTI())) {
160 // Emit ISA Version (NT_AMD_AMDGPU_ISA).
161 std::string ISAVersionString;
162 raw_string_ostream ISAVersionStream(ISAVersionString);
163 IsaInfo::streamIsaVersion(getSTI(), ISAVersionStream);
164 getTargetStreamer()->EmitISAVersion(ISAVersionStream.str());
165 }
164166
165167 // Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA).
166168 if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
167 HSAMetadataStream.end();
168 getTargetStreamer()->EmitHSAMetadata(HSAMetadataStream.getHSAMetadata());
169 }
170
171 // Emit PAL Metadata (NT_AMD_AMDGPU_PAL_METADATA).
172 if (TM.getTargetTriple().getOS() == Triple::AMDPAL) {
173 // Copy the PAL metadata from the map where we collected it into a vector,
174 // then write it as a .note.
175 PALMD::Metadata PALMetadataVector;
176 for (auto i : PALMetadataMap) {
177 PALMetadataVector.push_back(i.first);
178 PALMetadataVector.push_back(i.second);
169 HSAMetadataStream->end();
170 bool Success = HSAMetadataStream->emitTo(*getTargetStreamer());
171 (void)Success;
172 assert(Success && "Malformed HSA Metadata");
173 }
174
175 if (!IsaInfo::hasCodeObjectV3(getSTI())) {
176 // Emit PAL Metadata (NT_AMD_AMDGPU_PAL_METADATA).
177 if (TM.getTargetTriple().getOS() == Triple::AMDPAL) {
178 // Copy the PAL metadata from the map where we collected it into a vector,
179 // then write it as a .note.
180 PALMD::Metadata PALMetadataVector;
181 for (auto i : PALMetadataMap) {
182 PALMetadataVector.push_back(i.first);
183 PALMetadataVector.push_back(i.second);
184 }
185 getTargetStreamer()->EmitPALMetadata(PALMetadataVector);
179186 }
180 getTargetStreamer()->EmitPALMetadata(PALMetadataVector);
181187 }
182188 }
183189
210216 getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
211217 }
212218
213 if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
214 return;
215
216 if (!STM.hasCodeObjectV3() && STM.isAmdHsaOS())
217 HSAMetadataStream.emitKernel(*MF, CurrentProgramInfo);
219 if (STM.isAmdHsaOS())
220 HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo);
218221 }
219222
220223 void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
5555 SIProgramInfo CurrentProgramInfo;
5656 DenseMap CallGraphResourceInfo;
5757
58 AMDGPU::HSAMD::MetadataStreamer HSAMetadataStream;
58 std::unique_ptr HSAMetadataStream;
5959 std::map PALMetadataMap;
6060
6161 uint64_t getFunctionCodeSize(const MachineFunction &MF) const;
1515 #include "AMDGPUHSAMetadataStreamer.h"
1616 #include "AMDGPU.h"
1717 #include "AMDGPUSubtarget.h"
18 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
1819 #include "SIMachineFunctionInfo.h"
1920 #include "SIProgramInfo.h"
2021 #include "Utils/AMDGPUBaseInfo.h"
3536 namespace AMDGPU {
3637 namespace HSAMD {
3738
38 void MetadataStreamer::dump(StringRef HSAMetadataString) const {
39 //===----------------------------------------------------------------------===//
40 // HSAMetadataStreamerV2
41 //===----------------------------------------------------------------------===//
42 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
3943 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
4044 }
4145
42 void MetadataStreamer::verify(StringRef HSAMetadataString) const {
46 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
4347 errs() << "AMDGPU HSA Metadata Parser Test: ";
4448
4549 HSAMD::Metadata FromHSAMetadataString;
6266 }
6367 }
6468
65 AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const {
69 AccessQualifier
70 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
6671 if (AccQual.empty())
6772 return AccessQualifier::Unknown;
6873
7378 .Default(AccessQualifier::Default);
7479 }
7580
76 AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer(
81 AddressSpaceQualifier
82 MetadataStreamerV2::getAddressSpaceQualifier(
7783 unsigned AddressSpace) const {
7884 switch (AddressSpace) {
7985 case AMDGPUAS::PRIVATE_ADDRESS:
9399 }
94100 }
95101
96 ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual,
97 StringRef BaseTypeName) const {
102 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
103 StringRef BaseTypeName) const {
98104 if (TypeQual.find("pipe") != StringRef::npos)
99105 return ValueKind::Pipe;
100106
121127 ValueKind::ByValue);
122128 }
123129
124 ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const {
130 ValueType MetadataStreamerV2::getValueType(Type *Ty, StringRef TypeName) const {
125131 switch (Ty->getTypeID()) {
126132 case Type::IntegerTyID: {
127133 auto Signed = !TypeName.startswith("u");
153159 }
154160 }
155161
156 std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const {
162 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
157163 switch (Ty->getTypeID()) {
158164 case Type::IntegerTyID: {
159165 if (!Signed)
190196 }
191197 }
192198
193 std::vector MetadataStreamer::getWorkGroupDimensions(
194 MDNode *Node) const {
199 std::vector
200 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
195201 std::vector Dims;
196202 if (Node->getNumOperands() != 3)
197203 return Dims;
201207 return Dims;
202208 }
203209
204 Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps(
205 const MachineFunction &MF,
206 const SIProgramInfo &ProgramInfo) const {
210 Kernel::CodeProps::Metadata
211 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
212 const SIProgramInfo &ProgramInfo) const {
207213 const GCNSubtarget &STM = MF.getSubtarget();
208214 const SIMachineFunctionInfo &MFI = *MF.getInfo();
209215 HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
230236 return HSACodeProps;
231237 }
232238
233 Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps(
234 const MachineFunction &MF,
235 const SIProgramInfo &ProgramInfo) const {
239 Kernel::DebugProps::Metadata
240 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
241 const SIProgramInfo &ProgramInfo) const {
236242 const GCNSubtarget &STM = MF.getSubtarget();
237243 HSAMD::Kernel::DebugProps::Metadata HSADebugProps;
238244
252258 return HSADebugProps;
253259 }
254260
255 void MetadataStreamer::emitVersion() {
261 void MetadataStreamerV2::emitVersion() {
256262 auto &Version = HSAMetadata.mVersion;
257263
258264 Version.push_back(VersionMajor);
259265 Version.push_back(VersionMinor);
260266 }
261267
262 void MetadataStreamer::emitPrintf(const Module &Mod) {
268 void MetadataStreamerV2::emitPrintf(const Module &Mod) {
263269 auto &Printf = HSAMetadata.mPrintf;
264270
265271 auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
271277 Printf.push_back(cast(Op->getOperand(0))->getString());
272278 }
273279
274 void MetadataStreamer::emitKernelLanguage(const Function &Func) {
280 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
275281 auto &Kernel = HSAMetadata.mKernels.back();
276282
277283 // TODO: What about other languages?
289295 mdconst::extract(Op0->getOperand(1))->getZExtValue());
290296 }
291297
292 void MetadataStreamer::emitKernelAttrs(const Function &Func) {
298 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
293299 auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
294300
295301 if (auto Node = Func.getMetadata("reqd_work_group_size"))
307313 }
308314 }
309315
310 void MetadataStreamer::emitKernelArgs(const Function &Func) {
316 void MetadataStreamerV2::emitKernelArgs(const Function &Func) {
311317 for (auto &Arg : Func.args())
312318 emitKernelArg(Arg);
313319
314320 emitHiddenKernelArgs(Func);
315321 }
316322
317 void MetadataStreamer::emitKernelArg(const Argument &Arg) {
323 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
318324 auto Func = Arg.getParent();
319325 auto ArgNo = Arg.getArgNo();
320326 const MDNode *Node;
367373 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
368374 }
369375
370 void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty,
371 ValueKind ValueKind,
372 unsigned PointeeAlign,
373 StringRef Name,
374 StringRef TypeName, StringRef BaseTypeName,
375 StringRef AccQual, StringRef TypeQual) {
376 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
377 ValueKind ValueKind,
378 unsigned PointeeAlign, StringRef Name,
379 StringRef TypeName,
380 StringRef BaseTypeName,
381 StringRef AccQual, StringRef TypeQual) {
376382 HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
377383 auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
378384
385391 Arg.mPointeeAlign = PointeeAlign;
386392
387393 if (auto PtrTy = dyn_cast(Ty))
388 Arg.mAddrSpaceQual = getAddressSpaceQualifer(PtrTy->getAddressSpace());
394 Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
389395
390396 Arg.mAccQual = getAccessQualifier(AccQual);
391397
405411 }
406412 }
407413
408 void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) {
414 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
409415 int HiddenArgNumBytes =
410416 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
411417
447453 }
448454 }
449455
450 void MetadataStreamer::begin(const Module &Mod) {
456 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
457 return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
458 }
459
460 void MetadataStreamerV2::begin(const Module &Mod) {
451461 emitVersion();
452462 emitPrintf(Mod);
453463 }
454464
455 void MetadataStreamer::end() {
465 void MetadataStreamerV2::end() {
456466 std::string HSAMetadataString;
457467 if (toString(HSAMetadata, HSAMetadataString))
458468 return;
463473 verify(HSAMetadataString);
464474 }
465475
466 void MetadataStreamer::emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) {
476 void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
477 const SIProgramInfo &ProgramInfo) {
467478 auto &Func = MF.getFunction();
468479 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
469480 return;
483494 HSAMetadata.mKernels.back().mDebugProps = DebugProps;
484495 }
485496
497 //===----------------------------------------------------------------------===//
498 // HSAMetadataStreamerV3
499 //===----------------------------------------------------------------------===//
500
501 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
502 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
503 }
504
505 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
506 errs() << "AMDGPU HSA Metadata Parser Test: ";
507
508 std::shared_ptr FromHSAMetadataString =
509 std::make_shared();
510
511 yaml::Input YIn(HSAMetadataString);
512 YIn >> FromHSAMetadataString;
513 if (YIn.error()) {
514 errs() << "FAIL\n";
515 return;
516 }
517
518 std::string ToHSAMetadataString;
519 raw_string_ostream StrOS(ToHSAMetadataString);
520 yaml::Output YOut(StrOS);
521 YOut << FromHSAMetadataString;
522
523 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
524 if (HSAMetadataString != ToHSAMetadataString) {
525 errs() << "Original input: " << HSAMetadataString << '\n'
526 << "Produced output: " << StrOS.str() << '\n';
527 }
528 }
529
530 Optional
531 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
532 return StringSwitch>(AccQual)
533 .Case("read_only", StringRef("read_only"))
534 .Case("write_only", StringRef("write_only"))
535 .Case("read_write", StringRef("read_write"))
536 .Default(None);
537 }
538
539 Optional
540 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
541 switch (AddressSpace) {
542 case AMDGPUAS::PRIVATE_ADDRESS:
543 return StringRef("private");
544 case AMDGPUAS::GLOBAL_ADDRESS:
545 return StringRef("global");
546 case AMDGPUAS::CONSTANT_ADDRESS:
547 return StringRef("constant");
548 case AMDGPUAS::LOCAL_ADDRESS:
549 return StringRef("local");
550 case AMDGPUAS::FLAT_ADDRESS:
551 return StringRef("generic");
552 case AMDGPUAS::REGION_ADDRESS:
553 return StringRef("region");
554 default:
555 return None;
556 }
557 }
558
559 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
560 StringRef BaseTypeName) const {
561 if (TypeQual.find("pipe") != StringRef::npos)
562 return "pipe";
563
564 return StringSwitch(BaseTypeName)
565 .Case("image1d_t", "image")
566 .Case("image1d_array_t", "image")
567 .Case("image1d_buffer_t", "image")
568 .Case("image2d_t", "image")
569 .Case("image2d_array_t", "image")
570 .Case("image2d_array_depth_t", "image")
571 .Case("image2d_array_msaa_t", "image")
572 .Case("image2d_array_msaa_depth_t", "image")
573 .Case("image2d_depth_t", "image")
574 .Case("image2d_msaa_t", "image")
575 .Case("image2d_msaa_depth_t", "image")
576 .Case("image3d_t", "image")
577 .Case("sampler_t", "sampler")
578 .Case("queue_t", "queue")
579 .Default(isa(Ty)
580 ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
581 ? "dynamic_shared_pointer"
582 : "global_buffer")
583 : "by_value");
584 }
585
586 StringRef MetadataStreamerV3::getValueType(Type *Ty, StringRef TypeName) const {
587 switch (Ty->getTypeID()) {
588 case Type::IntegerTyID: {
589 auto Signed = !TypeName.startswith("u");
590 switch (Ty->getIntegerBitWidth()) {
591 case 8:
592 return Signed ? "i8" : "u8";
593 case 16:
594 return Signed ? "i16" : "u16";
595 case 32:
596 return Signed ? "i32" : "u32";
597 case 64:
598 return Signed ? "i64" : "u64";
599 default:
600 return "struct";
601 }
602 }
603 case Type::HalfTyID:
604 return "f16";
605 case Type::FloatTyID:
606 return "f32";
607 case Type::DoubleTyID:
608 return "f64";
609 case Type::PointerTyID:
610 return getValueType(Ty->getPointerElementType(), TypeName);
611 case Type::VectorTyID:
612 return getValueType(Ty->getVectorElementType(), TypeName);
613 default:
614 return "struct";
615 }
616 }
617
618 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
619 switch (Ty->getTypeID()) {
620 case Type::IntegerTyID: {
621 if (!Signed)
622 return (Twine('u') + getTypeName(Ty, true)).str();
623
624 auto BitWidth = Ty->getIntegerBitWidth();
625 switch (BitWidth) {
626 case 8:
627 return "char";
628 case 16:
629 return "short";
630 case 32:
631 return "int";
632 case 64:
633 return "long";
634 default:
635 return (Twine('i') + Twine(BitWidth)).str();
636 }
637 }
638 case Type::HalfTyID:
639 return "half";
640 case Type::FloatTyID:
641 return "float";
642 case Type::DoubleTyID:
643 return "double";
644 case Type::VectorTyID: {
645 auto VecTy = cast(Ty);
646 auto ElTy = VecTy->getElementType();
647 auto NumElements = VecTy->getVectorNumElements();
648 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
649 }
650 default:
651 return "unknown";
652 }
653 }
654
655 std::shared_ptr
656 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
657 auto Dims = std::make_shared();
658 if (Node->getNumOperands() != 3)
659 return Dims;
660
661 for (auto &Op : Node->operands())
662 Dims->push_back(std::make_shared(
663 mdconst::extract(Op)->getZExtValue()));
664 return Dims;
665 }
666
667 void MetadataStreamerV3::emitVersion() {
668 auto Version = std::make_shared();
669 Version->push_back(std::make_shared(V3::VersionMajor));
670 Version->push_back(std::make_shared(V3::VersionMinor));
671 getRootMetadata("amdhsa.version") = std::move(Version);
672 }
673
674 void MetadataStreamerV3::emitPrintf(const Module &Mod) {
675 auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
676 if (!Node)
677 return;
678
679 auto Printf = std::make_shared();
680 for (auto Op : Node->operands())
681 if (Op->getNumOperands())
682 Printf->push_back(std::make_shared(
683 cast(Op->getOperand(0))->getString()));
684 getRootMetadata("amdhsa.printf") = std::move(Printf);
685 }
686
687 void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
688 msgpack::MapNode &Kern) {
689 // TODO: What about other languages?
690 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
691 if (!Node || !Node->getNumOperands())
692 return;
693 auto Op0 = Node->getOperand(0);
694 if (Op0->getNumOperands() <= 1)
695 return;
696
697 Kern[".language"] = std::make_shared("OpenCL C");
698 auto LanguageVersion = std::make_shared();
699 LanguageVersion->push_back(std::make_shared(
700 mdconst::extract(Op0->getOperand(0))->getZExtValue()));
701 LanguageVersion->push_back(std::make_shared(
702 mdconst::extract(Op0->getOperand(1))->getZExtValue()));
703 Kern[".language_version"] = std::move(LanguageVersion);
704 }
705
706 void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
707 msgpack::MapNode &Kern) {
708
709 if (auto Node = Func.getMetadata("reqd_work_group_size"))
710 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
711 if (auto Node = Func.getMetadata("work_group_size_hint"))
712 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
713 if (auto Node = Func.getMetadata("vec_type_hint")) {
714 Kern[".vec_type_hint"] = std::make_shared(getTypeName(
715 cast(Node->getOperand(0))->getType(),
716 mdconst::extract(Node->getOperand(1))->getZExtValue()));
717 }
718 if (Func.hasFnAttribute("runtime-handle")) {
719 Kern[".device_enqueue_symbol"] = std::make_shared(
720 Func.getFnAttribute("runtime-handle").getValueAsString().str());
721 }
722 }
723
724 void MetadataStreamerV3::emitKernelArgs(const Function &Func,
725 msgpack::MapNode &Kern) {
726 unsigned Offset = 0;
727 auto Args = std::make_shared();
728 for (auto &Arg : Func.args())
729 emitKernelArg(Arg, Offset, *Args);
730
731 emitHiddenKernelArgs(Func, Offset, *Args);
732
733 // TODO: What about other languages?
734 if (Func.getParent()->getNamedMetadata("opencl.ocl.version")) {
735 auto &DL = Func.getParent()->getDataLayout();
736 auto Int64Ty = Type::getInt64Ty(Func.getContext());
737
738 emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, *Args);
739 emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, *Args);
740 emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, *Args);
741
742 auto Int8PtrTy =
743 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
744
745 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
746 // "none" argument.
747 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
748 emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, *Args);
749 else
750 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
751
752 // Emit "default queue" and "completion action" arguments if enqueue kernel
753 // is used, otherwise emit dummy "none" arguments.
754 if (Func.hasFnAttribute("calls-enqueue-kernel")) {
755 emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, *Args);
756 emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, *Args);
757 } else {
758 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
759 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
760 }
761 }
762
763 Kern[".args"] = std::move(Args);
764 }
765
766 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
767 msgpack::ArrayNode &Args) {
768 auto Func = Arg.getParent();
769 auto ArgNo = Arg.getArgNo();
770 const MDNode *Node;
771
772 StringRef Name;
773 Node = Func->getMetadata("kernel_arg_name");
774 if (Node && ArgNo < Node->getNumOperands())
775 Name = cast(Node->getOperand(ArgNo))->getString();
776 else if (Arg.hasName())
777 Name = Arg.getName();
778
779 StringRef TypeName;
780 Node = Func->getMetadata("kernel_arg_type");
781 if (Node && ArgNo < Node->getNumOperands())
782 TypeName = cast(Node->getOperand(ArgNo))->getString();
783
784 StringRef BaseTypeName;
785 Node = Func->getMetadata("kernel_arg_base_type");
786 if (Node && ArgNo < Node->getNumOperands())
787 BaseTypeName = cast(Node->getOperand(ArgNo))->getString();
788
789 StringRef AccQual;
790 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
791 Arg.hasNoAliasAttr()) {
792 AccQual = "read_only";
793 } else {
794 Node = Func->getMetadata("kernel_arg_access_qual");
795 if (Node && ArgNo < Node->getNumOperands())
796 AccQual = cast(Node->getOperand(ArgNo))->getString();
797 }
798
799 StringRef TypeQual;
800 Node = Func->getMetadata("kernel_arg_type_qual");
801 if (Node && ArgNo < Node->getNumOperands())
802 TypeQual = cast(Node->getOperand(ArgNo))->getString();
803
804 Type *Ty = Arg.getType();
805 const DataLayout &DL = Func->getParent()->getDataLayout();
806
807 unsigned PointeeAlign = 0;
808 if (auto PtrTy = dyn_cast(Ty)) {
809 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
810 PointeeAlign = Arg.getParamAlignment();
811 if (PointeeAlign == 0)
812 PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
813 }
814 }
815
816 emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(),
817 getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset,
818 Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual,
819 TypeQual);
820 }
821
822 void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty,
823 StringRef ValueKind, unsigned &Offset,
824 msgpack::ArrayNode &Args,
825 unsigned PointeeAlign, StringRef Name,
826 StringRef TypeName,
827 StringRef BaseTypeName,
828 StringRef AccQual, StringRef TypeQual) {
829 auto ArgPtr = std::make_shared();
830 auto &Arg = *ArgPtr;
831
832 if (!Name.empty())
833 Arg[".name"] = std::make_shared(Name);
834 if (!TypeName.empty())
835 Arg[".type_name"] = std::make_shared(TypeName);
836 auto Size = DL.getTypeAllocSize(Ty);
837 auto Align = DL.getABITypeAlignment(Ty);
838 Arg[".size"] = std::make_shared(Size);
839 Offset = alignTo(Offset, Align);
840 Arg[".offset"] = std::make_shared(Offset);
841 Offset += Size;
842 Arg[".value_kind"] = std::make_shared(ValueKind);
843 Arg[".value_type"] =
844 std::make_shared(getValueType(Ty, BaseTypeName));
845 if (PointeeAlign)
846 Arg[".pointee_align"] = std::make_shared(PointeeAlign);
847
848 if (auto PtrTy = dyn_cast(Ty))
849 if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
850 Arg[".address_space"] = std::make_shared(*Qualifier);
851
852 if (auto AQ = getAccessQualifier(AccQual))
853 Arg[".access"] = std::make_shared(*AQ);
854
855 // TODO: Emit Arg[".actual_access"].
856
857 SmallVector SplitTypeQuals;
858 TypeQual.split(SplitTypeQuals, " ", -1, false);
859 for (StringRef Key : SplitTypeQuals) {
860 if (Key == "const")
861 Arg[".is_const"] = std::make_shared(true);
862 else if (Key == "restrict")
863 Arg[".is_restrict"] = std::make_shared(true);
864 else if (Key == "volatile")
865 Arg[".is_volatile"] = std::make_shared(true);
866 else if (Key == "pipe")
867 Arg[".is_pipe"] = std::make_shared(true);
868 }
869
870 Args.push_back(std::move(ArgPtr));
871 }
872
873 void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
874 unsigned &Offset,
875 msgpack::ArrayNode &Args) {
876 int HiddenArgNumBytes =
877 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
878
879 if (!HiddenArgNumBytes)
880 return;
881
882 auto &DL = Func.getParent()->getDataLayout();
883 auto Int64Ty = Type::getInt64Ty(Func.getContext());
884
885 if (HiddenArgNumBytes >= 8)
886 emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args);
887 if (HiddenArgNumBytes >= 16)
888 emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args);
889 if (HiddenArgNumBytes >= 24)
890 emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args);
891
892 auto Int8PtrTy =
893 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
894
895 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
896 // "none" argument.
897 if (HiddenArgNumBytes >= 32) {
898 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
899 emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args);
900 else
901 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
902 }
903
904 // Emit "default queue" and "completion action" arguments if enqueue kernel is
905 // used, otherwise emit dummy "none" arguments.
906 if (HiddenArgNumBytes >= 48) {
907 if (Func.hasFnAttribute("calls-enqueue-kernel")) {
908 emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args);
909 emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args);
910 } else {
911 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
912 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
913 }
914 }
915 }
916
917 std::shared_ptr
918 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
919 const SIProgramInfo &ProgramInfo) const {
920 const GCNSubtarget &STM = MF.getSubtarget();
921 const SIMachineFunctionInfo &MFI = *MF.getInfo();
922 const Function &F = MF.getFunction();
923
924 auto HSAKernelProps = std::make_shared();
925 auto &Kern = *HSAKernelProps;
926
927 unsigned MaxKernArgAlign;
928 Kern[".kernarg_segment_size"] = std::make_shared(
929 STM.getKernArgSegmentSize(F, MaxKernArgAlign));
930 Kern[".group_segment_fixed_size"] =
931 std::make_shared(ProgramInfo.LDSSize);
932 Kern[".private_segment_fixed_size"] =
933 std::make_shared(ProgramInfo.ScratchSize);
934 Kern[".kernarg_segment_align"] =
935 std::make_shared(std::max(uint32_t(4), MaxKernArgAlign));
936 Kern[".wavefront_size"] =
937 std::make_shared(STM.getWavefrontSize());
938 Kern[".sgpr_count"] = std::make_shared(ProgramInfo.NumSGPR);
939 Kern[".vgpr_count"] = std::make_shared(ProgramInfo.NumVGPR);
940 Kern[".max_flat_workgroup_size"] =
941 std::make_shared(MFI.getMaxFlatWorkGroupSize());
942 Kern[".sgpr_spill_count"] =
943 std::make_shared(MFI.getNumSpilledSGPRs());
944 Kern[".vgpr_spill_count"] =
945 std::make_shared(MFI.getNumSpilledVGPRs());
946
947 return HSAKernelProps;
948 }
949
950 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
951 return TargetStreamer.EmitHSAMetadata(getHSAMetadataRoot(), true);
952 }
953
954 void MetadataStreamerV3::begin(const Module &Mod) {
955 emitVersion();
956 emitPrintf(Mod);
957 getRootMetadata("amdhsa.kernels").reset(new msgpack::ArrayNode());
958 }
959
960 void MetadataStreamerV3::end() {
961 std::string HSAMetadataString;
962 raw_string_ostream StrOS(HSAMetadataString);
963 yaml::Output YOut(StrOS);
964 YOut << HSAMetadataRoot;
965
966 if (DumpHSAMetadata)
967 dump(StrOS.str());
968 if (VerifyHSAMetadata)
969 verify(StrOS.str());
970 }
971
972 void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
973 const SIProgramInfo &ProgramInfo) {
974 auto &Func = MF.getFunction();
975 auto KernelProps = getHSAKernelProps(MF, ProgramInfo);
976
977 assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
978 Func.getCallingConv() == CallingConv::SPIR_KERNEL);
979
980 auto &KernelsNode = getRootMetadata("amdhsa.kernels");
981 auto Kernels = cast(KernelsNode.get());
982
983 {
984 auto &Kern = *KernelProps;
985 Kern[".name"] = std::make_shared(Func.getName());
986 Kern[".symbol"] = std::make_shared(
987 (Twine(Func.getName()) + Twine(".kd")).str());
988 emitKernelLanguage(Func, Kern);
989 emitKernelAttrs(Func, Kern);
990 emitKernelArgs(Func, Kern);
991 }
992
993 Kernels->push_back(std::move(KernelProps));
994 }
995
486996 } // end namespace HSAMD
487997 } // end namespace AMDGPU
488998 } // end namespace llvm
1818 #include "AMDGPU.h"
1919 #include "AMDKernelCodeT.h"
2020 #include "llvm/ADT/StringRef.h"
21 #include "llvm/BinaryFormat/MsgPackTypes.h"
2122 #include "llvm/Support/AMDGPUMetadata.h"
2223
2324 namespace llvm {
2425
26 class AMDGPUTargetStreamer;
2527 class Argument;
2628 class DataLayout;
2729 class Function;
3335 namespace AMDGPU {
3436 namespace HSAMD {
3537
36 class MetadataStreamer final {
38 class MetadataStreamer {
39 public:
40 virtual ~MetadataStreamer(){};
41
42 virtual bool emitTo(AMDGPUTargetStreamer &TargetStreamer) = 0;
43
44 virtual void begin(const Module &Mod) = 0;
45
46 virtual void end() = 0;
47
48 virtual void emitKernel(const MachineFunction &MF,
49 const SIProgramInfo &ProgramInfo) = 0;
50 };
51
52 class MetadataStreamerV3 final : public MetadataStreamer {
53 private:
54 std::shared_ptr HSAMetadataRoot =
55 std::make_shared();
56
57 void dump(StringRef HSAMetadataString) const;
58
59 void verify(StringRef HSAMetadataString) const;
60
61 Optional getAccessQualifier(StringRef AccQual) const;
62
63 Optional getAddressSpaceQualifier(unsigned AddressSpace) const;
64
65 StringRef getValueKind(Type *Ty, StringRef TypeQual,
66 StringRef BaseTypeName) const;
67
68 StringRef getValueType(Type *Ty, StringRef TypeName) const;
69
70 std::string getTypeName(Type *Ty, bool Signed) const;
71
72 std::shared_ptr
73 getWorkGroupDimensions(MDNode *Node) const;
74
75 std::shared_ptr
76 getHSAKernelProps(const MachineFunction &MF,
77 const SIProgramInfo &ProgramInfo) const;
78
79 void emitVersion();
80
81 void emitPrintf(const Module &Mod);
82
83 void emitKernelLanguage(const Function &Func, msgpack::MapNode &Kern);
84
85 void emitKernelAttrs(const Function &Func, msgpack::MapNode &Kern);
86
87 void emitKernelArgs(const Function &Func, msgpack::MapNode &Kern);
88
89 void emitKernelArg(const Argument &Arg, unsigned &Offset,
90 msgpack::ArrayNode &Args);
91
92 void emitKernelArg(const DataLayout &DL, Type *Ty, StringRef ValueKind,
93 unsigned &Offset, msgpack::ArrayNode &Args,
94 unsigned PointeeAlign = 0, StringRef Name = "",
95 StringRef TypeName = "", StringRef BaseTypeName = "",
96 StringRef AccQual = "", StringRef TypeQual = "");
97
98 void emitHiddenKernelArgs(const Function &Func, unsigned &Offset,
99 msgpack::ArrayNode &Args);
100
101 std::shared_ptr &getRootMetadata(StringRef Key) {
102 return (*cast(HSAMetadataRoot.get()))[Key];
103 }
104
105 std::shared_ptr &getHSAMetadataRoot() {
106 return HSAMetadataRoot;
107 }
108
109 public:
110 MetadataStreamerV3() = default;
111 ~MetadataStreamerV3() = default;
112
113 bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override;
114
115 void begin(const Module &Mod) override;
116
117 void end() override;
118
119 void emitKernel(const MachineFunction &MF,
120 const SIProgramInfo &ProgramInfo) override;
121 };
122
123 class MetadataStreamerV2 final : public MetadataStreamer {
37124 private:
38125 Metadata HSAMetadata;
39126
43130
44131 AccessQualifier getAccessQualifier(StringRef AccQual) const;
45132
46 AddressSpaceQualifier getAddressSpaceQualifer(unsigned AddressSpace) const;
133 AddressSpaceQualifier getAddressSpaceQualifier(unsigned AddressSpace) const;
47134
48135 ValueKind getValueKind(Type *Ty, StringRef TypeQual,
49136 StringRef BaseTypeName) const;
81168
82169 void emitHiddenKernelArgs(const Function &Func);
83170
84 public:
85 MetadataStreamer() = default;
86 ~MetadataStreamer() = default;
87
88171 const Metadata &getHSAMetadata() const {
89172 return HSAMetadata;
90173 }
91174
92 void begin(const Module &Mod);
175 public:
176 MetadataStreamerV2() = default;
177 ~MetadataStreamerV2() = default;
93178
94 void end();
179 bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override;
95180
96 void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo);
181 void begin(const Module &Mod) override;
182
183 void end() override;
184
185 void emitKernel(const MachineFunction &MF,
186 const SIProgramInfo &ProgramInfo) override;
97187 };
98188
99189 } // end namespace HSAMD
2222
2323 const char SectionName[] = ".note";
2424
25 const char NoteName[] = "AMD";
25 const char NoteNameV2[] = "AMD";
26 const char NoteNameV3[] = "AMDGPU";
2627
2728 // TODO: Remove this file once we drop code object v2.
2829 enum NoteType{
30643064 }
30653065
30663066 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
3067 const char *AssemblerDirectiveBegin;
3068 const char *AssemblerDirectiveEnd;
3069 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
3070 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
3071 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
3072 HSAMD::V3::AssemblerDirectiveEnd)
3073 : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
3074 HSAMD::AssemblerDirectiveEnd);
3075
30673076 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
30683077 return Error(getParser().getTok().getLoc(),
3069 (Twine(HSAMD::AssemblerDirectiveBegin) + Twine(" directive is "
3078 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
30703079 "not available on non-amdhsa OSes")).str());
30713080 }
30723081
30843093
30853094 if (getLexer().is(AsmToken::Identifier)) {
30863095 StringRef ID = getLexer().getTok().getIdentifier();
3087 if (ID == AMDGPU::HSAMD::AssemblerDirectiveEnd) {
3096 if (ID == AssemblerDirectiveEnd) {
30883097 Lex();
30893098 FoundEnd = true;
30903099 break;
31063115
31073116 YamlStream.flush();
31083117
3109 if (!getTargetStreamer().EmitHSAMetadata(HSAMetadataString))
3110 return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3118 if (IsaInfo::hasCodeObjectV3(&getSTI())) {
3119 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
3120 return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3121 } else {
3122 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
3123 return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3124 }
31113125
31123126 return false;
31133127 }
31443158
31453159 if (IDVal == ".amdhsa_kernel")
31463160 return ParseDirectiveAMDHSAKernel();
3161
3162 // TODO: Restructure/combine with PAL metadata directive.
3163 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
3164 return ParseDirectiveHSAMetadata();
31473165 } else {
31483166 if (IDVal == ".hsa_code_object_version")
31493167 return ParseDirectiveHSACodeObjectVersion();
31593177
31603178 if (IDVal == ".amd_amdgpu_isa")
31613179 return ParseDirectiveISAVersion();
3162 }
3163
3164 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
3165 return ParseDirectiveHSAMetadata();
3180
3181 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
3182 return ParseDirectiveHSAMetadata();
3183 }
31663184
31673185 if (IDVal == PALMD::AssemblerDirective)
31683186 return ParseDirectivePALMetadata();
2929 type = Library
3030 name = AMDGPUCodeGen
3131 parent = AMDGPU
32 required_libraries = Analysis AsmPrinter CodeGen Core IPO MC AMDGPUAsmPrinter AMDGPUDesc AMDGPUInfo AMDGPUUtils Scalar SelectionDAG Support Target TransformUtils Vectorize GlobalISel
32 required_libraries = Analysis AsmPrinter CodeGen Core IPO MC AMDGPUAsmPrinter AMDGPUDesc AMDGPUInfo AMDGPUUtils Scalar SelectionDAG Support Target TransformUtils Vectorize GlobalISel BinaryFormat
3333 add_to_library_groups = AMDGPU
1616 #include "Utils/AMDGPUBaseInfo.h"
1717 #include "Utils/AMDKernelCodeTUtils.h"
1818 #include "llvm/ADT/Twine.h"
19 #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
1920 #include "llvm/BinaryFormat/ELF.h"
21 #include "llvm/BinaryFormat/MsgPackTypes.h"
2022 #include "llvm/IR/Constants.h"
2123 #include "llvm/IR/Function.h"
2224 #include "llvm/IR/Metadata.h"
3436
3537 using namespace llvm;
3638 using namespace llvm::AMDGPU;
39 using namespace llvm::AMDGPU::HSAMD;
3740
3841 //===----------------------------------------------------------------------===//
3942 // AMDGPUTargetStreamer
4043 //===----------------------------------------------------------------------===//
4144
42 bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) {
45 bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) {
4346 HSAMD::Metadata HSAMetadata;
4447 if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
4548 return false;
4649
4750 return EmitHSAMetadata(HSAMetadata);
51 }
52
53 bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) {
54 std::shared_ptr HSAMetadataRoot;
55 yaml::Input YIn(HSAMetadataString);
56 YIn >> HSAMetadataRoot;
57 if (YIn.error())
58 return false;
59 return EmitHSAMetadata(HSAMetadataRoot, false);
4860 }
4961
5062 StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
194206 if (HSAMD::toString(HSAMetadata, HSAMetadataString))
195207 return false;
196208
197 OS << '\t' << HSAMD::AssemblerDirectiveBegin << '\n';
209 OS << '\t' << AssemblerDirectiveBegin << '\n';
198210 OS << HSAMetadataString << '\n';
199 OS << '\t' << HSAMD::AssemblerDirectiveEnd << '\n';
211 OS << '\t' << AssemblerDirectiveEnd << '\n';
212 return true;
213 }
214
215 bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
216 std::shared_ptr &HSAMetadataRoot, bool Strict) {
217 V3::MetadataVerifier Verifier(Strict);
218 if (!Verifier.verify(*HSAMetadataRoot))
219 return false;
220
221 std::string HSAMetadataString;
222 raw_string_ostream StrOS(HSAMetadataString);
223 yaml::Output YOut(StrOS);
224 YOut << HSAMetadataRoot;
225
226 OS << '\t' << V3::AssemblerDirectiveBegin << '\n';
227 OS << StrOS.str() << '\n';
228 OS << '\t' << V3::AssemblerDirectiveEnd << '\n';
200229 return true;
201230 }
202231
357386 return static_cast(Streamer);
358387 }
359388
360 void AMDGPUTargetELFStreamer::EmitAMDGPUNote(
361 const MCExpr *DescSZ, unsigned NoteType,
389 void AMDGPUTargetELFStreamer::EmitNote(
390 StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
362391 function_ref EmitDesc) {
363392 auto &S = getStreamer();
364393 auto &Context = S.getContext();
365394
366 auto NameSZ = sizeof(ElfNote::NoteName);
395 auto NameSZ = Name.size() + 1;
367396
368397 S.PushSection();
369398 S.SwitchSection(Context.getELFSection(
371400 S.EmitIntValue(NameSZ, 4); // namesz
372401 S.EmitValue(DescSZ, 4); // descz
373402 S.EmitIntValue(NoteType, 4); // type
374 S.EmitBytes(StringRef(ElfNote::NoteName, NameSZ)); // name
403 S.EmitBytes(Name); // name
375404 S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
376405 EmitDesc(S); // desc
377406 S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
383412 void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(
384413 uint32_t Major, uint32_t Minor) {
385414
386 EmitAMDGPUNote(
387 MCConstantExpr::create(8, getContext()),
388 ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION,
389 [&](MCELFStreamer &OS){
390 OS.EmitIntValue(Major, 4);
391 OS.EmitIntValue(Minor, 4);
392 }
393 );
415 EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(8, getContext()),
416 ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) {
417 OS.EmitIntValue(Major, 4);
418 OS.EmitIntValue(Minor, 4);
419 });
394420 }
395421
396422 void
406432 sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
407433 VendorNameSize + ArchNameSize;
408434
409 EmitAMDGPUNote(
410 MCConstantExpr::create(DescSZ, getContext()),
411 ElfNote::NT_AMDGPU_HSA_ISA,
412 [&](MCELFStreamer &OS) {
413 OS.EmitIntValue(VendorNameSize, 2);
414 OS.EmitIntValue(ArchNameSize, 2);
415 OS.EmitIntValue(Major, 4);
416 OS.EmitIntValue(Minor, 4);
417 OS.EmitIntValue(Stepping, 4);
418 OS.EmitBytes(VendorName);
419 OS.EmitIntValue(0, 1); // NULL terminate VendorName
420 OS.EmitBytes(ArchName);
421 OS.EmitIntValue(0, 1); // NULL terminte ArchName
422 }
423 );
435 EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(DescSZ, getContext()),
436 ElfNote::NT_AMDGPU_HSA_ISA, [&](MCELFStreamer &OS) {
437 OS.EmitIntValue(VendorNameSize, 2);
438 OS.EmitIntValue(ArchNameSize, 2);
439 OS.EmitIntValue(Major, 4);
440 OS.EmitIntValue(Minor, 4);
441 OS.EmitIntValue(Stepping, 4);
442 OS.EmitBytes(VendorName);
443 OS.EmitIntValue(0, 1); // NULL terminate VendorName
444 OS.EmitBytes(ArchName);
445 OS.EmitIntValue(0, 1); // NULL terminte ArchName
446 });
424447 }
425448
426449 void
449472 MCSymbolRefExpr::create(DescEnd, Context),
450473 MCSymbolRefExpr::create(DescBegin, Context), Context);
451474
452 EmitAMDGPUNote(
453 DescSZ,
454 ELF::NT_AMD_AMDGPU_ISA,
455 [&](MCELFStreamer &OS) {
456 OS.EmitLabel(DescBegin);
457 OS.EmitBytes(IsaVersionString);
458 OS.EmitLabel(DescEnd);
459 }
460 );
475 EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_ISA,
476 [&](MCELFStreamer &OS) {
477 OS.EmitLabel(DescBegin);
478 OS.EmitBytes(IsaVersionString);
479 OS.EmitLabel(DescEnd);
480 });
481 return true;
482 }
483
484 bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
485 std::shared_ptr &HSAMetadataRoot, bool Strict) {
486 V3::MetadataVerifier Verifier(Strict);
487 if (!Verifier.verify(*HSAMetadataRoot))
488 return false;
489
490 std::string HSAMetadataString;
491 raw_string_ostream StrOS(HSAMetadataString);
492 msgpack::Writer MPWriter(StrOS);
493 HSAMetadataRoot->write(MPWriter);
494
495 // Create two labels to mark the beginning and end of the desc field
496 // and a MCExpr to calculate the size of the desc field.
497 auto &Context = getContext();
498 auto *DescBegin = Context.createTempSymbol();
499 auto *DescEnd = Context.createTempSymbol();
500 auto *DescSZ = MCBinaryExpr::createSub(
501 MCSymbolRefExpr::create(DescEnd, Context),
502 MCSymbolRefExpr::create(DescBegin, Context), Context);
503
504 EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA,
505 [&](MCELFStreamer &OS) {
506 OS.EmitLabel(DescBegin);
507 OS.EmitBytes(StrOS.str());
508 OS.EmitLabel(DescEnd);
509 });
461510 return true;
462511 }
463512
476525 MCSymbolRefExpr::create(DescEnd, Context),
477526 MCSymbolRefExpr::create(DescBegin, Context), Context);
478527
479 EmitAMDGPUNote(
480 DescSZ,
481 ELF::NT_AMD_AMDGPU_HSA_METADATA,
482 [&](MCELFStreamer &OS) {
483 OS.EmitLabel(DescBegin);
484 OS.EmitBytes(HSAMetadataString);
485 OS.EmitLabel(DescEnd);
486 }
487 );
528 EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_HSA_METADATA,
529 [&](MCELFStreamer &OS) {
530 OS.EmitLabel(DescBegin);
531 OS.EmitBytes(HSAMetadataString);
532 OS.EmitLabel(DescEnd);
533 });
488534 return true;
489535 }
490536
491537 bool AMDGPUTargetELFStreamer::EmitPALMetadata(
492538 const PALMD::Metadata &PALMetadata) {
493 EmitAMDGPUNote(
494 MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t), getContext()),
495 ELF::NT_AMD_AMDGPU_PAL_METADATA,
496 [&](MCELFStreamer &OS){
497 for (auto I : PALMetadata)
498 OS.EmitIntValue(I, sizeof(uint32_t));
499 }
500 );
539 EmitNote(ElfNote::NoteNameV2,
540 MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t),
541 getContext()),
542 ELF::NT_AMD_AMDGPU_PAL_METADATA, [&](MCELFStreamer &OS) {
543 for (auto I : PALMetadata)
544 OS.EmitIntValue(I, sizeof(uint32_t));
545 });
501546 return true;
502547 }
503548
1010 #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
1111
1212 #include "AMDKernelCodeT.h"
13 #include "llvm/BinaryFormat/MsgPackTypes.h"
1314 #include "llvm/MC/MCStreamer.h"
1415 #include "llvm/MC/MCSubtargetInfo.h"
1516 #include "llvm/Support/AMDGPUMetadata.h"
5152 virtual bool EmitISAVersion(StringRef IsaVersionString) = 0;
5253
5354 /// \returns True on success, false on failure.
54 virtual bool EmitHSAMetadata(StringRef HSAMetadataString);
55 virtual bool EmitHSAMetadataV2(StringRef HSAMetadataString);
56
57 /// \returns True on success, false on failure.
58 virtual bool EmitHSAMetadataV3(StringRef HSAMetadataString);
59
60 /// Emit HSA Metadata
61 ///
62 /// When \p Strict is true, known metadata elements must already be
63 /// well-typed. When \p Strict is false, known types are inferred and
64 /// the \p HSAMetadata structure is updated with the correct types.
65 ///
66 /// \returns True on success, false on failure.
67 virtual bool EmitHSAMetadata(std::shared_ptr &HSAMetadata,
68 bool Strict) = 0;
5569
5670 /// \returns True on success, false on failure.
5771 virtual bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) = 0;
91105 bool EmitISAVersion(StringRef IsaVersionString) override;
92106
93107 /// \returns True on success, false on failure.
108 bool EmitHSAMetadata(std::shared_ptr &HSAMetadata,
109 bool Strict) override;
110
111 /// \returns True on success, false on failure.
94112 bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override;
95113
96114 /// \returns True on success, false on failure.
106124 class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
107125 MCStreamer &Streamer;
108126
109 void EmitAMDGPUNote(const MCExpr *DescSize, unsigned NoteType,
110 function_ref EmitDesc);
127 void EmitNote(StringRef Name, const MCExpr *DescSize, unsigned NoteType,
128 function_ref EmitDesc);
111129
112130 public:
113131 AMDGPUTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
131149 bool EmitISAVersion(StringRef IsaVersionString) override;
132150
133151 /// \returns True on success, false on failure.
152 bool EmitHSAMetadata(std::shared_ptr &HSAMetadata,
153 bool Strict) override;
154
155 /// \returns True on success, false on failure.
134156 bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override;
135157
136158 /// \returns True on success, false on failure.
1818 type = Library
1919 name = AMDGPUDesc
2020 parent = AMDGPU
21 required_libraries = Core MC AMDGPUAsmPrinter AMDGPUInfo AMDGPUUtils Support
21 required_libraries = Core MC AMDGPUAsmPrinter AMDGPUInfo AMDGPUUtils Support BinaryFormat
2222 add_to_library_groups = AMDGPU
0 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=+code-object-v3 < %s | FileCheck --check-prefix=CHECK %s
1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -amdgpu-verify-hsa-metadata -filetype=obj -mattr=+code-object-v3 -o /dev/null < %s 2>&1 | FileCheck --check-prefix=PARSER %s
2
3 ; CHECK-LABEL: {{^}}min_64_max_64:
4 ; CHECK: SGPRBlocks: 0
5 ; CHECK: VGPRBlocks: 0
6 ; CHECK: NumSGPRsForWavesPerEU: 1
7 ; CHECK: NumVGPRsForWavesPerEU: 1
8 define amdgpu_kernel void @min_64_max_64() #0 {
9 entry:
10 ret void
11 }
12 attributes #0 = {"amdgpu-flat-work-group-size"="64,64"}
13
14 ; CHECK-LABEL: {{^}}min_64_max_128:
15 ; CHECK: SGPRBlocks: 0
16 ; CHECK: VGPRBlocks: 0
17 ; CHECK: NumSGPRsForWavesPerEU: 1
18 ; CHECK: NumVGPRsForWavesPerEU: 1
19 define amdgpu_kernel void @min_64_max_128() #1 {
20 entry:
21 ret void
22 }
23 attributes #1 = {"amdgpu-flat-work-group-size"="64,128"}
24
25 ; CHECK-LABEL: {{^}}min_128_max_128:
26 ; CHECK: SGPRBlocks: 0
27 ; CHECK: VGPRBlocks: 0
28 ; CHECK: NumSGPRsForWavesPerEU: 1
29 ; CHECK: NumVGPRsForWavesPerEU: 1
30 define amdgpu_kernel void @min_128_max_128() #2 {
31 entry:
32 ret void
33 }
34 attributes #2 = {"amdgpu-flat-work-group-size"="128,128"}
35
36 ; CHECK-LABEL: {{^}}min_1024_max_2048
37 ; CHECK: SGPRBlocks: 1
38 ; CHECK: VGPRBlocks: 7
39 ; CHECK: NumSGPRsForWavesPerEU: 12
40 ; CHECK: NumVGPRsForWavesPerEU: 32
41 @var = addrspace(1) global float 0.0
42 define amdgpu_kernel void @min_1024_max_2048() #3 {
43 %val0 = load volatile float, float addrspace(1)* @var
44 %val1 = load volatile float, float addrspace(1)* @var
45 %val2 = load volatile float, float addrspace(1)* @var
46 %val3 = load volatile float, float addrspace(1)* @var
47 %val4 = load volatile float, float addrspace(1)* @var
48 %val5 = load volatile float, float addrspace(1)* @var
49 %val6 = load volatile float, float addrspace(1)* @var
50 %val7 = load volatile float, float addrspace(1)* @var
51 %val8 = load volatile float, float addrspace(1)* @var
52 %val9 = load volatile float, float addrspace(1)* @var
53 %val10 = load volatile float, float addrspace(1)* @var
54 %val11 = load volatile float, float addrspace(1)* @var
55 %val12 = load volatile float, float addrspace(1)* @var
56 %val13 = load volatile float, float addrspace(1)* @var
57 %val14 = load volatile float, float addrspace(1)* @var
58 %val15 = load volatile float, float addrspace(1)* @var
59 %val16 = load volatile float, float addrspace(1)* @var
60 %val17 = load volatile float, float addrspace(1)* @var
61 %val18 = load volatile float, float addrspace(1)* @var
62 %val19 = load volatile float, float addrspace(1)* @var
63 %val20 = load volatile float, float addrspace(1)* @var
64 %val21 = load volatile float, float addrspace(1)* @var
65 %val22 = load volatile float, float addrspace(1)* @var
66 %val23 = load volatile float, float addrspace(1)* @var
67 %val24 = load volatile float, float addrspace(1)* @var
68 %val25 = load volatile float, float addrspace(1)* @var
69 %val26 = load volatile float, float addrspace(1)* @var
70 %val27 = load volatile float, float addrspace(1)* @var
71 %val28 = load volatile float, float addrspace(1)* @var
72 %val29 = load volatile float, float addrspace(1)* @var
73 %val30 = load volatile float, float addrspace(1)* @var
74 %val31 = load volatile float, float addrspace(1)* @var
75 %val32 = load volatile float, float addrspace(1)* @var
76 %val33 = load volatile float, float addrspace(1)* @var
77 %val34 = load volatile float, float addrspace(1)* @var
78 %val35 = load volatile float, float addrspace(1)* @var
79 %val36 = load volatile float, float addrspace(1)* @var
80 %val37 = load volatile float, float addrspace(1)* @var
81 %val38 = load volatile float, float addrspace(1)* @var
82 %val39 = load volatile float, float addrspace(1)* @var
83 %val40 = load volatile float, float addrspace(1)* @var
84
85 store volatile float %val0, float addrspace(1)* @var
86 store volatile float %val1, float addrspace(1)* @var
87 store volatile float %val2, float addrspace(1)* @var
88 store volatile float %val3, float addrspace(1)* @var
89 store volatile float %val4, float addrspace(1)* @var
90 store volatile float %val5, float addrspace(1)* @var
91 store volatile float %val6, float addrspace(1)* @var
92 store volatile float %val7, float addrspace(1)* @var
93 store volatile float %val8, float addrspace(1)* @var
94 store volatile float %val9, float addrspace(1)* @var
95 store volatile float %val10, float addrspace(1)* @var
96 store volatile float %val11, float addrspace(1)* @var
97 store volatile float %val12, float addrspace(1)* @var
98 store volatile float %val13, float addrspace(1)* @var
99 store volatile float %val14, float addrspace(1)* @var
100 store volatile float %val15, float addrspace(1)* @var
101 store volatile float %val16, float addrspace(1)* @var
102 store volatile float %val17, float addrspace(1)* @var
103 store volatile float %val18, float addrspace(1)* @var
104 store volatile float %val19, float addrspace(1)* @var
105 store volatile float %val20, float addrspace(1)* @var
106 store volatile float %val21, float addrspace(1)* @var
107 store volatile float %val22, float addrspace(1)* @var
108 store volatile float %val23, float addrspace(1)* @var
109 store volatile float %val24, float addrspace(1)* @var
110 store volatile float %val25, float addrspace(1)* @var
111 store volatile float %val26, float addrspace(1)* @var
112 store volatile float %val27, float addrspace(1)* @var
113 store volatile float %val28, float addrspace(1)* @var
114 store volatile float %val29, float addrspace(1)* @var
115 store volatile float %val30, float addrspace(1)* @var
116 store volatile float %val31, float addrspace(1)* @var
117 store volatile float %val32, float addrspace(1)* @var
118 store volatile float %val33, float addrspace(1)* @var
119 store volatile float %val34, float addrspace(1)* @var
120 store volatile float %val35, float addrspace(1)* @var
121 store volatile float %val36, float addrspace(1)* @var
122 store volatile float %val37, float addrspace(1)* @var
123 store volatile float %val38, float addrspace(1)* @var
124 store volatile float %val39, float addrspace(1)* @var
125 store volatile float %val40, float addrspace(1)* @var
126
127 ret void
128 }
129 attributes #3 = {"amdgpu-flat-work-group-size"="1024,2048"}
130
131 ; CHECK: amdhsa.kernels:
132 ; CHECK: .max_flat_workgroup_size: 64
133 ; CHECK: .name: min_64_max_64
134 ; CHECK: .max_flat_workgroup_size: 128
135 ; CHECK: .name: min_64_max_128
136 ; CHECK: .max_flat_workgroup_size: 128
137 ; CHECK: .name: min_128_max_128
138 ; CHECK: .max_flat_workgroup_size: 2048
139 ; CHECK: .name: min_1024_max_2048
140 ; CHECK: amdhsa.version:
141 ; CHECK: - 1
142 ; CHECK: - 0
143
144 ; PARSER: AMDGPU HSA Metadata Parser Test: PASS
22
33 ; ALL-ASM-LABEL: {{^}}fadd:
44
5 ; OSABI-AMDHSA-ASM-NOT: .hsa_code_object_version
6 ; OSABI-AMDHSA-ASM-NOT: .hsa_code_object_isa
57 ; OSABI-AMDHSA-ASM-NOT: .amdgpu_hsa_kernel
68 ; OSABI-AMDHSA-ASM-NOT: .amd_kernel_code_t
79
5658 ; OSABI-AMDHSA-ELF: {{[0-9]+}}: 0000000000000000 64 OBJECT GLOBAL DEFAULT {{[0-9]+}} fadd.kd
5759 ; OSABI-AMDHSA-ELF: {{[0-9]+}}: 0000000000000040 64 OBJECT GLOBAL DEFAULT {{[0-9]+}} fsub.kd
5860
59 ; OSABI-AMDHSA-ELF-NOT: Displaying notes found
61 ; OSABI-AMDHSA-ELF: Displaying notes found at file offset
62 ; OSABI-AMDHSA-ELF: AMDGPU 0x{{[0-9a-f]+}} NT_AMDGPU_METADATA (AMDGPU Metadata)
6063
6164 define amdgpu_kernel void @fadd(
6265 float addrspace(1)* %r,
0 ; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s
1
2 ; CHECK: .symbol: test_ro_arg.kd
3 ; CHECK: .name: test_ro_arg
4 ; CHECK: .args:
5 ; CHECK-NEXT: - .type_name: 'float*'
6 ; CHECK-NEXT: .value_kind: global_buffer
7 ; CHECK-NEXT: .name: in
8 ; CHECK-NEXT: .access: read_only
9 ; CHECK-NEXT: .offset: 0
10 ; CHECK-NEXT: .is_const: true
11 ; CHECK-NEXT: .size: 8
12 ; CHECK-NEXT: .is_restrict: true
13 ; CHECK-NEXT: .value_type: f32
14 ; CHECK-NEXT: .address_space: global
15 ; CHECK-NEXT: - .type_name: 'float*'
16 ; CHECK-NEXT: .value_kind: global_buffer
17 ; CHECK-NEXT: .name: out
18 ; CHECK-NEXT: .offset: 8
19 ; CHECK-NEXT: .size: 8
20 ; CHECK-NEXT: .value_type: f32
21 ; CHECK-NEXT: .address_space: global
22
23 define amdgpu_kernel void @test_ro_arg(float addrspace(1)* noalias readonly %in, float addrspace(1)* %out)
24 !kernel_arg_addr_space !0 !kernel_arg_access_qual !1 !kernel_arg_type !2
25 !kernel_arg_base_type !2 !kernel_arg_type_qual !3 {
26 ret void
27 }
28
29 !0 = !{i32 1, i32 1}
30 !1 = !{!"none", !"none"}
31 !2 = !{!"float*", !"float*"}
32 !3 = !{!"const restrict", !""}
0 ; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
1 ; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
2
3 ; CHECK: ---
4 ; CHECK: amdhsa.kernels:
5 ; CHECK: .symbol: test_non_enqueue_kernel_caller.kd
6 ; CHECK: .name: test_non_enqueue_kernel_caller
7 ; CHECK: .language: OpenCL C
8 ; CHECK: .language_version:
9 ; CHECK-NEXT: - 2
10 ; CHECK-NEXT: - 0
11 ; CHECK: .args:
12 ; CHECK-NEXT: - .type_name: char
13 ; CHECK-NEXT: .value_kind: by_value
14 ; CHECK-NEXT: .offset: 0
15 ; CHECK-NEXT: .size: 1
16 ; CHECK-NEXT: .value_type: i8
17 ; CHECK-NEXT: .name: a
18 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
19 ; CHECK-NEXT: .offset: 8
20 ; CHECK-NEXT: .size: 8
21 ; CHECK-NEXT: .value_type: i64
22 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
23 ; CHECK-NEXT: .offset: 16
24 ; CHECK-NEXT: .size: 8
25 ; CHECK-NEXT: .value_type: i64
26 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
27 ; CHECK-NEXT: .offset: 24
28 ; CHECK-NEXT: .size: 8
29 ; CHECK-NEXT: .value_type: i64
30 ; CHECK-NOT: .value_kind: hidden_default_queue
31 ; CHECK-NOT: .value_kind: hidden_completion_action
32 define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a)
33 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
34 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
35 ret void
36 }
37
38 ; CHECK: .symbol: test_enqueue_kernel_caller.kd
39 ; CHECK: .name: test_enqueue_kernel_caller
40 ; CHECK: .language: OpenCL C
41 ; CHECK: .language_version:
42 ; CHECK-NEXT: - 2
43 ; CHECK-NEXT: - 0
44 ; CHECK: .args:
45 ; CHECK-NEXT: - .type_name: char
46 ; CHECK-NEXT: .value_kind: by_value
47 ; CHECK-NEXT: .offset: 0
48 ; CHECK-NEXT: .size: 1
49 ; CHECK-NEXT: .value_type: i8
50 ; CHECK-NEXT: .name: a
51 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
52 ; CHECK-NEXT: .offset: 8
53 ; CHECK-NEXT: .size: 8
54 ; CHECK-NEXT: .value_type: i64
55 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
56 ; CHECK-NEXT: .offset: 16
57 ; CHECK-NEXT: .size: 8
58 ; CHECK-NEXT: .value_type: i64
59 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
60 ; CHECK-NEXT: .offset: 24
61 ; CHECK-NEXT: .size: 8
62 ; CHECK-NEXT: .value_type: i64
63 ; CHECK-NEXT: - .value_kind: hidden_none
64 ; CHECK-NEXT: .offset: 32
65 ; CHECK-NEXT: .size: 8
66 ; CHECK-NEXT: .value_type: i8
67 ; CHECK-NEXT: .address_space: global
68 ; CHECK-NEXT: - .value_kind: hidden_default_queue
69 ; CHECK-NEXT: .offset: 40
70 ; CHECK-NEXT: .size: 8
71 ; CHECK-NEXT: .value_type: i8
72 ; CHECK-NEXT: .address_space: global
73 ; CHECK-NEXT: - .value_kind: hidden_completion_action
74 ; CHECK-NEXT: .offset: 48
75 ; CHECK-NEXT: .size: 8
76 ; CHECK-NEXT: .value_type: i8
77 ; CHECK-NEXT: .address_space: global
78 define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #0
79 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
80 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
81 ret void
82 }
83
84 ; CHECK: amdhsa.version:
85 ; CHECK-NEXT: - 1
86 ; CHECK-NEXT: - 0
87 ; CHECK-NOT: amdhsa.printf:
88
89 attributes #0 = { "calls-enqueue-kernel" }
90
91 !1 = !{i32 0}
92 !2 = !{!"none"}
93 !3 = !{!"char"}
94 !4 = !{!""}
95
96 !opencl.ocl.version = !{!90}
97 !90 = !{i32 2, i32 0}
98
99
100 ; PARSER: AMDGPU HSA Metadata Parser Test: PASS
0 ; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
1 ; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX802 --check-prefix=NOTES %s
2 ; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
3 ; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
4 ; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
5 ; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
6
7 %struct.A = type { i8, float }
8 %opencl.image1d_t = type opaque
9 %opencl.image2d_t = type opaque
10 %opencl.image3d_t = type opaque
11 %opencl.queue_t = type opaque
12 %opencl.pipe_t = type opaque
13 %struct.B = type { i32 addrspace(1)*}
14 %opencl.clk_event_t = type opaque
15
16 @__test_block_invoke_kernel_runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)*
17
18 ; CHECK: ---
19 ; CHECK: amdhsa.kernels:
20 ; CHECK: .symbol: test_char.kd
21 ; CHECK: .name: test_char
22 ; CHECK: .language: OpenCL C
23 ; CHECK: .language_version:
24 ; CHECK-NEXT: - 2
25 ; CHECK-NEXT: - 0
26 ; CHECK: .args:
27 ; CHECK-NEXT: - .type_name: char
28 ; CHECK-NEXT: .value_kind: by_value
29 ; CHECK-NEXT: .offset: 0
30 ; CHECK-NEXT: .size: 1
31 ; CHECK-NEXT: .value_type: i8
32 ; CHECK-NEXT: .name: a
33 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
34 ; CHECK-NEXT: .offset: 8
35 ; CHECK-NEXT: .size: 8
36 ; CHECK-NEXT: .value_type: i64
37 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
38 ; CHECK-NEXT: .offset: 16
39 ; CHECK-NEXT: .size: 8
40 ; CHECK-NEXT: .value_type: i64
41 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
42 ; CHECK-NEXT: .offset: 24
43 ; CHECK-NEXT: .size: 8
44 ; CHECK-NEXT: .value_type: i64
45 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
46 ; CHECK-NEXT: .offset: 32
47 ; CHECK-NEXT: .size: 8
48 ; CHECK-NEXT: .value_type: i8
49 ; CHECK-NEXT: .address_space: global
50 ; CHECK-NOT: .value_kind: hidden_default_queue
51 ; CHECK-NOT: .value_kind: hidden_completion_action
52 define amdgpu_kernel void @test_char(i8 %a)
53 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9
54 !kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
55 ret void
56 }
57
58 ; CHECK: .symbol: test_ushort2.kd
59 ; CHECK: .name: test_ushort2
60 ; CHECK: .language: OpenCL C
61 ; CHECK: .language_version:
62 ; CHECK-NEXT: - 2
63 ; CHECK-NEXT: - 0
64 ; CHECK: .args:
65 ; CHECK-NEXT: - .type_name: ushort2
66 ; CHECK-NEXT: .value_kind: by_value
67 ; CHECK-NEXT: .offset: 0
68 ; CHECK-NEXT: .size: 4
69 ; CHECK-NEXT: .value_type: u16
70 ; CHECK-NEXT: .name: a
71 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
72 ; CHECK-NEXT: .offset: 8
73 ; CHECK-NEXT: .size: 8
74 ; CHECK-NEXT: .value_type: i64
75 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
76 ; CHECK-NEXT: .offset: 16
77 ; CHECK-NEXT: .size: 8
78 ; CHECK-NEXT: .value_type: i64
79 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
80 ; CHECK-NEXT: .offset: 24
81 ; CHECK-NEXT: .size: 8
82 ; CHECK-NEXT: .value_type: i64
83 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
84 ; CHECK-NEXT: .offset: 32
85 ; CHECK-NEXT: .size: 8
86 ; CHECK-NEXT: .value_type: i8
87 ; CHECK-NEXT: .address_space: global
88 define amdgpu_kernel void @test_ushort2(<2 x i16> %a)
89 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !10
90 !kernel_arg_base_type !10 !kernel_arg_type_qual !4 {
91 ret void
92 }
93
94 ; CHECK: .symbol: test_int3.kd
95 ; CHECK: .name: test_int3
96 ; CHECK: .language: OpenCL C
97 ; CHECK: .language_version:
98 ; CHECK-NEXT: - 2
99 ; CHECK-NEXT: - 0
100 ; CHECK: .args:
101 ; CHECK-NEXT: - .type_name: int3
102 ; CHECK-NEXT: .value_kind: by_value
103 ; CHECK-NEXT: .offset: 0
104 ; CHECK-NEXT: .size: 16
105 ; CHECK-NEXT: .value_type: i32
106 ; CHECK-NEXT: .name: a
107 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
108 ; CHECK-NEXT: .offset: 16
109 ; CHECK-NEXT: .size: 8
110 ; CHECK-NEXT: .value_type: i64
111 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
112 ; CHECK-NEXT: .offset: 24
113 ; CHECK-NEXT: .size: 8
114 ; CHECK-NEXT: .value_type: i64
115 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
116 ; CHECK-NEXT: .offset: 32
117 ; CHECK-NEXT: .size: 8
118 ; CHECK-NEXT: .value_type: i64
119 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
120 ; CHECK-NEXT: .offset: 40
121 ; CHECK-NEXT: .size: 8
122 ; CHECK-NEXT: .value_type: i8
123 ; CHECK-NEXT: .address_space: global
124 define amdgpu_kernel void @test_int3(<3 x i32> %a)
125 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !11
126 !kernel_arg_base_type !11 !kernel_arg_type_qual !4 {
127 ret void
128 }
129
130 ; CHECK: .symbol: test_ulong4.kd
131 ; CHECK: .name: test_ulong4
132 ; CHECK: .language: OpenCL C
133 ; CHECK: .language_version:
134 ; CHECK-NEXT: - 2
135 ; CHECK-NEXT: - 0
136 ; CHECK: .args:
137 ; CHECK-NEXT: - .type_name: ulong4
138 ; CHECK-NEXT: .value_kind: by_value
139 ; CHECK-NEXT: .offset: 0
140 ; CHECK-NEXT: .size: 32
141 ; CHECK-NEXT: .value_type: u64
142 ; CHECK-NEXT: .name: a
143 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
144 ; CHECK-NEXT: .offset: 32
145 ; CHECK-NEXT: .size: 8
146 ; CHECK-NEXT: .value_type: i64
147 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
148 ; CHECK-NEXT: .offset: 40
149 ; CHECK-NEXT: .size: 8
150 ; CHECK-NEXT: .value_type: i64
151 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
152 ; CHECK-NEXT: .offset: 48
153 ; CHECK-NEXT: .size: 8
154 ; CHECK-NEXT: .value_type: i64
155 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
156 ; CHECK-NEXT: .offset: 56
157 ; CHECK-NEXT: .size: 8
158 ; CHECK-NEXT: .value_type: i8
159 ; CHECK-NEXT: .address_space: global
160 define amdgpu_kernel void @test_ulong4(<4 x i64> %a)
161 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !12
162 !kernel_arg_base_type !12 !kernel_arg_type_qual !4 {
163 ret void
164 }
165
166 ; CHECK: .symbol: test_half8.kd
167 ; CHECK: .name: test_half8
168 ; CHECK: .language: OpenCL C
169 ; CHECK: .language_version:
170 ; CHECK-NEXT: - 2
171 ; CHECK-NEXT: - 0
172 ; CHECK: .args:
173 ; CHECK-NEXT: - .type_name: half8
174 ; CHECK-NEXT: .value_kind: by_value
175 ; CHECK-NEXT: .offset: 0
176 ; CHECK-NEXT: .size: 16
177 ; CHECK-NEXT: .value_type: f16
178 ; CHECK-NEXT: .name: a
179 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
180 ; CHECK-NEXT: .offset: 16
181 ; CHECK-NEXT: .size: 8
182 ; CHECK-NEXT: .value_type: i64
183 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
184 ; CHECK-NEXT: .offset: 24
185 ; CHECK-NEXT: .size: 8
186 ; CHECK-NEXT: .value_type: i64
187 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
188 ; CHECK-NEXT: .offset: 32
189 ; CHECK-NEXT: .size: 8
190 ; CHECK-NEXT: .value_type: i64
191 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
192 ; CHECK-NEXT: .offset: 40
193 ; CHECK-NEXT: .size: 8
194 ; CHECK-NEXT: .value_type: i8
195 ; CHECK-NEXT: .address_space: global
196 define amdgpu_kernel void @test_half8(<8 x half> %a)
197 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !13
198 !kernel_arg_base_type !13 !kernel_arg_type_qual !4 {
199 ret void
200 }
201
202 ; CHECK: .symbol: test_float16.kd
203 ; CHECK: .name: test_float16
204 ; CHECK: .language: OpenCL C
205 ; CHECK: .language_version:
206 ; CHECK-NEXT: - 2
207 ; CHECK-NEXT: - 0
208 ; CHECK: .args:
209 ; CHECK-NEXT: - .type_name: float16
210 ; CHECK-NEXT: .value_kind: by_value
211 ; CHECK-NEXT: .offset: 0
212 ; CHECK-NEXT: .size: 64
213 ; CHECK-NEXT: .value_type: f32
214 ; CHECK-NEXT: .name: a
215 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
216 ; CHECK-NEXT: .offset: 64
217 ; CHECK-NEXT: .size: 8
218 ; CHECK-NEXT: .value_type: i64
219 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
220 ; CHECK-NEXT: .offset: 72
221 ; CHECK-NEXT: .size: 8
222 ; CHECK-NEXT: .value_type: i64
223 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
224 ; CHECK-NEXT: .offset: 80
225 ; CHECK-NEXT: .size: 8
226 ; CHECK-NEXT: .value_type: i64
227 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
228 ; CHECK-NEXT: .offset: 88
229 ; CHECK-NEXT: .size: 8
230 ; CHECK-NEXT: .value_type: i8
231 ; CHECK-NEXT: .address_space: global
232 define amdgpu_kernel void @test_float16(<16 x float> %a)
233 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !14
234 !kernel_arg_base_type !14 !kernel_arg_type_qual !4 {
235 ret void
236 }
237
238 ; CHECK: .symbol: test_double16.kd
239 ; CHECK: .name: test_double16
240 ; CHECK: .language: OpenCL C
241 ; CHECK: .language_version:
242 ; CHECK-NEXT: - 2
243 ; CHECK-NEXT: - 0
244 ; CHECK: .args:
245 ; CHECK-NEXT: - .type_name: double16
246 ; CHECK-NEXT: .value_kind: by_value
247 ; CHECK-NEXT: .offset: 0
248 ; CHECK-NEXT: .size: 128
249 ; CHECK-NEXT: .value_type: f64
250 ; CHECK-NEXT: .name: a
251 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
252 ; CHECK-NEXT: .offset: 128
253 ; CHECK-NEXT: .size: 8
254 ; CHECK-NEXT: .value_type: i64
255 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
256 ; CHECK-NEXT: .offset: 136
257 ; CHECK-NEXT: .size: 8
258 ; CHECK-NEXT: .value_type: i64
259 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
260 ; CHECK-NEXT: .offset: 144
261 ; CHECK-NEXT: .size: 8
262 ; CHECK-NEXT: .value_type: i64
263 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
264 ; CHECK-NEXT: .offset: 152
265 ; CHECK-NEXT: .size: 8
266 ; CHECK-NEXT: .value_type: i8
267 ; CHECK-NEXT: .address_space: global
268 define amdgpu_kernel void @test_double16(<16 x double> %a)
269 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !15
270 !kernel_arg_base_type !15 !kernel_arg_type_qual !4 {
271 ret void
272 }
273
274 ; CHECK: .symbol: test_pointer.kd
275 ; CHECK: .name: test_pointer
276 ; CHECK: .language: OpenCL C
277 ; CHECK: .language_version:
278 ; CHECK-NEXT: - 2
279 ; CHECK-NEXT: - 0
280 ; CHECK: .args:
281 ; CHECK-NEXT: - .type_name: 'int addrspace(5)*'
282 ; CHECK-NEXT: .value_kind: global_buffer
283 ; CHECK-NEXT: .name: a
284 ; CHECK-NEXT: .offset: 0
285 ; CHECK-NEXT: .size: 8
286 ; CHECK-NEXT: .value_type: i32
287 ; CHECK-NEXT: .address_space: global
288 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
289 ; CHECK-NEXT: .offset: 8
290 ; CHECK-NEXT: .size: 8
291 ; CHECK-NEXT: .value_type: i64
292 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
293 ; CHECK-NEXT: .offset: 16
294 ; CHECK-NEXT: .size: 8
295 ; CHECK-NEXT: .value_type: i64
296 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
297 ; CHECK-NEXT: .offset: 24
298 ; CHECK-NEXT: .size: 8
299 ; CHECK-NEXT: .value_type: i64
300 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
301 ; CHECK-NEXT: .offset: 32
302 ; CHECK-NEXT: .size: 8
303 ; CHECK-NEXT: .value_type: i8
304 ; CHECK-NEXT: .address_space: global
305 define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a)
306 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !16
307 !kernel_arg_base_type !16 !kernel_arg_type_qual !4 {
308 ret void
309 }
310
311 ; CHECK: .symbol: test_image.kd
312 ; CHECK: .name: test_image
313 ; CHECK: .language: OpenCL C
314 ; CHECK: .language_version:
315 ; CHECK-NEXT: - 2
316 ; CHECK-NEXT: - 0
317 ; CHECK: .args:
318 ; CHECK-NEXT: - .type_name: image2d_t
319 ; CHECK-NEXT: .value_kind: image
320 ; CHECK-NEXT: .name: a
321 ; CHECK-NEXT: .offset: 0
322 ; CHECK-NEXT: .size: 8
323 ; CHECK-NEXT: .value_type: struct
324 ; CHECK-NEXT: .address_space: global
325 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
326 ; CHECK-NEXT: .offset: 8
327 ; CHECK-NEXT: .size: 8
328 ; CHECK-NEXT: .value_type: i64
329 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
330 ; CHECK-NEXT: .offset: 16
331 ; CHECK-NEXT: .size: 8
332 ; CHECK-NEXT: .value_type: i64
333 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
334 ; CHECK-NEXT: .offset: 24
335 ; CHECK-NEXT: .size: 8
336 ; CHECK-NEXT: .value_type: i64
337 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
338 ; CHECK-NEXT: .offset: 32
339 ; CHECK-NEXT: .size: 8
340 ; CHECK-NEXT: .value_type: i8
341 ; CHECK-NEXT: .address_space: global
342 define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a)
343 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !17
344 !kernel_arg_base_type !17 !kernel_arg_type_qual !4 {
345 ret void
346 }
347
348 ; CHECK: .symbol: test_sampler.kd
349 ; CHECK: .name: test_sampler
350 ; CHECK: .language: OpenCL C
351 ; CHECK: .language_version:
352 ; CHECK-NEXT: - 2
353 ; CHECK-NEXT: - 0
354 ; CHECK: .args:
355 ; CHECK-NEXT: - .type_name: sampler_t
356 ; CHECK-NEXT: .value_kind: sampler
357 ; CHECK-NEXT: .offset: 0
358 ; CHECK-NEXT: .size: 4
359 ; CHECK-NEXT: .value_type: i32
360 ; CHECK-NEXT: .name: a
361 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
362 ; CHECK-NEXT: .offset: 8
363 ; CHECK-NEXT: .size: 8
364 ; CHECK-NEXT: .value_type: i64
365 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
366 ; CHECK-NEXT: .offset: 16
367 ; CHECK-NEXT: .size: 8
368 ; CHECK-NEXT: .value_type: i64
369 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
370 ; CHECK-NEXT: .offset: 24
371 ; CHECK-NEXT: .size: 8
372 ; CHECK-NEXT: .value_type: i64
373 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
374 ; CHECK-NEXT: .offset: 32
375 ; CHECK-NEXT: .size: 8
376 ; CHECK-NEXT: .value_type: i8
377 ; CHECK-NEXT: .address_space: global
378 define amdgpu_kernel void @test_sampler(i32 %a)
379 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !18
380 !kernel_arg_base_type !18 !kernel_arg_type_qual !4 {
381 ret void
382 }
383
384 ; CHECK: .symbol: test_queue.kd
385 ; CHECK: .name: test_queue
386 ; CHECK: .language: OpenCL C
387 ; CHECK: .language_version:
388 ; CHECK-NEXT: - 2
389 ; CHECK-NEXT: - 0
390 ; CHECK: .args:
391 ; CHECK-NEXT: - .type_name: queue_t
392 ; CHECK-NEXT: .value_kind: queue
393 ; CHECK-NEXT: .name: a
394 ; CHECK-NEXT: .offset: 0
395 ; CHECK-NEXT: .size: 8
396 ; CHECK-NEXT: .value_type: struct
397 ; CHECK-NEXT: .address_space: global
398 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
399 ; CHECK-NEXT: .offset: 8
400 ; CHECK-NEXT: .size: 8
401 ; CHECK-NEXT: .value_type: i64
402 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
403 ; CHECK-NEXT: .offset: 16
404 ; CHECK-NEXT: .size: 8
405 ; CHECK-NEXT: .value_type: i64
406 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
407 ; CHECK-NEXT: .offset: 24
408 ; CHECK-NEXT: .size: 8
409 ; CHECK-NEXT: .value_type: i64
410 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
411 ; CHECK-NEXT: .offset: 32
412 ; CHECK-NEXT: .size: 8
413 ; CHECK-NEXT: .value_type: i8
414 ; CHECK-NEXT: .address_space: global
415 define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a)
416 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !19
417 !kernel_arg_base_type !19 !kernel_arg_type_qual !4 {
418 ret void
419 }
420
421 ; CHECK: .symbol: test_struct.kd
422 ; CHECK: .name: test_struct
423 ; CHECK: .language: OpenCL C
424 ; CHECK: .language_version:
425 ; CHECK-NEXT: - 2
426 ; CHECK-NEXT: - 0
427 ; CHECK: .args:
428 ; CHECK-NEXT: - .type_name: struct A
429 ; CHECK-NEXT: .value_kind: global_buffer
430 ; CHECK-NEXT: .name: a
431 ; CHECK-NEXT: .offset: 0
432 ; CHECK-NEXT: .size: 4
433 ; CHECK-NEXT: .value_type: struct
434 ; CHECK-NEXT: .address_space: private
435 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
436 ; CHECK-NEXT: .offset: 8
437 ; CHECK-NEXT: .size: 8
438 ; CHECK-NEXT: .value_type: i64
439 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
440 ; CHECK-NEXT: .offset: 16
441 ; CHECK-NEXT: .size: 8
442 ; CHECK-NEXT: .value_type: i64
443 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
444 ; CHECK-NEXT: .offset: 24
445 ; CHECK-NEXT: .size: 8
446 ; CHECK-NEXT: .value_type: i64
447 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
448 ; CHECK-NEXT: .offset: 32
449 ; CHECK-NEXT: .size: 8
450 ; CHECK-NEXT: .value_type: i8
451 ; CHECK-NEXT: .address_space: global
452 define amdgpu_kernel void @test_struct(%struct.A addrspace(5)* byval %a)
453 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20
454 !kernel_arg_base_type !20 !kernel_arg_type_qual !4 {
455 ret void
456 }
457
458 ; CHECK: .symbol: test_i128.kd
459 ; CHECK: .name: test_i128
460 ; CHECK: .language: OpenCL C
461 ; CHECK: .language_version:
462 ; CHECK-NEXT: - 2
463 ; CHECK-NEXT: - 0
464 ; CHECK: .args:
465 ; CHECK-NEXT: - .type_name: i128
466 ; CHECK-NEXT: .value_kind: by_value
467 ; CHECK-NEXT: .offset: 0
468 ; CHECK-NEXT: .size: 16
469 ; CHECK-NEXT: .value_type: struct
470 ; CHECK-NEXT: .name: a
471 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
472 ; CHECK-NEXT: .offset: 16
473 ; CHECK-NEXT: .size: 8
474 ; CHECK-NEXT: .value_type: i64
475 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
476 ; CHECK-NEXT: .offset: 24
477 ; CHECK-NEXT: .size: 8
478 ; CHECK-NEXT: .value_type: i64
479 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
480 ; CHECK-NEXT: .offset: 32
481 ; CHECK-NEXT: .size: 8
482 ; CHECK-NEXT: .value_type: i64
483 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
484 ; CHECK-NEXT: .offset: 40
485 ; CHECK-NEXT: .size: 8
486 ; CHECK-NEXT: .value_type: i8
487 ; CHECK-NEXT: .address_space: global
488 define amdgpu_kernel void @test_i128(i128 %a)
489 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !21
490 !kernel_arg_base_type !21 !kernel_arg_type_qual !4 {
491 ret void
492 }
493
494 ; CHECK: .symbol: test_multi_arg.kd
495 ; CHECK: .name: test_multi_arg
496 ; CHECK: .language: OpenCL C
497 ; CHECK: .language_version:
498 ; CHECK-NEXT: - 2
499 ; CHECK-NEXT: - 0
500 ; CHECK: .args:
501 ; CHECK-NEXT: - .type_name: int
502 ; CHECK-NEXT: .value_kind: by_value
503 ; CHECK-NEXT: .offset: 0
504 ; CHECK-NEXT: .size: 4
505 ; CHECK-NEXT: .value_type: i32
506 ; CHECK-NEXT: .name: a
507 ; CHECK-NEXT: - .type_name: short2
508 ; CHECK-NEXT: .value_kind: by_value
509 ; CHECK-NEXT: .offset: 4
510 ; CHECK-NEXT: .size: 4
511 ; CHECK-NEXT: .value_type: i16
512 ; CHECK-NEXT: .name: b
513 ; CHECK-NEXT: - .type_name: char3
514 ; CHECK-NEXT: .value_kind: by_value
515 ; CHECK-NEXT: .offset: 8
516 ; CHECK-NEXT: .size: 4
517 ; CHECK-NEXT: .value_type: i8
518 ; CHECK-NEXT: .name: c
519 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
520 ; CHECK-NEXT: .offset: 16
521 ; CHECK-NEXT: .size: 8
522 ; CHECK-NEXT: .value_type: i64
523 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
524 ; CHECK-NEXT: .offset: 24
525 ; CHECK-NEXT: .size: 8
526 ; CHECK-NEXT: .value_type: i64
527 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
528 ; CHECK-NEXT: .offset: 32
529 ; CHECK-NEXT: .size: 8
530 ; CHECK-NEXT: .value_type: i64
531 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
532 ; CHECK-NEXT: .offset: 40
533 ; CHECK-NEXT: .size: 8
534 ; CHECK-NEXT: .value_type: i8
535 ; CHECK-NEXT: .address_space: global
536 define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c)
537 !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !24
538 !kernel_arg_base_type !24 !kernel_arg_type_qual !25 {
539 ret void
540 }
541
542 ; CHECK: .symbol: test_addr_space.kd
543 ; CHECK: .name: test_addr_space
544 ; CHECK: .language: OpenCL C
545 ; CHECK: .language_version:
546 ; CHECK-NEXT: - 2
547 ; CHECK-NEXT: - 0
548 ; CHECK: .args:
549 ; CHECK-NEXT: - .type_name: 'int addrspace(5)*'
550 ; CHECK-NEXT: .value_kind: global_buffer
551 ; CHECK-NEXT: .name: g
552 ; CHECK-NEXT: .offset: 0
553 ; CHECK-NEXT: .size: 8
554 ; CHECK-NEXT: .value_type: i32
555 ; CHECK-NEXT: .address_space: global
556 ; CHECK-NEXT: - .type_name: 'int addrspace(5)*'
557 ; CHECK-NEXT: .value_kind: global_buffer
558 ; CHECK-NEXT: .name: c
559 ; CHECK-NEXT: .offset: 8
560 ; CHECK-NEXT: .size: 8
561 ; CHECK-NEXT: .value_type: i32
562 ; CHECK-NEXT: .address_space: constant
563 ; CHECK-NEXT: - .type_name: 'int addrspace(5)*'
564 ; CHECK-NEXT: .value_kind: dynamic_shared_pointer
565 ; CHECK-NEXT: .name: l
566 ; CHECK-NEXT: .offset: 16
567 ; CHECK-NEXT: .size: 4
568 ; CHECK-NEXT: .value_type: i32
569 ; CHECK-NEXT: .pointee_align: 4
570 ; CHECK-NEXT: .address_space: local
571 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
572 ; CHECK-NEXT: .offset: 24
573 ; CHECK-NEXT: .size: 8
574 ; CHECK-NEXT: .value_type: i64
575 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
576 ; CHECK-NEXT: .offset: 32
577 ; CHECK-NEXT: .size: 8
578 ; CHECK-NEXT: .value_type: i64
579 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
580 ; CHECK-NEXT: .offset: 40
581 ; CHECK-NEXT: .size: 8
582 ; CHECK-NEXT: .value_type: i64
583 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
584 ; CHECK-NEXT: .offset: 48
585 ; CHECK-NEXT: .size: 8
586 ; CHECK-NEXT: .value_type: i8
587 ; CHECK-NEXT: .address_space: global
588 define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g,
589 i32 addrspace(4)* %c,
590 i32 addrspace(3)* %l)
591 !kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51
592 !kernel_arg_base_type !51 !kernel_arg_type_qual !25 {
593 ret void
594 }
595
596 ; CHECK: .symbol: test_type_qual.kd
597 ; CHECK: .name: test_type_qual
598 ; CHECK: .language: OpenCL C
599 ; CHECK: .language_version:
600 ; CHECK-NEXT: - 2
601 ; CHECK-NEXT: - 0
602 ; CHECK: .args:
603 ; CHECK-NEXT: - .type_name: 'int addrspace(5)*'
604 ; CHECK-NEXT: .value_kind: global_buffer
605 ; CHECK-NEXT: .name: a
606 ; CHECK-NEXT: .offset: 0
607 ; CHECK-NEXT: .size: 8
608 ; CHECK-NEXT: .is_volatile: true
609 ; CHECK-NEXT: .value_type: i32
610 ; CHECK-NEXT: .address_space: global
611 ; CHECK-NEXT: - .type_name: 'int addrspace(5)*'
612 ; CHECK-NEXT: .value_kind: global_buffer
613 ; CHECK-NEXT: .name: b
614 ; CHECK-NEXT: .is_const: true
615 ; CHECK-NEXT: .offset: 8
616 ; CHECK-NEXT: .size: 8
617 ; CHECK-NEXT: .is_restrict: true
618 ; CHECK-NEXT: .value_type: i32
619 ; CHECK-NEXT: .address_space: global
620 ; CHECK-NEXT: - .type_name: 'int addrspace(5)*'
621 ; CHECK-NEXT: .value_kind: pipe
622 ; CHECK-NEXT: .name: c
623 ; CHECK-NEXT: .offset: 16
624 ; CHECK-NEXT: .is_pipe: true
625 ; CHECK-NEXT: .size: 8
626 ; CHECK-NEXT: .value_type: struct
627 ; CHECK-NEXT: .address_space: global
628 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
629 ; CHECK-NEXT: .offset: 24
630 ; CHECK-NEXT: .size: 8
631 ; CHECK-NEXT: .value_type: i64
632 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
633 ; CHECK-NEXT: .offset: 32
634 ; CHECK-NEXT: .size: 8
635 ; CHECK-NEXT: .value_type: i64
636 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
637 ; CHECK-NEXT: .offset: 40
638 ; CHECK-NEXT: .size: 8
639 ; CHECK-NEXT: .value_type: i64
640 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
641 ; CHECK-NEXT: .offset: 48
642 ; CHECK-NEXT: .size: 8
643 ; CHECK-NEXT: .value_type: i8
644 ; CHECK-NEXT: .address_space: global
645 define amdgpu_kernel void @test_type_qual(i32 addrspace(1)* %a,
646 i32 addrspace(1)* %b,
647 %opencl.pipe_t addrspace(1)* %c)
648 !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !51
649 !kernel_arg_base_type !51 !kernel_arg_type_qual !70 {
650 ret void
651 }
652
653 ; CHECK: .symbol: test_access_qual.kd
654 ; CHECK: .name: test_access_qual
655 ; CHECK: .language: OpenCL C
656 ; CHECK: .language_version:
657 ; CHECK-NEXT: - 2
658 ; CHECK-NEXT: - 0
659 ; CHECK: .args:
660 ; CHECK-NEXT: - .type_name: image1d_t
661 ; CHECK-NEXT: .value_kind: image
662 ; CHECK-NEXT: .name: ro
663 ; CHECK-NEXT: .access: read_only
664 ; CHECK-NEXT: .offset: 0
665 ; CHECK-NEXT: .size: 8
666 ; CHECK-NEXT: .value_type: struct
667 ; CHECK-NEXT: .address_space: global
668 ; CHECK-NEXT: - .type_name: image2d_t
669 ; CHECK-NEXT: .value_kind: image
670 ; CHECK-NEXT: .name: wo
671 ; CHECK-NEXT: .access: write_only
672 ; CHECK-NEXT: .offset: 8
673 ; CHECK-NEXT: .size: 8
674 ; CHECK-NEXT: .value_type: struct
675 ; CHECK-NEXT: .address_space: global
676 ; CHECK-NEXT: - .type_name: image3d_t
677 ; CHECK-NEXT: .value_kind: image
678 ; CHECK-NEXT: .name: rw
679 ; CHECK-NEXT: .access: read_write
680 ; CHECK-NEXT: .offset: 16
681 ; CHECK-NEXT: .size: 8
682 ; CHECK-NEXT: .value_type: struct
683 ; CHECK-NEXT: .address_space: global
684 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
685 ; CHECK-NEXT: .offset: 24
686 ; CHECK-NEXT: .size: 8
687 ; CHECK-NEXT: .value_type: i64
688 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
689 ; CHECK-NEXT: .offset: 32
690 ; CHECK-NEXT: .size: 8
691 ; CHECK-NEXT: .value_type: i64
692 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
693 ; CHECK-NEXT: .offset: 40
694 ; CHECK-NEXT: .size: 8
695 ; CHECK-NEXT: .value_type: i64
696 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
697 ; CHECK-NEXT: .offset: 48
698 ; CHECK-NEXT: .size: 8
699 ; CHECK-NEXT: .value_type: i8
700 ; CHECK-NEXT: .address_space: global
701 define amdgpu_kernel void @test_access_qual(%opencl.image1d_t addrspace(1)* %ro,
702 %opencl.image2d_t addrspace(1)* %wo,
703 %opencl.image3d_t addrspace(1)* %rw)
704 !kernel_arg_addr_space !60 !kernel_arg_access_qual !61 !kernel_arg_type !62
705 !kernel_arg_base_type !62 !kernel_arg_type_qual !25 {
706 ret void
707 }
708
709 ; CHECK: .symbol: test_vec_type_hint_half.kd
710 ; CHECK: .name: test_vec_type_hint_half
711 ; CHECK: .language: OpenCL C
712 ; CHECK: .language_version:
713 ; CHECK-NEXT: - 2
714 ; CHECK-NEXT: - 0
715 ; CHECK: .args:
716 ; CHECK-NEXT: - .type_name: int
717 ; CHECK-NEXT: .value_kind: by_value
718 ; CHECK-NEXT: .offset: 0
719 ; CHECK-NEXT: .size: 4
720 ; CHECK-NEXT: .value_type: i32
721 ; CHECK-NEXT: .name: a
722 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
723 ; CHECK-NEXT: .offset: 8
724 ; CHECK-NEXT: .size: 8
725 ; CHECK-NEXT: .value_type: i64
726 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
727 ; CHECK-NEXT: .offset: 16
728 ; CHECK-NEXT: .size: 8
729 ; CHECK-NEXT: .value_type: i64
730 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
731 ; CHECK-NEXT: .offset: 24
732 ; CHECK-NEXT: .size: 8
733 ; CHECK-NEXT: .value_type: i64
734 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
735 ; CHECK-NEXT: .offset: 32
736 ; CHECK-NEXT: .size: 8
737 ; CHECK-NEXT: .value_type: i8
738 ; CHECK-NEXT: .address_space: global
739 ; CHECK: .vec_type_hint: half
740 define amdgpu_kernel void @test_vec_type_hint_half(i32 %a)
741 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
742 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !26 {
743 ret void
744 }
745
746 ; CHECK: .symbol: test_vec_type_hint_float.kd
747 ; CHECK: .name: test_vec_type_hint_float
748 ; CHECK: .language: OpenCL C
749 ; CHECK: .language_version:
750 ; CHECK-NEXT: - 2
751 ; CHECK-NEXT: - 0
752 ; CHECK: .args:
753 ; CHECK-NEXT: - .type_name: int
754 ; CHECK-NEXT: .value_kind: by_value
755 ; CHECK-NEXT: .offset: 0
756 ; CHECK-NEXT: .size: 4
757 ; CHECK-NEXT: .value_type: i32
758 ; CHECK-NEXT: .name: a
759 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
760 ; CHECK-NEXT: .offset: 8
761 ; CHECK-NEXT: .size: 8
762 ; CHECK-NEXT: .value_type: i64
763 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
764 ; CHECK-NEXT: .offset: 16
765 ; CHECK-NEXT: .size: 8
766 ; CHECK-NEXT: .value_type: i64
767 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
768 ; CHECK-NEXT: .offset: 24
769 ; CHECK-NEXT: .size: 8
770 ; CHECK-NEXT: .value_type: i64
771 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
772 ; CHECK-NEXT: .offset: 32
773 ; CHECK-NEXT: .size: 8
774 ; CHECK-NEXT: .value_type: i8
775 ; CHECK-NEXT: .address_space: global
776 ; CHECK: .vec_type_hint: float
777 define amdgpu_kernel void @test_vec_type_hint_float(i32 %a)
778 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
779 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !27 {
780 ret void
781 }
782
783 ; CHECK: .symbol: test_vec_type_hint_double.kd
784 ; CHECK: .name: test_vec_type_hint_double
785 ; CHECK: .language: OpenCL C
786 ; CHECK: .language_version:
787 ; CHECK-NEXT: - 2
788 ; CHECK-NEXT: - 0
789 ; CHECK: .args:
790 ; CHECK-NEXT: - .type_name: int
791 ; CHECK-NEXT: .value_kind: by_value
792 ; CHECK-NEXT: .offset: 0
793 ; CHECK-NEXT: .size: 4
794 ; CHECK-NEXT: .value_type: i32
795 ; CHECK-NEXT: .name: a
796 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
797 ; CHECK-NEXT: .offset: 8
798 ; CHECK-NEXT: .size: 8
799 ; CHECK-NEXT: .value_type: i64
800 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
801 ; CHECK-NEXT: .offset: 16
802 ; CHECK-NEXT: .size: 8
803 ; CHECK-NEXT: .value_type: i64
804 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
805 ; CHECK-NEXT: .offset: 24
806 ; CHECK-NEXT: .size: 8
807 ; CHECK-NEXT: .value_type: i64
808 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
809 ; CHECK-NEXT: .offset: 32
810 ; CHECK-NEXT: .size: 8
811 ; CHECK-NEXT: .value_type: i8
812 ; CHECK-NEXT: .address_space: global
813 ; CHECK: .vec_type_hint: double
814 define amdgpu_kernel void @test_vec_type_hint_double(i32 %a)
815 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
816 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !28 {
817 ret void
818 }
819
820 ; CHECK: .symbol: test_vec_type_hint_char.kd
821 ; CHECK: .name: test_vec_type_hint_char
822 ; CHECK: .language: OpenCL C
823 ; CHECK: .language_version:
824 ; CHECK-NEXT: - 2
825 ; CHECK-NEXT: - 0
826 ; CHECK: .args:
827 ; CHECK-NEXT: - .type_name: int
828 ; CHECK-NEXT: .value_kind: by_value
829 ; CHECK-NEXT: .offset: 0
830 ; CHECK-NEXT: .size: 4
831 ; CHECK-NEXT: .value_type: i32
832 ; CHECK-NEXT: .name: a
833 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
834 ; CHECK-NEXT: .offset: 8
835 ; CHECK-NEXT: .size: 8
836 ; CHECK-NEXT: .value_type: i64
837 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
838 ; CHECK-NEXT: .offset: 16
839 ; CHECK-NEXT: .size: 8
840 ; CHECK-NEXT: .value_type: i64
841 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
842 ; CHECK-NEXT: .offset: 24
843 ; CHECK-NEXT: .size: 8
844 ; CHECK-NEXT: .value_type: i64
845 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
846 ; CHECK-NEXT: .offset: 32
847 ; CHECK-NEXT: .size: 8
848 ; CHECK-NEXT: .value_type: i8
849 ; CHECK-NEXT: .address_space: global
850 ; CHECK: .vec_type_hint: char
851 define amdgpu_kernel void @test_vec_type_hint_char(i32 %a)
852 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
853 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !29 {
854 ret void
855 }
856
857 ; CHECK: .symbol: test_vec_type_hint_short.kd
858 ; CHECK: .name: test_vec_type_hint_short
859 ; CHECK: .language: OpenCL C
860 ; CHECK: .language_version:
861 ; CHECK-NEXT: - 2
862 ; CHECK-NEXT: - 0
863 ; CHECK: .args:
864 ; CHECK-NEXT: - .type_name: int
865 ; CHECK-NEXT: .value_kind: by_value
866 ; CHECK-NEXT: .offset: 0
867 ; CHECK-NEXT: .size: 4
868 ; CHECK-NEXT: .value_type: i32
869 ; CHECK-NEXT: .name: a
870 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
871 ; CHECK-NEXT: .offset: 8
872 ; CHECK-NEXT: .size: 8
873 ; CHECK-NEXT: .value_type: i64
874 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
875 ; CHECK-NEXT: .offset: 16
876 ; CHECK-NEXT: .size: 8
877 ; CHECK-NEXT: .value_type: i64
878 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
879 ; CHECK-NEXT: .offset: 24
880 ; CHECK-NEXT: .size: 8
881 ; CHECK-NEXT: .value_type: i64
882 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
883 ; CHECK-NEXT: .offset: 32
884 ; CHECK-NEXT: .size: 8
885 ; CHECK-NEXT: .value_type: i8
886 ; CHECK-NEXT: .address_space: global
887 ; CHECK: .vec_type_hint: short
888 define amdgpu_kernel void @test_vec_type_hint_short(i32 %a)
889 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
890 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !30 {
891 ret void
892 }
893
894 ; CHECK: .symbol: test_vec_type_hint_long.kd
895 ; CHECK: .name: test_vec_type_hint_long
896 ; CHECK: .language: OpenCL C
897 ; CHECK: .language_version:
898 ; CHECK-NEXT: - 2
899 ; CHECK-NEXT: - 0
900 ; CHECK: .args:
901 ; CHECK-NEXT: - .type_name: int
902 ; CHECK-NEXT: .value_kind: by_value
903 ; CHECK-NEXT: .offset: 0
904 ; CHECK-NEXT: .size: 4
905 ; CHECK-NEXT: .value_type: i32
906 ; CHECK-NEXT: .name: a
907 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
908 ; CHECK-NEXT: .offset: 8
909 ; CHECK-NEXT: .size: 8
910 ; CHECK-NEXT: .value_type: i64
911 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
912 ; CHECK-NEXT: .offset: 16
913 ; CHECK-NEXT: .size: 8
914 ; CHECK-NEXT: .value_type: i64
915 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
916 ; CHECK-NEXT: .offset: 24
917 ; CHECK-NEXT: .size: 8
918 ; CHECK-NEXT: .value_type: i64
919 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
920 ; CHECK-NEXT: .offset: 32
921 ; CHECK-NEXT: .size: 8
922 ; CHECK-NEXT: .value_type: i8
923 ; CHECK-NEXT: .address_space: global
924 ; CHECK: .vec_type_hint: long
925 define amdgpu_kernel void @test_vec_type_hint_long(i32 %a)
926 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
927 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !31 {
928 ret void
929 }
930
931 ; CHECK: .symbol: test_vec_type_hint_unknown.kd
932 ; CHECK: .name: test_vec_type_hint_unknown
933 ; CHECK: .language: OpenCL C
934 ; CHECK: .language_version:
935 ; CHECK-NEXT: - 2
936 ; CHECK-NEXT: - 0
937 ; CHECK: .args:
938 ; CHECK-NEXT: - .type_name: int
939 ; CHECK-NEXT: .value_kind: by_value
940 ; CHECK-NEXT: .offset: 0
941 ; CHECK-NEXT: .size: 4
942 ; CHECK-NEXT: .value_type: i32
943 ; CHECK-NEXT: .name: a
944 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
945 ; CHECK-NEXT: .offset: 8
946 ; CHECK-NEXT: .size: 8
947 ; CHECK-NEXT: .value_type: i64
948 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
949 ; CHECK-NEXT: .offset: 16
950 ; CHECK-NEXT: .size: 8
951 ; CHECK-NEXT: .value_type: i64
952 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
953 ; CHECK-NEXT: .offset: 24
954 ; CHECK-NEXT: .size: 8
955 ; CHECK-NEXT: .value_type: i64
956 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
957 ; CHECK-NEXT: .offset: 32
958 ; CHECK-NEXT: .size: 8
959 ; CHECK-NEXT: .value_type: i8
960 ; CHECK-NEXT: .address_space: global
961 ; CHECK: .vec_type_hint: unknown
962 define amdgpu_kernel void @test_vec_type_hint_unknown(i32 %a)
963 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
964 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !32 {
965 ret void
966 }
967
968 ; CHECK: .reqd_workgroup_size:
969 ; CHECK-NEXT: - 1
970 ; CHECK-NEXT: - 2
971 ; CHECK-NEXT: - 4
972 ; CHECK: .symbol: test_reqd_wgs_vec_type_hint.kd
973 ; CHECK: .name: test_reqd_wgs_vec_type_hint
974 ; CHECK: .language: OpenCL C
975 ; CHECK: .language_version:
976 ; CHECK-NEXT: - 2
977 ; CHECK-NEXT: - 0
978 ; CHECK: .args:
979 ; CHECK-NEXT: - .type_name: int
980 ; CHECK-NEXT: .value_kind: by_value
981 ; CHECK-NEXT: .offset: 0
982 ; CHECK-NEXT: .size: 4
983 ; CHECK-NEXT: .value_type: i32
984 ; CHECK-NEXT: .name: a
985 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
986 ; CHECK-NEXT: .offset: 8
987 ; CHECK-NEXT: .size: 8
988 ; CHECK-NEXT: .value_type: i64
989 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
990 ; CHECK-NEXT: .offset: 16
991 ; CHECK-NEXT: .size: 8
992 ; CHECK-NEXT: .value_type: i64
993 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
994 ; CHECK-NEXT: .offset: 24
995 ; CHECK-NEXT: .size: 8
996 ; CHECK-NEXT: .value_type: i64
997 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
998 ; CHECK-NEXT: .offset: 32
999 ; CHECK-NEXT: .size: 8
1000 ; CHECK-NEXT: .value_type: i8
1001 ; CHECK-NEXT: .address_space: global
1002 ; CHECK: .vec_type_hint: int
1003 define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a)
1004 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
1005 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !5
1006 !reqd_work_group_size !6 {
1007 ret void
1008 }
1009
1010 ; CHECK: .symbol: test_wgs_hint_vec_type_hint.kd
1011 ; CHECK: .workgroup_size_hint:
1012 ; CHECK-NEXT: - 8
1013 ; CHECK-NEXT: - 16
1014 ; CHECK-NEXT: - 32
1015 ; CHECK: .name: test_wgs_hint_vec_type_hint
1016 ; CHECK: .language: OpenCL C
1017 ; CHECK: .language_version:
1018 ; CHECK-NEXT: - 2
1019 ; CHECK-NEXT: - 0
1020 ; CHECK: .args:
1021 ; CHECK-NEXT: - .type_name: int
1022 ; CHECK-NEXT: .value_kind: by_value
1023 ; CHECK-NEXT: .offset: 0
1024 ; CHECK-NEXT: .size: 4
1025 ; CHECK-NEXT: .value_type: i32
1026 ; CHECK-NEXT: .name: a
1027 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
1028 ; CHECK-NEXT: .offset: 8
1029 ; CHECK-NEXT: .size: 8
1030 ; CHECK-NEXT: .value_type: i64
1031 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
1032 ; CHECK-NEXT: .offset: 16
1033 ; CHECK-NEXT: .size: 8
1034 ; CHECK-NEXT: .value_type: i64
1035 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
1036 ; CHECK-NEXT: .offset: 24
1037 ; CHECK-NEXT: .size: 8
1038 ; CHECK-NEXT: .value_type: i64
1039 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
1040 ; CHECK-NEXT: .offset: 32
1041 ; CHECK-NEXT: .size: 8
1042 ; CHECK-NEXT: .value_type: i8
1043 ; CHECK-NEXT: .address_space: global
1044 ; CHECK: .vec_type_hint: uint4
1045 define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a)
1046 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
1047 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !7
1048 !work_group_size_hint !8 {
1049 ret void
1050 }
1051
1052 ; CHECK: .symbol: test_arg_ptr_to_ptr.kd
1053 ; CHECK: .name: test_arg_ptr_to_ptr
1054 ; CHECK: .language: OpenCL C
1055 ; CHECK: .language_version:
1056 ; CHECK-NEXT: - 2
1057 ; CHECK-NEXT: - 0
1058 ; CHECK: .args:
1059 ; CHECK-NEXT: - .type_name: 'int addrspace(5)* addrspace(5)*'
1060 ; CHECK-NEXT: .value_kind: global_buffer
1061 ; CHECK-NEXT: .name: a
1062 ; CHECK-NEXT: .offset: 0
1063 ; CHECK-NEXT: .size: 8
1064 ; CHECK-NEXT: .value_type: i32
1065 ; CHECK-NEXT: .address_space: global
1066 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
1067 ; CHECK-NEXT: .offset: 8
1068 ; CHECK-NEXT: .size: 8
1069 ; CHECK-NEXT: .value_type: i64
1070 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
1071 ; CHECK-NEXT: .offset: 16
1072 ; CHECK-NEXT: .size: 8
1073 ; CHECK-NEXT: .value_type: i64
1074 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
1075 ; CHECK-NEXT: .offset: 24
1076 ; CHECK-NEXT: .size: 8
1077 ; CHECK-NEXT: .value_type: i64
1078 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
1079 ; CHECK-NEXT: .offset: 32
1080 ; CHECK-NEXT: .size: 8
1081 ; CHECK-NEXT: .value_type: i8
1082 ; CHECK-NEXT: .address_space: global
1083 define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 addrspace(5)* addrspace(1)* %a)
1084 !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80
1085 !kernel_arg_base_type !80 !kernel_arg_type_qual !4 {
1086 ret void
1087 }
1088
1089 ; CHECK: .symbol: test_arg_struct_contains_ptr.kd
1090 ; CHECK: .name: test_arg_struct_contains_ptr
1091 ; CHECK: .language: OpenCL C
1092 ; CHECK: .language_version:
1093 ; CHECK-NEXT: - 2
1094 ; CHECK-NEXT: - 0
1095 ; CHECK: .args:
1096 ; CHECK-NEXT: - .type_name: struct B
1097 ; CHECK-NEXT: .value_kind: global_buffer
1098 ; CHECK-NEXT: .name: a
1099 ; CHECK-NEXT: .offset: 0
1100 ; CHECK-NEXT: .size: 4
1101 ; CHECK-NEXT: .value_type: struct
1102 ; CHECK-NEXT: .address_space: private
1103 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
1104 ; CHECK-NEXT: .offset: 8
1105 ; CHECK-NEXT: .size: 8
1106 ; CHECK-NEXT: .value_type: i64
1107 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
1108 ; CHECK-NEXT: .offset: 16
1109 ; CHECK-NEXT: .size: 8
1110 ; CHECK-NEXT: .value_type: i64
1111 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
1112 ; CHECK-NEXT: .offset: 24
1113 ; CHECK-NEXT: .size: 8
1114 ; CHECK-NEXT: .value_type: i64
1115 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
1116 ; CHECK-NEXT: .offset: 32
1117 ; CHECK-NEXT: .size: 8
1118 ; CHECK-NEXT: .value_type: i8
1119 ; CHECK-NEXT: .address_space: global
1120 define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B addrspace(5)* byval %a)
1121 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82
1122 !kernel_arg_base_type !82 !kernel_arg_type_qual !4 {
1123 ret void
1124 }
1125
1126 ; CHECK: .symbol: test_arg_vector_of_ptr.kd
1127 ; CHECK: .name: test_arg_vector_of_ptr
1128 ; CHECK: .language: OpenCL C
1129 ; CHECK: .language_version:
1130 ; CHECK-NEXT: - 2
1131 ; CHECK-NEXT: - 0
1132 ; CHECK: .args:
1133 ; CHECK-NEXT: - .type_name: 'global int addrspace(5)* __attribute__((ext_vector_type(2)))'
1134 ; CHECK-NEXT: .value_kind: by_value
1135 ; CHECK-NEXT: .offset: 0
1136 ; CHECK-NEXT: .size: 16
1137 ; CHECK-NEXT: .value_type: i32
1138 ; CHECK-NEXT: .name: a
1139 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
1140 ; CHECK-NEXT: .offset: 16
1141 ; CHECK-NEXT: .size: 8
1142 ; CHECK-NEXT: .value_type: i64
1143 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
1144 ; CHECK-NEXT: .offset: 24
1145 ; CHECK-NEXT: .size: 8
1146 ; CHECK-NEXT: .value_type: i64
1147 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
1148 ; CHECK-NEXT: .offset: 32
1149 ; CHECK-NEXT: .size: 8
1150 ; CHECK-NEXT: .value_type: i64
1151 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
1152 ; CHECK-NEXT: .offset: 40
1153 ; CHECK-NEXT: .size: 8
1154 ; CHECK-NEXT: .value_type: i8
1155 ; CHECK-NEXT: .address_space: global
1156 define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a)
1157 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !83
1158 !kernel_arg_base_type !83 !kernel_arg_type_qual !4 {
1159 ret void
1160 }
1161
1162 ; CHECK: .symbol: test_arg_unknown_builtin_type.kd
1163 ; CHECK: .name: test_arg_unknown_builtin_type
1164 ; CHECK: .language: OpenCL C
1165 ; CHECK: .language_version:
1166 ; CHECK-NEXT: - 2
1167 ; CHECK-NEXT: - 0
1168 ; CHECK: .args:
1169 ; CHECK-NEXT: - .type_name: clk_event_t
1170 ; CHECK-NEXT: .value_kind: global_buffer
1171 ; CHECK-NEXT: .name: a
1172 ; CHECK-NEXT: .offset: 0
1173 ; CHECK-NEXT: .size: 8
1174 ; CHECK-NEXT: .value_type: struct
1175 ; CHECK-NEXT: .address_space: global
1176 ; CHECK-NEXT: - .value_kind: hidden_global_offset_x
1177 ; CHECK-NEXT: .offset: 8
1178 ; CHECK-NEXT: .size: 8
1179 ; CHECK-NEXT: .value_type: i64
1180 ; CHECK-NEXT: - .value_kind: hidden_global_offset_y
1181 ; CHECK-NEXT: .offset: 16
1182 ; CHECK-NEXT: .size: 8
1183 ; CHECK-NEXT: .value_type: i64
1184 ; CHECK-NEXT: - .value_kind: hidden_global_offset_z
1185 ; CHECK-NEXT: .offset: 24
1186 ; CHECK-NEXT: .size: 8
1187 ; CHECK-NEXT: .value_type: i64
1188 ; CHECK-NEXT: - .value_kind: hidden_printf_buffer
1189 ; CHECK-NEXT: .offset: 32
1190 ; CHECK-NEXT: .size: 8
1191 ; CHECK-NEXT: .value_type: i8
1192 ; CHECK-NEXT: .address_space: global
1193 define amdgpu_kernel void @test_arg_unknown_builtin_type(
1194 %opencl.clk_event_t addrspace(1)* %a)
1195 !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !84
1196 !kernel_arg_base_type !84 !kernel_arg_type_qual !4 {
1197 ret void
1198 }
1199
1200 ; CHECK: .symbol: test_pointee_align.kd
1201 ; CHECK: .name: test_pointee_align
1202 ; CHECK: .language: OpenCL C
1203 ; CHECK: .language_version:
1204 ; CHECK-NEXT: - 2
1205 ; CHECK-NEXT: - 0
1206 ; CHECK: .args:
1207 ; CHECK-NEXT: - .type_name: 'long addrspace(5)*'
1208 ; CHECK-NEXT: .value_kind: global_buffer
1209 ; CHECK-NEXT: .name: a
1210 ; CHECK-NEXT: .offset: 0
1211 ; CHECK-NEXT: .size: 8
1212 ; CHECK-NEXT: .value_type: i64
1213 ; CHECK-NEXT: .address_space: global
1214 ; CHECK-NEXT: - .type_name: 'char addrspace(5)*'
1215 ; CHECK-NEXT: .value_kind: dynamic_shared_pointer
1216 ; CHECK-NEXT: .name: b
1217 ; CHECK-NEXT: .offset: 8
1218 ; CHECK-NEXT: .size: 4
1219 ; CHECK-NEXT: .value_type: i8
1220 ; CHECK-NEXT: .pointee_align: 1
1221 ; CHECK-NEXT: .address_space: local
1222 ; CHECK-NEXT: - .type_name: 'char2 addrspace(5)*'
1223 ; CHECK-NEXT: .value_kind: dynamic_shared_pointer
1224 ; CHECK-NEXT: .name: c
1225 ; CHECK-NEXT: .offset: 12
1226 ; CHECK-NEXT: .size: 4
1227 ; CHECK-NEXT: .value_type: i8
1228 ; CHECK-NEXT: .pointee_align: 2
1229 ; CHECK-NEXT: .address_space: local
1230 ; CHECK-NEXT: - .type_name: 'char3 addrspace(5)*'
1231 ; CHECK-NEXT: .value_kind: dynamic_shared_pointer
1232 ; CHECK-NEXT: .name: d
1233 ; CHECK-NEXT: .offset: 16
1234 ; CHECK-NEXT: .size: 4
1235 ; CHECK-NEXT: .value_type: i8
1236 ; CHECK-NEXT: .pointee_align: 4
1237 ; CHECK-NEXT: .address_space: local
1238 ; CHECK-NEXT: - .type_name: 'char4 addrspace(5)*'
1239 ; CHECK-NEXT: .value_kind: dynamic_shared_pointer
1240 ; CHECK-NEXT: .name: e
1241 ; CHECK-NEXT: .offset: 20
1242 ; CHECK-NEXT: .size: 4
1243 ; CHECK-NEXT: .value_type: i8
1244 ; CHECK-NEXT: .pointee_align: 4
1245 ; CHECK-NEXT: .address_space: local
1246 ; CHECK-NEXT: - .type_name: 'char8 addrspace(5)*'
1247 ; CHECK-NEXT: .value_kind: dynamic_shared_pointer
1248 ; CHECK-NEXT: .name: f
1249 ; CHECK-NEXT: .offset: 24
1250 ; CHECK-NEXT: .size: 4
1251 ; CHECK-NEXT: .value_type: i8
1252 ; CHECK-NEXT: .pointee_align: 8
1253 ; CHECK-NEXT: .address_space: local
1254 ; CHECK-NEXT: - .type_name: 'char16 addrspace(5)*'