llvm.org GIT mirror llvm / 6b0141c
[AMDGPU] Add metadata for runtime. Emit runtime metadata into the ELF file. The runtime requires certain information (metadata) about kernels to be able to execute and query them. This information is emitted to an ELF section as a key-value pair stream. Differential Revision: https://reviews.llvm.org/D21849 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275566 91177308-0d34-0410-b5e6-96231b3b80d8 Yaxun Liu 4 years ago
4 changed file(s) with 1220 addition(s) and 1 deletion(s). Raw diff Collapse all Expand all
3838 #include "llvm/Support/MathExtras.h"
3939 #include "llvm/Support/TargetRegistry.h"
4040 #include "llvm/Target/TargetLoweringObjectFile.h"
41
41 #include "AMDGPURuntimeMetadata.h"
42
43 using namespace ::AMDGPU;
4244 using namespace llvm;
4345
4446 // TODO: This should get the default rounding mode from the kernel. We just set
110112 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits());
111113 TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
112114 "AMD", "AMDGPU");
115 emitStartOfRuntimeMetadata(M);
113116 }
114117
115118 void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
242245 OutStreamer->EmitBytes(StringRef(Comment));
243246 }
244247 }
248
249 emitRuntimeMetadata(*MF.getFunction());
245250
246251 return false;
247252 }
739744 *TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo());
740745 return false;
741746 }
747
748 // Emit a key and an integer value for runtime metadata.
749 static void emitRuntimeMDIntValue(std::unique_ptr &Streamer,
750 RuntimeMD::Key K, uint64_t V,
751 unsigned Size) {
752 Streamer->EmitIntValue(K, 1);
753 Streamer->EmitIntValue(V, Size);
754 }
755
756 // Emit a key and a string value for runtime metadata.
757 static void emitRuntimeMDStringValue(std::unique_ptr &Streamer,
758 RuntimeMD::Key K, StringRef S) {
759 Streamer->EmitIntValue(K, 1);
760 Streamer->EmitIntValue(S.size(), 4);
761 Streamer->EmitBytes(S);
762 }
763
764 // Emit a key and three integer values for runtime metadata.
765 // The three integer values are obtained from MDNode \p Node;
766 static void emitRuntimeMDThreeIntValues(std::unique_ptr &Streamer,
767 RuntimeMD::Key K, MDNode *Node,
768 unsigned Size) {
769 Streamer->EmitIntValue(K, 1);
770 Streamer->EmitIntValue(mdconst::extract(
771 Node->getOperand(0))->getZExtValue(), Size);
772 Streamer->EmitIntValue(mdconst::extract(
773 Node->getOperand(1))->getZExtValue(), Size);
774 Streamer->EmitIntValue(mdconst::extract(
775 Node->getOperand(2))->getZExtValue(), Size);
776 }
777
778 void AMDGPUAsmPrinter::emitStartOfRuntimeMetadata(const Module &M) {
779 OutStreamer->SwitchSection(getObjFileLowering().getContext()
780 .getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0));
781
782 emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyMDVersion,
783 RuntimeMD::MDVersion << 8 | RuntimeMD::MDRevision, 2);
784 if (auto MD = M.getNamedMetadata("opencl.ocl.version")) {
785 emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguage,
786 RuntimeMD::OpenCL_C, 1);
787 auto Node = MD->getOperand(0);
788 unsigned short Major = mdconst::extract(Node->getOperand(0))
789 ->getZExtValue();
790 unsigned short Minor = mdconst::extract(Node->getOperand(1))
791 ->getZExtValue();
792 emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguageVersion,
793 Major * 100 + Minor * 10, 2);
794 }
795 }
796
797 static Twine getOCLTypeName(Type *Ty, bool isSigned) {
798 if (VectorType* VecTy = dyn_cast(Ty)) {
799 Type* EleTy = VecTy->getElementType();
800 unsigned Size = VecTy->getVectorNumElements();
801 return getOCLTypeName(EleTy, isSigned) + Twine(Size);
802 }
803 switch (Ty->getTypeID()) {
804 case Type::HalfTyID: return "half";
805 case Type::FloatTyID: return "float";
806 case Type::DoubleTyID: return "double";
807 case Type::IntegerTyID: {
808 if (!isSigned)
809 return Twine('u') + getOCLTypeName(Ty, true);
810 auto IntTy = cast(Ty);
811 auto BW = IntTy->getIntegerBitWidth();
812 switch (BW) {
813 case 8:
814 return "char";
815 case 16:
816 return "short";
817 case 32:
818 return "int";
819 case 64:
820 return "long";
821 default:
822 return Twine("i") + Twine(BW);
823 }
824 }
825 default:
826 llvm_unreachable("invalid type");
827 }
828 }
829
830 static RuntimeMD::KernelArg::ValueType getRuntimeMDValueType(
831 Type *Ty, StringRef TypeName) {
832 if (auto VT = dyn_cast(Ty))
833 return getRuntimeMDValueType(VT->getElementType(), TypeName);
834 else if (auto PT = dyn_cast(Ty))
835 return getRuntimeMDValueType(PT->getElementType(), TypeName);
836 else if (Ty->isHalfTy())
837 return RuntimeMD::KernelArg::F16;
838 else if (Ty->isFloatTy())
839 return RuntimeMD::KernelArg::F32;
840 else if (Ty->isDoubleTy())
841 return RuntimeMD::KernelArg::F64;
842 else if (IntegerType* intTy = dyn_cast(Ty)) {
843 bool Signed = !TypeName.startswith("u");
844 switch (intTy->getIntegerBitWidth()) {
845 case 8:
846 return Signed ? RuntimeMD::KernelArg::I8 : RuntimeMD::KernelArg::U8;
847 case 16:
848 return Signed ? RuntimeMD::KernelArg::I16 : RuntimeMD::KernelArg::U16;
849 case 32:
850 return Signed ? RuntimeMD::KernelArg::I32 : RuntimeMD::KernelArg::U32;
851 case 64:
852 return Signed ? RuntimeMD::KernelArg::I64 : RuntimeMD::KernelArg::U64;
853 default:
854 // Runtime does not recognize other integer types. Report as
855 // struct type.
856 return RuntimeMD::KernelArg::Struct;
857 }
858 } else
859 return RuntimeMD::KernelArg::Struct;
860 }
861
862 void AMDGPUAsmPrinter::emitRuntimeMetadata(const Function &F) {
863 if (!F.getMetadata("kernel_arg_type"))
864 return;
865
866 MCContext &Context = getObjFileLowering().getContext();
867 OutStreamer->SwitchSection(
868 Context.getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0));
869 OutStreamer->EmitIntValue(RuntimeMD::KeyKernelBegin, 1);
870 emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyKernelName, F.getName());
871
872 for (auto &Arg:F.args()) {
873 // Emit KeyArgBegin.
874 unsigned I = Arg.getArgNo();
875 OutStreamer->EmitIntValue(RuntimeMD::KeyArgBegin, 1);
876
877 // Emit KeyArgSize and KeyArgAlign.
878 auto T = Arg.getType();
879 auto DL = F.getParent()->getDataLayout();
880 emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgSize,
881 DL.getTypeAllocSize(T), 4);
882 emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAlign,
883 DL.getABITypeAlignment(T), 4);
884
885 // Emit KeyArgTypeName.
886 auto TypeName = dyn_cast(F.getMetadata(
887 "kernel_arg_type")->getOperand(I))->getString();
888 emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgTypeName, TypeName);
889
890 // Emit KeyArgName.
891 if (auto ArgNameMD = F.getMetadata("kernel_arg_name")) {
892 auto ArgName = cast(ArgNameMD->getOperand(
893 I))->getString();
894 emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgName, ArgName);
895 }
896
897 // Emit KeyArgIsVolatile, KeyArgIsRestrict, KeyArgIsConst and KeyArgIsPipe.
898 auto TypeQual = cast(F.getMetadata(
899 "kernel_arg_type_qual")->getOperand(I))->getString();
900 SmallVector SplitQ;
901 TypeQual.split(SplitQ, " ", -1, false/* drop empty entry*/);
902 for (auto &I:SplitQ) {
903 auto Key = StringSwitch(I)
904 .Case("volatile", RuntimeMD::KeyArgIsVolatile)
905 .Case("restrict", RuntimeMD::KeyArgIsRestrict)
906 .Case("const", RuntimeMD::KeyArgIsConst)
907 .Case("pipe", RuntimeMD::KeyArgIsPipe)
908 .Default(RuntimeMD::KeyNull);
909 OutStreamer->EmitIntValue(Key, 1);
910 }
911
912 // Emit KeyArgTypeKind.
913 auto BaseTypeName = cast(
914 F.getMetadata("kernel_arg_base_type")->getOperand(I))->getString();
915 auto TypeKind = StringSwitch(BaseTypeName)
916 .Case("sampler_t", RuntimeMD::KernelArg::Sampler)
917 .Case("queue_t", RuntimeMD::KernelArg::Queue)
918 .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t",
919 "image2d_t" , "image2d_array_t", RuntimeMD::KernelArg::Image)
920 .Cases("image2d_depth_t", "image2d_array_depth_t",
921 "image2d_msaa_t", "image2d_array_msaa_t",
922 "image2d_msaa_depth_t", RuntimeMD::KernelArg::Image)
923 .Cases("image2d_array_msaa_depth_t", "image3d_t",
924 RuntimeMD::KernelArg::Image)
925 .Default(isa(T) ? RuntimeMD::KernelArg::Pointer :
926 RuntimeMD::KernelArg::Value);
927 emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgTypeKind, TypeKind, 1);
928
929 // Emit KeyArgValueType.
930 emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgValueType,
931 getRuntimeMDValueType(T, BaseTypeName), 2);
932
933 // Emit KeyArgAccQual.
934 auto AccQual = cast(F.getMetadata(
935 "kernel_arg_access_qual")->getOperand(I))->getString();
936 auto AQ = StringSwitch(AccQual)
937 .Case("read_only", RuntimeMD::KernelArg::ReadOnly)
938 .Case("write_only", RuntimeMD::KernelArg::WriteOnly)
939 .Case("read_write", RuntimeMD::KernelArg::ReadWrite)
940 .Default(RuntimeMD::KernelArg::None);
941 emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAccQual,
942 AQ, 1);
943
944 // Emit KeyArgAddrQual.
945 if (isa(T))
946 emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAddrQual,
947 T->getPointerAddressSpace(), 1);
948
949 // Emit KeyArgEnd
950 OutStreamer->EmitIntValue(RuntimeMD::KeyArgEnd, 1);
951 }
952
953 // Emit KeyReqdWorkGroupSize, KeyWorkGroupSizeHint, and KeyVecTypeHint.
954 if (auto RWGS = F.getMetadata("reqd_work_group_size"))
955 emitRuntimeMDThreeIntValues(OutStreamer, RuntimeMD::KeyReqdWorkGroupSize,
956 RWGS, 4);
957 if (auto WGSH = F.getMetadata("work_group_size_hint"))
958 emitRuntimeMDThreeIntValues(OutStreamer, RuntimeMD::KeyWorkGroupSizeHint,
959 WGSH, 4);
960 if (auto VTH = F.getMetadata("vec_type_hint")) {
961 auto TypeName = getOCLTypeName(cast(
962 VTH->getOperand(0))->getType(), mdconst::extract(
963 VTH->getOperand(1))->getZExtValue()).str();
964 emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyVecTypeHint,
965 TypeName);
966 }
967
968 // Emit KeyKernelEnd
969 OutStreamer->EmitIntValue(RuntimeMD::KeyKernelEnd, 1);
970 }
126126 unsigned AsmVariant, const char *ExtraCode,
127127 raw_ostream &O) override;
128128
129 void emitStartOfRuntimeMetadata(const Module &M);
130
131 void emitRuntimeMetadata(const Function &F);
132
129133 protected:
130134 std::vector DisasmLines, HexLines;
131135 size_t DisasmLineMaxLen;
0 //===-- AMDGPURuntimeMetadata.h - AMDGPU Runtime Metadata -------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 ///
11 /// Enums and structure types used by runtime metadata.
12 ///
13 /// Runtime requests certain information (metadata) about kernels to be able
14 /// to execute the kernels and answer the queries about the kernels.
15 /// The metadata is represented as a byte stream in an ELF section of a
16 /// binary (code object). The byte stream consists of key-value pairs.
17 /// Each key is an 8 bit unsigned integer. Each value can be an integer,
18 /// a string, or a stream of key-value pairs. There are 3 levels of key-value
19 /// pair streams. At the beginning of the ELF section is the top level
20 /// key-value pair stream. A kernel-level key-value pair stream starts after
21 /// encountering KeyKernelBegin and ends immediately before encountering
22 /// KeyKernelEnd. A kernel-argument-level key-value pair stream starts
23 /// after encountering KeyArgBegin and ends immediately before encountering
24 /// KeyArgEnd. A kernel-level key-value pair stream can only appear in a top
25 /// level key-value pair stream. A kernel-argument-level key-value pair stream
26 /// can only appear in a kernel-level key-value pair stream.
27 ///
28 /// The format should be kept backward compatible. New enum values and bit
29 /// fields should be appended at the end. It is suggested to bump up the
30 /// revision number whenever the format changes and document the change
31 /// in the revision in this header.
32 ///
33 //
34 //===----------------------------------------------------------------------===//
35 //
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H

// Provides uint8_t / uint16_t used as the underlying types of the enums below.
#include <stdint.h>

namespace AMDGPU {

namespace RuntimeMD {

  // Version and revision of runtime metadata
  const unsigned char MDVersion   = 1;
  const unsigned char MDRevision  = 0;

  // ELF section name containing runtime metadata
  const char SectionName[] = ".AMDGPU.runtime_metadata";

  // Enumeration values of keys in runtime metadata.
  // The numeric values are part of the emitted format: append new keys at the
  // end and never renumber existing ones.
  enum Key {
    KeyNull                     = 0, // Place holder. Ignored when encountered
    KeyMDVersion                = 1, // Runtime metadata version
    KeyLanguage                 = 2, // Language
    KeyLanguageVersion          = 3, // Language version
    KeyKernelBegin              = 4, // Beginning of kernel-level stream
    KeyKernelEnd                = 5, // End of kernel-level stream
    KeyKernelName               = 6, // Kernel name
    KeyArgBegin                 = 7, // Beginning of kernel-arg-level stream
    KeyArgEnd                   = 8, // End of kernel-arg-level stream
    KeyArgSize                  = 9, // Kernel arg size
    KeyArgAlign                 = 10, // Kernel arg alignment
    KeyArgTypeName              = 11, // Kernel arg type name
    KeyArgName                  = 12, // Kernel arg name
    KeyArgTypeKind              = 13, // Kernel argument type kind
    KeyArgValueType             = 14, // Kernel argument value type
    KeyArgAddrQual              = 15, // Kernel argument address qualifier
    KeyArgAccQual               = 16, // Kernel argument access qualifier
    KeyArgIsConst               = 17, // Kernel argument is const qualified
    KeyArgIsRestrict            = 18, // Kernel argument is restrict qualified
    KeyArgIsVolatile            = 19, // Kernel argument is volatile qualified
    KeyArgIsPipe                = 20, // Kernel argument is pipe qualified
    KeyReqdWorkGroupSize        = 21, // Required work group size
    KeyWorkGroupSizeHint        = 22, // Work group size hint
    KeyVecTypeHint              = 23, // Vector type hint
    KeyKernelIndex              = 24, // Kernel index for device enqueue
    KeySGPRs                    = 25, // Number of SGPRs
    KeyVGPRs                    = 26, // Number of VGPRs
    KeyMinWavesPerSIMD          = 27, // Minimum number of waves per SIMD
    KeyMaxWavesPerSIMD          = 28, // Maximum number of waves per SIMD
    KeyFlatWorkGroupSizeLimits  = 29, // Flat work group size limits
    KeyMaxWorkGroupSize         = 30, // Maximum work group size
    KeyNoPartialWorkGroups      = 31, // No partial work groups
  };

  // Source language of the module (value of a KeyLanguage record).
  enum Language : uint8_t {
    OpenCL_C      = 0,
    HCC           = 1,
    OpenMP        = 2,
    OpenCL_CPP    = 3,
  };

  // Language version (value of a KeyLanguageVersion record), encoded as
  // major * 100 + minor * 10.
  enum LanguageVersion : uint16_t {
    V100          = 100,
    V110          = 110,
    V120          = 120,
    V200          = 200,
    V210          = 210,
  };

  namespace KernelArg {
    // Kind of a kernel argument (value of a KeyArgTypeKind record).
    enum TypeKind : uint8_t {
      Value     = 0,
      Pointer   = 1,
      Image     = 2,
      Sampler   = 3,
      Queue     = 4,
    };

    // Scalar value type of a kernel argument (value of a KeyArgValueType
    // record).
    enum ValueType : uint16_t {
      Struct  = 0,
      I8      = 1,
      U8      = 2,
      I16     = 3,
      U16     = 4,
      F16     = 5,
      I32     = 6,
      U32     = 7,
      F32     = 8,
      I64     = 9,
      U64     = 10,
      F64     = 11,
    };

    // Access qualifier of a kernel argument (value of a KeyArgAccQual
    // record). The spelling "AccessQualifer" is kept because users of this
    // header refer to the type by that name.
    enum AccessQualifer : uint8_t {
      None       = 0,
      ReadOnly   = 1,
      WriteOnly  = 2,
      ReadWrite  = 3,
    };
  } // namespace KernelArg
} // namespace RuntimeMD
} // namespace AMDGPU

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
0 ; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s
1
2 %struct.A = type { i8, float }
3 %opencl.image1d_t = type opaque
4 %opencl.image2d_t = type opaque
5 %opencl.image3d_t = type opaque
6 %opencl.queue_t = type opaque
7 %opencl.pipe_t = type opaque
8 %struct.B = type { i32 addrspace(1)*}
9 %opencl.clk_event_t = type opaque
10
11 ; CHECK: .section .AMDGPU.runtime_metadata
12 ; CHECK-NEXT: .byte 1
13 ; CHECK-NEXT: .short 256
14 ; CHECK-NEXT: .byte 2
15 ; CHECK-NEXT: .byte 0
16 ; CHECK-NEXT: .byte 3
17 ; CHECK-NEXT: .short 200
18
19 ; CHECK-LABEL:{{^}}test_char:
20 ; CHECK: .section .AMDGPU.runtime_metadata
21 ; CHECK-NEXT: .byte 4
22 ; CHECK-NEXT: .byte 6
23 ; CHECK-NEXT: .long 9
24 ; CHECK-NEXT: .ascii "test_char"
25 ; CHECK-NEXT: .byte 7
26 ; CHECK-NEXT: .byte 9
27 ; CHECK-NEXT: .long 1
28 ; CHECK-NEXT: .byte 10
29 ; CHECK-NEXT: .long 1
30 ; CHECK-NEXT: .byte 11
31 ; CHECK-NEXT: .long 4
32 ; CHECK-NEXT: .ascii "char"
33 ; CHECK-NEXT: .byte 13
34 ; CHECK-NEXT: .byte 0
35 ; CHECK-NEXT: .byte 14
36 ; CHECK-NEXT: .short 1
37 ; CHECK-NEXT: .byte 16
38 ; CHECK-NEXT: .byte 0
39 ; CHECK-NEXT: .byte 8
40 ; CHECK-NEXT: .byte 5
41
42 define amdgpu_kernel void @test_char(i8 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
43 ret void
44 }
45
46 ; CHECK-LABEL:{{^}}test_ushort2:
47 ; CHECK: .section .AMDGPU.runtime_metadata
48 ; CHECK-NEXT: .byte 4
49 ; CHECK-NEXT: .byte 6
50 ; CHECK-NEXT: .long 12
51 ; CHECK-NEXT: .ascii "test_ushort2"
52 ; CHECK-NEXT: .byte 7
53 ; CHECK-NEXT: .byte 9
54 ; CHECK-NEXT: .long 4
55 ; CHECK-NEXT: .byte 10
56 ; CHECK-NEXT: .long 4
57 ; CHECK-NEXT: .byte 11
58 ; CHECK-NEXT: .long 7
59 ; CHECK-NEXT: .ascii "ushort2"
60 ; CHECK-NEXT: .byte 13
61 ; CHECK-NEXT: .byte 0
62 ; CHECK-NEXT: .byte 14
63 ; CHECK-NEXT: .short 4
64 ; CHECK-NEXT: .byte 16
65 ; CHECK-NEXT: .byte 0
66 ; CHECK-NEXT: .byte 8
67 ; CHECK-NEXT: .byte 5
68
69 define amdgpu_kernel void @test_ushort2(<2 x i16> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !4 {
70 ret void
71 }
72
73 ; CHECK-LABEL:{{^}}test_int3:
74 ; CHECK: .section .AMDGPU.runtime_metadata
75 ; CHECK-NEXT: .byte 4
76 ; CHECK-NEXT: .byte 6
77 ; CHECK-NEXT: .long 9
78 ; CHECK-NEXT: .ascii "test_int3"
79 ; CHECK-NEXT: .byte 7
80 ; CHECK-NEXT: .byte 9
81 ; CHECK-NEXT: .long 16
82 ; CHECK-NEXT: .byte 10
83 ; CHECK-NEXT: .long 16
84 ; CHECK-NEXT: .byte 11
85 ; CHECK-NEXT: .long 4
86 ; CHECK-NEXT: .ascii "int3"
87 ; CHECK-NEXT: .byte 13
88 ; CHECK-NEXT: .byte 0
89 ; CHECK-NEXT: .byte 14
90 ; CHECK-NEXT: .short 6
91 ; CHECK-NEXT: .byte 16
92 ; CHECK-NEXT: .byte 0
93 ; CHECK-NEXT: .byte 8
94 ; CHECK-NEXT: .byte 5
95
96 define amdgpu_kernel void @test_int3(<3 x i32> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !11 !kernel_arg_base_type !11 !kernel_arg_type_qual !4 {
97 ret void
98 }
99
100 ; CHECK-LABEL:{{^}}test_ulong4:
101 ; CHECK: .section .AMDGPU.runtime_metadata
102 ; CHECK-NEXT: .byte 4
103 ; CHECK-NEXT: .byte 6
104 ; CHECK-NEXT: .long 11
105 ; CHECK-NEXT: .ascii "test_ulong4"
106 ; CHECK-NEXT: .byte 7
107 ; CHECK-NEXT: .byte 9
108 ; CHECK-NEXT: .long 32
109 ; CHECK-NEXT: .byte 10
110 ; CHECK-NEXT: .long 32
111 ; CHECK-NEXT: .byte 11
112 ; CHECK-NEXT: .long 6
113 ; CHECK-NEXT: .ascii "ulong4"
114 ; CHECK-NEXT: .byte 13
115 ; CHECK-NEXT: .byte 0
116 ; CHECK-NEXT: .byte 14
117 ; CHECK-NEXT: .short 10
118 ; CHECK-NEXT: .byte 16
119 ; CHECK-NEXT: .byte 0
120 ; CHECK-NEXT: .byte 8
121 ; CHECK-NEXT: .byte 5
122
123 define amdgpu_kernel void @test_ulong4(<4 x i64> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !12 !kernel_arg_base_type !12 !kernel_arg_type_qual !4 {
124 ret void
125 }
126
127 ; CHECK-LABEL:{{^}}test_half8:
128 ; CHECK: .section .AMDGPU.runtime_metadata
129 ; CHECK-NEXT: .byte 4
130 ; CHECK-NEXT: .byte 6
131 ; CHECK-NEXT: .long 10
132 ; CHECK-NEXT: .ascii "test_half8"
133 ; CHECK-NEXT: .byte 7
134 ; CHECK-NEXT: .byte 9
135 ; CHECK-NEXT: .long 16
136 ; CHECK-NEXT: .byte 10
137 ; CHECK-NEXT: .long 16
138 ; CHECK-NEXT: .byte 11
139 ; CHECK-NEXT: .long 5
140 ; CHECK-NEXT: .ascii "half8"
141 ; CHECK-NEXT: .byte 13
142 ; CHECK-NEXT: .byte 0
143 ; CHECK-NEXT: .byte 14
144 ; CHECK-NEXT: .short 5
145 ; CHECK-NEXT: .byte 16
146 ; CHECK-NEXT: .byte 0
147 ; CHECK-NEXT: .byte 8
148 ; CHECK-NEXT: .byte 5
149
150 define amdgpu_kernel void @test_half8(<8 x half> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !4 {
151 ret void
152 }
153
154 ; CHECK-LABEL:{{^}}test_float16:
155 ; CHECK: .section .AMDGPU.runtime_metadata
156 ; CHECK-NEXT: .byte 4
157 ; CHECK-NEXT: .byte 6
158 ; CHECK-NEXT: .long 12
159 ; CHECK-NEXT: .ascii "test_float16"
160 ; CHECK-NEXT: .byte 7
161 ; CHECK-NEXT: .byte 9
162 ; CHECK-NEXT: .long 64
163 ; CHECK-NEXT: .byte 10
164 ; CHECK-NEXT: .long 64
165 ; CHECK-NEXT: .byte 11
166 ; CHECK-NEXT: .long 7
167 ; CHECK-NEXT: .ascii "float16"
168 ; CHECK-NEXT: .byte 13
169 ; CHECK-NEXT: .byte 0
170 ; CHECK-NEXT: .byte 14
171 ; CHECK-NEXT: .short 8
172 ; CHECK-NEXT: .byte 16
173 ; CHECK-NEXT: .byte 0
174 ; CHECK-NEXT: .byte 8
175 ; CHECK-NEXT: .byte 5
176
177 define amdgpu_kernel void @test_float16(<16 x float> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !4 {
178 ret void
179 }
180
181 ; CHECK-LABEL:{{^}}test_double16:
182 ; CHECK: .section .AMDGPU.runtime_metadata
183 ; CHECK-NEXT: .byte 4
184 ; CHECK-NEXT: .byte 6
185 ; CHECK-NEXT: .long 13
186 ; CHECK-NEXT: .ascii "test_double16"
187 ; CHECK-NEXT: .byte 7
188 ; CHECK-NEXT: .byte 9
189 ; CHECK-NEXT: .long 128
190 ; CHECK-NEXT: .byte 10
191 ; CHECK-NEXT: .long 128
192 ; CHECK-NEXT: .byte 11
193 ; CHECK-NEXT: .long 8
194 ; CHECK-NEXT: .ascii "double16"
195 ; CHECK-NEXT: .byte 13
196 ; CHECK-NEXT: .byte 0
197 ; CHECK-NEXT: .byte 14
198 ; CHECK-NEXT: .short 11
199 ; CHECK-NEXT: .byte 16
200 ; CHECK-NEXT: .byte 0
201 ; CHECK-NEXT: .byte 8
202 ; CHECK-NEXT: .byte 5
203
204 define amdgpu_kernel void @test_double16(<16 x double> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !15 !kernel_arg_base_type !15 !kernel_arg_type_qual !4 {
205 ret void
206 }
207
208 ; CHECK-LABEL:{{^}}test_pointer:
209 ; CHECK: .section .AMDGPU.runtime_metadata
210 ; CHECK-NEXT: .byte 4
211 ; CHECK-NEXT: .byte 6
212 ; CHECK-NEXT: .long 12
213 ; CHECK-NEXT: .ascii "test_pointer"
214 ; CHECK-NEXT: .byte 7
215 ; CHECK-NEXT: .byte 9
216 ; CHECK-NEXT: .long 8
217 ; CHECK-NEXT: .byte 10
218 ; CHECK-NEXT: .long 8
219 ; CHECK-NEXT: .byte 11
220 ; CHECK-NEXT: .long 5
221 ; CHECK-NEXT: .ascii "int *"
222 ; CHECK-NEXT: .byte 13
223 ; CHECK-NEXT: .byte 1
224 ; CHECK-NEXT: .byte 14
225 ; CHECK-NEXT: .short 6
226 ; CHECK-NEXT: .byte 16
227 ; CHECK-NEXT: .byte 0
228 ; CHECK-NEXT: .byte 15
229 ; CHECK-NEXT: .byte 1
230 ; CHECK-NEXT: .byte 8
231 ; CHECK-NEXT: .byte 5
232
233 define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !4 {
234 ret void
235 }
236
237 ; CHECK-LABEL:{{^}}test_image:
238 ; CHECK: .section .AMDGPU.runtime_metadata
239 ; CHECK-NEXT: .byte 4
240 ; CHECK-NEXT: .byte 6
241 ; CHECK-NEXT: .long 10
242 ; CHECK-NEXT: .ascii "test_image"
243 ; CHECK-NEXT: .byte 7
244 ; CHECK-NEXT: .byte 9
245 ; CHECK-NEXT: .long 8
246 ; CHECK-NEXT: .byte 10
247 ; CHECK-NEXT: .long 8
248 ; CHECK-NEXT: .byte 11
249 ; CHECK-NEXT: .long 9
250 ; CHECK-NEXT: .ascii "image2d_t"
251 ; CHECK-NEXT: .byte 13
252 ; CHECK-NEXT: .byte 2
253 ; CHECK-NEXT: .byte 14
254 ; CHECK-NEXT: .short 0
255 ; CHECK-NEXT: .byte 16
256 ; CHECK-NEXT: .byte 0
257 ; CHECK-NEXT: .byte 15
258 ; CHECK-NEXT: .byte 1
259 ; CHECK-NEXT: .byte 8
260 ; CHECK-NEXT: .byte 5
261
262 define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !17 !kernel_arg_base_type !17 !kernel_arg_type_qual !4 {
263 ret void
264 }
265
266 ; CHECK-LABEL:{{^}}test_sampler:
267 ; CHECK: .section .AMDGPU.runtime_metadata
268 ; CHECK-NEXT: .byte 4
269 ; CHECK-NEXT: .byte 6
270 ; CHECK-NEXT: .long 12
271 ; CHECK-NEXT: .ascii "test_sampler"
272 ; CHECK-NEXT: .byte 7
273 ; CHECK-NEXT: .byte 9
274 ; CHECK-NEXT: .long 4
275 ; CHECK-NEXT: .byte 10
276 ; CHECK-NEXT: .long 4
277 ; CHECK-NEXT: .byte 11
278 ; CHECK-NEXT: .long 9
279 ; CHECK-NEXT: .ascii "sampler_t"
280 ; CHECK-NEXT: .byte 13
281 ; CHECK-NEXT: .byte 3
282 ; CHECK-NEXT: .byte 14
283 ; CHECK-NEXT: .short 6
284 ; CHECK-NEXT: .byte 16
285 ; CHECK-NEXT: .byte 0
286 ; CHECK-NEXT: .byte 8
287 ; CHECK-NEXT: .byte 5
288
289 define amdgpu_kernel void @test_sampler(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !18 !kernel_arg_base_type !18 !kernel_arg_type_qual !4 {
290 ret void
291 }
292
293 ; CHECK-LABEL:{{^}}test_queue:
294 ; CHECK: .section .AMDGPU.runtime_metadata
295 ; CHECK-NEXT: .byte 4
296 ; CHECK-NEXT: .byte 6
297 ; CHECK-NEXT: .long 10
298 ; CHECK-NEXT: .ascii "test_queue"
299 ; CHECK-NEXT: .byte 7
300 ; CHECK-NEXT: .byte 9
301 ; CHECK-NEXT: .long 8
302 ; CHECK-NEXT: .byte 10
303 ; CHECK-NEXT: .long 8
304 ; CHECK-NEXT: .byte 11
305 ; CHECK-NEXT: .long 7
306 ; CHECK-NEXT: .ascii "queue_t"
307 ; CHECK-NEXT: .byte 13
308 ; CHECK-NEXT: .byte 4
309 ; CHECK-NEXT: .byte 14
310 ; CHECK-NEXT: .short 0
311 ; CHECK-NEXT: .byte 16
312 ; CHECK-NEXT: .byte 0
313 ; CHECK-NEXT: .byte 15
314 ; CHECK-NEXT: .byte 1
315 ; CHECK-NEXT: .byte 8
316 ; CHECK-NEXT: .byte 5
317
318 define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !19 !kernel_arg_base_type !19 !kernel_arg_type_qual !4 {
319 ret void
320 }
321
322 ; CHECK-LABEL:{{^}}test_struct:
323 ; CHECK: .section .AMDGPU.runtime_metadata
324 ; CHECK-NEXT: .byte 4
325 ; CHECK-NEXT: .byte 6
326 ; CHECK-NEXT: .long 11
327 ; CHECK-NEXT: .ascii "test_struct"
328 ; CHECK-NEXT: .byte 7
329 ; CHECK-NEXT: .byte 9
330 ; CHECK-NEXT: .long 4
331 ; CHECK-NEXT: .byte 10
332 ; CHECK-NEXT: .long 4
333 ; CHECK-NEXT: .byte 11
334 ; CHECK-NEXT: .long 8
335 ; CHECK-NEXT: .ascii "struct A"
336 ; CHECK-NEXT: .byte 13
337 ; CHECK-NEXT: .byte 1
338 ; CHECK-NEXT: .byte 14
339 ; CHECK-NEXT: .short 0
340 ; CHECK-NEXT: .byte 16
341 ; CHECK-NEXT: .byte 0
342 ; CHECK-NEXT: .byte 15
343 ; CHECK-NEXT: .byte 0
344 ; CHECK-NEXT: .byte 8
345 ; CHECK-NEXT: .byte 5
346
347 define amdgpu_kernel void @test_struct(%struct.A* byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20 !kernel_arg_base_type !20 !kernel_arg_type_qual !4 {
348 ret void
349 }
350
351 ; CHECK-LABEL:{{^}}test_i128:
352 ; CHECK: .section .AMDGPU.runtime_metadata
353 ; CHECK-NEXT: .byte 4
354 ; CHECK-NEXT: .byte 6
355 ; CHECK-NEXT: .long 9
356 ; CHECK-NEXT: .ascii "test_i128"
357 ; CHECK-NEXT: .byte 7
358 ; CHECK-NEXT: .byte 9
359 ; CHECK-NEXT: .long 16
360 ; CHECK-NEXT: .byte 10
361 ; CHECK-NEXT: .long 8
362 ; CHECK-NEXT: .byte 11
363 ; CHECK-NEXT: .long 4
364 ; CHECK-NEXT: .ascii "i128"
365 ; CHECK-NEXT: .byte 13
366 ; CHECK-NEXT: .byte 0
367 ; CHECK-NEXT: .byte 14
368 ; CHECK-NEXT: .short 0
369 ; CHECK-NEXT: .byte 16
370 ; CHECK-NEXT: .byte 0
371 ; CHECK-NEXT: .byte 8
372 ; CHECK-NEXT: .byte 5
373
374 define amdgpu_kernel void @test_i128(i128 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !21 !kernel_arg_base_type !21 !kernel_arg_type_qual !4 {
375 ret void
376 }
377
378 ; CHECK-LABEL:{{^}}test_multi_arg:
379 ; CHECK: .section .AMDGPU.runtime_metadata
380 ; CHECK-NEXT: .byte 4
381 ; CHECK-NEXT: .byte 6
382 ; CHECK-NEXT: .long 14
383 ; CHECK-NEXT: .ascii "test_multi_arg"
384 ; CHECK-NEXT: .byte 7
385 ; CHECK-NEXT: .byte 9
386 ; CHECK-NEXT: .long 4
387 ; CHECK-NEXT: .byte 10
388 ; CHECK-NEXT: .long 4
389 ; CHECK-NEXT: .byte 11
390 ; CHECK-NEXT: .long 3
391 ; CHECK-NEXT: .ascii "int"
392 ; CHECK-NEXT: .byte 13
393 ; CHECK-NEXT: .byte 0
394 ; CHECK-NEXT: .byte 14
395 ; CHECK-NEXT: .short 6
396 ; CHECK-NEXT: .byte 16
397 ; CHECK-NEXT: .byte 0
398 ; CHECK-NEXT: .byte 8
399 ; CHECK-NEXT: .byte 7
400 ; CHECK-NEXT: .byte 9
401 ; CHECK-NEXT: .long 4
402 ; CHECK-NEXT: .byte 10
403 ; CHECK-NEXT: .long 4
404 ; CHECK-NEXT: .byte 11
405 ; CHECK-NEXT: .long 6
406 ; CHECK-NEXT: .ascii "short2"
407 ; CHECK-NEXT: .byte 13
408 ; CHECK-NEXT: .byte 0
409 ; CHECK-NEXT: .byte 14
410 ; CHECK-NEXT: .short 3
411 ; CHECK-NEXT: .byte 16
412 ; CHECK-NEXT: .byte 0
413 ; CHECK-NEXT: .byte 8
414 ; CHECK-NEXT: .byte 7
415 ; CHECK-NEXT: .byte 9
416 ; CHECK-NEXT: .long 4
417 ; CHECK-NEXT: .byte 10
418 ; CHECK-NEXT: .long 4
419 ; CHECK-NEXT: .byte 11
420 ; CHECK-NEXT: .long 5
421 ; CHECK-NEXT: .ascii "char3"
422 ; CHECK-NEXT: .byte 13
423 ; CHECK-NEXT: .byte 0
424 ; CHECK-NEXT: .byte 14
425 ; CHECK-NEXT: .short 1
426 ; CHECK-NEXT: .byte 16
427 ; CHECK-NEXT: .byte 0
428 ; CHECK-NEXT: .byte 8
429 ; CHECK-NEXT: .byte 5
430
431 define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !24 !kernel_arg_base_type !24 !kernel_arg_type_qual !25 {
432 ret void
433 }
434
435 ; CHECK-LABEL:{{^}}test_addr_space:
436 ; CHECK: .section .AMDGPU.runtime_metadata
437 ; CHECK-NEXT: .byte 4
438 ; CHECK-NEXT: .byte 6
439 ; CHECK-NEXT: .long 15
440 ; CHECK-NEXT: .ascii "test_addr_space"
441 ; CHECK-NEXT: .byte 7
442 ; CHECK-NEXT: .byte 9
443 ; CHECK-NEXT: .long 8
444 ; CHECK-NEXT: .byte 10
445 ; CHECK-NEXT: .long 8
446 ; CHECK-NEXT: .byte 11
447 ; CHECK-NEXT: .long 5
448 ; CHECK-NEXT: .ascii "int *"
449 ; CHECK-NEXT: .byte 13
450 ; CHECK-NEXT: .byte 1
451 ; CHECK-NEXT: .byte 14
452 ; CHECK-NEXT: .short 6
453 ; CHECK-NEXT: .byte 16
454 ; CHECK-NEXT: .byte 0
455 ; CHECK-NEXT: .byte 15
456 ; CHECK-NEXT: .byte 1
457 ; CHECK-NEXT: .byte 8
458 ; CHECK-NEXT: .byte 7
459 ; CHECK-NEXT: .byte 9
460 ; CHECK-NEXT: .long 8
461 ; CHECK-NEXT: .byte 10
462 ; CHECK-NEXT: .long 8
463 ; CHECK-NEXT: .byte 11
464 ; CHECK-NEXT: .long 5
465 ; CHECK-NEXT: .ascii "int *"
466 ; CHECK-NEXT: .byte 13
467 ; CHECK-NEXT: .byte 1
468 ; CHECK-NEXT: .byte 14
469 ; CHECK-NEXT: .short 6
470 ; CHECK-NEXT: .byte 16
471 ; CHECK-NEXT: .byte 0
472 ; CHECK-NEXT: .byte 15
473 ; CHECK-NEXT: .byte 2
474 ; CHECK-NEXT: .byte 8
475 ; CHECK-NEXT: .byte 7
476 ; CHECK-NEXT: .byte 9
477 ; CHECK-NEXT: .long 4
478 ; CHECK-NEXT: .byte 10
479 ; CHECK-NEXT: .long 4
480 ; CHECK-NEXT: .byte 11
481 ; CHECK-NEXT: .long 5
482 ; CHECK-NEXT: .ascii "int *"
483 ; CHECK-NEXT: .byte 13
484 ; CHECK-NEXT: .byte 1
485 ; CHECK-NEXT: .byte 14
486 ; CHECK-NEXT: .short 6
487 ; CHECK-NEXT: .byte 16
488 ; CHECK-NEXT: .byte 0
489 ; CHECK-NEXT: .byte 15
490 ; CHECK-NEXT: .byte 3
491 ; CHECK-NEXT: .byte 8
492 ; CHECK-NEXT: .byte 5
493
494 define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g, i32 addrspace(2)* %c, i32 addrspace(3)* %l) !kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !25 {
495 ret void
496 }
497
498 ; CHECK-LABEL:{{^}}test_type_qual:
499 ; CHECK: .section .AMDGPU.runtime_metadata
500 ; CHECK-NEXT: .byte 4
501 ; CHECK-NEXT: .byte 6
502 ; CHECK-NEXT: .long 14
503 ; CHECK-NEXT: .ascii "test_type_qual"
504 ; CHECK-NEXT: .byte 7
505 ; CHECK-NEXT: .byte 9
506 ; CHECK-NEXT: .long 8
507 ; CHECK-NEXT: .byte 10
508 ; CHECK-NEXT: .long 8
509 ; CHECK-NEXT: .byte 11
510 ; CHECK-NEXT: .long 5
511 ; CHECK-NEXT: .ascii "int *"
512 ; CHECK-NEXT: .byte 19
513 ; CHECK-NEXT: .byte 13
514 ; CHECK-NEXT: .byte 1
515 ; CHECK-NEXT: .byte 14
516 ; CHECK-NEXT: .short 6
517 ; CHECK-NEXT: .byte 16
518 ; CHECK-NEXT: .byte 0
519 ; CHECK-NEXT: .byte 15
520 ; CHECK-NEXT: .byte 1
521 ; CHECK-NEXT: .byte 8
522 ; CHECK-NEXT: .byte 7
523 ; CHECK-NEXT: .byte 9
524 ; CHECK-NEXT: .long 8
525 ; CHECK-NEXT: .byte 10
526 ; CHECK-NEXT: .long 8
527 ; CHECK-NEXT: .byte 11
528 ; CHECK-NEXT: .long 5
529 ; CHECK-NEXT: .ascii "int *"
530 ; CHECK-NEXT: .byte 17
531 ; CHECK-NEXT: .byte 18
532 ; CHECK-NEXT: .byte 13
533 ; CHECK-NEXT: .byte 1
534 ; CHECK-NEXT: .byte 14
535 ; CHECK-NEXT: .short 6
536 ; CHECK-NEXT: .byte 16
537 ; CHECK-NEXT: .byte 0
538 ; CHECK-NEXT: .byte 15
539 ; CHECK-NEXT: .byte 1
540 ; CHECK-NEXT: .byte 8
541 ; CHECK-NEXT: .byte 7
542 ; CHECK-NEXT: .byte 9
543 ; CHECK-NEXT: .long 8
544 ; CHECK-NEXT: .byte 10
545 ; CHECK-NEXT: .long 8
546 ; CHECK-NEXT: .byte 11
547 ; CHECK-NEXT: .long 5
548 ; CHECK-NEXT: .ascii "int *"
549 ; CHECK-NEXT: .byte 20
550 ; CHECK-NEXT: .byte 13
551 ; CHECK-NEXT: .byte 1
552 ; CHECK-NEXT: .byte 14
553 ; CHECK-NEXT: .short 0
554 ; CHECK-NEXT: .byte 16
555 ; CHECK-NEXT: .byte 0
556 ; CHECK-NEXT: .byte 15
557 ; CHECK-NEXT: .byte 1
558 ; CHECK-NEXT: .byte 8
559 ; CHECK-NEXT: .byte 5
560
; Test kernel with an empty body and three addrspace(1) pointer arguments:
; two i32 pointers (%a, %b) and one %opencl.pipe_t pointer (%c). The
; !kernel_arg_type_qual attachment is !70, which carries one qualifier string
; per argument: "volatile", "const restrict", "pipe".
561 define amdgpu_kernel void @test_type_qual(i32 addrspace(1)* %a, i32 addrspace(1)* %b, %opencl.pipe_t addrspace(1)* %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !70 {
562 ret void
563 }
564
565 ; CHECK-LABEL:{{^}}test_access_qual:
566 ; CHECK: .section .AMDGPU.runtime_metadata
567 ; CHECK-NEXT: .byte 4
568 ; CHECK-NEXT: .byte 6
569 ; CHECK-NEXT: .long 16
570 ; CHECK-NEXT: .ascii "test_access_qual"
571 ; CHECK-NEXT: .byte 7
572 ; CHECK-NEXT: .byte 9
573 ; CHECK-NEXT: .long 8
574 ; CHECK-NEXT: .byte 10
575 ; CHECK-NEXT: .long 8
576 ; CHECK-NEXT: .byte 11
577 ; CHECK-NEXT: .long 9
578 ; CHECK-NEXT: .ascii "image1d_t"
579 ; CHECK-NEXT: .byte 13
580 ; CHECK-NEXT: .byte 2
581 ; CHECK-NEXT: .byte 14
582 ; CHECK-NEXT: .short 0
583 ; CHECK-NEXT: .byte 16
584 ; CHECK-NEXT: .byte 1
585 ; CHECK-NEXT: .byte 15
586 ; CHECK-NEXT: .byte 1
587 ; CHECK-NEXT: .byte 8
588 ; CHECK-NEXT: .byte 7
589 ; CHECK-NEXT: .byte 9
590 ; CHECK-NEXT: .long 8
591 ; CHECK-NEXT: .byte 10
592 ; CHECK-NEXT: .long 8
593 ; CHECK-NEXT: .byte 11
594 ; CHECK-NEXT: .long 9
595 ; CHECK-NEXT: .ascii "image2d_t"
596 ; CHECK-NEXT: .byte 13
597 ; CHECK-NEXT: .byte 2
598 ; CHECK-NEXT: .byte 14
599 ; CHECK-NEXT: .short 0
600 ; CHECK-NEXT: .byte 16
601 ; CHECK-NEXT: .byte 2
602 ; CHECK-NEXT: .byte 15
603 ; CHECK-NEXT: .byte 1
604 ; CHECK-NEXT: .byte 8
605 ; CHECK-NEXT: .byte 7
606 ; CHECK-NEXT: .byte 9
607 ; CHECK-NEXT: .long 8
608 ; CHECK-NEXT: .byte 10
609 ; CHECK-NEXT: .long 8
610 ; CHECK-NEXT: .byte 11
611 ; CHECK-NEXT: .long 9
612 ; CHECK-NEXT: .ascii "image3d_t"
613 ; CHECK-NEXT: .byte 13
614 ; CHECK-NEXT: .byte 2
615 ; CHECK-NEXT: .byte 14
616 ; CHECK-NEXT: .short 0
617 ; CHECK-NEXT: .byte 16
618 ; CHECK-NEXT: .byte 3
619 ; CHECK-NEXT: .byte 15
620 ; CHECK-NEXT: .byte 1
621 ; CHECK-NEXT: .byte 8
622 ; CHECK-NEXT: .byte 5
623
; Test kernel with an empty body and three addrspace(1) image pointer
; arguments (%opencl.image1d_t, %opencl.image2d_t, %opencl.image3d_t). The
; !kernel_arg_access_qual attachment is !61, one string per argument:
; "read_only", "write_only", "read_write".
624 define amdgpu_kernel void @test_access_qual(%opencl.image1d_t addrspace(1)* %ro, %opencl.image2d_t addrspace(1)* %wo, %opencl.image3d_t addrspace(1)* %rw) !kernel_arg_addr_space !60 !kernel_arg_access_qual !61 !kernel_arg_type !62 !kernel_arg_base_type !62 !kernel_arg_type_qual !25 {
625 ret void
626 }
627
628 ; CHECK-LABEL:{{^}}test_reqd_wgs_vec_type_hint:
629 ; CHECK: .section .AMDGPU.runtime_metadata
630 ; CHECK-NEXT: .byte 4
631 ; CHECK-NEXT: .byte 6
632 ; CHECK-NEXT: .long 27
633 ; CHECK-NEXT: .ascii "test_reqd_wgs_vec_type_hint"
634 ; CHECK-NEXT: .byte 7
635 ; CHECK-NEXT: .byte 9
636 ; CHECK-NEXT: .long 4
637 ; CHECK-NEXT: .byte 10
638 ; CHECK-NEXT: .long 4
639 ; CHECK-NEXT: .byte 11
640 ; CHECK-NEXT: .long 3
641 ; CHECK-NEXT: .ascii "int"
642 ; CHECK-NEXT: .byte 13
643 ; CHECK-NEXT: .byte 0
644 ; CHECK-NEXT: .byte 14
645 ; CHECK-NEXT: .short 6
646 ; CHECK-NEXT: .byte 16
647 ; CHECK-NEXT: .byte 0
648 ; CHECK-NEXT: .byte 8
649 ; CHECK-NEXT: .byte 21
650 ; CHECK-NEXT: .long 1
651 ; CHECK-NEXT: .long 2
652 ; CHECK-NEXT: .long 4
653 ; CHECK-NEXT: .byte 23
654 ; CHECK-NEXT: .long 3
655 ; CHECK-NEXT: .ascii "int"
656 ; CHECK-NEXT: .byte 5
657
; Test kernel with an empty body and a single i32 argument. In addition to the
; per-argument attachments it carries !vec_type_hint !5 ({i32 undef, i32 1})
; and !reqd_work_group_size !6 ({1, 2, 4}); the expected output listed above
; contains the three sizes 1, 2, 4 and the hint string "int".
658 define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !5 !reqd_work_group_size !6 {
659 ret void
660 }
661
662 ; CHECK-LABEL:{{^}}test_wgs_hint_vec_type_hint:
663 ; CHECK: .section .AMDGPU.runtime_metadata
664 ; CHECK-NEXT: .byte 4
665 ; CHECK-NEXT: .byte 6
666 ; CHECK-NEXT: .long 27
667 ; CHECK-NEXT: .ascii "test_wgs_hint_vec_type_hint"
668 ; CHECK-NEXT: .byte 7
669 ; CHECK-NEXT: .byte 9
670 ; CHECK-NEXT: .long 4
671 ; CHECK-NEXT: .byte 10
672 ; CHECK-NEXT: .long 4
673 ; CHECK-NEXT: .byte 11
674 ; CHECK-NEXT: .long 3
675 ; CHECK-NEXT: .ascii "int"
676 ; CHECK-NEXT: .byte 13
677 ; CHECK-NEXT: .byte 0
678 ; CHECK-NEXT: .byte 14
679 ; CHECK-NEXT: .short 6
680 ; CHECK-NEXT: .byte 16
681 ; CHECK-NEXT: .byte 0
682 ; CHECK-NEXT: .byte 8
683 ; CHECK-NEXT: .byte 22
684 ; CHECK-NEXT: .long 8
685 ; CHECK-NEXT: .long 16
686 ; CHECK-NEXT: .long 32
687 ; CHECK-NEXT: .byte 23
688 ; CHECK-NEXT: .long 5
689 ; CHECK-NEXT: .ascii "uint4"
690 ; CHECK-NEXT: .byte 5
691
; Test kernel with an empty body and a single i32 argument. It carries
; !vec_type_hint !7 ({<4 x i32> undef, i32 0}) and !work_group_size_hint !8
; ({8, 16, 32}); the expected output listed above contains the three sizes
; 8, 16, 32 and the hint string "uint4".
692 define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !7 !work_group_size_hint !8 {
693 ret void
694 }
695
696 ; CHECK-LABEL:{{^}}test_arg_ptr_to_ptr:
697 ; CHECK: .section .AMDGPU.runtime_metadata
698 ; CHECK-NEXT: .byte 4
699 ; CHECK-NEXT: .byte 6
700 ; CHECK-NEXT: .long 19
701 ; CHECK-NEXT: .ascii "test_arg_ptr_to_ptr"
702 ; CHECK-NEXT: .byte 7
703 ; CHECK-NEXT: .byte 9
704 ; CHECK-NEXT: .long 8
705 ; CHECK-NEXT: .byte 10
706 ; CHECK-NEXT: .long 8
707 ; CHECK-NEXT: .byte 11
708 ; CHECK-NEXT: .long 6
709 ; CHECK-NEXT: .ascii "int **"
710 ; CHECK-NEXT: .byte 13
711 ; CHECK-NEXT: .byte 1
712 ; CHECK-NEXT: .byte 14
713 ; CHECK-NEXT: .short 6
714 ; CHECK-NEXT: .byte 16
715 ; CHECK-NEXT: .byte 0
716 ; CHECK-NEXT: .byte 15
717 ; CHECK-NEXT: .byte 1
718 ; CHECK-NEXT: .byte 8
719 ; CHECK-NEXT: .byte 5
720
; Test kernel with an empty body whose single argument is an addrspace(1)
; pointer to an i32 pointer. The type-name attachments use !80 ("int **") and
; the address-space attachment is !81 ({1}).
721 define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 * addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80 !kernel_arg_base_type !80 !kernel_arg_type_qual !4 {
722 ret void
723 }
724
725 ; CHECK-LABEL:{{^}}test_arg_struct_contains_ptr:
726 ; CHECK: .section .AMDGPU.runtime_metadata
727 ; CHECK-NEXT: .byte 4
728 ; CHECK-NEXT: .byte 6
729 ; CHECK-NEXT: .long 28
730 ; CHECK-NEXT: .ascii "test_arg_struct_contains_ptr"
731 ; CHECK-NEXT: .byte 7
732 ; CHECK-NEXT: .byte 9
733 ; CHECK-NEXT: .long 4
734 ; CHECK-NEXT: .byte 10
735 ; CHECK-NEXT: .long 4
736 ; CHECK-NEXT: .byte 11
737 ; CHECK-NEXT: .long 8
738 ; CHECK-NEXT: .ascii "struct B"
739 ; CHECK-NEXT: .byte 13
740 ; CHECK-NEXT: .byte 1
741 ; CHECK-NEXT: .byte 14
742 ; CHECK-NEXT: .short 0
743 ; CHECK-NEXT: .byte 16
744 ; CHECK-NEXT: .byte 0
745 ; CHECK-NEXT: .byte 15
746 ; CHECK-NEXT: .byte 0
747 ; CHECK-NEXT: .byte 8
748 ; CHECK-NEXT: .byte 5
749
; Test kernel with an empty body whose single argument is a %struct.B pointer
; passed byval. The type-name attachments use !82 ("struct B").
; NOTE(review): %struct.B is declared outside this part of the file; the
; kernel name suggests it has a pointer member - confirm against the
; type definition.
750 define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B * byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82 !kernel_arg_base_type !82 !kernel_arg_type_qual !4 {
751 ret void
752 }
753
754 ; CHECK-LABEL:{{^}}test_arg_vector_of_ptr:
755 ; CHECK: .section .AMDGPU.runtime_metadata
756 ; CHECK-NEXT: .byte 4
757 ; CHECK-NEXT: .byte 6
758 ; CHECK-NEXT: .long 22
759 ; CHECK-NEXT: .ascii "test_arg_vector_of_ptr"
760 ; CHECK-NEXT: .byte 7
761 ; CHECK-NEXT: .byte 9
762 ; CHECK-NEXT: .long 16
763 ; CHECK-NEXT: .byte 10
764 ; CHECK-NEXT: .long 16
765 ; CHECK-NEXT: .byte 11
766 ; CHECK-NEXT: .long 47
767 ; CHECK-NEXT: .ascii "global int* __attribute__((ext_vector_type(2)))"
768 ; CHECK-NEXT: .byte 13
769 ; CHECK-NEXT: .byte 0
770 ; CHECK-NEXT: .byte 14
771 ; CHECK-NEXT: .short 6
772 ; CHECK-NEXT: .byte 16
773 ; CHECK-NEXT: .byte 0
774 ; CHECK-NEXT: .byte 8
775 ; CHECK-NEXT: .byte 5
776
; Test kernel with an empty body whose single argument is a two-element vector
; of addrspace(1) i32 pointers. The type-name attachments use !83
; ("global int* __attribute__((ext_vector_type(2)))").
777 define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !83 !kernel_arg_base_type !83 !kernel_arg_type_qual !4 {
778 ret void
779 }
780
781 ; CHECK-LABEL:{{^}}test_arg_unknown_builtin_type:
782 ; CHECK: .section .AMDGPU.runtime_metadata
783 ; CHECK-NEXT: .byte 4
784 ; CHECK-NEXT: .byte 6
785 ; CHECK-NEXT: .long 29
786 ; CHECK-NEXT: .ascii "test_arg_unknown_builtin_type"
787 ; CHECK-NEXT: .byte 7
788 ; CHECK-NEXT: .byte 9
789 ; CHECK-NEXT: .long 8
790 ; CHECK-NEXT: .byte 10
791 ; CHECK-NEXT: .long 8
792 ; CHECK-NEXT: .byte 11
793 ; CHECK-NEXT: .long 11
794 ; CHECK-NEXT: .ascii "clk_event_t"
795 ; CHECK-NEXT: .byte 13
796 ; CHECK-NEXT: .byte 1
797 ; CHECK-NEXT: .byte 14
798 ; CHECK-NEXT: .short 0
799 ; CHECK-NEXT: .byte 16
800 ; CHECK-NEXT: .byte 0
801 ; CHECK-NEXT: .byte 15
802 ; CHECK-NEXT: .byte 1
803 ; CHECK-NEXT: .byte 8
804 ; CHECK-NEXT: .byte 5
805
; Test kernel with an empty body whose single argument is an addrspace(1)
; pointer to %opencl.clk_event_t. The type-name attachments use !84
; ("clk_event_t") and the address-space attachment is !81 ({1}).
806 define amdgpu_kernel void @test_arg_unknown_builtin_type(%opencl.clk_event_t addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !84 !kernel_arg_base_type !84 !kernel_arg_type_qual !4 {
807 ret void
808 }
809
; Metadata nodes referenced by the kernel attachments in this file
; (!kernel_arg_addr_space, !kernel_arg_access_qual, !kernel_arg_type,
; !kernel_arg_base_type, !kernel_arg_type_qual, !vec_type_hint,
; !reqd_work_group_size, !work_group_size_hint).

; Single-argument kernels: address space, access qualifier, type name and
; type qualifier lists with one entry each.
810 !1 = !{i32 0}
811 !2 = !{!"none"}
812 !3 = !{!"int"}
813 !4 = !{!""}
; Operands for !vec_type_hint (!5, !7) and for !reqd_work_group_size /
; !work_group_size_hint (!6, !8).
814 !5 = !{i32 undef, i32 1}
815 !6 = !{i32 1, i32 2, i32 4}
816 !7 = !{<4 x i32> undef, i32 0}
817 !8 = !{i32 8, i32 16, i32 32}
; Single type-name strings. NOTE(review): none of !9-!21 is referenced by the
; kernels near the end of the file; presumably they are used by kernels
; defined earlier in this test - confirm.
818 !9 = !{!"char"}
819 !10 = !{!"ushort2"}
820 !11 = !{!"int3"}
821 !12 = !{!"ulong4"}
822 !13 = !{!"half8"}
823 !14 = !{!"float16"}
824 !15 = !{!"double16"}
825 !16 = !{!"int *"}
826 !17 = !{!"image2d_t"}
827 !18 = !{!"sampler_t"}
828 !19 = !{!"queue_t"}
829 !20 = !{!"struct A"}
830 !21 = !{!"i128"}
; Three-entry lists for three-argument kernels. NOTE(review): !24 is likewise
; only used, if at all, by a kernel defined earlier in the file.
831 !22 = !{i32 0, i32 0, i32 0}
832 !23 = !{!"none", !"none", !"none"}
833 !24 = !{!"int", !"short2", !"char3"}
834 !25 = !{!"", !"", !""}
; Address spaces (!50) and type names (!51) used by @test_addr_space; !51 is
; also the type-name list of @test_type_qual.
835 !50 = !{i32 1, i32 2, i32 3}
836 !51 = !{!"int *", !"int *", !"int *"}
; Address spaces, access qualifiers and type names used by @test_access_qual.
837 !60 = !{i32 1, i32 1, i32 1}
838 !61 = !{!"read_only", !"write_only", !"read_write"}
839 !62 = !{!"image1d_t", !"image2d_t", !"image3d_t"}
; Type qualifiers used by @test_type_qual.
840 !70 = !{!"volatile", !"const restrict", !"pipe"}
; Type names and address space used by the @test_arg_* kernels.
841 !80 = !{!"int **"}
842 !81 = !{i32 1}
843 !82 = !{!"struct B"}
844 !83 = !{!"global int* __attribute__((ext_vector_type(2)))"}
845 !84 = !{!"clk_event_t"}
; Module-level named metadata; !90 holds the pair {2, 0}
; (presumably OpenCL version 2.0 - confirm).
846 !opencl.ocl.version = !{!90}
847 !90 = !{i32 2, i32 0}