llvm.org GIT mirror llvm / 384c642
Re-commit [AMDGPU] Add metadata for runtime Attempting to fix lit test failure on ppc. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275676 91177308-0d34-0410-b5e6-96231b3b80d8 Yaxun Liu 4 years ago
4 changed file(s) with 1220 addition(s) and 1 deletion(s). Raw diff Collapse all Expand all
3838 #include "llvm/Support/MathExtras.h"
3939 #include "llvm/Support/TargetRegistry.h"
4040 #include "llvm/Target/TargetLoweringObjectFile.h"
41
41 #include "AMDGPURuntimeMetadata.h"
42
43 using namespace ::AMDGPU;
4244 using namespace llvm;
4345
4446 // TODO: This should get the default rounding mode from the kernel. We just set
110112 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits());
111113 TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
112114 "AMD", "AMDGPU");
115 emitStartOfRuntimeMetadata(M);
113116 }
114117
115118 void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
242245 OutStreamer->EmitBytes(StringRef(Comment));
243246 }
244247 }
248
249 emitRuntimeMetadata(*MF.getFunction());
245250
246251 return false;
247252 }
739744 *TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo());
740745 return false;
741746 }
747
748 // Emit a key and an integer value for runtime metadata.
749 static void emitRuntimeMDIntValue(std::unique_ptr &Streamer,
750 RuntimeMD::Key K, uint64_t V,
751 unsigned Size) {
752 Streamer->EmitIntValue(K, 1);
753 Streamer->EmitIntValue(V, Size);
754 }
755
756 // Emit a key and a string value for runtime metadata.
757 static void emitRuntimeMDStringValue(std::unique_ptr &Streamer,
758 RuntimeMD::Key K, StringRef S) {
759 Streamer->EmitIntValue(K, 1);
760 Streamer->EmitIntValue(S.size(), 4);
761 Streamer->EmitBytes(S);
762 }
763
764 // Emit a key and three integer values for runtime metadata.
765 // The three integer values are obtained from MDNode \p Node;
766 static void emitRuntimeMDThreeIntValues(std::unique_ptr &Streamer,
767 RuntimeMD::Key K, MDNode *Node,
768 unsigned Size) {
769 Streamer->EmitIntValue(K, 1);
770 Streamer->EmitIntValue(mdconst::extract(
771 Node->getOperand(0))->getZExtValue(), Size);
772 Streamer->EmitIntValue(mdconst::extract(
773 Node->getOperand(1))->getZExtValue(), Size);
774 Streamer->EmitIntValue(mdconst::extract(
775 Node->getOperand(2))->getZExtValue(), Size);
776 }
777
778 void AMDGPUAsmPrinter::emitStartOfRuntimeMetadata(const Module &M) {
779 OutStreamer->SwitchSection(getObjFileLowering().getContext()
780 .getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0));
781
782 emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyMDVersion,
783 RuntimeMD::MDVersion << 8 | RuntimeMD::MDRevision, 2);
784 if (auto MD = M.getNamedMetadata("opencl.ocl.version")) {
785 emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguage,
786 RuntimeMD::OpenCL_C, 1);
787 auto Node = MD->getOperand(0);
788 unsigned short Major = mdconst::extract(Node->getOperand(0))
789 ->getZExtValue();
790 unsigned short Minor = mdconst::extract(Node->getOperand(1))
791 ->getZExtValue();
792 emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguageVersion,
793 Major * 100 + Minor * 10, 2);
794 }
795 }
796
797 static std::string getOCLTypeName(Type *Ty, bool isSigned) {
798 if (VectorType* VecTy = dyn_cast(Ty)) {
799 Type* EleTy = VecTy->getElementType();
800 unsigned Size = VecTy->getVectorNumElements();
801 return (Twine(getOCLTypeName(EleTy, isSigned)) + Twine(Size)).str();
802 }
803 switch (Ty->getTypeID()) {
804 case Type::HalfTyID: return "half";
805 case Type::FloatTyID: return "float";
806 case Type::DoubleTyID: return "double";
807 case Type::IntegerTyID: {
808 if (!isSigned)
809 return (Twine('u') + Twine(getOCLTypeName(Ty, true))).str();
810 auto IntTy = cast(Ty);
811 auto BW = IntTy->getIntegerBitWidth();
812 switch (BW) {
813 case 8:
814 return "char";
815 case 16:
816 return "short";
817 case 32:
818 return "int";
819 case 64:
820 return "long";
821 default:
822 return (Twine('i') + Twine(BW)).str();
823 }
824 }
825 default:
826 llvm_unreachable("invalid type");
827 }
828 }
829
830 static RuntimeMD::KernelArg::ValueType getRuntimeMDValueType(
831 Type *Ty, StringRef TypeName) {
832 if (auto VT = dyn_cast(Ty))
833 return getRuntimeMDValueType(VT->getElementType(), TypeName);
834 else if (auto PT = dyn_cast(Ty))
835 return getRuntimeMDValueType(PT->getElementType(), TypeName);
836 else if (Ty->isHalfTy())
837 return RuntimeMD::KernelArg::F16;
838 else if (Ty->isFloatTy())
839 return RuntimeMD::KernelArg::F32;
840 else if (Ty->isDoubleTy())
841 return RuntimeMD::KernelArg::F64;
842 else if (IntegerType* intTy = dyn_cast(Ty)) {
843 bool Signed = !TypeName.startswith("u");
844 switch (intTy->getIntegerBitWidth()) {
845 case 8:
846 return Signed ? RuntimeMD::KernelArg::I8 : RuntimeMD::KernelArg::U8;
847 case 16:
848 return Signed ? RuntimeMD::KernelArg::I16 : RuntimeMD::KernelArg::U16;
849 case 32:
850 return Signed ? RuntimeMD::KernelArg::I32 : RuntimeMD::KernelArg::U32;
851 case 64:
852 return Signed ? RuntimeMD::KernelArg::I64 : RuntimeMD::KernelArg::U64;
853 default:
854 // Runtime does not recognize other integer types. Report as
855 // struct type.
856 return RuntimeMD::KernelArg::Struct;
857 }
858 } else
859 return RuntimeMD::KernelArg::Struct;
860 }
861
862 void AMDGPUAsmPrinter::emitRuntimeMetadata(const Function &F) {
863 if (!F.getMetadata("kernel_arg_type"))
864 return;
865
866 MCContext &Context = getObjFileLowering().getContext();
867 OutStreamer->SwitchSection(
868 Context.getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0));
869 OutStreamer->EmitIntValue(RuntimeMD::KeyKernelBegin, 1);
870 emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyKernelName, F.getName());
871
872 for (auto &Arg:F.args()) {
873 // Emit KeyArgBegin.
874 unsigned I = Arg.getArgNo();
875 OutStreamer->EmitIntValue(RuntimeMD::KeyArgBegin, 1);
876
877 // Emit KeyArgSize and KeyArgAlign.
878 auto T = Arg.getType();
879 auto DL = F.getParent()->getDataLayout();
880 emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgSize,
881 DL.getTypeAllocSize(T), 4);
882 emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAlign,
883 DL.getABITypeAlignment(T), 4);
884
885 // Emit KeyArgTypeName.
886 auto TypeName = dyn_cast(F.getMetadata(
887 "kernel_arg_type")->getOperand(I))->getString();
888 emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgTypeName, TypeName);
889
890 // Emit KeyArgName.
891 if (auto ArgNameMD = F.getMetadata("kernel_arg_name")) {
892 auto ArgName = cast(ArgNameMD->getOperand(
893 I))->getString();
894 emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgName, ArgName);
895 }
896
897 // Emit KeyArgIsVolatile, KeyArgIsRestrict, KeyArgIsConst and KeyArgIsPipe.
898 auto TypeQual = cast(F.getMetadata(
899 "kernel_arg_type_qual")->getOperand(I))->getString();
900 SmallVector SplitQ;
901 TypeQual.split(SplitQ, " ", -1, false/* drop empty entry*/);
902 for (auto &I:SplitQ) {
903 auto Key = StringSwitch(I)
904 .Case("volatile", RuntimeMD::KeyArgIsVolatile)
905 .Case("restrict", RuntimeMD::KeyArgIsRestrict)
906 .Case("const", RuntimeMD::KeyArgIsConst)
907 .Case("pipe", RuntimeMD::KeyArgIsPipe)
908 .Default(RuntimeMD::KeyNull);
909 OutStreamer->EmitIntValue(Key, 1);
910 }
911
912 // Emit KeyArgTypeKind.
913 auto BaseTypeName = cast(
914 F.getMetadata("kernel_arg_base_type")->getOperand(I))->getString();
915 auto TypeKind = StringSwitch(BaseTypeName)
916 .Case("sampler_t", RuntimeMD::KernelArg::Sampler)
917 .Case("queue_t", RuntimeMD::KernelArg::Queue)
918 .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t",
919 "image2d_t" , "image2d_array_t", RuntimeMD::KernelArg::Image)
920 .Cases("image2d_depth_t", "image2d_array_depth_t",
921 "image2d_msaa_t", "image2d_array_msaa_t",
922 "image2d_msaa_depth_t", RuntimeMD::KernelArg::Image)
923 .Cases("image2d_array_msaa_depth_t", "image3d_t",
924 RuntimeMD::KernelArg::Image)
925 .Default(isa(T) ? RuntimeMD::KernelArg::Pointer :
926 RuntimeMD::KernelArg::Value);
927 emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgTypeKind, TypeKind, 1);
928
929 // Emit KeyArgValueType.
930 emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgValueType,
931 getRuntimeMDValueType(T, BaseTypeName), 2);
932
933 // Emit KeyArgAccQual.
934 auto AccQual = cast(F.getMetadata(
935 "kernel_arg_access_qual")->getOperand(I))->getString();
936 auto AQ = StringSwitch(AccQual)
937 .Case("read_only", RuntimeMD::KernelArg::ReadOnly)
938 .Case("write_only", RuntimeMD::KernelArg::WriteOnly)
939 .Case("read_write", RuntimeMD::KernelArg::ReadWrite)
940 .Default(RuntimeMD::KernelArg::None);
941 emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAccQual,
942 AQ, 1);
943
944 // Emit KeyArgAddrQual.
945 if (isa(T))
946 emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAddrQual,
947 T->getPointerAddressSpace(), 1);
948
949 // Emit KeyArgEnd
950 OutStreamer->EmitIntValue(RuntimeMD::KeyArgEnd, 1);
951 }
952
953 // Emit KeyReqdWorkGroupSize, KeyWorkGroupSizeHint, and KeyVecTypeHint.
954 if (auto RWGS = F.getMetadata("reqd_work_group_size"))
955 emitRuntimeMDThreeIntValues(OutStreamer, RuntimeMD::KeyReqdWorkGroupSize,
956 RWGS, 4);
957 if (auto WGSH = F.getMetadata("work_group_size_hint"))
958 emitRuntimeMDThreeIntValues(OutStreamer, RuntimeMD::KeyWorkGroupSizeHint,
959 WGSH, 4);
960 if (auto VTH = F.getMetadata("vec_type_hint")) {
961 auto TypeName = getOCLTypeName(cast(
962 VTH->getOperand(0))->getType(), mdconst::extract(
963 VTH->getOperand(1))->getZExtValue());
964 emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyVecTypeHint,
965 TypeName);
966 }
967
968 // Emit KeyKernelEnd
969 OutStreamer->EmitIntValue(RuntimeMD::KeyKernelEnd, 1);
970 }
126126 unsigned AsmVariant, const char *ExtraCode,
127127 raw_ostream &O) override;
128128
129 void emitStartOfRuntimeMetadata(const Module &M);
130
131 void emitRuntimeMetadata(const Function &F);
132
129133 protected:
130134 std::vector DisasmLines, HexLines;
131135 size_t DisasmLineMaxLen;
//===-- AMDGPURuntimeMetadata.h - AMDGPU Runtime Metadata -------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// Enums and structure types used by runtime metadata.
///
/// Runtime requests certain information (metadata) about kernels to be able
/// to execute the kernels and answer the queries about the kernels.
/// The metadata is represented as a byte stream in an ELF section of a
/// binary (code object). The byte stream consists of key-value pairs.
/// Each key is an 8 bit unsigned integer. Each value can be an integer,
/// a string, or a stream of key-value pairs. There are 3 levels of key-value
/// pair streams. At the beginning of the ELF section is the top level
/// key-value pair stream. A kernel-level key-value pair stream starts after
/// encountering KeyKernelBegin and ends immediately before encountering
/// KeyKernelEnd. A kernel-argument-level key-value pair stream starts
/// after encountering KeyArgBegin and ends immediately before encountering
/// KeyArgEnd. A kernel-level key-value pair stream can only appear in a top
/// level key-value pair stream. A kernel-argument-level key-value pair stream
/// can only appear in a kernel-level key-value pair stream.
///
/// The format should be kept backward compatible. New enum values and bit
/// fields should be appended at the end. It is suggested to bump up the
/// revision number whenever the format changes and document the change
/// in the revision in this header.
///
//
//===----------------------------------------------------------------------===//
//
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H

#include <stdint.h>

namespace AMDGPU {

namespace RuntimeMD {

  // Version and revision of runtime metadata
  const unsigned char MDVersion   = 1;
  const unsigned char MDRevision  = 0;

  // ELF section name containing runtime metadata
  const char SectionName[] = ".AMDGPU.runtime_metadata";

  // Enumeration values of keys in runtime metadata.
  enum Key {
    KeyNull                     = 0, // Place holder. Ignored when encountered
    KeyMDVersion                = 1, // Runtime metadata version
    KeyLanguage                 = 2, // Language
    KeyLanguageVersion          = 3, // Language version
    KeyKernelBegin              = 4, // Beginning of kernel-level stream
    KeyKernelEnd                = 5, // End of kernel-level stream
    KeyKernelName               = 6, // Kernel name
    KeyArgBegin                 = 7, // Beginning of kernel-arg-level stream
    KeyArgEnd                   = 8, // End of kernel-arg-level stream
    KeyArgSize                  = 9, // Kernel argument size
    KeyArgAlign                 = 10, // Kernel argument alignment
    KeyArgTypeName              = 11, // Kernel argument type name
    KeyArgName                  = 12, // Kernel argument name
    KeyArgTypeKind              = 13, // Kernel argument type kind
    KeyArgValueType             = 14, // Kernel argument value type
    KeyArgAddrQual              = 15, // Kernel argument address qualifier
    KeyArgAccQual               = 16, // Kernel argument access qualifier
    KeyArgIsConst               = 17, // Kernel argument is const qualified
    KeyArgIsRestrict            = 18, // Kernel argument is restrict qualified
    KeyArgIsVolatile            = 19, // Kernel argument is volatile qualified
    KeyArgIsPipe                = 20, // Kernel argument is pipe qualified
    KeyReqdWorkGroupSize        = 21, // Required work group size
    KeyWorkGroupSizeHint        = 22, // Work group size hint
    KeyVecTypeHint              = 23, // Vector type hint
    KeyKernelIndex              = 24, // Kernel index for device enqueue
    KeySGPRs                    = 25, // Number of SGPRs
    KeyVGPRs                    = 26, // Number of VGPRs
    KeyMinWavesPerSIMD          = 27, // Minimum number of waves per SIMD
    KeyMaxWavesPerSIMD          = 28, // Maximum number of waves per SIMD
    KeyFlatWorkGroupSizeLimits  = 29, // Flat work group size limits
    KeyMaxWorkGroupSize         = 30, // Maximum work group size
    KeyNoPartialWorkGroups      = 31, // No partial work groups
  };

  // Source language; the value stored for KeyLanguage.
  enum Language : uint8_t {
    OpenCL_C      = 0,
    HCC           = 1,
    OpenMP        = 2,
    OpenCL_CPP    = 3,
  };

  // Language version, written as major * 100 + minor * 10.
  enum LanguageVersion : uint16_t {
    V100          = 100,
    V110          = 110,
    V120          = 120,
    V200          = 200,
    V210          = 210,
  };

  namespace KernelArg {
    // Kind of a kernel argument; the value stored for KeyArgTypeKind.
    enum TypeKind : uint8_t {
      Value     = 0,
      Pointer   = 1,
      Image     = 2,
      Sampler   = 3,
      Queue     = 4,
    };

    // Scalar value type of a kernel argument; the value stored for
    // KeyArgValueType.
    enum ValueType : uint16_t {
      Struct  = 0,
      I8      = 1,
      U8      = 2,
      I16     = 3,
      U16     = 4,
      F16     = 5,
      I32     = 6,
      U32     = 7,
      F32     = 8,
      I64     = 9,
      U64     = 10,
      F64     = 11,
    };

    // Access qualifier of a kernel argument; the value stored for
    // KeyArgAccQual. The spelling "AccessQualifer" (sic) is kept so that
    // existing users of the name keep compiling.
    enum AccessQualifer : uint8_t {
      None       = 0,
      ReadOnly   = 1,
      WriteOnly  = 2,
      ReadWrite  = 3,
    };
  } // namespace KernelArg
} // namespace RuntimeMD
} // namespace AMDGPU

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
0 ; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s
1
2 %struct.A = type { i8, float }
3 %opencl.image1d_t = type opaque
4 %opencl.image2d_t = type opaque
5 %opencl.image3d_t = type opaque
6 %opencl.queue_t = type opaque
7 %opencl.pipe_t = type opaque
8 %struct.B = type { i32 addrspace(1)*}
9 %opencl.clk_event_t = type opaque
10
11 ; CHECK: .section .AMDGPU.runtime_metadata
12 ; CHECK-NEXT: .byte 1
13 ; CHECK-NEXT: .short 256
14 ; CHECK-NEXT: .byte 2
15 ; CHECK-NEXT: .byte 0
16 ; CHECK-NEXT: .byte 3
17 ; CHECK-NEXT: .short 200
18
19 ; CHECK-LABEL:{{^}}test_char:
20 ; CHECK: .section .AMDGPU.runtime_metadata
21 ; CHECK-NEXT: .byte 4
22 ; CHECK-NEXT: .byte 6
23 ; CHECK-NEXT: .long 9
24 ; CHECK-NEXT: .ascii "test_char"
25 ; CHECK-NEXT: .byte 7
26 ; CHECK-NEXT: .byte 9
27 ; CHECK-NEXT: .long 1
28 ; CHECK-NEXT: .byte 10
29 ; CHECK-NEXT: .long 1
30 ; CHECK-NEXT: .byte 11
31 ; CHECK-NEXT: .long 4
32 ; CHECK-NEXT: .ascii "char"
33 ; CHECK-NEXT: .byte 13
34 ; CHECK-NEXT: .byte 0
35 ; CHECK-NEXT: .byte 14
36 ; CHECK-NEXT: .short 1
37 ; CHECK-NEXT: .byte 16
38 ; CHECK-NEXT: .byte 0
39 ; CHECK-NEXT: .byte 8
40 ; CHECK-NEXT: .byte 5
41
; Kernel with one by-value i8 argument typed "char": expected size and alignment 1, type kind 0 (Value), value type 1 (I8).
42 define amdgpu_kernel void @test_char(i8 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
43 ret void
44 }
45
46 ; CHECK-LABEL:{{^}}test_ushort2:
47 ; CHECK: .section .AMDGPU.runtime_metadata
48 ; CHECK-NEXT: .byte 4
49 ; CHECK-NEXT: .byte 6
50 ; CHECK-NEXT: .long 12
51 ; CHECK-NEXT: .ascii "test_ushort2"
52 ; CHECK-NEXT: .byte 7
53 ; CHECK-NEXT: .byte 9
54 ; CHECK-NEXT: .long 4
55 ; CHECK-NEXT: .byte 10
56 ; CHECK-NEXT: .long 4
57 ; CHECK-NEXT: .byte 11
58 ; CHECK-NEXT: .long 7
59 ; CHECK-NEXT: .ascii "ushort2"
60 ; CHECK-NEXT: .byte 13
61 ; CHECK-NEXT: .byte 0
62 ; CHECK-NEXT: .byte 14
63 ; CHECK-NEXT: .short 4
64 ; CHECK-NEXT: .byte 16
65 ; CHECK-NEXT: .byte 0
66 ; CHECK-NEXT: .byte 8
67 ; CHECK-NEXT: .byte 5
68
; Kernel with one <2 x i16> argument typed "ushort2": the leading 'u' of the type name selects the unsigned value type 4 (U16); expected size 4.
69 define amdgpu_kernel void @test_ushort2(<2 x i16> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !4 {
70 ret void
71 }
72
73 ; CHECK-LABEL:{{^}}test_int3:
74 ; CHECK: .section .AMDGPU.runtime_metadata
75 ; CHECK-NEXT: .byte 4
76 ; CHECK-NEXT: .byte 6
77 ; CHECK-NEXT: .long 9
78 ; CHECK-NEXT: .ascii "test_int3"
79 ; CHECK-NEXT: .byte 7
80 ; CHECK-NEXT: .byte 9
81 ; CHECK-NEXT: .long 16
82 ; CHECK-NEXT: .byte 10
83 ; CHECK-NEXT: .long 16
84 ; CHECK-NEXT: .byte 11
85 ; CHECK-NEXT: .long 4
86 ; CHECK-NEXT: .ascii "int3"
87 ; CHECK-NEXT: .byte 13
88 ; CHECK-NEXT: .byte 0
89 ; CHECK-NEXT: .byte 14
90 ; CHECK-NEXT: .short 6
91 ; CHECK-NEXT: .byte 16
92 ; CHECK-NEXT: .byte 0
93 ; CHECK-NEXT: .byte 8
94 ; CHECK-NEXT: .byte 5
95
; Kernel with one <3 x i32> argument typed "int3": expected size and alignment 16, value type 6 (I32).
96 define amdgpu_kernel void @test_int3(<3 x i32> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !11 !kernel_arg_base_type !11 !kernel_arg_type_qual !4 {
97 ret void
98 }
99
100 ; CHECK-LABEL:{{^}}test_ulong4:
101 ; CHECK: .section .AMDGPU.runtime_metadata
102 ; CHECK-NEXT: .byte 4
103 ; CHECK-NEXT: .byte 6
104 ; CHECK-NEXT: .long 11
105 ; CHECK-NEXT: .ascii "test_ulong4"
106 ; CHECK-NEXT: .byte 7
107 ; CHECK-NEXT: .byte 9
108 ; CHECK-NEXT: .long 32
109 ; CHECK-NEXT: .byte 10
110 ; CHECK-NEXT: .long 32
111 ; CHECK-NEXT: .byte 11
112 ; CHECK-NEXT: .long 6
113 ; CHECK-NEXT: .ascii "ulong4"
114 ; CHECK-NEXT: .byte 13
115 ; CHECK-NEXT: .byte 0
116 ; CHECK-NEXT: .byte 14
117 ; CHECK-NEXT: .short 10
118 ; CHECK-NEXT: .byte 16
119 ; CHECK-NEXT: .byte 0
120 ; CHECK-NEXT: .byte 8
121 ; CHECK-NEXT: .byte 5
122
; Kernel with one <4 x i64> argument typed "ulong4": expected size and alignment 32, value type 10 (U64).
123 define amdgpu_kernel void @test_ulong4(<4 x i64> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !12 !kernel_arg_base_type !12 !kernel_arg_type_qual !4 {
124 ret void
125 }
126
127 ; CHECK-LABEL:{{^}}test_half8:
128 ; CHECK: .section .AMDGPU.runtime_metadata
129 ; CHECK-NEXT: .byte 4
130 ; CHECK-NEXT: .byte 6
131 ; CHECK-NEXT: .long 10
132 ; CHECK-NEXT: .ascii "test_half8"
133 ; CHECK-NEXT: .byte 7
134 ; CHECK-NEXT: .byte 9
135 ; CHECK-NEXT: .long 16
136 ; CHECK-NEXT: .byte 10
137 ; CHECK-NEXT: .long 16
138 ; CHECK-NEXT: .byte 11
139 ; CHECK-NEXT: .long 5
140 ; CHECK-NEXT: .ascii "half8"
141 ; CHECK-NEXT: .byte 13
142 ; CHECK-NEXT: .byte 0
143 ; CHECK-NEXT: .byte 14
144 ; CHECK-NEXT: .short 5
145 ; CHECK-NEXT: .byte 16
146 ; CHECK-NEXT: .byte 0
147 ; CHECK-NEXT: .byte 8
148 ; CHECK-NEXT: .byte 5
149
; Kernel with one <8 x half> argument typed "half8": expected size and alignment 16, value type 5 (F16).
150 define amdgpu_kernel void @test_half8(<8 x half> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !4 {
151 ret void
152 }
153
154 ; CHECK-LABEL:{{^}}test_float16:
155 ; CHECK: .section .AMDGPU.runtime_metadata
156 ; CHECK-NEXT: .byte 4
157 ; CHECK-NEXT: .byte 6
158 ; CHECK-NEXT: .long 12
159 ; CHECK-NEXT: .ascii "test_float16"
160 ; CHECK-NEXT: .byte 7
161 ; CHECK-NEXT: .byte 9
162 ; CHECK-NEXT: .long 64
163 ; CHECK-NEXT: .byte 10
164 ; CHECK-NEXT: .long 64
165 ; CHECK-NEXT: .byte 11
166 ; CHECK-NEXT: .long 7
167 ; CHECK-NEXT: .ascii "float16"
168 ; CHECK-NEXT: .byte 13
169 ; CHECK-NEXT: .byte 0
170 ; CHECK-NEXT: .byte 14
171 ; CHECK-NEXT: .short 8
172 ; CHECK-NEXT: .byte 16
173 ; CHECK-NEXT: .byte 0
174 ; CHECK-NEXT: .byte 8
175 ; CHECK-NEXT: .byte 5
176
; Kernel with one <16 x float> argument typed "float16": expected size and alignment 64, value type 8 (F32).
177 define amdgpu_kernel void @test_float16(<16 x float> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !4 {
178 ret void
179 }
180
181 ; CHECK-LABEL:{{^}}test_double16:
182 ; CHECK: .section .AMDGPU.runtime_metadata
183 ; CHECK-NEXT: .byte 4
184 ; CHECK-NEXT: .byte 6
185 ; CHECK-NEXT: .long 13
186 ; CHECK-NEXT: .ascii "test_double16"
187 ; CHECK-NEXT: .byte 7
188 ; CHECK-NEXT: .byte 9
189 ; CHECK-NEXT: .long 128
190 ; CHECK-NEXT: .byte 10
191 ; CHECK-NEXT: .long 128
192 ; CHECK-NEXT: .byte 11
193 ; CHECK-NEXT: .long 8
194 ; CHECK-NEXT: .ascii "double16"
195 ; CHECK-NEXT: .byte 13
196 ; CHECK-NEXT: .byte 0
197 ; CHECK-NEXT: .byte 14
198 ; CHECK-NEXT: .short 11
199 ; CHECK-NEXT: .byte 16
200 ; CHECK-NEXT: .byte 0
201 ; CHECK-NEXT: .byte 8
202 ; CHECK-NEXT: .byte 5
203
; Kernel with one <16 x double> argument typed "double16": expected size and alignment 128, value type 11 (F64).
204 define amdgpu_kernel void @test_double16(<16 x double> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !15 !kernel_arg_base_type !15 !kernel_arg_type_qual !4 {
205 ret void
206 }
207
208 ; CHECK-LABEL:{{^}}test_pointer:
209 ; CHECK: .section .AMDGPU.runtime_metadata
210 ; CHECK-NEXT: .byte 4
211 ; CHECK-NEXT: .byte 6
212 ; CHECK-NEXT: .long 12
213 ; CHECK-NEXT: .ascii "test_pointer"
214 ; CHECK-NEXT: .byte 7
215 ; CHECK-NEXT: .byte 9
216 ; CHECK-NEXT: .long 8
217 ; CHECK-NEXT: .byte 10
218 ; CHECK-NEXT: .long 8
219 ; CHECK-NEXT: .byte 11
220 ; CHECK-NEXT: .long 5
221 ; CHECK-NEXT: .ascii "int *"
222 ; CHECK-NEXT: .byte 13
223 ; CHECK-NEXT: .byte 1
224 ; CHECK-NEXT: .byte 14
225 ; CHECK-NEXT: .short 6
226 ; CHECK-NEXT: .byte 16
227 ; CHECK-NEXT: .byte 0
228 ; CHECK-NEXT: .byte 15
229 ; CHECK-NEXT: .byte 1
230 ; CHECK-NEXT: .byte 8
231 ; CHECK-NEXT: .byte 5
232
; Kernel with one i32 pointer in address space 1: expected type kind 1 (Pointer), value type 6 (I32, taken from the pointee) and address qualifier 1.
233 define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !4 {
234 ret void
235 }
236
237 ; CHECK-LABEL:{{^}}test_image:
238 ; CHECK: .section .AMDGPU.runtime_metadata
239 ; CHECK-NEXT: .byte 4
240 ; CHECK-NEXT: .byte 6
241 ; CHECK-NEXT: .long 10
242 ; CHECK-NEXT: .ascii "test_image"
243 ; CHECK-NEXT: .byte 7
244 ; CHECK-NEXT: .byte 9
245 ; CHECK-NEXT: .long 8
246 ; CHECK-NEXT: .byte 10
247 ; CHECK-NEXT: .long 8
248 ; CHECK-NEXT: .byte 11
249 ; CHECK-NEXT: .long 9
250 ; CHECK-NEXT: .ascii "image2d_t"
251 ; CHECK-NEXT: .byte 13
252 ; CHECK-NEXT: .byte 2
253 ; CHECK-NEXT: .byte 14
254 ; CHECK-NEXT: .short 0
255 ; CHECK-NEXT: .byte 16
256 ; CHECK-NEXT: .byte 0
257 ; CHECK-NEXT: .byte 15
258 ; CHECK-NEXT: .byte 1
259 ; CHECK-NEXT: .byte 8
260 ; CHECK-NEXT: .byte 5
261
; Kernel with one image2d_t argument: expected type kind 2 (Image), value type 0 (Struct) and address qualifier 1.
262 define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !17 !kernel_arg_base_type !17 !kernel_arg_type_qual !4 {
263 ret void
264 }
265
266 ; CHECK-LABEL:{{^}}test_sampler:
267 ; CHECK: .section .AMDGPU.runtime_metadata
268 ; CHECK-NEXT: .byte 4
269 ; CHECK-NEXT: .byte 6
270 ; CHECK-NEXT: .long 12
271 ; CHECK-NEXT: .ascii "test_sampler"
272 ; CHECK-NEXT: .byte 7
273 ; CHECK-NEXT: .byte 9
274 ; CHECK-NEXT: .long 4
275 ; CHECK-NEXT: .byte 10
276 ; CHECK-NEXT: .long 4
277 ; CHECK-NEXT: .byte 11
278 ; CHECK-NEXT: .long 9
279 ; CHECK-NEXT: .ascii "sampler_t"
280 ; CHECK-NEXT: .byte 13
281 ; CHECK-NEXT: .byte 3
282 ; CHECK-NEXT: .byte 14
283 ; CHECK-NEXT: .short 6
284 ; CHECK-NEXT: .byte 16
285 ; CHECK-NEXT: .byte 0
286 ; CHECK-NEXT: .byte 8
287 ; CHECK-NEXT: .byte 5
288
; Kernel whose argument is an i32 in IR but typed "sampler_t": expected type kind 3 (Sampler) from the type name and value type 6 (I32) from the IR type.
289 define amdgpu_kernel void @test_sampler(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !18 !kernel_arg_base_type !18 !kernel_arg_type_qual !4 {
290 ret void
291 }
292
293 ; CHECK-LABEL:{{^}}test_queue:
294 ; CHECK: .section .AMDGPU.runtime_metadata
295 ; CHECK-NEXT: .byte 4
296 ; CHECK-NEXT: .byte 6
297 ; CHECK-NEXT: .long 10
298 ; CHECK-NEXT: .ascii "test_queue"
299 ; CHECK-NEXT: .byte 7
300 ; CHECK-NEXT: .byte 9
301 ; CHECK-NEXT: .long 8
302 ; CHECK-NEXT: .byte 10
303 ; CHECK-NEXT: .long 8
304 ; CHECK-NEXT: .byte 11
305 ; CHECK-NEXT: .long 7
306 ; CHECK-NEXT: .ascii "queue_t"
307 ; CHECK-NEXT: .byte 13
308 ; CHECK-NEXT: .byte 4
309 ; CHECK-NEXT: .byte 14
310 ; CHECK-NEXT: .short 0
311 ; CHECK-NEXT: .byte 16
312 ; CHECK-NEXT: .byte 0
313 ; CHECK-NEXT: .byte 15
314 ; CHECK-NEXT: .byte 1
315 ; CHECK-NEXT: .byte 8
316 ; CHECK-NEXT: .byte 5
317
; Kernel with one queue_t argument: expected type kind 4 (Queue), value type 0 (Struct) and address qualifier 1.
318 define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !19 !kernel_arg_base_type !19 !kernel_arg_type_qual !4 {
319 ret void
320 }
321
322 ; CHECK-LABEL:{{^}}test_struct:
323 ; CHECK: .section .AMDGPU.runtime_metadata
324 ; CHECK-NEXT: .byte 4
325 ; CHECK-NEXT: .byte 6
326 ; CHECK-NEXT: .long 11
327 ; CHECK-NEXT: .ascii "test_struct"
328 ; CHECK-NEXT: .byte 7
329 ; CHECK-NEXT: .byte 9
330 ; CHECK-NEXT: .long 4
331 ; CHECK-NEXT: .byte 10
332 ; CHECK-NEXT: .long 4
333 ; CHECK-NEXT: .byte 11
334 ; CHECK-NEXT: .long 8
335 ; CHECK-NEXT: .ascii "struct A"
336 ; CHECK-NEXT: .byte 13
337 ; CHECK-NEXT: .byte 1
338 ; CHECK-NEXT: .byte 14
339 ; CHECK-NEXT: .short 0
340 ; CHECK-NEXT: .byte 16
341 ; CHECK-NEXT: .byte 0
342 ; CHECK-NEXT: .byte 15
343 ; CHECK-NEXT: .byte 0
344 ; CHECK-NEXT: .byte 8
345 ; CHECK-NEXT: .byte 5
346
; Kernel taking "struct A" as a byval pointer: expected type kind 1 (Pointer), value type 0 (Struct), address qualifier 0, size and alignment 4.
347 define amdgpu_kernel void @test_struct(%struct.A* byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20 !kernel_arg_base_type !20 !kernel_arg_type_qual !4 {
348 ret void
349 }
350
351 ; CHECK-LABEL:{{^}}test_i128:
352 ; CHECK: .section .AMDGPU.runtime_metadata
353 ; CHECK-NEXT: .byte 4
354 ; CHECK-NEXT: .byte 6
355 ; CHECK-NEXT: .long 9
356 ; CHECK-NEXT: .ascii "test_i128"
357 ; CHECK-NEXT: .byte 7
358 ; CHECK-NEXT: .byte 9
359 ; CHECK-NEXT: .long 16
360 ; CHECK-NEXT: .byte 10
361 ; CHECK-NEXT: .long 8
362 ; CHECK-NEXT: .byte 11
363 ; CHECK-NEXT: .long 4
364 ; CHECK-NEXT: .ascii "i128"
365 ; CHECK-NEXT: .byte 13
366 ; CHECK-NEXT: .byte 0
367 ; CHECK-NEXT: .byte 14
368 ; CHECK-NEXT: .short 0
369 ; CHECK-NEXT: .byte 16
370 ; CHECK-NEXT: .byte 0
371 ; CHECK-NEXT: .byte 8
372 ; CHECK-NEXT: .byte 5
373
; Kernel with one i128 argument: a 128-bit integer has no dedicated value type, so value type 0 (Struct) is expected; size 16, alignment 8.
374 define amdgpu_kernel void @test_i128(i128 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !21 !kernel_arg_base_type !21 !kernel_arg_type_qual !4 {
375 ret void
376 }
377
378 ; CHECK-LABEL:{{^}}test_multi_arg:
379 ; CHECK: .section .AMDGPU.runtime_metadata
380 ; CHECK-NEXT: .byte 4
381 ; CHECK-NEXT: .byte 6
382 ; CHECK-NEXT: .long 14
383 ; CHECK-NEXT: .ascii "test_multi_arg"
384 ; CHECK-NEXT: .byte 7
385 ; CHECK-NEXT: .byte 9
386 ; CHECK-NEXT: .long 4
387 ; CHECK-NEXT: .byte 10
388 ; CHECK-NEXT: .long 4
389 ; CHECK-NEXT: .byte 11
390 ; CHECK-NEXT: .long 3
391 ; CHECK-NEXT: .ascii "int"
392 ; CHECK-NEXT: .byte 13
393 ; CHECK-NEXT: .byte 0
394 ; CHECK-NEXT: .byte 14
395 ; CHECK-NEXT: .short 6
396 ; CHECK-NEXT: .byte 16
397 ; CHECK-NEXT: .byte 0
398 ; CHECK-NEXT: .byte 8
399 ; CHECK-NEXT: .byte 7
400 ; CHECK-NEXT: .byte 9
401 ; CHECK-NEXT: .long 4
402 ; CHECK-NEXT: .byte 10
403 ; CHECK-NEXT: .long 4
404 ; CHECK-NEXT: .byte 11
405 ; CHECK-NEXT: .long 6
406 ; CHECK-NEXT: .ascii "short2"
407 ; CHECK-NEXT: .byte 13
408 ; CHECK-NEXT: .byte 0
409 ; CHECK-NEXT: .byte 14
410 ; CHECK-NEXT: .short 3
411 ; CHECK-NEXT: .byte 16
412 ; CHECK-NEXT: .byte 0
413 ; CHECK-NEXT: .byte 8
414 ; CHECK-NEXT: .byte 7
415 ; CHECK-NEXT: .byte 9
416 ; CHECK-NEXT: .long 4
417 ; CHECK-NEXT: .byte 10
418 ; CHECK-NEXT: .long 4
419 ; CHECK-NEXT: .byte 11
420 ; CHECK-NEXT: .long 5
421 ; CHECK-NEXT: .ascii "char3"
422 ; CHECK-NEXT: .byte 13
423 ; CHECK-NEXT: .byte 0
424 ; CHECK-NEXT: .byte 14
425 ; CHECK-NEXT: .short 1
426 ; CHECK-NEXT: .byte 16
427 ; CHECK-NEXT: .byte 0
428 ; CHECK-NEXT: .byte 8
429 ; CHECK-NEXT: .byte 5
430
; Kernel with three by-value arguments ("int", "short2", "char3"): each is expected in its own argument stream (key 7 ... key 8) with value types 6, 3 and 1.
431 define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !24 !kernel_arg_base_type !24 !kernel_arg_type_qual !25 {
432 ret void
433 }
434
435 ; CHECK-LABEL:{{^}}test_addr_space:
436 ; CHECK: .section .AMDGPU.runtime_metadata
437 ; CHECK-NEXT: .byte 4
438 ; CHECK-NEXT: .byte 6
439 ; CHECK-NEXT: .long 15
440 ; CHECK-NEXT: .ascii "test_addr_space"
441 ; CHECK-NEXT: .byte 7
442 ; CHECK-NEXT: .byte 9
443 ; CHECK-NEXT: .long 8
444 ; CHECK-NEXT: .byte 10
445 ; CHECK-NEXT: .long 8
446 ; CHECK-NEXT: .byte 11
447 ; CHECK-NEXT: .long 5
448 ; CHECK-NEXT: .ascii "int *"
449 ; CHECK-NEXT: .byte 13
450 ; CHECK-NEXT: .byte 1
451 ; CHECK-NEXT: .byte 14
452 ; CHECK-NEXT: .short 6
453 ; CHECK-NEXT: .byte 16
454 ; CHECK-NEXT: .byte 0
455 ; CHECK-NEXT: .byte 15
456 ; CHECK-NEXT: .byte 1
457 ; CHECK-NEXT: .byte 8
458 ; CHECK-NEXT: .byte 7
459 ; CHECK-NEXT: .byte 9
460 ; CHECK-NEXT: .long 8
461 ; CHECK-NEXT: .byte 10
462 ; CHECK-NEXT: .long 8
463 ; CHECK-NEXT: .byte 11
464 ; CHECK-NEXT: .long 5
465 ; CHECK-NEXT: .ascii "int *"
466 ; CHECK-NEXT: .byte 13
467 ; CHECK-NEXT: .byte 1
468 ; CHECK-NEXT: .byte 14
469 ; CHECK-NEXT: .short 6
470 ; CHECK-NEXT: .byte 16
471 ; CHECK-NEXT: .byte 0
472 ; CHECK-NEXT: .byte 15
473 ; CHECK-NEXT: .byte 2
474 ; CHECK-NEXT: .byte 8
475 ; CHECK-NEXT: .byte 7
476 ; CHECK-NEXT: .byte 9
477 ; CHECK-NEXT: .long 4
478 ; CHECK-NEXT: .byte 10
479 ; CHECK-NEXT: .long 4
480 ; CHECK-NEXT: .byte 11
481 ; CHECK-NEXT: .long 5
482 ; CHECK-NEXT: .ascii "int *"
483 ; CHECK-NEXT: .byte 13
484 ; CHECK-NEXT: .byte 1
485 ; CHECK-NEXT: .byte 14
486 ; CHECK-NEXT: .short 6
487 ; CHECK-NEXT: .byte 16
488 ; CHECK-NEXT: .byte 0
489 ; CHECK-NEXT: .byte 15
490 ; CHECK-NEXT: .byte 3
491 ; CHECK-NEXT: .byte 8
492 ; CHECK-NEXT: .byte 5
493
; Kernel with three i32 pointers in address spaces 1, 2 and 3: the expected address qualifier bytes are 1, 2 and 3, and the address-space-3 pointer is expected to have size 4.
494 define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g, i32 addrspace(2)* %c, i32 addrspace(3)* %l) !kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !25 {
495 ret void
496 }
497
498 ; CHECK-LABEL:{{^}}test_type_qual:
499 ; CHECK: .section .AMDGPU.runtime_metadata
500 ; CHECK-NEXT: .byte 4
501 ; CHECK-NEXT: .byte 6
502 ; CHECK-NEXT: .long 14
503 ; CHECK-NEXT: .ascii "test_type_qual"
504 ; CHECK-NEXT: .byte 7
505 ; CHECK-NEXT: .byte 9
506 ; CHECK-NEXT: .long 8
507 ; CHECK-NEXT: .byte 10
508 ; CHECK-NEXT: .long 8
509 ; CHECK-NEXT: .byte 11
510 ; CHECK-NEXT: .long 5
511 ; CHECK-NEXT: .ascii "int *"
512 ; CHECK-NEXT: .byte 19
513 ; CHECK-NEXT: .byte 13
514 ; CHECK-NEXT: .byte 1
515 ; CHECK-NEXT: .byte 14
516 ; CHECK-NEXT: .short 6
517 ; CHECK-NEXT: .byte 16
518 ; CHECK-NEXT: .byte 0
519 ; CHECK-NEXT: .byte 15
520 ; CHECK-NEXT: .byte 1
521 ; CHECK-NEXT: .byte 8
522 ; CHECK-NEXT: .byte 7
523 ; CHECK-NEXT: .byte 9
524 ; CHECK-NEXT: .long 8
525 ; CHECK-NEXT: .byte 10
526 ; CHECK-NEXT: .long 8
527 ; CHECK-NEXT: .byte 11
528 ; CHECK-NEXT: .long 5
529 ; CHECK-NEXT: .ascii "int *"
530 ; CHECK-NEXT: .byte 17
531 ; CHECK-NEXT: .byte 18
532 ; CHECK-NEXT: .byte 13
533 ; CHECK-NEXT: .byte 1
534 ; CHECK-NEXT: .byte 14
535 ; CHECK-NEXT: .short 6
536 ; CHECK-NEXT: .byte 16
537 ; CHECK-NEXT: .byte 0
538 ; CHECK-NEXT: .byte 15
539 ; CHECK-NEXT: .byte 1
540 ; CHECK-NEXT: .byte 8
541 ; CHECK-NEXT: .byte 7
542 ; CHECK-NEXT: .byte 9
543 ; CHECK-NEXT: .long 8
544 ; CHECK-NEXT: .byte 10
545 ; CHECK-NEXT: .long 8
546 ; CHECK-NEXT: .byte 11
547 ; CHECK-NEXT: .long 5
548 ; CHECK-NEXT: .ascii "int *"
549 ; CHECK-NEXT: .byte 20
550 ; CHECK-NEXT: .byte 13
551 ; CHECK-NEXT: .byte 1
552 ; CHECK-NEXT: .byte 14
553 ; CHECK-NEXT: .short 0
554 ; CHECK-NEXT: .byte 16
555 ; CHECK-NEXT: .byte 0
556 ; CHECK-NEXT: .byte 15
557 ; CHECK-NEXT: .byte 1
558 ; CHECK-NEXT: .byte 8
559 ; CHECK-NEXT: .byte 5
560
; Kernel exercising type qualifiers: expected qualifier keys are 19 (volatile) for the first argument, 17 and 18 (const, restrict) for the second, and 20 (pipe) for the third.
561 define amdgpu_kernel void @test_type_qual(i32 addrspace(1)* %a, i32 addrspace(1)* %b, %opencl.pipe_t addrspace(1)* %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !70 {
562 ret void
563 }
564
; test_access_qual: three addrspace(1) pointers to the opaque OpenCL image
; types image1d_t, image2d_t and image3d_t, with !kernel_arg_access_qual (!61)
; set to "read_only", "write_only" and "read_write" respectively.
; In the expected output the byte following key 16 is 1, 2 and 3 for the three
; arguments, in that order.
; NOTE(review): presumably key 16 is the access-qualifier key and 1/2/3 encode
; read-only/write-only/read-write -- confirm against AMDGPURuntimeMetadata.h.
565 ; CHECK-LABEL:{{^}}test_access_qual:
566 ; CHECK: .section .AMDGPU.runtime_metadata
567 ; CHECK-NEXT: .byte 4
568 ; CHECK-NEXT: .byte 6
569 ; CHECK-NEXT: .long 16
570 ; CHECK-NEXT: .ascii "test_access_qual"
571 ; CHECK-NEXT: .byte 7
572 ; CHECK-NEXT: .byte 9
573 ; CHECK-NEXT: .long 8
574 ; CHECK-NEXT: .byte 10
575 ; CHECK-NEXT: .long 8
576 ; CHECK-NEXT: .byte 11
577 ; CHECK-NEXT: .long 9
578 ; CHECK-NEXT: .ascii "image1d_t"
579 ; CHECK-NEXT: .byte 13
580 ; CHECK-NEXT: .byte 2
581 ; CHECK-NEXT: .byte 14
582 ; CHECK-NEXT: .short 0
583 ; CHECK-NEXT: .byte 16
584 ; CHECK-NEXT: .byte 1
585 ; CHECK-NEXT: .byte 15
586 ; CHECK-NEXT: .byte 1
587 ; CHECK-NEXT: .byte 8
588 ; CHECK-NEXT: .byte 7
589 ; CHECK-NEXT: .byte 9
590 ; CHECK-NEXT: .long 8
591 ; CHECK-NEXT: .byte 10
592 ; CHECK-NEXT: .long 8
593 ; CHECK-NEXT: .byte 11
594 ; CHECK-NEXT: .long 9
595 ; CHECK-NEXT: .ascii "image2d_t"
596 ; CHECK-NEXT: .byte 13
597 ; CHECK-NEXT: .byte 2
598 ; CHECK-NEXT: .byte 14
599 ; CHECK-NEXT: .short 0
600 ; CHECK-NEXT: .byte 16
601 ; CHECK-NEXT: .byte 2
602 ; CHECK-NEXT: .byte 15
603 ; CHECK-NEXT: .byte 1
604 ; CHECK-NEXT: .byte 8
605 ; CHECK-NEXT: .byte 7
606 ; CHECK-NEXT: .byte 9
607 ; CHECK-NEXT: .long 8
608 ; CHECK-NEXT: .byte 10
609 ; CHECK-NEXT: .long 8
610 ; CHECK-NEXT: .byte 11
611 ; CHECK-NEXT: .long 9
612 ; CHECK-NEXT: .ascii "image3d_t"
613 ; CHECK-NEXT: .byte 13
614 ; CHECK-NEXT: .byte 2
615 ; CHECK-NEXT: .byte 14
616 ; CHECK-NEXT: .short 0
617 ; CHECK-NEXT: .byte 16
618 ; CHECK-NEXT: .byte 3
619 ; CHECK-NEXT: .byte 15
620 ; CHECK-NEXT: .byte 1
621 ; CHECK-NEXT: .byte 8
622 ; CHECK-NEXT: .byte 5
623
624 define amdgpu_kernel void @test_access_qual(%opencl.image1d_t addrspace(1)* %ro, %opencl.image2d_t addrspace(1)* %wo, %opencl.image3d_t addrspace(1)* %rw) !kernel_arg_addr_space !60 !kernel_arg_access_qual !61 !kernel_arg_type !62 !kernel_arg_base_type !62 !kernel_arg_type_qual !25 {
625 ret void
626 }
627
; test_reqd_wgs_vec_type_hint: single i32 argument plus two kernel-level
; attachments: !reqd_work_group_size (!6 = 1, 2, 4) and !vec_type_hint
; (!5 = i32 undef with flag 1).
; After the argument record the expected output has key 21 followed by the
; three 32-bit values 1, 2, 4, then key 23 followed by the length-prefixed
; string "int".
628 ; CHECK-LABEL:{{^}}test_reqd_wgs_vec_type_hint:
629 ; CHECK: .section .AMDGPU.runtime_metadata
630 ; CHECK-NEXT: .byte 4
631 ; CHECK-NEXT: .byte 6
632 ; CHECK-NEXT: .long 27
633 ; CHECK-NEXT: .ascii "test_reqd_wgs_vec_type_hint"
634 ; CHECK-NEXT: .byte 7
635 ; CHECK-NEXT: .byte 9
636 ; CHECK-NEXT: .long 4
637 ; CHECK-NEXT: .byte 10
638 ; CHECK-NEXT: .long 4
639 ; CHECK-NEXT: .byte 11
640 ; CHECK-NEXT: .long 3
641 ; CHECK-NEXT: .ascii "int"
642 ; CHECK-NEXT: .byte 13
643 ; CHECK-NEXT: .byte 0
644 ; CHECK-NEXT: .byte 14
645 ; CHECK-NEXT: .short 6
646 ; CHECK-NEXT: .byte 16
647 ; CHECK-NEXT: .byte 0
648 ; CHECK-NEXT: .byte 8
649 ; CHECK-NEXT: .byte 21
650 ; CHECK-NEXT: .long 1
651 ; CHECK-NEXT: .long 2
652 ; CHECK-NEXT: .long 4
653 ; CHECK-NEXT: .byte 23
654 ; CHECK-NEXT: .long 3
655 ; CHECK-NEXT: .ascii "int"
656 ; CHECK-NEXT: .byte 5
657
658 define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !5 !reqd_work_group_size !6 {
659 ret void
660 }
661
; test_wgs_hint_vec_type_hint: single i32 argument plus two kernel-level
; attachments: !work_group_size_hint (!8 = 8, 16, 32) and !vec_type_hint
; (!7 = <4 x i32> undef with flag 0).
; After the argument record the expected output has key 22 followed by the
; three 32-bit values 8, 16, 32, then key 23 followed by the length-prefixed
; string "uint4".
; NOTE(review): the "u" prefix presumably comes from the flag 0 in !7 (the
; sibling test with flag 1 expects "int") -- confirm against the emitter.
662 ; CHECK-LABEL:{{^}}test_wgs_hint_vec_type_hint:
663 ; CHECK: .section .AMDGPU.runtime_metadata
664 ; CHECK-NEXT: .byte 4
665 ; CHECK-NEXT: .byte 6
666 ; CHECK-NEXT: .long 27
667 ; CHECK-NEXT: .ascii "test_wgs_hint_vec_type_hint"
668 ; CHECK-NEXT: .byte 7
669 ; CHECK-NEXT: .byte 9
670 ; CHECK-NEXT: .long 4
671 ; CHECK-NEXT: .byte 10
672 ; CHECK-NEXT: .long 4
673 ; CHECK-NEXT: .byte 11
674 ; CHECK-NEXT: .long 3
675 ; CHECK-NEXT: .ascii "int"
676 ; CHECK-NEXT: .byte 13
677 ; CHECK-NEXT: .byte 0
678 ; CHECK-NEXT: .byte 14
679 ; CHECK-NEXT: .short 6
680 ; CHECK-NEXT: .byte 16
681 ; CHECK-NEXT: .byte 0
682 ; CHECK-NEXT: .byte 8
683 ; CHECK-NEXT: .byte 22
684 ; CHECK-NEXT: .long 8
685 ; CHECK-NEXT: .long 16
686 ; CHECK-NEXT: .long 32
687 ; CHECK-NEXT: .byte 23
688 ; CHECK-NEXT: .long 5
689 ; CHECK-NEXT: .ascii "uint4"
690 ; CHECK-NEXT: .byte 5
691
692 define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !7 !work_group_size_hint !8 {
693 ret void
694 }
695
; test_arg_ptr_to_ptr: single argument that is an addrspace(1) pointer to an
; i32 pointer, named "int **" in !kernel_arg_type (!80).
; The expected record carries the values 8 and 8 after keys 9 and 10, the
; type-name string, and the byte 1 after key 15.
696 ; CHECK-LABEL:{{^}}test_arg_ptr_to_ptr:
697 ; CHECK: .section .AMDGPU.runtime_metadata
698 ; CHECK-NEXT: .byte 4
699 ; CHECK-NEXT: .byte 6
700 ; CHECK-NEXT: .long 19
701 ; CHECK-NEXT: .ascii "test_arg_ptr_to_ptr"
702 ; CHECK-NEXT: .byte 7
703 ; CHECK-NEXT: .byte 9
704 ; CHECK-NEXT: .long 8
705 ; CHECK-NEXT: .byte 10
706 ; CHECK-NEXT: .long 8
707 ; CHECK-NEXT: .byte 11
708 ; CHECK-NEXT: .long 6
709 ; CHECK-NEXT: .ascii "int **"
710 ; CHECK-NEXT: .byte 13
711 ; CHECK-NEXT: .byte 1
712 ; CHECK-NEXT: .byte 14
713 ; CHECK-NEXT: .short 6
714 ; CHECK-NEXT: .byte 16
715 ; CHECK-NEXT: .byte 0
716 ; CHECK-NEXT: .byte 15
717 ; CHECK-NEXT: .byte 1
718 ; CHECK-NEXT: .byte 8
719 ; CHECK-NEXT: .byte 5
720
721 define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 * addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80 !kernel_arg_base_type !80 !kernel_arg_type_qual !4 {
722 ret void
723 }
724
; test_arg_struct_contains_ptr: single byval pointer argument to %struct.B
; (the struct type is declared outside this part of the file), named
; "struct B" in !kernel_arg_type (!82).
; The expected record carries the values 4 and 4 after keys 9 and 10, 0 after
; key 14, and 0 after key 15.
725 ; CHECK-LABEL:{{^}}test_arg_struct_contains_ptr:
726 ; CHECK: .section .AMDGPU.runtime_metadata
727 ; CHECK-NEXT: .byte 4
728 ; CHECK-NEXT: .byte 6
729 ; CHECK-NEXT: .long 28
730 ; CHECK-NEXT: .ascii "test_arg_struct_contains_ptr"
731 ; CHECK-NEXT: .byte 7
732 ; CHECK-NEXT: .byte 9
733 ; CHECK-NEXT: .long 4
734 ; CHECK-NEXT: .byte 10
735 ; CHECK-NEXT: .long 4
736 ; CHECK-NEXT: .byte 11
737 ; CHECK-NEXT: .long 8
738 ; CHECK-NEXT: .ascii "struct B"
739 ; CHECK-NEXT: .byte 13
740 ; CHECK-NEXT: .byte 1
741 ; CHECK-NEXT: .byte 14
742 ; CHECK-NEXT: .short 0
743 ; CHECK-NEXT: .byte 16
744 ; CHECK-NEXT: .byte 0
745 ; CHECK-NEXT: .byte 15
746 ; CHECK-NEXT: .byte 0
747 ; CHECK-NEXT: .byte 8
748 ; CHECK-NEXT: .byte 5
749
750 define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B * byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82 !kernel_arg_base_type !82 !kernel_arg_type_qual !4 {
751 ret void
752 }
753
; test_arg_vector_of_ptr: single argument that is a 2-element vector of
; addrspace(1) i32 pointers, named
; "global int* __attribute__((ext_vector_type(2)))" in !kernel_arg_type (!83).
; The expected record carries the values 16 and 16 after keys 9 and 10, and,
; unlike the pointer-argument tests in this file, has no key-15 entry: the
; key-16 pair is followed directly by byte 8.
754 ; CHECK-LABEL:{{^}}test_arg_vector_of_ptr:
755 ; CHECK: .section .AMDGPU.runtime_metadata
756 ; CHECK-NEXT: .byte 4
757 ; CHECK-NEXT: .byte 6
758 ; CHECK-NEXT: .long 22
759 ; CHECK-NEXT: .ascii "test_arg_vector_of_ptr"
760 ; CHECK-NEXT: .byte 7
761 ; CHECK-NEXT: .byte 9
762 ; CHECK-NEXT: .long 16
763 ; CHECK-NEXT: .byte 10
764 ; CHECK-NEXT: .long 16
765 ; CHECK-NEXT: .byte 11
766 ; CHECK-NEXT: .long 47
767 ; CHECK-NEXT: .ascii "global int* __attribute__((ext_vector_type(2)))"
768 ; CHECK-NEXT: .byte 13
769 ; CHECK-NEXT: .byte 0
770 ; CHECK-NEXT: .byte 14
771 ; CHECK-NEXT: .short 6
772 ; CHECK-NEXT: .byte 16
773 ; CHECK-NEXT: .byte 0
774 ; CHECK-NEXT: .byte 8
775 ; CHECK-NEXT: .byte 5
776
777 define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !83 !kernel_arg_base_type !83 !kernel_arg_type_qual !4 {
778 ret void
779 }
780
; test_arg_unknown_builtin_type: single addrspace(1) pointer argument to the
; opaque type %opencl.clk_event_t, named "clk_event_t" in !kernel_arg_type
; (!84).
; The expected record carries 1 after key 13, 0 after key 14, and 1 after
; key 15.
781 ; CHECK-LABEL:{{^}}test_arg_unknown_builtin_type:
782 ; CHECK: .section .AMDGPU.runtime_metadata
783 ; CHECK-NEXT: .byte 4
784 ; CHECK-NEXT: .byte 6
785 ; CHECK-NEXT: .long 29
786 ; CHECK-NEXT: .ascii "test_arg_unknown_builtin_type"
787 ; CHECK-NEXT: .byte 7
788 ; CHECK-NEXT: .byte 9
789 ; CHECK-NEXT: .long 8
790 ; CHECK-NEXT: .byte 10
791 ; CHECK-NEXT: .long 8
792 ; CHECK-NEXT: .byte 11
793 ; CHECK-NEXT: .long 11
794 ; CHECK-NEXT: .ascii "clk_event_t"
795 ; CHECK-NEXT: .byte 13
796 ; CHECK-NEXT: .byte 1
797 ; CHECK-NEXT: .byte 14
798 ; CHECK-NEXT: .short 0
799 ; CHECK-NEXT: .byte 16
800 ; CHECK-NEXT: .byte 0
801 ; CHECK-NEXT: .byte 15
802 ; CHECK-NEXT: .byte 1
803 ; CHECK-NEXT: .byte 8
804 ; CHECK-NEXT: .byte 5
805
806 define amdgpu_kernel void @test_arg_unknown_builtin_type(%opencl.clk_event_t addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !84 !kernel_arg_base_type !84 !kernel_arg_type_qual !4 {
807 ret void
808 }
809
; Metadata nodes attached to the kernels in this file.
; !1-!4: one-argument lists used as !kernel_arg_addr_space, !kernel_arg_access_qual,
; !kernel_arg_type / !kernel_arg_base_type and !kernel_arg_type_qual.
810 !1 = !{i32 0}
811 !2 = !{!"none"}
812 !3 = !{!"int"}
813 !4 = !{!""}
; !5 and !7 are !vec_type_hint operands; !6 is the !reqd_work_group_size
; operand and !8 the !work_group_size_hint operand.
814 !5 = !{i32 undef, i32 1}
815 !6 = !{i32 1, i32 2, i32 4}
816 !7 = !{<4 x i32> undef, i32 0}
817 !8 = !{i32 8, i32 16, i32 32}
; !9-!21: one-element type-name lists. None is referenced by the kernels from
; @test_addr_space onward; presumably they belong to tests earlier in the file.
818 !9 = !{!"char"}
819 !10 = !{!"ushort2"}
820 !11 = !{!"int3"}
821 !12 = !{!"ulong4"}
822 !13 = !{!"half8"}
823 !14 = !{!"float16"}
824 !15 = !{!"double16"}
825 !16 = !{!"int *"}
826 !17 = !{!"image2d_t"}
827 !18 = !{!"sampler_t"}
828 !19 = !{!"queue_t"}
829 !20 = !{!"struct A"}
830 !21 = !{!"i128"}
; !22-!25: three-argument address-space, access-qualifier, type-name and
; type-qualifier lists.
831 !22 = !{i32 0, i32 0, i32 0}
832 !23 = !{!"none", !"none", !"none"}
833 !24 = !{!"int", !"short2", !"char3"}
834 !25 = !{!"", !"", !""}
; !50-!51: address spaces and type names for @test_addr_space (!51 is also
; the type-name list of @test_type_qual).
835 !50 = !{i32 1, i32 2, i32 3}
836 !51 = !{!"int *", !"int *", !"int *"}
; !60-!62: address spaces, access qualifiers and type names for
; @test_access_qual.
837 !60 = !{i32 1, i32 1, i32 1}
838 !61 = !{!"read_only", !"write_only", !"read_write"}
839 !62 = !{!"image1d_t", !"image2d_t", !"image3d_t"}
; !70: type qualifiers for @test_type_qual.
840 !70 = !{!"volatile", !"const restrict", !"pipe"}
; !80-!84: type names (and the one-element address-space list !81) for the
; @test_arg_* kernels.
841 !80 = !{!"int **"}
842 !81 = !{i32 1}
843 !82 = !{!"struct B"}
844 !83 = !{!"global int* __attribute__((ext_vector_type(2)))"}
845 !84 = !{!"clk_event_t"}
; Module-level named metadata !opencl.ocl.version, holding the pair (2, 0).
846 !opencl.ocl.version = !{!90}
847 !90 = !{i32 2, i32 0}