llvm.org GIT mirror: llvm / 29d0ea4

AMDGPU: Cleanup load tests

There are a lot of different kinds of loads to test for, and these were scattered around inconsistently with some redundancy. Try to comprehensively test all loads in a consistent way.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@271571 91177308-0d34-0410-b5e6-96231b3b80d8

Matt Arsenault, 3 years ago
34 changed file(s) with 7440 addition(s) and 2441 deletion(s).
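The reorganized tests follow one consistent pattern: RUN lines for SI, VI, and (where supported) R600 under shared FileCheck prefixes, then one function per load/extension/address-space combination. A minimal sketch of that shape is below; the file placement, function name, and exact check lines are illustrative only, not copied from the commit:

; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s

; FUNC-LABEL: {{^}}global_zextload_i8_to_i32:
; GCN: buffer_load_ubyte
; GCN: buffer_store_dword
define void @global_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
  %a = load i8, i8 addrspace(1)* %in          ; byte load from global memory, zero-extended below
  %ext = zext i8 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind }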
6262 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
6363 }
6464
65 // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
66 setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
67 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
68 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
69
70 setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
71 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
72 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
73
74
6575 setOperationAction(ISD::STORE, MVT::i8, Custom);
6676 setOperationAction(ISD::STORE, MVT::i32, Custom);
6777 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
6979
7080 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
7181 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
82
83 // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
84 setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
85 setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);
7286
7387 // Set condition code actions
7488 setCondCodeAction(ISD::SETO, MVT::f32, Expand);
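The Expand entries added above are the workaround the comments describe: without them, LegalizeDAG asserts when it has to expand an extending load or truncating store of an i1 vector. Roughly the kind of IR that exercises this path is sketched below; the function name is made up for illustration, and the equivalent global-memory cases appear in the i1 load tests elsewhere in this diff:

define void @zextload_v2i1_to_v2i32_example(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) {
  ; the load + zext pair is typically combined into a zero-extending load from <2 x i1> to <2 x i32>
  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
  %ext = zext <2 x i1> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}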
None ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2 ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
0 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC %s
1 ; RUN: llc -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=CI-HSA -check-prefix=SI %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC %s
3 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
34
4 ; FUNC-LABEL: {{^}}anyext_load_i8:
5 ; SI: buffer_load_dword v{{[0-9]+}}
6 ; SI: buffer_store_dword v{{[0-9]+}}
5 ; FIXME: This seems to not ever actually become an extload
6 ; FUNC-LABEL: {{^}}global_anyext_load_i8:
7 ; GCN: buffer_load_dword v{{[0-9]+}}
8 ; GCN: buffer_store_dword v{{[0-9]+}}
79
810 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+.[XYZW]]],
911 ; EG: VTX_READ_32 [[VAL]]
10 define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind {
12 define void @global_anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind {
1113 %cast = bitcast i8 addrspace(1)* %src to i32 addrspace(1)*
1214 %load = load i32, i32 addrspace(1)* %cast
1315 %x = bitcast i32 %load to <4 x i8>
1618 ret void
1719 }
1820
19 ; FUNC-LABEL: {{^}}anyext_load_i16:
20 ; SI: buffer_load_dword v{{[0-9]+}}
21 ; SI: buffer_store_dword v{{[0-9]+}}
21 ; FUNC-LABEL: {{^}}global_anyext_load_i16:
22 ; GCN: buffer_load_dword v{{[0-9]+}}
23 ; GCN: buffer_store_dword v{{[0-9]+}}
2224
2325 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+.[XYZW]]],
2426 ; EG: VTX_READ_32 [[VAL]]
25 define void @anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrspace(1)* nocapture noalias %src) nounwind {
27 define void @global_anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrspace(1)* nocapture noalias %src) nounwind {
2628 %cast = bitcast i16 addrspace(1)* %src to i32 addrspace(1)*
2729 %load = load i32, i32 addrspace(1)* %cast
2830 %x = bitcast i32 %load to <2 x i16>
3133 ret void
3234 }
3335
34 ; FUNC-LABEL: {{^}}anyext_load_lds_i8:
35 ; SI: ds_read_b32 v{{[0-9]+}}
36 ; SI: ds_write_b32 v{{[0-9]+}}
36 ; FUNC-LABEL: {{^}}local_anyext_load_i8:
37 ; GCN: ds_read_b32 v{{[0-9]+}}
38 ; GCN: ds_write_b32 v{{[0-9]+}}
3739
3840 ; EG: LDS_READ_RET {{.*}}, [[VAL:T[0-9]+.[XYZW]]]
3941 ; EG: LDS_WRITE * [[VAL]]
40 define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind {
42 define void @local_anyext_load_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind {
4143 %cast = bitcast i8 addrspace(3)* %src to i32 addrspace(3)*
4244 %load = load i32, i32 addrspace(3)* %cast
4345 %x = bitcast i32 %load to <4 x i8>
4648 ret void
4749 }
4850
49 ; FUNC-LABEL: {{^}}anyext_load_lds_i16:
50 ; SI: ds_read_b32 v{{[0-9]+}}
51 ; SI: ds_write_b32 v{{[0-9]+}}
51 ; FUNC-LABEL: {{^}}local_anyext_load_i16:
52 ; GCN: ds_read_b32 v{{[0-9]+}}
53 ; GCN: ds_write_b32 v{{[0-9]+}}
5254
5355 ; EG: LDS_READ_RET {{.*}}, [[VAL:T[0-9]+.[XYZW]]]
5456 ; EG: LDS_WRITE * [[VAL]]
55 define void @anyext_load_lds_i16(i16 addrspace(3)* nocapture noalias %out, i16 addrspace(3)* nocapture noalias %src) nounwind {
57 define void @local_anyext_load_i16(i16 addrspace(3)* nocapture noalias %out, i16 addrspace(3)* nocapture noalias %src) nounwind {
5658 %cast = bitcast i16 addrspace(3)* %src to i32 addrspace(3)*
5759 %load = load i32, i32 addrspace(3)* %cast
5860 %x = bitcast i32 %load to <2 x i16>
None ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
0 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
11 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
22
33 ; FUNC-LABEL: {{^}}fpext_f32_to_f64:
1414 define void @fpext_v2f32_to_v2f64(<2 x double> addrspace(1)* %out, <2 x float> %in) {
1515 %result = fpext <2 x float> %in to <2 x double>
1616 store <2 x double> %result, <2 x double> addrspace(1)* %out
17 ret void
18 }
19
20 ; FUNC-LABEL: {{^}}fpext_v3f32_to_v3f64:
21 ; SI: v_cvt_f64_f32_e32
22 ; SI: v_cvt_f64_f32_e32
23 ; SI: v_cvt_f64_f32_e32
24 define void @fpext_v3f32_to_v3f64(<3 x double> addrspace(1)* %out, <3 x float> %in) {
25 %result = fpext <3 x float> %in to <3 x double>
26 store <3 x double> %result, <3 x double> addrspace(1)* %out
1727 ret void
1828 }
1929
test/CodeGen/AMDGPU/global-extload-i1.ll (deleted: +0, -302)
None ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2 ; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
3 ; FIXME: Evergreen broken
4
5 ; FUNC-LABEL: {{^}}zextload_global_i1_to_i32:
6 ; SI: buffer_load_ubyte
7 ; SI: buffer_store_dword
8 ; SI: s_endpgm
9 define void @zextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
10 %a = load i1, i1 addrspace(1)* %in
11 %ext = zext i1 %a to i32
12 store i32 %ext, i32 addrspace(1)* %out
13 ret void
14 }
15
16 ; FUNC-LABEL: {{^}}sextload_global_i1_to_i32:
17 ; SI: buffer_load_ubyte
18 ; SI: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1{{$}}
19 ; SI: buffer_store_dword
20 ; SI: s_endpgm
21 define void @sextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
22 %a = load i1, i1 addrspace(1)* %in
23 %ext = sext i1 %a to i32
24 store i32 %ext, i32 addrspace(1)* %out
25 ret void
26 }
27
28 ; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i32:
29 ; SI: s_endpgm
30 define void @zextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
31 %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
32 %ext = zext <1 x i1> %load to <1 x i32>
33 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
34 ret void
35 }
36
37 ; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i32:
38 ; SI: s_endpgm
39 define void @sextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
40 %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
41 %ext = sext <1 x i1> %load to <1 x i32>
42 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
43 ret void
44 }
45
46 ; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i32:
47 ; SI: s_endpgm
48 define void @zextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
49 %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
50 %ext = zext <2 x i1> %load to <2 x i32>
51 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
52 ret void
53 }
54
55 ; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i32:
56 ; SI: s_endpgm
57 define void @sextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
58 %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
59 %ext = sext <2 x i1> %load to <2 x i32>
60 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
61 ret void
62 }
63
64 ; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i32:
65 ; SI: s_endpgm
66 define void @zextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
67 %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
68 %ext = zext <4 x i1> %load to <4 x i32>
69 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
70 ret void
71 }
72
73 ; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i32:
74 ; SI: s_endpgm
75 define void @sextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
76 %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
77 %ext = sext <4 x i1> %load to <4 x i32>
78 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
79 ret void
80 }
81
82 ; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i32:
83 ; SI: s_endpgm
84 define void @zextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
85 %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
86 %ext = zext <8 x i1> %load to <8 x i32>
87 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
88 ret void
89 }
90
91 ; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i32:
92 ; SI: s_endpgm
93 define void @sextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
94 %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
95 %ext = sext <8 x i1> %load to <8 x i32>
96 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
97 ret void
98 }
99
100 ; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i32:
101 ; SI: s_endpgm
102 define void @zextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
103 %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
104 %ext = zext <16 x i1> %load to <16 x i32>
105 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
106 ret void
107 }
108
109 ; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i32:
110 ; SI: s_endpgm
111 define void @sextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
112 %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
113 %ext = sext <16 x i1> %load to <16 x i32>
114 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
115 ret void
116 }
117
118 ; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i32:
119 ; XSI: s_endpgm
120 ; define void @zextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
121 ; %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
122 ; %ext = zext <32 x i1> %load to <32 x i32>
123 ; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
124 ; ret void
125 ; }
126
127 ; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i32:
128 ; XSI: s_endpgm
129 ; define void @sextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
130 ; %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
131 ; %ext = sext <32 x i1> %load to <32 x i32>
132 ; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
133 ; ret void
134 ; }
135
136 ; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i32:
137 ; XSI: s_endpgm
138 ; define void @zextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
139 ; %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
140 ; %ext = zext <64 x i1> %load to <64 x i32>
141 ; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
142 ; ret void
143 ; }
144
145 ; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i32:
146 ; XSI: s_endpgm
147 ; define void @sextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
148 ; %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
149 ; %ext = sext <64 x i1> %load to <64 x i32>
150 ; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
151 ; ret void
152 ; }
153
154 ; FUNC-LABEL: {{^}}zextload_global_i1_to_i64:
155 ; SI-DAG: buffer_load_ubyte [[LOAD:v[0-9]+]],
156 ; SI-DAG: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
157 ; SI: buffer_store_dwordx2
158 define void @zextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
159 %a = load i1, i1 addrspace(1)* %in
160 %ext = zext i1 %a to i64
161 store i64 %ext, i64 addrspace(1)* %out
162 ret void
163 }
164
165 ; FUNC-LABEL: {{^}}sextload_global_i1_to_i64:
166 ; SI: buffer_load_ubyte [[LOAD:v[0-9]+]],
167 ; SI: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}}
168 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
169 ; SI: buffer_store_dwordx2
170 define void @sextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
171 %a = load i1, i1 addrspace(1)* %in
172 %ext = sext i1 %a to i64
173 store i64 %ext, i64 addrspace(1)* %out
174 ret void
175 }
176
177 ; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i64:
178 ; SI: s_endpgm
179 define void @zextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
180 %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
181 %ext = zext <1 x i1> %load to <1 x i64>
182 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
183 ret void
184 }
185
186 ; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i64:
187 ; SI: s_endpgm
188 define void @sextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
189 %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
190 %ext = sext <1 x i1> %load to <1 x i64>
191 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
192 ret void
193 }
194
195 ; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i64:
196 ; SI: s_endpgm
197 define void @zextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
198 %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
199 %ext = zext <2 x i1> %load to <2 x i64>
200 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
201 ret void
202 }
203
204 ; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i64:
205 ; SI: s_endpgm
206 define void @sextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
207 %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
208 %ext = sext <2 x i1> %load to <2 x i64>
209 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
210 ret void
211 }
212
213 ; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i64:
214 ; SI: s_endpgm
215 define void @zextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
216 %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
217 %ext = zext <4 x i1> %load to <4 x i64>
218 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
219 ret void
220 }
221
222 ; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i64:
223 ; SI: s_endpgm
224 define void @sextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
225 %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
226 %ext = sext <4 x i1> %load to <4 x i64>
227 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
228 ret void
229 }
230
231 ; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i64:
232 ; SI: s_endpgm
233 define void @zextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
234 %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
235 %ext = zext <8 x i1> %load to <8 x i64>
236 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
237 ret void
238 }
239
240 ; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i64:
241 ; SI: s_endpgm
242 define void @sextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
243 %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
244 %ext = sext <8 x i1> %load to <8 x i64>
245 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
246 ret void
247 }
248
249 ; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i64:
250 ; SI: s_endpgm
251 define void @zextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
252 %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
253 %ext = zext <16 x i1> %load to <16 x i64>
254 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
255 ret void
256 }
257
258 ; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i64:
259 ; SI: s_endpgm
260 define void @sextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
261 %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
262 %ext = sext <16 x i1> %load to <16 x i64>
263 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
264 ret void
265 }
266
267 ; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i64:
268 ; XSI: s_endpgm
269 ; define void @zextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
270 ; %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
271 ; %ext = zext <32 x i1> %load to <32 x i64>
272 ; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
273 ; ret void
274 ; }
275
276 ; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i64:
277 ; XSI: s_endpgm
278 ; define void @sextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
279 ; %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
280 ; %ext = sext <32 x i1> %load to <32 x i64>
281 ; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
282 ; ret void
283 ; }
284
285 ; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i64:
286 ; XSI: s_endpgm
287 ; define void @zextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
288 ; %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
289 ; %ext = zext <64 x i1> %load to <64 x i64>
290 ; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
291 ; ret void
292 ; }
293
294 ; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i64:
295 ; XSI: s_endpgm
296 ; define void @sextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
297 ; %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
298 ; %ext = sext <64 x i1> %load to <64 x i64>
299 ; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
300 ; ret void
301 ; }
test/CodeGen/AMDGPU/global-extload-i16.ll (deleted: +0, -302)
None ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2 ; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
3 ; FIXME: cypress is broken because the bigger testcases spill and it's not implemented
4
5 ; FUNC-LABEL: {{^}}zextload_global_i16_to_i32:
6 ; SI: buffer_load_ushort
7 ; SI: buffer_store_dword
8 ; SI: s_endpgm
9 define void @zextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
10 %a = load i16, i16 addrspace(1)* %in
11 %ext = zext i16 %a to i32
12 store i32 %ext, i32 addrspace(1)* %out
13 ret void
14 }
15
16 ; FUNC-LABEL: {{^}}sextload_global_i16_to_i32:
17 ; SI: buffer_load_sshort
18 ; SI: buffer_store_dword
19 ; SI: s_endpgm
20 define void @sextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
21 %a = load i16, i16 addrspace(1)* %in
22 %ext = sext i16 %a to i32
23 store i32 %ext, i32 addrspace(1)* %out
24 ret void
25 }
26
27 ; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i32:
28 ; SI: buffer_load_ushort
29 ; SI: s_endpgm
30 define void @zextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
31 %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
32 %ext = zext <1 x i16> %load to <1 x i32>
33 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
34 ret void
35 }
36
37 ; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i32:
38 ; SI: buffer_load_sshort
39 ; SI: s_endpgm
40 define void @sextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
41 %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
42 %ext = sext <1 x i16> %load to <1 x i32>
43 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
44 ret void
45 }
46
47 ; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i32:
48 ; SI: s_endpgm
49 define void @zextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
50 %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
51 %ext = zext <2 x i16> %load to <2 x i32>
52 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
53 ret void
54 }
55
56 ; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i32:
57 ; SI: s_endpgm
58 define void @sextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
59 %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
60 %ext = sext <2 x i16> %load to <2 x i32>
61 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
62 ret void
63 }
64
65 ; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i32:
66 ; SI: s_endpgm
67 define void @zextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
68 %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
69 %ext = zext <4 x i16> %load to <4 x i32>
70 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
71 ret void
72 }
73
74 ; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i32:
75 ; SI: s_endpgm
76 define void @sextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
77 %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
78 %ext = sext <4 x i16> %load to <4 x i32>
79 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
80 ret void
81 }
82
83 ; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i32:
84 ; SI: s_endpgm
85 define void @zextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
86 %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
87 %ext = zext <8 x i16> %load to <8 x i32>
88 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
89 ret void
90 }
91
92 ; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i32:
93 ; SI: s_endpgm
94 define void @sextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
95 %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
96 %ext = sext <8 x i16> %load to <8 x i32>
97 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
98 ret void
99 }
100
101 ; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i32:
102 ; SI: s_endpgm
103 define void @zextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
104 %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
105 %ext = zext <16 x i16> %load to <16 x i32>
106 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
107 ret void
108 }
109
110 ; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i32:
111 ; SI: s_endpgm
112 define void @sextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
113 %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
114 %ext = sext <16 x i16> %load to <16 x i32>
115 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
116 ret void
117 }
118
119 ; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i32:
120 ; SI: s_endpgm
121 define void @zextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
122 %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
123 %ext = zext <32 x i16> %load to <32 x i32>
124 store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
125 ret void
126 }
127
128 ; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i32:
129 ; SI: s_endpgm
130 define void @sextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
131 %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
132 %ext = sext <32 x i16> %load to <32 x i32>
133 store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
134 ret void
135 }
136
137 ; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i32:
138 ; SI: s_endpgm
139 define void @zextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
140 %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
141 %ext = zext <64 x i16> %load to <64 x i32>
142 store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
143 ret void
144 }
145
146 ; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i32:
147 ; SI: s_endpgm
148 define void @sextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
149 %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
150 %ext = sext <64 x i16> %load to <64 x i32>
151 store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
152 ret void
153 }
154
155 ; FUNC-LABEL: {{^}}zextload_global_i16_to_i64:
156 ; SI-DAG: buffer_load_ushort v[[LO:[0-9]+]],
157 ; SI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
158 ; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
159 define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
160 %a = load i16, i16 addrspace(1)* %in
161 %ext = zext i16 %a to i64
162 store i64 %ext, i64 addrspace(1)* %out
163 ret void
164 }
165
166 ; FUNC-LABEL: {{^}}sextload_global_i16_to_i64:
167 ; SI: buffer_load_sshort [[LOAD:v[0-9]+]],
168 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
169 ; SI: buffer_store_dwordx2
170 define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
171 %a = load i16, i16 addrspace(1)* %in
172 %ext = sext i16 %a to i64
173 store i64 %ext, i64 addrspace(1)* %out
174 ret void
175 }
176
177 ; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i64:
178 ; SI: s_endpgm
179 define void @zextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
180 %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
181 %ext = zext <1 x i16> %load to <1 x i64>
182 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
183 ret void
184 }
185
186 ; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i64:
187 ; SI: s_endpgm
188 define void @sextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
189 %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
190 %ext = sext <1 x i16> %load to <1 x i64>
191 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
192 ret void
193 }
194
195 ; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i64:
196 ; SI: s_endpgm
197 define void @zextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
198 %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
199 %ext = zext <2 x i16> %load to <2 x i64>
200 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
201 ret void
202 }
203
204 ; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i64:
205 ; SI: s_endpgm
206 define void @sextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
207 %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
208 %ext = sext <2 x i16> %load to <2 x i64>
209 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
210 ret void
211 }
212
213 ; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i64:
214 ; SI: s_endpgm
215 define void @zextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
216 %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
217 %ext = zext <4 x i16> %load to <4 x i64>
218 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
219 ret void
220 }
221
222 ; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i64:
223 ; SI: s_endpgm
224 define void @sextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
225 %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
226 %ext = sext <4 x i16> %load to <4 x i64>
227 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
228 ret void
229 }
230
231 ; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i64:
232 ; SI: s_endpgm
233 define void @zextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
234 %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
235 %ext = zext <8 x i16> %load to <8 x i64>
236 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
237 ret void
238 }
239
240 ; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i64:
241 ; SI: s_endpgm
242 define void @sextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
243 %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
244 %ext = sext <8 x i16> %load to <8 x i64>
245 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
246 ret void
247 }
248
249 ; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i64:
250 ; SI: s_endpgm
251 define void @zextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
252 %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
253 %ext = zext <16 x i16> %load to <16 x i64>
254 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
255 ret void
256 }
257
258 ; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i64:
259 ; SI: s_endpgm
260 define void @sextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
261 %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
262 %ext = sext <16 x i16> %load to <16 x i64>
263 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
264 ret void
265 }
266
267 ; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i64:
268 ; SI: s_endpgm
269 define void @zextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
270 %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
271 %ext = zext <32 x i16> %load to <32 x i64>
272 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
273 ret void
274 }
275
276 ; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i64:
277 ; SI: s_endpgm
278 define void @sextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
279 %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
280 %ext = sext <32 x i16> %load to <32 x i64>
281 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
282 ret void
283 }
284
285 ; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i64:
286 ; SI: s_endpgm
287 define void @zextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
288 %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
289 %ext = zext <64 x i16> %load to <64 x i64>
290 store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
291 ret void
292 }
293
294 ; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i64:
295 ; SI: s_endpgm
296 define void @sextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
297 %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
298 %ext = sext <64 x i16> %load to <64 x i64>
299 store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
300 ret void
301 }
test/CodeGen/AMDGPU/global-extload-i32.ll (deleted: +0, -308)
None ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2 ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
3
4 ; FUNC-LABEL: {{^}}zextload_global_i32_to_i64:
5 ; SI-DAG: buffer_load_dword v[[LO:[0-9]+]],
6 ; SI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
7 ; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
8 define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
9 %a = load i32, i32 addrspace(1)* %in
10 %ext = zext i32 %a to i64
11 store i64 %ext, i64 addrspace(1)* %out
12 ret void
13 }
14
15 ; FUNC-LABEL: {{^}}sextload_global_i32_to_i64:
16 ; SI: buffer_load_dword [[LOAD:v[0-9]+]],
17 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
18 ; SI: buffer_store_dwordx2
19 define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
20 %a = load i32, i32 addrspace(1)* %in
21 %ext = sext i32 %a to i64
22 store i64 %ext, i64 addrspace(1)* %out
23 ret void
24 }
25
26 ; FUNC-LABEL: {{^}}zextload_global_v1i32_to_v1i64:
27 ; SI: buffer_load_dword
28 ; SI: buffer_store_dwordx2
29 ; SI: s_endpgm
30 define void @zextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
31 %load = load <1 x i32>, <1 x i32> addrspace(1)* %in
32 %ext = zext <1 x i32> %load to <1 x i64>
33 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
34 ret void
35 }
36
37 ; FUNC-LABEL: {{^}}sextload_global_v1i32_to_v1i64:
38 ; SI: buffer_load_dword
39 ; SI: v_ashrrev_i32
40 ; SI: buffer_store_dwordx2
41 ; SI: s_endpgm
42 define void @sextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
43 %load = load <1 x i32>, <1 x i32> addrspace(1)* %in
44 %ext = sext <1 x i32> %load to <1 x i64>
45 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
46 ret void
47 }
48
49 ; FUNC-LABEL: {{^}}zextload_global_v2i32_to_v2i64:
50 ; SI: buffer_load_dwordx2
51 ; SI: buffer_store_dwordx4
52 ; SI: s_endpgm
53 define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
54 %load = load <2 x i32>, <2 x i32> addrspace(1)* %in
55 %ext = zext <2 x i32> %load to <2 x i64>
56 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
57 ret void
58 }
59
60 ; FUNC-LABEL: {{^}}sextload_global_v2i32_to_v2i64:
61 ; SI: buffer_load_dwordx2
62 ; SI-DAG: v_ashrrev_i32
63 ; SI-DAG: v_ashrrev_i32
64 ; SI-DAG: buffer_store_dwordx4
65 ; SI: s_endpgm
66 define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
67 %load = load <2 x i32>, <2 x i32> addrspace(1)* %in
68 %ext = sext <2 x i32> %load to <2 x i64>
69 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
70 ret void
71 }
72
73 ; FUNC-LABEL: {{^}}zextload_global_v4i32_to_v4i64:
74 ; SI: buffer_load_dwordx4
75 ; SI: buffer_store_dwordx4
76 ; SI: buffer_store_dwordx4
77 ; SI: s_endpgm
78 define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
79 %load = load <4 x i32>, <4 x i32> addrspace(1)* %in
80 %ext = zext <4 x i32> %load to <4 x i64>
81 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
82 ret void
83 }
84
85 ; FUNC-LABEL: {{^}}sextload_global_v4i32_to_v4i64:
86 ; SI: buffer_load_dwordx4
87 ; SI-DAG: v_ashrrev_i32
88 ; SI-DAG: v_ashrrev_i32
89 ; SI-DAG: v_ashrrev_i32
90 ; SI-DAG: v_ashrrev_i32
91 ; SI-DAG: buffer_store_dwordx4
92 ; SI-DAG: buffer_store_dwordx4
93 ; SI: s_endpgm
94 define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
95 %load = load <4 x i32>, <4 x i32> addrspace(1)* %in
96 %ext = sext <4 x i32> %load to <4 x i64>
97 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
98 ret void
99 }
100
101 ; FUNC-LABEL: {{^}}zextload_global_v8i32_to_v8i64:
102 ; SI: buffer_load_dwordx4
103 ; SI: buffer_load_dwordx4
104 ; SI-DAG: buffer_store_dwordx4
105 ; SI-DAG: buffer_store_dwordx4
106 ; SI-DAG: buffer_store_dwordx4
107 ; SI-DAG: buffer_store_dwordx4
108 ; SI: s_endpgm
109 define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
110 %load = load <8 x i32>, <8 x i32> addrspace(1)* %in
111 %ext = zext <8 x i32> %load to <8 x i64>
112 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
113 ret void
114 }
115
116 ; FUNC-LABEL: {{^}}sextload_global_v8i32_to_v8i64:
117 ; SI: buffer_load_dwordx4
118 ; SI: buffer_load_dwordx4
119
120 ; SI-DAG: v_ashrrev_i32
121 ; SI-DAG: v_ashrrev_i32
122 ; SI-DAG: v_ashrrev_i32
123 ; SI-DAG: v_ashrrev_i32
124 ; SI-DAG: v_ashrrev_i32
125 ; SI-DAG: v_ashrrev_i32
126 ; SI-DAG: v_ashrrev_i32
127 ; SI-DAG: v_ashrrev_i32
128 ; SI-DAG: buffer_store_dwordx4
129 ; SI-DAG: buffer_store_dwordx4
130 ; SI-DAG: buffer_store_dwordx4
131 ; SI-DAG: buffer_store_dwordx4
132 ; SI: s_endpgm
133 define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
134 %load = load <8 x i32>, <8 x i32> addrspace(1)* %in
135 %ext = sext <8 x i32> %load to <8 x i64>
136 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
137 ret void
138 }
139
140 ; FUNC-LABEL: {{^}}sextload_global_v16i32_to_v16i64:
141 ; SI: buffer_load_dwordx4
142 ; SI: buffer_load_dwordx4
143 ; SI: buffer_load_dwordx4
144 ; SI: buffer_load_dwordx4
145
146 ; SI-DAG: v_ashrrev_i32
147 ; SI-DAG: v_ashrrev_i32
148 ; SI-DAG: v_ashrrev_i32
149 ; SI-DAG: v_ashrrev_i32
150 ; SI-DAG: buffer_store_dwordx4
151
152 ; SI-DAG: v_ashrrev_i32
153 ; SI-DAG: v_ashrrev_i32
154 ; SI-DAG: v_ashrrev_i32
155 ; SI-DAG: v_ashrrev_i32
156 ; SI-DAG: buffer_store_dwordx4
157
158 ; SI-DAG: v_ashrrev_i32
159 ; SI-DAG: v_ashrrev_i32
160 ; SI-DAG: v_ashrrev_i32
161 ; SI-DAG: v_ashrrev_i32
162 ; SI-DAG: buffer_store_dwordx4
163
164 ; SI-DAG: v_ashrrev_i32
165 ; SI-DAG: v_ashrrev_i32
166 ; SI-DAG: v_ashrrev_i32
167 ; SI-DAG: v_ashrrev_i32
168 ; SI-DAG: buffer_store_dwordx4
169 ; SI: s_endpgm
170 define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
171 %load = load <16 x i32>, <16 x i32> addrspace(1)* %in
172 %ext = sext <16 x i32> %load to <16 x i64>
173 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
174 ret void
175 }
176
177 ; FUNC-LABEL: {{^}}zextload_global_v16i32_to_v16i64
178 ; SI: buffer_load_dwordx4
179 ; SI: buffer_load_dwordx4
180 ; SI: buffer_load_dwordx4
181 ; SI: buffer_load_dwordx4
182
183 ; SI: buffer_store_dwordx4
184 ; SI: buffer_store_dwordx4
185 ; SI: buffer_store_dwordx4
186 ; SI: buffer_store_dwordx4
187 ; SI: buffer_store_dwordx4
188 ; SI: buffer_store_dwordx4
189 ; SI: buffer_store_dwordx4
190 ; SI: buffer_store_dwordx4
191 ; SI: s_endpgm
192 define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
193 %load = load <16 x i32>, <16 x i32> addrspace(1)* %in
194 %ext = zext <16 x i32> %load to <16 x i64>
195 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
196 ret void
197 }
198
199 ; FUNC-LABEL: {{^}}sextload_global_v32i32_to_v32i64:
200 ; SI: buffer_load_dwordx4
201 ; SI: buffer_load_dwordx4
202 ; SI: buffer_load_dwordx4
203 ; SI: buffer_load_dwordx4
204 ; SI: buffer_load_dwordx4
205 ; SI: buffer_load_dwordx4
206 ; SI: buffer_load_dwordx4
207 ; SI: buffer_load_dwordx4
208
209
210 ; SI-DAG: v_ashrrev_i32
211 ; SI-DAG: v_ashrrev_i32
212 ; SI-DAG: v_ashrrev_i32
213 ; SI-DAG: v_ashrrev_i32
214 ; SI-DAG: v_ashrrev_i32
215 ; SI-DAG: v_ashrrev_i32
216 ; SI-DAG: v_ashrrev_i32
217 ; SI-DAG: v_ashrrev_i32
218 ; SI-DAG: v_ashrrev_i32
219 ; SI-DAG: v_ashrrev_i32
220 ; SI-DAG: v_ashrrev_i32
221 ; SI-DAG: v_ashrrev_i32
222 ; SI-DAG: v_ashrrev_i32
223 ; SI-DAG: v_ashrrev_i32
224 ; SI-DAG: v_ashrrev_i32
225 ; SI-DAG: v_ashrrev_i32
226 ; SI-DAG: v_ashrrev_i32
227 ; SI-DAG: v_ashrrev_i32
228 ; SI-DAG: v_ashrrev_i32
229 ; SI-DAG: v_ashrrev_i32
230 ; SI-DAG: v_ashrrev_i32
231 ; SI-DAG: v_ashrrev_i32
232 ; SI-DAG: v_ashrrev_i32
233 ; SI-DAG: v_ashrrev_i32
234 ; SI-DAG: v_ashrrev_i32
235 ; SI-DAG: v_ashrrev_i32
236 ; SI-DAG: v_ashrrev_i32
237 ; SI-DAG: v_ashrrev_i32
238 ; SI-DAG: v_ashrrev_i32
239 ; SI-DAG: v_ashrrev_i32
240 ; SI-DAG: v_ashrrev_i32
241 ; SI-DAG: v_ashrrev_i32
242
243 ; SI-DAG: buffer_store_dwordx4
244 ; SI-DAG: buffer_store_dwordx4
245 ; SI-DAG: buffer_store_dwordx4
246 ; SI-DAG: buffer_store_dwordx4
247
248 ; SI-DAG: buffer_store_dwordx4
249 ; SI-DAG: buffer_store_dwordx4
250 ; SI-DAG: buffer_store_dwordx4
251 ; SI-DAG: buffer_store_dwordx4
252
253 ; SI-DAG: buffer_store_dwordx4
254 ; SI-DAG: buffer_store_dwordx4
255 ; SI-DAG: buffer_store_dwordx4
256 ; SI-DAG: buffer_store_dwordx4
257
258 ; SI-DAG: buffer_store_dwordx4
259 ; SI-DAG: buffer_store_dwordx4
260 ; SI-DAG: buffer_store_dwordx4
261 ; SI-DAG: buffer_store_dwordx4
262
263 ; SI: s_endpgm
264 define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
265 %load = load <32 x i32>, <32 x i32> addrspace(1)* %in
266 %ext = sext <32 x i32> %load to <32 x i64>
267 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
268 ret void
269 }
270
271 ; FUNC-LABEL: {{^}}zextload_global_v32i32_to_v32i64:
272 ; SI: buffer_load_dwordx4
273 ; SI: buffer_load_dwordx4
274 ; SI: buffer_load_dwordx4
275 ; SI: buffer_load_dwordx4
276 ; SI: buffer_load_dwordx4
277 ; SI: buffer_load_dwordx4
278 ; SI: buffer_load_dwordx4
279 ; SI: buffer_load_dwordx4
280
281 ; SI-DAG: buffer_store_dwordx4
282 ; SI-DAG: buffer_store_dwordx4
283 ; SI-DAG: buffer_store_dwordx4
284 ; SI-DAG: buffer_store_dwordx4
285
286 ; SI-DAG: buffer_store_dwordx4
287 ; SI-DAG: buffer_store_dwordx4
288 ; SI-DAG: buffer_store_dwordx4
289 ; SI-DAG: buffer_store_dwordx4
290
291 ; SI-DAG: buffer_store_dwordx4
292 ; SI-DAG: buffer_store_dwordx4
293 ; SI-DAG: buffer_store_dwordx4
294 ; SI-DAG: buffer_store_dwordx4
295
296 ; SI-DAG: buffer_store_dwordx4
297 ; SI-DAG: buffer_store_dwordx4
298 ; SI-DAG: buffer_store_dwordx4
299 ; SI-DAG: buffer_store_dwordx4
300
301 ; SI: s_endpgm
302 define void @zextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
303 %load = load <32 x i32>, <32 x i32> addrspace(1)* %in
304 %ext = zext <32 x i32> %load to <32 x i64>
305 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
306 ret void
307 }
test/CodeGen/AMDGPU/global-extload-i8.ll (deleted: +0, -299)
None ; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2 ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
3
4 ; FUNC-LABEL: {{^}}zextload_global_i8_to_i32:
5 ; SI: buffer_load_ubyte
6 ; SI: buffer_store_dword
7 ; SI: s_endpgm
8 define void @zextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
9 %a = load i8, i8 addrspace(1)* %in
10 %ext = zext i8 %a to i32
11 store i32 %ext, i32 addrspace(1)* %out
12 ret void
13 }
14
15 ; FUNC-LABEL: {{^}}sextload_global_i8_to_i32:
16 ; SI: buffer_load_sbyte
17 ; SI: buffer_store_dword
18 ; SI: s_endpgm
19 define void @sextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
20 %a = load i8, i8 addrspace(1)* %in
21 %ext = sext i8 %a to i32
22 store i32 %ext, i32 addrspace(1)* %out
23 ret void
24 }
25
26 ; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i32:
27 ; SI: s_endpgm
28 define void @zextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
29 %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
30 %ext = zext <1 x i8> %load to <1 x i32>
31 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
32 ret void
33 }
34
35 ; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i32:
36 ; SI: s_endpgm
37 define void @sextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
38 %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
39 %ext = sext <1 x i8> %load to <1 x i32>
40 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
41 ret void
42 }
43
44 ; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i32:
45 ; SI: s_endpgm
46 define void @zextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
47 %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
48 %ext = zext <2 x i8> %load to <2 x i32>
49 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
50 ret void
51 }
52
53 ; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i32:
54 ; SI: s_endpgm
55 define void @sextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
56 %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
57 %ext = sext <2 x i8> %load to <2 x i32>
58 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
59 ret void
60 }
61
62 ; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i32:
63 ; SI: s_endpgm
64 define void @zextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
65 %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
66 %ext = zext <4 x i8> %load to <4 x i32>
67 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
68 ret void
69 }
70
71 ; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i32:
72 ; SI: s_endpgm
73 define void @sextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
74 %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
75 %ext = sext <4 x i8> %load to <4 x i32>
76 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
77 ret void
78 }
79
80 ; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i32:
81 ; SI: s_endpgm
82 define void @zextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
83 %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
84 %ext = zext <8 x i8> %load to <8 x i32>
85 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
86 ret void
87 }
88
89 ; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i32:
90 ; SI: s_endpgm
91 define void @sextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
92 %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
93 %ext = sext <8 x i8> %load to <8 x i32>
94 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
95 ret void
96 }
97
98 ; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i32:
99 ; SI: s_endpgm
100 define void @zextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
101 %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
102 %ext = zext <16 x i8> %load to <16 x i32>
103 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
104 ret void
105 }
106
107 ; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i32:
108 ; SI: s_endpgm
109 define void @sextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
110 %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
111 %ext = sext <16 x i8> %load to <16 x i32>
112 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
113 ret void
114 }
115
116 ; XFUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i32:
117 ; XSI: s_endpgm
118 ; define void @zextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
119 ; %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
120 ; %ext = zext <32 x i8> %load to <32 x i32>
121 ; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
122 ; ret void
123 ; }
124
125 ; XFUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i32:
126 ; XSI: s_endpgm
127 ; define void @sextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
128 ; %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
129 ; %ext = sext <32 x i8> %load to <32 x i32>
130 ; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
131 ; ret void
132 ; }
133
134 ; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i32:
135 ; XSI: s_endpgm
136 ; define void @zextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
137 ; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
138 ; %ext = zext <64 x i8> %load to <64 x i32>
139 ; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
140 ; ret void
141 ; }
142
143 ; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i32:
144 ; XSI: s_endpgm
145 ; define void @sextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
146 ; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
147 ; %ext = sext <64 x i8> %load to <64 x i32>
148 ; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
149 ; ret void
150 ; }
151
152 ; FUNC-LABEL: {{^}}zextload_global_i8_to_i64:
153 ; SI-DAG: buffer_load_ubyte v[[LO:[0-9]+]],
154 ; SI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
155 ; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
156 define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
157 %a = load i8, i8 addrspace(1)* %in
158 %ext = zext i8 %a to i64
159 store i64 %ext, i64 addrspace(1)* %out
160 ret void
161 }
162
163 ; FUNC-LABEL: {{^}}sextload_global_i8_to_i64:
164 ; SI: buffer_load_sbyte [[LOAD:v[0-9]+]],
165 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
166 ; SI: buffer_store_dwordx2
167 define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
168 %a = load i8, i8 addrspace(1)* %in
169 %ext = sext i8 %a to i64
170 store i64 %ext, i64 addrspace(1)* %out
171 ret void
172 }
173
174 ; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i64:
175 ; SI: s_endpgm
176 define void @zextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
177 %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
178 %ext = zext <1 x i8> %load to <1 x i64>
179 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
180 ret void
181 }
182
183 ; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i64:
184 ; SI: s_endpgm
185 define void @sextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
186 %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
187 %ext = sext <1 x i8> %load to <1 x i64>
188 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
189 ret void
190 }
191
192 ; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i64:
193 ; SI: s_endpgm
194 define void @zextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
195 %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
196 %ext = zext <2 x i8> %load to <2 x i64>
197 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
198 ret void
199 }
200
201 ; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i64:
202 ; SI: s_endpgm
203 define void @sextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
204 %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
205 %ext = sext <2 x i8> %load to <2 x i64>
206 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
207 ret void
208 }
209
210 ; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i64:
211 ; SI: s_endpgm
212 define void @zextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
213 %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
214 %ext = zext <4 x i8> %load to <4 x i64>
215 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
216 ret void
217 }
218
219 ; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i64:
220 ; SI: s_endpgm
221 define void @sextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
222 %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
223 %ext = sext <4 x i8> %load to <4 x i64>
224 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
225 ret void
226 }
227
228 ; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i64:
229 ; SI: s_endpgm
230 define void @zextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
231 %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
232 %ext = zext <8 x i8> %load to <8 x i64>
233 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
234 ret void
235 }
236
237 ; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i64:
238 ; SI: s_endpgm
239 define void @sextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
240 %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
241 %ext = sext <8 x i8> %load to <8 x i64>
242 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
243 ret void
244 }
245
246 ; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i64:
247 ; SI: s_endpgm
248 define void @zextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
249 %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
250 %ext = zext <16 x i8> %load to <16 x i64>
251 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
252 ret void
253 }
254
255 ; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i64:
256 ; SI: s_endpgm
257 define void @sextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
258 %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
259 %ext = sext <16 x i8> %load to <16 x i64>
260 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
261 ret void
262 }
263
264 ; FUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i64:
265 ; SI: s_endpgm
266 define void @zextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
267 %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
268 %ext = zext <32 x i8> %load to <32 x i64>
269 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
270 ret void
271 }
272
273 ; FUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i64:
274 ; SI: s_endpgm
275 define void @sextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
276 %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
277 %ext = sext <32 x i8> %load to <32 x i64>
278 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
279 ret void
280 }
281
282 ; ; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i64:
283 ; ; XSI: s_endpgm
284 ; define void @zextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
285 ; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
286 ; %ext = zext <64 x i8> %load to <64 x i64>
287 ; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
288 ; ret void
289 ; }
290
291 ; ; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i64:
292 ; ; XSI: s_endpgm
293 ; define void @sextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
294 ; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
295 ; %ext = sext <64 x i8> %load to <64 x i64>
296 ; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
297 ; ret void
298 ; }
None ; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC
0 ; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC
11 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC
22 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
33 ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
474474 ; store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
475475 ; ret void
476476 ; }
477
478 ; FUNC-LABEL: {{^}}i1_arg:
479 ; SI: buffer_load_ubyte
480 ; SI: v_and_b32_e32
481 ; SI: buffer_store_byte
482 ; SI: s_endpgm
483 define void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
484 store i1 %x, i1 addrspace(1)* %out, align 1
485 ret void
486 }
487
488 ; FUNC-LABEL: {{^}}i1_arg_zext_i32:
489 ; SI: buffer_load_ubyte
490 ; SI: buffer_store_dword
491 ; SI: s_endpgm
492 define void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
493 %ext = zext i1 %x to i32
494 store i32 %ext, i32 addrspace(1)* %out, align 4
495 ret void
496 }
497
498 ; FUNC-LABEL: {{^}}i1_arg_zext_i64:
499 ; SI: buffer_load_ubyte
500 ; SI: buffer_store_dwordx2
501 ; SI: s_endpgm
502 define void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
503 %ext = zext i1 %x to i64
504 store i64 %ext, i64 addrspace(1)* %out, align 8
505 ret void
506 }
507
508 ; FUNC-LABEL: {{^}}i1_arg_sext_i32:
509 ; SI: buffer_load_ubyte
510 ; SI: buffer_store_dword
511 ; SI: s_endpgm
512 define void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
513 %ext = sext i1 %x to i32
514 store i32 %ext, i32 addrspace(1)* %out, align 4
515 ret void
516 }
517
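; Sign-extending the i1 argument to i64 is expected to produce the low dword with a
; 1-bit bitfield extract and the high dword with a 31-bit arithmetic shift of that result.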
518 ; FUNC-LABEL: {{^}}i1_arg_sext_i64:
519 ; SI: buffer_load_ubyte
520 ; SI: v_bfe_i32
521 ; SI: v_ashrrev_i32
522 ; SI: buffer_store_dwordx2
523 ; SI: s_endpgm
524 define void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
525 %ext = sext i1 %x to i64
526 store i64 %ext, i64 addrspace(1)* %out, align 8
527 ret void
528 }
0 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
1 ; RUN: llc -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
3
4 ; FUNC-LABEL: {{^}}constant_load_f64:
5 ; GCN: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}]
6 ; GCN-NOHSA: buffer_store_dwordx2
7 ; GCN-HSA: flat_store_dwordx2
8 define void @constant_load_f64(double addrspace(1)* %out, double addrspace(2)* %in) #0 {
9 %ld = load double, double addrspace(2)* %in
10 store double %ld, double addrspace(1)* %out
11 ret void
12 }
13
14 attributes #0 = { nounwind }
0 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
2 ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
3
4 ; FUNC-LABEL: {{^}}constant_load_i1:
5 ; GCN: buffer_load_ubyte
6 ; GCN: v_and_b32_e32 v{{[0-9]+}}, 1
7 ; GCN: buffer_store_byte
8
9 ; EG: VTX_READ_8
10 ; EG: AND_INT
11 define void @constant_load_i1(i1 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
12 %load = load i1, i1 addrspace(2)* %in
13 store i1 %load, i1 addrspace(1)* %out
14 ret void
15 }
16
17 ; FUNC-LABEL: {{^}}constant_load_v2i1:
18 define void @constant_load_v2i1(<2 x i1> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
19 %load = load <2 x i1>, <2 x i1> addrspace(2)* %in
20 store <2 x i1> %load, <2 x i1> addrspace(1)* %out
21 ret void
22 }
23
24 ; FUNC-LABEL: {{^}}constant_load_v3i1:
25 define void @constant_load_v3i1(<3 x i1> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
26 %load = load <3 x i1>, <3 x i1> addrspace(2)* %in
27 store <3 x i1> %load, <3 x i1> addrspace(1)* %out
28 ret void
29 }
30
31 ; FUNC-LABEL: {{^}}constant_load_v4i1:
32 define void @constant_load_v4i1(<4 x i1> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
33 %load = load <4 x i1>, <4 x i1> addrspace(2)* %in
34 store <4 x i1> %load, <4 x i1> addrspace(1)* %out
35 ret void
36 }
37
38 ; FUNC-LABEL: {{^}}constant_load_v8i1:
39 define void @constant_load_v8i1(<8 x i1> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
40 %load = load <8 x i1>, <8 x i1> addrspace(2)* %in
41 store <8 x i1> %load, <8 x i1> addrspace(1)* %out
42 ret void
43 }
44
45 ; FUNC-LABEL: {{^}}constant_load_v16i1:
46 define void @constant_load_v16i1(<16 x i1> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
47 %load = load <16 x i1>, <16 x i1> addrspace(2)* %in
48 store <16 x i1> %load, <16 x i1> addrspace(1)* %out
49 ret void
50 }
51
52 ; FUNC-LABEL: {{^}}constant_load_v32i1:
53 define void @constant_load_v32i1(<32 x i1> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
54 %load = load <32 x i1>, <32 x i1> addrspace(2)* %in
55 store <32 x i1> %load, <32 x i1> addrspace(1)* %out
56 ret void
57 }
58
59 ; FUNC-LABEL: {{^}}constant_load_v64i1:
60 define void @constant_load_v64i1(<64 x i1> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
61 %load = load <64 x i1>, <64 x i1> addrspace(2)* %in
62 store <64 x i1> %load, <64 x i1> addrspace(1)* %out
63 ret void
64 }
65
66 ; FUNC-LABEL: {{^}}constant_zextload_i1_to_i32:
67 ; GCN: buffer_load_ubyte
68 ; GCN: buffer_store_dword
69 define void @constant_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
70 %a = load i1, i1 addrspace(2)* %in
71 %ext = zext i1 %a to i32
72 store i32 %ext, i32 addrspace(1)* %out
73 ret void
74 }
75
76 ; FUNC-LABEL: {{^}}constant_sextload_i1_to_i32:
77 ; GCN: buffer_load_ubyte
78 ; GCN: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1{{$}}
79 ; GCN: buffer_store_dword
80
81 ; EG: VTX_READ_8
82 ; EG: BFE_INT
83 define void @constant_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
84 %a = load i1, i1 addrspace(2)* %in
85 %ext = sext i1 %a to i32
86 store i32 %ext, i32 addrspace(1)* %out
87 ret void
88 }
89
90 ; FUNC-LABEL: {{^}}constant_zextload_v1i1_to_v1i32:
91 define void @constant_zextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 {
92 %load = load <1 x i1>, <1 x i1> addrspace(2)* %in
93 %ext = zext <1 x i1> %load to <1 x i32>
94 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
95 ret void
96 }
97
98 ; FUNC-LABEL: {{^}}constant_sextload_v1i1_to_v1i32:
99 define void @constant_sextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 {
100 %load = load <1 x i1>, <1 x i1> addrspace(2)* %in
101 %ext = sext <1 x i1> %load to <1 x i32>
102 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
103 ret void
104 }
105
106 ; FUNC-LABEL: {{^}}constant_zextload_v2i1_to_v2i32:
107 define void @constant_zextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
108 %load = load <2 x i1>, <2 x i1> addrspace(2)* %in
109 %ext = zext <2 x i1> %load to <2 x i32>
110 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
111 ret void
112 }
113
114 ; FUNC-LABEL: {{^}}constant_sextload_v2i1_to_v2i32:
115 define void @constant_sextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
116 %load = load <2 x i1>, <2 x i1> addrspace(2)* %in
117 %ext = sext <2 x i1> %load to <2 x i32>
118 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
119 ret void
120 }
121
122 ; FUNC-LABEL: {{^}}constant_zextload_v3i1_to_v3i32:
123 define void @constant_zextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
124 %load = load <3 x i1>, <3 x i1> addrspace(2)* %in
125 %ext = zext <3 x i1> %load to <3 x i32>
126 store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
127 ret void
128 }
129
130 ; FUNC-LABEL: {{^}}constant_sextload_v3i1_to_v3i32:
131 define void @constant_sextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
132 %load = load <3 x i1>, <3 x i1> addrspace(2)* %in
133 %ext = sext <3 x i1> %load to <3 x i32>
134 store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
135 ret void
136 }
137
138 ; FUNC-LABEL: {{^}}constant_zextload_v4i1_to_v4i32:
139 define void @constant_zextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
140 %load = load <4 x i1>, <4 x i1> addrspace(2)* %in
141 %ext = zext <4 x i1> %load to <4 x i32>
142 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
143 ret void
144 }
145
146 ; FUNC-LABEL: {{^}}constant_sextload_v4i1_to_v4i32:
147 define void @constant_sextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
148 %load = load <4 x i1>, <4 x i1> addrspace(2)* %in
149 %ext = sext <4 x i1> %load to <4 x i32>
150 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
151 ret void
152 }
153
154 ; FUNC-LABEL: {{^}}constant_zextload_v8i1_to_v8i32:
155 define void @constant_zextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
156 %load = load <8 x i1>, <8 x i1> addrspace(2)* %in
157 %ext = zext <8 x i1> %load to <8 x i32>
158 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
159 ret void
160 }
161
162 ; FUNC-LABEL: {{^}}constant_sextload_v8i1_to_v8i32:
163 define void @constant_sextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
164 %load = load <8 x i1>, <8 x i1> addrspace(2)* %in
165 %ext = sext <8 x i1> %load to <8 x i32>
166 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
167 ret void
168 }
169
170 ; FUNC-LABEL: {{^}}constant_zextload_v16i1_to_v16i32:
171 define void @constant_zextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
172 %load = load <16 x i1>, <16 x i1> addrspace(2)* %in
173 %ext = zext <16 x i1> %load to <16 x i32>
174 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
175 ret void
176 }
177
178 ; FUNC-LABEL: {{^}}constant_sextload_v16i1_to_v16i32:
179 define void @constant_sextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
180 %load = load <16 x i1>, <16 x i1> addrspace(2)* %in
181 %ext = sext <16 x i1> %load to <16 x i32>
182 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
183 ret void
184 }
185
186 ; FUNC-LABEL: {{^}}constant_zextload_v32i1_to_v32i32:
187 define void @constant_zextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
188 %load = load <32 x i1>, <32 x i1> addrspace(2)* %in
189 %ext = zext <32 x i1> %load to <32 x i32>
190 store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
191 ret void
192 }
193
194 ; FUNC-LABEL: {{^}}constant_sextload_v32i1_to_v32i32:
195 define void @constant_sextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
196 %load = load <32 x i1>, <32 x i1> addrspace(2)* %in
197 %ext = sext <32 x i1> %load to <32 x i32>
198 store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
199 ret void
200 }
201
202 ; FUNC-LABEL: {{^}}constant_zextload_v64i1_to_v64i32:
203 define void @constant_zextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
204 %load = load <64 x i1>, <64 x i1> addrspace(2)* %in
205 %ext = zext <64 x i1> %load to <64 x i32>
206 store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
207 ret void
208 }
209
210 ; FUNC-LABEL: {{^}}constant_sextload_v64i1_to_v64i32:
211 define void @constant_sextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
212 %load = load <64 x i1>, <64 x i1> addrspace(2)* %in
213 %ext = sext <64 x i1> %load to <64 x i32>
214 store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
215 ret void
216 }
217
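; Zero-extending i1 to i64 should only need to mask the loaded byte down to bit 0 for
; the low dword and materialize a zero for the high dword.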
218 ; FUNC-LABEL: {{^}}constant_zextload_i1_to_i64:
219 ; GCN-DAG: buffer_load_ubyte [[LOAD:v[0-9]+]],
220 ; GCN-DAG: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
221 ; GCN-DAG: v_and_b32_e32 {{v[0-9]+}}, 1, [[LOAD]]
222 ; GCN: buffer_store_dwordx2
223 define void @constant_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
224 %a = load i1, i1 addrspace(2)* %in
225 %ext = zext i1 %a to i64
226 store i64 %ext, i64 addrspace(1)* %out
227 ret void
228 }
229
230 ; FUNC-LABEL: {{^}}constant_sextload_i1_to_i64:
231 ; GCN: buffer_load_ubyte [[LOAD:v[0-9]+]],
232 ; GCN: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}}
233 ; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
234 ; GCN: buffer_store_dwordx2
235 define void @constant_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
236 %a = load i1, i1 addrspace(2)* %in
237 %ext = sext i1 %a to i64
238 store i64 %ext, i64 addrspace(1)* %out
239 ret void
240 }
241
242 ; FUNC-LABEL: {{^}}constant_zextload_v1i1_to_v1i64:
243 define void @constant_zextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 {
244 %load = load <1 x i1>, <1 x i1> addrspace(2)* %in
245 %ext = zext <1 x i1> %load to <1 x i64>
246 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
247 ret void
248 }
249
250 ; FUNC-LABEL: {{^}}constant_sextload_v1i1_to_v1i64:
251 define void @constant_sextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 {
252 %load = load <1 x i1>, <1 x i1> addrspace(2)* %in
253 %ext = sext <1 x i1> %load to <1 x i64>
254 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
255 ret void
256 }
257
258 ; FUNC-LABEL: {{^}}constant_zextload_v2i1_to_v2i64:
259 define void @constant_zextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
260 %load = load <2 x i1>, <2 x i1> addrspace(2)* %in
261 %ext = zext <2 x i1> %load to <2 x i64>
262 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
263 ret void
264 }
265
266 ; FUNC-LABEL: {{^}}constant_sextload_v2i1_to_v2i64:
267 define void @constant_sextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
268 %load = load <2 x i1>, <2 x i1> addrspace(2)* %in
269 %ext = sext <2 x i1> %load to <2 x i64>
270 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
271 ret void
272 }
273
274 ; FUNC-LABEL: {{^}}constant_zextload_v3i1_to_v3i64:
275 define void @constant_zextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
276 %load = load <3 x i1>, <3 x i1> addrspace(2)* %in
277 %ext = zext <3 x i1> %load to <3 x i64>
278 store <3 x i64> %ext, <3 x i64> addrspace(1)* %out
279 ret void
280 }
281
282 ; FUNC-LABEL: {{^}}constant_sextload_v3i1_to_v3i64:
283 define void @constant_sextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
284 %load = load <3 x i1>, <3 x i1> addrspace(2)* %in
285 %ext = sext <3 x i1> %load to <3 x i64>
286 store <3 x i64> %ext, <3 x i64> addrspace(1)* %out
287 ret void
288 }
289
290 ; FUNC-LABEL: {{^}}constant_zextload_v4i1_to_v4i64:
291 define void @constant_zextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
292 %load = load <4 x i1>, <4 x i1> addrspace(2)* %in
293 %ext = zext <4 x i1> %load to <4 x i64>
294 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
295 ret void
296 }
297
298 ; FUNC-LABEL: {{^}}constant_sextload_v4i1_to_v4i64:
299 define void @constant_sextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
300 %load = load <4 x i1>, <4 x i1> addrspace(2)* %in
301 %ext = sext <4 x i1> %load to <4 x i64>
302 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
303 ret void
304 }
305
306 ; FUNC-LABEL: {{^}}constant_zextload_v8i1_to_v8i64:
307 define void @constant_zextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
308 %load = load <8 x i1>, <8 x i1> addrspace(2)* %in
309 %ext = zext <8 x i1> %load to <8 x i64>
310 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
311 ret void
312 }
313
314 ; FUNC-LABEL: {{^}}constant_sextload_v8i1_to_v8i64:
315 define void @constant_sextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
316 %load = load <8 x i1>, <8 x i1> addrspace(2)* %in
317 %ext = sext <8 x i1> %load to <8 x i64>
318 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
319 ret void
320 }
321
322 ; FUNC-LABEL: {{^}}constant_zextload_v16i1_to_v16i64:
323 define void @constant_zextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
324 %load = load <16 x i1>, <16 x i1> addrspace(2)* %in
325 %ext = zext <16 x i1> %load to <16 x i64>
326 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
327 ret void
328 }
329
330 ; FUNC-LABEL: {{^}}constant_sextload_v16i1_to_v16i64:
331 define void @constant_sextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
332 %load = load <16 x i1>, <16 x i1> addrspace(2)* %in
333 %ext = sext <16 x i1> %load to <16 x i64>
334 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
335 ret void
336 }
337
338 ; FUNC-LABEL: {{^}}constant_zextload_v32i1_to_v32i64:
339 define void @constant_zextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
340 %load = load <32 x i1>, <32 x i1> addrspace(2)* %in
341 %ext = zext <32 x i1> %load to <32 x i64>
342 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
343 ret void
344 }
345
346 ; FUNC-LABEL: {{^}}constant_sextload_v32i1_to_v32i64:
347 define void @constant_sextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
348 %load = load <32 x i1>, <32 x i1> addrspace(2)* %in
349 %ext = sext <32 x i1> %load to <32 x i64>
350 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
351 ret void
352 }
353
354 ; FUNC-LABEL: {{^}}constant_zextload_v64i1_to_v64i64:
355 define void @constant_zextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
356 %load = load <64 x i1>, <64 x i1> addrspace(2)* %in
357 %ext = zext <64 x i1> %load to <64 x i64>
358 store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
359 ret void
360 }
361
362 ; FUNC-LABEL: {{^}}constant_sextload_v64i1_to_v64i64:
363 define void @constant_sextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
364 %load = load <64 x i1>, <64 x i1> addrspace(2)* %in
365 %ext = sext <64 x i1> %load to <64 x i64>
366 store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
367 ret void
368 }
369
370 attributes #0 = { nounwind }
0 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
1 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
3 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
4
5 ; FUNC-LABEL: {{^}}constant_load_i16:
6 ; GCN-NOHSA: buffer_load_ushort v{{[0-9]+}}
7 ; GCN-HSA: flat_load_ushort
8
9 ; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
10 define void @constant_load_i16(i16 addrspace(1)* %out, i16 addrspace(2)* %in) {
11 entry:
12 %ld = load i16, i16 addrspace(2)* %in
13 store i16 %ld, i16 addrspace(1)* %out
14 ret void
15 }
16
17 ; FUNC-LABEL: {{^}}constant_load_v2i16:
18 ; GCN: s_load_dword s
19
20 ; EG: VTX_READ_32
21 define void @constant_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) {
22 entry:
23 %ld = load <2 x i16>, <2 x i16> addrspace(2)* %in
24 store <2 x i16> %ld, <2 x i16> addrspace(1)* %out
25 ret void
26 }
27
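; The 48-bit <3 x i16> constant load is presumably widened to a 64-bit scalar load on
; GCN, while EG splits it into a 32-bit and a 16-bit vertex read.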
28 ; FUNC-LABEL: {{^}}constant_load_v3i16:
29 ; GCN: s_load_dwordx2 s
30
31 ; EG-DAG: VTX_READ_32
32 ; EG-DAG: VTX_READ_16
33 define void @constant_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) {
34 entry:
35 %ld = load <3 x i16>, <3 x i16> addrspace(2)* %in
36 store <3 x i16> %ld, <3 x i16> addrspace(1)* %out
37 ret void
38 }
39
40 ; FUNC-LABEL: {{^}}constant_load_v4i16:
41 ; GCN: s_load_dwordx2
42
43 ; EG: VTX_READ_64
44 define void @constant_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) {
45 entry:
46 %ld = load <4 x i16>, <4 x i16> addrspace(2)* %in
47 store <4 x i16> %ld, <4 x i16> addrspace(1)* %out
48 ret void
49 }
50
51 ; FUNC-LABEL: {{^}}constant_load_v8i16:
52 ; GCN: s_load_dwordx4
53
54 ; EG: VTX_READ_128
55 define void @constant_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) {
56 entry:
57 %ld = load <8 x i16>, <8 x i16> addrspace(2)* %in
58 store <8 x i16> %ld, <8 x i16> addrspace(1)* %out
59 ret void
60 }
61
62 ; FUNC-LABEL: {{^}}constant_load_v16i16:
63 ; GCN: s_load_dwordx8
64
65 ; EG: VTX_READ_128
66 ; EG: VTX_READ_128
67 define void @constant_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) {
68 entry:
69 %ld = load <16 x i16>, <16 x i16> addrspace(2)* %in
70 store <16 x i16> %ld, <16 x i16> addrspace(1)* %out
71 ret void
72 }
73
74 ; FUNC-LABEL: {{^}}constant_zextload_i16_to_i32:
75 ; GCN-NOHSA: buffer_load_ushort
76 ; GCN-NOHSA: buffer_store_dword
77
78 ; GCN-HSA: flat_load_ushort
79 ; GCN-HSA: flat_store_dword
80
81 ; EG: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
82 define void @constant_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
83 %a = load i16, i16 addrspace(2)* %in
84 %ext = zext i16 %a to i32
85 store i32 %ext, i32 addrspace(1)* %out
86 ret void
87 }
88
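; GCN should fold the sign extension into the signed short load; EG has no
; sign-extending vertex read, so a BFE_INT from bit 0 with width 16 follows the load.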
89 ; FUNC-LABEL: {{^}}constant_sextload_i16_to_i32:
90 ; GCN-NOHSA: buffer_load_sshort
91 ; GCN-NOHSA: buffer_store_dword
92
93 ; GCN-HSA: flat_load_sshort
94 ; GCN-HSA: flat_store_dword
95
96 ; EG: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
97 ; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
98 ; EG: 16
99 define void @constant_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
100 %a = load i16, i16 addrspace(2)* %in
101 %ext = sext i16 %a to i32
102 store i32 %ext, i32 addrspace(1)* %out
103 ret void
104 }
105
106 ; FUNC-LABEL: {{^}}constant_zextload_v1i16_to_v1i32:
107 ; GCN-NOHSA: buffer_load_ushort
108 ; GCN-HSA: flat_load_ushort
109 define void @constant_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
110 %load = load <1 x i16>, <1 x i16> addrspace(2)* %in
111 %ext = zext <1 x i16> %load to <1 x i32>
112 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
113 ret void
114 }
115
116 ; FUNC-LABEL: {{^}}constant_sextload_v1i16_to_v1i32:
117 ; GCN-NOHSA: buffer_load_sshort
118 ; GCN-HSA: flat_load_sshort
119 define void @constant_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
120 %load = load <1 x i16>, <1 x i16> addrspace(2)* %in
121 %ext = sext <1 x i16> %load to <1 x i32>
122 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
123 ret void
124 }
125
126 ; FUNC-LABEL: {{^}}constant_zextload_v2i16_to_v2i32:
127 ; GCN-NOHSA: buffer_load_ushort
128 ; GCN-NOHSA: buffer_load_ushort
129 ; GCN-HSA: flat_load_ushort
130 ; GCN-HSA: flat_load_ushort
131 define void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
132 %load = load <2 x i16>, <2 x i16> addrspace(2)* %in
133 %ext = zext <2 x i16> %load to <2 x i32>
134 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
135 ret void
136 }
137
138 ; FUNC-LABEL: {{^}}constant_sextload_v2i16_to_v2i32:
139 ; GCN-NOHSA: buffer_load_sshort
140 ; GCN-NOHSA: buffer_load_sshort
141
142 ; GCN-HSA: flat_load_sshort
143 ; GCN-HSA: flat_load_sshort
144
145 ; EG-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
146 ; EG-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
147 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
148 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
149 ; EG-DAG: 16
150 ; EG-DAG: 16
151 define void @constant_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
152 %load = load <2 x i16>, <2 x i16> addrspace(2)* %in
153 %ext = sext <2 x i16> %load to <2 x i32>
154 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
155 ret void
156 }
157
158 ; FUNC-LABEL: {{^}}constant_zextload_v3i16_to_v3i32:
159 ; GCN: s_load_dwordx2
160 define void @constant_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) {
161 entry:
162 %ld = load <3 x i16>, <3 x i16> addrspace(2)* %in
163 %ext = zext <3 x i16> %ld to <3 x i32>
164 store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
165 ret void
166 }
167
168 ; FUNC-LABEL: {{^}}constant_sextload_v3i16_to_v3i32:
169 ; GCN: s_load_dwordx2
170 define void @constant_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) {
171 entry:
172 %ld = load <3 x i16>, <3 x i16> addrspace(2)* %in
173 %ext = sext <3 x i16> %ld to <3 x i32>
174 store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
175 ret void
176 }
177
178 ; FUNC-LABEL: {{^}}constant_zextload_v4i16_to_v4i32:
179 ; GCN-NOHSA: buffer_load_ushort
180 ; GCN-NOHSA: buffer_load_ushort
181 ; GCN-NOHSA: buffer_load_ushort
182 ; GCN-NOHSA: buffer_load_ushort
183
184 ; GCN-HSA: flat_load_ushort
185 ; GCN-HSA: flat_load_ushort
186 ; GCN-HSA: flat_load_ushort
187 ; GCN-HSA: flat_load_ushort
188
189 ; EG: VTX_READ_16
190 ; EG: VTX_READ_16
191 ; EG: VTX_READ_16
192 ; EG: VTX_READ_16
193 define void @constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
194 %load = load <4 x i16>, <4 x i16> addrspace(2)* %in
195 %ext = zext <4 x i16> %load to <4 x i32>
196 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
197 ret void
198 }
199
200 ; FUNC-LABEL: {{^}}constant_sextload_v4i16_to_v4i32:
201 ; GCN-NOHSA: buffer_load_sshort
202 ; GCN-NOHSA: buffer_load_sshort
203 ; GCN-NOHSA: buffer_load_sshort
204 ; GCN-NOHSA: buffer_load_sshort
205
206 ; GCN-HSA: flat_load_sshort
207 ; GCN-HSA: flat_load_sshort
208 ; GCN-HSA: flat_load_sshort
209 ; GCN-HSA: flat_load_sshort
210
211 ; EG-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
212 ; EG-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
213 ; EG-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
214 ; EG-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
215 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
216 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
217 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Z]], 0.0, literal
218 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_W]], 0.0, literal
219 ; EG-DAG: 16
220 ; EG-DAG: 16
221 ; EG-DAG: 16
222 ; EG-DAG: 16
223 define void @constant_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
224 %load = load <4 x i16>, <4 x i16> addrspace(2)* %in
225 %ext = sext <4 x i16> %load to <4 x i32>
226 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
227 ret void
228 }
229
230 ; FUNC-LABEL: {{^}}constant_zextload_v8i16_to_v8i32:
231 ; GCN-NOHSA: buffer_load_ushort
232 ; GCN-NOHSA: buffer_load_ushort
233 ; GCN-NOHSA: buffer_load_ushort
234 ; GCN-NOHSA: buffer_load_ushort
235 ; GCN-NOHSA: buffer_load_ushort
236 ; GCN-NOHSA: buffer_load_ushort
237 ; GCN-NOHSA: buffer_load_ushort
238 ; GCN-NOHSA: buffer_load_ushort
239
240 ; GCN-HSA: flat_load_ushort
241 ; GCN-HSA: flat_load_ushort
242 ; GCN-HSA: flat_load_ushort
243 ; GCN-HSA: flat_load_ushort
244 ; GCN-HSA: flat_load_ushort
245 ; GCN-HSA: flat_load_ushort
246 ; GCN-HSA: flat_load_ushort
247 ; GCN-HSA: flat_load_ushort
248 define void @constant_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
249 %load = load <8 x i16>, <8 x i16> addrspace(2)* %in
250 %ext = zext <8 x i16> %load to <8 x i32>
251 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
252 ret void
253 }
254
255 ; FUNC-LABEL: {{^}}constant_sextload_v8i16_to_v8i32:
256 ; GCN-NOHSA: buffer_load_sshort
257 ; GCN-NOHSA: buffer_load_sshort
258 ; GCN-NOHSA: buffer_load_sshort
259 ; GCN-NOHSA: buffer_load_sshort
260 ; GCN-NOHSA: buffer_load_sshort
261 ; GCN-NOHSA: buffer_load_sshort
262 ; GCN-NOHSA: buffer_load_sshort
263 ; GCN-NOHSA: buffer_load_sshort
264
265 ; GCN-HSA: flat_load_sshort
266 ; GCN-HSA: flat_load_sshort
267 ; GCN-HSA: flat_load_sshort
268 ; GCN-HSA: flat_load_sshort
269 ; GCN-HSA: flat_load_sshort
270 ; GCN-HSA: flat_load_sshort
271 ; GCN-HSA: flat_load_sshort
272 ; GCN-HSA: flat_load_sshort
273 define void @constant_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
274 %load = load <8 x i16>, <8 x i16> addrspace(2)* %in
275 %ext = sext <8 x i16> %load to <8 x i32>
276 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
277 ret void
278 }
279
280 ; FUNC-LABEL: {{^}}constant_zextload_v16i16_to_v16i32:
281 ; GCN-NOHSA: buffer_load_ushort
282 ; GCN-NOHSA: buffer_load_ushort
283 ; GCN-NOHSA: buffer_load_ushort
284 ; GCN-NOHSA: buffer_load_ushort
285 ; GCN-NOHSA: buffer_load_ushort
286 ; GCN-NOHSA: buffer_load_ushort
287 ; GCN-NOHSA: buffer_load_ushort
288 ; GCN-NOHSA: buffer_load_ushort
289 ; GCN-NOHSA: buffer_load_ushort
290 ; GCN-NOHSA: buffer_load_ushort
291 ; GCN-NOHSA: buffer_load_ushort
292 ; GCN-NOHSA: buffer_load_ushort
293 ; GCN-NOHSA: buffer_load_ushort
294 ; GCN-NOHSA: buffer_load_ushort
295 ; GCN-NOHSA: buffer_load_ushort
296 ; GCN-NOHSA: buffer_load_ushort
297
298 ; GCN-HSA: flat_load_ushort
299 ; GCN-HSA: flat_load_ushort
300 ; GCN-HSA: flat_load_ushort
301 ; GCN-HSA: flat_load_ushort
302 ; GCN-HSA: flat_load_ushort
303 ; GCN-HSA: flat_load_ushort
304 ; GCN-HSA: flat_load_ushort
305 ; GCN-HSA: flat_load_ushort
306 ; GCN-HSA: flat_load_ushort
307 ; GCN-HSA: flat_load_ushort
308 ; GCN-HSA: flat_load_ushort
309 ; GCN-HSA: flat_load_ushort
310 ; GCN-HSA: flat_load_ushort
311 ; GCN-HSA: flat_load_ushort
312 ; GCN-HSA: flat_load_ushort
313 ; GCN-HSA: flat_load_ushort
314 define void @constant_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
315 %load = load <16 x i16>, <16 x i16> addrspace(2)* %in
316 %ext = zext <16 x i16> %load to <16 x i32>
317 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
318 ret void
319 }
320
321 ; FUNC-LABEL: {{^}}constant_sextload_v16i16_to_v16i32:
322 define void @constant_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
323 %load = load <16 x i16>, <16 x i16> addrspace(2)* %in
324 %ext = sext <16 x i16> %load to <16 x i32>
325 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
326 ret void
327 }
328
329 ; FUNC-LABEL: {{^}}constant_zextload_v32i16_to_v32i32:
330 ; GCN-NOHSA: buffer_load_ushort
331 ; GCN-NOHSA: buffer_load_ushort
332 ; GCN-NOHSA: buffer_load_ushort
333 ; GCN-NOHSA: buffer_load_ushort
334 ; GCN-NOHSA: buffer_load_ushort
335 ; GCN-NOHSA: buffer_load_ushort
336 ; GCN-NOHSA: buffer_load_ushort
337 ; GCN-NOHSA: buffer_load_ushort
338 ; GCN-NOHSA: buffer_load_ushort
339 ; GCN-NOHSA: buffer_load_ushort
340 ; GCN-NOHSA: buffer_load_ushort
341 ; GCN-NOHSA: buffer_load_ushort
342 ; GCN-NOHSA: buffer_load_ushort
343 ; GCN-NOHSA: buffer_load_ushort
344 ; GCN-NOHSA: buffer_load_ushort
345 ; GCN-NOHSA: buffer_load_ushort
346 ; GCN-NOHSA: buffer_load_ushort
347 ; GCN-NOHSA: buffer_load_ushort
348 ; GCN-NOHSA: buffer_load_ushort
349 ; GCN-NOHSA: buffer_load_ushort
350 ; GCN-NOHSA: buffer_load_ushort
351 ; GCN-NOHSA: buffer_load_ushort
352 ; GCN-NOHSA: buffer_load_ushort
353 ; GCN-NOHSA: buffer_load_ushort
354 ; GCN-NOHSA: buffer_load_ushort
355 ; GCN-NOHSA: buffer_load_ushort
356 ; GCN-NOHSA: buffer_load_ushort
357 ; GCN-NOHSA: buffer_load_ushort
358 ; GCN-NOHSA: buffer_load_ushort
359 ; GCN-NOHSA: buffer_load_ushort
360 ; GCN-NOHSA: buffer_load_ushort
361 ; GCN-NOHSA: buffer_load_ushort
362
363 ; GCN-HSA: flat_load_ushort
364 ; GCN-HSA: flat_load_ushort
365 ; GCN-HSA: flat_load_ushort
366 ; GCN-HSA: flat_load_ushort
367 ; GCN-HSA: flat_load_ushort
368 ; GCN-HSA: flat_load_ushort
369 ; GCN-HSA: flat_load_ushort
370 ; GCN-HSA: flat_load_ushort
371 ; GCN-HSA: flat_load_ushort
372 ; GCN-HSA: flat_load_ushort
373 ; GCN-HSA: flat_load_ushort
374 ; GCN-HSA: flat_load_ushort
375 ; GCN-HSA: flat_load_ushort
376 ; GCN-HSA: flat_load_ushort
377 ; GCN-HSA: flat_load_ushort
378 ; GCN-HSA: flat_load_ushort
379 ; GCN-HSA: flat_load_ushort
380 ; GCN-HSA: flat_load_ushort
381 ; GCN-HSA: flat_load_ushort
382 ; GCN-HSA: flat_load_ushort
383 ; GCN-HSA: flat_load_ushort
384 ; GCN-HSA: flat_load_ushort
385 ; GCN-HSA: flat_load_ushort
386 ; GCN-HSA: flat_load_ushort
387 ; GCN-HSA: flat_load_ushort
388 ; GCN-HSA: flat_load_ushort
389 ; GCN-HSA: flat_load_ushort
390 ; GCN-HSA: flat_load_ushort
391 ; GCN-HSA: flat_load_ushort
392 ; GCN-HSA: flat_load_ushort
393 ; GCN-HSA: flat_load_ushort
394 ; GCN-HSA: flat_load_ushort
395 define void @constant_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
396 %load = load <32 x i16>, <32 x i16> addrspace(2)* %in
397 %ext = zext <32 x i16> %load to <32 x i32>
398 store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
399 ret void
400 }
401
402 ; FUNC-LABEL: {{^}}constant_sextload_v32i16_to_v32i32:
403 ; GCN-NOHSA: buffer_load_sshort
404 ; GCN-NOHSA: buffer_load_sshort
405 ; GCN-NOHSA: buffer_load_sshort
406 ; GCN-NOHSA: buffer_load_sshort
407 ; GCN-NOHSA: buffer_load_sshort
408 ; GCN-NOHSA: buffer_load_sshort
409 ; GCN-NOHSA: buffer_load_sshort
410 ; GCN-NOHSA: buffer_load_sshort
411 ; GCN-NOHSA: buffer_load_sshort
412 ; GCN-NOHSA: buffer_load_sshort
413 ; GCN-NOHSA: buffer_load_sshort
414 ; GCN-NOHSA: buffer_load_sshort
415 ; GCN-NOHSA: buffer_load_sshort
416 ; GCN-NOHSA: buffer_load_sshort
417 ; GCN-NOHSA: buffer_load_sshort
418 ; GCN-NOHSA: buffer_load_sshort
419 ; GCN-NOHSA: buffer_load_sshort
420 ; GCN-NOHSA: buffer_load_sshort
421 ; GCN-NOHSA: buffer_load_sshort
422 ; GCN-NOHSA: buffer_load_sshort
423 ; GCN-NOHSA: buffer_load_sshort
424 ; GCN-NOHSA: buffer_load_sshort
425 ; GCN-NOHSA: buffer_load_sshort
426 ; GCN-NOHSA: buffer_load_sshort
427 ; GCN-NOHSA: buffer_load_sshort
428 ; GCN-NOHSA: buffer_load_sshort
429 ; GCN-NOHSA: buffer_load_sshort
430 ; GCN-NOHSA: buffer_load_sshort
431 ; GCN-NOHSA: buffer_load_sshort
432 ; GCN-NOHSA: buffer_load_sshort
433 ; GCN-NOHSA: buffer_load_sshort
434 ; GCN-NOHSA: buffer_load_sshort
435
436 ; GCN-HSA: flat_load_sshort
437 ; GCN-HSA: flat_load_sshort
438 ; GCN-HSA: flat_load_sshort
439 ; GCN-HSA: flat_load_sshort
440 ; GCN-HSA: flat_load_sshort
441 ; GCN-HSA: flat_load_sshort
442 ; GCN-HSA: flat_load_sshort
443 ; GCN-HSA: flat_load_sshort
444 ; GCN-HSA: flat_load_sshort
445 ; GCN-HSA: flat_load_sshort
446 ; GCN-HSA: flat_load_sshort
447 ; GCN-HSA: flat_load_sshort
448 ; GCN-HSA: flat_load_sshort
449 ; GCN-HSA: flat_load_sshort
450 ; GCN-HSA: flat_load_sshort
451 ; GCN-HSA: flat_load_sshort
452 ; GCN-HSA: flat_load_sshort
453 ; GCN-HSA: flat_load_sshort
454 ; GCN-HSA: flat_load_sshort
455 ; GCN-HSA: flat_load_sshort
456 ; GCN-HSA: flat_load_sshort
457 ; GCN-HSA: flat_load_sshort
458 ; GCN-HSA: flat_load_sshort
459 ; GCN-HSA: flat_load_sshort
460 ; GCN-HSA: flat_load_sshort
461 ; GCN-HSA: flat_load_sshort
462 ; GCN-HSA: flat_load_sshort
463 ; GCN-HSA: flat_load_sshort
464 ; GCN-HSA: flat_load_sshort
465 ; GCN-HSA: flat_load_sshort
466 ; GCN-HSA: flat_load_sshort
467 ; GCN-HSA: flat_load_sshort
468 define void @constant_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
469 %load = load <32 x i16>, <32 x i16> addrspace(2)* %in
470 %ext = sext <32 x i16> %load to <32 x i32>
471 store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
472 ret void
473 }
474
475 ; FUNC-LABEL: {{^}}constant_zextload_v64i16_to_v64i32:
476 ; GCN-NOHSA: buffer_load_ushort
477 ; GCN-NOHSA: buffer_load_ushort
478 ; GCN-NOHSA: buffer_load_ushort
479 ; GCN-NOHSA: buffer_load_ushort
480 ; GCN-NOHSA: buffer_load_ushort
481 ; GCN-NOHSA: buffer_load_ushort
482 ; GCN-NOHSA: buffer_load_ushort
483 ; GCN-NOHSA: buffer_load_ushort
484 ; GCN-NOHSA: buffer_load_ushort
485 ; GCN-NOHSA: buffer_load_ushort
486 ; GCN-NOHSA: buffer_load_ushort
487 ; GCN-NOHSA: buffer_load_ushort
488 ; GCN-NOHSA: buffer_load_ushort
489 ; GCN-NOHSA: buffer_load_ushort
490 ; GCN-NOHSA: buffer_load_ushort
491 ; GCN-NOHSA: buffer_load_ushort
492 ; GCN-NOHSA: buffer_load_ushort
493 ; GCN-NOHSA: buffer_load_ushort
494 ; GCN-NOHSA: buffer_load_ushort
495 ; GCN-NOHSA: buffer_load_ushort
496 ; GCN-NOHSA: buffer_load_ushort
497 ; GCN-NOHSA: buffer_load_ushort
498 ; GCN-NOHSA: buffer_load_ushort
499 ; GCN-NOHSA: buffer_load_ushort
500 ; GCN-NOHSA: buffer_load_ushort
501 ; GCN-NOHSA: buffer_load_ushort
502 ; GCN-NOHSA: buffer_load_ushort
503 ; GCN-NOHSA: buffer_load_ushort
504 ; GCN-NOHSA: buffer_load_ushort
505 ; GCN-NOHSA: buffer_load_ushort
506 ; GCN-NOHSA: buffer_load_ushort
507 ; GCN-NOHSA: buffer_load_ushort
508 ; GCN-NOHSA: buffer_load_ushort
509 ; GCN-NOHSA: buffer_load_ushort
510 ; GCN-NOHSA: buffer_load_ushort
511 ; GCN-NOHSA: buffer_load_ushort
512 ; GCN-NOHSA: buffer_load_ushort
513 ; GCN-NOHSA: buffer_load_ushort
514 ; GCN-NOHSA: buffer_load_ushort
515 ; GCN-NOHSA: buffer_load_ushort
516 ; GCN-NOHSA: buffer_load_ushort
517 ; GCN-NOHSA: buffer_load_ushort
518 ; GCN-NOHSA: buffer_load_ushort
519 ; GCN-NOHSA: buffer_load_ushort
520 ; GCN-NOHSA: buffer_load_ushort
521 ; GCN-NOHSA: buffer_load_ushort
522 ; GCN-NOHSA: buffer_load_ushort
523 ; GCN-NOHSA: buffer_load_ushort
524 ; GCN-NOHSA: buffer_load_ushort
525 ; GCN-NOHSA: buffer_load_ushort
526 ; GCN-NOHSA: buffer_load_ushort
527 ; GCN-NOHSA: buffer_load_ushort
528 ; GCN-NOHSA: buffer_load_ushort
529 ; GCN-NOHSA: buffer_load_ushort
530 ; GCN-NOHSA: buffer_load_ushort
531 ; GCN-NOHSA: buffer_load_ushort
532 ; GCN-NOHSA: buffer_load_ushort
533 ; GCN-NOHSA: buffer_load_ushort
534 ; GCN-NOHSA: buffer_load_ushort
535 ; GCN-NOHSA: buffer_load_ushort
536 ; GCN-NOHSA: buffer_load_ushort
537 ; GCN-NOHSA: buffer_load_ushort
538 ; GCN-NOHSA: buffer_load_ushort
539 ; GCN-NOHSA: buffer_load_ushort
540
541 ; GCN-HSA: flat_load_ushort
542 ; GCN-HSA: flat_load_ushort
543 ; GCN-HSA: flat_load_ushort
544 ; GCN-HSA: flat_load_ushort
545 ; GCN-HSA: flat_load_ushort
546 ; GCN-HSA: flat_load_ushort
547 ; GCN-HSA: flat_load_ushort
548 ; GCN-HSA: flat_load_ushort
549 ; GCN-HSA: flat_load_ushort
550 ; GCN-HSA: flat_load_ushort
551 ; GCN-HSA: flat_load_ushort
552 ; GCN-HSA: flat_load_ushort
553 ; GCN-HSA: flat_load_ushort
554 ; GCN-HSA: flat_load_ushort
555 ; GCN-HSA: flat_load_ushort
556 ; GCN-HSA: flat_load_ushort
557 ; GCN-HSA: flat_load_ushort
558 ; GCN-HSA: flat_load_ushort
559 ; GCN-HSA: flat_load_ushort
560 ; GCN-HSA: flat_load_ushort
561 ; GCN-HSA: flat_load_ushort
562 ; GCN-HSA: flat_load_ushort
563 ; GCN-HSA: flat_load_ushort
564 ; GCN-HSA: flat_load_ushort
565 ; GCN-HSA: flat_load_ushort
566 ; GCN-HSA: flat_load_ushort
567 ; GCN-HSA: flat_load_ushort
568 ; GCN-HSA: flat_load_ushort
569 ; GCN-HSA: flat_load_ushort
570 ; GCN-HSA: flat_load_ushort
571 ; GCN-HSA: flat_load_ushort
572 ; GCN-HSA: flat_load_ushort
573 ; GCN-HSA: flat_load_ushort
574 ; GCN-HSA: flat_load_ushort
575 ; GCN-HSA: flat_load_ushort
576 ; GCN-HSA: flat_load_ushort
577 ; GCN-HSA: flat_load_ushort
578 ; GCN-HSA: flat_load_ushort
579 ; GCN-HSA: flat_load_ushort
580 ; GCN-HSA: flat_load_ushort
581 ; GCN-HSA: flat_load_ushort
582 ; GCN-HSA: flat_load_ushort
583 ; GCN-HSA: flat_load_ushort
584 ; GCN-HSA: flat_load_ushort
585 ; GCN-HSA: flat_load_ushort
586 ; GCN-HSA: flat_load_ushort
587 ; GCN-HSA: flat_load_ushort
588 ; GCN-HSA: flat_load_ushort
589 ; GCN-HSA: flat_load_ushort
590 ; GCN-HSA: flat_load_ushort
591 ; GCN-HSA: flat_load_ushort
592 ; GCN-HSA: flat_load_ushort
593 ; GCN-HSA: flat_load_ushort
594 ; GCN-HSA: flat_load_ushort
595 ; GCN-HSA: flat_load_ushort
596 ; GCN-HSA: flat_load_ushort
597 ; GCN-HSA: flat_load_ushort
598 ; GCN-HSA: flat_load_ushort
599 ; GCN-HSA: flat_load_ushort
600 ; GCN-HSA: flat_load_ushort
601 ; GCN-HSA: flat_load_ushort
602 ; GCN-HSA: flat_load_ushort
603 ; GCN-HSA: flat_load_ushort
604 ; GCN-HSA: flat_load_ushort
605 define void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
606 %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
607 %ext = zext <64 x i16> %load to <64 x i32>
608 store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
609 ret void
610 }
611
612 ; FUNC-LABEL: {{^}}constant_sextload_v64i16_to_v64i32:
613 define void @constant_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
614 %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
615 %ext = sext <64 x i16> %load to <64 x i32>
616 store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
617 ret void
618 }
619
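; Zero-extending i16 to i64 is expected to pair the zero-extending short load with a
; constant-zero high dword.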
620 ; FUNC-LABEL: {{^}}constant_zextload_i16_to_i64:
621 ; GCN-NOHSA-DAG: buffer_load_ushort v[[LO:[0-9]+]],
622 ; GCN-HSA-DAG: flat_load_ushort v[[LO:[0-9]+]],
623 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
624
625 ; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
626 ; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
627 define void @constant_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
628 %a = load i16, i16 addrspace(2)* %in
629 %ext = zext i16 %a to i64
630 store i64 %ext, i64 addrspace(1)* %out
631 ret void
632 }
633
634 ; FUNC-LABEL: {{^}}constant_sextload_i16_to_i64:
635 ; GCN-NOHSA-DAG: buffer_load_sshort v[[LO:[0-9]+]],
636 ; GCN-HSA-DAG: flat_load_sshort v[[LO:[0-9]+]],
637 ; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
638
639 ; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
640 ; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
641 define void @constant_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
642 %a = load i16, i16 addrspace(2)* %in
643 %ext = sext i16 %a to i64
644 store i64 %ext, i64 addrspace(1)* %out
645 ret void
646 }
647
648 ; FUNC-LABEL: {{^}}constant_zextload_v1i16_to_v1i64:
649 define void @constant_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
650 %load = load <1 x i16>, <1 x i16> addrspace(2)* %in
651 %ext = zext <1 x i16> %load to <1 x i64>
652 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
653 ret void
654 }
655
656 ; FUNC-LABEL: {{^}}constant_sextload_v1i16_to_v1i64:
657 define void @constant_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
658 %load = load <1 x i16>, <1 x i16> addrspace(2)* %in
659 %ext = sext <1 x i16> %load to <1 x i64>
660 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
661 ret void
662 }
663
664 ; FUNC-LABEL: {{^}}constant_zextload_v2i16_to_v2i64:
665 define void @constant_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
666 %load = load <2 x i16>, <2 x i16> addrspace(2)* %in
667 %ext = zext <2 x i16> %load to <2 x i64>
668 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
669 ret void
670 }
671
672 ; FUNC-LABEL: {{^}}constant_sextload_v2i16_to_v2i64:
673 define void @constant_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
674 %load = load <2 x i16>, <2 x i16> addrspace(2)* %in
675 %ext = sext <2 x i16> %load to <2 x i64>
676 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
677 ret void
678 }
679
680 ; FUNC-LABEL: {{^}}constant_zextload_v4i16_to_v4i64:
681 define void @constant_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
682 %load = load <4 x i16>, <4 x i16> addrspace(2)* %in
683 %ext = zext <4 x i16> %load to <4 x i64>
684 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
685 ret void
686 }
687
688 ; FUNC-LABEL: {{^}}constant_sextload_v4i16_to_v4i64:
689 define void @constant_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
690 %load = load <4 x i16>, <4 x i16> addrspace(2)* %in
691 %ext = sext <4 x i16> %load to <4 x i64>
692 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
693 ret void
694 }
695
696 ; FUNC-LABEL: {{^}}constant_zextload_v8i16_to_v8i64:
697 define void @constant_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
698 %load = load <8 x i16>, <8 x i16> addrspace(2)* %in
699 %ext = zext <8 x i16> %load to <8 x i64>
700 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
701 ret void
702 }
703
704 ; FUNC-LABEL: {{^}}constant_sextload_v8i16_to_v8i64:
705 define void @constant_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
706 %load = load <8 x i16>, <8 x i16> addrspace(2)* %in
707 %ext = sext <8 x i16> %load to <8 x i64>
708 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
709 ret void
710 }
711
712 ; FUNC-LABEL: {{^}}constant_zextload_v16i16_to_v16i64:
713 define void @constant_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
714 %load = load <16 x i16>, <16 x i16> addrspace(2)* %in
715 %ext = zext <16 x i16> %load to <16 x i64>
716 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
717 ret void
718 }
719
720 ; FUNC-LABEL: {{^}}constant_sextload_v16i16_to_v16i64:
721 define void @constant_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
722 %load = load <16 x i16>, <16 x i16> addrspace(2)* %in
723 %ext = sext <16 x i16> %load to <16 x i64>
724 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
725 ret void
726 }
727
728 ; FUNC-LABEL: {{^}}constant_zextload_v32i16_to_v32i64:
729 define void @constant_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
730 %load = load <32 x i16>, <32 x i16> addrspace(2)* %in
731 %ext = zext <32 x i16> %load to <32 x i64>
732 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
733 ret void
734 }
735
736 ; FUNC-LABEL: {{^}}constant_sextload_v32i16_to_v32i64:
737 define void @constant_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
738 %load = load <32 x i16>, <32 x i16> addrspace(2)* %in
739 %ext = sext <32 x i16> %load to <32 x i64>
740 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
741 ret void
742 }
743
744 ; ; XFUNC-LABEL: {{^}}constant_zextload_v64i16_to_v64i64:
745 ; define void @constant_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
746 ; %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
747 ; %ext = zext <64 x i16> %load to <64 x i64>
748 ; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
749 ; ret void
750 ; }
751
752 ; ; XFUNC-LABEL: {{^}}constant_sextload_v64i16_to_v64i64:
753 ; define void @constant_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
754 ; %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
755 ; %ext = sext <64 x i16> %load to <64 x i64>
756 ; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
757 ; ret void
758 ; }
759
760 attributes #0 = { nounwind }
0 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
1 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
3 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
4
5 ; FUNC-LABEL: {{^}}constant_load_i32:
6 ; GCN: s_load_dword s{{[0-9]+}}
7
8 ; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
9 define void @constant_load_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) #0 {
10 entry:
11 %ld = load i32, i32 addrspace(2)* %in
12 store i32 %ld, i32 addrspace(1)* %out
13 ret void
14 }
15
16 ; FUNC-LABEL: {{^}}constant_load_v2i32:
17 ; GCN: s_load_dwordx2
18
19 ; EG: VTX_READ_64
20 define void @constant_load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 {
21 entry:
22 %ld = load <2 x i32>, <2 x i32> addrspace(2)* %in
23 store <2 x i32> %ld, <2 x i32> addrspace(1)* %out
24 ret void
25 }
26
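; The 96-bit <3 x i32> constant load is presumably widened to a full 128-bit
; s_load_dwordx4.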
27 ; FUNC-LABEL: {{^}}constant_load_v3i32:
28 ; GCN: s_load_dwordx4
29
30 ; EG: VTX_READ_128
31 define void @constant_load_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(2)* %in) #0 {
32 entry:
33 %ld = load <3 x i32>, <3 x i32> addrspace(2)* %in
34 store <3 x i32> %ld, <3 x i32> addrspace(1)* %out
35 ret void
36 }
37
38 ; FUNC-LABEL: {{^}}constant_load_v4i32:
39 ; GCN: s_load_dwordx4
40
41 ; EG: VTX_READ_128
42 define void @constant_load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 {
43 entry:
44 %ld = load <4 x i32>, <4 x i32> addrspace(2)* %in
45 store <4 x i32> %ld, <4 x i32> addrspace(1)* %out
46 ret void
47 }
48
49 ; FUNC-LABEL: {{^}}constant_load_v8i32:
50 ; GCN: s_load_dwordx8
51
52 ; EG: VTX_READ_128
53 ; EG: VTX_READ_128
54 define void @constant_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 {
55 entry:
56 %ld = load <8 x i32>, <8 x i32> addrspace(2)* %in
57 store <8 x i32> %ld, <8 x i32> addrspace(1)* %out
58 ret void
59 }
60
61 ; FUNC-LABEL: {{^}}constant_load_v16i32:
62 ; GCN: s_load_dwordx16
63
64 ; EG: VTX_READ_128
65 ; EG: VTX_READ_128
66 ; EG: VTX_READ_128
67 ; EG: VTX_READ_128
68 define void @constant_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 {
69 entry:
70 %ld = load <16 x i32>, <16 x i32> addrspace(2)* %in
71 store <16 x i32> %ld, <16 x i32> addrspace(1)* %out
72 ret void
73 }
74
75 ; FUNC-LABEL: {{^}}constant_zextload_i32_to_i64:
76 ; GCN-DAG: s_load_dword s[[SLO:[0-9]+]],
77 ; GCN-DAG: v_mov_b32_e32 v[[SHI:[0-9]+]], 0{{$}}
78 ; GCN: store_dwordx2
79
80 ; EG: MEM_RAT
81 ; EG: MEM_RAT
82 define void @constant_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(2)* %in) #0 {
83 %ld = load i32, i32 addrspace(2)* %in
84 %ext = zext i32 %ld to i64
85 store i64 %ext, i64 addrspace(1)* %out
86 ret void
87 }
88
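; Sign-extending i32 to i64 should stay in scalar registers: the high dword comes from
; an arithmetic shift right of the loaded dword by 31.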
89 ; FUNC-LABEL: {{^}}constant_sextload_i32_to_i64:
90 ; GCN: s_load_dword s[[SLO:[0-9]+]]
91 ; GCN: s_ashr_i32 s[[HI:[0-9]+]], s[[SLO]], 31
92 ; GCN: store_dwordx2
93
94 ; EG: MEM_RAT
95 ; EG: MEM_RAT
96 ; EG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x
97 ; EG: 31
98 define void @constant_sextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(2)* %in) #0 {
99 %ld = load i32, i32 addrspace(2)* %in
100 %ext = sext i32 %ld to i64
101 store i64 %ext, i64 addrspace(1)* %out
102 ret void
103 }
104
105 ; FUNC-LABEL: {{^}}constant_zextload_v1i32_to_v1i64:
106 ; GCN: s_load_dword
107 ; GCN: store_dwordx2
108 define void @constant_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(2)* %in) #0 {
109 %ld = load <1 x i32>, <1 x i32> addrspace(2)* %in
110 %ext = zext <1 x i32> %ld to <1 x i64>
111 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
112 ret void
113 }
114
115 ; FUNC-LABEL: {{^}}constant_sextload_v1i32_to_v1i64:
116 ; GCN: s_load_dword s[[LO:[0-9]+]]
117 ; GCN: s_ashr_i32 s[[HI:[0-9]+]], s[[LO]], 31
118 ; GCN: store_dwordx2
119 define void @constant_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(2)* %in) #0 {
120 %ld = load <1 x i32>, <1 x i32> addrspace(2)* %in
121 %ext = sext <1 x i32> %ld to <1 x i64>
122 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
123 ret void
124 }
125
126 ; FUNC-LABEL: {{^}}constant_zextload_v2i32_to_v2i64:
127 ; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
128 ; GCN: store_dwordx4
129 define void @constant_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 {
130 %ld = load <2 x i32>, <2 x i32> addrspace(2)* %in
131 %ext = zext <2 x i32> %ld to <2 x i64>
132 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
133 ret void
134 }
135
136 ; FUNC-LABEL: {{^}}constant_sextload_v2i32_to_v2i64:
137 ; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
138
139 ; GCN-DAG: s_ashr_i32
140 ; GCN-DAG: s_ashr_i32
141
142 ; GCN: store_dwordx4
143 define void @constant_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 {
144 %ld = load <2 x i32>, <2 x i32> addrspace(2)* %in
145 %ext = sext <2 x i32> %ld to <2 x i64>
146 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
147 ret void
148 }
149
150 ; FUNC-LABEL: {{^}}constant_zextload_v4i32_to_v4i64:
151 ; GCN: s_load_dwordx4
152
153 ; GCN: store_dwordx4
154 ; GCN: store_dwordx4
155 define void @constant_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 {
156 %ld = load <4 x i32>, <4 x i32> addrspace(2)* %in
157 %ext = zext <4 x i32> %ld to <4 x i64>
158 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
159 ret void
160 }
161
162 ; FUNC-LABEL: {{^}}constant_sextload_v4i32_to_v4i64:
163 ; GCN: s_load_dwordx4
164
165 ; GCN: s_ashr_i32
166 ; GCN: s_ashr_i32
167 ; GCN: s_ashr_i32
168 ; GCN: s_ashr_i32
169
170 ; GCN: store_dwordx4
171 ; GCN: store_dwordx4
172 define void @constant_sextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 {
173 %ld = load <4 x i32>, <4 x i32> addrspace(2)* %in
174 %ext = sext <4 x i32> %ld to <4 x i64>
175 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
176 ret void
177 }
178
179 ; FUNC-LABEL: {{^}}constant_zextload_v8i32_to_v8i64:
180 ; GCN: s_load_dwordx8
181
182 ; GCN-NOHSA-DAG: buffer_store_dwordx4
183 ; GCN-NOHSA-DAG: buffer_store_dwordx4
184 ; GCN-NOHSA-DAG: buffer_store_dwordx4
185 ; GCN-NOHSA-DAG: buffer_store_dwordx4
186
187 ; GCN-HSA-DAG: flat_store_dwordx4
188 ; GCN-HSA-DAG: flat_store_dwordx4
189 ; GCN-SA-DAG: flat_store_dwordx4
190 ; GCN-HSA-DAG: flat_store_dwordx4
191 define void @constant_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 {
192 %ld = load <8 x i32>, <8 x i32> addrspace(2)* %in
193 %ext = zext <8 x i32> %ld to <8 x i64>
194 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
195 ret void
196 }
197
198 ; FUNC-LABEL: {{^}}constant_sextload_v8i32_to_v8i64:
199 ; GCN: s_load_dwordx8
200
201 ; GCN: s_ashr_i32
202 ; GCN: s_ashr_i32
203 ; GCN: s_ashr_i32
204 ; GCN: s_ashr_i32
205 ; GCN: s_ashr_i32
206 ; GCN: s_ashr_i32
207 ; GCN: s_ashr_i32
208 ; GCN: s_ashr_i32
209
210 ; GCN-NOHSA-DAG: buffer_store_dwordx4
211 ; GCN-NOHSA-DAG: buffer_store_dwordx4
212 ; GCN-NOHSA-DAG: buffer_store_dwordx4
213 ; GCN-NOHSA-DAG: buffer_store_dwordx4
214
215 ; GCN-HSA-DAG: flat_store_dwordx4
216 ; GCN-HSA-DAG: flat_store_dwordx4
217 ; GCN-HSA-DAG: flat_store_dwordx4
218 ; GCN-HSA-DAG: flat_store_dwordx4
219 define void @constant_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 {
220 %ld = load <8 x i32>, <8 x i32> addrspace(2)* %in
221 %ext = sext <8 x i32> %ld to <8 x i64>
222 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
223 ret void
224 }
225
226 ; FUNC-LABEL: {{^}}constant_sextload_v16i32_to_v16i64:
227 ; GCN: s_load_dwordx16
228
229
230 ; GCN-DAG: s_ashr_i32
231
232 ; GCN: store_dwordx4
233 ; GCN: store_dwordx4
234 ; GCN: store_dwordx4
235 ; GCN: store_dwordx4
236 ; GCN: store_dwordx4
237 ; GCN: store_dwordx4
238 ; GCN: store_dwordx4
239 ; GCN: store_dwordx4
240 define void @constant_sextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 {
241 %ld = load <16 x i32>, <16 x i32> addrspace(2)* %in
242 %ext = sext <16 x i32> %ld to <16 x i64>
243 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
244 ret void
245 }
246
247 ; FUNC-LABEL: {{^}}constant_zextload_v16i32_to_v16i64:
248 ; GCN: s_load_dwordx16
249
250 ; GCN-NOHSA: buffer_store_dwordx4
251 ; GCN-NOHSA: buffer_store_dwordx4
252 ; GCN-NOHSA: buffer_store_dwordx4
253 ; GCN-NOHSA: buffer_store_dwordx4
254 ; GCN-NOHSA: buffer_store_dwordx4
255 ; GCN-NOHSA: buffer_store_dwordx4
256 ; GCN-NOHSA: buffer_store_dwordx4
257 ; GCN-NOHSA: buffer_store_dwordx4
258
259 ; GCN-HSA: flat_store_dwordx4
260 ; GCN-HSA: flat_store_dwordx4
261 ; GCN-HSA: flat_store_dwordx4
262 ; GCN-HSA: flat_store_dwordx4
263 ; GCN-HSA: flat_store_dwordx4
264 ; GCN-HSA: flat_store_dwordx4
265 ; GCN-HSA: flat_store_dwordx4
266 ; GCN-HSA: flat_store_dwordx4
267 define void @constant_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 {
268 %ld = load <16 x i32>, <16 x i32> addrspace(2)* %in
269 %ext = zext <16 x i32> %ld to <16 x i64>
270 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
271 ret void
272 }
273
274 ; FUNC-LABEL: {{^}}constant_sextload_v32i32_to_v32i64:
275
276 ; GCN: s_load_dwordx16
277 ; GCN: s_load_dwordx16
278
279 ; GCN-NOHSA: buffer_store_dwordx4
280 ; GCN-NOHSA: buffer_store_dwordx4
281 ; GCN-NOHSA: buffer_store_dwordx4
282 ; GCN-NOHSA: buffer_store_dwordx4
283
284 ; GCN-NOHSA: buffer_store_dwordx4
285 ; GCN-NOHSA: buffer_store_dwordx4
286 ; GCN-NOHSA: buffer_store_dwordx4
287 ; GCN-NOHSA: buffer_store_dwordx4
288
289 ; GCN-NOHSA: buffer_store_dwordx4
290 ; GCN-NOHSA: buffer_store_dwordx4
291 ; GCN-NOHSA: buffer_store_dwordx4
292 ; GCN-NOHSA: buffer_store_dwordx4
293
294 ; GCN-NOHSA: buffer_store_dwordx4
295 ; GCN-NOHSA: buffer_store_dwordx4
296 ; GCN-NOHSA: buffer_store_dwordx4
297 ; GCN-NOHSA: buffer_store_dwordx4
298
299 ; GCN-HSA: flat_store_dwordx4
300 ; GCN-HSA: flat_store_dwordx4
301 ; GCN-HSA: flat_store_dwordx4
302 ; GCN-HSA: flat_store_dwordx4
303
304 ; GCN-HSA: flat_store_dwordx4
305 ; GCN-HSA: flat_store_dwordx4
306 ; GCN-HSA: flat_store_dwordx4
307 ; GCN-HSA: flat_store_dwordx4
308
309 ; GCN-HSA: flat_store_dwordx4
310 ; GCN-HSA: flat_store_dwordx4
311 ; GCN-HSA: flat_store_dwordx4
312 ; GCN-HSA: flat_store_dwordx4
313
314 ; GCN-HSA: flat_store_dwordx4
315 ; GCN-HSA: flat_store_dwordx4
316 ; GCN-HSA: flat_store_dwordx4
317 ; GCN-HSA: flat_store_dwordx4
318
319 define void @constant_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(2)* %in) #0 {
320 %ld = load <32 x i32>, <32 x i32> addrspace(2)* %in
321 %ext = sext <32 x i32> %ld to <32 x i64>
322 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
323 ret void
324 }
325
326 ; FUNC-LABEL: {{^}}constant_zextload_v32i32_to_v32i64:
327 ; GCN: s_load_dwordx16
328 ; GCN: s_load_dwordx16
329
330 ; GCN-NOHSA-DAG: buffer_store_dwordx4
331 ; GCN-NOHSA-DAG: buffer_store_dwordx4
332 ; GCN-NOHSA-DAG: buffer_store_dwordx4
333 ; GCN-NOHSA-DAG: buffer_store_dwordx4
334
335 ; GCN-NOHSA-DAG: buffer_store_dwordx4
336 ; GCN-NOHSA-DAG: buffer_store_dwordx4
337 ; GCN-NOHSA-DAG: buffer_store_dwordx4
338 ; GCN-NOHSA-DAG: buffer_store_dwordx4
339
340 ; GCN-NOHSA-DAG: buffer_store_dwordx4
341 ; GCN-NOHSA-DAG: buffer_store_dwordx4
342 ; GCN-NOHSA-DAG: buffer_store_dwordx4
343 ; GCN-NOHSA-DAG: buffer_store_dwordx4
344
345 ; GCN-NOHSA-DAG: buffer_store_dwordx4
346 ; GCN-NOHSA-DAG: buffer_store_dwordx4
347 ; GCN-NOHSA-DAG: buffer_store_dwordx4
348 ; GCN-NOHSA-DAG: buffer_store_dwordx4
349
350
351 ; GCN-HSA-DAG: flat_store_dwordx4
352 ; GCN-HSA-DAG: flat_store_dwordx4
353 ; GCN-HSA-DAG: flat_store_dwordx4
354 ; GCN-HSA-DAG: flat_store_dwordx4
355
356 ; GCN-HSA-DAG: flat_store_dwordx4
357 ; GCN-HSA-DAG: flat_store_dwordx4
358 ; GCN-HSA-DAG: flat_store_dwordx4
359 ; GCN-HSA-DAG: flat_store_dwordx4
360
361 ; GCN-HSA-DAG: flat_store_dwordx4
362 ; GCN-HSA-DAG: flat_store_dwordx4
363 ; GCN-HSA-DAG: flat_store_dwordx4
364 ; GCN-HSA-DAG: flat_store_dwordx4
365
366 ; GCN-HSA-DAG: flat_store_dwordx4
367 ; GCN-HSA-DAG: flat_store_dwordx4
368 ; GCN-HSA-DAG: flat_store_dwordx4
369 ; GCN-HSA-DAG: flat_store_dwordx4
370 define void @constant_zextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(2)* %in) #0 {
371 %ld = load <32 x i32>, <32 x i32> addrspace(2)* %in
372 %ext = zext <32 x i32> %ld to <32 x i64>
373 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
374 ret void
375 }
376
377 attributes #0 = { nounwind }
0 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=SI -check-prefix=FUNC %s
1 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=SI -check-prefix=FUNC %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=VI -check-prefix=FUNC %s
3 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
4
5
6 ; FUNC-LABEL: {{^}}constant_load_i64:
7 ; GCN: s_load_dwordx2 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
8 ; EG: VTX_READ_64
9 define void @constant_load_i64(i64 addrspace(1)* %out, i64 addrspace(2)* %in) #0 {
10 %ld = load i64, i64 addrspace(2)* %in
11 store i64 %ld, i64 addrspace(1)* %out
12 ret void
13 }
14
15 ; FUNC-LABEL: {{^}}constant_load_v2i64:
16 ; GCN: s_load_dwordx4
17
18 ; EG: VTX_READ_128
19 define void @constant_load_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(2)* %in) #0 {
20 entry:
21 %ld = load <2 x i64>, <2 x i64> addrspace(2)* %in
22 store <2 x i64> %ld, <2 x i64> addrspace(1)* %out
23 ret void
24 }
25
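; The <3 x i64> load is expected to split into a dwordx4 plus a dwordx2 at
; byte offset 16; SI encodes the SMRD offset in dword units (0x4) while VI
; uses a byte offset (0x10).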
26 ; FUNC-LABEL: {{^}}constant_load_v3i64:
27 ; GCN-DAG: s_load_dwordx4 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
28 ; SI-DAG: s_load_dwordx2 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x4{{$}}
29 ; VI-DAG: s_load_dwordx2 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x10{{$}}
30
31 ; EG-DAG: VTX_READ_32
32 ; EG-DAG: VTX_READ_32
33 ; EG-DAG: VTX_READ_32
34 ; EG-DAG: VTX_READ_32
35 ; EG-DAG: VTX_READ_32
36 ; EG-DAG: VTX_READ_32
37 define void @constant_load_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(2)* %in) #0 {
38 entry:
39 %ld = load <3 x i64>, <3 x i64> addrspace(2)* %in
40 store <3 x i64> %ld, <3 x i64> addrspace(1)* %out
41 ret void
42 }
43
44 ; FUNC-LABEL: {{^}}constant_load_v4i64:
45 ; GCN: s_load_dwordx8
46
47 ; EG: VTX_READ_128
48 ; EG: VTX_READ_128
49 define void @constant_load_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(2)* %in) #0 {
50 entry:
51 %ld = load <4 x i64>, <4 x i64> addrspace(2)* %in
52 store <4 x i64> %ld, <4 x i64> addrspace(1)* %out
53 ret void
54 }
55
56 ; FUNC-LABEL: {{^}}constant_load_v8i64:
57 ; GCN: s_load_dwordx16
58
59 ; EG: VTX_READ_128
60 ; EG: VTX_READ_128
61 ; EG: VTX_READ_128
62 ; EG: VTX_READ_128
63 define void @constant_load_v8i64(<8 x i64> addrspace(1)* %out, <8 x i64> addrspace(2)* %in) #0 {
64 entry:
65 %ld = load <8 x i64>, <8 x i64> addrspace(2)* %in
66 store <8 x i64> %ld, <8 x i64> addrspace(1)* %out
67 ret void
68 }
69
70 ; FUNC-LABEL: {{^}}constant_load_v16i64:
71 ; GCN: s_load_dwordx16
72 ; GCN: s_load_dwordx16
73
74 ; EG: VTX_READ_128
75 ; EG: VTX_READ_128
76 ; EG: VTX_READ_128
77 ; EG: VTX_READ_128
78 ; EG: VTX_READ_128
79 ; EG: VTX_READ_128
80 ; EG: VTX_READ_128
81 ; EG: VTX_READ_128
82 define void @constant_load_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> addrspace(2)* %in) #0 {
83 entry:
84 %ld = load <16 x i64>, <16 x i64> addrspace(2)* %in
85 store <16 x i64> %ld, <16 x i64> addrspace(1)* %out
86 ret void
87 }
88
89 attributes #0 = { nounwind }
0 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
1 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
3 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
4
5
6 ; FUNC-LABEL: {{^}}constant_load_i8:
7 ; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}}
8 ; GCN-HSA: flat_load_ubyte
9
10 ; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
11 define void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
12 entry:
13 %ld = load i8, i8 addrspace(2)* %in
14 store i8 %ld, i8 addrspace(1)* %out
15 ret void
16 }
17
18 ; FUNC-LABEL: {{^}}constant_load_v2i8:
19 ; GCN-NOHSA: buffer_load_ushort v
20 ; GCN-HSA: flat_load_ushort v
21
22 ; EG: VTX_READ_16
23 define void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
24 entry:
25 %ld = load <2 x i8>, <2 x i8> addrspace(2)* %in
26 store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
27 ret void
28 }
29
30 ; FUNC-LABEL: {{^}}constant_load_v3i8:
31 ; GCN: s_load_dword s
32
33 ; EG-DAG: VTX_READ_32
34 define void @constant_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
35 entry:
36 %ld = load <3 x i8>, <3 x i8> addrspace(2)* %in
37 store <3 x i8> %ld, <3 x i8> addrspace(1)* %out
38 ret void
39 }
40
41 ; FUNC-LABEL: {{^}}constant_load_v4i8:
42 ; GCN: s_load_dword s
43
44 ; EG: VTX_READ_32
45 define void @constant_load_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
46 entry:
47 %ld = load <4 x i8>, <4 x i8> addrspace(2)* %in
48 store <4 x i8> %ld, <4 x i8> addrspace(1)* %out
49 ret void
50 }
51
52 ; FUNC-LABEL: {{^}}constant_load_v8i8:
53 ; GCN: s_load_dwordx2
54
55 ; EG: VTX_READ_64
56 define void @constant_load_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
57 entry:
58 %ld = load <8 x i8>, <8 x i8> addrspace(2)* %in
59 store <8 x i8> %ld, <8 x i8> addrspace(1)* %out
60 ret void
61 }
62
63 ; FUNC-LABEL: {{^}}constant_load_v16i8:
64 ; GCN: s_load_dwordx4
65
66 ; EG: VTX_READ_128
67 define void @constant_load_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
68 entry:
69 %ld = load <16 x i8>, <16 x i8> addrspace(2)* %in
70 store <16 x i8> %ld, <16 x i8> addrspace(1)* %out
71 ret void
72 }
73
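; Zero-extending byte loads are expected to select buffer/flat_load_ubyte and
; sign-extending ones buffer/flat_load_sbyte; EG reads the byte with
; VTX_READ_8 and, for the signed case, sign-extends it with an 8-bit BFE_INT.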
74 ; FUNC-LABEL: {{^}}constant_zextload_i8_to_i32:
75 ; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}},
76 ; GCN-HSA: flat_load_ubyte
77
78 ; EG: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
79 define void @constant_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
80 %a = load i8, i8 addrspace(2)* %in
81 %ext = zext i8 %a to i32
82 store i32 %ext, i32 addrspace(1)* %out
83 ret void
84 }
85
86 ; FUNC-LABEL: {{^}}constant_sextload_i8_to_i32:
87 ; GCN-NOHSA: buffer_load_sbyte
88 ; GCN-HSA: flat_load_sbyte
89
90 ; EG: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
91 ; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
92 ; EG: 8
93 define void @constant_sextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
94 %ld = load i8, i8 addrspace(2)* %in
95 %ext = sext i8 %ld to i32
96 store i32 %ext, i32 addrspace(1)* %out
97 ret void
98 }
99
100 ; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i32:
101 define void @constant_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
102 %load = load <1 x i8>, <1 x i8> addrspace(2)* %in
103 %ext = zext <1 x i8> %load to <1 x i32>
104 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
105 ret void
106 }
107
108 ; FUNC-LABEL: {{^}}constant_sextload_v1i8_to_v1i32:
109 define void @constant_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
110 %load = load <1 x i8>, <1 x i8> addrspace(2)* %in
111 %ext = sext <1 x i8> %load to <1 x i32>
112 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
113 ret void
114 }
115
116 ; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i32:
117 ; GCN-NOHSA: buffer_load_ubyte
118 ; GCN-NOHSA: buffer_load_ubyte
119 ; GCN-HSA: flat_load_ubyte
120 ; GCN-HSA: flat_load_ubyte
121 ; EG: VTX_READ_8
122 ; EG: VTX_READ_8
123 define void @constant_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
124 %load = load <2 x i8>, <2 x i8> addrspace(2)* %in
125 %ext = zext <2 x i8> %load to <2 x i32>
126 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
127 ret void
128 }
129
130 ; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i32:
131 ; GCN-NOHSA: buffer_load_sbyte
132 ; GCN-NOHSA: buffer_load_sbyte
133 ; GCN-HSA: flat_load_sbyte
134 ; GCN-HSA: flat_load_sbyte
135
136 ; EG-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
137 ; EG-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
138 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
139 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
140 ; EG-DAG: 8
141 ; EG-DAG: 8
142 define void @constant_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
143 %load = load <2 x i8>, <2 x i8> addrspace(2)* %in
144 %ext = sext <2 x i8> %load to <2 x i32>
145 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
146 ret void
147 }
148
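; The <3 x i8> source fits in a single s_load_dword; the elements are then
; expected to be unpacked with s_bfe_u32/s_and_b32 for the zero extension and
; s_bfe_i32 for the sign extension.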
149 ; FUNC-LABEL: {{^}}constant_zextload_v3i8_to_v3i32:
150 ; GCN: s_load_dword s
151
152 ; GCN-DAG: s_bfe_u32
153 ; GCN-DAG: s_bfe_u32
154 ; GCN-DAG: s_and_b32
155 define void @constant_zextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
156 entry:
157 %ld = load <3 x i8>, <3 x i8> addrspace(2)* %in
158 %ext = zext <3 x i8> %ld to <3 x i32>
159 store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
160 ret void
161 }
162
163 ; FUNC-LABEL: {{^}}constant_sextload_v3i8_to_v3i32:
164 ; GCN: s_load_dword s
165
166 ; GCN-DAG: s_bfe_i32
167 ; GCN-DAG: s_bfe_i32
168 ; GCN-DAG: s_bfe_i32
169 define void @constant_sextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
170 entry:
171 %ld = load <3 x i8>, <3 x i8> addrspace(2)* %in
172 %ext = sext <3 x i8> %ld to <3 x i32>
173 store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
174 ret void
175 }
176
177 ; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i32:
178 ; GCN-NOHSA: buffer_load_ubyte
179 ; GCN-NOHSA: buffer_load_ubyte
180 ; GCN-NOHSA: buffer_load_ubyte
181 ; GCN-NOHSA: buffer_load_ubyte
182 ; GCN-HSA: flat_load_ubyte
183 ; GCN-HSA: flat_load_ubyte
184 ; GCN-HSA: flat_load_ubyte
185 ; GCN-HSA: flat_load_ubyte
186
187 ; EG: VTX_READ_8
188 ; EG: VTX_READ_8
189 ; EG: VTX_READ_8
190 ; EG: VTX_READ_8
191 define void @constant_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
192 %load = load <4 x i8>, <4 x i8> addrspace(2)* %in
193 %ext = zext <4 x i8> %load to <4 x i32>
194 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
195 ret void
196 }
197
198 ; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i32:
199 ; GCN-NOHSA: buffer_load_sbyte
200 ; GCN-NOHSA: buffer_load_sbyte
201 ; GCN-NOHSA: buffer_load_sbyte
202 ; GCN-NOHSA: buffer_load_sbyte
203 ; GCN-HSA: flat_load_sbyte
204 ; GCN-HSA: flat_load_sbyte
205 ; GCN-HSA: flat_load_sbyte
206 ; GCN-HSA: flat_load_sbyte
207
208 ; EG-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
209 ; EG-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
210 ; EG-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
211 ; EG-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
212 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
213 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
214 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Z]], 0.0, literal
215 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_W]], 0.0, literal
216 ; EG-DAG: 8
217 ; EG-DAG: 8
218 ; EG-DAG: 8
219 ; EG-DAG: 8
220 define void @constant_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
221 %load = load <4 x i8>, <4 x i8> addrspace(2)* %in
222 %ext = sext <4 x i8> %load to <4 x i32>
223 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
224 ret void
225 }
226
227 ; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i32:
228 ; GCN-NOHSA: buffer_load_ubyte
229 ; GCN-NOHSA: buffer_load_ubyte
230 ; GCN-NOHSA: buffer_load_ubyte
231 ; GCN-NOHSA: buffer_load_ubyte
232 ; GCN-NOHSA: buffer_load_ubyte
233 ; GCN-NOHSA: buffer_load_ubyte
234 ; GCN-NOHSA: buffer_load_ubyte
235 ; GCN-NOHSA: buffer_load_ubyte
236
237 ; GCN-HSA: flat_load_ubyte
238 ; GCN-HSA: flat_load_ubyte
239 ; GCN-HSA: flat_load_ubyte
240 ; GCN-HSA: flat_load_ubyte
241 ; GCN-HSA: flat_load_ubyte
242 ; GCN-HSA: flat_load_ubyte
243 ; GCN-HSA: flat_load_ubyte
244 ; GCN-HSA: flat_load_ubyte
245 define void @constant_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
246 %load = load <8 x i8>, <8 x i8> addrspace(2)* %in
247 %ext = zext <8 x i8> %load to <8 x i32>
248 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
249 ret void
250 }
251
252 ; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i32:
253 ; GCN-NOHSA: buffer_load_sbyte
254 ; GCN-NOHSA: buffer_load_sbyte
255 ; GCN-NOHSA: buffer_load_sbyte
256 ; GCN-NOHSA: buffer_load_sbyte
257 ; GCN-NOHSA: buffer_load_sbyte
258 ; GCN-NOHSA: buffer_load_sbyte
259 ; GCN-NOHSA: buffer_load_sbyte
260 ; GCN-NOHSA: buffer_load_sbyte
261
262 ; GCN-HSA: flat_load_sbyte
263 ; GCN-HSA: flat_load_sbyte
264 ; GCN-HSA: flat_load_sbyte
265 ; GCN-HSA: flat_load_sbyte
266 ; GCN-HSA: flat_load_sbyte
267 ; GCN-HSA: flat_load_sbyte
268 ; GCN-HSA: flat_load_sbyte
269 ; GCN-HSA: flat_load_sbyte
270 define void @constant_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
271 %load = load <8 x i8>, <8 x i8> addrspace(2)* %in
272 %ext = sext <8 x i8> %load to <8 x i32>
273 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
274 ret void
275 }
276
277 ; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i32:
278 define void @constant_zextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
279 %load = load <16 x i8>, <16 x i8> addrspace(2)* %in
280 %ext = zext <16 x i8> %load to <16 x i32>
281 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
282 ret void
283 }
284
285 ; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i32:
286 define void @constant_sextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
287 %load = load <16 x i8>, <16 x i8> addrspace(2)* %in
288 %ext = sext <16 x i8> %load to <16 x i32>
289 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
290 ret void
291 }
292
293 ; FUNC-LABEL: {{^}}constant_zextload_v32i8_to_v32i32:
294 define void @constant_zextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
295 %load = load <32 x i8>, <32 x i8> addrspace(2)* %in
296 %ext = zext <32 x i8> %load to <32 x i32>
297 store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
298 ret void
299 }
300
301 ; FUNC-LABEL: {{^}}constant_sextload_v32i8_to_v32i32:
302 define void @constant_sextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
303 %load = load <32 x i8>, <32 x i8> addrspace(2)* %in
304 %ext = sext <32 x i8> %load to <32 x i32>
305 store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
306 ret void
307 }
308
309 ; FUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i32:
310 define void @constant_zextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
311 %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
312 %ext = zext <64 x i8> %load to <64 x i32>
313 store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
314 ret void
315 }
316
317 ; FUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i32:
318 define void @constant_sextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
319 %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
320 %ext = sext <64 x i8> %load to <64 x i32>
321 store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
322 ret void
323 }
324
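; For the 64-bit results the high dword is expected to be a literal 0 for the
; zero extension, while the signed variant below derives it from the loaded
; byte with a 31-bit arithmetic shift.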
325 ; FUNC-LABEL: {{^}}constant_zextload_i8_to_i64:
326 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
327
328 ; GCN-NOHSA-DAG: buffer_load_ubyte v[[LO:[0-9]+]],
329 ; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
330
331 ; GCN-HSA-DAG: flat_load_ubyte v[[LO:[0-9]+]],
332 ; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]]
333 define void @constant_zextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
334 %a = load i8, i8 addrspace(2)* %in
335 %ext = zext i8 %a to i64
336 store i64 %ext, i64 addrspace(1)* %out
337 ret void
338 }
339
340 ; FUNC-LABEL: {{^}}constant_sextload_i8_to_i64:
341 ; GCN-NOHSA: buffer_load_sbyte v[[LO:[0-9]+]],
342 ; GCN-HSA: flat_load_sbyte v[[LO:[0-9]+]],
343 ; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
344
345 ; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
346 ; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
347 define void @constant_sextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
348 %a = load i8, i8 addrspace(2)* %in
349 %ext = sext i8 %a to i64
350 store i64 %ext, i64 addrspace(1)* %out
351 ret void
352 }
353
354 ; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i64:
355 define void @constant_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
356 %load = load <1 x i8>, <1 x i8> addrspace(2)* %in
357 %ext = zext <1 x i8> %load to <1 x i64>
358 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
359 ret void
360 }
361
362 ; FUNC-LABEL: {{^}}constant_sextload_v1i8_to_v1i64:
363 define void @constant_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
364 %load = load <1 x i8>, <1 x i8> addrspace(2)* %in
365 %ext = sext <1 x i8> %load to <1 x i64>
366 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
367 ret void
368 }
369
370 ; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i64:
371 define void @constant_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
372 %load = load <2 x i8>, <2 x i8> addrspace(2)* %in
373 %ext = zext <2 x i8> %load to <2 x i64>
374 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
375 ret void
376 }
377
378 ; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i64:
379 define void @constant_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
380 %load = load <2 x i8>, <2 x i8> addrspace(2)* %in
381 %ext = sext <2 x i8> %load to <2 x i64>
382 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
383 ret void
384 }
385
386 ; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i64:
387 define void @constant_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
388 %load = load <4 x i8>, <4 x i8> addrspace(2)* %in
389 %ext = zext <4 x i8> %load to <4 x i64>
390 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
391 ret void
392 }
393
394 ; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i64:
395 define void @constant_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
396 %load = load <4 x i8>, <4 x i8> addrspace(2)* %in
397 %ext = sext <4 x i8> %load to <4 x i64>
398 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
399 ret void
400 }
401
402 ; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i64:
403 define void @constant_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
404 %load = load <8 x i8>, <8 x i8> addrspace(2)* %in
405 %ext = zext <8 x i8> %load to <8 x i64>
406 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
407 ret void
408 }
409
410 ; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i64:
411 define void @constant_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
412 %load = load <8 x i8>, <8 x i8> addrspace(2)* %in
413 %ext = sext <8 x i8> %load to <8 x i64>
414 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
415 ret void
416 }
417
418 ; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i64:
419 define void @constant_zextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
420 %load = load <16 x i8>, <16 x i8> addrspace(2)* %in
421 %ext = zext <16 x i8> %load to <16 x i64>
422 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
423 ret void
424 }
425
426 ; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i64:
427 define void @constant_sextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
428 %load = load <16 x i8>, <16 x i8> addrspace(2)* %in
429 %ext = sext <16 x i8> %load to <16 x i64>
430 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
431 ret void
432 }
433
434 ; FUNC-LABEL: {{^}}constant_zextload_v32i8_to_v32i64:
435 define void @constant_zextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
436 %load = load <32 x i8>, <32 x i8> addrspace(2)* %in
437 %ext = zext <32 x i8> %load to <32 x i64>
438 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
439 ret void
440 }
441
442 ; FUNC-LABEL: {{^}}constant_sextload_v32i8_to_v32i64:
443 define void @constant_sextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
444 %load = load <32 x i8>, <32 x i8> addrspace(2)* %in
445 %ext = sext <32 x i8> %load to <32 x i64>
446 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
447 ret void
448 }
449
450 ; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i64:
451 ; define void @constant_zextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
452 ; %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
453 ; %ext = zext <64 x i8> %load to <64 x i64>
454 ; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
455 ; ret void
456 ; }
457
458 ; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i64:
459 ; define void @constant_sextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
460 ; %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
461 ; %ext = sext <64 x i8> %load to <64 x i64>
462 ; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
463 ; ret void
464 ; }
465
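; The i16 results need no separate extension instruction: the ubyte/sbyte load
; already extends into the 32-bit register and only the low 16 bits are stored
; with store_short.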
466 ; FUNC-LABEL: {{^}}constant_zextload_i8_to_i16:
467 ; GCN-NOHSA: buffer_load_ubyte v[[VAL:[0-9]+]],
468 ; GCN-NOHSA: buffer_store_short v[[VAL]]
469
470 ; GCN-HSA: flat_load_ubyte v[[VAL:[0-9]+]],
471 ; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
472 define void @constant_zextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
473 %a = load i8, i8 addrspace(2)* %in
474 %ext = zext i8 %a to i16
475 store i16 %ext, i16 addrspace(1)* %out
476 ret void
477 }
478
479 ; FUNC-LABEL: {{^}}constant_sextload_i8_to_i16:
480 ; GCN-NOHSA: buffer_load_sbyte v[[VAL:[0-9]+]],
481 ; GCN-HSA: flat_load_sbyte v[[VAL:[0-9]+]],
482
483 ; GCN-NOHSA: buffer_store_short v[[VAL]]
484 ; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
485 define void @constant_sextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
486 %a = load i8, i8 addrspace(2)* %in
487 %ext = sext i8 %a to i16
488 store i16 %ext, i16 addrspace(1)* %out
489 ret void
490 }
491
492 ; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i16:
493 define void @constant_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
494 %load = load <1 x i8>, <1 x i8> addrspace(2)* %in
495 %ext = zext <1 x i8> %load to <1 x i16>
496 store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
497 ret void
498 }
499
500 ; FUNC-LABEL: {{^}}constant_sextload_v1i8_to_v1i16:
501 define void @constant_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
502 %load = load <1 x i8>, <1 x i8> addrspace(2)* %in
503 %ext = sext <1 x i8> %load to <1 x i16>
504 store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
505 ret void
506 }
507
508 ; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i16:
509 define void @constant_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
510 %load = load <2 x i8>, <2 x i8> addrspace(2)* %in
511 %ext = zext <2 x i8> %load to <2 x i16>
512 store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
513 ret void
514 }
515
516 ; FUNC-LABEL