llvm.org GIT mirror llvm / ce00361
AMDGPU: Make v32i8/v64i8 illegal types Old intrinsics were forcing these, but they have now all been removed. This fixes large i8 vector operations generally being broken. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258788 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 4 years ago
7 changed file(s) with 213 addition(s) and 110 deletion(s). Raw diff Collapse all Expand all
4141 : AMDGPUTargetLowering(TM, STI) {
4242 addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
4343 addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
44
45 addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
46 addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass);
4744
4845 addRegisterClass(MVT::i32, &AMDGPU::SReg_32RegClass);
4946 addRegisterClass(MVT::f32, &AMDGPU::VGPR_32RegClass);
119119 >;
120120
121121 class SDSample : SDNode
122 SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v32i8>,
122 SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v8i32>,
123123 SDTCisVT<3, v4i32>, SDTCisVT<4, i32>]>
124124 >;
125125
21012101 defm : SMRD_Pattern <"S_LOAD_DWORD", i32>;
21022102 defm : SMRD_Pattern <"S_LOAD_DWORDX2", v2i32>;
21032103 defm : SMRD_Pattern <"S_LOAD_DWORDX4", v4i32>;
2104 defm : SMRD_Pattern <"S_LOAD_DWORDX8", v32i8>;
21052104 defm : SMRD_Pattern <"S_LOAD_DWORDX8", v8i32>;
21062105 defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>;
21072106
23422341
23432342 /* SIsample for simple 1D texture lookup */
23442343 def : Pat <
2345 (SIsample i32:$addr, v32i8:$rsrc, v4i32:$sampler, imm),
2344 (SIsample i32:$addr, v8i32:$rsrc, v4i32:$sampler, imm),
23462345 (IMAGE_SAMPLE_V4_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
23472346 >;
23482347
23492348 class SamplePattern : Pat <
2350 (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, imm),
2349 (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, imm),
23512350 (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
23522351 >;
23532352
23542353 class SampleRectPattern : Pat <
2355 (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_RECT),
2354 (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_RECT),
23562355 (opcode 0xf, 1, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
23572356 >;
23582357
23592358 class SampleArrayPattern : Pat <
2360 (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_ARRAY),
2359 (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_ARRAY),
23612360 (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
23622361 >;
23632362
23642363 class SampleShadowPattern
23652364 ValueType vt> : Pat <
2366 (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_SHADOW),
2365 (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_SHADOW),
23672366 (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
23682367 >;
23692368
23702369 class SampleShadowArrayPattern
23712370 ValueType vt> : Pat <
2372 (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_SHADOW_ARRAY),
2371 (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_SHADOW_ARRAY),
23732372 (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
23742373 >;
23752374
24222421
24232422 /* int_SI_imageload for texture fetches consuming varying address parameters */
24242423 class ImageLoadPattern : Pat <
2425 (name addr_type:$addr, v32i8:$rsrc, imm),
2424 (name addr_type:$addr, v8i32:$rsrc, imm),
24262425 (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc)
24272426 >;
24282427
24292428 class ImageLoadArrayPattern : Pat <
2430 (name addr_type:$addr, v32i8:$rsrc, TEX_ARRAY),
2429 (name addr_type:$addr, v8i32:$rsrc, TEX_ARRAY),
24312430 (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc)
24322431 >;
24332432
24342433 class ImageLoadMSAAPattern : Pat <
2435 (name addr_type:$addr, v32i8:$rsrc, TEX_MSAA),
2434 (name addr_type:$addr, v8i32:$rsrc, TEX_MSAA),
24362435 (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc)
24372436 >;
24382437
24392438 class ImageLoadArrayMSAAPattern : Pat <
2440 (name addr_type:$addr, v32i8:$rsrc, TEX_ARRAY_MSAA),
2439 (name addr_type:$addr, v8i32:$rsrc, TEX_ARRAY_MSAA),
24412440 (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc)
24422441 >;
24432442
25512550
25522551 def : BitConvert ;
25532552 def : BitConvert ;
2554 def : BitConvert ;
2555 def : BitConvert ;
2556 def : BitConvert ;
25572553 def : BitConvert ;
25582554 def : BitConvert ;
2559 def : BitConvert ;
25602555
25612556 def : BitConvert ;
25622557 def : BitConvert ;
207207 let CopyCost = 2;
208208 }
209209
210 def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 32, (add SGPR_256)> {
210 def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256)> {
211211 // Requires 4 s_mov_b64 to copy
212212 let CopyCost = 4;
213213 }
235235 let CopyCost = 4;
236236 }
237237
238 def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 32, (add VGPR_256)> {
238 def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add VGPR_256)> {
239239 let CopyCost = 8;
240240 }
241241
0 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2
3 ; FUNC-LABEL: {{^}}extract_vector_elt_v1i8:
4 ; SI: buffer_load_ubyte
5 ; SI: buffer_store_byte
6 define void @extract_vector_elt_v1i8(i8 addrspace(1)* %out, <1 x i8> %foo) #0 {
7 %p0 = extractelement <1 x i8> %foo, i32 0
8 store i8 %p0, i8 addrspace(1)* %out
9 ret void
10 }
11
12 ; FUNC-LABEL: {{^}}extract_vector_elt_v2i8:
13 ; SI: buffer_load_ubyte
14 ; SI: buffer_load_ubyte
15 ; SI: buffer_store_byte
16 ; SI: buffer_store_byte
17 define void @extract_vector_elt_v2i8(i8 addrspace(1)* %out, <2 x i8> %foo) #0 {
18 %p0 = extractelement <2 x i8> %foo, i32 0
19 %p1 = extractelement <2 x i8> %foo, i32 1
20 %out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
21 store i8 %p1, i8 addrspace(1)* %out
22 store i8 %p0, i8 addrspace(1)* %out1
23 ret void
24 }
25
26 ; FUNC-LABEL: {{^}}extract_vector_elt_v3i8:
27 ; SI: buffer_load_ubyte
28 ; SI: buffer_load_ubyte
29 ; SI: buffer_store_byte
30 ; SI: buffer_store_byte
31 define void @extract_vector_elt_v3i8(i8 addrspace(1)* %out, <3 x i8> %foo) #0 {
32 %p0 = extractelement <3 x i8> %foo, i32 0
33 %p1 = extractelement <3 x i8> %foo, i32 2
34 %out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
35 store i8 %p1, i8 addrspace(1)* %out
36 store i8 %p0, i8 addrspace(1)* %out1
37 ret void
38 }
39
40 ; FUNC-LABEL: {{^}}extract_vector_elt_v4i8:
41 ; SI: buffer_load_ubyte
42 ; SI: buffer_load_ubyte
43 ; SI: buffer_store_byte
44 ; SI: buffer_store_byte
45 define void @extract_vector_elt_v4i8(i8 addrspace(1)* %out, <4 x i8> %foo) #0 {
46 %p0 = extractelement <4 x i8> %foo, i32 0
47 %p1 = extractelement <4 x i8> %foo, i32 2
48 %out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
49 store i8 %p1, i8 addrspace(1)* %out
50 store i8 %p0, i8 addrspace(1)* %out1
51 ret void
52 }
53
54 ; FUNC-LABEL: {{^}}extract_vector_elt_v8i8:
55 ; SI: buffer_load_ubyte
56 ; SI: buffer_load_ubyte
57 ; SI: buffer_store_byte
58 ; SI: buffer_store_byte
59 define void @extract_vector_elt_v8i8(i8 addrspace(1)* %out, <8 x i8> %foo) #0 {
60 %p0 = extractelement <8 x i8> %foo, i32 0
61 %p1 = extractelement <8 x i8> %foo, i32 2
62 %out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
63 store i8 %p1, i8 addrspace(1)* %out
64 store i8 %p0, i8 addrspace(1)* %out1
65 ret void
66 }
67
68 ; FUNC-LABEL: {{^}}extract_vector_elt_v16i8:
69 ; SI: buffer_load_ubyte
70 ; SI: buffer_load_ubyte
71 ; SI: buffer_store_byte
72 ; SI: buffer_store_byte
73 define void @extract_vector_elt_v16i8(i8 addrspace(1)* %out, <16 x i8> %foo) #0 {
74 %p0 = extractelement <16 x i8> %foo, i32 0
75 %p1 = extractelement <16 x i8> %foo, i32 2
76 %out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
77 store i8 %p1, i8 addrspace(1)* %out
78 store i8 %p0, i8 addrspace(1)* %out1
79 ret void
80 }
81
82 ; FUNC-LABEL: {{^}}extract_vector_elt_v32i8:
83 ; SI: buffer_load_ubyte
84 ; SI: buffer_load_ubyte
85 ; SI: buffer_store_byte
86 ; SI: buffer_store_byte
87 define void @extract_vector_elt_v32i8(i8 addrspace(1)* %out, <32 x i8> %foo) #0 {
88 %p0 = extractelement <32 x i8> %foo, i32 0
89 %p1 = extractelement <32 x i8> %foo, i32 2
90 %out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
91 store i8 %p1, i8 addrspace(1)* %out
92 store i8 %p0, i8 addrspace(1)* %out1
93 ret void
94 }
95
96 ; FUNC-LABEL: {{^}}extract_vector_elt_v64i8:
97 ; SI: buffer_load_ubyte
98 ; SI: buffer_load_ubyte
99 ; SI: buffer_store_byte
100 ; SI: buffer_store_byte
101 define void @extract_vector_elt_v64i8(i8 addrspace(1)* %out, <64 x i8> %foo) #0 {
102 %p0 = extractelement <64 x i8> %foo, i32 0
103 %p1 = extractelement <64 x i8> %foo, i32 2
104 %out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
105 store i8 %p1, i8 addrspace(1)* %out
106 store i8 %p0, i8 addrspace(1)* %out1
107 ret void
108 }
109
110 attributes #0 = { nounwind }
44 ;CHECK: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
55 define void @gather4_v2() #0 {
66 main_body:
7 %r = call <4 x float> @llvm.SI.gather4.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
7 %r = call <4 x float> @llvm.SI.gather4.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
88 %r0 = extractelement <4 x float> %r, i32 0
99 %r1 = extractelement <4 x float> %r, i32 1
1010 %r2 = extractelement <4 x float> %r, i32 2
1717 ;CHECK: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
1818 define void @gather4() #0 {
1919 main_body:
20 %r = call <4 x float> @llvm.SI.gather4.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
20 %r = call <4 x float> @llvm.SI.gather4.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
2121 %r0 = extractelement <4 x float> %r, i32 0
2222 %r1 = extractelement <4 x float> %r, i32 1
2323 %r2 = extractelement <4 x float> %r, i32 2
3030 ;CHECK: image_gather4_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
3131 define void @gather4_cl() #0 {
3232 main_body:
33 %r = call <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
33 %r = call <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
3434 %r0 = extractelement <4 x float> %r, i32 0
3535 %r1 = extractelement <4 x float> %r, i32 1
3636 %r2 = extractelement <4 x float> %r, i32 2
4343 ;CHECK: image_gather4_l {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
4444 define void @gather4_l() #0 {
4545 main_body:
46 %r = call <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
46 %r = call <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
4747 %r0 = extractelement <4 x float> %r, i32 0
4848 %r1 = extractelement <4 x float> %r, i32 1
4949 %r2 = extractelement <4 x float> %r, i32 2
5656 ;CHECK: image_gather4_b {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
5757 define void @gather4_b() #0 {
5858 main_body:
59 %r = call <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
59 %r = call <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
6060 %r0 = extractelement <4 x float> %r, i32 0
6161 %r1 = extractelement <4 x float> %r, i32 1
6262 %r2 = extractelement <4 x float> %r, i32 2
6969 ;CHECK: image_gather4_b_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
7070 define void @gather4_b_cl() #0 {
7171 main_body:
72 %r = call <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
72 %r = call <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
7373 %r0 = extractelement <4 x float> %r, i32 0
7474 %r1 = extractelement <4 x float> %r, i32 1
7575 %r2 = extractelement <4 x float> %r, i32 2
8282 ;CHECK: image_gather4_b_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
8383 define void @gather4_b_cl_v8() #0 {
8484 main_body:
85 %r = call <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
85 %r = call <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
8686 %r0 = extractelement <4 x float> %r, i32 0
8787 %r1 = extractelement <4 x float> %r, i32 1
8888 %r2 = extractelement <4 x float> %r, i32 2
9595 ;CHECK: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
9696 define void @gather4_lz_v2() #0 {
9797 main_body:
98 %r = call <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
98 %r = call <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
9999 %r0 = extractelement <4 x float> %r, i32 0
100100 %r1 = extractelement <4 x float> %r, i32 1
101101 %r2 = extractelement <4 x float> %r, i32 2
108108 ;CHECK: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
109109 define void @gather4_lz() #0 {
110110 main_body:
111 %r = call <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
111 %r = call <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
112112 %r0 = extractelement <4 x float> %r, i32 0
113113 %r1 = extractelement <4 x float> %r, i32 1
114114 %r2 = extractelement <4 x float> %r, i32 2
123123 ;CHECK: image_gather4_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
124124 define void @gather4_o() #0 {
125125 main_body:
126 %r = call <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
126 %r = call <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
127127 %r0 = extractelement <4 x float> %r, i32 0
128128 %r1 = extractelement <4 x float> %r, i32 1
129129 %r2 = extractelement <4 x float> %r, i32 2
136136 ;CHECK: image_gather4_cl_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
137137 define void @gather4_cl_o() #0 {
138138 main_body:
139 %r = call <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
139 %r = call <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
140140 %r0 = extractelement <4 x float> %r, i32 0
141141 %r1 = extractelement <4 x float> %r, i32 1
142142 %r2 = extractelement <4 x float> %r, i32 2
149149 ;CHECK: image_gather4_cl_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
150150 define void @gather4_cl_o_v8() #0 {
151151 main_body:
152 %r = call <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
152 %r = call <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
153153 %r0 = extractelement <4 x float> %r, i32 0
154154 %r1 = extractelement <4 x float> %r, i32 1
155155 %r2 = extractelement <4 x float> %r, i32 2
162162 ;CHECK: image_gather4_l_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
163163 define void @gather4_l_o() #0 {
164164 main_body:
165 %r = call <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
165 %r = call <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
166166 %r0 = extractelement <4 x float> %r, i32 0
167167 %r1 = extractelement <4 x float> %r, i32 1
168168 %r2 = extractelement <4 x float> %r, i32 2
175175 ;CHECK: image_gather4_l_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
176176 define void @gather4_l_o_v8() #0 {
177177 main_body:
178 %r = call <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
178 %r = call <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
179179 %r0 = extractelement <4 x float> %r, i32 0
180180 %r1 = extractelement <4 x float> %r, i32 1
181181 %r2 = extractelement <4 x float> %r, i32 2
188188 ;CHECK: image_gather4_b_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
189189 define void @gather4_b_o() #0 {
190190 main_body:
191 %r = call <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
191 %r = call <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
192192 %r0 = extractelement <4 x float> %r, i32 0
193193 %r1 = extractelement <4 x float> %r, i32 1
194194 %r2 = extractelement <4 x float> %r, i32 2
201201 ;CHECK: image_gather4_b_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
202202 define void @gather4_b_o_v8() #0 {
203203 main_body:
204 %r = call <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
204 %r = call <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
205205 %r0 = extractelement <4 x float> %r, i32 0
206206 %r1 = extractelement <4 x float> %r, i32 1
207207 %r2 = extractelement <4 x float> %r, i32 2
214214 ;CHECK: image_gather4_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
215215 define void @gather4_b_cl_o() #0 {
216216 main_body:
217 %r = call <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
217 %r = call <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
218218 %r0 = extractelement <4 x float> %r, i32 0
219219 %r1 = extractelement <4 x float> %r, i32 1
220220 %r2 = extractelement <4 x float> %r, i32 2
227227 ;CHECK: image_gather4_lz_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
228228 define void @gather4_lz_o() #0 {
229229 main_body:
230 %r = call <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
230 %r = call <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
231231 %r0 = extractelement <4 x float> %r, i32 0
232232 %r1 = extractelement <4 x float> %r, i32 1
233233 %r2 = extractelement <4 x float> %r, i32 2
242242 ;CHECK: image_gather4_c {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
243243 define void @gather4_c() #0 {
244244 main_body:
245 %r = call <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
245 %r = call <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
246246 %r0 = extractelement <4 x float> %r, i32 0
247247 %r1 = extractelement <4 x float> %r, i32 1
248248 %r2 = extractelement <4 x float> %r, i32 2
255255 ;CHECK: image_gather4_c_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
256256 define void @gather4_c_cl() #0 {
257257 main_body:
258 %r = call <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
258 %r = call <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
259259 %r0 = extractelement <4 x float> %r, i32 0
260260 %r1 = extractelement <4 x float> %r, i32 1
261261 %r2 = extractelement <4 x float> %r, i32 2
268268 ;CHECK: image_gather4_c_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
269269 define void @gather4_c_cl_v8() #0 {
270270 main_body:
271 %r = call <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
271 %r = call <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
272272 %r0 = extractelement <4 x float> %r, i32 0
273273 %r1 = extractelement <4 x float> %r, i32 1
274274 %r2 = extractelement <4 x float> %r, i32 2
281281 ;CHECK: image_gather4_c_l {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
282282 define void @gather4_c_l() #0 {
283283 main_body:
284 %r = call <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
284 %r = call <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
285285 %r0 = extractelement <4 x float> %r, i32 0
286286 %r1 = extractelement <4 x float> %r, i32 1
287287 %r2 = extractelement <4 x float> %r, i32 2
294294 ;CHECK: image_gather4_c_l {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
295295 define void @gather4_c_l_v8() #0 {
296296 main_body:
297 %r = call <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
297 %r = call <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
298298 %r0 = extractelement <4 x float> %r, i32 0
299299 %r1 = extractelement <4 x float> %r, i32 1
300300 %r2 = extractelement <4 x float> %r, i32 2
307307 ;CHECK: image_gather4_c_b {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
308308 define void @gather4_c_b() #0 {
309309 main_body:
310 %r = call <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
310 %r = call <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
311311 %r0 = extractelement <4 x float> %r, i32 0
312312 %r1 = extractelement <4 x float> %r, i32 1
313313 %r2 = extractelement <4 x float> %r, i32 2
320320 ;CHECK: image_gather4_c_b {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
321321 define void @gather4_c_b_v8() #0 {
322322 main_body:
323 %r = call <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
323 %r = call <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
324324 %r0 = extractelement <4 x float> %r, i32 0
325325 %r1 = extractelement <4 x float> %r, i32 1
326326 %r2 = extractelement <4 x float> %r, i32 2
333333 ;CHECK: image_gather4_c_b_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
334334 define void @gather4_c_b_cl() #0 {
335335 main_body:
336 %r = call <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
336 %r = call <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
337337 %r0 = extractelement <4 x float> %r, i32 0
338338 %r1 = extractelement <4 x float> %r, i32 1
339339 %r2 = extractelement <4 x float> %r, i32 2
346346 ;CHECK: image_gather4_c_lz {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
347347 define void @gather4_c_lz() #0 {
348348 main_body:
349 %r = call <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
349 %r = call <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
350350 %r0 = extractelement <4 x float> %r, i32 0
351351 %r1 = extractelement <4 x float> %r, i32 1
352352 %r2 = extractelement <4 x float> %r, i32 2
361361 ;CHECK: image_gather4_c_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
362362 define void @gather4_c_o() #0 {
363363 main_body:
364 %r = call <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
364 %r = call <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
365365 %r0 = extractelement <4 x float> %r, i32 0
366366 %r1 = extractelement <4 x float> %r, i32 1
367367 %r2 = extractelement <4 x float> %r, i32 2
374374 ;CHECK: image_gather4_c_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
375375 define void @gather4_c_o_v8() #0 {
376376 main_body:
377 %r = call <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
377 %r = call <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
378378 %r0 = extractelement <4 x float> %r, i32 0
379379 %r1 = extractelement <4 x float> %r, i32 1
380380 %r2 = extractelement <4 x float> %r, i32 2
387387 ;CHECK: image_gather4_c_cl_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
388388 define void @gather4_c_cl_o() #0 {
389389 main_body:
390 %r = call <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
390 %r = call <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
391391 %r0 = extractelement <4 x float> %r, i32 0
392392 %r1 = extractelement <4 x float> %r, i32 1
393393 %r2 = extractelement <4 x float> %r, i32 2
400400 ;CHECK: image_gather4_c_l_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
401401 define void @gather4_c_l_o() #0 {
402402 main_body:
403 %r = call <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
403 %r = call <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
404404 %r0 = extractelement <4 x float> %r, i32 0
405405 %r1 = extractelement <4 x float> %r, i32 1
406406 %r2 = extractelement <4 x float> %r, i32 2
413413 ;CHECK: image_gather4_c_b_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
414414 define void @gather4_c_b_o() #0 {
415415 main_body:
416 %r = call <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
416 %r = call <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
417417 %r0 = extractelement <4 x float> %r, i32 0
418418 %r1 = extractelement <4 x float> %r, i32 1
419419 %r2 = extractelement <4 x float> %r, i32 2
426426 ;CHECK: image_gather4_c_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
427427 define void @gather4_c_b_cl_o() #0 {
428428 main_body:
429 %r = call <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
429 %r = call <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
430430 %r0 = extractelement <4 x float> %r, i32 0
431431 %r1 = extractelement <4 x float> %r, i32 1
432432 %r2 = extractelement <4 x float> %r, i32 2
439439 ;CHECK: image_gather4_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
440440 define void @gather4_c_lz_o() #0 {
441441 main_body:
442 %r = call <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
442 %r = call <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
443443 %r0 = extractelement <4 x float> %r, i32 0
444444 %r1 = extractelement <4 x float> %r, i32 1
445445 %r2 = extractelement <4 x float> %r, i32 2
452452 ;CHECK: image_gather4_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
453453 define void @gather4_c_lz_o_v8() #0 {
454454 main_body:
455 %r = call <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
456 %r0 = extractelement <4 x float> %r, i32 0
457 %r1 = extractelement <4 x float> %r, i32 1
458 %r2 = extractelement <4 x float> %r, i32 2
459 %r3 = extractelement <4 x float> %r, i32 3
460 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
461 ret void
462 }
463
464
465
466 declare <4 x float> @llvm.SI.gather4.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
467 declare <4 x float> @llvm.SI.gather4.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
468 declare <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
469 declare <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
470 declare <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
471 declare <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
472 declare <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
473 declare <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
474 declare <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
475
476 declare <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
477 declare <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
478 declare <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
479 declare <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
480 declare <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
481 declare <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
482 declare <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
483 declare <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
484 declare <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
485
486 declare <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
487 declare <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
488 declare <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
489 declare <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
490 declare <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
491 declare <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
492 declare <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
493 declare <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
494 declare <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
495
496 declare <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
497 declare <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
498 declare <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
499 declare <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
500 declare <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
501 declare <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
502 declare <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
503 declare <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
455 %r = call <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
456 %r0 = extractelement <4 x float> %r, i32 0
457 %r1 = extractelement <4 x float> %r, i32 1
458 %r2 = extractelement <4 x float> %r, i32 2
459 %r3 = extractelement <4 x float> %r, i32 3
460 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
461 ret void
462 }
463
464
465
466 declare <4 x float> @llvm.SI.gather4.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
467 declare <4 x float> @llvm.SI.gather4.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
468 declare <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
469 declare <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
470 declare <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
471 declare <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
472 declare <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
473 declare <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
474 declare <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
475
476 declare <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
477 declare <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
478 declare <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
479 declare <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
480 declare <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
481 declare <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
482 declare <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
483 declare <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
484 declare <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
485
486 declare <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
487 declare <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
488 declare <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
489 declare <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
490 declare <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
491 declare <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
492 declare <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
493 declare <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
494 declare <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
495
496 declare <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
497 declare <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
498 declare <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
499 declare <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
500 declare <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
501 declare <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
502 declare <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
503 declare <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
504504
505505 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
506506
44 ;CHECK: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
55 define void @getlod() #0 {
66 main_body:
7 %r = call <4 x float> @llvm.SI.getlod.i32(i32 undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
7 %r = call <4 x float> @llvm.SI.getlod.i32(i32 undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
88 %r0 = extractelement <4 x float> %r, i32 0
99 %r1 = extractelement <4 x float> %r, i32 1
1010 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r0, float %r1)
1515 ;CHECK: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
1616 define void @getlod_v2() #0 {
1717 main_body:
18 %r = call <4 x float> @llvm.SI.getlod.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
18 %r = call <4 x float> @llvm.SI.getlod.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
1919 %r0 = extractelement <4 x float> %r, i32 0
2020 %r1 = extractelement <4 x float> %r, i32 1
2121 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r0, float %r1)
2626 ;CHECK: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
2727 define void @getlod_v4() #0 {
2828 main_body:
29 %r = call <4 x float> @llvm.SI.getlod.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
29 %r = call <4 x float> @llvm.SI.getlod.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
3030 %r0 = extractelement <4 x float> %r, i32 0
3131 %r1 = extractelement <4 x float> %r, i32 1
3232 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r0, float %r1)
3434 }
3535
3636
37 declare <4 x float> @llvm.SI.getlod.i32(i32, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
38 declare <4 x float> @llvm.SI.getlod.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
39 declare <4 x float> @llvm.SI.getlod.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
37 declare <4 x float> @llvm.SI.getlod.i32(i32, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
38 declare <4 x float> @llvm.SI.getlod.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
39 declare <4 x float> @llvm.SI.getlod.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
4040
4141 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
4242