llvm.org GIT mirror llvm / 74ebd8a
AMDGPU: Convert tests away from llvm.SI.load.const

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@351494 91177308-0d34-0410-b5e6-96231b3b80d8

Matt Arsenault, 1 year, 2 months ago
8 changed file(s) with 282 addition(s) and 282 deletion(s). Raw diff Collapse all Expand all
77 ; of which were in SGPRs.
88 define amdgpu_vs float @main(i32 %v) {
99 main_body:
10 %d1 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 960)
11 %d2 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 976)
10 %d1 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 960, i32 0)
11 %d2 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 976, i32 0)
1212 br i1 undef, label %ENDIF56, label %IF57
1313
1414 IF57: ; preds = %ENDIF
4040 }
4141
4242 ; Function Attrs: nounwind readnone
43 declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #0
43 declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #0
4444
4545 attributes #0 = { nounwind readnone }
4646 attributes #1 = { readnone }
1414
1515 define amdgpu_gs void @main(i32 inreg %arg) #0 {
1616 main_body:
17 %tmp = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 20)
18 %tmp1 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 24)
19 %tmp2 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 48)
17 %tmp = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 20, i32 0)
18 %tmp1 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 24, i32 0)
19 %tmp2 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 48, i32 0)
2020 %array_vector3 = insertelement <4 x float> zeroinitializer, float %tmp2, i32 3
2121 %array_vector5 = insertelement <4 x float> , float %tmp, i32 1
2222 %array_vector6 = insertelement <4 x float> %array_vector5, float undef, i32 2
4444 ret void
4545 }
4646
47 declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
47 declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #1
4848 declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32) #2
4949 declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #3
5050
77 main_body:
88 %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
99 %tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
10 %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 0)
11 %tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
12 %tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 32)
10 %tmp21 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 0, i32 0)
11 %tmp22 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 16, i32 0)
12 %tmp23 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 32, i32 0)
1313 %tmp24 = fptosi float %tmp22 to i32
1414 %tmp25 = icmp ne i32 %tmp24, 0
1515 br i1 %tmp25, label %ENDIF, label %ELSE
3131 main_body:
3232 %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
3333 %tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
34 %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
35 %tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 32)
36 %tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 36)
37 %tmp24 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 40)
38 %tmp25 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 48)
39 %tmp26 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 52)
40 %tmp27 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 56)
41 %tmp28 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 64)
42 %tmp29 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 68)
43 %tmp30 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 72)
44 %tmp31 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 76)
45 %tmp32 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 80)
46 %tmp33 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 84)
47 %tmp34 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 88)
48 %tmp35 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 92)
34 %tmp21 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 16, i32 0)
35 %tmp22 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 32, i32 0)
36 %tmp23 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 36, i32 0)
37 %tmp24 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 40, i32 0)
38 %tmp25 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 48, i32 0)
39 %tmp26 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 52, i32 0)
40 %tmp27 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 56, i32 0)
41 %tmp28 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 64, i32 0)
42 %tmp29 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 68, i32 0)
43 %tmp30 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 72, i32 0)
44 %tmp31 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 76, i32 0)
45 %tmp32 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 80, i32 0)
46 %tmp33 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 84, i32 0)
47 %tmp34 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 88, i32 0)
48 %tmp35 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 92, i32 0)
4949 %tmp36 = getelementptr <8 x i32>, <8 x i32> addrspace(4)* %arg2, i32 0
5050 %tmp37 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp36, !tbaa !0
5151 %tmp38 = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg1, i32 0
171171 main_body:
172172 %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
173173 %tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
174 %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 0)
175 %tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 4)
176 %tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 8)
177 %tmp24 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 12)
174 %tmp21 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 0, i32 0)
175 %tmp22 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 4, i32 0)
176 %tmp23 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 8, i32 0)
177 %tmp24 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 12, i32 0)
178178 %tmp25 = fptosi float %tmp24 to i32
179179 %tmp26 = bitcast i32 %tmp25 to float
180180 %tmp27 = bitcast float %tmp26 to i32
224224 entry:
225225 %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg, i64 0, i32 0
226226 %tmp21 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
227 %tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 16)
227 %tmp22 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 16, i32 0)
228228 %tmp23 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 0
229229 %tmp24 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp23, !tbaa !0
230230 %tmp25 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 0
324324 bb:
325325 %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg1, i32 0, i32 0
326326 %tmp22 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !3
327 %tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp22, i32 16)
327 %tmp23 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp22, i32 16, i32 0)
328328 %tmp25 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(4)* %arg3, i32 0, i32 0
329329 %tmp26 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp25, !tbaa !3
330330 %tmp27 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(4)* %arg2, i32 0, i32 0
408408 declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
409409 declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
410410 declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
411 declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
411 declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #1
412412
413413 attributes #0 = { nounwind }
414414 attributes #1 = { nounwind readnone }
2727 main_body:
2828 %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg, i64 0, i32 0
2929 %tmp21 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
30 %tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 96)
31 %tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 100)
32 %tmp24 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 104)
33 %tmp25 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 112)
34 %tmp26 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 116)
35 %tmp27 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 120)
36 %tmp28 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 128)
37 %tmp29 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 132)
38 %tmp30 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 140)
39 %tmp31 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 144)
40 %tmp32 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 160)
41 %tmp33 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 176)
42 %tmp34 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 180)
43 %tmp35 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 184)
44 %tmp36 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 192)
45 %tmp37 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 196)
46 %tmp38 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 200)
47 %tmp39 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 208)
48 %tmp40 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 212)
49 %tmp41 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 216)
50 %tmp42 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 224)
51 %tmp43 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 240)
52 %tmp44 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 244)
53 %tmp45 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 248)
54 %tmp46 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 256)
55 %tmp47 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 272)
56 %tmp48 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 276)
57 %tmp49 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 280)
58 %tmp50 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 288)
59 %tmp51 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 292)
60 %tmp52 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 296)
61 %tmp53 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 304)
62 %tmp54 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 308)
63 %tmp55 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 312)
64 %tmp56 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 368)
65 %tmp57 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 372)
66 %tmp58 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 376)
67 %tmp59 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 384)
30 %tmp22 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 96, i32 0)
31 %tmp23 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 100, i32 0)
32 %tmp24 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 104, i32 0)
33 %tmp25 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 112, i32 0)
34 %tmp26 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 116, i32 0)
35 %tmp27 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 120, i32 0)
36 %tmp28 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 128, i32 0)
37 %tmp29 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 132, i32 0)
38 %tmp30 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 140, i32 0)
39 %tmp31 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 144, i32 0)
40 %tmp32 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 160, i32 0)
41 %tmp33 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 176, i32 0)
42 %tmp34 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 180, i32 0)
43 %tmp35 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 184, i32 0)
44 %tmp36 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 192, i32 0)
45 %tmp37 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 196, i32 0)
46 %tmp38 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 200, i32 0)
47 %tmp39 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 208, i32 0)
48 %tmp40 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 212, i32 0)
49 %tmp41 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 216, i32 0)
50 %tmp42 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 224, i32 0)
51 %tmp43 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 240, i32 0)
52 %tmp44 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 244, i32 0)
53 %tmp45 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 248, i32 0)
54 %tmp46 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 256, i32 0)
55 %tmp47 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 272, i32 0)
56 %tmp48 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 276, i32 0)
57 %tmp49 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 280, i32 0)
58 %tmp50 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 288, i32 0)
59 %tmp51 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 292, i32 0)
60 %tmp52 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 296, i32 0)
61 %tmp53 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 304, i32 0)
62 %tmp54 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 308, i32 0)
63 %tmp55 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 312, i32 0)
64 %tmp56 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 368, i32 0)
65 %tmp57 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 372, i32 0)
66 %tmp58 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 376, i32 0)
67 %tmp59 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 384, i32 0)
6868 %tmp60 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 0
6969 %tmp61 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp60, !tbaa !0
7070 %tmp62 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 0
646646 main_body:
647647 %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg, i64 0, i32 0
648648 %tmp21 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
649 %tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 0)
650 %tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 4)
651 %tmp24 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 8)
652 %tmp25 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 12)
653 %tmp26 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 28)
654 %tmp27 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 48)
655 %tmp28 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 52)
656 %tmp29 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 56)
657 %tmp30 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 64)
658 %tmp31 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 68)
659 %tmp32 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 72)
660 %tmp33 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 76)
661 %tmp34 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 128)
662 %tmp35 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 132)
663 %tmp36 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 144)
664 %tmp37 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 148)
665 %tmp38 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 152)
666 %tmp39 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 160)
667 %tmp40 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 164)
668 %tmp41 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 168)
669 %tmp42 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 172)
670 %tmp43 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 176)
671 %tmp44 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 180)
672 %tmp45 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 184)
673 %tmp46 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 192)
674 %tmp47 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 196)
675 %tmp48 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 200)
676 %tmp49 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 208)
677 %tmp50 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 212)
678 %tmp51 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 216)
679 %tmp52 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 220)
680 %tmp53 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 236)
681 %tmp54 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 240)
682 %tmp55 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 244)
683 %tmp56 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 248)
684 %tmp57 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 252)
685 %tmp58 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 256)
686 %tmp59 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 260)
687 %tmp60 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 264)
688 %tmp61 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 268)
689 %tmp62 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 272)
690 %tmp63 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 276)
691 %tmp64 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 280)
692 %tmp65 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 284)
693 %tmp66 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 288)
694 %tmp67 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 292)
695 %tmp68 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 464)
696 %tmp69 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 468)
697 %tmp70 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 472)
698 %tmp71 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 496)
699 %tmp72 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 500)
700 %tmp73 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 504)
701 %tmp74 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 512)
702 %tmp75 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 516)
703 %tmp76 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 524)
704 %tmp77 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 532)
705 %tmp78 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 536)
706 %tmp79 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 540)
707 %tmp80 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 544)
708 %tmp81 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 548)
709 %tmp82 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 552)
710 %tmp83 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 556)
711 %tmp84 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 560)
712 %tmp85 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 564)
713 %tmp86 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 568)
714 %tmp87 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 572)
715 %tmp88 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 576)
716 %tmp89 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 580)
717 %tmp90 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 584)
718 %tmp91 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 588)
719 %tmp92 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 592)
720 %tmp93 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 596)
721 %tmp94 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 600)
722 %tmp95 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 604)
723 %tmp96 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 608)
724 %tmp97 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 612)
725 %tmp98 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 616)
726 %tmp99 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 624)
727 %tmp100 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 628)
728 %tmp101 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 632)
729 %tmp102 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 636)
730 %tmp103 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 640)
731 %tmp104 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 644)
732 %tmp105 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 648)
733 %tmp106 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 652)
734 %tmp107 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 656)
735 %tmp108 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 660)
736 %tmp109 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 664)
737 %tmp110 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 668)
738 %tmp111 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 672)
739 %tmp112 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 676)
740 %tmp113 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 680)
741 %tmp114 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 684)
742 %tmp115 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 688)
743 %tmp116 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 692)
744 %tmp117 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 696)
745 %tmp118 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 700)
746 %tmp119 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 704)
747 %tmp120 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 708)
748 %tmp121 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 712)
749 %tmp122 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 716)
750 %tmp123 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 864)
751 %tmp124 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 868)
649 %tmp22 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 0, i32 0)
650 %tmp23 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 4, i32 0)
651 %tmp24 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 8, i32 0)
652 %tmp25 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 12, i32 0)
653 %tmp26 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 28, i32 0)
654 %tmp27 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 48, i32 0)
655 %tmp28 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 52, i32 0)
656 %tmp29 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 56, i32 0)
657 %tmp30 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 64, i32 0)
658 %tmp31 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 68, i32 0)
659 %tmp32 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 72, i32 0)
660 %tmp33 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 76, i32 0)
661 %tmp34 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 128, i32 0)
662 %tmp35 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 132, i32 0)
663 %tmp36 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 144, i32 0)
664 %tmp37 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 148, i32 0)
665 %tmp38 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 152, i32 0)
666 %tmp39 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 160, i32 0)
667 %tmp40 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 164, i32 0)
668 %tmp41 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 168, i32 0)
669 %tmp42 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 172, i32 0)
670 %tmp43 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 176, i32 0)
671 %tmp44 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 180, i32 0)
672 %tmp45 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 184, i32 0)
673 %tmp46 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 192, i32 0)
674 %tmp47 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 196, i32 0)
675 %tmp48 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 200, i32 0)
676 %tmp49 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 208, i32 0)
677 %tmp50 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 212, i32 0)
678 %tmp51 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 216, i32 0)
679 %tmp52 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 220, i32 0)
680 %tmp53 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 236, i32 0)
681 %tmp54 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 240, i32 0)
682 %tmp55 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 244, i32 0)
683 %tmp56 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 248, i32 0)
684 %tmp57 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 252, i32 0)
685 %tmp58 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 256, i32 0)
686 %tmp59 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 260, i32 0)
687 %tmp60 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 264, i32 0)
688 %tmp61 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 268, i32 0)
689 %tmp62 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 272, i32 0)
690 %tmp63 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 276, i32 0)
691 %tmp64 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 280, i32 0)
692 %tmp65 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 284, i32 0)
693 %tmp66 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 288, i32 0)
694 %tmp67 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 292, i32 0)
695 %tmp68 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 464, i32 0)
696 %tmp69 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 468, i32 0)
697 %tmp70 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 472, i32 0)
698 %tmp71 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 496, i32 0)
699 %tmp72 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 500, i32 0)
700 %tmp73 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 504, i32 0)
701 %tmp74 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 512, i32 0)
702 %tmp75 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 516, i32 0)
703 %tmp76 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 524, i32 0)
704 %tmp77 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 532, i32 0)
705 %tmp78 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 536, i32 0)
706 %tmp79 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 540, i32 0)
707 %tmp80 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 544, i32 0)
708 %tmp81 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 548, i32 0)
709 %tmp82 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 552, i32 0)
710 %tmp83 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 556, i32 0)
711 %tmp84 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 560, i32 0)
712 %tmp85 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 564, i32 0)
713 %tmp86 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 568, i32 0)
714 %tmp87 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 572, i32 0)
715 %tmp88 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 576, i32 0)
716 %tmp89 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 580, i32 0)
717 %tmp90 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 584, i32 0)
718 %tmp91 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 588, i32 0)
719 %tmp92 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 592, i32 0)
720 %tmp93 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 596, i32 0)
721 %tmp94 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 600, i32 0)
722 %tmp95 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 604, i32 0)
723 %tmp96 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 608, i32 0)
724 %tmp97 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 612, i32 0)
725 %tmp98 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 616, i32 0)
726 %tmp99 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 624, i32 0)
727 %tmp100 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 628, i32 0)
728 %tmp101 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 632, i32 0)
729 %tmp102 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 636, i32 0)
730 %tmp103 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 640, i32 0)
731 %tmp104 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 644, i32 0)
732 %tmp105 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 648, i32 0)
733 %tmp106 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 652, i32 0)
734 %tmp107 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 656, i32 0)
735 %tmp108 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 660, i32 0)
736 %tmp109 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 664, i32 0)
737 %tmp110 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 668, i32 0)
738 %tmp111 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 672, i32 0)
739 %tmp112 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 676, i32 0)
740 %tmp113 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 680, i32 0)
741 %tmp114 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 684, i32 0)
742 %tmp115 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 688, i32 0)
743 %tmp116 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 692, i32 0)
744 %tmp117 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 696, i32 0)
745 %tmp118 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 700, i32 0)
746 %tmp119 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 704, i32 0)
747 %tmp120 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 708, i32 0)
748 %tmp121 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 712, i32 0)
749 %tmp122 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 716, i32 0)
750 %tmp123 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 864, i32 0)
751 %tmp124 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 868, i32 0)
752752 %tmp125 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(4)* %arg2, i64 0, i32 0
753753 %tmp126 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp125, !tbaa !0
754754 %tmp127 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(4)* %arg1, i64 0, i32 0
16821682 declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
16831683 declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
16841684 declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
1685 declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
1685 declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #1
16861686
16871687 attributes #0 = { nounwind }
16881688 attributes #1 = { nounwind readnone }
88
99 define amdgpu_ps void @main() #0 {
1010 main_body:
11 %tmp = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 16)
12 %tmp1 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 32)
13 %tmp2 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 80)
14 %tmp3 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 84)
15 %tmp4 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 88)
16 %tmp5 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 96)
17 %tmp6 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 100)
18 %tmp7 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 104)
19 %tmp8 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 112)
20 %tmp9 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 116)
21 %tmp10 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 120)
22 %tmp11 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 128)
23 %tmp12 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 132)
24 %tmp13 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 136)
25 %tmp14 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 144)
26 %tmp15 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 148)
27 %tmp16 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 152)
28 %tmp17 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 160)
29 %tmp18 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 164)
30 %tmp19 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 168)
31 %tmp20 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 176)
32 %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 180)
33 %tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 184)
34 %tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 192)
35 %tmp24 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 196)
36 %tmp25 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 200)
37 %tmp26 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 208)
38 %tmp27 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 212)
39 %tmp28 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 216)
40 %tmp29 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 224)
41 %tmp30 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 228)
42 %tmp31 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 232)
43 %tmp32 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 240)
44 %tmp33 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 244)
45 %tmp34 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 248)
46 %tmp35 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 256)
47 %tmp36 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 260)
48 %tmp37 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 264)
49 %tmp38 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 272)
50 %tmp39 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 276)
51 %tmp40 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 280)
52 %tmp41 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 288)
53 %tmp42 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 292)
54 %tmp43 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 296)
55 %tmp44 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 304)
56 %tmp45 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 308)
57 %tmp46 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 312)
58 %tmp47 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 320)
59 %tmp48 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 324)
60 %tmp49 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 328)
61 %tmp50 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 336)
62 %tmp51 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 340)
63 %tmp52 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 344)
64 %tmp53 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 352)
65 %tmp54 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 356)
66 %tmp55 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 360)
67 %tmp56 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 368)
68 %tmp57 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 372)
69 %tmp58 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 376)
70 %tmp59 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 384)
71 %tmp60 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 388)
72 %tmp61 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 392)
73 %tmp62 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 400)
74 %tmp63 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 404)
75 %tmp64 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 408)
76 %tmp65 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 416)
77 %tmp66 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 420)
11 %tmp = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 16, i32 0)
12 %tmp1 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 32, i32 0)
13 %tmp2 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 80, i32 0)
14 %tmp3 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 84, i32 0)
15 %tmp4 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 88, i32 0)
16 %tmp5 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 96, i32 0)
17 %tmp6 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 100, i32 0)
18 %tmp7 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 104, i32 0)
19 %tmp8 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 112, i32 0)
20 %tmp9 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 116, i32 0)
21 %tmp10 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 120, i32 0)
22 %tmp11 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 128, i32 0)
23 %tmp12 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 132, i32 0)
24 %tmp13 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 136, i32 0)
25 %tmp14 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 144, i32 0)
26 %tmp15 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 148, i32 0)
27 %tmp16 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 152, i32 0)
28 %tmp17 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 160, i32 0)
29 %tmp18 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 164, i32 0)
30 %tmp19 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 168, i32 0)
31 %tmp20 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 176, i32 0)
32 %tmp21 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 180, i32 0)
33 %tmp22 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 184, i32 0)
34 %tmp23 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 192, i32 0)
35 %tmp24 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 196, i32 0)
36 %tmp25 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 200, i32 0)
37 %tmp26 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 208, i32 0)
38 %tmp27 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 212, i32 0)
39 %tmp28 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 216, i32 0)
40 %tmp29 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 224, i32 0)
41 %tmp30 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 228, i32 0)
42 %tmp31 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 232, i32 0)
43 %tmp32 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 240, i32 0)
44 %tmp33 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 244, i32 0)
45 %tmp34 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 248, i32 0)
46 %tmp35 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 256, i32 0)
47 %tmp36 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 260, i32 0)
48 %tmp37 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 264, i32 0)
49 %tmp38 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 272, i32 0)
50 %tmp39 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 276, i32 0)
51 %tmp40 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 280, i32 0)
52 %tmp41 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 288, i32 0)
53 %tmp42 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 292, i32 0)
54 %tmp43 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 296, i32 0)
55 %tmp44 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 304, i32 0)
56 %tmp45 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 308, i32 0)
57 %tmp46 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 312, i32 0)
58 %tmp47 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 320, i32 0)
59 %tmp48 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 324, i32 0)
60 %tmp49 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 328, i32 0)
61 %tmp50 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 336, i32 0)
62 %tmp51 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 340, i32 0)
63 %tmp52 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 344, i32 0)
64 %tmp53 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 352, i32 0)
65 %tmp54 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 356, i32 0)
66 %tmp55 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 360, i32 0)
67 %tmp56 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 368, i32 0)
68 %tmp57 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 372, i32 0)
69 %tmp58 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 376, i32 0)
70 %tmp59 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 384, i32 0)
71 %tmp60 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 388, i32 0)
72 %tmp61 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 392, i32 0)
73 %tmp62 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 400, i32 0)
74 %tmp63 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 404, i32 0)
75 %tmp64 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 408, i32 0)
76 %tmp65 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 416, i32 0)
77 %tmp66 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 420, i32 0)
7878 br label %LOOP
7979
8080 LOOP: ; preds = %ENDIF2795, %main_body
496496 declare float @llvm.maxnum.f32(float, float) #1
497497 declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
498498 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
499 declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
499 declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #1
500500
501501 attributes #0 = { nounwind }
502502 attributes #1 = { nounwind readnone }
9797 %d1 = insertelement <4 x i32> %d0, i32 1, i32 1
9898 %d2 = insertelement <4 x i32> %d1, i32 2, i32 2
9999 %d3 = insertelement <4 x i32> %d2, i32 3, i32 3
100 %r = call float @llvm.SI.load.const.v4i32(<4 x i32> %d3, i32 0)
100 %r = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %d3, i32 0, i32 0)
101101 ret float %r
102102 }
103103
109109 main_body:
110110 %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
111111 %tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp
112 %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
112 %tmp21 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 16, i32 0)
113113 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
114114 ret void
115115 }
125125 main_body:
126126 %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
127127 %tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp
128 %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 1020)
128 %tmp21 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 1020, i32 0)
129129 %tmp22 = load <4 x i32>, <4 x i32> addrspace(4)* %in
130130 %s.buffer = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %tmp22, i32 1020, i32 1)
131131 %s.buffer.float = bitcast i32 %s.buffer to float
148148 main_body:
149149 %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
150150 %tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp
151 %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 1024)
151 %tmp21 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 1024, i32 0)
152152 %tmp22 = load <4 x i32>, <4 x i32> addrspace(4)* %in
153153 %s.buffer = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %tmp22, i32 1024, i32 0)
154154 %s.buffer.float = bitcast i32 %s.buffer to float
169169 main_body:
170170 %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
171171 %tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp
172 %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 1048572)
172 %tmp21 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 1048572, i32 0)
173173 %tmp22 = load <4 x i32>, <4 x i32> addrspace(4)* %in
174174 %s.buffer = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %tmp22, i32 1048572, i32 0)
175175 %s.buffer.float = bitcast i32 %s.buffer to float
189189 main_body:
190190 %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
191191 %tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp
192 %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 1048576)
192 %tmp21 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 1048576, i32 0)
193193 %tmp22 = load <4 x i32>, <4 x i32> addrspace(4)* %in
194194 %s.buffer = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %tmp22, i32 1048576, i32 0)
195195 %s.buffer.float = bitcast i32 %s.buffer to float
277277 ; GCN: s_buffer_load_dword s{{[0-9]}}, s[0:3], s4
278278 define amdgpu_ps float @smrd_sgpr_offset(<4 x i32> inreg %desc, i32 inreg %offset) #0 {
279279 main_body:
280 %r = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 %offset)
280 %r = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 %offset, i32 0)
281281 ret float %r
282282 }
283283
285285 ; GCN: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen ;
286286 define amdgpu_ps float @smrd_vgpr_offset(<4 x i32> inreg %desc, i32 %offset) #0 {
287287 main_body:
288 %r = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 %offset)
288 %r = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 %offset, i32 0)
289289 ret float %r
290290 }
291291
295295 define amdgpu_ps float @smrd_vgpr_offset_imm(<4 x i32> inreg %desc, i32 %offset) #0 {
296296 main_body:
297297 %off = add i32 %offset, 4092
298 %r = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 %off)
298 %r = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 %off, i32 0)
299299 ret float %r
300300 }
301301
307307 define amdgpu_ps float @smrd_vgpr_offset_imm_too_large(<4 x i32> inreg %desc, i32 %offset) #0 {
308308 main_body:
309309 %off = add i32 %offset, 4096
310 %r = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 %off)
310 %r = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 %off, i32 0)
311311 ret float %r
312312 }
313313
319319 ; VIGFX9-NEXT: s_buffer_load_dwordx2 s[{{[0-9]}}:{{[0-9]}}], s[0:3], 0x1c
320320 define amdgpu_ps void @smrd_imm_merged(<4 x i32> inreg %desc) #0 {
321321 main_body:
322 %r1 = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 4)
323 %r2 = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 8)
324 %r3 = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 12)
325 %r4 = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 16)
326 %r5 = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 28)
327 %r6 = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 32)
322 %r1 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 4, i32 0)
323 %r2 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 8, i32 0)
324 %r3 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 12, i32 0)
325 %r4 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 16, i32 0)
326 %r5 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 28, i32 0)
327 %r6 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 32, i32 0)
328328 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true) #0
329329 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true) #0
330330 ret void
351351 ;
352352 define amdgpu_ps float @smrd_imm_merge_m0(<4 x i32> inreg %desc, i32 inreg %prim, float %u, float %v) #0 {
353353 main_body:
354 %idx1.f = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 0)
354 %idx1.f = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 0, i32 0)
355355 %idx1 = bitcast float %idx1.f to i32
356356
357357 %v0.x1 = call nsz float @llvm.amdgcn.interp.p1(float %u, i32 0, i32 0, i32 %prim)
376376 %v1 = insertelement <3 x float> %v0.tmp1, float %v0.z, i32 2
377377
378378 %b = extractelement <3 x float> %v1, i32 %idx1
379 %c = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 4)
379 %c = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 4, i32 0)
380380
381381 %res.tmp = fadd float %a, %b
382382 %res = fadd float %res.tmp, %c
395395 %a4 = add i32 %a, 16
396396 %a5 = add i32 %a, 28
397397 %a6 = add i32 %a, 32
398 %r1 = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 %a1)
399 %r2 = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 %a2)
400 %r3 = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 %a3)
401 %r4 = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 %a4)
402 %r5 = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 %a5)
403 %r6 = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 %a6)
398 %r1 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 %a1, i32 0)
399 %r2 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 %a2, i32 0)
400 %r3 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 %a3, i32 0)
401 %r4 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 %a4, i32 0)
402 %r5 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 %a5, i32 0)
403 %r6 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 %a6, i32 0)
404404 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true) #0
405405 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true) #0
406406 ret void
427427
428428 .inner_loop_body:
429429 %descriptor = load <4 x i32>, <4 x i32> addrspace(4)* %descptr, align 16, !invariant.load !0
430 %load1result = call float @llvm.SI.load.const.v4i32(<4 x i32> %descriptor, i32 0)
430 %load1result = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %descriptor, i32 0, i32 0)
431431 store float %load1result, float addrspace(1)* undef
432432 %inner_br2 = icmp uge i32 %1, 10
433433 br i1 %inner_br2, label %.inner_loop_header, label %.outer_loop_body
434434
435435 .outer_loop_body:
436436 %offset = shl i32 %loopctr.2, 6
437 %load2result = call float @llvm.SI.load.const.v4i32(<4 x i32> %descriptor, i32 %offset)
437 %load2result = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %descriptor, i32 %offset, i32 0)
438438 %outer_br = fcmp ueq float %load2result, 0x0
439439 br i1 %outer_br, label %.outer_loop_header, label %ret_block
440440 }
450450 main_body:
451451 %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
452452 %tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp
453 %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 %ncoff)
453 %tmp21 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 %ncoff, i32 0)
454454 %tmp22 = load <4 x i32>, <4 x i32> addrspace(4)* %in
455455 %s.buffer = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %tmp22, i32 %ncoff, i32 0)
456456 %s.buffer.float = bitcast i32 %s.buffer to float
469469 main_body:
470470 %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
471471 %tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp
472 %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 %ncoff)
472 %tmp21 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 %ncoff, i32 0)
473473 %tmp22 = load <4 x i32>, <4 x i32> addrspace(4)* %in
474474 %s.buffer = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %tmp22, i32 %ncoff, i32 0)
475475 %s.buffer.float = bitcast i32 %s.buffer to float
488488 main_body:
489489 %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %arg, i32 0
490490 %tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp
491 %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 %ncoff)
491 %tmp21 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 %ncoff, i32 0)
492492 %tmp22 = load <4 x i32>, <4 x i32> addrspace(4)* %in
493493 %s.buffer = call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> %tmp22, i32 %ncoff, i32 0)
494494 %s.buffer.elt = extractelement <8 x i32> %s.buffer, i32 1
580580 %counter = phi i32 [ 0, %main_body ], [ %counter.next, %loop ]
581581 %sum = phi float [ 0.0, %main_body ], [ %sum.next, %loop ]
582582 %offset = shl i32 %counter, 2
583 %v = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 %offset)
583 %v = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 %offset, i32 0)
584584 %sum.next = fadd float %sum, %v
585585 %counter.next = add i32 %counter, 1
586586 %cc = icmp uge i32 %counter.next, %bound
606606 %counter = phi i32 [ 0, %main_body ], [ %counter.next, %loop.a ], [ %counter.next, %loop.b ]
607607 %sum = phi float [ 0.0, %main_body ], [ %sum.next, %loop.a ], [ %sum.next.b, %loop.b ]
608608 %offset = shl i32 %counter, 2
609 %v = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 %offset)
609 %v = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 %offset, i32 0)
610610 %sum.next = fadd float %sum, %v
611611 %counter.next = add i32 %counter, 1
612612 %cc = icmp uge i32 %counter.next, %bound
643643
644644 endif1: ; preds = %if1, %main_body
645645 %tmp13 = extractelement <3 x i32> %arg4, i32 0
646 %tmp97 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 %tmp13)
646 %tmp97 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 %tmp13, i32 0)
647647 ret float %tmp97
648648 }
649649
688688 }
689689
690690 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
691 declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
692691 declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #2
693692 declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #2
694 declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32)
693
694 declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32) #1
695695 declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32)
696696 declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32)
697697 declare <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32>, i32, i32)
77 ; GCN: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
88 define amdgpu_ps void @split_smrd_add_worklist([34 x <8 x i32>] addrspace(4)* byval %arg) #0 {
99 bb:
10 %tmp = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 96)
10 %tmp = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 96, i32 0)
1111 %tmp1 = bitcast float %tmp to i32
1212 br i1 undef, label %bb2, label %bb3
1313
3030 declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
3131 declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
3232 declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
33 declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
33 declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #1
3434
3535 attributes #0 = { nounwind }
3636 attributes #1 = { nounwind readnone }
3030 bb:
3131 %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg1, i64 0, i64 0
3232 %tmp11 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, align 16, !tbaa !0
33 %tmp12 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp11, i32 0)
34 %tmp13 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp11, i32 16)
35 %tmp14 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp11, i32 32)
33 %tmp12 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp11, i32 0, i32 0)
34 %tmp13 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp11, i32 16, i32 0)
35 %tmp14 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp11, i32 32, i32 0)
3636 %tmp15 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(4)* %arg4, i64 0, i64 0
3737 %tmp16 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp15, align 16, !tbaa !0
3838 %tmp17 = add i32 %arg5, %arg7
487487 declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
488488 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
489489
490 declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
490 declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #1
491491 declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #2
492492
493493 attributes #0 = { nounwind }