llvm.org GIT mirror llvm / 337264a
AMDGPU: Remove old sample intrinsics. I did my best to try to update all the uses in tests that just happened to use the old ones to the newer intrinsics. I'm not sure I got all of the immediate operand conversions correct, since the value seems to have been ignored by the old pattern, but I don't think it really matters. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258787 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 4 years ago
16 changed file(s) with 226 addition(s) and 811 deletion(s). Raw diff Collapse all Expand all
14131413 return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
14141414 Op->getVTList(), Ops, VT, MMO);
14151415 }
1416 case AMDGPUIntrinsic::SI_sample:
1417 return LowerSampleIntrinsic(AMDGPUISD::SAMPLE, Op, DAG);
1418 case AMDGPUIntrinsic::SI_sampleb:
1419 return LowerSampleIntrinsic(AMDGPUISD::SAMPLEB, Op, DAG);
1420 case AMDGPUIntrinsic::SI_sampled:
1421 return LowerSampleIntrinsic(AMDGPUISD::SAMPLED, Op, DAG);
1422 case AMDGPUIntrinsic::SI_samplel:
1423 return LowerSampleIntrinsic(AMDGPUISD::SAMPLEL, Op, DAG);
14241416 case AMDGPUIntrinsic::SI_vs_load_input:
14251417 return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
14261418 Op.getOperand(1),
16001592 }
16011593
16021594 return AMDGPUTargetLowering::LowerLOAD(Op, DAG);
1603 }
1604
1605 SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode,
1606 const SDValue &Op,
1607 SelectionDAG &DAG) const {
1608 return DAG.getNode(Opcode, SDLoc(Op), Op.getValueType(), Op.getOperand(1),
1609 Op.getOperand(2),
1610 Op.getOperand(3),
1611 Op.getOperand(4));
16121595 }
16131596
16141597 SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
2222 class SITargetLowering : public AMDGPUTargetLowering {
2323 SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc DL,
2424 SDValue Chain, unsigned Offset, bool Signed) const;
25 SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op,
26 SelectionDAG &DAG) const;
2725 SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
2826 SelectionDAG &DAG) const override;
2927
24412441 (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc)
24422442 >;
24432443
2444 multiclass ImageLoadPatterns {
2445 def : ImageLoadPattern ;
2446 def : ImageLoadArrayPattern ;
2447 }
2448
2449 multiclass ImageLoadMSAAPatterns {
2450 def : ImageLoadMSAAPattern ;
2451 def : ImageLoadArrayMSAAPattern ;
2452 }
2453
2454 defm : ImageLoadPatterns;
2455 defm : ImageLoadPatterns;
2456
2457 defm : ImageLoadMSAAPatterns;
2458 defm : ImageLoadMSAAPatterns;
2459
2460 /* Image resource information */
2461 def : Pat <
2462 (int_SI_resinfo i32:$mipid, v32i8:$rsrc, imm),
2463 (IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 0, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
2464 >;
2465
2466 def : Pat <
2467 (int_SI_resinfo i32:$mipid, v32i8:$rsrc, TEX_ARRAY),
2468 (IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
2469 >;
2470
2471 def : Pat <
2472 (int_SI_resinfo i32:$mipid, v32i8:$rsrc, TEX_ARRAY_MSAA),
2473 (IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
2474 >;
2475
24762444 /********** ============================================ **********/
24772445 /********** Extraction, Insertion, Building and Casting **********/
24782446 /********** ============================================ **********/
171171 def int_SI_image_load_mip : Image;
172172 def int_SI_getresinfo : Image;
173173
174 // Deprecated image and sample intrinsics.
175 class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
176
177 def int_SI_sample : Sample;
178 def int_SI_sampleb : Sample;
179 def int_SI_sampled : Sample;
180 def int_SI_samplel : Sample;
181 def int_SI_imageload : Intrinsic <[llvm_v4i32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
182 def int_SI_resinfo : Intrinsic <[llvm_v4i32_ty], [llvm_i32_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
183
184174 /* Interpolation Intrinsics */
185175
186176 def int_SI_fs_constant : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
33 ; GCN-LABEL: {{^}}main:
44 ; SI: v_lshl_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
55 ; VI: v_lshlrev_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, 1
6
76 define void @main() #0 {
8 main_body:
9 %0 = fptosi float undef to i32
10 %1 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> undef, <32 x i8> undef, i32 2)
11 %2 = extractelement <4 x i32> %1, i32 0
12 %3 = and i32 %0, 7
13 %4 = shl i32 1, %3
14 %5 = and i32 %2, %4
15 %6 = icmp eq i32 %5, 0
16 %.10 = select i1 %6, float 0.000000e+00, float undef
17 %7 = call i32 @llvm.SI.packf16(float undef, float %.10)
18 %8 = bitcast i32 %7 to float
19 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float undef, float %8, float undef, float %8)
7 bb:
8 %tmp = fptosi float undef to i32
9 %tmp1 = call <4 x float> @llvm.SI.image.load.v4i32(<4 x i32> undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
10 %tmp2.f = extractelement <4 x float> %tmp1, i32 0
11 %tmp2 = bitcast float %tmp2.f to i32
12 %tmp3 = and i32 %tmp, 7
13 %tmp4 = shl i32 1, %tmp3
14 %tmp5 = and i32 %tmp2, %tmp4
15 %tmp6 = icmp eq i32 %tmp5, 0
16 %tmp7 = select i1 %tmp6, float 0.000000e+00, float undef
17 %tmp8 = call i32 @llvm.SI.packf16(float undef, float %tmp7)
18 %tmp9 = bitcast i32 %tmp8 to float
19 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float undef, float %tmp9, float undef, float %tmp9)
2020 ret void
2121 }
2222
23 ; Function Attrs: nounwind readnone
24 declare <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32>, <32 x i8>, i32) #1
25
26 ; Function Attrs: nounwind readnone
23 declare <4 x float> @llvm.SI.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
2724 declare i32 @llvm.SI.packf16(float, float) #1
28
2925 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
3026
3127 attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
0 ;RUN: llc < %s -march=amdgcn -mcpu=verde | FileCheck %s
1 ;RUN: llc < %s -march=amdgcn -mcpu=tonga | FileCheck %s
2
3 ; CHECK-LABEL: {{^}}v1:
4 ; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 13
5 define void @v1(i32 %a1) #0 {
6 entry:
7 %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
8 %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
9 %2 = extractelement <4 x float> %1, i32 0
10 %3 = extractelement <4 x float> %1, i32 2
11 %4 = extractelement <4 x float> %1, i32 3
12 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
13 ret void
14 }
15
16 ; CHECK-LABEL: {{^}}v2:
17 ; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 11
18 define void @v2(i32 %a1) #0 {
19 entry:
20 %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
21 %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
22 %2 = extractelement <4 x float> %1, i32 0
23 %3 = extractelement <4 x float> %1, i32 1
24 %4 = extractelement <4 x float> %1, i32 3
25 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
26 ret void
27 }
28
29 ; CHECK-LABEL: {{^}}v3:
30 ; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 14
31 define void @v3(i32 %a1) #0 {
32 entry:
33 %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
34 %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
35 %2 = extractelement <4 x float> %1, i32 1
36 %3 = extractelement <4 x float> %1, i32 2
37 %4 = extractelement <4 x float> %1, i32 3
38 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
39 ret void
40 }
41
42 ; CHECK-LABEL: {{^}}v4:
43 ; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 7
44 define void @v4(i32 %a1) #0 {
45 entry:
46 %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
47 %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
48 %2 = extractelement <4 x float> %1, i32 0
49 %3 = extractelement <4 x float> %1, i32 1
50 %4 = extractelement <4 x float> %1, i32 2
51 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
52 ret void
53 }
54
55 ; CHECK-LABEL: {{^}}v5:
56 ; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 10
57 define void @v5(i32 %a1) #0 {
58 entry:
59 %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
60 %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
61 %2 = extractelement <4 x float> %1, i32 1
62 %3 = extractelement <4 x float> %1, i32 3
63 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
64 ret void
65 }
66
67 ; CHECK-LABEL: {{^}}v6:
68 ; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 6
69 define void @v6(i32 %a1) #0 {
70 entry:
71 %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
72 %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
73 %2 = extractelement <4 x float> %1, i32 1
74 %3 = extractelement <4 x float> %1, i32 2
75 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
76 ret void
77 }
78
79 ; CHECK-LABEL: {{^}}v7:
80 ; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 9
81 define void @v7(i32 %a1) #0 {
82 entry:
83 %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
84 %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
85 %2 = extractelement <4 x float> %1, i32 0
86 %3 = extractelement <4 x float> %1, i32 3
87 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
88 ret void
89 }
90
91 declare <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) readnone
92
93 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
94
95 attributes #0 = { "ShaderType"="0" }
+0
-132
test/CodeGen/AMDGPU/llvm.SI.imageload.ll less more
None ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
1 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
2
3 ;CHECK-DAG: image_load {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, -1
4 ;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, 0
5 ;CHECK-DAG: image_load_mip {{v[0-9]+}}, 2, 0, 0, 0
6 ;CHECK-DAG: image_load_mip {{v[0-9]+}}, 1, 0, 0, 0
7 ;CHECK-DAG: image_load_mip {{v[0-9]+}}, 4, 0, 0, 0
8 ;CHECK-DAG: image_load_mip {{v[0-9]+}}, 8, 0, 0, 0
9 ;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 5, 0, 0, 0
10 ;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 12, 0, 0, -1
11 ;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 7, 0, 0, 0
12 ;CHECK-DAG: image_load_mip {{v[0-9]+}}, 8, 0, 0, -1
13
14 define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
15 %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
16 %v2 = insertelement <4 x i32> undef, i32 %a1, i32 1
17 %v3 = insertelement <4 x i32> undef, i32 %a1, i32 2
18 %v4 = insertelement <4 x i32> undef, i32 %a1, i32 3
19 %v5 = insertelement <4 x i32> undef, i32 %a2, i32 0
20 %v6 = insertelement <4 x i32> undef, i32 %a2, i32 1
21 %v10 = insertelement <4 x i32> undef, i32 %a3, i32 1
22 %v11 = insertelement <4 x i32> undef, i32 %a3, i32 2
23 %v15 = insertelement <4 x i32> undef, i32 %a4, i32 2
24 %v16 = insertelement <4 x i32> undef, i32 %a4, i32 3
25 %res1 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v1,
26 <32 x i8> undef, i32 1)
27 %res2 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v2,
28 <32 x i8> undef, i32 2)
29 %res3 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v3,
30 <32 x i8> undef, i32 3)
31 %res4 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v4,
32 <32 x i8> undef, i32 4)
33 %res5 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v5,
34 <32 x i8> undef, i32 5)
35 %res6 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v6,
36 <32 x i8> undef, i32 6)
37 %res10 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v10,
38 <32 x i8> undef, i32 10)
39 %res11 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v11,
40 <32 x i8> undef, i32 11)
41 %res15 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v15,
42 <32 x i8> undef, i32 15)
43 %res16 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v16,
44 <32 x i8> undef, i32 16)
45 %e1 = extractelement <4 x i32> %res1, i32 0
46 %e2 = extractelement <4 x i32> %res2, i32 1
47 %e3 = extractelement <4 x i32> %res3, i32 2
48 %e4 = extractelement <4 x i32> %res4, i32 3
49 %t0 = extractelement <4 x i32> %res5, i32 0
50 %t1 = extractelement <4 x i32> %res5, i32 1
51 %e5 = add i32 %t0, %t1
52 %t2 = extractelement <4 x i32> %res6, i32 0
53 %t3 = extractelement <4 x i32> %res6, i32 2
54 %e6 = add i32 %t2, %t3
55 %t10 = extractelement <4 x i32> %res10, i32 2
56 %t11 = extractelement <4 x i32> %res10, i32 3
57 %e10 = add i32 %t10, %t11
58 %t12 = extractelement <4 x i32> %res11, i32 0
59 %t13 = extractelement <4 x i32> %res11, i32 1
60 %t14 = extractelement <4 x i32> %res11, i32 2
61 %t15 = add i32 %t12, %t13
62 %e11 = add i32 %t14, %t15
63 %t28 = extractelement <4 x i32> %res15, i32 0
64 %t29 = extractelement <4 x i32> %res15, i32 1
65 %t30 = extractelement <4 x i32> %res15, i32 2
66 %t31 = extractelement <4 x i32> %res15, i32 3
67 %t32 = add i32 %t28, %t29
68 %t33 = add i32 %t30, %t31
69 %e15 = add i32 %t32, %t33
70 %e16 = extractelement <4 x i32> %res16, i32 3
71 %s1 = add i32 %e1, %e2
72 %s2 = add i32 %s1, %e3
73 %s3 = add i32 %s2, %e4
74 %s4 = add i32 %s3, %e5
75 %s5 = add i32 %s4, %e6
76 %s9 = add i32 %s5, %e10
77 %s10 = add i32 %s9, %e11
78 %s14 = add i32 %s10, %e15
79 %s15 = add i32 %s14, %e16
80 %s16 = bitcast i32 %s15 to float
81 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %s16, float %s16, float %s16, float %s16)
82 ret void
83 }
84
85 ; Test that coordinates are stored in vgprs and not sgprs
86 ; CHECK: vgpr_coords
87 ; CHECK: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}
88 define void @vgpr_coords(float addrspace(2)* addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
89 main_body:
90 %20 = getelementptr float addrspace(2)*, float addrspace(2)* addrspace(2)* %0, i32 0
91 %21 = load float addrspace(2)*, float addrspace(2)* addrspace(2)* %20, !tbaa !2
92 %22 = getelementptr float, float addrspace(2)* %21, i32 0
93 %23 = load float, float addrspace(2)* %22, !tbaa !2, !invariant.load !1
94 %24 = getelementptr float, float addrspace(2)* %21, i32 1
95 %25 = load float, float addrspace(2)* %24, !tbaa !2, !invariant.load !1
96 %26 = getelementptr float, float addrspace(2)* %21, i32 4
97 %27 = load float, float addrspace(2)* %26, !tbaa !2, !invariant.load !1
98 %28 = getelementptr <32 x i8>, <32 x i8> addrspace(2)* %2, i32 0
99 %29 = load <32 x i8>, <32 x i8> addrspace(2)* %28, !tbaa !2
100 %30 = bitcast float %27 to i32
101 %31 = bitcast float %23 to i32
102 %32 = bitcast float %25 to i32
103 %33 = insertelement <4 x i32> undef, i32 %31, i32 0
104 %34 = insertelement <4 x i32> %33, i32 %32, i32 1
105 %35 = insertelement <4 x i32> %34, i32 %30, i32 2
106 %36 = insertelement <4 x i32> %35, i32 undef, i32 3
107 %37 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %36, <32 x i8> %29, i32 2)
108 %38 = extractelement <4 x i32> %37, i32 0
109 %39 = extractelement <4 x i32> %37, i32 1
110 %40 = extractelement <4 x i32> %37, i32 2
111 %41 = extractelement <4 x i32> %37, i32 3
112 %42 = bitcast i32 %38 to float
113 %43 = bitcast i32 %39 to float
114 %44 = bitcast i32 %40 to float
115 %45 = bitcast i32 %41 to float
116 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %42, float %43, float %44, float %45)
117 ret void
118 }
119
120 declare <4 x i32> @llvm.SI.imageload.(<4 x i32>, <32 x i8>, i32) readnone
121 ; Function Attrs: nounwind readnone
122 declare <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32>, <32 x i8>, i32) #1
123
124 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
125
126 attributes #0 = { "ShaderType"="0" }
127 attributes #1 = { nounwind readnone }
128
129 !0 = !{!"const", null}
130 !1 = !{}
131 !2 = !{!0, !0, i64 0, i32 1}
+0
-111
test/CodeGen/AMDGPU/llvm.SI.resinfo.ll less more
None ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
2
3 ; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, -1
4 ; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, 0
5 ; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 2, 0, 0, 0
6 ; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 1, 0, 0, 0
7 ; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 4, 0, 0, 0
8 ; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 8, 0, 0, 0
9 ; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 5, 0, 0, 0
10 ; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 9, 0, 0, 0
11 ; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 6, 0, 0, 0
12 ; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 10, 0, 0, -1
13 ; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 12, 0, 0, -1
14 ; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 7, 0, 0, 0
15 ; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 11, 0, 0, 0
16 ; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 13, 0, 0, 0
17 ; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 14, 0, 0, 0
18 ; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 8, 0, 0, -1
19
20 define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8,
21 i32 %a9, i32 %a10, i32 %a11, i32 %a12, i32 %a13, i32 %a14, i32 %a15, i32 %a16) {
22 %res1 = call <4 x i32> @llvm.SI.resinfo(i32 %a1, <32 x i8> undef, i32 1)
23 %res2 = call <4 x i32> @llvm.SI.resinfo(i32 %a2, <32 x i8> undef, i32 2)
24 %res3 = call <4 x i32> @llvm.SI.resinfo(i32 %a3, <32 x i8> undef, i32 3)
25 %res4 = call <4 x i32> @llvm.SI.resinfo(i32 %a4, <32 x i8> undef, i32 4)
26 %res5 = call <4 x i32> @llvm.SI.resinfo(i32 %a5, <32 x i8> undef, i32 5)
27 %res6 = call <4 x i32> @llvm.SI.resinfo(i32 %a6, <32 x i8> undef, i32 6)
28 %res7 = call <4 x i32> @llvm.SI.resinfo(i32 %a7, <32 x i8> undef, i32 7)
29 %res8 = call <4 x i32> @llvm.SI.resinfo(i32 %a8, <32 x i8> undef, i32 8)
30 %res9 = call <4 x i32> @llvm.SI.resinfo(i32 %a9, <32 x i8> undef, i32 9)
31 %res10 = call <4 x i32> @llvm.SI.resinfo(i32 %a10, <32 x i8> undef, i32 10)
32 %res11 = call <4 x i32> @llvm.SI.resinfo(i32 %a11, <32 x i8> undef, i32 11)
33 %res12 = call <4 x i32> @llvm.SI.resinfo(i32 %a12, <32 x i8> undef, i32 12)
34 %res13 = call <4 x i32> @llvm.SI.resinfo(i32 %a13, <32 x i8> undef, i32 13)
35 %res14 = call <4 x i32> @llvm.SI.resinfo(i32 %a14, <32 x i8> undef, i32 14)
36 %res15 = call <4 x i32> @llvm.SI.resinfo(i32 %a15, <32 x i8> undef, i32 15)
37 %res16 = call <4 x i32> @llvm.SI.resinfo(i32 %a16, <32 x i8> undef, i32 16)
38 %e1 = extractelement <4 x i32> %res1, i32 0
39 %e2 = extractelement <4 x i32> %res2, i32 1
40 %e3 = extractelement <4 x i32> %res3, i32 2
41 %e4 = extractelement <4 x i32> %res4, i32 3
42 %t0 = extractelement <4 x i32> %res5, i32 0
43 %t1 = extractelement <4 x i32> %res5, i32 1
44 %e5 = add i32 %t0, %t1
45 %t2 = extractelement <4 x i32> %res6, i32 0
46 %t3 = extractelement <4 x i32> %res6, i32 2
47 %e6 = add i32 %t2, %t3
48 %t4 = extractelement <4 x i32> %res7, i32 0
49 %t5 = extractelement <4 x i32> %res7, i32 3
50 %e7 = add i32 %t4, %t5
51 %t6 = extractelement <4 x i32> %res8, i32 1
52 %t7 = extractelement <4 x i32> %res8, i32 2
53 %e8 = add i32 %t6, %t7
54 %t8 = extractelement <4 x i32> %res9, i32 1
55 %t9 = extractelement <4 x i32> %res9, i32 3
56 %e9 = add i32 %t8, %t9
57 %t10 = extractelement <4 x i32> %res10, i32 2
58 %t11 = extractelement <4 x i32> %res10, i32 3
59 %e10 = add i32 %t10, %t11
60 %t12 = extractelement <4 x i32> %res11, i32 0
61 %t13 = extractelement <4 x i32> %res11, i32 1
62 %t14 = extractelement <4 x i32> %res11, i32 2
63 %t15 = add i32 %t12, %t13
64 %e11 = add i32 %t14, %t15
65 %t16 = extractelement <4 x i32> %res12, i32 0
66 %t17 = extractelement <4 x i32> %res12, i32 1
67 %t18 = extractelement <4 x i32> %res12, i32 3
68 %t19 = add i32 %t16, %t17
69 %e12 = add i32 %t18, %t19
70 %t20 = extractelement <4 x i32> %res13, i32 0
71 %t21 = extractelement <4 x i32> %res13, i32 2
72 %t22 = extractelement <4 x i32> %res13, i32 3
73 %t23 = add i32 %t20, %t21
74 %e13 = add i32 %t22, %t23
75 %t24 = extractelement <4 x i32> %res14, i32 1
76 %t25 = extractelement <4 x i32> %res14, i32 2
77 %t26 = extractelement <4 x i32> %res14, i32 3
78 %t27 = add i32 %t24, %t25
79 %e14 = add i32 %t26, %t27
80 %t28 = extractelement <4 x i32> %res15, i32 0
81 %t29 = extractelement <4 x i32> %res15, i32 1
82 %t30 = extractelement <4 x i32> %res15, i32 2
83 %t31 = extractelement <4 x i32> %res15, i32 3
84 %t32 = add i32 %t28, %t29
85 %t33 = add i32 %t30, %t31
86 %e15 = add i32 %t32, %t33
87 %e16 = extractelement <4 x i32> %res16, i32 3
88 %s1 = add i32 %e1, %e2
89 %s2 = add i32 %s1, %e3
90 %s3 = add i32 %s2, %e4
91 %s4 = add i32 %s3, %e5
92 %s5 = add i32 %s4, %e6
93 %s6 = add i32 %s5, %e7
94 %s7 = add i32 %s6, %e8
95 %s8 = add i32 %s7, %e9
96 %s9 = add i32 %s8, %e10
97 %s10 = add i32 %s9, %e11
98 %s11 = add i32 %s10, %e12
99 %s12 = add i32 %s11, %e13
100 %s13 = add i32 %s12, %e14
101 %s14 = add i32 %s13, %e15
102 %s15 = add i32 %s14, %e16
103 %s16 = bitcast i32 %s15 to float
104 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %s16, float %s16, float %s16, float %s16)
105 ret void
106 }
107
108 declare <4 x i32> @llvm.SI.resinfo(i32, <32 x i8>, i32) readnone
109
110 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+0
-96
test/CodeGen/AMDGPU/llvm.SI.sample-masked.ll less more
None ;RUN: llc < %s -march=amdgcn -mcpu=verde | FileCheck %s
1 ;RUN: llc < %s -march=amdgcn -mcpu=tonga | FileCheck %s
2
3 ; CHECK-LABEL: {{^}}v1:
4 ; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 13
5 define void @v1(i32 %a1) #0 {
6 entry:
7 %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
8 %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
9 %2 = extractelement <4 x float> %1, i32 0
10 %3 = extractelement <4 x float> %1, i32 2
11 %4 = extractelement <4 x float> %1, i32 3
12 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
13 ret void
14 }
15
16 ; CHECK-LABEL: {{^}}v2:
17 ; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 11
18 define void @v2(i32 %a1) #0 {
19 entry:
20 %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
21 %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
22 %2 = extractelement <4 x float> %1, i32 0
23 %3 = extractelement <4 x float> %1, i32 1
24 %4 = extractelement <4 x float> %1, i32 3
25 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
26 ret void
27 }
28
29 ; CHECK-LABEL: {{^}}v3:
30 ; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 14
31 define void @v3(i32 %a1) #0 {
32 entry:
33 %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
34 %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
35 %2 = extractelement <4 x float> %1, i32 1
36 %3 = extractelement <4 x float> %1, i32 2
37 %4 = extractelement <4 x float> %1, i32 3
38 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
39 ret void
40 }
41
42 ; CHECK-LABEL: {{^}}v4:
43 ; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 7
44 define void @v4(i32 %a1) #0 {
45 entry:
46 %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
47 %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
48 %2 = extractelement <4 x float> %1, i32 0
49 %3 = extractelement <4 x float> %1, i32 1
50 %4 = extractelement <4 x float> %1, i32 2
51 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
52 ret void
53 }
54
55 ; CHECK-LABEL: {{^}}v5:
56 ; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 10
57 define void @v5(i32 %a1) #0 {
58 entry:
59 %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
60 %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
61 %2 = extractelement <4 x float> %1, i32 1
62 %3 = extractelement <4 x float> %1, i32 3
63 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
64 ret void
65 }
66
67 ; CHECK-LABEL: {{^}}v6:
68 ; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 6
69 define void @v6(i32 %a1) #0 {
70 entry:
71 %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
72 %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
73 %2 = extractelement <4 x float> %1, i32 1
74 %3 = extractelement <4 x float> %1, i32 2
75 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
76 ret void
77 }
78
79 ; CHECK-LABEL: {{^}}v7:
80 ; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 9
81 define void @v7(i32 %a1) #0 {
82 entry:
83 %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
84 %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
85 %2 = extractelement <4 x float> %1, i32 0
86 %3 = extractelement <4 x float> %1, i32 3
87 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
88 ret void
89 }
90
91 declare <4 x float> @llvm.SI.sample.v1i32(<1 x i32>, <32 x i8>, <16 x i8>, i32) readnone
92
93 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
94
95 attributes #0 = { "ShaderType"="0" }
+0
-160
test/CodeGen/AMDGPU/llvm.SI.sample.ll less more
None ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
1 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
2
3 ;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 15
4 ;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 3
5 ;CHECK-DAG: image_sample {{v[0-9]+}}, 2
6 ;CHECK-DAG: image_sample {{v[0-9]+}}, 1
7 ;CHECK-DAG: image_sample {{v[0-9]+}}, 4
8 ;CHECK-DAG: image_sample {{v[0-9]+}}, 8
9 ;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 5
10 ;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 9
11 ;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 6
12 ;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 10
13 ;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 12
14 ;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 7
15 ;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 11
16 ;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 13
17 ;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 14
18 ;CHECK-DAG: image_sample {{v[0-9]+}}, 8
19
20 define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) #0 {
21 %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
22 %v2 = insertelement <4 x i32> undef, i32 %a1, i32 1
23 %v3 = insertelement <4 x i32> undef, i32 %a1, i32 2
24 %v4 = insertelement <4 x i32> undef, i32 %a1, i32 3
25 %v5 = insertelement <4 x i32> undef, i32 %a2, i32 0
26 %v6 = insertelement <4 x i32> undef, i32 %a2, i32 1
27 %v7 = insertelement <4 x i32> undef, i32 %a2, i32 2
28 %v8 = insertelement <4 x i32> undef, i32 %a2, i32 3
29 %v9 = insertelement <4 x i32> undef, i32 %a3, i32 0
30 %v10 = insertelement <4 x i32> undef, i32 %a3, i32 1
31 %v11 = insertelement <4 x i32> undef, i32 %a3, i32 2
32 %v12 = insertelement <4 x i32> undef, i32 %a3, i32 3
33 %v13 = insertelement <4 x i32> undef, i32 %a4, i32 0
34 %v14 = insertelement <4 x i32> undef, i32 %a4, i32 1
35 %v15 = insertelement <4 x i32> undef, i32 %a4, i32 2
36 %v16 = insertelement <4 x i32> undef, i32 %a4, i32 3
37 %res1 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v1,
38 <32 x i8> undef, <16 x i8> undef, i32 1)
39 %res2 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v2,
40 <32 x i8> undef, <16 x i8> undef, i32 2)
41 %res3 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v3,
42 <32 x i8> undef, <16 x i8> undef, i32 3)
43 %res4 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v4,
44 <32 x i8> undef, <16 x i8> undef, i32 4)
45 %res5 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v5,
46 <32 x i8> undef, <16 x i8> undef, i32 5)
47 %res6 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v6,
48 <32 x i8> undef, <16 x i8> undef, i32 6)
49 %res7 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v7,
50 <32 x i8> undef, <16 x i8> undef, i32 7)
51 %res8 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v8,
52 <32 x i8> undef, <16 x i8> undef, i32 8)
53 %res9 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v9,
54 <32 x i8> undef, <16 x i8> undef, i32 9)
55 %res10 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v10,
56 <32 x i8> undef, <16 x i8> undef, i32 10)
57 %res11 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v11,
58 <32 x i8> undef, <16 x i8> undef, i32 11)
59 %res12 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v12,
60 <32 x i8> undef, <16 x i8> undef, i32 12)
61 %res13 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v13,
62 <32 x i8> undef, <16 x i8> undef, i32 13)
63 %res14 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v14,
64 <32 x i8> undef, <16 x i8> undef, i32 14)
65 %res15 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v15,
66 <32 x i8> undef, <16 x i8> undef, i32 15)
67 %res16 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v16,
68 <32 x i8> undef, <16 x i8> undef, i32 16)
69 %e1 = extractelement <4 x float> %res1, i32 0
70 %e2 = extractelement <4 x float> %res2, i32 1
71 %e3 = extractelement <4 x float> %res3, i32 2
72 %e4 = extractelement <4 x float> %res4, i32 3
73 %t0 = extractelement <4 x float> %res5, i32 0
74 %t1 = extractelement <4 x float> %res5, i32 1
75 %e5 = fadd float %t0, %t1
76 %t2 = extractelement <4 x float> %res6, i32 0
77 %t3 = extractelement <4 x float> %res6, i32 2
78 %e6 = fadd float %t2, %t3
79 %t4 = extractelement <4 x float> %res7, i32 0
80 %t5 = extractelement <4 x float> %res7, i32 3
81 %e7 = fadd float %t4, %t5
82 %t6 = extractelement <4 x float> %res8, i32 1
83 %t7 = extractelement <4 x float> %res8, i32 2
84 %e8 = fadd float %t6, %t7
85 %t8 = extractelement <4 x float> %res9, i32 1
86 %t9 = extractelement <4 x float> %res9, i32 3
87 %e9 = fadd float %t8, %t9
88 %t10 = extractelement <4 x float> %res10, i32 2
89 %t11 = extractelement <4 x float> %res10, i32 3
90 %e10 = fadd float %t10, %t11
91 %t12 = extractelement <4 x float> %res11, i32 0
92 %t13 = extractelement <4 x float> %res11, i32 1
93 %t14 = extractelement <4 x float> %res11, i32 2
94 %t15 = fadd float %t12, %t13
95 %e11 = fadd float %t14, %t15
96 %t16 = extractelement <4 x float> %res12, i32 0
97 %t17 = extractelement <4 x float> %res12, i32 1
98 %t18 = extractelement <4 x float> %res12, i32 3
99 %t19 = fadd float %t16, %t17
100 %e12 = fadd float %t18, %t19
101 %t20 = extractelement <4 x float> %res13, i32 0
102 %t21 = extractelement <4 x float> %res13, i32 2
103 %t22 = extractelement <4 x float> %res13, i32 3
104 %t23 = fadd float %t20, %t21
105 %e13 = fadd float %t22, %t23
106 %t24 = extractelement <4 x float> %res14, i32 1
107 %t25 = extractelement <4 x float> %res14, i32 2
108 %t26 = extractelement <4 x float> %res14, i32 3
109 %t27 = fadd float %t24, %t25
110 %e14 = fadd float %t26, %t27
111 %t28 = extractelement <4 x float> %res15, i32 0
112 %t29 = extractelement <4 x float> %res15, i32 1
113 %t30 = extractelement <4 x float> %res15, i32 2
114 %t31 = extractelement <4 x float> %res15, i32 3
115 %t32 = fadd float %t28, %t29
116 %t33 = fadd float %t30, %t31
117 %e15 = fadd float %t32, %t33
118 %e16 = extractelement <4 x float> %res16, i32 3
119 %s1 = fadd float %e1, %e2
120 %s2 = fadd float %s1, %e3
121 %s3 = fadd float %s2, %e4
122 %s4 = fadd float %s3, %e5
123 %s5 = fadd float %s4, %e6
124 %s6 = fadd float %s5, %e7
125 %s7 = fadd float %s6, %e8
126 %s8 = fadd float %s7, %e9
127 %s9 = fadd float %s8, %e10
128 %s10 = fadd float %s9, %e11
129 %s11 = fadd float %s10, %e12
130 %s12 = fadd float %s11, %e13
131 %s13 = fadd float %s12, %e14
132 %s14 = fadd float %s13, %e15
133 %s15 = fadd float %s14, %e16
134 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %s15, float %s15, float %s15, float %s15)
135 ret void
136 }
137
138 ; CHECK: {{^}}v1:
139 ; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 15
140 define void @v1(i32 %a1) #0 {
141 entry:
142 %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
143 %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
144 %2 = extractelement <4 x float> %1, i32 0
145 %3 = extractelement <4 x float> %1, i32 1
146 %4 = extractelement <4 x float> %1, i32 2
147 %5 = extractelement <4 x float> %1, i32 3
148 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %5)
149 ret void
150 }
151
152
153 declare <4 x float> @llvm.SI.sample.v1i32(<1 x i32>, <32 x i8>, <16 x i8>, i32) readnone
154
155 declare <4 x float> @llvm.SI.sample.(<4 x i32>, <32 x i8>, <16 x i8>, i32) readnone
156
157 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
158
159 attributes #0 = { "ShaderType"="0" }
+0
-143
test/CodeGen/AMDGPU/llvm.SI.sampled.ll less more
None ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
1 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
2
3 ;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 15
4 ;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 3
5 ;CHECK-DAG: image_sample_d {{v[0-9]+}}, 2
6 ;CHECK-DAG: image_sample_d {{v[0-9]+}}, 1
7 ;CHECK-DAG: image_sample_d {{v[0-9]+}}, 4
8 ;CHECK-DAG: image_sample_d {{v[0-9]+}}, 8
9 ;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 5
10 ;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 9
11 ;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 6
12 ;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 10
13 ;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 12
14 ;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 7
15 ;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 11
16 ;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 13
17 ;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 14
18 ;CHECK-DAG: image_sample_d {{v[0-9]+}}, 8
19
20 define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) #0 {
21 %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
22 %v2 = insertelement <4 x i32> undef, i32 %a1, i32 1
23 %v3 = insertelement <4 x i32> undef, i32 %a1, i32 2
24 %v4 = insertelement <4 x i32> undef, i32 %a1, i32 3
25 %v5 = insertelement <4 x i32> undef, i32 %a2, i32 0
26 %v6 = insertelement <4 x i32> undef, i32 %a2, i32 1
27 %v7 = insertelement <4 x i32> undef, i32 %a2, i32 2
28 %v8 = insertelement <4 x i32> undef, i32 %a2, i32 3
29 %v9 = insertelement <4 x i32> undef, i32 %a3, i32 0
30 %v10 = insertelement <4 x i32> undef, i32 %a3, i32 1
31 %v11 = insertelement <4 x i32> undef, i32 %a3, i32 2
32 %v12 = insertelement <4 x i32> undef, i32 %a3, i32 3
33 %v13 = insertelement <4 x i32> undef, i32 %a4, i32 0
34 %v14 = insertelement <4 x i32> undef, i32 %a4, i32 1
35 %v15 = insertelement <4 x i32> undef, i32 %a4, i32 2
36 %v16 = insertelement <4 x i32> undef, i32 %a4, i32 3
37 %res1 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v1,
38 <32 x i8> undef, <16 x i8> undef, i32 1)
39 %res2 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v2,
40 <32 x i8> undef, <16 x i8> undef, i32 2)
41 %res3 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v3,
42 <32 x i8> undef, <16 x i8> undef, i32 3)
43 %res4 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v4,
44 <32 x i8> undef, <16 x i8> undef, i32 4)
45 %res5 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v5,
46 <32 x i8> undef, <16 x i8> undef, i32 5)
47 %res6 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v6,
48 <32 x i8> undef, <16 x i8> undef, i32 6)
49 %res7 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v7,
50 <32 x i8> undef, <16 x i8> undef, i32 7)
51 %res8 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v8,
52 <32 x i8> undef, <16 x i8> undef, i32 8)
53 %res9 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v9,
54 <32 x i8> undef, <16 x i8> undef, i32 9)
55 %res10 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v10,
56 <32 x i8> undef, <16 x i8> undef, i32 10)
57 %res11 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v11,
58 <32 x i8> undef, <16 x i8> undef, i32 11)
59 %res12 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v12,
60 <32 x i8> undef, <16 x i8> undef, i32 12)
61 %res13 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v13,
62 <32 x i8> undef, <16 x i8> undef, i32 13)
63 %res14 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v14,
64 <32 x i8> undef, <16 x i8> undef, i32 14)
65 %res15 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v15,
66 <32 x i8> undef, <16 x i8> undef, i32 15)
67 %res16 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v16,
68 <32 x i8> undef, <16 x i8> undef, i32 16)
69 %e1 = extractelement <4 x float> %res1, i32 0
70 %e2 = extractelement <4 x float> %res2, i32 1
71 %e3 = extractelement <4 x float> %res3, i32 2
72 %e4 = extractelement <4 x float> %res4, i32 3
73 %t0 = extractelement <4 x float> %res5, i32 0
74 %t1 = extractelement <4 x float> %res5, i32 1
75 %e5 = fadd float %t0, %t1
76 %t2 = extractelement <4 x float> %res6, i32 0
77 %t3 = extractelement <4 x float> %res6, i32 2
78 %e6 = fadd float %t2, %t3
79 %t4 = extractelement <4 x float> %res7, i32 0
80 %t5 = extractelement <4 x float> %res7, i32 3
81 %e7 = fadd float %t4, %t5
82 %t6 = extractelement <4 x float> %res8, i32 1
83 %t7 = extractelement <4 x float> %res8, i32 2
84 %e8 = fadd float %t6, %t7
85 %t8 = extractelement <4 x float> %res9, i32 1
86 %t9 = extractelement <4 x float> %res9, i32 3
87 %e9 = fadd float %t8, %t9
88 %t10 = extractelement <4 x float> %res10, i32 2
89 %t11 = extractelement <4 x float> %res10, i32 3
90 %e10 = fadd float %t10, %t11
91 %t12 = extractelement <4 x float> %res11, i32 0
92 %t13 = extractelement <4 x float> %res11, i32 1
93 %t14 = extractelement <4 x float> %res11, i32 2
94 %t15 = fadd float %t12, %t13
95 %e11 = fadd float %t14, %t15
96 %t16 = extractelement <4 x float> %res12, i32 0
97 %t17 = extractelement <4 x float> %res12, i32 1
98 %t18 = extractelement <4 x float> %res12, i32 3
99 %t19 = fadd float %t16, %t17
100 %e12 = fadd float %t18, %t19
101 %t20 = extractelement <4 x float> %res13, i32 0
102 %t21 = extractelement <4 x float> %res13, i32 2
103 %t22 = extractelement <4 x float> %res13, i32 3
104 %t23 = fadd float %t20, %t21
105 %e13 = fadd float %t22, %t23
106 %t24 = extractelement <4 x float> %res14, i32 1
107 %t25 = extractelement <4 x float> %res14, i32 2
108 %t26 = extractelement <4 x float> %res14, i32 3
109 %t27 = fadd float %t24, %t25
110 %e14 = fadd float %t26, %t27
111 %t28 = extractelement <4 x float> %res15, i32 0
112 %t29 = extractelement <4 x float> %res15, i32 1
113 %t30 = extractelement <4 x float> %res15, i32 2
114 %t31 = extractelement <4 x float> %res15, i32 3
115 %t32 = fadd float %t28, %t29
116 %t33 = fadd float %t30, %t31
117 %e15 = fadd float %t32, %t33
118 %e16 = extractelement <4 x float> %res16, i32 3
119 %s1 = fadd float %e1, %e2
120 %s2 = fadd float %s1, %e3
121 %s3 = fadd float %s2, %e4
122 %s4 = fadd float %s3, %e5
123 %s5 = fadd float %s4, %e6
124 %s6 = fadd float %s5, %e7
125 %s7 = fadd float %s6, %e8
126 %s8 = fadd float %s7, %e9
127 %s9 = fadd float %s8, %e10
128 %s10 = fadd float %s9, %e11
129 %s11 = fadd float %s10, %e12
130 %s12 = fadd float %s11, %e13
131 %s13 = fadd float %s12, %e14
132 %s14 = fadd float %s13, %e15
133 %s15 = fadd float %s14, %e16
134 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %s15, float %s15, float %s15, float %s15)
135 ret void
136 }
137
138 declare <4 x float> @llvm.SI.sampled.(<4 x i32>, <32 x i8>, <16 x i8>, i32) readnone
139
140 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
141
142 attributes #0 = { "ShaderType"="0" }
22
33 ; This test checks that no VGPR to SGPR copies are created by the register
44 ; allocator.
5
6
7 declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
8
59
610 ; CHECK-LABEL: {{^}}phi1:
711 ; CHECK: s_buffer_load_dword [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0x0
812 ; CHECK: v_mov_b32_e32 v{{[0-9]}}, [[DST]]
9 define void @phi1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
13 define void @phi1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
1014 main_body:
1115 %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
1216 %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
3034
3135 ; Make sure this program doesn't crash
3236 ; CHECK-LABEL: {{^}}phi2:
33 define void @phi2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
37 define void @phi2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
3438 main_body:
3539 %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
3640 %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
4953 %tmp33 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 84)
5054 %tmp34 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 88)
5155 %tmp35 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 92)
52 %tmp36 = getelementptr <32 x i8>, <32 x i8> addrspace(2)* %arg2, i32 0
53 %tmp37 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp36, !tbaa !0
56 %tmp36 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %arg2, i32 0
57 %tmp37 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp36, !tbaa !0
5458 %tmp38 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg1, i32 0
5559 %tmp39 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp38, !tbaa !0
5660 %tmp40 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %arg3, <2 x i32> %arg5)
6266 %tmp46 = bitcast float %tmp41 to i32
6367 %tmp47 = insertelement <2 x i32> undef, i32 %tmp45, i32 0
6468 %tmp48 = insertelement <2 x i32> %tmp47, i32 %tmp46, i32 1
65 %tmp49 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp48, <32 x i8> %tmp37, <16 x i8> %tmp39, i32 2)
69 %tmp39.bc = bitcast <16 x i8> %tmp39 to <4 x i32>
70 %tmp49 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp48, <8 x i32> %tmp37, <4 x i32> %tmp39.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
6671 %tmp50 = extractelement <4 x float> %tmp49, i32 2
6772 %tmp51 = call float @fabs(float %tmp50)
6873 %tmp52 = fmul float %tmp42, %tmp42
150155
151156 ; We just want to make sure the program doesn't crash
152157 ; CHECK-LABEL: {{^}}loop:
153 define void @loop(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
158 define void @loop(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
154159 main_body:
155160 %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
156161 %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
199204 declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
200205
201206 ; Function Attrs: nounwind readnone
202 declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
207 declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <8 x i32>, <16 x i8>, i32) #1
203208
204209 ; Function Attrs: readnone
205210 declare float @llvm.amdgcn.rsq.f32(float) #3
221226 ; CHECK: image_sample
222227 ; CHECK: exp
223228 ; CHECK: s_endpgm
224 define void @sample_v3([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
229 define void @sample_v3([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
225230 entry:
226231 %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
227232 %tmp21 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
228233 %tmp22 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 16)
229 %tmp23 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 0
230 %tmp24 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp23, !tbaa !0
234 %tmp23 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0
235 %tmp24 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp23, !tbaa !0
231236 %tmp25 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 0
232237 %tmp26 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp25, !tbaa !0
233238 %tmp27 = fcmp oeq float %tmp22, 0.000000e+00
239 %tmp26.bc = bitcast <16 x i8> %tmp26 to <4 x i32>
234240 br i1 %tmp27, label %if, label %else
235241
236242 if: ; preds = %entry
237 %val.if = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> zeroinitializer, <32 x i8> %tmp24, <16 x i8> %tmp26, i32 2)
243 %val.if = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> zeroinitializer, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
238244 %val.if.0 = extractelement <4 x float> %val.if, i32 0
239245 %val.if.1 = extractelement <4 x float> %val.if, i32 1
240246 %val.if.2 = extractelement <4 x float> %val.if, i32 2
241247 br label %endif
242248
243249 else: ; preds = %entry
244 %val.else = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> <i32 1, i32 0>, <32 x i8> %tmp24, <16 x i8> %tmp26, i32 2)
250 %val.else = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 1, i32 0>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
245251 %val.else.0 = extractelement <4 x float> %val.else, i32 0
246252 %val.else.1 = extractelement <4 x float> %val.else, i32 1
247253 %val.else.2 = extractelement <4 x float> %val.else, i32 2
284290 ; This test is just checking that we don't crash / assertion fail.
285291 ; CHECK-LABEL: {{^}}copy2:
286292 ; CHECK: s_endpgm
287 define void @copy2([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
293 define void @copy2([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
288294 entry:
289295 br label %LOOP68
290296
334340 %tmp53 = bitcast float %tmp30 to i32
335341 %tmp54 = insertelement <2 x i32> undef, i32 %tmp52, i32 0
336342 %tmp55 = insertelement <2 x i32> %tmp54, i32 %tmp53, i32 1
337 %tmp56 = bitcast <8 x i32> %tmp26 to <32 x i8>
338 %tmp57 = bitcast <4 x i32> %tmp28 to <16 x i8>
339 %tmp58 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp55, <32 x i8> %tmp56, <16 x i8> %tmp57, i32 2)
343 %tmp56 = bitcast <8 x i32> %tmp26 to <8 x i32>
344 %tmp58 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp55, <8 x i32> %tmp56, <4 x i32> %tmp28, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
340345 br label %bb71
341346
342347 bb80: ; preds = %bb
345350 %tmp82.2 = add i32 %tmp82, 1
346351 %tmp83 = insertelement <2 x i32> undef, i32 %tmp81, i32 0
347352 %tmp84 = insertelement <2 x i32> %tmp83, i32 %tmp82.2, i32 1
348 %tmp85 = bitcast <8 x i32> %tmp26 to <32 x i8>
349 %tmp86 = bitcast <4 x i32> %tmp28 to <16 x i8>
350 %tmp87 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp84, <32 x i8> %tmp85, <16 x i8> %tmp86, i32 2)
353 %tmp85 = bitcast <8 x i32> %tmp26 to <8 x i32>
354 %tmp87 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp84, <8 x i32> %tmp85, <4 x i32> %tmp28, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
351355 br label %bb71
352356
353357 bb71: ; preds = %bb80, %bb38
55
66 ; CHECK: {{^}}main:
77 ; CHECK: image_sample_b v{{\[[0-9]:[0-9]\]}}, 15, 0, 0, 0, 0, 0, 0, 0, v{{\[[0-9]:[0-9]\]}}
8 define void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
8 define void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
99 main_body:
1010 %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
1111 %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
1212 %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 16)
13 %tmp22 = getelementptr <32 x i8>, <32 x i8> addrspace(2)* %arg2, i32 0
14 %tmp23 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp22, !tbaa !0
13 %tmp22 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %arg2, i32 0
14 %tmp23 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp22, !tbaa !0
1515 %tmp24 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg1, i32 0
1616 %tmp25 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp24, !tbaa !0
1717 %tmp26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %arg3, <2 x i32> %arg5)
2323 %tmp32 = insertelement <4 x i32> %tmp31, i32 %tmp29, i32 1
2424 %tmp33 = insertelement <4 x i32> %tmp32, i32 %tmp30, i32 2
2525 %tmp34 = insertelement <4 x i32> %tmp33, i32 undef, i32 3
26 %tmp35 = call <4 x float> @llvm.SI.sampleb.v4i32(<4 x i32> %tmp34, <32 x i8> %tmp23, <16 x i8> %tmp25, i32 2)
26 %tmp25.bc = bitcast <16 x i8> %tmp25 to <4 x i32>
27 %tmp35 = call <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32> %tmp34, <8 x i32> %tmp23, <4 x i32> %tmp25.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
2728 %tmp36 = extractelement <4 x float> %tmp35, i32 0
2829 %tmp37 = extractelement <4 x float> %tmp35, i32 1
2930 %tmp38 = extractelement <4 x float> %tmp35, i32 2
3839 ; Function Attrs: nounwind readnone
3940 declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
4041
41 ; Function Attrs: nounwind readnone
42 declare <4 x float> @llvm.SI.sampleb.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
42 declare <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
43
4344
4445 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
4546
2222 %tmp28 = bitcast float %tmp26 to i32
2323 %tmp29 = insertelement <2 x i32> undef, i32 %tmp27, i32 0
2424 %tmp30 = insertelement <2 x i32> %tmp29, i32 %tmp28, i32 1
25 %tmp31 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp30, <32 x i8> %tmp22, <16 x i8> %tmp24, i32 2)
25 %tmp22.bc = bitcast <32 x i8> %tmp22 to <8 x i32>
26 %tmp24.bc = bitcast <16 x i8> %tmp24 to <4 x i32>
27 %tmp31 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp30, <8 x i32> %tmp22.bc, <4 x i32> %tmp24.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
2628 %tmp32 = extractelement <4 x float> %tmp31, i32 0
2729 %tmp33 = extractelement <4 x float> %tmp31, i32 1
2830 %tmp34 = extractelement <4 x float> %tmp31, i32 2
3840 ; Function Attrs: nounwind readnone
3941 declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
4042
41 ; Function Attrs: nounwind readnone
42 declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
43 declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
44
4345
4446 ; Function Attrs: nounwind readnone
4547 declare i32 @llvm.SI.packf16(float, float) #1
2121 ; Writing to M0 from an SMRD instruction will hang the GPU.
2222 ; CHECK-NOT: s_buffer_load_dword m0
2323 ; CHECK: s_endpgm
24 define void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
24 define void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
2525 main_body:
2626 %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
2727 %tmp21 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
6363 %tmp57 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 372)
6464 %tmp58 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 376)
6565 %tmp59 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 384)
66 %tmp60 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 0
67 %tmp61 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp60, !tbaa !0
66 %tmp60 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0
67 %tmp61 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp60, !tbaa !0
6868 %tmp62 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 0
6969 %tmp63 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp62, !tbaa !0
70 %tmp64 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 1
71 %tmp65 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp64, !tbaa !0
70 %tmp63.bc = bitcast <16 x i8> %tmp63 to <4 x i32>
71 %tmp64 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 1
72 %tmp65 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp64, !tbaa !0
7273 %tmp66 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 1
7374 %tmp67 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp66, !tbaa !0
74 %tmp68 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 2
75 %tmp69 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp68, !tbaa !0
75 %tmp68 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 2
76 %tmp69 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp68, !tbaa !0
7677 %tmp70 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 2
7778 %tmp71 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp70, !tbaa !0
78 %tmp72 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 3
79 %tmp73 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp72, !tbaa !0
79 %tmp72 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 3
80 %tmp73 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp72, !tbaa !0
8081 %tmp74 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 3
8182 %tmp75 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp74, !tbaa !0
82 %tmp76 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 4
83 %tmp77 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp76, !tbaa !0
83 %tmp76 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 4
84 %tmp77 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp76, !tbaa !0
8485 %tmp78 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 4
8586 %tmp79 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp78, !tbaa !0
86 %tmp80 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 5
87 %tmp81 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp80, !tbaa !0
87 %tmp80 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 5
88 %tmp81 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp80, !tbaa !0
8889 %tmp82 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 5
8990 %tmp83 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp82, !tbaa !0
90 %tmp84 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 6
91 %tmp85 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp84, !tbaa !0
91 %tmp84 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 6
92 %tmp85 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp84, !tbaa !0
9293 %tmp86 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 6
9394 %tmp87 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp86, !tbaa !0
94 %tmp88 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 7
95 %tmp89 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp88, !tbaa !0
95 %tmp88 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 7
96 %tmp89 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp88, !tbaa !0
9697 %tmp90 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 7
9798 %tmp91 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp90, !tbaa !0
9899 %tmp92 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %arg4, <2 x i32> %arg6)
271272 %tmp240 = insertelement <8 x i32> %tmp239, i32 %tmp238, i32 5
272273 %tmp241 = insertelement <8 x i32> %tmp240, i32 undef, i32 6
273274 %tmp242 = insertelement <8 x i32> %tmp241, i32 undef, i32 7
274 %tmp243 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %tmp242, <32 x i8> %tmp61, <16 x i8> %tmp63, i32 2)
275 %tmp243 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp242, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
275276 %tmp244 = extractelement <4 x float> %tmp243, i32 3
276277 %tmp245 = fcmp oge float %temp30.0, %tmp244
277278 %tmp246 = sext i1 %tmp245 to i32
316317 %tmp274 = insertelement <8 x i32> %tmp273, i32 %tmp268, i32 5
317318 %tmp275 = insertelement <8 x i32> %tmp274, i32 undef, i32 6
318319 %tmp276 = insertelement <8 x i32> %tmp275, i32 undef, i32 7
319 %tmp277 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %tmp276, <32 x i8> %tmp65, <16 x i8> %tmp67, i32 2)
320 %tmp67.bc = bitcast <16 x i8> %tmp67 to <4 x i32>
321 %tmp277 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp276, <8 x i32> %tmp65, <4 x i32> %tmp67.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
320322 %tmp278 = extractelement <4 x float> %tmp277, i32 0
321323 %tmp279 = extractelement <4 x float> %tmp277, i32 1
322324 %tmp280 = extractelement <4 x float> %tmp277, i32 2
336338 %tmp294 = insertelement <8 x i32> %tmp293, i32 %tmp288, i32 5
337339 %tmp295 = insertelement <8 x i32> %tmp294, i32 undef, i32 6
338340 %tmp296 = insertelement <8 x i32> %tmp295, i32 undef, i32 7
339 %tmp297 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %tmp296, <32 x i8> %tmp81, <16 x i8> %tmp83, i32 2)
341 %tmp83.bc = bitcast <16 x i8> %tmp83 to <4 x i32>
342 %tmp297 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp296, <8 x i32> %tmp81, <4 x i32> %tmp83.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
340343 %tmp298 = extractelement <4 x float> %tmp297, i32 0
341344 %tmp299 = extractelement <4 x float> %tmp297, i32 1
342345 %tmp300 = extractelement <4 x float> %tmp297, i32 2
354357 %tmp312 = insertelement <8 x i32> %tmp311, i32 %tmp306, i32 5
355358 %tmp313 = insertelement <8 x i32> %tmp312, i32 undef, i32 6
356359 %tmp314 = insertelement <8 x i32> %tmp313, i32 undef, i32 7
357 %tmp315 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %tmp314, <32 x i8> %tmp77, <16 x i8> %tmp79, i32 2)
360 %tmp79.bc = bitcast <16 x i8> %tmp79 to <4 x i32>
361 %tmp315 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp314, <8 x i32> %tmp77, <4 x i32> %tmp79.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
358362 %tmp316 = extractelement <4 x float> %tmp315, i32 0
359363 %tmp317 = extractelement <4 x float> %tmp315, i32 1
360364 %tmp318 = extractelement <4 x float> %tmp315, i32 2
384388 %tmp342 = insertelement <8 x i32> %tmp341, i32 %tmp336, i32 5
385389 %tmp343 = insertelement <8 x i32> %tmp342, i32 undef, i32 6
386390 %tmp344 = insertelement <8 x i32> %tmp343, i32 undef, i32 7
387 %tmp345 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %tmp344, <32 x i8> %tmp61, <16 x i8> %tmp63, i32 2)
391 %tmp345 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp344, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
388392 %tmp346 = extractelement <4 x float> %tmp345, i32 0
389393 %tmp347 = extractelement <4 x float> %tmp345, i32 1
390394 %tmp348 = extractelement <4 x float> %tmp345, i32 2
414418 %tmp372 = insertelement <8 x i32> %tmp371, i32 %tmp366, i32 5
415419 %tmp373 = insertelement <8 x i32> %tmp372, i32 undef, i32 6
416420 %tmp374 = insertelement <8 x i32> %tmp373, i32 undef, i32 7
417 %tmp375 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %tmp374, <32 x i8> %tmp69, <16 x i8> %tmp71, i32 2)
421 %tmp71.bc = bitcast <16 x i8> %tmp71 to <4 x i32>
422 %tmp375 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp374, <8 x i32> %tmp69, <4 x i32> %tmp71.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
418423 %tmp376 = extractelement <4 x float> %tmp375, i32 0
419424 %tmp377 = extractelement <4 x float> %tmp375, i32 1
420425 %tmp378 = extractelement <4 x float> %tmp375, i32 2
468473 %tmp426 = insertelement <8 x i32> %tmp425, i32 %tmp420, i32 5
469474 %tmp427 = insertelement <8 x i32> %tmp426, i32 undef, i32 6
470475 %tmp428 = insertelement <8 x i32> %tmp427, i32 undef, i32 7
471 %tmp429 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %tmp428, <32 x i8> %tmp85, <16 x i8> %tmp87, i32 2)
476 %tmp87.bc = bitcast <16 x i8> %tmp87 to <4 x i32>
477 %tmp429 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp428, <8 x i32> %tmp85, <4 x i32> %tmp87.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
472478 %tmp430 = extractelement <4 x float> %tmp429, i32 0
473479 %tmp431 = extractelement <4 x float> %tmp429, i32 1
474480 %tmp432 = extractelement <4 x float> %tmp429, i32 2
509515 %tmp467 = insertelement <4 x i32> %tmp466, i32 %tmp464, i32 1
510516 %tmp468 = insertelement <4 x i32> %tmp467, i32 %tmp465, i32 2
511517 %tmp469 = insertelement <4 x i32> %tmp468, i32 undef, i32 3
512 %tmp470 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %tmp469, <32 x i8> %tmp89, <16 x i8> %tmp91, i32 4)
518 %tmp91.bc = bitcast <16 x i8> %tmp91 to <4 x i32>
519 %tmp470 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %tmp469, <8 x i32> %tmp89, <4 x i32> %tmp91.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
513520 %tmp471 = extractelement <4 x float> %tmp470, i32 0
514521 %tmp472 = extractelement <4 x float> %tmp470, i32 1
515522 %tmp473 = extractelement <4 x float> %tmp470, i32 2
610617 %tmp568 = insertelement <8 x i32> %tmp567, i32 %tmp562, i32 5
611618 %tmp569 = insertelement <8 x i32> %tmp568, i32 undef, i32 6
612619 %tmp570 = insertelement <8 x i32> %tmp569, i32 undef, i32 7
613 %tmp571 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %tmp570, <32 x i8> %tmp73, <16 x i8> %tmp75, i32 2)
620 %tmp75.bc = bitcast <16 x i8> %tmp75 to <4 x i32>
621 %tmp571 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp570, <8 x i32> %tmp73, <4 x i32> %tmp75.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
614622 %tmp572 = extractelement <4 x float> %tmp571, i32 0
615623 %tmp573 = extractelement <4 x float> %tmp571, i32 1
616624 %tmp574 = extractelement <4 x float> %tmp571, i32 2
634642 %tmp588 = insertelement <8 x i32> %tmp587, i32 %tmp586, i32 5
635643 %tmp589 = insertelement <8 x i32> %tmp588, i32 undef, i32 6
636644 %tmp590 = insertelement <8 x i32> %tmp589, i32 undef, i32 7
637 %tmp591 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %tmp590, <32 x i8> %tmp61, <16 x i8> %tmp63, i32 2)
645 %tmp591 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp590, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
638646 %tmp592 = extractelement <4 x float> %tmp591, i32 3
639647 %tmp593 = fcmp oge float %temp30.1, %tmp592
640648 %tmp594 = sext i1 %tmp593 to i32
659667
660668 ; CHECK-LABEL: {{^}}main1:
661669 ; CHECK: s_endpgm
662 define void @main1([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
670 define void @main1([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
663671 main_body:
664672 %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
665673 %tmp21 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
766774 %tmp122 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 716)
767775 %tmp123 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 864)
768776 %tmp124 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 868)
769 %tmp125 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 0
770 %tmp126 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp125, !tbaa !0
777 %tmp125 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0
778 %tmp126 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp125, !tbaa !0
771779 %tmp127 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 0
772780 %tmp128 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp127, !tbaa !0
773 %tmp129 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 1
774 %tmp130 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp129, !tbaa !0
781 %tmp129 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 1
782 %tmp130 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp129, !tbaa !0
775783 %tmp131 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 1
776784 %tmp132 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp131, !tbaa !0
777 %tmp133 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 2
778 %tmp134 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp133, !tbaa !0
785 %tmp133 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 2
786 %tmp134 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp133, !tbaa !0
779787 %tmp135 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 2
780788 %tmp136 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp135, !tbaa !0
781 %tmp137 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 3
782 %tmp138 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp137, !tbaa !0
789 %tmp137 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 3
790 %tmp138 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp137, !tbaa !0
783791 %tmp139 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 3
784792 %tmp140 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp139, !tbaa !0
785 %tmp141 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 4
786 %tmp142 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp141, !tbaa !0
793 %tmp141 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 4
794 %tmp142 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp141, !tbaa !0
787795 %tmp143 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 4
788796 %tmp144 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp143, !tbaa !0
789 %tmp145 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 5
790 %tmp146 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp145, !tbaa !0
797 %tmp145 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 5
798 %tmp146 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp145, !tbaa !0
791799 %tmp147 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 5
792800 %tmp148 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp147, !tbaa !0
793 %tmp149 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 6
794 %tmp150 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp149, !tbaa !0
801 %tmp149 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 6
802 %tmp150 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp149, !tbaa !0
795803 %tmp151 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 6
796804 %tmp152 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp151, !tbaa !0
797 %tmp153 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 7
798 %tmp154 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp153, !tbaa !0
805 %tmp153 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 7
806 %tmp154 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp153, !tbaa !0
799807 %tmp155 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 7
800808 %tmp156 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp155, !tbaa !0
801 %tmp157 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 8
802 %tmp158 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp157, !tbaa !0
809 %tmp157 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 8
810 %tmp158 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp157, !tbaa !0
803811 %tmp159 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 8
804812 %tmp160 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp159, !tbaa !0
805813 %tmp161 = fcmp ugt float %arg17, 0.000000e+00
867875 %tmp222 = bitcast float %tmp174 to i32
868876 %tmp223 = insertelement <2 x i32> undef, i32 %tmp221, i32 0
869877 %tmp224 = insertelement <2 x i32> %tmp223, i32 %tmp222, i32 1
870 %tmp225 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp224, <32 x i8> %tmp130, <16 x i8> %tmp132, i32 2)
878 %tmp132.bc = bitcast <16 x i8> %tmp132 to <4 x i32>
879 %tmp225 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp224, <8 x i32> %tmp130, <4 x i32> %tmp132.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
871880 %tmp226 = extractelement <4 x float> %tmp225, i32 0
872881 %tmp227 = extractelement <4 x float> %tmp225, i32 1
873882 %tmp228 = extractelement <4 x float> %tmp225, i32 2
937946 %tmp279 = insertelement <4 x i32> %tmp278, i32 %tmp277, i32 1
938947 %tmp280 = insertelement <4 x i32> %tmp279, i32 0, i32 2
939948 %tmp281 = insertelement <4 x i32> %tmp280, i32 undef, i32 3
940 %tmp282 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %tmp281, <32 x i8> %tmp146, <16 x i8> %tmp148, i32 2)
949 %tmp148.bc = bitcast <16 x i8> %tmp148 to <4 x i32>
950 %tmp282 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %tmp281, <8 x i32> %tmp146, <4 x i32> %tmp148.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
941951 %tmp283 = extractelement <4 x float> %tmp282, i32 3
942952 %tmp284 = fadd float %temp168.0, %tmp273
943953 %tmp285 = fadd float %temp169.0, %tmp274
10001010 %tmp339 = bitcast float %tmp335 to i32
10011011 %tmp340 = insertelement <2 x i32> undef, i32 %tmp338, i32 0
10021012 %tmp341 = insertelement <2 x i32> %tmp340, i32 %tmp339, i32 1
1003 %tmp342 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp341, <32 x i8> %tmp134, <16 x i8> %tmp136, i32 2)
1013 %tmp136.bc = bitcast <16 x i8> %tmp136 to <4 x i32>
1014 %tmp342 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp341, <8 x i32> %tmp134, <4 x i32> %tmp136.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
10041015 %tmp343 = extractelement <4 x float> %tmp342, i32 0
10051016 %tmp344 = extractelement <4 x float> %tmp342, i32 1
10061017 %tmp345 = extractelement <4 x float> %tmp342, i32 2
10321043 %tmp359 = bitcast float %tmp337 to i32
10331044 %tmp360 = insertelement <2 x i32> undef, i32 %tmp358, i32 0
10341045 %tmp361 = insertelement <2 x i32> %tmp360, i32 %tmp359, i32 1
1035 %tmp362 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp361, <32 x i8> %tmp150, <16 x i8> %tmp152, i32 2)
1046 %tmp152.bc = bitcast <16 x i8> %tmp152 to <4 x i32>
1047 %tmp362 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp361, <8 x i32> %tmp150, <4 x i32> %tmp152.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
10361048 %tmp363 = extractelement <4 x float> %tmp362, i32 2
10371049 %tmp364 = fmul float %result.i40, %result.i
10381050 %tmp365 = fmul float %result.i36, %result.i44
10421054 %tmp369 = bitcast float %tmp311 to i32
10431055 %tmp370 = insertelement <2 x i32> undef, i32 %tmp368, i32 0
10441056 %tmp371 = insertelement <2 x i32> %tmp370, i32 %tmp369, i32 1
1045 %tmp372 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp371, <32 x i8> %tmp138, <16 x i8> %tmp140, i32 2)
1057 %tmp140.bc = bitcast <16 x i8> %tmp140 to <4 x i32>
1058 %tmp372 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp371, <8 x i32> %tmp138, <4 x i32> %tmp140.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
10461059 %tmp373 = extractelement <4 x float> %tmp372, i32 0
10471060 %tmp374 = extractelement <4 x float> %tmp372, i32 1
10481061 %tmp375 = extractelement <4 x float> %tmp372, i32 2
10581071 %tmp383 = bitcast float %tmp321 to i32
10591072 %tmp384 = insertelement <2 x i32> undef, i32 %tmp382, i32 0
10601073 %tmp385 = insertelement <2 x i32> %tmp384, i32 %tmp383, i32 1
1061 %tmp386 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp385, <32 x i8> %tmp142, <16 x i8> %tmp144, i32 2)
1074 %tmp144.bc = bitcast <16 x i8> %tmp144 to <4 x i32>
1075 %tmp386 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp385, <8 x i32> %tmp142, <4 x i32> %tmp144.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
10621076 %tmp387 = extractelement <4 x float> %tmp386, i32 0
10631077 %tmp388 = extractelement <4 x float> %tmp386, i32 1
10641078 %tmp389 = extractelement <4 x float> %tmp386, i32 2
11541168 %tmp467 = bitcast float %tmp220 to i32
11551169 %tmp468 = insertelement <2 x i32> undef, i32 %tmp466, i32 0
11561170 %tmp469 = insertelement <2 x i32> %tmp468, i32 %tmp467, i32 1
1157 %tmp470 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp469, <32 x i8> %tmp158, <16 x i8> %tmp160, i32 2)
1171 %tmp160.bc = bitcast <16 x i8> %tmp160 to <4 x i32>
1172 %tmp470 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp469, <8 x i32> %tmp158, <4 x i32> %tmp160.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
11581173 %tmp471 = extractelement <4 x float> %tmp470, i32 0
11591174 %tmp472 = extractelement <4 x float> %tmp470, i32 1
11601175 %tmp473 = extractelement <4 x float> %tmp470, i32 2
11711186 %tmp484 = bitcast float %tmp172 to i32
11721187 %tmp485 = insertelement <2 x i32> undef, i32 %tmp483, i32 0
11731188 %tmp486 = insertelement <2 x i32> %tmp485, i32 %tmp484, i32 1
1174 %tmp487 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp486, <32 x i8> %tmp154, <16 x i8> %tmp156, i32 2)
1189 %tmp156.bc = bitcast <16 x i8> %tmp156 to <4 x i32>
1190 %tmp487 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp486, <8 x i32> %tmp154, <4 x i32> %tmp156.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
11751191 %tmp488 = extractelement <4 x float> %tmp487, i32 0
11761192 %tmp489 = extractelement <4 x float> %tmp487, i32 1
11771193 %tmp490 = extractelement <4 x float> %tmp487, i32 2
13761392 %tmp657 = insertelement <4 x i32> %tmp656, i32 %tmp654, i32 1
13771393 %tmp658 = insertelement <4 x i32> %tmp657, i32 %tmp655, i32 2
13781394 %tmp659 = insertelement <4 x i32> %tmp658, i32 undef, i32 3
1379 %tmp660 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %tmp659, <32 x i8> %tmp126, <16 x i8> %tmp128, i32 2)
1395 %tmp128.bc = bitcast <16 x i8> %tmp128 to <4 x i32>
1396 %tmp660 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %tmp659, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
13801397 %tmp661 = extractelement <4 x float> %tmp660, i32 0
13811398 %tmp662 = extractelement <4 x float> %tmp660, i32 1
13821399 %tmp663 = bitcast float %tmp646 to i32
13861403 %tmp667 = insertelement <4 x i32> %tmp666, i32 %tmp664, i32 1
13871404 %tmp668 = insertelement <4 x i32> %tmp667, i32 %tmp665, i32 2
13881405 %tmp669 = insertelement <4 x i32> %tmp668, i32 undef, i32 3
1389 %tmp670 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %tmp669, <32 x i8> %tmp126, <16 x i8> %tmp128, i32 2)
1406 %tmp670 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %tmp669, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
13901407 %tmp671 = extractelement <4 x float> %tmp670, i32 0
13911408 %tmp672 = extractelement <4 x float> %tmp670, i32 1
13921409 %tmp673 = fsub float -0.000000e+00, %tmp662
15481565 declare float @llvm.AMDGPU.clamp.f32(float, float, float) #1
15491566
15501567 ; Function Attrs: nounwind readnone
1551 declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #2
1568 declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #2
15521569
15531570 ; Function Attrs: nounwind readnone
1554 declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #2
1571 declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #2
1572
15551573
15561574 declare float @llvm.exp2.f32(float) #2
15571575
15711589 declare float @llvm.amdgcn.rsq.f32(float) #2
15721590
15731591 ; Function Attrs: nounwind readnone
1574 declare <4 x float> @llvm.SI.sampled.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32) #2
1592 declare <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #2
15751593
15761594 ; Function Attrs: readnone
15771595 declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #1
15801598 declare float @fabs(float) #1
15811599
15821600 ; Function Attrs: nounwind readnone
1583 declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #2
1601 declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #2
1602
15841603
15851604 ; Function Attrs: nounwind readnone
15861605 declare float @llvm.pow.f32(float, float) #2
6666 br label %bb4
6767
6868 bb9: ; preds = %bb2
69 %tmp10 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 2)
69 %tmp10 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
7070 %tmp11 = extractelement <4 x float> %tmp10, i32 1
7171 %tmp12 = extractelement <4 x float> %tmp10, i32 3
7272 br label %bb14
9797 }
9898
9999 ; Function Attrs: nounwind readnone
100 declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
100 declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
101101
102102 ; Function Attrs: nounwind readnone
103103 declare i32 @llvm.SI.packf16(float, float) #1