llvm.org GIT mirror llvm / 7d93793
AMDGPU: Remove some uses of llvm.SI.export in tests. Merge some of the old, smaller tests into more complete versions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@295792 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 2 years ago
33 changed file(s) with 994 addition(s) and 1107 deletion(s). Raw diff Collapse all Expand all
22
33 ; This test just checks that the compiler doesn't crash.
44
5 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
6
75 ; FUNC-LABEL: {{^}}v32i8_to_v8i32:
8 ; SI: s_endpgm
9 define amdgpu_ps void @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 {
6 define amdgpu_ps float @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 {
107 entry:
118 %1 = load <32 x i8>, <32 x i8> addrspace(2)* %0
129 %2 = bitcast <32 x i8> %1 to <8 x i32>
1310 %3 = extractelement <8 x i32> %2, i32 1
1411 %4 = icmp ne i32 %3, 0
1512 %5 = select i1 %4, float 0.0, float 1.0
16 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %5, float %5, float %5, float %5)
17 ret void
13 ret float %5
1814 }
1915
2016 ; FUNC-LABEL: {{^}}i8ptr_v16i8ptr:
33 ; GCN-LABEL: {{^}}main:
44 ; SI: v_lshl_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
55 ; VI: v_lshlrev_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, 1
6 define amdgpu_ps void @main(float %arg0, float %arg1) #0 {
6 define amdgpu_ps float @main(float %arg0, float %arg1) #0 {
77 bb:
88 %tmp = fptosi float %arg0 to i32
99 %tmp1 = call <4 x float> @llvm.SI.image.load.v4i32(<4 x i32> undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
1616 %tmp7 = select i1 %tmp6, float 0.000000e+00, float %arg1
1717 %tmp8 = call i32 @llvm.SI.packf16(float undef, float %tmp7)
1818 %tmp9 = bitcast i32 %tmp8 to float
19 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float undef, float %tmp9, float undef, float %tmp9)
20 ret void
19 ret float %tmp9
2120 }
2221
2322 declare <4 x float> @llvm.SI.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
2423 declare i32 @llvm.SI.packf16(float, float) #1
25 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
2624
2725 attributes #0 = { nounwind }
2826 attributes #1 = { nounwind readnone }
9696
9797 ; GCN-LABEL: {{^}}kill_vcc_implicit_def:
9898 ; GCN: IeeeMode: 0
99 define amdgpu_ps void @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) {
99 define amdgpu_ps float @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) {
100100 entry:
101101 %tmp0 = fcmp olt float %13, 0.0
102102 call void @llvm.AMDGPU.kill(float %14)
103103 %tmp1 = select i1 %tmp0, float 1.0, float 0.0
104 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 1, i32 1, float %tmp1, float %tmp1, float %tmp1, float %tmp1)
105 ret void
104 ret float %tmp1
106105 }
107106
108
109107 declare void @llvm.AMDGPU.kill(float)
110 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
111108
112109 attributes #0 = { nounwind "target-cpu"="tahiti" }
113110 attributes #1 = { nounwind "target-cpu"="fiji" }
2323 ; TONGA-NEXT: .long 704
2424 ; CONFIG: .p2align 8
2525 ; CONFIG: test:
26 define amdgpu_ps void @test(i32 %p) {
26 define amdgpu_ps void @test(i32 %p) #0 {
2727 %i = add i32 %p, 2
2828 %r = bitcast i32 %i to float
29 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
29 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r, float %r, float %r, float %r, i1 true, i1 false)
3030 ret void
3131 }
3232
33 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
33 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
34
35 attributes #0 = { nounwind }
666666 store double 4096.0, double addrspace(1)* %out
667667 ret void
668668 }
669
670 ; GCN-LABEL: {{^}}literal_folding:
671 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f4353f8, v{{[0-9]+}}
672 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0xbf4353f8, v{{[0-9]+}}
673 define amdgpu_vs void @literal_folding(float %arg) {
674 main_body:
675 %tmp = fmul float %arg, 0x3FE86A7F00000000
676 %tmp1 = fmul float %arg, 0xBFE86A7F00000000
677 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp, float %tmp, float %tmp1, float %tmp1, i1 true, i1 false) #0
678 ret void
679 }
680
681 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
682
683 attributes #0 = { nounwind }
0 # RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-insert-waits -o - %s | FileCheck %s
11 --- |
2 define amdgpu_ps <4 x float> @exp_done_waitcnt(<4 x i32> inreg, <4 x i32> inreg, i32 inreg %w, float %v) {
2 define amdgpu_ps <4 x float> @exp_done_waitcnt(<4 x i32> inreg, <4 x
3 i32> inreg, i32 inreg %w, float %v) #0 {
34 %a = load volatile float, float addrspace(1)* undef
45 %b = load volatile float, float addrspace(1)* undef
56 %c = load volatile float, float addrspace(1)* undef
67 %d = load volatile float, float addrspace(1)* undef
7 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %a, float %b, float %c, float %d)
8 call void @llvm.amdgcn.exp.f32(i32 15, i32 1, float %a, float %b, float %c, float %d, i1 true, i1 false)
89 ret <4 x float>
910 }
1011
11 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
12 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
1213
13 attributes #0 = { readnone }
14 attributes #1 = { nounwind }
14 attributes #0 = { nounwind }
1515
1616 ...
1717 ---
33 ; SI-LABEL: {{^}}kill_gs_const:
44 ; SI-NOT: v_cmpx_le_f32
55 ; SI: s_mov_b64 exec, 0
6
76 define amdgpu_gs void @kill_gs_const() {
87 main_body:
9 %0 = icmp ule i32 0, 3
10 %1 = select i1 %0, float 1.000000e+00, float -1.000000e+00
11 call void @llvm.AMDGPU.kill(float %1)
12 %2 = icmp ule i32 3, 0
13 %3 = select i1 %2, float 1.000000e+00, float -1.000000e+00
14 call void @llvm.AMDGPU.kill(float %3)
8 %tmp = icmp ule i32 0, 3
9 %tmp1 = select i1 %tmp, float 1.000000e+00, float -1.000000e+00
10 call void @llvm.AMDGPU.kill(float %tmp1)
11 %tmp2 = icmp ule i32 3, 0
12 %tmp3 = select i1 %tmp2, float 1.000000e+00, float -1.000000e+00
13 call void @llvm.AMDGPU.kill(float %tmp3)
1514 ret void
1615 }
1716
2019 ; SI: v_cmp_gt_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], 0, v{{[0-9]+}}
2120 ; SI: v_cmpx_le_f32_e32 vcc, 0, v{{[0-9]+}}
2221 ; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1.0, [[CMP]]
23 define amdgpu_ps void @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) {
22 define amdgpu_ps void @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) {
2423 entry:
25 %tmp0 = fcmp olt float %13, 0.0
26 call void @llvm.AMDGPU.kill(float %14)
27 %tmp1 = select i1 %tmp0, float 1.0, float 0.0
28 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 1, i32 1, float %tmp1, float %tmp1, float %tmp1, float %tmp1)
24 %tmp0 = fcmp olt float %arg13, 0.000000e+00
25 call void @llvm.AMDGPU.kill(float %arg14)
26 %tmp1 = select i1 %tmp0, float 1.000000e+00, float 0.000000e+00
27 call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) #0
2928 ret void
3029 }
3130
32 declare void @llvm.AMDGPU.kill(float)
33 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
31 declare void @llvm.AMDGPU.kill(float) #0
32 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
3433
35 !0 = !{!"const", null, i32 1}
34 attributes #0 = { nounwind }
None ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
1 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefixes=CHECK,VI %s
0 ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
22
3 ;CHECK-LABEL: {{^}}image_load_v4i32:
4 ;CHECK: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm
5 ;CHECK: s_waitcnt vmcnt(0)
6 define amdgpu_ps <4 x float> @image_load_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) {
3 ; GCN-LABEL: {{^}}image_load_v4i32:
4 ; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm
5 ; GCN: s_waitcnt vmcnt(0)
6 define amdgpu_ps <4 x float> @image_load_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
77 main_body:
8 %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
8 %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
99 ret <4 x float> %tex
1010 }
1111
12 ;CHECK-LABEL: {{^}}image_load_v2i32:
13 ;CHECK: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm
14 ;CHECK: s_waitcnt vmcnt(0)
15 define amdgpu_ps <4 x float> @image_load_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) {
12 ; GCN-LABEL: {{^}}image_load_v2i32:
13 ; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm
14 ; GCN: s_waitcnt vmcnt(0)
15 define amdgpu_ps <4 x float> @image_load_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) #0 {
1616 main_body:
17 %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
17 %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
1818 ret <4 x float> %tex
1919 }
2020
21 ;CHECK-LABEL: {{^}}image_load_i32:
22 ;CHECK: image_load v[0:3], v0, s[0:7] dmask:0xf unorm
23 ;CHECK: s_waitcnt vmcnt(0)
24 define amdgpu_ps <4 x float> @image_load_i32(<8 x i32> inreg %rsrc, i32 %c) {
21 ; GCN-LABEL: {{^}}image_load_i32:
22 ; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm
23 ; GCN: s_waitcnt vmcnt(0)
24 define amdgpu_ps <4 x float> @image_load_i32(<8 x i32> inreg %rsrc, i32 %c) #0 {
2525 main_body:
26 %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
26 %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
2727 ret <4 x float> %tex
2828 }
2929
30 ;CHECK-LABEL: {{^}}image_load_mip:
31 ;CHECK: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm
32 ;CHECK: s_waitcnt vmcnt(0)
33 define amdgpu_ps <4 x float> @image_load_mip(<8 x i32> inreg %rsrc, <4 x i32> %c) {
30 ; GCN-LABEL: {{^}}image_load_mip:
31 ; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm
32 ; GCN: s_waitcnt vmcnt(0)
33 define amdgpu_ps <4 x float> @image_load_mip(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
3434 main_body:
35 %tex = call <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
35 %tex = call <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
3636 ret <4 x float> %tex
3737 }
3838
39 ;CHECK-LABEL: {{^}}image_load_1:
40 ;CHECK: image_load v0, v[0:3], s[0:7] dmask:0x1 unorm
41 ;CHECK: s_waitcnt vmcnt(0)
42 define amdgpu_ps float @image_load_1(<8 x i32> inreg %rsrc, <4 x i32> %c) {
39 ; GCN-LABEL: {{^}}image_load_1:
40 ; GCN: image_load v0, v[0:3], s[0:7] dmask:0x1 unorm
41 ; GCN: s_waitcnt vmcnt(0)
42 define amdgpu_ps float @image_load_1(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
4343 main_body:
44 %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
44 %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
4545 %elt = extractelement <4 x float> %tex, i32 0
46 ; Only first component used, test that dmask etc. is changed accordingly
4746 ret float %elt
4847 }
4948
50 ;CHECK-LABEL: {{^}}image_load_f32_v2i32:
51 ;CHECK: image_load {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 unorm
52 ;CHECK: s_waitcnt vmcnt(0)
53 define amdgpu_ps float @image_load_f32_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) {
49 ; GCN-LABEL: {{^}}image_load_f32_v2i32:
50 ; GCN: image_load {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 unorm
51 ; GCN: s_waitcnt vmcnt(0)
52 define amdgpu_ps float @image_load_f32_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) #0 {
5453 main_body:
55 %tex = call float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 1, i1 0, i1 0, i1 0, i1 0)
54 %tex = call float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false)
5655 ret float %tex
5756 }
5857
59 ;CHECK-LABEL: {{^}}image_load_v2f32_v4i32:
60 ;CHECK: image_load {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 unorm
61 ;CHECK: s_waitcnt vmcnt(0)
62 define amdgpu_ps <2 x float> @image_load_v2f32_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) {
58 ; GCN-LABEL: {{^}}image_load_v2f32_v4i32:
59 ; GCN: image_load {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 unorm
60 ; GCN: s_waitcnt vmcnt(0)
61 define amdgpu_ps <2 x float> @image_load_v2f32_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
6362 main_body:
64 %tex = call <2 x float> @llvm.amdgcn.image.load.v2f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 3, i1 0, i1 0, i1 0, i1 0)
63 %tex = call <2 x float> @llvm.amdgcn.image.load.v2f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false)
6564 ret <2 x float> %tex
6665 }
6766
68
69 ;CHECK-LABEL: {{^}}image_store_v4i32:
70 ;CHECK: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm
71 define amdgpu_ps void @image_store_v4i32(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) {
67 ; GCN-LABEL: {{^}}image_store_v4i32:
68 ; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm
69 define amdgpu_ps void @image_store_v4i32(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) #0 {
7270 main_body:
73 call void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
71 call void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
7472 ret void
7573 }
7674
77 ;CHECK-LABEL: {{^}}image_store_v2i32:
78 ;CHECK: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm
79 define amdgpu_ps void @image_store_v2i32(<8 x i32> inreg %rsrc, <4 x float> %data, <2 x i32> %coords) {
75 ; GCN-LABEL: {{^}}image_store_v2i32:
76 ; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm
77 define amdgpu_ps void @image_store_v2i32(<8 x i32> inreg %rsrc, <4 x float> %data, <2 x i32> %coords) #0 {
8078 main_body:
81 call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %data, <2 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
79 call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %data, <2 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
8280 ret void
8381 }
8482
85 ;CHECK-LABEL: {{^}}image_store_i32:
86 ;CHECK: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
87 define amdgpu_ps void @image_store_i32(<8 x i32> inreg %rsrc, <4 x float> %data, i32 %coords) {
83 ; GCN-LABEL: {{^}}image_store_i32:
84 ; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
85 define amdgpu_ps void @image_store_i32(<8 x i32> inreg %rsrc, <4 x float> %data, i32 %coords) #0 {
8886 main_body:
89 call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %data, i32 %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
87 call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %data, i32 %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
9088 ret void
9189 }
9290
93 ;CHECK-LABEL: {{^}}image_store_f32_i32:
94 ;CHECK: image_store {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 unorm
95 define amdgpu_ps void @image_store_f32_i32(<8 x i32> inreg %rsrc, float %data, i32 %coords) {
91 ; GCN-LABEL: {{^}}image_store_f32_i32:
92 ; GCN: image_store {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 unorm
93 define amdgpu_ps void @image_store_f32_i32(<8 x i32> inreg %rsrc, float %data, i32 %coords) #0 {
9694 main_body:
97 call void @llvm.amdgcn.image.store.f32.i32.v8i32(float %data, i32 %coords, <8 x i32> %rsrc, i32 1, i1 0, i1 0, i1 0, i1 0)
95 call void @llvm.amdgcn.image.store.f32.i32.v8i32(float %data, i32 %coords, <8 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false)
9896 ret void
9997 }
10098
101 ;CHECK-LABEL: {{^}}image_store_v2f32_v4i32:
102 ;CHECK: image_store {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 unorm
103 define amdgpu_ps void @image_store_v2f32_v4i32(<8 x i32> inreg %rsrc, <2 x float> %data, <4 x i32> %coords) {
99 ; GCN-LABEL: {{^}}image_store_v2f32_v4i32:
100 ; GCN: image_store {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 unorm
101 define amdgpu_ps void @image_store_v2f32_v4i32(<8 x i32> inreg %rsrc, <2 x float> %data, <4 x i32> %coords) #0 {
104102 main_body:
105 call void @llvm.amdgcn.image.store.v2f32.v4i32.v8i32(<2 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 3, i1 0, i1 0, i1 0, i1 0)
103 call void @llvm.amdgcn.image.store.v2f32.v4i32.v8i32(<2 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false)
106104 ret void
107105 }
108106
109 ;CHECK-LABEL: {{^}}image_store_mip:
110 ;CHECK: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm
111 define amdgpu_ps void @image_store_mip(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) {
107 ; GCN-LABEL: {{^}}image_store_mip:
108 ; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm
109 define amdgpu_ps void @image_store_mip(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) #0 {
112110 main_body:
113 call void @llvm.amdgcn.image.store.mip.v4f32.v4i32.v8i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
111 call void @llvm.amdgcn.image.store.mip.v4f32.v4i32.v8i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
114112 ret void
115113 }
116114
117 ;CHECK-LABEL: {{^}}getresinfo:
118 ;CHECK: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
119 define amdgpu_ps void @getresinfo() {
115 ; GCN-LABEL: {{^}}getresinfo:
116 ; GCN: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
117 define amdgpu_ps void @getresinfo() #0 {
120118 main_body:
121 %r = call <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32 undef, <8 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0)
119 %r = call <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32 undef, <8 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false)
122120 %r0 = extractelement <4 x float> %r, i32 0
123121 %r1 = extractelement <4 x float> %r, i32 1
124122 %r2 = extractelement <4 x float> %r, i32 2
125123 %r3 = extractelement <4 x float> %r, i32 3
126 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
124 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r0, float %r1, float %r2, float %r3, i1 true, i1 true) #0
127125 ret void
128126 }
129127
130128 ; Ideally, the register allocator would avoid the wait here
131129 ;
132 ;CHECK-LABEL: {{^}}image_store_wait:
133 ;CHECK: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
134 ;CHECK: s_waitcnt vmcnt(0) expcnt(0)
135 ;CHECK: image_load v[0:3], v4, s[8:15] dmask:0xf unorm
136 ;CHECK: s_waitcnt vmcnt(0)
137 ;CHECK: image_store v[0:3], v4, s[16:23] dmask:0xf unorm
138 define amdgpu_ps void @image_store_wait(<8 x i32> inreg, <8 x i32> inreg, <8 x i32> inreg, <4 x float>, i32) {
130 ; GCN-LABEL: {{^}}image_store_wait:
131 ; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
132 ; GCN: s_waitcnt vmcnt(0) expcnt(0)
133 ; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf unorm
134 ; GCN: s_waitcnt vmcnt(0)
135 ; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf unorm
136 define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) #0 {
139137 main_body:
140 call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %3, i32 %4, <8 x i32> %0, i32 15, i1 0, i1 0, i1 0, i1 0)
141 %data = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %4, <8 x i32> %1, i32 15, i1 0, i1 0, i1 0, i1 0)
142 call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %data, i32 %4, <8 x i32> %2, i32 15, i1 0, i1 0, i1 0, i1 0)
138 call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %arg3, i32 %arg4, <8 x i32> %arg, i32 15, i1 false, i1 false, i1 false, i1 false)
139 %data = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %arg4, <8 x i32> %arg1, i32 15, i1 false, i1 false, i1 false, i1 false)
140 call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %data, i32 %arg4, <8 x i32> %arg2, i32 15, i1 false, i1 false, i1 false, i1 false)
143141 ret void
144142 }
145143
148146 ; VI-LABEL: image_load_mmo
149147 ; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
150148 ; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
151 define amdgpu_ps void @image_load_mmo(float addrspace(3)* %lds, <2 x i32> %c, <8 x i32> inreg %rsrc) {
152 store float 0.0, float addrspace(3)* %lds
153 %tex = call float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
149 define amdgpu_ps void @image_load_mmo(float addrspace(3)* %lds, <2 x i32> %c, <8 x i32> inreg %rsrc) #0 {
150 bb:
151 store float 0.000000e+00, float addrspace(3)* %lds
152 %tex = call float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
154153 %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
155 store float 0.0, float addrspace(3)* %tmp2
156 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tex, float %tex, float %tex, float %tex)
154 store float 0.000000e+00, float addrspace(3)* %tmp2
155 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tex, float %tex, float %tex, float %tex, i1 true, i1 true) #0
157156 ret void
158157 }
159158
160159 declare float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
161160 declare <2 x float> @llvm.amdgcn.image.load.v2f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
162161 declare void @llvm.amdgcn.image.store.f32.i32.v8i32(float, i32, <8 x i32>, i32, i1, i1, i1, i1) #0
163 declare void @llvm.amdgcn.image.store.v2f32.v4i32.v8i32(<2 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0
164162
165163
164 declare void @llvm.amdgcn.image.store.v2f32.v4i32.v8i32(<2 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0
166165 declare void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #0
167166 declare void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float>, <2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0
168167 declare void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0
172171 declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
173172 declare <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
174173 declare <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
174 declare <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1
175175
176 declare <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #0
177
178 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
176 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
179177
180178 attributes #0 = { nounwind }
181179 attributes #1 = { nounwind readonly }
11 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
22 ; RUN: llc -march=amdgcn -mcpu=kabini -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,16BANK %s
33 ; RUN: llc -march=amdgcn -mcpu=stoney -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,16BANK %s
4
54
65 ; GCN-LABEL: {{^}}v_interp:
76 ; GCN-NOT: s_wqm
109 ; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.y{{$}}
1110 ; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.y{{$}}
1211 ; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p0, attr0.x{{$}}
13 define amdgpu_ps void @v_interp(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x float>) {
12 define amdgpu_ps void @v_interp(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x float> %arg4) #0 {
1413 main_body:
15 %i = extractelement <2 x float> %4, i32 0
16 %j = extractelement <2 x float> %4, i32 1
17 %p0_0 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 0, i32 %3)
18 %p1_0 = call float @llvm.amdgcn.interp.p2(float %p0_0, float %j, i32 0, i32 0, i32 %3)
19 %p0_1 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 0, i32 %3)
20 %p1_1 = call float @llvm.amdgcn.interp.p2(float %p0_1, float %j, i32 1, i32 0, i32 %3)
21 %const = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %3)
14 %i = extractelement <2 x float> %arg4, i32 0
15 %j = extractelement <2 x float> %arg4, i32 1
16 %p0_0 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 0, i32 %arg3)
17 %p1_0 = call float @llvm.amdgcn.interp.p2(float %p0_0, float %j, i32 0, i32 0, i32 %arg3)
18 %p0_1 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 0, i32 %arg3)
19 %p1_1 = call float @llvm.amdgcn.interp.p2(float %p0_1, float %j, i32 1, i32 0, i32 %arg3)
20 %const = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %arg3)
2221 %w = fadd float %p1_1, %const
23 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %p0_0, float %p0_0, float %p1_1, float %w)
22 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_0, float %p0_0, float %p1_1, float %w, i1 true, i1 true) #0
2423 ret void
2524 }
2625
3938 ; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr63.w{{$}}
4039 ; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr64.w{{$}}
4140 ; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr64.x{{$}}
42 define amdgpu_ps void @v_interp_p1(float %i) {
41 define amdgpu_ps void @v_interp_p1(float %i) #0 {
42 bb:
4343 %p0_0 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 0, i32 256)
4444 %p0_1 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 0, i32 256)
4545 %p0_2 = call float @llvm.amdgcn.interp.p1(float %i, i32 2, i32 0, i32 256)
7979 ; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr63.x{{$}}
8080 ; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr64.x{{$}}
8181 ; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr64.x{{$}}
82 define amdgpu_ps void @v_interp_p2(float %x, float %j) {
82 define amdgpu_ps void @v_interp_p2(float %x, float %j) #0 {
83 bb:
8384 %p2_0 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 0, i32 0, i32 256)
8485 %p2_1 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 1, i32 0, i32 256)
8586 %p2_2 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 2, i32 0, i32 256)
120121 ; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, attr64.y{{$}}
121122 ; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, invalid_param_3, attr64.y{{$}}
122123 ; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, invalid_param_10, attr64.x{{$}}
123 define amdgpu_ps void @v_interp_mov(float %x, float %j) {
124 define amdgpu_ps void @v_interp_mov(float %x, float %j) #0 {
125 bb:
124126 %mov_0 = call float @llvm.amdgcn.interp.mov(i32 0, i32 0, i32 0, i32 256)
125127 %mov_1 = call float @llvm.amdgcn.interp.mov(i32 1, i32 0, i32 0, i32 256)
126128 %mov_2 = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 256)
163165 ; VI-DAG: v_interp_mov_f32 v{{[0-9]+}}, p0, attr0.x{{$}}
164166 ; VI: s_mov_b32 m0, -1{{$}}
165167 ; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
166 define amdgpu_ps void @v_interp_readnone(float addrspace(3)* %lds) {
167 store float 0.0, float addrspace(3)* %lds
168 define amdgpu_ps void @v_interp_readnone(float addrspace(3)* %lds) #0 {
169 bb:
170 store float 0.000000e+00, float addrspace(3)* %lds
168171 %tmp1 = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 0)
169172 %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
170 store float 0.0, float addrspace(3)* %tmp2
171 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp1, float %tmp1, float %tmp1, float %tmp1)
173 store float 0.000000e+00, float addrspace(3)* %tmp2
174 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) #0
172175 ret void
173176 }
174177
177180
178181 ; GCN-LABEL: {{^}}v_interp_p1_bank16_bug:
179182 ; 16BANK-NOT: v_interp_p1_f32 [[DST:v[0-9]+]], [[DST]]
180 define amdgpu_ps void @v_interp_p1_bank16_bug([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg13, [17 x <4 x i32>] addrspace(2)* byval %arg14, [34 x <8 x i32>] addrspace(2)* byval %arg15, float inreg %arg16, i32 inreg %arg17, <2 x i32> %arg18, <2 x i32> %arg19, <2 x i32> %arg20, <3 x i32> %arg21, <2 x i32> %arg22, <2 x i32> %arg23, <2 x i32> %arg24, float %arg25, float %arg26, float %arg27, float %arg28, float %arg29, float %arg30, i32 %arg31, float %arg32, float %arg33) {
183 define amdgpu_ps void @v_interp_p1_bank16_bug([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg13, [17 x <4 x i32>] addrspace(2)* byval %arg14, [34 x <8 x i32>] addrspace(2)* byval %arg15, float inreg %arg16, i32 inreg %arg17, <2 x i32> %arg18, <2 x i32> %arg19, <2 x i32> %arg20, <3 x i32> %arg21, <2 x i32> %arg22, <2 x i32> %arg23, <2 x i32> %arg24, float %arg25, float %arg26, float %arg27, float %arg28, float %arg29, float %arg30, i32 %arg31, float %arg32, float %arg33) #0 {
181184 main_body:
182185 %i.i = extractelement <2 x i32> %arg19, i32 0
183186 %j.i = extractelement <2 x i32> %arg19, i32 1
184187 %i.f.i = bitcast i32 %i.i to float
185188 %j.f.i = bitcast i32 %j.i to float
186 %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg17) #1
187 %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg17) #1
189 %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg17) #0
190 %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg17) #0
188191 %i.i7 = extractelement <2 x i32> %arg19, i32 0
189192 %j.i8 = extractelement <2 x i32> %arg19, i32 1
190193 %i.f.i9 = bitcast i32 %i.i7 to float
191194 %j.f.i10 = bitcast i32 %j.i8 to float
192 %p1.i11 = call float @llvm.amdgcn.interp.p1(float %i.f.i9, i32 1, i32 0, i32 %arg17) #1
193 %p2.i12 = call float @llvm.amdgcn.interp.p2(float %p1.i11, float %j.f.i10, i32 1, i32 0, i32 %arg17) #1
195 %p1.i11 = call float @llvm.amdgcn.interp.p1(float %i.f.i9, i32 1, i32 0, i32 %arg17) #0
196 %p2.i12 = call float @llvm.amdgcn.interp.p2(float %p1.i11, float %j.f.i10, i32 1, i32 0, i32 %arg17) #0
194197 %i.i1 = extractelement <2 x i32> %arg19, i32 0
195198 %j.i2 = extractelement <2 x i32> %arg19, i32 1
196199 %i.f.i3 = bitcast i32 %i.i1 to float
197200 %j.f.i4 = bitcast i32 %j.i2 to float
198 %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 0, i32 %arg17) #1
199 %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 0, i32 %arg17) #1
201 %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 0, i32 %arg17) #0
202 %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 0, i32 %arg17) #0
200203 %tmp = call float @llvm.fabs.f32(float %p2.i)
201204 %tmp34 = call float @llvm.fabs.f32(float %p2.i12)
202205 %tmp35 = call float @llvm.fabs.f32(float %p2.i6)
203206 %tmp36 = call i32 @llvm.SI.packf16(float %tmp, float %tmp34)
204 %tmp37 = bitcast i32 %tmp36 to float
207 %tmp37 = bitcast i32 %tmp36 to <2 x half>
205208 %tmp38 = call i32 @llvm.SI.packf16(float %tmp35, float 1.000000e+00)
206 %tmp39 = bitcast i32 %tmp38 to float
207 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp37, float %tmp39, float %tmp37, float %tmp39)
208 ret void
209 }
210
211 declare float @llvm.fabs.f32(float) #0
212 declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
213 declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
214 declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #0
215 declare i32 @llvm.SI.packf16(float, float) #0
216 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
217
218 attributes #0 = { nounwind readnone }
219 attributes #1 = { nounwind }
209 %tmp39 = bitcast i32 %tmp38 to <2 x half>
210 call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp37, <2 x half> %tmp39, i1 true, i1 true) #0
211 ret void
212 }
213
214 declare float @llvm.fabs.f32(float) #1
215 declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
216 declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
217 declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
218 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
219 declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
220 declare i32 @llvm.SI.packf16(float, float) #1
221
222 attributes #0 = { nounwind }
223 attributes #1 = { nounwind readnone }
None ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
0 ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
11 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
22
33 ; GCN-LABEL: {{^}}mbcnt_intrinsics:
44 ; GCN: v_mbcnt_lo_u32_b32_e64 [[LO:v[0-9]+]], -1, 0
55 ; SI: v_mbcnt_hi_u32_b32_e32 {{v[0-9]+}}, -1, [[LO]]
66 ; VI: v_mbcnt_hi_u32_b32_e64 {{v[0-9]+}}, -1, [[LO]]
7
8 define amdgpu_ps void @mbcnt_intrinsics(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) {
7 define amdgpu_ps void @mbcnt_intrinsics(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3) {
98 main_body:
10 %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #1
11 %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) #1
12 %4 = bitcast i32 %hi to float
13 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %4, float %4, float %4, float %4)
9 %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
10 %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) #0
11 %tmp = bitcast i32 %hi to float
12 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp, float %tmp, float %tmp, float %tmp, i1 true, i1 true) #1
1413 ret void
1514 }
1615
17 declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
16 declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
17 declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
18 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
1819
19 declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
20
21 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
22
23 attributes #1 = { nounwind readnone }
20 attributes #0 = { nounwind readnone }
21 attributes #1 = { nounwind }
+0
-15
test/CodeGen/AMDGPU/lshl.ll less more
None ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
1 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s
2
3 ;CHECK: s_lshl_b32 s{{[0-9]}}, s{{[0-9]}}, 1
4
5 define void @test(i32 %p) {
6 %i = mul i32 %p, 2
7 %r = bitcast i32 %i to float
8 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
9 ret void
10 }
11
12 declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
13
14 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+0
-15
test/CodeGen/AMDGPU/lshr.ll less more
None ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
1 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s
2
3 ;CHECK: s_lshr_b32 s{{[0-9]}}, s{{[0-9]}}, 1
4
5 define void @test(i32 %p) {
6 %i = udiv i32 %p, 2
7 %r = bitcast i32 %i to float
8 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
9 ret void
10 }
11
12 declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
13
14 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+0
-17
test/CodeGen/AMDGPU/mulhu.ll less more
None ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
1 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
2
3 ;CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0xaaaaaaab
4 ;CHECK: v_mul_hi_u32 v0, {{v[0-9]+}}, {{s[0-9]+}}
5 ;CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v0
6
7 define void @test(i32 %p) {
8 %i = udiv i32 %p, 3
9 %r = bitcast i32 %i to float
10 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
11 ret void
12 }
13
14 declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
15
16 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
0 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
11 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2
3 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
42
53 ; GCN-LABEL: {{^}}vgpr:
64 ; GCN: v_mov_b32_e32 v1, v0
75 ; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
8 ; GCN-DAG: exp mrt0 v1, v1, v1, v1 done compr vm
9 ; GCN: s_waitcnt expcnt(0)
10 ; GCN-NOT: s_endpgm
11 define amdgpu_vs {float, float} @vgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
12 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
13 %x = fadd float %3, 1.0
14 %a = insertvalue {float, float} undef, float %x, 0
15 %b = insertvalue {float, float} %a, float %3, 1
16 ret {float, float} %b
6 ; GCN-DAG: exp mrt0 v1, v1, v1, v1 done vm
7 ; GCN: s_waitcnt expcnt(0)
8 ; GCN-NOT: s_endpgm
9 define amdgpu_vs { float, float } @vgpr([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
10 bb:
11 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
12 %x = fadd float %arg3, 1.000000e+00
13 %a = insertvalue { float, float } undef, float %x, 0
14 %b = insertvalue { float, float } %a, float %arg3, 1
15 ret { float, float } %b
1716 }
1817
1918 ; GCN-LABEL: {{^}}vgpr_literal:
2019 ; GCN: v_mov_b32_e32 v4, v0
21 ; GCN: exp mrt0 v4, v4, v4, v4 done compr vm
20 ; GCN: exp mrt0 v4, v4, v4, v4 done vm
2221
2322 ; GCN-DAG: v_mov_b32_e32 v0, 1.0
2423 ; GCN-DAG: v_mov_b32_e32 v1, 2.0
2625 ; GCN-DAG: v_mov_b32_e32 v3, -1.0
2726 ; GCN: s_waitcnt expcnt(0)
2827 ; GCN-NOT: s_endpgm
29 define amdgpu_vs {float, float, float, float} @vgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
30 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
31 ret {float, float, float, float} {float 1.0, float 2.0, float 4.0, float -1.0}
32 }
33
28 define amdgpu_vs { float, float, float, float } @vgpr_literal([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
29 bb:
30 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
31 ret { float, float, float, float } { float 1.000000e+00, float 2.000000e+00, float 4.000000e+00, float -1.000000e+00 }
32 }
3433
3534 ; GCN: .long 165580
3635 ; GCN-NEXT: .long 562
4342 ; GCN: v_mov_b32_e32 v3, v4
4443 ; GCN: v_mov_b32_e32 v4, v6
4544 ; GCN-NOT: s_endpgm
46 define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
47 %i0 = extractelement <2 x i32> %4, i32 0
48 %i1 = extractelement <2 x i32> %4, i32 1
49 %i2 = extractelement <2 x i32> %7, i32 0
50 %i3 = extractelement <2 x i32> %8, i32 0
51 %f0 = bitcast i32 %i0 to float
52 %f1 = bitcast i32 %i1 to float
53 %f2 = bitcast i32 %i2 to float
54 %f3 = bitcast i32 %i3 to float
55 %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
56 %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
57 %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
58 %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
59 %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
60 ret {float, float, float, float, float} %r4
61 }
62
45 define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
46 bb:
47 %i0 = extractelement <2 x i32> %arg4, i32 0
48 %i1 = extractelement <2 x i32> %arg4, i32 1
49 %i2 = extractelement <2 x i32> %arg7, i32 0
50 %i3 = extractelement <2 x i32> %arg8, i32 0
51 %f0 = bitcast i32 %i0 to float
52 %f1 = bitcast i32 %i1 to float
53 %f2 = bitcast i32 %i2 to float
54 %f3 = bitcast i32 %i3 to float
55 %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
56 %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
57 %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
58 %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
59 %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
60 ret { float, float, float, float, float } %r4
61 }
6362
6463 ; GCN: .long 165580
6564 ; GCN-NEXT: .long 1
6867 ; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
6968 ; GCN: v_mov_b32_e32 v0, 1.0
7069 ; GCN-NOT: s_endpgm
71 define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
72 ret float 1.0
73 }
74
70 define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
71 bb:
72 ret float 1.000000e+00
73 }
7574
7675 ; GCN: .long 165580
7776 ; GCN-NEXT: .long 2081
8281 ; GCN-DAG: v_mov_b32_e32 v1, v2
8382 ; GCN: v_mov_b32_e32 v2, v3
8483 ; GCN-NOT: s_endpgm
85 define amdgpu_ps {float, <2 x float>} @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
86 %f = bitcast <2 x i32> %8 to <2 x float>
87 %s = insertvalue {float, <2 x float>} undef, float %14, 0
88 %s1 = insertvalue {float, <2 x float>} %s, <2 x float> %f, 1
89 ret {float, <2 x float>} %s1
90 }
91
84 define amdgpu_ps { float, <2 x float> } @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
85 bb:
86 %f = bitcast <2 x i32> %arg8 to <2 x float>
87 %s = insertvalue { float, <2 x float> } undef, float %arg14, 0
88 %s1 = insertvalue { float, <2 x float> } %s, <2 x float> %f, 1
89 ret { float, <2 x float> } %s1
90 }
9291
9392 ; GCN: .long 165580
9493 ; GCN-NEXT: .long 562
101100 ; GCN-DAG: v_mov_b32_e32 v3, v6
102101 ; GCN-DAG: v_mov_b32_e32 v4, v8
103102 ; GCN-NOT: s_endpgm
104 attributes #1 = { "InitialPSInputAddr"="1" }
105 define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr1([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
106 %i0 = extractelement <2 x i32> %4, i32 0
107 %i1 = extractelement <2 x i32> %4, i32 1
108 %i2 = extractelement <2 x i32> %7, i32 0
109 %i3 = extractelement <2 x i32> %8, i32 0
110 %f0 = bitcast i32 %i0 to float
111 %f1 = bitcast i32 %i1 to float
112 %f2 = bitcast i32 %i2 to float
113 %f3 = bitcast i32 %i3 to float
114 %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
115 %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
116 %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
117 %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
118 %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
119 ret {float, float, float, float, float} %r4
120 }
121
103 define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr1([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #2 {
104 bb:
105 %i0 = extractelement <2 x i32> %arg4, i32 0
106 %i1 = extractelement <2 x i32> %arg4, i32 1
107 %i2 = extractelement <2 x i32> %arg7, i32 0
108 %i3 = extractelement <2 x i32> %arg8, i32 0
109 %f0 = bitcast i32 %i0 to float
110 %f1 = bitcast i32 %i1 to float
111 %f2 = bitcast i32 %i2 to float
112 %f3 = bitcast i32 %i3 to float
113 %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
114 %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
115 %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
116 %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
117 %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
118 ret { float, float, float, float, float } %r4
119 }
122120
123121 ; GCN: .long 165580
124122 ; GCN-NEXT: .long 562
131129 ; GCN: v_mov_b32_e32 v3, v8
132130 ; GCN: v_mov_b32_e32 v4, v12
133131 ; GCN-NOT: s_endpgm
134 attributes #2 = { "InitialPSInputAddr"="119" }
135 define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #2 {
136 %i0 = extractelement <2 x i32> %4, i32 0
137 %i1 = extractelement <2 x i32> %4, i32 1
138 %i2 = extractelement <2 x i32> %7, i32 0
139 %i3 = extractelement <2 x i32> %8, i32 0
140 %f0 = bitcast i32 %i0 to float
141 %f1 = bitcast i32 %i1 to float
142 %f2 = bitcast i32 %i2 to float
143 %f3 = bitcast i32 %i3 to float
144 %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
145 %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
146 %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
147 %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
148 %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
149 ret {float, float, float, float, float} %r4
150 }
151
132 define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #3 {
133 bb:
134 %i0 = extractelement <2 x i32> %arg4, i32 0
135 %i1 = extractelement <2 x i32> %arg4, i32 1
136 %i2 = extractelement <2 x i32> %arg7, i32 0
137 %i3 = extractelement <2 x i32> %arg8, i32 0
138 %f0 = bitcast i32 %i0 to float
139 %f1 = bitcast i32 %i1 to float
140 %f2 = bitcast i32 %i2 to float
141 %f3 = bitcast i32 %i3 to float
142 %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
143 %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
144 %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
145 %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
146 %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
147 ret { float, float, float, float, float } %r4
148 }
152149
153150 ; GCN: .long 165580
154151 ; GCN-NEXT: .long 562
161158 ; GCN: v_mov_b32_e32 v3, v4
162159 ; GCN: v_mov_b32_e32 v4, v8
163160 ; GCN-NOT: s_endpgm
164 attributes #3 = { "InitialPSInputAddr"="418" }
165 define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr418([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #3 {
166 %i0 = extractelement <2 x i32> %4, i32 0
167 %i1 = extractelement <2 x i32> %4, i32 1
168 %i2 = extractelement <2 x i32> %7, i32 0
169 %i3 = extractelement <2 x i32> %8, i32 0
170 %f0 = bitcast i32 %i0 to float
171 %f1 = bitcast i32 %i1 to float
172 %f2 = bitcast i32 %i2 to float
173 %f3 = bitcast i32 %i3 to float
174 %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
175 %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
176 %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
177 %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
178 %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
179 ret {float, float, float, float, float} %r4
180 }
181
161 define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr418([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #4 {
162 bb:
163 %i0 = extractelement <2 x i32> %arg4, i32 0
164 %i1 = extractelement <2 x i32> %arg4, i32 1
165 %i2 = extractelement <2 x i32> %arg7, i32 0
166 %i3 = extractelement <2 x i32> %arg8, i32 0
167 %f0 = bitcast i32 %i0 to float
168 %f1 = bitcast i32 %i1 to float
169 %f2 = bitcast i32 %i2 to float
170 %f3 = bitcast i32 %i3 to float
171 %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
172 %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
173 %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
174 %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
175 %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
176 ret { float, float, float, float, float } %r4
177 }
182178
183179 ; GCN-LABEL: {{^}}sgpr:
184180 ; GCN: s_add_i32 s0, s3, 2
185181 ; GCN: s_mov_b32 s2, s3
186182 ; GCN-NOT: s_endpgm
187 define amdgpu_vs {i32, i32, i32} @sgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
188 %x = add i32 %2, 2
189 %a = insertvalue {i32, i32, i32} undef, i32 %x, 0
190 %b = insertvalue {i32, i32, i32} %a, i32 %1, 1
191 %c = insertvalue {i32, i32, i32} %a, i32 %2, 2
192 ret {i32, i32, i32} %c
193 }
194
183 define amdgpu_vs { i32, i32, i32 } @sgpr([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
184 bb:
185 %x = add i32 %arg2, 2
186 %a = insertvalue { i32, i32, i32 } undef, i32 %x, 0
187 %b = insertvalue { i32, i32, i32 } %a, i32 %arg1, 1
188 %c = insertvalue { i32, i32, i32 } %a, i32 %arg2, 2
189 ret { i32, i32, i32 } %c
190 }
195191
196192 ; GCN-LABEL: {{^}}sgpr_literal:
197193 ; GCN: s_mov_b32 s0, 5
200196 ; GCN-DAG: s_mov_b32 s2, 7
201197 ; GCN-DAG: s_mov_b32 s3, 8
202198 ; GCN-NOT: s_endpgm
203 define amdgpu_vs {i32, i32, i32, i32} @sgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
204 %x = add i32 %2, 2
205 ret {i32, i32, i32, i32} {i32 5, i32 6, i32 7, i32 8}
206 }
207
199 define amdgpu_vs { i32, i32, i32, i32 } @sgpr_literal([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
200 bb:
201 %x = add i32 %arg2, 2
202 ret { i32, i32, i32, i32 } { i32 5, i32 6, i32 7, i32 8 }
203 }
208204
209205 ; GCN-LABEL: {{^}}both:
210206 ; GCN: v_mov_b32_e32 v1, v0
211 ; GCN-DAG: exp mrt0 v1, v1, v1, v1 done compr vm
207 ; GCN-DAG: exp mrt0 v1, v1, v1, v1 done vm
212208 ; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
213209 ; GCN-DAG: s_add_i32 s0, s3, 2
214210 ; GCN-DAG: s_mov_b32 s1, s2
215211 ; GCN: s_mov_b32 s2, s3
216212 ; GCN: s_waitcnt expcnt(0)
217213 ; GCN-NOT: s_endpgm
218 define amdgpu_vs {float, i32, float, i32, i32} @both([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
219 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
220 %v = fadd float %3, 1.0
221 %s = add i32 %2, 2
222 %a0 = insertvalue {float, i32, float, i32, i32} undef, float %v, 0
223 %a1 = insertvalue {float, i32, float, i32, i32} %a0, i32 %s, 1
224 %a2 = insertvalue {float, i32, float, i32, i32} %a1, float %3, 2
225 %a3 = insertvalue {float, i32, float, i32, i32} %a2, i32 %1, 3
226 %a4 = insertvalue {float, i32, float, i32, i32} %a3, i32 %2, 4
227 ret {float, i32, float, i32, i32} %a4
228 }
229
214 define amdgpu_vs { float, i32, float, i32, i32 } @both([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
215 bb:
216 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
217 %v = fadd float %arg3, 1.000000e+00
218 %s = add i32 %arg2, 2
219 %a0 = insertvalue { float, i32, float, i32, i32 } undef, float %v, 0
220 %a1 = insertvalue { float, i32, float, i32, i32 } %a0, i32 %s, 1
221 %a2 = insertvalue { float, i32, float, i32, i32 } %a1, float %arg3, 2
222 %a3 = insertvalue { float, i32, float, i32, i32 } %a2, i32 %arg1, 3
223 %a4 = insertvalue { float, i32, float, i32, i32 } %a3, i32 %arg2, 4
224 ret { float, i32, float, i32, i32 } %a4
225 }
230226
231227 ; GCN-LABEL: {{^}}structure_literal:
232228 ; GCN: v_mov_b32_e32 v3, v0
233 ; GCN: exp mrt0 v3, v3, v3, v3 done compr vm
229 ; GCN: exp mrt0 v3, v3, v3, v3 done vm
234230
235231 ; GCN-DAG: v_mov_b32_e32 v0, 1.0
236232 ; GCN-DAG: s_mov_b32 s0, 2
238234 ; GCN-DAG: v_mov_b32_e32 v1, 2.0
239235 ; GCN-DAG: v_mov_b32_e32 v2, 4.0
240236 ; GCN: s_waitcnt expcnt(0)
241 define amdgpu_vs {{float, i32}, {i32, <2 x float>}} @structure_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
242 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
243 ret {{float, i32}, {i32, <2 x float>}} {{float, i32} {float 1.0, i32 2}, {i32, <2 x float> <float 2.0, float 4.0>}}
244 }
245
246 attributes #0 = { nounwind "InitialPSInputAddr"="0" }
237 define amdgpu_vs { { float, i32 }, { i32, <2 x float> } } @structure_literal([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
238 bb:
239 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
240 ret { { float, i32 }, { i32, <2 x float> } } { { float, i32 } { float 1.000000e+00, i32 2 }, { i32, <2 x float> <float 2.000000e+00, float 4.000000e+00> } }
241 }
242
243 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
244
245 attributes #0 = { nounwind }
246 attributes #1 = { nounwind "InitialPSInputAddr"="0" }
247 attributes #2 = { nounwind "InitialPSInputAddr"="1" }
248 attributes #3 = { nounwind "InitialPSInputAddr"="119" }
249 attributes #4 = { nounwind "InitialPSInputAddr"="418" }
33 ; CHECK-LABEL: {{^}}main:
44 ; CHECK: v_cmp_o_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
55 ; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
6 define void @main(float %p) {
6 define amdgpu_ps float @main(float inreg %p) {
77 main_body:
88 %c = fcmp oeq float %p, %p
99 %r = select i1 %c, float 1.000000e+00, float 0.000000e+00
10 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %r, float %r, float %r, float %r)
11 ret void
10 ret float %r
1211 }
13
14 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
33 ; CHECK-LABEL: {{^}}main:
44 ; CHECK: v_cmp_u_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
55 ; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
6 define void @main(float %p) {
6 define amdgpu_ps float @main(float inreg %p) {
77 main_body:
88 %c = fcmp une float %p, %p
99 %r = select i1 %c, float 1.000000e+00, float 0.000000e+00
10 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %r, float %r, float %r, float %r)
11 ret void
10 ret float %r
1211 }
13
14 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
0 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
11 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s
2
3 ; Function Attrs: nounwind readnone
4 declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
52
63 ; CHECK-LABEL: {{^}}phi1:
74 ; CHECK: s_buffer_load_dword [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0x0
85 ; CHECK: v_mov_b32_e32 v{{[0-9]}}, [[DST]]
9 define amdgpu_ps void @phi1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 {
6 define amdgpu_ps void @phi1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
107 main_body:
118 %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
129 %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
2421 ENDIF: ; preds = %ELSE, %main_body
2522 %temp.0 = phi float [ %tmp26, %ELSE ], [ %tmp21, %main_body ]
2623 %tmp27 = fadd float %temp.0, %tmp23
27 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %tmp27, float %tmp27, float 0.000000e+00, float 1.000000e+00)
24 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp27, float %tmp27, float 0.000000e+00, float 1.000000e+00, i1 true, i1 true) #0
2825 ret void
2926 }
3027
3128 ; Make sure this program doesn't crash
3229 ; CHECK-LABEL: {{^}}phi2:
33 define amdgpu_ps void @phi2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
30 define amdgpu_ps void @phi2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 {
3431 main_body:
3532 %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
3633 %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
5754 %j.i = extractelement <2 x i32> %arg5, i32 1
5855 %i.f.i = bitcast i32 %i.i to float
5956 %j.f.i = bitcast i32 %j.i to float
60 %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg3) #0
61 %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg3) #0
57 %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg3) #1
58 %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg3) #1
6259 %i.i19 = extractelement <2 x i32> %arg5, i32 0
6360 %j.i20 = extractelement <2 x i32> %arg5, i32 1
6461 %i.f.i21 = bitcast i32 %i.i19 to float
6562 %j.f.i22 = bitcast i32 %j.i20 to float
66 %p1.i23 = call float @llvm.amdgcn.interp.p1(float %i.f.i21, i32 1, i32 0, i32 %arg3) #0
67 %p2.i24 = call float @llvm.amdgcn.interp.p2(float %p1.i23, float %j.f.i22, i32 1, i32 0, i32 %arg3) #0
63 %p1.i23 = call float @llvm.amdgcn.interp.p1(float %i.f.i21, i32 1, i32 0, i32 %arg3) #1
64 %p2.i24 = call float @llvm.amdgcn.interp.p2(float %p1.i23, float %j.f.i22, i32 1, i32 0, i32 %arg3) #1
6865 %i.i13 = extractelement <2 x i32> %arg5, i32 0
6966 %j.i14 = extractelement <2 x i32> %arg5, i32 1
7067 %i.f.i15 = bitcast i32 %i.i13 to float
7168 %j.f.i16 = bitcast i32 %j.i14 to float
72 %p1.i17 = call float @llvm.amdgcn.interp.p1(float %i.f.i15, i32 0, i32 1, i32 %arg3) #0
73 %p2.i18 = call float @llvm.amdgcn.interp.p2(float %p1.i17, float %j.f.i16, i32 0, i32 1, i32 %arg3) #0
69 %p1.i17 = call float @llvm.amdgcn.interp.p1(float %i.f.i15, i32 0, i32 1, i32 %arg3) #1
70 %p2.i18 = call float @llvm.amdgcn.interp.p2(float %p1.i17, float %j.f.i16, i32 0, i32 1, i32 %arg3) #1
7471 %i.i7 = extractelement <2 x i32> %arg5, i32 0
7572 %j.i8 = extractelement <2 x i32> %arg5, i32 1
7673 %i.f.i9 = bitcast i32 %i.i7 to float
7774 %j.f.i10 = bitcast i32 %j.i8 to float
78 %p1.i11 = call float @llvm.amdgcn.interp.p1(float %i.f.i9, i32 1, i32 1, i32 %arg3) #0
79 %p2.i12 = call float @llvm.amdgcn.interp.p2(float %p1.i11, float %j.f.i10, i32 1, i32 1, i32 %arg3) #0
75 %p1.i11 = call float @llvm.amdgcn.interp.p1(float %i.f.i9, i32 1, i32 1, i32 %arg3) #1
76 %p2.i12 = call float @llvm.amdgcn.interp.p2(float %p1.i11, float %j.f.i10, i32 1, i32 1, i32 %arg3) #1
8077 %i.i1 = extractelement <2 x i32> %arg5, i32 0
8178 %j.i2 = extractelement <2 x i32> %arg5, i32 1
8279 %i.f.i3 = bitcast i32 %i.i1 to float
8380 %j.f.i4 = bitcast i32 %j.i2 to float
84 %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 1, i32 %arg3) #0
85 %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 1, i32 %arg3) #0
81 %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 1, i32 %arg3) #1
82 %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 1, i32 %arg3) #1
8683 %tmp45 = bitcast float %p2.i to i32
8784 %tmp46 = bitcast float %p2.i24 to i32
8885 %tmp47 = insertelement <2 x i32> undef, i32 %tmp45, i32 0
167164 %tmp111 = fsub float -0.000000e+00, %tmp105
168165 %tmp112 = fmul float %tmp111, %tmp106
169166 %tmp113 = call i32 @llvm.SI.packf16(float %tmp108, float %tmp110)
170 %tmp114 = bitcast i32 %tmp113 to float
167 %tmp114 = bitcast i32 %tmp113 to <2 x half>
171168 %tmp115 = call i32 @llvm.SI.packf16(float %tmp112, float 1.000000e+00)
172 %tmp116 = bitcast i32 %tmp115 to float
173 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp114, float %tmp116, float %tmp114, float %tmp116)
169 %tmp116 = bitcast i32 %tmp115 to <2 x half>
170 call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp114, <2 x half> %tmp116, i1 true, i1 true) #0
174171 ret void
175172 }
176173
177174 ; We just want to make sure the program doesn't crash
178175 ; CHECK-LABEL: {{^}}loop:
179 define amdgpu_ps void @loop(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 {
176 define amdgpu_ps void @loop(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
180177 main_body:
181178 %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
182179 %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
203200 br i1 %tmp33, label %IF, label %ENDIF
204201
205202 IF: ; preds = %LOOP
206 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %temp4.0, float %temp5.0, float %temp6.0, float 1.000000e+00)
203 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %temp4.0, float %temp5.0, float %temp6.0, float 1.000000e+00, i1 true, i1 true) #0
207204 ret void
208205
209206 ENDIF: ; preds = %LOOP
229226 ; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[SAMPLE_LO]]:[[SAMPLE_HI]]{{\]}}
230227 ; CHECK: exp
231228 ; CHECK: s_endpgm
232 define amdgpu_ps void @sample_v3([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #1 {
229 define amdgpu_ps void @sample_v3([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
233230 entry:
234231 %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
235232 %tmp21 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
260257 %val.0 = phi float [ %val.if.0, %if ], [ %val.else.0, %else ]
261258 %val.1 = phi float [ %val.if.1, %if ], [ %val.else.1, %else ]
262259 %val.2 = phi float [ %val.if.2, %if ], [ %val.else.2, %else ]
263 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %val.0, float %val.1, float %val.2, float 0.000000e+00)
260 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %val.0, float %val.1, float %val.2, float 0.000000e+00, i1 true, i1 true) #0
264261 ret void
265262 }
266263
293290 ; This test is just checking that we don't crash / assertion fail.
294291 ; CHECK-LABEL: {{^}}copy2:
295292 ; CHECK: s_endpgm
296 define amdgpu_ps void @copy2([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #1 {
293 define amdgpu_ps void @copy2([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
297294 entry:
298295 br label %LOOP68
299296
307304 IF70: ; preds = %LOOP68
308305 %q = icmp ne i32 %l, 13
309306 %temp.8 = select i1 %q, float 1.000000e+00, float 0.000000e+00
310 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %temp.8, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
307 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %temp.8, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, i1 true, i1 true) #0
311308 ret void
312309
313310 ENDIF69: ; preds = %LOOP68
329326 ; [[END]]:
330327 ; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]{{\]}}
331328 ; CHECK: s_endpgm
332 define amdgpu_ps void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #1 {
329 define amdgpu_ps void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
333330 bb:
334331 %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg1, i32 0, i32 0
335332 %tmp22 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !3
342339 %j.i = extractelement <2 x i32> %arg7, i32 1
343340 %i.f.i = bitcast i32 %i.i to float
344341 %j.f.i = bitcast i32 %j.i to float
345 %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg5) #1
346 %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg5) #1
342 %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg5) #0
343 %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg5) #0
347344 %i.i1 = extractelement <2 x i32> %arg7, i32 0
348345 %j.i2 = extractelement <2 x i32> %arg7, i32 1
349346 %i.f.i3 = bitcast i32 %i.i1 to float
350347 %j.f.i4 = bitcast i32 %j.i2 to float
351 %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 1, i32 0, i32 %arg5) #1
352 %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 1, i32 0, i32 %arg5) #1
348 %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 1, i32 0, i32 %arg5) #0
349 %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 1, i32 0, i32 %arg5) #0
353350 %tmp31 = bitcast float %tmp23 to i32
354351 %tmp36 = icmp ne i32 %tmp31, 0
355352 br i1 %tmp36, label %bb38, label %bb80
376373 bb71: ; preds = %bb80, %bb38
377374 %tmp72 = phi <4 x float> [ %tmp58, %bb38 ], [ %tmp87, %bb80 ]
378375 %tmp88 = extractelement <4 x float> %tmp72, i32 0
379 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp88, float %tmp88, float %tmp88, float %tmp88)
376 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp88, float %tmp88, float %tmp88, float %tmp88, i1 true, i1 true) #0
380377 ret void
381378 }
382379
383380 ; Check that the resource descriptor is stored in an sgpr.
384381 ; CHECK-LABEL: {{^}}mimg_srsrc_sgpr:
385382 ; CHECK: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
386 define amdgpu_ps void @mimg_srsrc_sgpr([34 x <8 x i32>] addrspace(2)* byval %arg) #1 {
383 define amdgpu_ps void @mimg_srsrc_sgpr([34 x <8 x i32>] addrspace(2)* byval %arg) #0 {
387384 bb:
388 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #1
385 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
389386 %tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %arg, i32 0, i32 %tid
390387 %tmp8 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp7, align 32, !tbaa !0
391388 %tmp9 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> , <8 x i32> %tmp8, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
392389 %tmp10 = extractelement <4 x float> %tmp9, i32 0
393390 %tmp12 = call i32 @llvm.SI.packf16(float undef, float %tmp10)
394 %tmp13 = bitcast i32 %tmp12 to float
395 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp13, float undef, float undef, float undef)
391 %tmp13 = bitcast i32 %tmp12 to <2 x half>
392 call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp13, <2 x half> undef, i1 true, i1 true) #0
396393 ret void
397394 }
398395
399396 ; Check that the sampler is stored in an sgpr.
400397 ; CHECK-LABEL: {{^}}mimg_ssamp_sgpr:
401398 ; CHECK: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
402 define amdgpu_ps void @mimg_ssamp_sgpr([17 x <4 x i32>] addrspace(2)* byval %arg) #1 {
399 define amdgpu_ps void @mimg_ssamp_sgpr([17 x <4 x i32>] addrspace(2)* byval %arg) #0 {
403400 bb:
404 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #1
401 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
405402 %tmp7 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i32 0, i32 %tid
406403 %tmp8 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp7, align 16, !tbaa !0
407404 %tmp9 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> , <8 x i32> undef, <4 x i32> %tmp8, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
408405 %tmp10 = extractelement <4 x float> %tmp9, i32 0
409406 %tmp12 = call i32 @llvm.SI.packf16(float %tmp10, float undef)
410 %tmp13 = bitcast i32 %tmp12 to float
411 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp13, float undef, float undef, float undef)
412 ret void
413 }
414
415 ; Function Attrs: nounwind readnone
416 declare float @llvm.SI.load.const(<16 x i8>, i32) #0
417
418 ; Function Attrs: nounwind readnone
419 declare float @llvm.fabs.f32(float) #0
420
421 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
422
423 ; Function Attrs: nounwind readnone
424 declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <8 x i32>, <16 x i8>, i32) #0
425
426 ; Function Attrs: nounwind readnone
427 declare float @llvm.amdgcn.rsq.f32(float) #0
428
429 ; Function Attrs: nounwind readnone
430 declare float @llvm.exp2.f32(float) #0
431
432 ; Function Attrs: nounwind readnone
433 declare float @llvm.pow.f32(float, float) #0
434
435 ; Function Attrs: nounwind readnone
436 declare i32 @llvm.SI.packf16(float, float) #0
437
438 ; Function Attrs: nounwind readnone
439 declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
440
441 ; Function Attrs: nounwind readnone
442 declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
443
444 ; Function Attrs: nounwind readnone
445 declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
446
447 ; Function Attrs: nounwind readnone
448 declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #0
449
450 attributes #0 = { nounwind readnone }
451 attributes #1 = { nounwind }
452 attributes #2 = { nounwind readonly }
407 %tmp13 = bitcast i32 %tmp12 to <2 x half>
408 call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp13, <2 x half> undef, i1 true, i1 true) #0
409 ret void
410 }
411
412 declare float @llvm.fabs.f32(float) #1
413 declare float @llvm.amdgcn.rsq.f32(float) #1
414 declare float @llvm.exp2.f32(float) #1
415 declare float @llvm.pow.f32(float, float) #1
416 declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
417 declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
418 declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
419 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
420 declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
421
422 declare i32 @llvm.SI.packf16(float, float) #1
423 declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
424 declare float @llvm.SI.load.const(<16 x i8>, i32) #1
425
426 attributes #0 = { nounwind }
427 attributes #1 = { nounwind readnone }
453428
454429 !0 = !{!1, !1, i64 0, i32 1}
455430 !1 = !{!"const", !2}
None ; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=SI %s
1 ; XUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=VI %s
2 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s
0 ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
1 ; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
2 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
33
44 declare i32 @llvm.r600.read.tidig.x() #0
55
465465 ret void
466466 }
467467
468 ; FUNC-LABEL: {{^}}test_mul2:
469 ; GCN: s_lshl_b32 s{{[0-9]}}, s{{[0-9]}}, 1
470 define void @test_mul2(i32 %p) {
471 %i = mul i32 %p, 2
472 store volatile i32 %i, i32 addrspace(1)* undef
473 ret void
474 }
475
468476 attributes #0 = { nounwind readnone }
+0
-14
test/CodeGen/AMDGPU/si-literal-folding.ll less more
None ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
1
2 ; GCN-LABEL: {{^}}main:
3 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f4353f8, v{{[0-9]+}}
4 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0xbf4353f8, v{{[0-9]+}}
5 define amdgpu_vs void @main(float) {
6 main_body:
7 %1 = fmul float %0, 0x3FE86A7F00000000
8 %2 = fmul float %0, 0xBFE86A7F00000000
9 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %1, float %1, float %2, float %2)
10 ret void
11 }
12
13 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
None ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
1 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s
0 ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
22
33 ; This shader has the potential to generate illegal VGPR to SGPR copies if
44 ; the wrong register class is used for the REG_SEQUENCE instructions.
55
6 ; CHECK: {{^}}main:
7 ; CHECK: image_sample_b v{{\[[0-9]:[0-9]\]}}, v{{\[[0-9]:[0-9]\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf
6 ; GCN-LABEL: {{^}}main:
7 ; GCN: image_sample_b v{{\[[0-9]:[0-9]\]}}, v{{\[[0-9]:[0-9]\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf
88 define amdgpu_ps void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
99 main_body:
1010 %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
3939 %tmp37 = extractelement <4 x float> %tmp35, i32 1
4040 %tmp38 = extractelement <4 x float> %tmp35, i32 2
4141 %tmp39 = extractelement <4 x float> %tmp35, i32 3
42 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %tmp36, float %tmp37, float %tmp38, float %tmp39)
42 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp36, float %tmp37, float %tmp38, float %tmp39, i1 true, i1 true) #0
4343 ret void
4444 }
4545
46 ; Function Attrs: nounwind readnone
46 declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
47 declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
48 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
49
4750 declare float @llvm.SI.load.const(<16 x i8>, i32) #1
48
49 ; Function Attrs: nounwind readnone
5051 declare <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
51
52 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
53
54 ; Function Attrs: nounwind readnone
55 declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
56
57 ; Function Attrs: nounwind readnone
58 declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
59
60 ; Function Attrs: nounwind readnone
61 declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
6252
6353 attributes #0 = { nounwind }
6454 attributes #1 = { nounwind readnone }
22 ; The only way the subtarget knows that the si machine scheduler is being used
33 ; is to specify -mattr=si-scheduler. If we just pass --misched=si, the backend
44 ; won't know what scheduler we are using.
5 ; RUN: llc -march=amdgcn -mcpu=SI --misched=si -mattr=si-scheduler < %s | FileCheck %s
5 ; RUN: llc -march=amdgcn --misched=si -mattr=si-scheduler < %s | FileCheck %s
66
77 ; The test checks the "si" machine scheduler pass works correctly.
88
1515 ; CHECK: s_waitcnt vmcnt(0)
1616 ; CHECK: exp
1717 ; CHECK: s_endpgm
18 define amdgpu_ps void @main([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) {
18 define amdgpu_ps void @main([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
1919 main_body:
2020 %tmp = bitcast [34 x <8 x i32>] addrspace(2)* %arg3 to <32 x i8> addrspace(2)*
2121 %tmp22 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp, align 32, !tbaa !0
4545 %tmp34 = extractelement <4 x float> %tmp31, i32 2
4646 %tmp35 = extractelement <4 x float> %tmp31, i32 3
4747 %tmp36 = call i32 @llvm.SI.packf16(float %tmp32, float %tmp33)
48 %tmp37 = bitcast i32 %tmp36 to float
48 %tmp37 = bitcast i32 %tmp36 to <2 x half>
4949 %tmp38 = call i32 @llvm.SI.packf16(float %tmp34, float %tmp35)
50 %tmp39 = bitcast i32 %tmp38 to float
51 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp37, float %tmp39, float %tmp37, float %tmp39)
50 %tmp39 = bitcast i32 %tmp38 to <2 x half>
51 call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp37, <2 x half> %tmp39, i1 true, i1 false) #0
5252 ret void
5353 }
5454
55 ; Function Attrs: nounwind readnone
56 declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
55 declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
56 declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
57 declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
5758
58 ; Function Attrs: nounwind readnone
59 declare i32 @llvm.SI.packf16(float, float) #0
59 declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
60 declare i32 @llvm.SI.packf16(float, float) #1
6061
61 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
62
63 ; Function Attrs: nounwind readnone
64 declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
65
66 ; Function Attrs: nounwind readnone
67 declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
68
69 attributes #0 = { nounwind readnone }
70 attributes #1 = { nounwind }
62 attributes #0 = { nounwind }
63 attributes #1 = { nounwind readnone }
7164
7265 !0 = !{!1, !1, i64 0, i32 1}
7366 !1 = !{!"const", !2}
731731 %tmp579 = fmul float %tmp574, %tmp45
732732 %tmp580 = fadd float %tmp579, %tmp556
733733 %tmp581 = call i32 @llvm.SI.packf16(float %tmp576, float %tmp578)
734 %tmp582 = bitcast i32 %tmp581 to float
734 %tmp582 = bitcast i32 %tmp581 to <2 x half>
735735 %tmp583 = call i32 @llvm.SI.packf16(float %tmp580, float %tmp282)
736 %tmp584 = bitcast i32 %tmp583 to float
737 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp582, float %tmp584, float %tmp582, float %tmp584)
736 %tmp584 = bitcast i32 %tmp583 to <2 x half>
737 call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp582, <2 x half> %tmp584, i1 true, i1 true) #0
738738 ret void
739739
740740 ENDIF66: ; preds = %LOOP65
18131813 %max.0.i1 = call float @llvm.maxnum.f32(float %tmp774, float 0.000000e+00)
18141814 %clamp.i2 = call float @llvm.minnum.f32(float %max.0.i1, float 1.000000e+00)
18151815 %tmp776 = call i32 @llvm.SI.packf16(float %tmp768, float %tmp770)
1816 %tmp777 = bitcast i32 %tmp776 to float
1816 %tmp777 = bitcast i32 %tmp776 to <2 x half>
18171817 %tmp778 = call i32 @llvm.SI.packf16(float %tmp772, float %clamp.i2)
1818 %tmp779 = bitcast i32 %tmp778 to float
1819 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp777, float %tmp779, float %tmp777, float %tmp779)
1818 %tmp779 = bitcast i32 %tmp778 to <2 x half>
1819 call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp777, <2 x half> %tmp779, i1 true, i1 true) #0
18201820 ret void
18211821
18221822 ELSE214: ; preds = %ELSE211
18341834
18351835 declare float @llvm.exp2.f32(float) #1
18361836 declare float @llvm.ceil.f32(float) #1
1837 declare float @llvm.amdgcn.rsq.f32(float) #1
18381837 declare float @llvm.fabs.f32(float) #1
18391838 declare float @llvm.pow.f32(float, float) #1
18401839 declare float @llvm.minnum.f32(float, float) #1
18411840 declare float @llvm.maxnum.f32(float, float) #1
1841 declare float @llvm.amdgcn.rsq.f32(float) #1
18421842 declare float @llvm.amdgcn.cubeid(float, float, float) #1
18431843 declare float @llvm.amdgcn.cubesc(float, float, float) #1
18441844 declare float @llvm.amdgcn.cubetc(float, float, float) #1
18471847 declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
18481848 declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
18491849 declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
1850 declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
1851
18501852 declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
18511853 declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
18521854 declare <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
18531855 declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
18541856 declare float @llvm.SI.load.const(<16 x i8>, i32) #1
18551857 declare i32 @llvm.SI.packf16(float, float) #1
1856 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
18571858
18581859 attributes #0 = { nounwind }
18591860 attributes #1 = { nounwind readnone }
55
66 ; SI: s_or_b64 exec, exec, [[SAVED:s\[[0-9]+:[0-9]+\]|[a-z]+]]
77 ; SI-NOT: v_readlane_b32 [[SAVED]]
8
89 define amdgpu_ps void @main() #0 {
910 main_body:
10 %0 = call float @llvm.SI.load.const(<16 x i8> undef, i32 16)
11 %1 = call float @llvm.SI.load.const(<16 x i8> undef, i32 32)
12 %2 = call float @llvm.SI.load.const(<16 x i8> undef, i32 80)
13 %3 = call float @llvm.SI.load.const(<16 x i8> undef, i32 84)
14 %4 = call float @llvm.SI.load.const(<16 x i8> undef, i32 88)
15 %5 = call float @llvm.SI.load.const(<16 x i8> undef, i32 96)
16 %6 = call float @llvm.SI.load.const(<16 x i8> undef, i32 100)
17 %7 = call float @llvm.SI.load.const(<16 x i8> undef, i32 104)
18 %8 = call float @llvm.SI.load.const(<16 x i8> undef, i32 112)
19 %9 = call float @llvm.SI.load.const(<16 x i8> undef, i32 116)
20 %10 = call float @llvm.SI.load.const(<16 x i8> undef, i32 120)
21 %11 = call float @llvm.SI.load.const(<16 x i8> undef, i32 128)
22 %12 = call float @llvm.SI.load.const(<16 x i8> undef, i32 132)
23 %13 = call float @llvm.SI.load.const(<16 x i8> undef, i32 136)
24 %14 = call float @llvm.SI.load.const(<16 x i8> undef, i32 144)
25 %15 = call float @llvm.SI.load.const(<16 x i8> undef, i32 148)
26 %16 = call float @llvm.SI.load.const(<16 x i8> undef, i32 152)
27 %17 = call float @llvm.SI.load.const(<16 x i8> undef, i32 160)
28 %18 = call float @llvm.SI.load.const(<16 x i8> undef, i32 164)
29 %19 = call float @llvm.SI.load.const(<16 x i8> undef, i32 168)
30 %20 = call float @llvm.SI.load.const(<16 x i8> undef, i32 176)
31 %21 = call float @llvm.SI.load.const(<16 x i8> undef, i32 180)
32 %22 = call float @llvm.SI.load.const(<16 x i8> undef, i32 184)
33 %23 = call float @llvm.SI.load.const(<16 x i8> undef, i32 192)
34 %24 = call float @llvm.SI.load.const(<16 x i8> undef, i32 196)
35 %25 = call float @llvm.SI.load.const(<16 x i8> undef, i32 200)
36 %26 = call float @llvm.SI.load.const(<16 x i8> undef, i32 208)
37 %27 = call float @llvm.SI.load.const(<16 x i8> undef, i32 212)
38 %28 = call float @llvm.SI.load.const(<16 x i8> undef, i32 216)
39 %29 = call float @llvm.SI.load.const(<16 x i8> undef, i32 224)
40 %30 = call float @llvm.SI.load.const(<16 x i8> undef, i32 228)
41 %31 = call float @llvm.SI.load.const(<16 x i8> undef, i32 232)
42 %32 = call float @llvm.SI.load.const(<16 x i8> undef, i32 240)
43 %33 = call float @llvm.SI.load.const(<16 x i8> undef, i32 244)
44 %34 = call float @llvm.SI.load.const(<16 x i8> undef, i32 248)
45 %35 = call float @llvm.SI.load.const(<16 x i8> undef, i32 256)
46 %36 = call float @llvm.SI.load.const(<16 x i8> undef, i32 260)
47 %37 = call float @llvm.SI.load.const(<16 x i8> undef, i32 264)
48 %38 = call float @llvm.SI.load.const(<16 x i8> undef, i32 272)
49 %39 = call float @llvm.SI.load.const(<16 x i8> undef, i32 276)
50 %40 = call float @llvm.SI.load.const(<16 x i8> undef, i32 280)
51 %41 = call float @llvm.SI.load.const(<16 x i8> undef, i32 288)
52 %42 = call float @llvm.SI.load.const(<16 x i8> undef, i32 292)
53 %43 = call float @llvm.SI.load.const(<16 x i8> undef, i32 296)
54 %44 = call float @llvm.SI.load.const(<16 x i8> undef, i32 304)
55 %45 = call float @llvm.SI.load.const(<16 x i8> undef, i32 308)
56 %46 = call float @llvm.SI.load.const(<16 x i8> undef, i32 312)
57 %47 = call float @llvm.SI.load.const(<16 x i8> undef, i32 320)
58 %48 = call float @llvm.SI.load.const(<16 x i8> undef, i32 324)
59 %49 = call float @llvm.SI.load.const(<16 x i8> undef, i32 328)
60 %50 = call float @llvm.SI.load.const(<16 x i8> undef, i32 336)
61 %51 = call float @llvm.SI.load.const(<16 x i8> undef, i32 340)
62 %52 = call float @llvm.SI.load.const(<16 x i8> undef, i32 344)
63 %53 = call float @llvm.SI.load.const(<16 x i8> undef, i32 352)
64 %54 = call float @llvm.SI.load.const(<16 x i8> undef, i32 356)
65 %55 = call float @llvm.SI.load.const(<16 x i8> undef, i32 360)
66 %56 = call float @llvm.SI.load.const(<16 x i8> undef, i32 368)
67 %57 = call float @llvm.SI.load.const(<16 x i8> undef, i32 372)
68 %58 = call float @llvm.SI.load.const(<16 x i8> undef, i32 376)
69 %59 = call float @llvm.SI.load.const(<16 x i8> undef, i32 384)
70 %60 = call float @llvm.SI.load.const(<16 x i8> undef, i32 388)
71 %61 = call float @llvm.SI.load.const(<16 x i8> undef, i32 392)
72 %62 = call float @llvm.SI.load.const(<16 x i8> undef, i32 400)
73 %63 = call float @llvm.SI.load.const(<16 x i8> undef, i32 404)
74 %64 = call float @llvm.SI.load.const(<16 x i8> undef, i32 408)
75 %65 = call float @llvm.SI.load.const(<16 x i8> undef, i32 416)
76 %66 = call float @llvm.SI.load.const(<16 x i8> undef, i32 420)
11 %tmp = call float @llvm.SI.load.const(<16 x i8> undef, i32 16)
12 %tmp1 = call float @llvm.SI.load.const(<16 x i8> undef, i32 32)
13 %tmp2 = call float @llvm.SI.load.const(<16 x i8> undef, i32 80)
14 %tmp3 = call float @llvm.SI.load.const(<16 x i8> undef, i32 84)
15 %tmp4 = call float @llvm.SI.load.const(<16 x i8> undef, i32 88)
16 %tmp5 = call float @llvm.SI.load.const(<16 x i8> undef, i32 96)
17 %tmp6 = call float @llvm.SI.load.const(<16 x i8> undef, i32 100)
18 %tmp7 = call float @llvm.SI.load.const(<16 x i8> undef, i32 104)
19 %tmp8 = call float @llvm.SI.load.const(<16 x i8> undef, i32 112)
20 %tmp9 = call float @llvm.SI.load.const(<16 x i8> undef, i32 116)
21 %tmp10 = call float @llvm.SI.load.const(<16 x i8> undef, i32 120)
22 %tmp11 = call float @llvm.SI.load.const(<16 x i8> undef, i32 128)
23 %tmp12 = call float @llvm.SI.load.const(<16 x i8> undef, i32 132)
24 %tmp13 = call float @llvm.SI.load.const(<16 x i8> undef, i32 136)
25 %tmp14 = call float @llvm.SI.load.const(<16 x i8> undef, i32 144)
26 %tmp15 = call float @llvm.SI.load.const(<16 x i8> undef, i32 148)
27 %tmp16 = call float @llvm.SI.load.const(<16 x i8> undef, i32 152)
28 %tmp17 = call float @llvm.SI.load.const(<16 x i8> undef, i32 160)
29 %tmp18 = call float @llvm.SI.load.const(<16 x i8> undef, i32 164)
30 %tmp19 = call float @llvm.SI.load.const(<16 x i8> undef, i32 168)
31 %tmp20 = call float @llvm.SI.load.const(<16 x i8> undef, i32 176)
32 %tmp21 = call float @llvm.SI.load.const(<16 x i8> undef, i32 180)
33 %tmp22 = call float @llvm.SI.load.const(<16 x i8> undef, i32 184)
34 %tmp23 = call float @llvm.SI.load.const(<16 x i8> undef, i32 192)
35 %tmp24 = call float @llvm.SI.load.const(<16 x i8> undef, i32 196)
36 %tmp25 = call float @llvm.SI.load.const(<16 x i8> undef, i32 200)
37 %tmp26 = call float @llvm.SI.load.const(<16 x i8> undef, i32 208)
38 %tmp27 = call float @llvm.SI.load.const(<16 x i8> undef, i32 212)
39 %tmp28 = call float @llvm.SI.load.const(<16 x i8> undef, i32 216)
40 %tmp29 = call float @llvm.SI.load.const(<16 x i8> undef, i32 224)
41 %tmp30 = call float @llvm.SI.load.const(<16 x i8> undef, i32 228)
42 %tmp31 = call float @llvm.SI.load.const(<16 x i8> undef, i32 232)
43 %tmp32 = call float @llvm.SI.load.const(<16 x i8> undef, i32 240)
44 %tmp33 = call float @llvm.SI.load.const(<16 x i8> undef, i32 244)
45 %tmp34 = call float @llvm.SI.load.const(<16 x i8> undef, i32 248)
46 %tmp35 = call float @llvm.SI.load.const(<16 x i8> undef, i32 256)
47 %tmp36 = call float @llvm.SI.load.const(<16 x i8> undef, i32 260)
48 %tmp37 = call float @llvm.SI.load.const(<16 x i8> undef, i32 264)
49 %tmp38 = call float @llvm.SI.load.const(<16 x i8> undef, i32 272)
50 %tmp39 = call float @llvm.SI.load.const(<16 x i8> undef, i32 276)
51 %tmp40 = call float @llvm.SI.load.const(<16 x i8> undef, i32 280)
52 %tmp41 = call float @llvm.SI.load.const(<16 x i8> undef, i32 288)
53 %tmp42 = call float @llvm.SI.load.const(<16 x i8> undef, i32 292)
54 %tmp43 = call float @llvm.SI.load.const(<16 x i8> undef, i32 296)
55 %tmp44 = call float @llvm.SI.load.const(<16 x i8> undef, i32 304)
56 %tmp45 = call float @llvm.SI.load.const(<16 x i8> undef, i32 308)
57 %tmp46 = call float @llvm.SI.load.const(<16 x i8> undef, i32 312)
58 %tmp47 = call float @llvm.SI.load.const(<16 x i8> undef, i32 320)
59 %tmp48 = call float @llvm.SI.load.const(<16 x i8> undef, i32 324)
60 %tmp49 = call float @llvm.SI.load.const(<16 x i8> undef, i32 328)
61 %tmp50 = call float @llvm.SI.load.const(<16 x i8> undef, i32 336)
62 %tmp51 = call float @llvm.SI.load.const(<16 x i8> undef, i32 340)
63 %tmp52 = call float @llvm.SI.load.const(<16 x i8> undef, i32 344)
64 %tmp53 = call float @llvm.SI.load.const(<16 x i8> undef, i32 352)
65 %tmp54 = call float @llvm.SI.load.const(<16 x i8> undef, i32 356)
66 %tmp55 = call float @llvm.SI.load.const(<16 x i8> undef, i32 360)
67 %tmp56 = call float @llvm.SI.load.const(<16 x i8> undef, i32 368)
68 %tmp57 = call float @llvm.SI.load.const(<16 x i8> undef, i32 372)
69 %tmp58 = call float @llvm.SI.load.const(<16 x i8> undef, i32 376)
70 %tmp59 = call float @llvm.SI.load.const(<16 x i8> undef, i32 384)
71 %tmp60 = call float @llvm.SI.load.const(<16 x i8> undef, i32 388)
72 %tmp61 = call float @llvm.SI.load.const(<16 x i8> undef, i32 392)
73 %tmp62 = call float @llvm.SI.load.const(<16 x i8> undef, i32 400)
74 %tmp63 = call float @llvm.SI.load.const(<16 x i8> undef, i32 404)
75 %tmp64 = call float @llvm.SI.load.const(<16 x i8> undef, i32 408)
76 %tmp65 = call float @llvm.SI.load.const(<16 x i8> undef, i32 416)
77 %tmp66 = call float @llvm.SI.load.const(<16 x i8> undef, i32 420)
7778 br label %LOOP
7879
7980 LOOP: ; preds = %ENDIF2795, %main_body
8081 %temp894.0 = phi float [ 0.000000e+00, %main_body ], [ %temp894.1, %ENDIF2795 ]
8182 %temp18.0 = phi float [ undef, %main_body ], [ %temp18.1, %ENDIF2795 ]
8283 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
83 %67 = icmp sgt i32 %tid, 4
84 br i1 %67, label %ENDLOOP, label %ENDIF
84 %tmp67 = icmp sgt i32 %tid, 4
85 br i1 %tmp67, label %ENDLOOP, label %ENDIF
8586
8687 ENDLOOP: ; preds = %ELSE2566, %LOOP
87 %one.sub.a.i = fsub float 1.000000e+00, %0
88 %one.sub.a.i = fsub float 1.000000e+00, %tmp
8889 %one.sub.ac.i = fmul float %one.sub.a.i, undef
8990 %result.i = fadd float fmul (float undef, float undef), %one.sub.ac.i
90 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float undef, float %result.i, float undef, float 1.000000e+00)
91 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float undef, float %result.i, float undef, float 1.000000e+00, i1 true, i1 true) #0
9192 ret void
9293
9394 ENDIF: ; preds = %LOOP
94 %68 = fsub float %2, undef
95 %69 = fsub float %3, undef
96 %70 = fsub float %4, undef
97 %71 = fmul float %68, 0.000000e+00
98 %72 = fmul float %69, undef
99 %73 = fmul float %70, undef
100 %74 = fsub float %6, undef
101 %75 = fsub float %7, undef
102 %76 = fmul float %74, undef
103 %77 = fmul float %75, 0.000000e+00
104 %78 = call float @llvm.minnum.f32(float %73, float %77)
105 %79 = call float @llvm.maxnum.f32(float %71, float 0.000000e+00)
106 %80 = call float @llvm.maxnum.f32(float %72, float %76)
107 %81 = call float @llvm.maxnum.f32(float undef, float %78)
108 %82 = call float @llvm.minnum.f32(float %79, float %80)
109 %83 = call float @llvm.minnum.f32(float %82, float undef)
110 %84 = fsub float %14, undef
111 %85 = fsub float %15, undef
112 %86 = fsub float %16, undef
113 %87 = fmul float %84, undef
114 %88 = fmul float %85, undef
115 %89 = fmul float %86, undef
116 %90 = fsub float %17, undef
117 %91 = fsub float %18, undef
118 %92 = fsub float %19, undef
119 %93 = fmul float %90, 0.000000e+00
120 %94 = fmul float %91, undef
121 %95 = fmul float %92, undef
122 %96 = call float @llvm.minnum.f32(float %88, float %94)
123 %97 = call float @llvm.maxnum.f32(float %87, float %93)
124 %98 = call float @llvm.maxnum.f32(float %89, float %95)
125 %99 = call float @llvm.maxnum.f32(float undef, float %96)
126 %100 = call float @llvm.maxnum.f32(float %99, float undef)
127 %101 = call float @llvm.minnum.f32(float %97, float undef)
128 %102 = call float @llvm.minnum.f32(float %101, float %98)
129 %103 = fsub float %30, undef
130 %104 = fsub float %31, undef
131 %105 = fmul float %103, 0.000000e+00
132 %106 = fmul float %104, 0.000000e+00
133 %107 = call float @llvm.minnum.f32(float undef, float %105)
134 %108 = call float @llvm.maxnum.f32(float undef, float %106)
135 %109 = call float @llvm.maxnum.f32(float undef, float %107)
136 %110 = call float @llvm.maxnum.f32(float %109, float undef)
137 %111 = call float @llvm.minnum.f32(float undef, float %108)
138 %112 = fsub float %32, undef
139 %113 = fsub float %33, undef
140 %114 = fsub float %34, undef
141 %115 = fmul float %112, 0.000000e+00
142 %116 = fmul float %113, undef
143 %117 = fmul float %114, undef
144 %118 = fsub float %35, undef
145 %119 = fsub float %36, undef
146 %120 = fsub float %37, undef
147 %121 = fmul float %118, undef
148 %122 = fmul float %119, undef
149 %123 = fmul float %120, undef
150 %124 = call float @llvm.minnum.f32(float %115, float %121)
151 %125 = call float @llvm.minnum.f32(float %116, float %122)
152 %126 = call float @llvm.minnum.f32(float %117, float %123)
153 %127 = call float @llvm.maxnum.f32(float %124, float %125)
154 %128 = call float @llvm.maxnum.f32(float %127, float %126)
155 %129 = fsub float %38, undef
156 %130 = fsub float %39, undef
157 %131 = fsub float %40, undef
158 %132 = fmul float %129, 0.000000e+00
159 %133 = fmul float %130, undef
160 %134 = fmul float %131, undef
161 %135 = fsub float %41, undef
162 %136 = fsub float %42, undef
163 %137 = fsub float %43, undef
164 %138 = fmul float %135, undef
165 %139 = fmul float %136, undef
166 %140 = fmul float %137, undef
167 %141 = call float @llvm.minnum.f32(float %132, float %138)
168 %142 = call float @llvm.minnum.f32(float %133, float %139)
169 %143 = call float @llvm.minnum.f32(float %134, float %140)
170 %144 = call float @llvm.maxnum.f32(float %141, float %142)
171 %145 = call float @llvm.maxnum.f32(float %144, float %143)
172 %146 = fsub float %44, undef
173 %147 = fsub float %45, undef
174 %148 = fsub float %46, undef
175 %149 = fmul float %146, 0.000000e+00
176 %150 = fmul float %147, 0.000000e+00
177 %151 = fmul float %148, undef
178 %152 = fsub float %47, undef
179 %153 = fsub float %48, undef
180 %154 = fsub float %49, undef
181 %155 = fmul float %152, undef
182 %156 = fmul float %153, 0.000000e+00
183 %157 = fmul float %154, undef
184 %158 = call float @llvm.minnum.f32(float %149, float %155)
185 %159 = call float @llvm.minnum.f32(float %150, float %156)
186 %160 = call float @llvm.minnum.f32(float %151, float %157)
187 %161 = call float @llvm.maxnum.f32(float %158, float %159)
188 %162 = call float @llvm.maxnum.f32(float %161, float %160)
189 %163 = fsub float %50, undef
190 %164 = fsub float %51, undef
191 %165 = fsub float %52, undef
192 %166 = fmul float %163, undef
193 %167 = fmul float %164, 0.000000e+00
194 %168 = fmul float %165, 0.000000e+00
195 %169 = fsub float %53, undef
196 %170 = fsub float %54, undef
197 %171 = fsub float %55, undef
198 %172 = fdiv float 1.000000e+00, %temp18.0
199 %173 = fmul float %169, undef
200 %174 = fmul float %170, undef
201 %175 = fmul float %171, %172
202 %176 = call float @llvm.minnum.f32(float %166, float %173)
203 %177 = call float @llvm.minnum.f32(float %167, float %174)
204 %178 = call float @llvm.minnum.f32(float %168, float %175)
205 %179 = call float @llvm.maxnum.f32(float %176, float %177)
206 %180 = call float @llvm.maxnum.f32(float %179, float %178)
207 %181 = fsub float %62, undef
208 %182 = fsub float %63, undef
209 %183 = fsub float %64, undef
210 %184 = fmul float %181, 0.000000e+00
211 %185 = fmul float %182, undef
212 %186 = fmul float %183, undef
213 %187 = fsub float %65, undef
214 %188 = fsub float %66, undef
215 %189 = fmul float %187, undef
216 %190 = fmul float %188, undef
217 %191 = call float @llvm.maxnum.f32(float %184, float %189)
218 %192 = call float @llvm.maxnum.f32(float %185, float %190)
219 %193 = call float @llvm.maxnum.f32(float %186, float undef)
220 %194 = call float @llvm.minnum.f32(float %191, float %192)
221 %195 = call float @llvm.minnum.f32(float %194, float %193)
222 %.temp292.7 = select i1 undef, float %162, float undef
223 %temp292.9 = select i1 false, float %180, float %.temp292.7
95 %tmp68 = fsub float %tmp2, undef
96 %tmp69 = fsub float %tmp3, undef
97 %tmp70 = fsub float %tmp4, undef
98 %tmp71 = fmul float %tmp68, 0.000000e+00
99 %tmp72 = fmul float %tmp69, undef
100 %tmp73 = fmul float %tmp70, undef
101 %tmp74 = fsub float %tmp6, undef
102 %tmp75 = fsub float %tmp7, undef
103 %tmp76 = fmul float %tmp74, undef
104 %tmp77 = fmul float %tmp75, 0.000000e+00
105 %tmp78 = call float @llvm.minnum.f32(float %tmp73, float %tmp77)
106 %tmp79 = call float @llvm.maxnum.f32(float %tmp71, float 0.000000e+00)
107 %tmp80 = call float @llvm.maxnum.f32(float %tmp72, float %tmp76)
108 %tmp81 = call float @llvm.maxnum.f32(float undef, float %tmp78)
109 %tmp82 = call float @llvm.minnum.f32(float %tmp79, float %tmp80)
110 %tmp83 = call float @llvm.minnum.f32(float %tmp82, float undef)
111 %tmp84 = fsub float %tmp14, undef
112 %tmp85 = fsub float %tmp15, undef
113 %tmp86 = fsub float %tmp16, undef
114 %tmp87 = fmul float %tmp84, undef
115 %tmp88 = fmul float %tmp85, undef
116 %tmp89 = fmul float %tmp86, undef
117 %tmp90 = fsub float %tmp17, undef
118 %tmp91 = fsub float %tmp18, undef
119 %tmp92 = fsub float %tmp19, undef
120 %tmp93 = fmul float %tmp90, 0.000000e+00
121 %tmp94 = fmul float %tmp91, undef
122 %tmp95 = fmul float %tmp92, undef
123 %tmp96 = call float @llvm.minnum.f32(float %tmp88, float %tmp94)
124 %tmp97 = call float @llvm.maxnum.f32(float %tmp87, float %tmp93)
125 %tmp98 = call float @llvm.maxnum.f32(float %tmp89, float %tmp95)
126 %tmp99 = call float @llvm.maxnum.f32(float undef, float %tmp96)
127 %tmp100 = call float @llvm.maxnum.f32(float %tmp99, float undef)
128 %tmp101 = call float @llvm.minnum.f32(float %tmp97, float undef)
129 %tmp102 = call float @llvm.minnum.f32(float %tmp101, float %tmp98)
130 %tmp103 = fsub float %tmp30, undef
131 %tmp104 = fsub float %tmp31, undef
132 %tmp105 = fmul float %tmp103, 0.000000e+00
133 %tmp106 = fmul float %tmp104, 0.000000e+00
134 %tmp107 = call float @llvm.minnum.f32(float undef, float %tmp105)
135 %tmp108 = call float @llvm.maxnum.f32(float undef, float %tmp106)
136 %tmp109 = call float @llvm.maxnum.f32(float undef, float %tmp107)
137 %tmp110 = call float @llvm.maxnum.f32(float %tmp109, float undef)
138 %tmp111 = call float @llvm.minnum.f32(float undef, float %tmp108)
139 %tmp112 = fsub float %tmp32, undef
140 %tmp113 = fsub float %tmp33, undef
141 %tmp114 = fsub float %tmp34, undef
142 %tmp115 = fmul float %tmp112, 0.000000e+00
143 %tmp116 = fmul float %tmp113, undef
144 %tmp117 = fmul float %tmp114, undef
145 %tmp118 = fsub float %tmp35, undef
146 %tmp119 = fsub float %tmp36, undef
147 %tmp120 = fsub float %tmp37, undef
148 %tmp121 = fmul float %tmp118, undef
149 %tmp122 = fmul float %tmp119, undef
150 %tmp123 = fmul float %tmp120, undef
151 %tmp124 = call float @llvm.minnum.f32(float %tmp115, float %tmp121)
152 %tmp125 = call float @llvm.minnum.f32(float %tmp116, float %tmp122)
153 %tmp126 = call float @llvm.minnum.f32(float %tmp117, float %tmp123)
154 %tmp127 = call float @llvm.maxnum.f32(float %tmp124, float %tmp125)
155 %tmp128 = call float @llvm.maxnum.f32(float %tmp127, float %tmp126)
156 %tmp129 = fsub float %tmp38, undef
157 %tmp130 = fsub float %tmp39, undef
158 %tmp131 = fsub float %tmp40, undef
159 %tmp132 = fmul float %tmp129, 0.000000e+00
160 %tmp133 = fmul float %tmp130, undef
161 %tmp134 = fmul float %tmp131, undef
162 %tmp135 = fsub float %tmp41, undef
163 %tmp136 = fsub float %tmp42, undef
164 %tmp137 = fsub float %tmp43, undef
165 %tmp138 = fmul float %tmp135, undef
166 %tmp139 = fmul float %tmp136, undef
167 %tmp140 = fmul float %tmp137, undef
168 %tmp141 = call float @llvm.minnum.f32(float %tmp132, float %tmp138)
169 %tmp142 = call float @llvm.minnum.f32(float %tmp133, float %tmp139)
170 %tmp143 = call float @llvm.minnum.f32(float %tmp134, float %tmp140)
171 %tmp144 = call float @llvm.maxnum.f32(float %tmp141, float %tmp142)
172 %tmp145 = call float @llvm.maxnum.f32(float %tmp144, float %tmp143)
173 %tmp146 = fsub float %tmp44, undef
174 %tmp147 = fsub float %tmp45, undef
175 %tmp148 = fsub float %tmp46, undef
176 %tmp149 = fmul float %tmp146, 0.000000e+00
177 %tmp150 = fmul float %tmp147, 0.000000e+00
178 %tmp151 = fmul float %tmp148, undef
179 %tmp152 = fsub float %tmp47, undef
180 %tmp153 = fsub float %tmp48, undef
181 %tmp154 = fsub float %tmp49, undef
182 %tmp155 = fmul float %tmp152, undef
183 %tmp156 = fmul float %tmp153, 0.000000e+00
184 %tmp157 = fmul float %tmp154, undef
185 %tmp158 = call float @llvm.minnum.f32(float %tmp149, float %tmp155)
186 %tmp159 = call float @llvm.minnum.f32(float %tmp150, float %tmp156)
187 %tmp160 = call float @llvm.minnum.f32(float %tmp151, float %tmp157)
188 %tmp161 = call float @llvm.maxnum.f32(float %tmp158, float %tmp159)
189 %tmp162 = call float @llvm.maxnum.f32(float %tmp161, float %tmp160)
190 %tmp163 = fsub float %tmp50, undef
191 %tmp164 = fsub float %tmp51, undef
192 %tmp165 = fsub float %tmp52, undef
193 %tmp166 = fmul float %tmp163, undef
194 %tmp167 = fmul float %tmp164, 0.000000e+00
195 %tmp168 = fmul float %tmp165, 0.000000e+00
196 %tmp169 = fsub float %tmp53, undef
197 %tmp170 = fsub float %tmp54, undef
198 %tmp171 = fsub float %tmp55, undef
199 %tmp172 = fdiv float 1.000000e+00, %temp18.0
200 %tmp173 = fmul float %tmp169, undef
201 %tmp174 = fmul float %tmp170, undef
202 %tmp175 = fmul float %tmp171, %tmp172
203 %tmp176 = call float @llvm.minnum.f32(float %tmp166, float %tmp173)
204 %tmp177 = call float @llvm.minnum.f32(float %tmp167, float %tmp174)
205 %tmp178 = call float @llvm.minnum.f32(float %tmp168, float %tmp175)
206 %tmp179 = call float @llvm.maxnum.f32(float %tmp176, float %tmp177)
207 %tmp180 = call float @llvm.maxnum.f32(float %tmp179, float %tmp178)
208 %tmp181 = fsub float %tmp62, undef
209 %tmp182 = fsub float %tmp63, undef
210 %tmp183 = fsub float %tmp64, undef
211 %tmp184 = fmul float %tmp181, 0.000000e+00
212 %tmp185 = fmul float %tmp182, undef
213 %tmp186 = fmul float %tmp183, undef
214 %tmp187 = fsub float %tmp65, undef
215 %tmp188 = fsub float %tmp66, undef
216 %tmp189 = fmul float %tmp187, undef
217 %tmp190 = fmul float %tmp188, undef
218 %tmp191 = call float @llvm.maxnum.f32(float %tmp184, float %tmp189)
219 %tmp192 = call float @llvm.maxnum.f32(float %tmp185, float %tmp190)
220 %tmp193 = call float @llvm.maxnum.f32(float %tmp186, float undef)
221 %tmp194 = call float @llvm.minnum.f32(float %tmp191, float %tmp192)
222 %tmp195 = call float @llvm.minnum.f32(float %tmp194, float %tmp193)
223 %.temp292.7 = select i1 undef, float %tmp162, float undef
224 %temp292.9 = select i1 false, float %tmp180, float %.temp292.7
224225 %.temp292.9 = select i1 undef, float undef, float %temp292.9
225 %196 = fcmp ogt float undef, 0.000000e+00
226 %197 = fcmp olt float undef, %195
227 %198 = and i1 %196, %197
228 %199 = fcmp olt float undef, %.temp292.9
229 %200 = and i1 %198, %199
230 %temp292.11 = select i1 %200, float undef, float %.temp292.9
231 %tid0 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #2
226 %tmp196 = fcmp ogt float undef, 0.000000e+00
227 %tmp197 = fcmp olt float undef, %tmp195
228 %tmp198 = and i1 %tmp196, %tmp197
229 %tmp199 = fcmp olt float undef, %.temp292.9
230 %tmp200 = and i1 %tmp198, %tmp199
231 %temp292.11 = select i1 %tmp200, float undef, float %.temp292.9
232 %tid0 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
232233 %cmp0 = icmp eq i32 %tid0, 0
233234 br i1 %cmp0, label %IF2565, label %ELSE2566
234235
235236 IF2565: ; preds = %ENDIF
236 %tid1 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #2
237 %tid1 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
237238 %cmp1 = icmp eq i32 %tid1, 0
238239 br i1 %cmp1, label %ENDIF2582, label %ELSE2584
239240
240241 ELSE2566: ; preds = %ENDIF
241 %tid2 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #2
242 %tid2 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
242243 %tidf = bitcast i32 %tid2 to float
243 %201 = fcmp oeq float %temp292.11, %tidf
244 br i1 %201, label %ENDLOOP, label %ELSE2593
244 %tmp201 = fcmp oeq float %temp292.11, %tidf
245 br i1 %tmp201, label %ENDLOOP, label %ELSE2593
245246
246247 ENDIF2564: ; preds = %ENDIF2594, %ENDIF2588
247248 %temp894.1 = phi float [ undef, %ENDIF2588 ], [ %temp894.2, %ENDIF2594 ]
248 %temp18.1 = phi float [ %218, %ENDIF2588 ], [ undef, %ENDIF2594 ]
249 %202 = fsub float %5, undef
250 %203 = fmul float %202, undef
251 %204 = call float @llvm.maxnum.f32(float undef, float %203)
252 %205 = call float @llvm.minnum.f32(float %204, float undef)
253 %206 = call float @llvm.minnum.f32(float %205, float undef)
254 %207 = fcmp ogt float undef, 0.000000e+00
255 %208 = fcmp olt float undef, 1.000000e+00
256 %209 = and i1 %207, %208
257 %tid3 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #2
249 %temp18.1 = phi float [ %tmp218, %ENDIF2588 ], [ undef, %ENDIF2594 ]
250 %tmp202 = fsub float %tmp5, undef
251 %tmp203 = fmul float %tmp202, undef
252 %tmp204 = call float @llvm.maxnum.f32(float undef, float %tmp203)
253 %tmp205 = call float @llvm.minnum.f32(float %tmp204, float undef)
254 %tmp206 = call float @llvm.minnum.f32(float %tmp205, float undef)
255 %tmp207 = fcmp ogt float undef, 0.000000e+00
256 %tmp208 = fcmp olt float undef, 1.000000e+00
257 %tmp209 = and i1 %tmp207, %tmp208
258 %tid3 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
258259 %tidf3 = bitcast i32 %tid3 to float
259 %210 = fcmp olt float %tidf3, %206
260 %211 = and i1 %209, %210
261 br i1 %211, label %ENDIF2795, label %ELSE2797
260 %tmp210 = fcmp olt float %tidf3, %tmp206
261 %tmp211 = and i1 %tmp209, %tmp210
262 br i1 %tmp211, label %ENDIF2795, label %ELSE2797
262263
263264 ELSE2584: ; preds = %IF2565
264265 br label %ENDIF2582
265266
266267 ENDIF2582: ; preds = %ELSE2584, %IF2565
267 %212 = fadd float %1, undef
268 %213 = fadd float 0.000000e+00, %212
269 %floor = call float @llvm.floor.f32(float %213)
270 %214 = fsub float %213, %floor
271 %tid4 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #2
268 %tmp212 = fadd float %tmp1, undef
269 %tmp213 = fadd float 0.000000e+00, %tmp212
270 %floor = call float @llvm.floor.f32(float %tmp213)
271 %tmp214 = fsub float %tmp213, %floor
272 %tid4 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
272273 %cmp4 = icmp eq i32 %tid4, 0
273274 br i1 %cmp4, label %IF2589, label %ELSE2590
274275
279280 br label %ENDIF2588
280281
281282 ENDIF2588: ; preds = %ELSE2590, %IF2589
282 %215 = fsub float 1.000000e+00, %214
283 %216 = call float @llvm.sqrt.f32(float %215)
284 %217 = fmul float %216, undef
285 %218 = fadd float %217, undef
283 %tmp215 = fsub float 1.000000e+00, %tmp214
284 %tmp216 = call float @llvm.sqrt.f32(float %tmp215)
285 %tmp217 = fmul float %tmp216, undef
286 %tmp218 = fadd float %tmp217, undef
286287 br label %ENDIF2564
287288
288289 ELSE2593: ; preds = %ELSE2566
289 %219 = fcmp oeq float %temp292.11, %81
290 %220 = fcmp olt float %81, %83
291 %221 = and i1 %219, %220
292 br i1 %221, label %ENDIF2594, label %ELSE2596
290 %tmp219 = fcmp oeq float %temp292.11, %tmp81
291 %tmp220 = fcmp olt float %tmp81, %tmp83
292 %tmp221 = and i1 %tmp219, %tmp220
293 br i1 %tmp221, label %ENDIF2594, label %ELSE2596
293294
294295 ELSE2596: ; preds = %ELSE2593
295 %222 = fcmp oeq float %temp292.11, %100
296 %223 = fcmp olt float %100, %102
297 %224 = and i1 %222, %223
298 br i1 %224, label %ENDIF2594, label %ELSE2632
296 %tmp222 = fcmp oeq float %temp292.11, %tmp100
297 %tmp223 = fcmp olt float %tmp100, %tmp102
298 %tmp224 = and i1 %tmp222, %tmp223
299 br i1 %tmp224, label %ENDIF2594, label %ELSE2632
299300
300301 ENDIF2594: ; preds = %ELSE2788, %ELSE2785, %ELSE2782, %ELSE2779, %IF2775, %ELSE2761, %ELSE2758, %IF2757, %ELSE2704, %ELSE2686, %ELSE2671, %ELSE2668, %IF2667, %ELSE2632, %ELSE2596, %ELSE2593
301302 %temp894.2 = phi float [ 0.000000e+00, %IF2667 ], [ 0.000000e+00, %ELSE2671 ], [ 0.000000e+00, %IF2757 ], [ 0.000000e+00, %ELSE2761 ], [ %temp894.0, %ELSE2758 ], [ 0.000000e+00, %IF2775 ], [ 0.000000e+00, %ELSE2779 ], [ 0.000000e+00, %ELSE2782 ], [ %.2848, %ELSE2788 ], [ 0.000000e+00, %ELSE2785 ], [ 0.000000e+00, %ELSE2593 ], [ 0.000000e+00, %ELSE2632 ], [ 0.000000e+00, %ELSE2704 ], [ 0.000000e+00, %ELSE2686 ], [ 0.000000e+00, %ELSE2668 ], [ 0.000000e+00, %ELSE2596 ]
302 %225 = fmul float %temp894.2, undef
303 %tmp225 = fmul float %temp894.2, undef
303304 br label %ENDIF2564
304305
305306 ELSE2632: ; preds = %ELSE2596
306307 br i1 undef, label %ENDIF2594, label %ELSE2650
307308
308309 ELSE2650: ; preds = %ELSE2632
309 %226 = fcmp oeq float %temp292.11, %110
310 %227 = fcmp olt float %110, %111
311 %228 = and i1 %226, %227
312 br i1 %228, label %IF2667, label %ELSE2668
310 %tmp226 = fcmp oeq float %temp292.11, %tmp110
311 %tmp227 = fcmp olt float %tmp110, %tmp111
312 %tmp228 = and i1 %tmp226, %tmp227
313 br i1 %tmp228, label %IF2667, label %ELSE2668
313314
314315 IF2667: ; preds = %ELSE2650
315316 br i1 undef, label %ENDIF2594, label %ELSE2671
316317
317318 ELSE2668: ; preds = %ELSE2650
318 %229 = fcmp oeq float %temp292.11, %128
319 %230 = fcmp olt float %128, undef
320 %231 = and i1 %229, %230
321 br i1 %231, label %ENDIF2594, label %ELSE2686
319 %tmp229 = fcmp oeq float %temp292.11, %tmp128
320 %tmp230 = fcmp olt float %tmp128, undef
321 %tmp231 = and i1 %tmp229, %tmp230
322 br i1 %tmp231, label %ENDIF2594, label %ELSE2686
322323
323324 ELSE2671: ; preds = %IF2667
324325 br label %ENDIF2594
325326
326327 ELSE2686: ; preds = %ELSE2668
327 %232 = fcmp oeq float %temp292.11, %145
328 %233 = fcmp olt float %145, undef
329 %234 = and i1 %232, %233
330 br i1 %234, label %ENDIF2594, label %ELSE2704
328 %tmp232 = fcmp oeq float %temp292.11, %tmp145
329 %tmp233 = fcmp olt float %tmp145, undef
330 %tmp234 = and i1 %tmp232, %tmp233
331 br i1 %tmp234, label %ENDIF2594, label %ELSE2704
331332
332333 ELSE2704: ; preds = %ELSE2686
333 %235 = fcmp oeq float %temp292.11, %180
334 %236 = fcmp olt float %180, undef
335 %237 = and i1 %235, %236
336 br i1 %237, label %ENDIF2594, label %ELSE2740
334 %tmp235 = fcmp oeq float %temp292.11, %tmp180
335 %tmp236 = fcmp olt float %tmp180, undef
336 %tmp237 = and i1 %tmp235, %tmp236
337 br i1 %tmp237, label %ENDIF2594, label %ELSE2740
337338
338339 ELSE2740: ; preds = %ELSE2704
339340 br i1 undef, label %IF2757, label %ELSE2758
348349 br label %ENDIF2594
349350
350351 IF2775: ; preds = %ELSE2758
351 %238 = fcmp olt float undef, undef
352 br i1 %238, label %ENDIF2594, label %ELSE2779
352 %tmp238 = fcmp olt float undef, undef
353 br i1 %tmp238, label %ENDIF2594, label %ELSE2779
353354
354355 ELSE2779: ; preds = %IF2775
355356 br i1 undef, label %ENDIF2594, label %ELSE2782
358359 br i1 undef, label %ENDIF2594, label %ELSE2785
359360
360361 ELSE2785: ; preds = %ELSE2782
361 %239 = fcmp olt float undef, 0.000000e+00
362 br i1 %239, label %ENDIF2594, label %ELSE2788
362 %tmp239 = fcmp olt float undef, 0.000000e+00
363 br i1 %tmp239, label %ENDIF2594, label %ELSE2788
363364
364365 ELSE2788: ; preds = %ELSE2785
365 %240 = fcmp olt float 0.000000e+00, undef
366 %.2848 = select i1 %240, float -1.000000e+00, float 1.000000e+00
366 %tmp240 = fcmp olt float 0.000000e+00, undef
367 %.2848 = select i1 %tmp240, float -1.000000e+00, float 1.000000e+00
367368 br label %ENDIF2594
368369
369370 ELSE2797: ; preds = %ENDIF2564
370 %241 = fsub float %8, undef
371 %242 = fsub float %9, undef
372 %243 = fsub float %10, undef
373 %244 = fmul float %241, undef
374 %245 = fmul float %242, undef
375 %246 = fmul float %243, undef
376 %247 = fsub float %11, undef
377 %248 = fsub float %12, undef
378 %249 = fsub float %13, undef
379 %250 = fmul float %247, undef
380 %251 = fmul float %248, undef
381 %252 = fmul float %249, undef
382 %253 = call float @llvm.minnum.f32(float %244, float %250)
383 %254 = call float @llvm.minnum.f32(float %245, float %251)
384 %255 = call float @llvm.maxnum.f32(float %246, float %252)
385 %256 = call float @llvm.maxnum.f32(float %253, float %254)
386 %257 = call float @llvm.maxnum.f32(float %256, float undef)
387 %258 = call float @llvm.minnum.f32(float undef, float %255)
388 %259 = fcmp ogt float %257, 0.000000e+00
389 %260 = fcmp olt float %257, 1.000000e+00
390 %261 = and i1 %259, %260
391 %262 = fcmp olt float %257, %258
392 %263 = and i1 %261, %262
393 br i1 %263, label %ENDIF2795, label %ELSE2800
371 %tmp241 = fsub float %tmp8, undef
372 %tmp242 = fsub float %tmp9, undef
373 %tmp243 = fsub float %tmp10, undef
374 %tmp244 = fmul float %tmp241, undef
375 %tmp245 = fmul float %tmp242, undef
376 %tmp246 = fmul float %tmp243, undef
377 %tmp247 = fsub float %tmp11, undef
378 %tmp248 = fsub float %tmp12, undef
379 %tmp249 = fsub float %tmp13, undef
380 %tmp250 = fmul float %tmp247, undef
381 %tmp251 = fmul float %tmp248, undef
382 %tmp252 = fmul float %tmp249, undef
383 %tmp253 = call float @llvm.minnum.f32(float %tmp244, float %tmp250)
384 %tmp254 = call float @llvm.minnum.f32(float %tmp245, float %tmp251)
385 %tmp255 = call float @llvm.maxnum.f32(float %tmp246, float %tmp252)
386 %tmp256 = call float @llvm.maxnum.f32(float %tmp253, float %tmp254)
387 %tmp257 = call float @llvm.maxnum.f32(float %tmp256, float undef)
388 %tmp258 = call float @llvm.minnum.f32(float undef, float %tmp255)
389 %tmp259 = fcmp ogt float %tmp257, 0.000000e+00
390 %tmp260 = fcmp olt float %tmp257, 1.000000e+00
391 %tmp261 = and i1 %tmp259, %tmp260
392 %tmp262 = fcmp olt float %tmp257, %tmp258
393 %tmp263 = and i1 %tmp261, %tmp262
394 br i1 %tmp263, label %ENDIF2795, label %ELSE2800
394395
395396 ENDIF2795: ; preds = %ELSE2824, %ELSE2821, %ELSE2818, %ELSE2815, %ELSE2812, %ELSE2809, %ELSE2806, %ELSE2803, %ELSE2800, %ELSE2797, %ENDIF2564
396397 br label %LOOP
399400 br i1 undef, label %ENDIF2795, label %ELSE2803
400401
401402 ELSE2803: ; preds = %ELSE2800
402 %264 = fsub float %20, undef
403 %265 = fsub float %21, undef
404 %266 = fsub float %22, undef
405 %267 = fmul float %264, undef
406 %268 = fmul float %265, undef
407 %269 = fmul float %266, 0.000000e+00
408 %270 = fsub float %23, undef
409 %271 = fsub float %24, undef
410 %272 = fsub float %25, undef
411 %273 = fmul float %270, undef
412 %274 = fmul float %271, undef
413 %275 = fmul float %272, undef
414 %276 = call float @llvm.minnum.f32(float %267, float %273)
415 %277 = call float @llvm.maxnum.f32(float %268, float %274)
416 %278 = call float @llvm.maxnum.f32(float %269, float %275)
417 %279 = call float @llvm.maxnum.f32(float %276, float undef)
418 %280 = call float @llvm.maxnum.f32(float %279, float undef)
419 %281 = call float @llvm.minnum.f32(float undef, float %277)
420 %282 = call float @llvm.minnum.f32(float %281, float %278)
421 %283 = fcmp ogt float %280, 0.000000e+00
422 %284 = fcmp olt float %280, 1.000000e+00
423 %285 = and i1 %283, %284
424 %286 = fcmp olt float %280, %282
425 %287 = and i1 %285, %286
426 br i1 %287, label %ENDIF2795, label %ELSE2806
403 %tmp264 = fsub float %tmp20, undef
404 %tmp265 = fsub float %tmp21, undef
405 %tmp266 = fsub float %tmp22, undef
406 %tmp267 = fmul float %tmp264, undef
407 %tmp268 = fmul float %tmp265, undef
408 %tmp269 = fmul float %tmp266, 0.000000e+00
409 %tmp270 = fsub float %tmp23, undef
410 %tmp271 = fsub float %tmp24, undef
411 %tmp272 = fsub float %tmp25, undef
412 %tmp273 = fmul float %tmp270, undef
413 %tmp274 = fmul float %tmp271, undef
414 %tmp275 = fmul float %tmp272, undef
415 %tmp276 = call float @llvm.minnum.f32(float %tmp267, float %tmp273)
416 %tmp277 = call float @llvm.maxnum.f32(float %tmp268, float %tmp274)
417 %tmp278 = call float @llvm.maxnum.f32(float %tmp269, float %tmp275)
418 %tmp279 = call float @llvm.maxnum.f32(float %tmp276, float undef)
419 %tmp280 = call float @llvm.maxnum.f32(float %tmp279, float undef)
420 %tmp281 = call float @llvm.minnum.f32(float undef, float %tmp277)
421 %tmp282 = call float @llvm.minnum.f32(float %tmp281, float %tmp278)
422 %tmp283 = fcmp ogt float %tmp280, 0.000000e+00
423 %tmp284 = fcmp olt float %tmp280, 1.000000e+00
424 %tmp285 = and i1 %tmp283, %tmp284
425 %tmp286 = fcmp olt float %tmp280, %tmp282
426 %tmp287 = and i1 %tmp285, %tmp286
427 br i1 %tmp287, label %ENDIF2795, label %ELSE2806
427428
428429 ELSE2806: ; preds = %ELSE2803
429 %288 = fsub float %26, undef
430 %289 = fsub float %27, undef
431 %290 = fsub float %28, undef
432 %291 = fmul float %288, undef
433 %292 = fmul float %289, 0.000000e+00
434 %293 = fmul float %290, undef
435 %294 = fsub float %29, undef
436 %295 = fmul float %294, undef
437 %296 = call float @llvm.minnum.f32(float %291, float %295)
438 %297 = call float @llvm.minnum.f32(float %292, float undef)
439 %298 = call float @llvm.maxnum.f32(float %293, float undef)
440 %299 = call float @llvm.maxnum.f32(float %296, float %297)
441 %300 = call float @llvm.maxnum.f32(float %299, float undef)
442 %301 = call float @llvm.minnum.f32(float undef, float %298)
443 %302 = fcmp ogt float %300, 0.000000e+00
444 %303 = fcmp olt float %300, 1.000000e+00
445 %304 = and i1 %302, %303
446 %305 = fcmp olt float %300, %301
447 %306 = and i1 %304, %305
448 br i1 %306, label %ENDIF2795, label %ELSE2809
430 %tmp288 = fsub float %tmp26, undef
431 %tmp289 = fsub float %tmp27, undef
432 %tmp290 = fsub float %tmp28, undef
433 %tmp291 = fmul float %tmp288, undef
434 %tmp292 = fmul float %tmp289, 0.000000e+00
435 %tmp293 = fmul float %tmp290, undef
436 %tmp294 = fsub float %tmp29, undef
437 %tmp295 = fmul float %tmp294, undef
438 %tmp296 = call float @llvm.minnum.f32(float %tmp291, float %tmp295)
439 %tmp297 = call float @llvm.minnum.f32(float %tmp292, float undef)
440 %tmp298 = call float @llvm.maxnum.f32(float %tmp293, float undef)
441 %tmp299 = call float @llvm.maxnum.f32(float %tmp296, float %tmp297)
442 %tmp300 = call float @llvm.maxnum.f32(float %tmp299, float undef)
443 %tmp301 = call float @llvm.minnum.f32(float undef, float %tmp298)
444 %tmp302 = fcmp ogt float %tmp300, 0.000000e+00
445 %tmp303 = fcmp olt float %tmp300, 1.000000e+00
446 %tmp304 = and i1 %tmp302, %tmp303
447 %tmp305 = fcmp olt float %tmp300, %tmp301
448 %tmp306 = and i1 %tmp304, %tmp305
449 br i1 %tmp306, label %ENDIF2795, label %ELSE2809
449450
450451 ELSE2809: ; preds = %ELSE2806
451452 br i1 undef, label %ENDIF2795, label %ELSE2812
460461 br i1 undef, label %ENDIF2795, label %ELSE2821
461462
462463 ELSE2821: ; preds = %ELSE2818
463 %307 = fsub float %56, undef
464 %308 = fsub float %57, undef
465 %309 = fsub float %58, undef
466 %310 = fmul float %307, undef
467 %311 = fmul float %308, 0.000000e+00
468 %312 = fmul float %309, undef
469 %313 = fsub float %59, undef
470 %314 = fsub float %60, undef
471 %315 = fsub float %61, undef
472 %316 = fmul float %313, undef
473 %317 = fmul float %314, undef
474 %318 = fmul float %315, undef
475 %319 = call float @llvm.maxnum.f32(float %310, float %316)
476 %320 = call float @llvm.maxnum.f32(float %311, float %317)
477 %321 = call float @llvm.maxnum.f32(float %312, float %318)
478 %322 = call float @llvm.minnum.f32(float %319, float %320)
479 %323 = call float @llvm.minnum.f32(float %322, float %321)
480 %324 = fcmp ogt float undef, 0.000000e+00
481 %325 = fcmp olt float undef, 1.000000e+00
482 %326 = and i1 %324, %325
483 %327 = fcmp olt float undef, %323
484 %328 = and i1 %326, %327
485 br i1 %328, label %ENDIF2795, label %ELSE2824
464 %tmp307 = fsub float %tmp56, undef
465 %tmp308 = fsub float %tmp57, undef
466 %tmp309 = fsub float %tmp58, undef
467 %tmp310 = fmul float %tmp307, undef
468 %tmp311 = fmul float %tmp308, 0.000000e+00
469 %tmp312 = fmul float %tmp309, undef
470 %tmp313 = fsub float %tmp59, undef
471 %tmp314 = fsub float %tmp60, undef
472 %tmp315 = fsub float %tmp61, undef
473 %tmp316 = fmul float %tmp313, undef
474 %tmp317 = fmul float %tmp314, undef
475 %tmp318 = fmul float %tmp315, undef
476 %tmp319 = call float @llvm.maxnum.f32(float %tmp310, float %tmp316)
477 %tmp320 = call float @llvm.maxnum.f32(float %tmp311, float %tmp317)
478 %tmp321 = call float @llvm.maxnum.f32(float %tmp312, float %tmp318)
479 %tmp322 = call float @llvm.minnum.f32(float %tmp319, float %tmp320)
480 %tmp323 = call float @llvm.minnum.f32(float %tmp322, float %tmp321)
481 %tmp324 = fcmp ogt float undef, 0.000000e+00
482 %tmp325 = fcmp olt float undef, 1.000000e+00
483 %tmp326 = and i1 %tmp324, %tmp325
484 %tmp327 = fcmp olt float undef, %tmp323
485 %tmp328 = and i1 %tmp326, %tmp327
486 br i1 %tmp328, label %ENDIF2795, label %ELSE2824
486487
487488 ELSE2824: ; preds = %ELSE2821
488489 %.2849 = select i1 undef, float 0.000000e+00, float 1.000000e+00
489490 br label %ENDIF2795
490491 }
491492
493 declare float @llvm.floor.f32(float) #1
494 declare float @llvm.sqrt.f32(float) #1
495 declare float @llvm.minnum.f32(float, float) #1
496 declare float @llvm.maxnum.f32(float, float) #1
492497 declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
493
494 ; Function Attrs: nounwind readnone
498 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
495499 declare float @llvm.SI.load.const(<16 x i8>, i32) #1
496
497 ; Function Attrs: nounwind readnone
498 declare float @llvm.floor.f32(float) #1
499
500 ; Function Attrs: nounwind readnone
501 declare float @llvm.sqrt.f32(float) #1
502
503 ; Function Attrs: nounwind readnone
504 declare float @llvm.minnum.f32(float, float) #1
505
506 ; Function Attrs: nounwind readnone
507 declare float @llvm.maxnum.f32(float, float) #1
508
509 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
510500
511501 attributes #0 = { nounwind }
512502 attributes #1 = { nounwind readnone }
None ; RUN: llc < %s -march=amdgcn -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=SIVI %s
1 ; RUN: llc < %s -march=amdgcn -mcpu=bonaire -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=CI --check-prefix=GCN %s
2 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=SIVI %s
0 ; RUN: llc -march=amdgcn -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SIVI %s
1 ; RUN: llc -march=amdgcn -mcpu=bonaire -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=SIVI %s
33
44 ; SMRD load with an immediate offset.
55 ; GCN-LABEL: {{^}}smrd0:
66 ; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
77 ; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
8 define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
8 define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
99 entry:
10 %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 1
11 %1 = load i32, i32 addrspace(2)* %0
12 store i32 %1, i32 addrspace(1)* %out
10 %tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 1
11 %tmp1 = load i32, i32 addrspace(2)* %tmp
12 store i32 %tmp1, i32 addrspace(1)* %out
1313 ret void
1414 }
1515
1717 ; GCN-LABEL: {{^}}smrd1:
1818 ; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff,0x{{[0-9]+[137]}}
1919 ; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
20 define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
20 define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
2121 entry:
22 %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 255
23 %1 = load i32, i32 addrspace(2)* %0
24 store i32 %1, i32 addrspace(1)* %out
22 %tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 255
23 %tmp1 = load i32, i32 addrspace(2)* %tmp
24 store i32 %tmp1, i32 addrspace(1)* %out
2525 ret void
2626 }
2727
3232 ; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
3333 ; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
3434 ; GCN: s_endpgm
35 define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
35 define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
3636 entry:
37 %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 256
38 %1 = load i32, i32 addrspace(2)* %0
39 store i32 %1, i32 addrspace(1)* %out
37 %tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 256
38 %tmp1 = load i32, i32 addrspace(2)* %tmp
39 store i32 %tmp1, i32 addrspace(1)* %out
4040 ret void
4141 }
4242
4747 ; SI: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0xb ; encoding: [0x0b
4848 ; TODO: Add VI checks
4949 ; GCN: s_endpgm
50 define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
50 define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
5151 entry:
52 %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
53 %1 = load i32, i32 addrspace(2)* %0
54 store i32 %1, i32 addrspace(1)* %out
52 %tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296
53 %tmp1 = load i32, i32 addrspace(2)* %tmp
54 store i32 %tmp1, i32 addrspace(1)* %out
5555 ret void
5656 }
5757
6161 ; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
6262 ; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
6363 ; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
64 define void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
64 define void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
6565 entry:
66 %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262143
67 %1 = load i32, i32 addrspace(2)* %0
68 store i32 %1, i32 addrspace(1)* %out
66 %tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 262143
67 %tmp1 = load i32, i32 addrspace(2)* %tmp
68 store i32 %tmp1, i32 addrspace(1)* %out
6969 ret void
7070 }
7171
7575 ; SIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
7676 ; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
7777 ; GCN: s_endpgm
78 define void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
78 define void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
7979 entry:
80 %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262144
81 %1 = load i32, i32 addrspace(2)* %0
82 store i32 %1, i32 addrspace(1)* %out
80 %tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 262144
81 %tmp1 = load i32, i32 addrspace(2)* %tmp
82 store i32 %tmp1, i32 addrspace(1)* %out
8383 ret void
8484 }
8585
8787 ; GCN-LABEL: {{^}}smrd_load_const0:
8888 ; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
8989 ; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x10
90 define amdgpu_ps void @smrd_load_const0(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
90 define amdgpu_ps void @smrd_load_const0(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
9191 main_body:
92 %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
93 %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
94 %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
95 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
92 %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
93 %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
94 %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 16)
95 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
9696 ret void
9797 }
9898
101101 ; GCN-LABEL: {{^}}smrd_load_const1:
102102 ; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
103103 ; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
104 define amdgpu_ps void @smrd_load_const1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
104 define amdgpu_ps void @smrd_load_const1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
105105 main_body:
106 %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
107 %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
108 %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1020)
109 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
106 %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
107 %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
108 %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 1020)
109 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
110110 ret void
111111 }
112
112113 ; SMRD load using the load.const intrinsic with an offset greater than the
113114 ; largest possible immediate.
114115 ; immediate offset.
117118 ; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
118119 ; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
119120 ; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
120 define amdgpu_ps void @smrd_load_const2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
121 define amdgpu_ps void @smrd_load_const2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
121122 main_body:
122 %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
123 %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
124 %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1024)
125 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
123 %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
124 %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
125 %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 1024)
126 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
126127 ret void
127128 }
128129
132133 ; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
133134 ; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
134135 ; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
135 define amdgpu_ps void @smrd_load_const3(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
136 define amdgpu_ps void @smrd_load_const3(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
136137 main_body:
137 %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
138 %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
139 %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1048572)
140 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
138 %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
139 %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
140 %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 1048572)
141 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
141142 ret void
142143 }
143144
147148 ; SIVI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
148149 ; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
149150 ; GCN: s_endpgm
150 define amdgpu_ps void @smrd_load_const4(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
151 define amdgpu_ps void @smrd_load_const4(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
151152 main_body:
152 %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
153 %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
154 %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1048576)
155 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
153 %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
154 %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
155 %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 1048576)
156 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
156157 ret void
157158 }
158159
159 ; Function Attrs: nounwind readnone
160 declare float @llvm.SI.load.const(<16 x i8>, i32) #0
160 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
161 declare float @llvm.SI.load.const(<16 x i8>, i32) #1
161162
162 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
163
164 attributes #0 = { nounwind readnone }
163 attributes #0 = { nounwind }
164 attributes #1 = { nounwind readnone }
106106 %export = phi float [ %lds_data, %if ], [ %interp, %else ]
107107 %tmp4 = call i32 @llvm.SI.packf16(float %export, float %export)
108108 %tmp5 = bitcast i32 %tmp4 to float
109 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp5, float %tmp5, float %tmp5, float %tmp5)
109 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp5, float %tmp5, float %tmp5, float %tmp5, i1 true, i1 true) #0
110110 ret void
111111 }
112112
204204 ret void
205205 }
206206
207 declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #0
208
209 declare i32 @llvm.SI.packf16(float, float) readnone
210
211 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
207 declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
208 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
209 declare i32 @llvm.SI.packf16(float, float) #1
212210
213211 attributes #0 = { nounwind }
214212 attributes #1 = { nounwind readnone }
None ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
0 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
11
22 ; FIXME: Move this to sgpr-copy.ll when this is fixed on VI.
33 ; Make sure that when we split an smrd instruction in order to move it to
44 ; the VALU, we are also moving its users to the VALU.
5 ; CHECK-LABEL: {{^}}split_smrd_add_worklist:
6 ; CHECK: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
75
6 ; GCN-LABEL: {{^}}split_smrd_add_worklist:
7 ; GCN: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
88 define amdgpu_ps void @split_smrd_add_worklist([34 x <8 x i32>] addrspace(2)* byval %arg) #0 {
99 bb:
1010 %tmp = call float @llvm.SI.load.const(<16 x i8> undef, i32 96)
2323 %tmp9 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> , <8 x i32> %tmp8, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
2424 %tmp10 = extractelement <4 x float> %tmp9, i32 0
2525 %tmp12 = call i32 @llvm.SI.packf16(float %tmp10, float undef)
26 %tmp13 = bitcast i32 %tmp12 to float
27 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float undef, float %tmp13, float undef, float undef)
26 %tmp13 = bitcast i32 %tmp12 to <2 x half>
27 call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp13, <2 x half> undef, i1 true, i1 true) #0
2828 ret void
2929 }
3030
31 ; Function Attrs: nounwind readnone
31 declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
32
3233 declare float @llvm.SI.load.const(<16 x i8>, i32) #1
33
34 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
35
3634 declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
37
3835 declare i32 @llvm.SI.packf16(float, float) #1
3936
4037 attributes #0 = { nounwind }
4138 attributes #1 = { nounwind readnone }
4239
4340 !0 = !{!1, !1, i64 0, i32 1}
44 !1 = !{!"const", !3}
45 !2 = !{!1, !1, i64 0}
46 !3 = !{!"tbaa root"}
41 !1 = !{!"const", !2}
42 !2 = !{!"tbaa root"}
None ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -o - %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - %s
0 ; RUN: llc -march=amdgcn -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
22
3 ; SI-LABEL:{{^}}row_filter_C1_D0:
4 ; SI: s_endpgm
5 ; Function Attrs: nounwind
3 ; GCN-LABEL:{{^}}row_filter_C1_D0:
64 define void @row_filter_C1_D0() {
75 entry:
86 br i1 undef, label %for.inc.1, label %do.body.preheader
97
108 do.body.preheader: ; preds = %entry
11 %0 = insertelement <4 x i32> zeroinitializer, i32 undef, i32 1
9 %tmp = insertelement <4 x i32> zeroinitializer, i32 undef, i32 1
1210 br i1 undef, label %do.body56.1, label %do.body90
1311
1412 do.body90: ; preds = %do.body56.2, %do.body56.1, %do.body.preheader
15 %1 = phi <4 x i32> [ %6, %do.body56.2 ], [ %5, %do.body56.1 ], [ %0, %do.body.preheader ]
16 %2 = insertelement <4 x i32> %1, i32 undef, i32 2
17 %3 = insertelement <4 x i32> %2, i32 undef, i32 3
13 %tmp1 = phi <4 x i32> [ %tmp6, %do.body56.2 ], [ %tmp5, %do.body56.1 ], [ %tmp, %do.body.preheader ]
14 %tmp2 = insertelement <4 x i32> %tmp1, i32 undef, i32 2
15 %tmp3 = insertelement <4 x i32> %tmp2, i32 undef, i32 3
1816 br i1 undef, label %do.body124.1, label %do.body.1562.preheader
1917
2018 do.body.1562.preheader: ; preds = %do.body124.1, %do.body90
21 %storemerge = phi <4 x i32> [ %3, %do.body90 ], [ %7, %do.body124.1 ]
22 %4 = insertelement <4 x i32> undef, i32 undef, i32 1
19 %storemerge = phi <4 x i32> [ %tmp3, %do.body90 ], [ %tmp7, %do.body124.1 ]
20 %tmp4 = insertelement <4 x i32> undef, i32 undef, i32 1
2321 br label %for.inc.1
2422
2523 do.body56.1: ; preds = %do.body.preheader
26 %5 = insertelement <4 x i32> %0, i32 undef, i32 1
24 %tmp5 = insertelement <4 x i32> %tmp, i32 undef, i32 1
2725 %or.cond472.1 = or i1 undef, undef
2826 br i1 %or.cond472.1, label %do.body56.2, label %do.body90
2927
3028 do.body56.2: ; preds = %do.body56.1
31 %6 = insertelement <4 x i32> %5, i32 undef, i32 1
29 %tmp6 = insertelement <4 x i32> %tmp5, i32 undef, i32 1
3230 br label %do.body90
3331
3432 do.body124.1: ; preds = %do.body90
35 %7 = insertelement <4 x i32> %3, i32 undef, i32 3
33 %tmp7 = insertelement <4 x i32> %tmp3, i32 undef, i32 3
3634 br label %do.body.1562.preheader
3735
3836 for.inc.1: ; preds = %do.body.1562.preheader, %entry
4139 unreachable
4240 }
4341
44 ; SI-LABEL: {{^}}foo:
45 ; SI: s_endpgm
42 ; GCN-LABEL: {{^}}foo:
43 ; GCN: s_endpgm
4644 define amdgpu_ps void @foo() #0 {
4745 bb:
4846 br i1 undef, label %bb2, label %bb1
7775 bb14: ; preds = %bb27, %bb24, %bb9
7876 %tmp15 = phi float [ %tmp12, %bb9 ], [ undef, %bb27 ], [ 0.000000e+00, %bb24 ]
7977 %tmp16 = phi float [ %tmp11, %bb9 ], [ undef, %bb27 ], [ %tmp25, %bb24 ]
80 %tmp17 = fmul float 10.5, %tmp16
81 %tmp18 = fmul float 11.5, %tmp15
82 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp18, float %tmp17, float %tmp17, float %tmp17)
78 %tmp17 = fmul float 1.050000e+01, %tmp16
79 %tmp18 = fmul float 1.150000e+01, %tmp15
80 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp18, float %tmp17, float %tmp17, float %tmp17, i1 true, i1 true) #0
8381 ret void
8482
8583 bb23: ; preds = %bb13
9694 br label %bb14
9795 }
9896
99 ; Function Attrs: nounwind readnone
97 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
10098 declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
101
102 ; Function Attrs: nounwind readnone
103 declare i32 @llvm.SI.packf16(float, float) #1
104
105 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
10699
107100 attributes #0 = { nounwind }
108101 attributes #1 = { nounwind readnone }
44 ; FUNC-LABEL: {{^}}udiv_i32:
55 ; EG-NOT: SETGE_INT
66 ; EG: CF_END
7
8 ; SI: v_rcp_iflag_f32_e32
79 define void @udiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
810 %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
9 %a = load i32, i32 addrspace(1) * %in
10 %b = load i32, i32 addrspace(1) * %b_ptr
11 %a = load i32, i32 addrspace(1)* %in
12 %b = load i32, i32 addrspace(1)* %b_ptr
1113 %result = udiv i32 %a, %b
1214 store i32 %result, i32 addrspace(1)* %out
1315 ret void
1416 }
1517
1618 ; FUNC-LABEL: {{^}}s_udiv_i32:
17
19 ; SI: v_rcp_iflag_f32_e32
1820 define void @s_udiv_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
1921 %result = udiv i32 %a, %b
2022 store i32 %result, i32 addrspace(1)* %out
2931 ; FUNC-LABEL: {{^}}udiv_v2i32:
3032 ; EG: CF_END
3133
34 ; SI: v_rcp_iflag_f32_e32
35 ; SI: v_rcp_iflag_f32_e32
3236 ; SI: s_endpgm
3337 define void @udiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
3438 %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
157161 store <4 x i32> %2, <4 x i32> addrspace(1)* %out, align 16
158162 ret void
159163 }
164
165 ; FUNC-LABEL: {{^}}test_udiv2:
166 ; SI: s_lshr_b32 s{{[0-9]}}, s{{[0-9]}}, 1
167 define void @test_udiv2(i32 %p) {
168 %i = udiv i32 %p, 2
169 store volatile i32 %i, i32 addrspace(1)* undef
170 ret void
171 }
172
173 ; FUNC-LABEL: {{^}}test_udiv_3_mulhu:
174 ; SI: v_mov_b32_e32 v{{[0-9]+}}, 0xaaaaaaab
175 ; SI: v_mul_hi_u32 v0, {{v[0-9]+}}, {{s[0-9]+}}
176 ; SI-NEXT: v_lshrrev_b32_e32 v0, 1, v0
177 define void @test_udiv_3_mulhu(i32 %p) {
178 %i = udiv i32 %p, 3
179 store volatile i32 %i, i32 addrspace(1)* undef
180 ret void
181 }
+0
-13
test/CodeGen/AMDGPU/urecip.ll less more
None ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
2
3 ; CHECK: v_rcp_iflag_f32_e32
4
5 define void @test(i32 %p, i32 %q) {
6 %i = udiv i32 %p, %q
7 %r = bitcast i32 %i to float
8 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
9 ret void
10 }
11
12 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
178178 br i1 %tmp155, label %bb156, label %bb157
179179
180180 bb156: ; preds = %bb24
181 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %tmp12, float %tmp103, float %tmp102, float %tmp101)
182 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %tmp99, float %tmp98, float %tmp97, float %tmp95)
183 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %tmp94, float %tmp93, float %tmp91, float %tmp90)
184 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %tmp89, float %tmp87, float %tmp86, float %tmp85)
185 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %tmp83, float %tmp82, float %tmp81, float %tmp79)
186 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %tmp78, float %tmp77, float %tmp75, float %tmp74)
187 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %tmp73, float %tmp71, float %tmp70, float %tmp69)
188 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %tmp67, float %tmp66, float %tmp65, float %tmp63)
189 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 40, i32 0, float %tmp62, float %tmp61, float %tmp59, float %tmp58)
190 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 41, i32 0, float %tmp57, float %tmp55, float %tmp54, float %tmp53)
191 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 42, i32 0, float %tmp51, float %tmp50, float %tmp49, float %tmp47)
192 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 43, i32 0, float %tmp46, float %tmp45, float %tmp43, float %tmp42)
193 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 44, i32 0, float %tmp41, float %tmp39, float %tmp38, float %tmp37)
194 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 45, i32 0, float %tmp35, float %tmp34, float %tmp33, float %tmp31)
195 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 46, i32 0, float %tmp30, float %tmp29, float %tmp27, float %tmp26)
196 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 47, i32 0, float %tmp25, float %tmp28, float %tmp32, float %tmp36)
197 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 48, i32 0, float %tmp40, float %tmp44, float %tmp48, float %tmp52)
198 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 49, i32 0, float %tmp56, float %tmp60, float %tmp64, float %tmp68)
199 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 50, i32 0, float %tmp72, float %tmp76, float %tmp80, float %tmp84)
200 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 51, i32 0, float %tmp88, float %tmp92, float %tmp96, float %tmp100)
201 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 52, i32 0, float %tmp104, float %tmp105, float %tmp106, float %tmp108)
202 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 53, i32 0, float %tmp109, float %tmp110, float %tmp111, float %tmp112)
203 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 54, i32 0, float %tmp113, float %tmp114, float %tmp115, float %tmp116)
204 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 55, i32 0, float %tmp117, float %tmp118, float %tmp119, float %tmp120)
205 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 56, i32 0, float %tmp121, float %tmp122, float %tmp123, float %tmp124)
206 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 57, i32 0, float %tmp125, float %tmp126, float %tmp127, float %tmp128)
207 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 58, i32 0, float %tmp129, float %tmp130, float %tmp131, float %tmp132)
208 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 59, i32 0, float %tmp133, float %tmp134, float %tmp135, float %tmp136)
209 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 60, i32 0, float %tmp137, float %tmp138, float %tmp139, float %tmp140)
210 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 61, i32 0, float %tmp141, float %tmp142, float %tmp143, float %tmp144)
211 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 62, i32 0, float %tmp145, float %tmp146, float %tmp147, float %tmp148)
212 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 63, i32 0, float %tmp149, float %tmp150, float %tmp151, float %tmp13)
213 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %tmp19, float %tmp20, float %tmp21, float %tmp22)
181 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %tmp12, float %tmp103, float %tmp102, float %tmp101, i1 false, i1 false) #0
182 call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %tmp99, float %tmp98, float %tmp97, float %tmp95, i1 false, i1 false) #0
183 call void @llvm.amdgcn.exp.f32(i32 34, i32 15, float %tmp94, float %tmp93, float %tmp91, float %tmp90, i1 false, i1 false) #0
184 call void @llvm.amdgcn.exp.f32(i32 35, i32 15, float %tmp89, float %tmp87, float %tmp86, float %tmp85, i1 false, i1 false) #0
185 call void @llvm.amdgcn.exp.f32(i32 36, i32 15, float %tmp83, float %tmp82, float %tmp81, float %tmp79, i1 false, i1 false) #0
186 call void @llvm.amdgcn.exp.f32(i32 37, i32 15, float %tmp78, float %tmp77, float %tmp75, float %tmp74, i1 false, i1 false) #0
187 call void @llvm.amdgcn.exp.f32(i32 38, i32 15, float %tmp73, float %tmp71, float %tmp70, float %tmp69, i1 false, i1 false) #0
188 call void @llvm.amdgcn.exp.f32(i32 39, i32 15, float %tmp67, float %tmp66, float %tmp65, float %tmp63, i1 false, i1 false) #0
189 call void @llvm.amdgcn.exp.f32(i32 40, i32 15, float %tmp62, float %tmp61, float %tmp59, float %tmp58, i1 false, i1 false) #0
190 call void @llvm.amdgcn.exp.f32(i32 41, i32 15, float %tmp57, float %tmp55, float %tmp54, float %tmp53, i1 false, i1 false) #0
191 call void @llvm.amdgcn.exp.f32(i32 42, i32 15, float %tmp51, float %tmp50, float %tmp49, float %tmp47, i1 false, i1 false) #0
192 call void @llvm.amdgcn.exp.f32(i32 43, i32 15, float %tmp46, float %tmp45, float %tmp43, float %tmp42, i1 false, i1 false) #0
193 call void @llvm.amdgcn.exp.f32(i32 44, i32 15, float %tmp41, float %tmp39, float %tmp38, float %tmp37, i1 false, i1 false) #0
194 call void @llvm.amdgcn.exp.f32(i32 45, i32 15, float %tmp35, float %tmp34, float %tmp33, float %tmp31, i1 false, i1 false) #0
195 call void @llvm.amdgcn.exp.f32(i32 46, i32 15, float %tmp30, float %tmp29, float %tmp27, float %tmp26, i1 false, i1 false) #0
196 call void @llvm.amdgcn.exp.f32(i32 47, i32 15, float %tmp25, float %tmp28, float %tmp32, float %tmp36, i1 false, i1 false) #0
197 call void @llvm.amdgcn.exp.f32(i32 48, i32 15, float %tmp40, float %tmp44, float %tmp48, float %tmp52, i1 false, i1 false) #0
198 call void @llvm.amdgcn.exp.f32(i32 49, i32 15, float %tmp56, float %tmp60, float %tmp64, float %tmp68, i1 false, i1 false) #0
199 call void @llvm.amdgcn.exp.f32(i32 50, i32 15, float %tmp72, float %tmp76, float %tmp80, float %tmp84, i1 false, i1 false) #0
200 call void @llvm.amdgcn.exp.f32(i32 51, i32 15, float %tmp88, float %tmp92, float %tmp96, float %tmp100, i1 false, i1 false) #0
201 call void @llvm.amdgcn.exp.f32(i32 52, i32 15, float %tmp104, float %tmp105, float %tmp106, float %tmp108, i1 false, i1 false) #0
202 call void @llvm.amdgcn.exp.f32(i32 53, i32 15, float %tmp109, float %tmp110, float %tmp111, float %tmp112, i1 false, i1 false) #0
203 call void @llvm.amdgcn.exp.f32(i32 54, i32 15, float %tmp113, float %tmp114, float %tmp115, float %tmp116, i1 false, i1 false) #0
204 call void @llvm.amdgcn.exp.f32(i32 55, i32 15, float %tmp117, float %tmp118, float %tmp119, float %tmp120, i1 false, i1 false) #0
205 call void @llvm.amdgcn.exp.f32(i32 56, i32 15, float %tmp121, float %tmp122, float %tmp123, float %tmp124, i1 false, i1 false) #0
206 call void @llvm.amdgcn.exp.f32(i32 57, i32 15, float %tmp125, float %tmp126, float %tmp127, float %tmp128, i1 false, i1 false) #0
207 call void @llvm.amdgcn.exp.f32(i32 58, i32 15, float %tmp129, float %tmp130, float %tmp131, float %tmp132, i1 false, i1 false) #0
208 call void @llvm.amdgcn.exp.f32(i32 59, i32 15, float %tmp133, float %tmp134, float %tmp135, float %tmp136, i1 false, i1 false) #0
209 call void @llvm.amdgcn.exp.f32(i32 60, i32 15, float %tmp137, float %tmp138, float %tmp139, float %tmp140, i1 false, i1 false) #0
210 call void @llvm.amdgcn.exp.f32(i32 61, i32 15, float %tmp141, float %tmp142, float %tmp143, float %tmp144, i1 false, i1 false) #0
211 call void @llvm.amdgcn.exp.f32(i32 62, i32 15, float %tmp145, float %tmp146, float %tmp147, float %tmp148, i1 false, i1 false) #0
212 call void @llvm.amdgcn.exp.f32(i32 63, i32 15, float %tmp149, float %tmp150, float %tmp151, float %tmp13, i1 false, i1 false) #0
213 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp19, float %tmp20, float %tmp21, float %tmp22, i1 true, i1 false) #0
214214 ret void
215215
216216 bb157: ; preds = %bb24
481481 br label %bb24
482482 }
483483
484 ; Function Attrs: nounwind readnone
484 declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
485 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
486
485487 declare float @llvm.SI.load.const(<16 x i8>, i32) #1
486
487 ; Function Attrs: nounwind readnone
488488 declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
489
490 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
491
492 declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
493489
494490 attributes #0 = { nounwind }
495491 attributes #1 = { nounwind readnone }
1010 ; DEFAULT: exp
1111 ; DEFAULT: s_waitcnt lgkmcnt(0)
1212 ; DEFAULT: s_endpgm
13 define amdgpu_vs void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, <16 x i8> addrspace(2)* inreg %arg3, <16 x i8> addrspace(2)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(2)* inreg %constptr) {
13 define amdgpu_vs void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, <16 x i8> addrspace(2)* inreg %arg3, <16 x i8> addrspace(2)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(2)* inreg %constptr) #0 {
1414 main_body:
1515 %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg3, i32 0
1616 %tmp10 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
1919 %tmp13 = extractelement <4 x float> %tmp11, i32 1
2020 call void @llvm.amdgcn.s.barrier() #1
2121 %tmp14 = extractelement <4 x float> %tmp11, i32 2
22 ; %tmp15 = extractelement <4 x float> %tmp11, i32 3
23 %tmp15 = load float, float addrspace(2)* %constptr, align 4 ; Force waiting for expcnt and lgkmcnt
22 %tmp15 = load float, float addrspace(2)* %constptr, align 4
2423 %tmp16 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg3, i32 1
2524 %tmp17 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp16, !tbaa !0
2625 %tmp18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp17, i32 0, i32 %arg6)
2827 %tmp20 = extractelement <4 x float> %tmp18, i32 1
2928 %tmp21 = extractelement <4 x float> %tmp18, i32 2
3029 %tmp22 = extractelement <4 x float> %tmp18, i32 3
31 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %tmp19, float %tmp20, float %tmp21, float %tmp22)
32 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %tmp12, float %tmp13, float %tmp14, float %tmp15)
30 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %tmp19, float %tmp20, float %tmp21, float %tmp22, i1 false, i1 false) #0
31 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp12, float %tmp13, float %tmp14, float %tmp15, i1 true, i1 false) #0
3332 ret void
3433 }
3534
4342 ; ILPMAX: s_waitcnt vmcnt(1)
4443 ; ILPMAX: s_waitcnt vmcnt(0)
4544 ; ILPMAX: s_endpgm
46
47 define amdgpu_vs void @main2([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)*
48 byval, i32 inreg, i32 inreg, i32, i32, i32, i32) {
45 define amdgpu_vs void @main2([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, [16 x <16 x i8>] addrspace(2)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
4946 main_body:
50 %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
51 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
52 %13 = add i32 %5, %7
53 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13)
54 %15 = extractelement <4 x float> %14, i32 0
55 %16 = extractelement <4 x float> %14, i32 1
56 %17 = extractelement <4 x float> %14, i32 2
57 %18 = extractelement <4 x float> %14, i32 3
58 %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
59 %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0
60 %21 = add i32 %5, %7
61 %22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21)
62 %23 = extractelement <4 x float> %22, i32 0
63 %24 = extractelement <4 x float> %22, i32 1
64 %25 = extractelement <4 x float> %22, i32 2
65 %26 = extractelement <4 x float> %22, i32 3
66 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, float %16, float %17, float %18)
67 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %23, float %24, float %25, float %26)
47 %tmp = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %arg4, i64 0, i64 0
48 %tmp11 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, align 16, !tbaa !0
49 %tmp12 = add i32 %arg5, %arg7
50 %tmp13 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp11, i32 0, i32 %tmp12)
51 %tmp14 = extractelement <4 x float> %tmp13, i32 0
52 %tmp15 = extractelement <4 x float> %tmp13, i32 1
53 %tmp16 = extractelement <4 x float> %tmp13, i32 2
54 %tmp17 = extractelement <4 x float> %tmp13, i32 3
55 %tmp18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %arg4, i64 0, i64 1
56 %tmp19 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp18, align 16, !tbaa !0
57 %tmp20 = add i32 %arg5, %arg7
58 %tmp21 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp19, i32 0, i32 %tmp20)
59 %tmp22 = extractelement <4 x float> %tmp21, i32 0
60 %tmp23 = extractelement <4 x float> %tmp21, i32 1
61 %tmp24 = extractelement <4 x float> %tmp21, i32 2
62 %tmp25 = extractelement <4 x float> %tmp21, i32 3
63 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp14, float %tmp15, float %tmp16, float %tmp17, i1 true, i1 false) #0
64 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %tmp22, float %tmp23, float %tmp24, float %tmp25, i1 false, i1 false) #0
6865 ret void
6966 }
7067
68 declare void @llvm.amdgcn.s.barrier() #1
69 declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #2
70 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
7171
72 ; Function Attrs: convergent nounwind
73 declare void @llvm.amdgcn.s.barrier() #1
74
75 ; Function Attrs: nounwind readnone
76 declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #2
77
78 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
79
72 attributes #0 = { nounwind }
8073 attributes #1 = { convergent nounwind }
8174 attributes #2 = { nounwind readnone }
8275
0 ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=SI
1 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=VI
0 ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=SI %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=VI %s
22
33 ; Check that WQM isn't triggered by image load/store intrinsics.
44 ;
2424 %c.3 = extractelement <4 x i32> %c.2, i32 0
2525 %gep = getelementptr float, float addrspace(1)* %ptr, i32 %c.3
2626 %data = load float, float addrspace(1)* %gep
27
28 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %data, float undef, float undef, float undef)
29
27 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %data, float undef, float undef, float undef, i1 true, i1 true) #1
3028 ret void
3129 }
3230
499497 ret <4 x float> %r
500498 }
501499
502
500 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
503501 declare void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
504502 declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #1
505503 declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1
511509 declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3
512510 declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3
513511
514 declare void @llvm.AMDGPU.kill(float)
515 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
512 declare void @llvm.AMDGPU.kill(float) #1
516513
517514 attributes #1 = { nounwind }
518515 attributes #2 = { nounwind readonly }
55
66 target triple = "amdgcn--"
77
8 declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0
9 declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
10 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #2
11
12 define amdgpu_vs void @wrapper(i32 inreg, i32) {
8 define amdgpu_vs void @wrapper(i32 inreg %arg, i32 %arg1) {
139 main_body:
14 %2 = add i32 %1, %0
15 %3 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> undef, i32 0, i32 %2)
16 %4 = extractelement <4 x float> %3, i32 1
17 %5 = fptosi float %4 to i32
18 %6 = insertelement <2 x i32> undef, i32 %5, i32 1
10 %tmp = add i32 %arg1, %arg
11 %tmp2 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> undef, i32 0, i32 %tmp)
12 %tmp3 = extractelement <4 x float> %tmp2, i32 1
13 %tmp4 = fptosi float %tmp3 to i32
14 %tmp5 = insertelement <2 x i32> undef, i32 %tmp4, i32 1
1915 br label %loop11.i
2016
2117 loop11.i: ; preds = %endif46.i, %main_body
22 %7 = phi i32 [ 0, %main_body ], [ %15, %endif46.i ]
23 %8 = icmp sgt i32 %7, 999
24 br i1 %8, label %main.exit, label %if16.i
18 %tmp6 = phi i32 [ 0, %main_body ], [ %tmp14, %endif46.i ]
19 %tmp7 = icmp sgt i32 %tmp6, 999
20 br i1 %tmp7, label %main.exit, label %if16.i
2521
2622 if16.i: ; preds = %loop11.i
27 %9 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %6, <8 x i32> undef, i32 15, i1 true, i1 false, i1 false, i1 false)
28 %10 = extractelement <4 x float> %9, i32 0
29 %11 = fcmp ult float 0.000000e+00, %10
30 br i1 %11, label %if28.i, label %endif46.i
23 %tmp8 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %tmp5, <8 x i32> undef, i32 15, i1 true, i1 false, i1 false, i1 false)
24 %tmp9 = extractelement <4 x float> %tmp8, i32 0
25 %tmp10 = fcmp ult float 0.000000e+00, %tmp9
26 br i1 %tmp10, label %if28.i, label %endif46.i
3127
3228 if28.i: ; preds = %if16.i
33 %12 = bitcast float %10 to i32
34 %13 = shl i32 %12, 16
35 %14 = bitcast i32 %13 to float
29 %tmp11 = bitcast float %tmp9 to i32
30 %tmp12 = shl i32 %tmp11, 16
31 %tmp13 = bitcast i32 %tmp12 to float
3632 br label %main.exit
3733
3834 endif46.i: ; preds = %if16.i
39 %15 = add i32 %7, 1
35 %tmp14 = add i32 %tmp6, 1
4036 br label %loop11.i
4137
4238 main.exit: ; preds = %if28.i, %loop11.i
43 %16 = phi float [ %14, %if28.i ], [ 0x36F0800000000000, %loop11.i ]
44 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %16, float 0.000000e+00, float 0.000000e+00, float 0x36A0000000000000)
39 %tmp15 = phi float [ %tmp13, %if28.i ], [ 0x36F0800000000000, %loop11.i ]
40 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %tmp15, float 0.000000e+00, float 0.000000e+00, float 0x36A0000000000000, i1 false, i1 false) #0
4541 ret void
4642 }
4743
48 attributes #0 = { nounwind readnone }
49 attributes #1 = { nounwind readonly }
50 attributes #2 = { nounwind }
44 ; Function Attrs: nounwind
45 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
46
47 ; Function Attrs: nounwind readnone
48 declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
49
50 ; Function Attrs: nounwind readonly
51 declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
52
53 attributes #0 = { nounwind }
54 attributes #1 = { nounwind readnone }
55 attributes #2 = { nounwind readonly }