llvm.org GIT mirror llvm / 2223371
[NVPTX] Don't flag StoreParam/LoadParam memory chain operands as ReadMem/WriteMem (PR32146)

Follow up to D33147.

NVPTXTargetLowering::LowerCall was trusting the default argument values.

Fixes another 17 of the NVPTX '-verify-machineinstrs with EXPENSIVE_CHECKS' errors in PR32146.

Differential Revision: https://reviews.llvm.org/D33189

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@303082 91177308-0d34-0410-b5e6-96231b3b80d8

Simon Pilgrim, 3 years ago
10 changed file(s) with 3621 addition(s) and 3619 deletion(s). Raw diff Collapse all Expand all
15481548 Chain = DAG.getMemIntrinsicNode(
15491549 Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands,
15501550 TheStoreType, MachinePointerInfo(), EltAlign,
1551 /* Volatile */ false, /* ReadMem */ true,
1551 /* Volatile */ false, /* ReadMem */ false,
15521552 /* WriteMem */ true, /* Size */ 0);
15531553 InFlag = Chain.getValue(1);
15541554
16101610 Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
16111611 CopyParamOps, elemtype,
16121612 MachinePointerInfo(), /* Align */ 0,
1613 /* Volatile */ false, /* ReadMem */ true,
1613 /* Volatile */ false, /* ReadMem */ false,
16141614 /* WriteMem */ true, /* Size */ 0);
16151615
16161616 InFlag = Chain.getValue(1);
17981798 SDValue RetVal = DAG.getMemIntrinsicNode(
17991799 Op, dl, DAG.getVTList(LoadVTs), LoadOperands, TheLoadType,
18001800 MachinePointerInfo(), EltAlign, /* Volatile */ false,
1801 /* ReadMem */ true, /* WriteMem */ true, /* Size */ 0);
1801 /* ReadMem */ true, /* WriteMem */ false, /* Size */ 0);
18021802
18031803 for (unsigned j = 0; j < NumElts; ++j) {
18041804 SDValue Ret = RetVal.getValue(j);
0 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
1
2 ; ModuleID = '__kernelgen_main_module'
3 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
4 target triple = "nvptx64-nvidia-cuda"
5
6 define private ptx_device { double, double } @__utils1_MOD_trace(%"struct.array2_complex(kind=8).43.5.57"* noalias %m) {
7 entry:
8 ;unreachable
9 %t0 = insertvalue {double, double} undef, double 1.0, 0
10 %t1 = insertvalue {double, double} %t0, double 1.0, 1
11 ret { double, double } %t1
12 }
13
14 %struct.descriptor_dimension.0.52 = type { i64, i64, i64 }
15 %"struct.array2_complex(kind=8).37.18.70" = type { i8*, i64, i64, [2 x %struct.descriptor_dimension.0.52] }
16 %"struct.array2_complex(kind=8).43.5.57" = type { i8*, i64, i64, [2 x %struct.descriptor_dimension.0.52] }
17 @replacementOfAlloca8 = private global %"struct.array2_complex(kind=8).37.18.70" zeroinitializer, align 4096
18
19 ; CHECK: .visible .entry __kernelgen_main
20 define ptx_kernel void @__kernelgen_main(i32* nocapture %args, i32*) {
21 entry:
22 %1 = tail call ptx_device { double, double } bitcast ({ double, double } (%"struct.array2_complex(kind=8).43.5.57"*)* @__utils1_MOD_trace to { double, double } (%"struct.array2_complex(kind=8).37.18.70"*)*)(%"struct.array2_complex(kind=8).37.18.70"* noalias @replacementOfAlloca8)
23 ret void
24 }
25
0 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
1
2 ; ModuleID = '__kernelgen_main_module'
3 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
4 target triple = "nvptx64-nvidia-cuda"
5
6 define private ptx_device { double, double } @__utils1_MOD_trace(%"struct.array2_complex(kind=8).43.5.57"* noalias %m) {
7 entry:
8 ;unreachable
9 %t0 = insertvalue {double, double} undef, double 1.0, 0
10 %t1 = insertvalue {double, double} %t0, double 1.0, 1
11 ret { double, double } %t1
12 }
13
14 %struct.descriptor_dimension.0.52 = type { i64, i64, i64 }
15 %"struct.array2_complex(kind=8).37.18.70" = type { i8*, i64, i64, [2 x %struct.descriptor_dimension.0.52] }
16 %"struct.array2_complex(kind=8).43.5.57" = type { i8*, i64, i64, [2 x %struct.descriptor_dimension.0.52] }
17 @replacementOfAlloca8 = private global %"struct.array2_complex(kind=8).37.18.70" zeroinitializer, align 4096
18
19 ; CHECK: .visible .entry __kernelgen_main
20 define ptx_kernel void @__kernelgen_main(i32* nocapture %args, i32*) {
21 entry:
22 %1 = tail call ptx_device { double, double } bitcast ({ double, double } (%"struct.array2_complex(kind=8).43.5.57"*)* @__utils1_MOD_trace to { double, double } (%"struct.array2_complex(kind=8).37.18.70"*)*)(%"struct.array2_complex(kind=8).37.18.70"* noalias @replacementOfAlloca8)
23 ret void
24 }
25
0 ; ## Full FP16 support enabled by default.
1 ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
2 ; RUN: -O0 -disable-post-ra -disable-fp-elim \
3 ; RUN: | FileCheck -check-prefixes CHECK,CHECK-F16 %s
4 ; ## FP16 support explicitly disabled.
5 ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
6 ; RUN: -O0 -disable-post-ra -disable-fp-elim --nvptx-no-f16-math \
7 ; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOF16 %s
8 ; ## FP16 is not supported by hardware.
9 ; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 -asm-verbose=false \
10 ; RUN: -disable-post-ra -disable-fp-elim \
11 ; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOF16 %s
12
13 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
14
15 ; CHECK-LABEL: test_ret_const(
16 ; CHECK: mov.b16 [[R:%h[0-9]+]], 0x3C00;
17 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
18 ; CHECK-NEXT: ret;
19 define half @test_ret_const() #0 {
20 ret half 1.0
21 }
22
23 ; CHECK-LABEL: test_fadd(
24 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fadd_param_0];
25 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_param_1];
26 ; CHECK-F16-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]];
27 ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
28 ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
29 ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
30 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
31 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
32 ; CHECK-NEXT: ret;
33 define half @test_fadd(half %a, half %b) #0 {
34 %r = fadd half %a, %b
35 ret half %r
36 }
37
38 ; CHECK-LABEL: test_fadd_v1f16(
39 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fadd_v1f16_param_0];
40 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_v1f16_param_1];
41 ; CHECK-F16-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]];
42 ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
43 ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
44 ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
45 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
46 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
47 ; CHECK-NEXT: ret;
48 define <1 x half> @test_fadd_v1f16(<1 x half> %a, <1 x half> %b) #0 {
49 %r = fadd <1 x half> %a, %b
50 ret <1 x half> %r
51 }
52
53 ; Check that we can lower fadd with immediate arguments.
54 ; CHECK-LABEL: test_fadd_imm_0(
55 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_imm_0_param_0];
56 ; CHECK-F16-DAG: mov.b16 [[A:%h[0-9]+]], 0x3C00;
57 ; CHECK-F16-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[A]];
58 ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
59 ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], 0f3F800000;
60 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
61 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
62 ; CHECK-NEXT: ret;
63 define half @test_fadd_imm_0(half %b) #0 {
64 %r = fadd half 1.0, %b
65 ret half %r
66 }
67
68 ; CHECK-LABEL: test_fadd_imm_1(
69 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_imm_1_param_0];
70 ; CHECK-F16-DAG: mov.b16 [[A:%h[0-9]+]], 0x3C00;
71 ; CHECK-F16-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[A]];
72 ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
73 ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], 0f3F800000;
74 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
75 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
76 ; CHECK-NEXT: ret;
77 define half @test_fadd_imm_1(half %a) #0 {
78 %r = fadd half %a, 1.0
79 ret half %r
80 }
81
82 ; CHECK-LABEL: test_fsub(
83 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fsub_param_0];
84 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fsub_param_1];
85 ; CHECK-F16-NEXT: sub.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]];
86 ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
87 ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
88 ; CHECK-NOF16-NEXT: sub.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
89 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
90 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
91 ; CHECK-NEXT: ret;
92 define half @test_fsub(half %a, half %b) #0 {
93 %r = fsub half %a, %b
94 ret half %r
95 }
96
97 ; CHECK-LABEL: test_fneg(
98 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fneg_param_0];
99 ; CHECK-F16-NEXT: mov.b16 [[Z:%h[0-9]+]], 0x0000
100 ; CHECK-F16-NEXT: sub.rn.f16 [[R:%h[0-9]+]], [[Z]], [[A]];
101 ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
102 ; CHECK-NOF16-DAG: mov.f32 [[Z:%f[0-9]+]], 0f00000000;
103 ; CHECK-NOF16-NEXT: sub.rn.f32 [[R32:%f[0-9]+]], [[Z]], [[A32]];
104 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
105 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
106 ; CHECK-NEXT: ret;
107 define half @test_fneg(half %a) #0 {
108 %r = fsub half 0.0, %a
109 ret half %r
110 }
111
112 ; CHECK-LABEL: test_fmul(
113 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fmul_param_0];
114 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fmul_param_1];
115 ; CHECK-F16-NEXT: mul.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]];
116 ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
117 ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
118 ; CHECK-NOF16-NEXT: mul.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
119 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
120 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
121 ; CHECK-NEXT: ret;
122 define half @test_fmul(half %a, half %b) #0 {
123 %r = fmul half %a, %b
124 ret half %r
125 }
126
127 ; CHECK-LABEL: test_fdiv(
128 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fdiv_param_0];
129 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fdiv_param_1];
130 ; CHECK-DAG: cvt.f32.f16 [[F0:%f[0-9]+]], [[A]];
131 ; CHECK-DAG: cvt.f32.f16 [[F1:%f[0-9]+]], [[B]];
132 ; CHECK-NEXT: div.rn.f32 [[FR:%f[0-9]+]], [[F0]], [[F1]];
133 ; CHECK-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[FR]];
134 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
135 ; CHECK-NEXT: ret;
136 define half @test_fdiv(half %a, half %b) #0 {
137 %r = fdiv half %a, %b
138 ret half %r
139 }
140
141 ; CHECK-LABEL: test_frem(
142 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_frem_param_0];
143 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_frem_param_1];
144 ; CHECK-DAG: cvt.f32.f16 [[FA:%f[0-9]+]], [[A]];
145 ; CHECK-DAG: cvt.f32.f16 [[FB:%f[0-9]+]], [[B]];
146 ; CHECK-NEXT: div.rn.f32 [[D:%f[0-9]+]], [[FA]], [[FB]];
147 ; CHECK-NEXT: cvt.rmi.f32.f32 [[DI:%f[0-9]+]], [[D]];
148 ; CHECK-NEXT: mul.f32 [[RI:%f[0-9]+]], [[DI]], [[FB]];
149 ; CHECK-NEXT: sub.f32 [[RF:%f[0-9]+]], [[FA]], [[RI]];
150 ; CHECK-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
151 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
152 ; CHECK-NEXT: ret;
153 define half @test_frem(half %a, half %b) #0 {
154 %r = frem half %a, %b
155 ret half %r
156 }
157
158 ; CHECK-LABEL: test_store(
159 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_store_param_0];
160 ; CHECK-DAG: ld.param.u64 %[[PTR:rd[0-9]+]], [test_store_param_1];
161 ; CHECK-NEXT: st.b16 [%[[PTR]]], [[A]];
162 ; CHECK-NEXT: ret;
163 define void @test_store(half %a, half* %b) #0 {
164 store half %a, half* %b
165 ret void
166 }
167
168 ; CHECK-LABEL: test_load(
169 ; CHECK: ld.param.u64 %[[PTR:rd[0-9]+]], [test_load_param_0];
170 ; CHECK-NEXT: ld.b16 [[R:%h[0-9]+]], [%[[PTR]]];
171 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
172 ; CHECK-NEXT: ret;
173 define half @test_load(half* %a) #0 {
174 %r = load half, half* %a
175 ret half %r
176 }
177
178 ; CHECK-LABEL: .visible .func test_halfp0a1(
179 ; CHECK-DAG: ld.param.u64 %[[FROM:rd?[0-9]+]], [test_halfp0a1_param_0];
180 ; CHECK-DAG: ld.param.u64 %[[TO:rd?[0-9]+]], [test_halfp0a1_param_1];
181 ; CHECK-DAG: ld.u8 [[B0:%r[sd]?[0-9]+]], [%[[FROM]]]
182 ; CHECK-DAG: st.u8 [%[[TO]]], [[B0]]
183 ; CHECK-DAG: ld.u8 [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1]
184 ; CHECK-DAG: st.u8 [%[[TO]]+1], [[B1]]
185 ; CHECK: ret
186 define void @test_halfp0a1(half * noalias readonly %from, half * %to) {
187 %1 = load half, half * %from , align 1
188 store half %1, half * %to , align 1
189 ret void
190 }
191
192 declare half @test_callee(half %a, half %b) #0
193
194 ; CHECK-LABEL: test_call(
195 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_call_param_0];
196 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_call_param_1];
197 ; CHECK: {
198 ; CHECK-DAG: .param .b32 param0;
199 ; CHECK-DAG: .param .b32 param1;
200 ; CHECK-DAG: st.param.b16 [param0+0], [[A]];
201 ; CHECK-DAG: st.param.b16 [param1+0], [[B]];
202 ; CHECK-DAG: .param .b32 retval0;
203 ; CHECK: call.uni (retval0),
204 ; CHECK-NEXT: test_callee,
205 ; CHECK: );
206 ; CHECK-NEXT: ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
207 ; CHECK-NEXT: }
208 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
209 ; CHECK-NEXT: ret;
210 define half @test_call(half %a, half %b) #0 {
211 %r = call half @test_callee(half %a, half %b)
212 ret half %r
213 }
214
215 ; CHECK-LABEL: test_call_flipped(
216 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_call_flipped_param_0];
217 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_call_flipped_param_1];
218 ; CHECK: {
219 ; CHECK-DAG: .param .b32 param0;
220 ; CHECK-DAG: .param .b32 param1;
221 ; CHECK-DAG: st.param.b16 [param0+0], [[B]];
222 ; CHECK-DAG: st.param.b16 [param1+0], [[A]];
223 ; CHECK-DAG: .param .b32 retval0;
224 ; CHECK: call.uni (retval0),
225 ; CHECK-NEXT: test_callee,
226 ; CHECK: );
227 ; CHECK-NEXT: ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
228 ; CHECK-NEXT: }
229 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
230 ; CHECK-NEXT: ret;
231 define half @test_call_flipped(half %a, half %b) #0 {
232 %r = call half @test_callee(half %b, half %a)
233 ret half %r
234 }
235
236 ; CHECK-LABEL: test_tailcall_flipped(
237 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_tailcall_flipped_param_0];
238 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_tailcall_flipped_param_1];
239 ; CHECK: {
240 ; CHECK-DAG: .param .b32 param0;
241 ; CHECK-DAG: .param .b32 param1;
242 ; CHECK-DAG: st.param.b16 [param0+0], [[B]];
243 ; CHECK-DAG: st.param.b16 [param1+0], [[A]];
244 ; CHECK-DAG: .param .b32 retval0;
245 ; CHECK: call.uni (retval0),
246 ; CHECK-NEXT: test_callee,
247 ; CHECK: );
248 ; CHECK-NEXT: ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
249 ; CHECK-NEXT: }
250 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
251 ; CHECK-NEXT: ret;
252 define half @test_tailcall_flipped(half %a, half %b) #0 {
253 %r = tail call half @test_callee(half %b, half %a)
254 ret half %r
255 }
256
257 ; CHECK-LABEL: test_select(
258 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_select_param_0];
259 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_select_param_1];
260 ; CHECK-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1;
261 ; CHECK-NEXT: selp.b16 [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]];
262 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
263 ; CHECK-NEXT: ret;
264 define half @test_select(half %a, half %b, i1 zeroext %c) #0 {
265 %r = select i1 %c, half %a, half %b
266 ret half %r
267 }
268
269 ; CHECK-LABEL: test_select_cc(
270 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_select_cc_param_0];
271 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_select_cc_param_1];
272 ; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_select_cc_param_2];
273 ; CHECK-DAG: ld.param.b16 [[D:%h[0-9]+]], [test_select_cc_param_3];
274 ; CHECK-F16: setp.neu.f16 [[PRED:%p[0-9]+]], [[C]], [[D]]
275 ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF:%f[0-9]+]], [[D]];
276 ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF:%f[0-9]+]], [[C]];
277 ; CHECK-NOF16: setp.neu.f32 [[PRED:%p[0-9]+]], [[CF]], [[DF]]
278 ; CHECK: selp.b16 [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]];
279 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
280 ; CHECK-NEXT: ret;
281 define half @test_select_cc(half %a, half %b, half %c, half %d) #0 {
282 %cc = fcmp une half %c, %d
283 %r = select i1 %cc, half %a, half %b
284 ret half %r
285 }
286
287 ; CHECK-LABEL: test_select_cc_f32_f16(
288 ; CHECK-DAG: ld.param.f32 [[A:%f[0-9]+]], [test_select_cc_f32_f16_param_0];
289 ; CHECK-DAG: ld.param.f32 [[B:%f[0-9]+]], [test_select_cc_f32_f16_param_1];
290 ; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_select_cc_f32_f16_param_2];
291 ; CHECK-DAG: ld.param.b16 [[D:%h[0-9]+]], [test_select_cc_f32_f16_param_3];
292 ; CHECK-F16: setp.neu.f16 [[PRED:%p[0-9]+]], [[C]], [[D]]
293 ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF:%f[0-9]+]], [[D]];
294 ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF:%f[0-9]+]], [[C]];
295 ; CHECK-NOF16: setp.neu.f32 [[PRED:%p[0-9]+]], [[CF]], [[DF]]
296 ; CHECK-NEXT: selp.f32 [[R:%f[0-9]+]], [[A]], [[B]], [[PRED]];
297 ; CHECK-NEXT: st.param.f32 [func_retval0+0], [[R]];
298 ; CHECK-NEXT: ret;
299 define float @test_select_cc_f32_f16(float %a, float %b, half %c, half %d) #0 {
300 %cc = fcmp une half %c, %d
301 %r = select i1 %cc, float %a, float %b
302 ret float %r
303 }
304
305 ; CHECK-LABEL: test_select_cc_f16_f32(
306 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_select_cc_f16_f32_param_0];
307 ; CHECK-DAG: ld.param.f32 [[C:%f[0-9]+]], [test_select_cc_f16_f32_param_2];
308 ; CHECK-DAG: ld.param.f32 [[D:%f[0-9]+]], [test_select_cc_f16_f32_param_3];
309 ; CHECK-DAG: setp.neu.f32 [[PRED:%p[0-9]+]], [[C]], [[D]]
310 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_select_cc_f16_f32_param_1];
311 ; CHECK-NEXT: selp.b16 [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]];
312 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
313 ; CHECK-NEXT: ret;
314 define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 {
315 %cc = fcmp une float %c, %d
316 %r = select i1 %cc, half %a, half %b
317 ret half %r
318 }
319
320 ; CHECK-LABEL: test_fcmp_une(
321 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_une_param_0];
322 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_une_param_1];
323 ; CHECK-F16: setp.neu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
324 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
325 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
326 ; CHECK-NOF16: setp.neu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
327 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
328 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
329 ; CHECK-NEXT: ret;
330 define i1 @test_fcmp_une(half %a, half %b) #0 {
331 %r = fcmp une half %a, %b
332 ret i1 %r
333 }
334
335 ; CHECK-LABEL: test_fcmp_ueq(
336 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ueq_param_0];
337 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ueq_param_1];
338 ; CHECK-F16: setp.equ.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
339 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
340 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
341 ; CHECK-NOF16: setp.equ.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
342 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
343 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
344 ; CHECK-NEXT: ret;
345 define i1 @test_fcmp_ueq(half %a, half %b) #0 {
346 %r = fcmp ueq half %a, %b
347 ret i1 %r
348 }
349
350 ; CHECK-LABEL: test_fcmp_ugt(
351 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ugt_param_0];
352 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ugt_param_1];
353 ; CHECK-F16: setp.gtu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
354 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
355 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
356 ; CHECK-NOF16: setp.gtu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
357 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
358 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
359 ; CHECK-NEXT: ret;
360 define i1 @test_fcmp_ugt(half %a, half %b) #0 {
361 %r = fcmp ugt half %a, %b
362 ret i1 %r
363 }
364
365 ; CHECK-LABEL: test_fcmp_uge(
366 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_uge_param_0];
367 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_uge_param_1];
368 ; CHECK-F16: setp.geu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
369 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
370 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
371 ; CHECK-NOF16: setp.geu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
372 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
373 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
374 ; CHECK-NEXT: ret;
375 define i1 @test_fcmp_uge(half %a, half %b) #0 {
376 %r = fcmp uge half %a, %b
377 ret i1 %r
378 }
379
380 ; CHECK-LABEL: test_fcmp_ult(
381 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ult_param_0];
382 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ult_param_1];
383 ; CHECK-F16: setp.ltu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
384 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
385 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
386 ; CHECK-NOF16: setp.ltu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
387 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
388 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
389 ; CHECK-NEXT: ret;
390 define i1 @test_fcmp_ult(half %a, half %b) #0 {
391 %r = fcmp ult half %a, %b
392 ret i1 %r
393 }
394
395 ; CHECK-LABEL: test_fcmp_ule(
396 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ule_param_0];
397 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ule_param_1];
398 ; CHECK-F16: setp.leu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
399 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
400 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
401 ; CHECK-NOF16: setp.leu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
402 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
403 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
404 ; CHECK-NEXT: ret;
405 define i1 @test_fcmp_ule(half %a, half %b) #0 {
406 %r = fcmp ule half %a, %b
407 ret i1 %r
408 }
409
410
411 ; CHECK-LABEL: test_fcmp_uno(
412 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_uno_param_0];
413 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_uno_param_1];
414 ; CHECK-F16: setp.nan.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
415 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
416 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
417 ; CHECK-NOF16: setp.nan.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
418 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
419 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
420 ; CHECK-NEXT: ret;
421 define i1 @test_fcmp_uno(half %a, half %b) #0 {
422 %r = fcmp uno half %a, %b
423 ret i1 %r
424 }
425
426 ; CHECK-LABEL: test_fcmp_one(
427 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_one_param_0];
428 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_one_param_1];
429 ; CHECK-F16: setp.ne.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
430 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
431 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
432 ; CHECK-NOF16: setp.ne.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
433 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
434 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
435 ; CHECK-NEXT: ret;
436 define i1 @test_fcmp_one(half %a, half %b) #0 {
437 %r = fcmp one half %a, %b
438 ret i1 %r
439 }
440
441 ; CHECK-LABEL: test_fcmp_oeq(
442 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_oeq_param_0];
443 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_oeq_param_1];
444 ; CHECK-F16: setp.eq.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
445 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
446 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
447 ; CHECK-NOF16: setp.eq.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
448 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
449 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
450 ; CHECK-NEXT: ret;
451 define i1 @test_fcmp_oeq(half %a, half %b) #0 {
452 %r = fcmp oeq half %a, %b
453 ret i1 %r
454 }
455
456 ; CHECK-LABEL: test_fcmp_ogt(
457 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ogt_param_0];
458 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ogt_param_1];
459 ; CHECK-F16: setp.gt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
460 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
461 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
462 ; CHECK-NOF16: setp.gt.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
463 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
464 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
465 ; CHECK-NEXT: ret;
466 define i1 @test_fcmp_ogt(half %a, half %b) #0 {
467 %r = fcmp ogt half %a, %b
468 ret i1 %r
469 }
470
471 ; CHECK-LABEL: test_fcmp_oge(
472 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_oge_param_0];
473 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_oge_param_1];
474 ; CHECK-F16: setp.ge.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
475 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
476 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
477 ; CHECK-NOF16: setp.ge.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
478 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
479 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
480 ; CHECK-NEXT: ret;
481 define i1 @test_fcmp_oge(half %a, half %b) #0 {
482 %r = fcmp oge half %a, %b
483 ret i1 %r
484 }
485
486 ; XCHECK-LABEL: test_fcmp_olt(
487 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_olt_param_0];
488 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_olt_param_1];
489 ; CHECK-F16: setp.lt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
490 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
491 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
492 ; CHECK-NOF16: setp.lt.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
493 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
494 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
495 ; CHECK-NEXT: ret;
496 define i1 @test_fcmp_olt(half %a, half %b) #0 {
497 %r = fcmp olt half %a, %b
498 ret i1 %r
499 }
500
501 ; XCHECK-LABEL: test_fcmp_ole(
502 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ole_param_0];
503 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ole_param_1];
504 ; CHECK-F16: setp.le.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
505 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
506 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
507 ; CHECK-NOF16: setp.le.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
508 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
509 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
510 ; CHECK-NEXT: ret;
511 define i1 @test_fcmp_ole(half %a, half %b) #0 {
512 %r = fcmp ole half %a, %b
513 ret i1 %r
514 }
515
516 ; CHECK-LABEL: test_fcmp_ord(
517 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ord_param_0];
518 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ord_param_1];
519 ; CHECK-F16: setp.num.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
520 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
521 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
522 ; CHECK-NOF16: setp.num.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
523 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
524 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
525 ; CHECK-NEXT: ret;
526 define i1 @test_fcmp_ord(half %a, half %b) #0 {
527 %r = fcmp ord half %a, %b
528 ret i1 %r
529 }
530
531 ; CHECK-LABEL: test_br_cc(
532 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_br_cc_param_0];
533 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_br_cc_param_1];
534 ; CHECK-DAG: ld.param.u64 %[[C:rd[0-9]+]], [test_br_cc_param_2];
535 ; CHECK-DAG: ld.param.u64 %[[D:rd[0-9]+]], [test_br_cc_param_3];
536 ; CHECK-F16: setp.lt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
537 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
538 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
539 ; CHECK-NOF16: setp.lt.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
540 ; CHECK-NEXT: @[[PRED]] bra [[LABEL:LBB.*]];
541 ; CHECK: st.u32 [%[[C]]],
542 ; CHECK: [[LABEL]]:
543 ; CHECK: st.u32 [%[[D]]],
544 ; CHECK: ret;
545 define void @test_br_cc(half %a, half %b, i32* %p1, i32* %p2) #0 {
546 %c = fcmp uge half %a, %b
547 br i1 %c, label %then, label %else
548 then:
549 store i32 0, i32* %p1
550 ret void
551 else:
552 store i32 0, i32* %p2
553 ret void
554 }
555
556 ; CHECK-LABEL: test_phi(
557 ; CHECK: ld.param.u64 %[[P1:rd[0-9]+]], [test_phi_param_0];
558 ; CHECK: ld.b16 {{%h[0-9]+}}, [%[[P1]]];
559 ; CHECK: [[LOOP:LBB[0-9_]+]]:
560 ; CHECK: mov.b16 [[R:%h[0-9]+]], [[AB:%h[0-9]+]];
561 ; CHECK: ld.b16 [[AB:%h[0-9]+]], [%[[P1]]];
562 ; CHECK: {
563 ; CHECK: st.param.b64 [param0+0], %[[P1]];
564 ; CHECK: call.uni (retval0),
565 ; CHECK-NEXT: test_dummy
566 ; CHECK: }
567 ; CHECK: setp.eq.b32 [[PRED:%p[0-9]+]], %r{{[0-9]+}}, 1;
568 ; CHECK: @[[PRED]] bra [[LOOP]];
569 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
570 ; CHECK: ret;
571 define half @test_phi(half* %p1) #0 {
572 entry:
573 %a = load half, half* %p1
574 br label %loop
575 loop:
576 %r = phi half [%a, %entry], [%b, %loop]
577 %b = load half, half* %p1
578 %c = call i1 @test_dummy(half* %p1)
579 br i1 %c, label %loop, label %return
580 return:
581 ret half %r
582 }
583 declare i1 @test_dummy(half* %p1) #0
584
585 ; CHECK-LABEL: test_fptosi_i32(
586 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptosi_i32_param_0];
587 ; CHECK: cvt.rzi.s32.f16 [[R:%r[0-9]+]], [[A]];
588 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
589 ; CHECK: ret;
590 define i32 @test_fptosi_i32(half %a) #0 {
591 %r = fptosi half %a to i32
592 ret i32 %r
593 }
594
595 ; CHECK-LABEL: test_fptosi_i64(
596 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptosi_i64_param_0];
597 ; CHECK: cvt.rzi.s64.f16 [[R:%rd[0-9]+]], [[A]];
598 ; CHECK: st.param.b64 [func_retval0+0], [[R]];
599 ; CHECK: ret;
600 define i64 @test_fptosi_i64(half %a) #0 {
601 %r = fptosi half %a to i64
602 ret i64 %r
603 }
604
605 ; CHECK-LABEL: test_fptoui_i32(
606 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptoui_i32_param_0];
607 ; CHECK: cvt.rzi.u32.f16 [[R:%r[0-9]+]], [[A]];
608 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
609 ; CHECK: ret;
610 define i32 @test_fptoui_i32(half %a) #0 {
611 %r = fptoui half %a to i32
612 ret i32 %r
613 }
614
615 ; CHECK-LABEL: test_fptoui_i64(
616 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptoui_i64_param_0];
617 ; CHECK: cvt.rzi.u64.f16 [[R:%rd[0-9]+]], [[A]];
618 ; CHECK: st.param.b64 [func_retval0+0], [[R]];
619 ; CHECK: ret;
620 define i64 @test_fptoui_i64(half %a) #0 {
621 %r = fptoui half %a to i64
622 ret i64 %r
623 }
624
625 ; CHECK-LABEL: test_uitofp_i32(
626 ; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_uitofp_i32_param_0];
627 ; CHECK: cvt.rn.f16.u32 [[R:%h[0-9]+]], [[A]];
628 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
629 ; CHECK: ret;
630 define half @test_uitofp_i32(i32 %a) #0 {
631 %r = uitofp i32 %a to half
632 ret half %r
633 }
634
635 ; CHECK-LABEL: test_uitofp_i64(
636 ; CHECK: ld.param.u64 [[A:%rd[0-9]+]], [test_uitofp_i64_param_0];
637 ; CHECK: cvt.rn.f16.u64 [[R:%h[0-9]+]], [[A]];
638 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
639 ; CHECK: ret;
640 define half @test_uitofp_i64(i64 %a) #0 {
641 %r = uitofp i64 %a to half
642 ret half %r
643 }
644
645 ; CHECK-LABEL: test_sitofp_i32(
646 ; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_sitofp_i32_param_0];
647 ; CHECK: cvt.rn.f16.s32 [[R:%h[0-9]+]], [[A]];
648 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
649 ; CHECK: ret;
650 define half @test_sitofp_i32(i32 %a) #0 {
651 %r = sitofp i32 %a to half
652 ret half %r
653 }
654
655 ; CHECK-LABEL: test_sitofp_i64(
656 ; CHECK: ld.param.u64 [[A:%rd[0-9]+]], [test_sitofp_i64_param_0];
657 ; CHECK: cvt.rn.f16.s64 [[R:%h[0-9]+]], [[A]];
658 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
659 ; CHECK: ret;
660 define half @test_sitofp_i64(i64 %a) #0 {
661 %r = sitofp i64 %a to half
662 ret half %r
663 }
664
; uitofp i32 -> half whose result feeds an fadd with a half parameter.
; With the F16 prefix the add is expected as a native add.rn.f16; with the NOF16
; prefix both operands are extended to f32, added with add.rn.f32, and the sum
; is converted back to f16.
665 ; CHECK-LABEL: test_uitofp_i32_fadd(
666 ; CHECK-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_uitofp_i32_fadd_param_0];
667 ; CHECK-DAG: cvt.rn.f16.u32 [[C:%h[0-9]+]], [[A]];
668 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_uitofp_i32_fadd_param_1];
669 ; CHECK-F16: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[C]];
670 ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
671 ; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
672 ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], [[C32]];
673 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
674 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
675 ; CHECK: ret;
676 define half @test_uitofp_i32_fadd(i32 %a, half %b) #0 {
677 %c = uitofp i32 %a to half
678 %r = fadd half %b, %c
679 ret half %r
680 }
681
; sitofp i32 -> half whose result feeds an fadd with a half parameter; the
; signed counterpart of test_uitofp_i32_fadd.
; NOTE(review): the four no-f16-math directives below are spelled with an
; XCHECK-... prefix, so they are inert unless FileCheck is given that prefix
; (the RUN lines visible in this file only pass the CHECK, F16 and NOF16
; prefixes). In the no-f16 runs [[R]] is therefore never defined inside this
; function before the final store check uses it; presumably it resolves from
; an earlier function. Confirm whether the X was left in by accident - the
; unsigned sibling has the same four checks enabled.
682 ; CHECK-LABEL: test_sitofp_i32_fadd(
683 ; CHECK-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_sitofp_i32_fadd_param_0];
684 ; CHECK-DAG: cvt.rn.f16.s32 [[C:%h[0-9]+]], [[A]];
685 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_sitofp_i32_fadd_param_1];
686 ; CHECK-F16: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[C]];
687 ; XCHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
688 ; XCHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
689 ; XCHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], [[C32]];
690 ; XCHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
691 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
692 ; CHECK: ret;
693 define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 {
694 %c = sitofp i32 %a to half
695 %r = fadd half %b, %c
696 ret half %r
697 }
698
; fptrunc float -> half. The checks expect a single cvt.rn.f16.f32 on the loaded
; f32 parameter.
699 ; CHECK-LABEL: test_fptrunc_float(
700 ; CHECK: ld.param.f32 [[A:%f[0-9]+]], [test_fptrunc_float_param_0];
701 ; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[A]];
702 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
703 ; CHECK: ret;
704 define half @test_fptrunc_float(float %a) #0 {
705 %r = fptrunc float %a to half
706 ret half %r
707 }
708
; fptrunc double -> half. The checks expect a single cvt.rn.f16.f64 straight
; from the f64 parameter (no intermediate f32 step is listed).
709 ; CHECK-LABEL: test_fptrunc_double(
710 ; CHECK: ld.param.f64 [[A:%fd[0-9]+]], [test_fptrunc_double_param_0];
711 ; CHECK: cvt.rn.f16.f64 [[R:%h[0-9]+]], [[A]];
712 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
713 ; CHECK: ret;
714 define half @test_fptrunc_double(double %a) #0 {
715 %r = fptrunc double %a to half
716 ret half %r
717 }
718
; fpext half -> float. The checks expect cvt.f32.f16 (no rounding modifier) and
; an f32 store to the return slot.
719 ; CHECK-LABEL: test_fpext_float(
720 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fpext_float_param_0];
721 ; CHECK: cvt.f32.f16 [[R:%f[0-9]+]], [[A]];
722 ; CHECK: st.param.f32 [func_retval0+0], [[R]];
723 ; CHECK: ret;
724 define float @test_fpext_float(half %a) #0 {
725 %r = fpext half %a to float
726 ret float %r
727 }
728
; fpext half -> double. The checks expect a single cvt.f64.f16 and an f64 store
; to the return slot.
729 ; CHECK-LABEL: test_fpext_double(
730 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fpext_double_param_0];
731 ; CHECK: cvt.f64.f16 [[R:%fd[0-9]+]], [[A]];
732 ; CHECK: st.param.f64 [func_retval0+0], [[R]];
733 ; CHECK: ret;
734 define double @test_fpext_double(half %a) #0 {
735 %r = fpext half %a to double
736 ret double %r
737 }
738
739
; bitcast half -> i16. The bits are moved from a half register to a 16-bit
; integer register with mov.b16, then widened with cvt.u32.u16 before being
; stored to the b32 return slot.
740 ; CHECK-LABEL: test_bitcast_halftoi16(
741 ; CHECK: ld.param.b16 [[AH:%h[0-9]+]], [test_bitcast_halftoi16_param_0];
742 ; CHECK: mov.b16 [[AS:%rs[0-9]+]], [[AH]]
743 ; CHECK: cvt.u32.u16 [[R:%r[0-9]+]], [[AS]]
744 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
745 ; CHECK: ret;
746 define i16 @test_bitcast_halftoi16(half %a) #0 {
747 %r = bitcast half %a to i16
748 ret i16 %r
749 }
750
; bitcast i16 -> half. The i16 parameter is loaded into a 16-bit integer
; register and moved into a half register with mov.b16; no conversion
; instruction is expected.
751 ; CHECK-LABEL: test_bitcast_i16tohalf(
752 ; CHECK: ld.param.u16 [[AS:%rs[0-9]+]], [test_bitcast_i16tohalf_param_0];
753 ; CHECK: mov.b16 [[AH:%h[0-9]+]], [[AS]]
754 ; CHECK: st.param.b16 [func_retval0+0], [[AH]];
755 ; CHECK: ret;
756 define half @test_bitcast_i16tohalf(i16 %a) #0 {
757 %r = bitcast i16 %a to half
758 ret half %r
759 }
760
761
; Declarations of the half-precision intrinsics referenced by the tests that
; follow. powi, pow, exp, exp2, log, log10 and log2 are only referenced from
; tests that are currently commented out ("requires libcall").
762 declare half @llvm.sqrt.f16(half %a) #0
763 declare half @llvm.powi.f16(half %a, i32 %b) #0
764 declare half @llvm.sin.f16(half %a) #0
765 declare half @llvm.cos.f16(half %a) #0
766 declare half @llvm.pow.f16(half %a, half %b) #0
767 declare half @llvm.exp.f16(half %a) #0
768 declare half @llvm.exp2.f16(half %a) #0
769 declare half @llvm.log.f16(half %a) #0
770 declare half @llvm.log10.f16(half %a) #0
771 declare half @llvm.log2.f16(half %a) #0
772 declare half @llvm.fma.f16(half %a, half %b, half %c) #0
773 declare half @llvm.fabs.f16(half %a) #0
774 declare half @llvm.minnum.f16(half %a, half %b) #0
775 declare half @llvm.maxnum.f16(half %a, half %b) #0
776 declare half @llvm.copysign.f16(half %a, half %b) #0
777 declare half @llvm.floor.f16(half %a) #0
778 declare half @llvm.ceil.f16(half %a) #0
779 declare half @llvm.trunc.f16(half %a) #0
780 declare half @llvm.rint.f16(half %a) #0
781 declare half @llvm.nearbyint.f16(half %a) #0
782 declare half @llvm.round.f16(half %a) #0
783 declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0
784
; llvm.sqrt.f16. Only the common prefix is used, so every configuration is
; expected to compute the square root in f32: extend to f32, sqrt.rn.f32, then
; convert back to f16.
785 ; CHECK-LABEL: test_sqrt(
786 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_sqrt_param_0];
787 ; CHECK: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
788 ; CHECK: sqrt.rn.f32 [[RF:%f[0-9]+]], [[AF]];
789 ; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
790 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
791 ; CHECK: ret;
792 define half @test_sqrt(half %a) #0 {
793 %r = call half @llvm.sqrt.f16(half %a)
794 ret half %r
795 }
796
797 ;;; Can't do this yet: requires libcall.
798 ; XCHECK-LABEL: test_powi(
799 ;define half @test_powi(half %a, i32 %b) #0 {
800 ; %r = call half @llvm.powi.f16(half %a, i32 %b)
801 ; ret half %r
802 ;}
803
; llvm.sin.f16, expected to be computed in f32 via sin.approx.f32 and converted
; back to f16. The function carries attribute group #1 in addition to #0;
; presumably the approximate instruction is only selected because #1 enables
; unsafe-fp-math - confirm against the backend.
804 ; CHECK-LABEL: test_sin(
805 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_sin_param_0];
806 ; CHECK: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
807 ; CHECK: sin.approx.f32 [[RF:%f[0-9]+]], [[AF]];
808 ; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
809 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
810 ; CHECK: ret;
811 define half @test_sin(half %a) #0 #1 {
812 %r = call half @llvm.sin.f16(half %a)
813 ret half %r
814 }
815
; llvm.cos.f16, expected to be computed in f32 via cos.approx.f32 and converted
; back to f16. Like test_sin, the function carries attribute group #1 in
; addition to #0 (presumably required for the approximate form - confirm).
816 ; CHECK-LABEL: test_cos(
817 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_cos_param_0];
818 ; CHECK: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
819 ; CHECK: cos.approx.f32 [[RF:%f[0-9]+]], [[AF]];
820 ; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
821 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
822 ; CHECK: ret;
823 define half @test_cos(half %a) #0 #1 {
824 %r = call half @llvm.cos.f16(half %a)
825 ret half %r
826 }
827
828 ;;; Can't do this yet: requires libcall.
829 ; XCHECK-LABEL: test_pow(
830 ;define half @test_pow(half %a, half %b) #0 {
831 ; %r = call half @llvm.pow.f16(half %a, half %b)
832 ; ret half %r
833 ;}
834
835 ;;; Can't do this yet: requires libcall.
836 ; XCHECK-LABEL: test_exp(
837 ;define half @test_exp(half %a) #0 {
838 ; %r = call half @llvm.exp.f16(half %a)
839 ; ret half %r
840 ;}
841
842 ;;; Can't do this yet: requires libcall.
843 ; XCHECK-LABEL: test_exp2(
844 ;define half @test_exp2(half %a) #0 {
845 ; %r = call half @llvm.exp2.f16(half %a)
846 ; ret half %r
847 ;}
848
849 ;;; Can't do this yet: requires libcall.
850 ; XCHECK-LABEL: test_log(
851 ;define half @test_log(half %a) #0 {
852 ; %r = call half @llvm.log.f16(half %a)
853 ; ret half %r
854 ;}
855
856 ;;; Can't do this yet: requires libcall.
857 ; XCHECK-LABEL: test_log10(
858 ;define half @test_log10(half %a) #0 {
859 ; %r = call half @llvm.log10.f16(half %a)
860 ; ret half %r
861 ;}
862
863 ;;; Can't do this yet: requires libcall.
864 ; XCHECK-LABEL: test_log2(
865 ;define half @test_log2(half %a) #0 {
866 ; %r = call half @llvm.log2.f16(half %a)
867 ; ret half %r
868 ;}
869
; llvm.fma.f16 on three half parameters. With the F16 prefix a native
; fma.rn.f16 is expected; with the NOF16 prefix all three operands are extended
; to f32, combined with fma.rn.f32, and the result converted back to f16.
870 ; CHECK-LABEL: test_fma(
871 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fma_param_0];
872 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fma_param_1];
873 ; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_fma_param_2];
874 ; CHECK-F16: fma.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]], [[C]];
875 ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
876 ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
877 ; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
878 ; CHECK-NOF16-NEXT: fma.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]];
879 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
880 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
881 ; CHECK: ret
882 define half @test_fma(half %a, half %b, half %c) #0 {
883 %r = call half @llvm.fma.f16(half %a, half %b, half %c)
884 ret half %r
885 }
886
; llvm.fabs.f16. Only the common prefix is used: the value is expected to be
; extended to f32, passed through abs.f32, and converted back to f16.
887 ; CHECK-LABEL: test_fabs(
888 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fabs_param_0];
889 ; CHECK: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
890 ; CHECK: abs.f32 [[RF:%f[0-9]+]], [[AF]];
891 ; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
892 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
893 ; CHECK: ret;
894 define half @test_fabs(half %a) #0 {
895 %r = call half @llvm.fabs.f16(half %a)
896 ret half %r
897 }
898
; llvm.minnum.f16. Both operands are expected to be extended to f32, combined
; with min.f32, and the result converted back to f16 (common prefix only).
899 ; CHECK-LABEL: test_minnum(
900 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_minnum_param_0];
901 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_minnum_param_1];
902 ; CHECK-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
903 ; CHECK-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
904 ; CHECK: min.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]];
905 ; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
906 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
907 ; CHECK: ret;
908 define half @test_minnum(half %a, half %b) #0 {
909 %r = call half @llvm.minnum.f16(half %a, half %b)
910 ret half %r
911 }
912
; llvm.maxnum.f16. Mirror of test_minnum: extend both operands to f32, max.f32,
; convert the result back to f16 (common prefix only).
913 ; CHECK-LABEL: test_maxnum(
914 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_maxnum_param_0];
915 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_maxnum_param_1];
916 ; CHECK-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
917 ; CHECK-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
918 ; CHECK: max.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]];
919 ; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
920 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
921 ; CHECK: ret;
922 define half @test_maxnum(half %a, half %b) #0 {
923 %r = call half @llvm.maxnum.f16(half %a, half %b)
924 ret half %r
925 }
926
; llvm.copysign.f16 with two half operands. The checks expect integer bit
; manipulation on the 16-bit patterns: %a is masked with 32767 (0x7FFF, all
; bits except the top one), %b with -32768 (0x8000, the top bit only), and the
; two are OR-ed and moved back into a half register.
927 ; CHECK-LABEL: test_copysign(
928 ; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_param_0];
929 ; CHECK-DAG: ld.param.b16 [[BH:%h[0-9]+]], [test_copysign_param_1];
930 ; CHECK-DAG: mov.b16 [[AS:%rs[0-9]+]], [[AH]];
931 ; CHECK-DAG: mov.b16 [[BS:%rs[0-9]+]], [[BH]];
932 ; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AS]], 32767;
933 ; CHECK-DAG: and.b16 [[BX:%rs[0-9]+]], [[BS]], -32768;
934 ; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX]];
935 ; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]];
936 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
937 ; CHECK: ret;
938 define half @test_copysign(half %a, half %b) #0 {
939 %r = call half @llvm.copysign.f16(half %a, half %b)
940 ret half %r
941 }
942
; copysign where the sign operand is a float truncated to half. The checks
; expect the sign to be taken from the f32 bit pattern itself: mask with
; -2147483648 (0x80000000), shift right by 16 so the bit lands in the f16 sign
; position, narrow to 16 bits, then OR with the masked magnitude of %a.
943 ; CHECK-LABEL: test_copysign_f32(
944 ; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_f32_param_0];
945 ; CHECK-DAG: ld.param.f32 [[BF:%f[0-9]+]], [test_copysign_f32_param_1];
946 ; CHECK-DAG: mov.b16 [[A:%rs[0-9]+]], [[AH]];
947 ; CHECK-DAG: mov.b32 [[B:%r[0-9]+]], [[BF]];
948 ; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[A]], 32767;
949 ; CHECK-DAG: and.b32 [[BX0:%r[0-9]+]], [[B]], -2147483648;
950 ; CHECK-DAG: shr.u32 [[BX1:%r[0-9]+]], [[BX0]], 16;
951 ; CHECK-DAG: cvt.u16.u32 [[BX2:%rs[0-9]+]], [[BX1]];
952 ; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX2]];
953 ; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]];
954 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
955 ; CHECK: ret;
956 define half @test_copysign_f32(half %a, float %b) #0 {
957 %tb = fptrunc float %b to half
958 %r = call half @llvm.copysign.f16(half %a, half %tb)
959 ret half %r
960 }
961
; copysign where the sign operand is a double truncated to half. As in the f32
; variant, the sign is expected to come from the f64 bit pattern: mask with
; -9223372036854775808 (top bit of 64), shift right by 48, narrow to 16 bits,
; then OR with the masked magnitude of %a.
962 ; CHECK-LABEL: test_copysign_f64(
963 ; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_f64_param_0];
964 ; CHECK-DAG: ld.param.f64 [[BD:%fd[0-9]+]], [test_copysign_f64_param_1];
965 ; CHECK-DAG: mov.b16 [[A:%rs[0-9]+]], [[AH]];
966 ; CHECK-DAG: mov.b64 [[B:%rd[0-9]+]], [[BD]];
967 ; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[A]], 32767;
968 ; CHECK-DAG: and.b64 [[BX0:%rd[0-9]+]], [[B]], -9223372036854775808;
969 ; CHECK-DAG: shr.u64 [[BX1:%rd[0-9]+]], [[BX0]], 48;
970 ; CHECK-DAG: cvt.u16.u64 [[BX2:%rs[0-9]+]], [[BX1]];
971 ; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX2]];
972 ; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]];
973 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
974 ; CHECK: ret;
975 define half @test_copysign_f64(half %a, double %b) #0 {
976 %tb = fptrunc double %b to half
977 %r = call half @llvm.copysign.f16(half %a, half %tb)
978 ret half %r
979 }
980
; copysign on two halves whose result is then extended to float. The checks
; expect the same 16-bit mask/OR sequence as test_copysign, followed by a
; cvt.f32.f16 of the combined value and an f32 store to the return slot.
981 ; CHECK-LABEL: test_copysign_extended(
982 ; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_extended_param_0];
983 ; CHECK-DAG: ld.param.b16 [[BH:%h[0-9]+]], [test_copysign_extended_param_1];
984 ; CHECK-DAG: mov.b16 [[AS:%rs[0-9]+]], [[AH]];
985 ; CHECK-DAG: mov.b16 [[BS:%rs[0-9]+]], [[BH]];
986 ; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AS]], 32767;
987 ; CHECK-DAG: and.b16 [[BX:%rs[0-9]+]], [[BS]], -32768;
988 ; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX]];
989 ; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]];
990 ; CHECK: cvt.f32.f16 [[XR:%f[0-9]+]], [[R]];
991 ; CHECK: st.param.f32 [func_retval0+0], [[XR]];
992 ; CHECK: ret;
993 define float @test_copysign_extended(half %a, half %b) #0 {
994 %r = call half @llvm.copysign.f16(half %a, half %b)
995 %xr = fpext half %r to float
996 ret float %xr
997 }
998
; llvm.floor.f16, expected as a single f16-to-f16 cvt with the .rmi modifier.
999 ; CHECK-LABEL: test_floor(
1000 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_floor_param_0];
1001 ; CHECK: cvt.rmi.f16.f16 [[R:%h[0-9]+]], [[A]];
1002 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
1003 ; CHECK: ret;
1004 define half @test_floor(half %a) #0 {
1005 %r = call half @llvm.floor.f16(half %a)
1006 ret half %r
1007 }
1008
; llvm.ceil.f16, expected as a single f16-to-f16 cvt with the .rpi modifier.
1009 ; CHECK-LABEL: test_ceil(
1010 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_ceil_param_0];
1011 ; CHECK: cvt.rpi.f16.f16 [[R:%h[0-9]+]], [[A]];
1012 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
1013 ; CHECK: ret;
1014 define half @test_ceil(half %a) #0 {
1015 %r = call half @llvm.ceil.f16(half %a)
1016 ret half %r
1017 }
1018
; llvm.trunc.f16, expected as a single f16-to-f16 cvt with the .rzi modifier.
1019 ; CHECK-LABEL: test_trunc(
1020 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_trunc_param_0];
1021 ; CHECK: cvt.rzi.f16.f16 [[R:%h[0-9]+]], [[A]];
1022 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
1023 ; CHECK: ret;
1024 define half @test_trunc(half %a) #0 {
1025 %r = call half @llvm.trunc.f16(half %a)
1026 ret half %r
1027 }
1028
; llvm.rint.f16, expected as a single f16-to-f16 cvt with the .rni modifier.
1029 ; CHECK-LABEL: test_rint(
1030 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_rint_param_0];
1031 ; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]];
1032 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
1033 ; CHECK: ret;
1034 define half @test_rint(half %a) #0 {
1035 %r = call half @llvm.rint.f16(half %a)
1036 ret half %r
1037 }
1038
; llvm.nearbyint.f16, expected to produce the same cvt.rni.f16.f16 as test_rint.
1039 ; CHECK-LABEL: test_nearbyint(
1040 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_nearbyint_param_0];
1041 ; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]];
1042 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
1043 ; CHECK: ret;
1044 define half @test_nearbyint(half %a) #0 {
1045 %r = call half @llvm.nearbyint.f16(half %a)
1046 ret half %r
1047 }
1048
; llvm.round.f16, expected to produce the same cvt.rni.f16.f16 as test_rint and
; test_nearbyint.
; NOTE(review): the LLVM LangRef describes llvm.round as rounding halfway cases
; away from zero, while the PTX ISA documents .rni as round-to-nearest-even.
; Verify that this expectation reflects the intended lowering rather than
; pinning a backend inaccuracy.
1049 ; CHECK-LABEL: test_round(
1050 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_round_param_0];
1051 ; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]];
1052 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
1053 ; CHECK: ret;
1054 define half @test_round(half %a) #0 {
1055 %r = call half @llvm.round.f16(half %a)
1056 ret half %r
1057 }
1058
; llvm.fmuladd.f16 on three half parameters. The expectations are the same as
; for test_fma: a native fma.rn.f16 with the F16 prefix, or an f32 fma.rn.f32
; on extended operands followed by a conversion back with the NOF16 prefix.
1059 ; CHECK-LABEL: test_fmuladd(
1060 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fmuladd_param_0];
1061 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fmuladd_param_1];
1062 ; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_fmuladd_param_2];
1063 ; CHECK-F16: fma.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]], [[C]];
1064 ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
1065 ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
1066 ; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
1067 ; CHECK-NOF16-NEXT: fma.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]];
1068 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
1069 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
1070 ; CHECK: ret;
1071 define half @test_fmuladd(half %a, half %b, half %c) #0 {
1072 %r = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
1073 ret half %r
1074 }
1075
; Attribute groups: #0 (nounwind) is attached to the test functions and
; intrinsic declarations above; #1 (unsafe-fp-math) is attached in addition to
; #0 on test_sin and test_cos.
1076 attributes #0 = { nounwind }
1077 attributes #1 = { "unsafe-fp-math" = "true" }
; Three llc configurations are exercised, all with -verify-machineinstrs.
; Directives using the plain prefix apply to every run; the F16-suffixed prefix
; is enabled only for the first run (sm_53, native f16 math), and the
; NOF16-suffixed prefix for the second (sm_53 with --nvptx-no-f16-math) and
; third (sm_52) runs.
0 ; ## Full FP16 support enabled by default.
1 ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
2 ; RUN: -O0 -disable-post-ra -disable-fp-elim -verify-machineinstrs \
3 ; RUN: | FileCheck -check-prefixes CHECK,CHECK-F16 %s
4 ; ## FP16 support explicitly disabled.
5 ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
6 ; RUN: -O0 -disable-post-ra -disable-fp-elim --nvptx-no-f16-math \
7 ; RUN: -verify-machineinstrs \
8 ; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOF16 %s
9 ; ## FP16 is not supported by hardware.
10 ; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 -asm-verbose=false \
11 ; RUN: -disable-post-ra -disable-fp-elim -verify-machineinstrs \
12 ; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOF16 %s
13
14 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
15
; Returning the constant half 1.0. The checks expect the constant to be
; materialized as the 16-bit pattern 0x3C00 and stored to the return slot with
; no instructions in between (-NEXT).
16 ; CHECK-LABEL: test_ret_const(
17 ; CHECK: mov.b16 [[R:%h[0-9]+]], 0x3C00;
18 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
19 ; CHECK-NEXT: ret;
20 define half @test_ret_const() #0 {
21 ret half 1.0
22 }
23
; fadd of two half parameters. With the F16 prefix a native add.rn.f16 is
; expected immediately after the loads; with the NOF16 prefix both operands are
; extended to f32, added with add.rn.f32, and the sum converted back to f16.
24 ; CHECK-LABEL: test_fadd(
25 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fadd_param_0];
26 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_param_1];
27 ; CHECK-F16-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]];
28 ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
29 ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
30 ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
31 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
32 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
33 ; CHECK-NEXT: ret;
34 define half @test_fadd(half %a, half %b) #0 {
35 %r = fadd half %a, %b
36 ret half %r
37 }
38
; fadd on single-element <1 x half> vectors. The expected code is the same
; scalar sequence as test_fadd: b16 parameter loads, one add (native or via
; f32), and a b16 store of the result.
39 ; CHECK-LABEL: test_fadd_v1f16(
40 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fadd_v1f16_param_0];
41 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_v1f16_param_1];
42 ; CHECK-F16-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]];
43 ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
44 ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
45 ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
46 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
47 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
48 ; CHECK-NEXT: ret;
49 define <1 x half> @test_fadd_v1f16(<1 x half> %a, <1 x half> %b) #0 {
50 %r = fadd <1 x half> %a, %b
51 ret <1 x half> %r
52 }
53
54 ; Check that we can lower fadd with immediate arguments.
; Here the constant 1.0 is the left-hand operand. With the F16 prefix it is
; expected to be materialized into a register (mov.b16 ... 0x3C00) and used as
; the second source of add.rn.f16; with the NOF16 prefix it is expected to be
; folded into add.rn.f32 as the immediate 0f3F800000.
55 ; CHECK-LABEL: test_fadd_imm_0(
56 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_imm_0_param_0];
57 ; CHECK-F16-DAG: mov.b16 [[A:%h[0-9]+]], 0x3C00;
58 ; CHECK-F16-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[A]];
59 ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
60 ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], 0f3F800000;
61 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
62 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
63 ; CHECK-NEXT: ret;
64 define half @test_fadd_imm_0(half %b) #0 {
65 %r = fadd half 1.0, %b
66 ret half %r
67 }
68
; Same as test_fadd_imm_0 but with the constant 1.0 as the right-hand operand.
; The expected instruction sequence is identical. Note that the IR parameter is
; named %a while the FileCheck variable capturing its load is named B.
69 ; CHECK-LABEL: test_fadd_imm_1(
70 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_imm_1_param_0];
71 ; CHECK-F16-DAG: mov.b16 [[A:%h[0-9]+]], 0x3C00;
72 ; CHECK-F16-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[A]];
73 ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
74 ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], 0f3F800000;
75 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
76 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
77 ; CHECK-NEXT: ret;
78 define half @test_fadd_imm_1(half %a) #0 {
79 %r = fadd half %a, 1.0
80 ret half %r
81 }
82
; fsub of two half parameters. With the F16 prefix a native sub.rn.f16 is
; expected; with the NOF16 prefix the operands are extended to f32, subtracted
; with sub.rn.f32, and the difference converted back to f16.
83 ; CHECK-LABEL: test_fsub(
84 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fsub_param_0];
85 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fsub_param_1];
86 ; CHECK-F16-NEXT: sub.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]];
87 ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
88 ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
89 ; CHECK-NOF16-NEXT: sub.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
90 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
91 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
92 ; CHECK-NEXT: ret;
93 define half @test_fsub(half %a, half %b) #0 {
94 %r = fsub half %a, %b
95 ret half %r
96 }
97
; Negation written as "fsub half 0.0, %a". The checks expect an explicit
; subtraction from a zero held in a register: a half zero (0x0000) and
; sub.rn.f16 with the F16 prefix, or an f32 zero (0f00000000) and sub.rn.f32 on
; the extended operand with the NOF16 prefix.
98 ; CHECK-LABEL: test_fneg(
99 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fneg_param_0];
100 ; CHECK-F16-NEXT: mov.b16 [[Z:%h[0-9]+]], 0x0000
101 ; CHECK-F16-NEXT: sub.rn.f16 [[R:%h[0-9]+]], [[Z]], [[A]];
102 ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
103 ; CHECK-NOF16-DAG: mov.f32 [[Z:%f[0-9]+]], 0f00000000;
104 ; CHECK-NOF16-NEXT: sub.rn.f32 [[R32:%f[0-9]+]], [[Z]], [[A32]];
105 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
106 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
107 ; CHECK-NEXT: ret;
108 define half @test_fneg(half %a) #0 {
109 %r = fsub half 0.0, %a
110 ret half %r
111 }
112
; fmul of two half parameters. With the F16 prefix a native mul.rn.f16 is
; expected; with the NOF16 prefix the operands are extended to f32, multiplied
; with mul.rn.f32, and the product converted back to f16.
113 ; CHECK-LABEL: test_fmul(
114 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fmul_param_0];
115 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fmul_param_1];
116 ; CHECK-F16-NEXT: mul.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]];
117 ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
118 ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
119 ; CHECK-NOF16-NEXT: mul.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
120 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
121 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
122 ; CHECK-NEXT: ret;
123 define half @test_fmul(half %a, half %b) #0 {
124 %r = fmul half %a, %b
125 ret half %r
126 }
127
; fdiv of two half parameters. Only the common prefix is used, so every run is
; expected to divide in f32: extend both operands, div.rn.f32, convert the
; quotient back to f16.
128 ; CHECK-LABEL: test_fdiv(
129 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fdiv_param_0];
130 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fdiv_param_1];
131 ; CHECK-DAG: cvt.f32.f16 [[F0:%f[0-9]+]], [[A]];
132 ; CHECK-DAG: cvt.f32.f16 [[F1:%f[0-9]+]], [[B]];
133 ; CHECK-NEXT: div.rn.f32 [[FR:%f[0-9]+]], [[F0]], [[F1]];
134 ; CHECK-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[FR]];
135 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
136 ; CHECK-NEXT: ret;
137 define half @test_fdiv(half %a, half %b) #0 {
138 %r = fdiv half %a, %b
139 ret half %r
140 }
141
; frem of two half parameters. Only the common prefix is used: every run is
; expected to expand the remainder in f32 as FA - cvt.rmi(FA / FB) * FB and
; convert the result back to f16.
; NOTE(review): the integer-rounding step is checked as cvt.rmi. The LangRef
; says the frem result has the same sign as the dividend; confirm that the
; .rmi rounding mode gives that for operands of opposite sign.
142 ; CHECK-LABEL: test_frem(
143 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_frem_param_0];
144 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_frem_param_1];
145 ; CHECK-DAG: cvt.f32.f16 [[FA:%f[0-9]+]], [[A]];
146 ; CHECK-DAG: cvt.f32.f16 [[FB:%f[0-9]+]], [[B]];
147 ; CHECK-NEXT: div.rn.f32 [[D:%f[0-9]+]], [[FA]], [[FB]];
148 ; CHECK-NEXT: cvt.rmi.f32.f32 [[DI:%f[0-9]+]], [[D]];
149 ; CHECK-NEXT: mul.f32 [[RI:%f[0-9]+]], [[DI]], [[FB]];
150 ; CHECK-NEXT: sub.f32 [[RF:%f[0-9]+]], [[FA]], [[RI]];
151 ; CHECK-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
152 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
153 ; CHECK-NEXT: ret;
154 define half @test_frem(half %a, half %b) #0 {
155 %r = frem half %a, %b
156 ret half %r
157 }
158
; Store of a half parameter through a pointer parameter. The checks expect a
; single 16-bit store (st.b16) to the loaded 64-bit address and nothing else
; before the return.
159 ; CHECK-LABEL: test_store(
160 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_store_param_0];
161 ; CHECK-DAG: ld.param.u64 %[[PTR:rd[0-9]+]], [test_store_param_1];
162 ; CHECK-NEXT: st.b16 [%[[PTR]]], [[A]];
163 ; CHECK-NEXT: ret;
164 define void @test_store(half %a, half* %b) #0 {
165 store half %a, half* %b
166 ret void
167 }
168
; Load of a half through a pointer parameter. The checks expect a single 16-bit
; load (ld.b16) from the loaded 64-bit address, stored directly to the return
; slot.
169 ; CHECK-LABEL: test_load(
170 ; CHECK: ld.param.u64 %[[PTR:rd[0-9]+]], [test_load_param_0];
171 ; CHECK-NEXT: ld.b16 [[R:%h[0-9]+]], [%[[PTR]]];
172 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
173 ; CHECK-NEXT: ret;
174 define half @test_load(half* %a) #0 {
175 %r = load half, half* %a
176 ret half %r
177 }
178
; Copy of a half between two pointers where both the load and the store are
; marked "align 1". The checks expect the 16-bit value to be moved as two
; separate byte accesses (ld.u8/st.u8 at offsets 0 and +1) rather than a single
; b16 access. This function has no attribute group attached.
179 ; CHECK-LABEL: .visible .func test_halfp0a1(
180 ; CHECK-DAG: ld.param.u64 %[[FROM:rd?[0-9]+]], [test_halfp0a1_param_0];
181 ; CHECK-DAG: ld.param.u64 %[[TO:rd?[0-9]+]], [test_halfp0a1_param_1];
182 ; CHECK-DAG: ld.u8 [[B0:%r[sd]?[0-9]+]], [%[[FROM]]]
183 ; CHECK-DAG: st.u8 [%[[TO]]], [[B0]]
184 ; CHECK-DAG: ld.u8 [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1]
185 ; CHECK-DAG: st.u8 [%[[TO]]+1], [[B1]]
186 ; CHECK: ret
187 define void @test_halfp0a1(half * noalias readonly %from, half * %to) {
188 %1 = load half, half * %from , align 1
189 store half %1, half * %to , align 1
190 ret void
191 }
192
; External function taking and returning half; the call target for test_call,
; test_call_flipped and test_tailcall_flipped.
193 declare half @test_callee(half %a, half %b) #0
194
; Call with two half arguments and a half result. The checks expect each
; outgoing parameter and the return value to be declared as .b32 but written
; and read with 16-bit accesses (st.param.b16 / ld.param.b16), with %a going to
; param0 and %b to param1.
195 ; CHECK-LABEL: test_call(
196 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_call_param_0];
197 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_call_param_1];
198 ; CHECK: {
199 ; CHECK-DAG: .param .b32 param0;
200 ; CHECK-DAG: .param .b32 param1;
201 ; CHECK-DAG: st.param.b16 [param0+0], [[A]];
202 ; CHECK-DAG: st.param.b16 [param1+0], [[B]];
203 ; CHECK-DAG: .param .b32 retval0;
204 ; CHECK: call.uni (retval0),
205 ; CHECK-NEXT: test_callee,
206 ; CHECK: );
207 ; CHECK-NEXT: ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
208 ; CHECK-NEXT: }
209 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
210 ; CHECK-NEXT: ret;
211 define half @test_call(half %a, half %b) #0 {
212 %r = call half @test_callee(half %a, half %b)
213 ret half %r
214 }
215
; Same call sequence as test_call, but the arguments are passed in swapped
; order: the checks expect %b to be stored to param0 and %a to param1.
216 ; CHECK-LABEL: test_call_flipped(
217 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_call_flipped_param_0];
218 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_call_flipped_param_1];
219 ; CHECK: {
220 ; CHECK-DAG: .param .b32 param0;
221 ; CHECK-DAG: .param .b32 param1;
222 ; CHECK-DAG: st.param.b16 [param0+0], [[B]];
223 ; CHECK-DAG: st.param.b16 [param1+0], [[A]];
224 ; CHECK-DAG: .param .b32 retval0;
225 ; CHECK: call.uni (retval0),
226 ; CHECK-NEXT: test_callee,
227 ; CHECK: );
228 ; CHECK-NEXT: ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
229 ; CHECK-NEXT: }
230 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
231 ; CHECK-NEXT: ret;
232 define half @test_call_flipped(half %a, half %b) #0 {
233 %r = call half @test_callee(half %b, half %a)
234 ret half %r
235 }
236
; Swapped-argument call marked "tail call" in the IR. The checks expect exactly
; the same explicit call sequence as test_call_flipped (parameter stores,
; call.uni, load of retval0, store to func_retval0, ret).
237 ; CHECK-LABEL: test_tailcall_flipped(
238 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_tailcall_flipped_param_0];
239 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_tailcall_flipped_param_1];
240 ; CHECK: {
241 ; CHECK-DAG: .param .b32 param0;
242 ; CHECK-DAG: .param .b32 param1;
243 ; CHECK-DAG: st.param.b16 [param0+0], [[B]];
244 ; CHECK-DAG: st.param.b16 [param1+0], [[A]];
245 ; CHECK-DAG: .param .b32 retval0;
246 ; CHECK: call.uni (retval0),
247 ; CHECK-NEXT: test_callee,
248 ; CHECK: );
249 ; CHECK-NEXT: ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
250 ; CHECK-NEXT: }
251 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
252 ; CHECK-NEXT: ret;
253 define half @test_tailcall_flipped(half %a, half %b) #0 {
254 %r = tail call half @test_callee(half %b, half %a)
255 ret half %r
256 }
257
; select between two halves on an "i1 zeroext" parameter. The checks expect the
; condition to be turned into a predicate by comparing a 16-bit register with 1
; (setp.eq.b16), followed by a single selp.b16 on the half registers.
258 ; CHECK-LABEL: test_select(
259 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_select_param_0];
260 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_select_param_1];
261 ; CHECK-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1;
262 ; CHECK-NEXT: selp.b16 [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]];
263 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
264 ; CHECK-NEXT: ret;
265 define half @test_select(half %a, half %b, i1 zeroext %c) #0 {
266 %r = select i1 %c, half %a, half %b
267 ret half %r
268 }
269
; select between two halves on the result of "fcmp une" of two other halves.
; The compare is expected as setp.neu.f16 with the F16 prefix, or as
; setp.neu.f32 on operands extended to f32 with the NOF16 prefix; in both cases
; the select itself stays a selp.b16 on the half registers.
270 ; CHECK-LABEL: test_select_cc(
271 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_select_cc_param_0];
272 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_select_cc_param_1];
273 ; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_select_cc_param_2];
274 ; CHECK-DAG: ld.param.b16 [[D:%h[0-9]+]], [test_select_cc_param_3];
275 ; CHECK-F16: setp.neu.f16 [[PRED:%p[0-9]+]], [[C]], [[D]]
276 ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF:%f[0-9]+]], [[D]];
277 ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF:%f[0-9]+]], [[C]];
278 ; CHECK-NOF16: setp.neu.f32 [[PRED:%p[0-9]+]], [[CF]], [[DF]]
279 ; CHECK: selp.b16 [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]];
280 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
281 ; CHECK-NEXT: ret;
282 define half @test_select_cc(half %a, half %b, half %c, half %d) #0 {
283 %cc = fcmp une half %c, %d
284 %r = select i1 %cc, half %a, half %b
285 ret half %r
286 }
287
; select between two floats on the result of "fcmp une" of two halves. The
; half compare follows the same F16 / NOF16 split as test_select_cc; the select
; is expected as selp.f32 and the result is stored as f32.
288 ; CHECK-LABEL: test_select_cc_f32_f16(
289 ; CHECK-DAG: ld.param.f32 [[A:%f[0-9]+]], [test_select_cc_f32_f16_param_0];
290 ; CHECK-DAG: ld.param.f32 [[B:%f[0-9]+]], [test_select_cc_f32_f16_param_1];
291 ; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_select_cc_f32_f16_param_2];
292 ; CHECK-DAG: ld.param.b16 [[D:%h[0-9]+]], [test_select_cc_f32_f16_param_3];
293 ; CHECK-F16: setp.neu.f16 [[PRED:%p[0-9]+]], [[C]], [[D]]
294 ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF:%f[0-9]+]], [[D]];
295 ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF:%f[0-9]+]], [[C]];
296 ; CHECK-NOF16: setp.neu.f32 [[PRED:%p[0-9]+]], [[CF]], [[DF]]
297 ; CHECK-NEXT: selp.f32 [[R:%f[0-9]+]], [[A]], [[B]], [[PRED]];
298 ; CHECK-NEXT: st.param.f32 [func_retval0+0], [[R]];
299 ; CHECK-NEXT: ret;
300 define float @test_select_cc_f32_f16(float %a, float %b, half %c, half %d) #0 {
301 %cc = fcmp une half %c, %d
302 %r = select i1 %cc, float %a, float %b
303 ret float %r
304 }
305
; select between two halves on the result of "fcmp une" of two floats. Only the
; common prefix is used: the compare is expected as setp.neu.f32 in every run,
; followed by selp.b16 on the half operands.
306 ; CHECK-LABEL: test_select_cc_f16_f32(
307 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_select_cc_f16_f32_param_0];
308 ; CHECK-DAG: ld.param.f32 [[C:%f[0-9]+]], [test_select_cc_f16_f32_param_2];
309 ; CHECK-DAG: ld.param.f32 [[D:%f[0-9]+]], [test_select_cc_f16_f32_param_3];
310 ; CHECK-DAG: setp.neu.f32 [[PRED:%p[0-9]+]], [[C]], [[D]]
311 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_select_cc_f16_f32_param_1];
312 ; CHECK-NEXT: selp.b16 [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]];
313 ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
314 ; CHECK-NEXT: ret;
315 define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 {
316 %cc = fcmp une float %c, %d
317 %r = select i1 %cc, half %a, half %b
318 ret half %r
319 }
320
; fcmp une on two halves, returning i1. The compare is expected as setp.neu on
; f16 (F16 prefix) or on operands extended to f32 (NOF16 prefix); the predicate
; is then turned into 1/0 with selp.u32 and stored to the b32 return slot.
321 ; CHECK-LABEL: test_fcmp_une(
322 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_une_param_0];
323 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_une_param_1];
324 ; CHECK-F16: setp.neu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
325 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
326 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
327 ; CHECK-NOF16: setp.neu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
328 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
329 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
330 ; CHECK-NEXT: ret;
331 define i1 @test_fcmp_une(half %a, half %b) #0 {
332 %r = fcmp une half %a, %b
333 ret i1 %r
334 }
335
; fcmp ueq on two halves, returning i1. Same structure as test_fcmp_une, with
; the compare expected as setp.equ (f16 or f32 depending on the prefix).
336 ; CHECK-LABEL: test_fcmp_ueq(
337 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ueq_param_0];
338 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ueq_param_1];
339 ; CHECK-F16: setp.equ.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
340 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
341 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
342 ; CHECK-NOF16: setp.equ.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
343 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
344 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
345 ; CHECK-NEXT: ret;
346 define i1 @test_fcmp_ueq(half %a, half %b) #0 {
347 %r = fcmp ueq half %a, %b
348 ret i1 %r
349 }
350
; fcmp ugt on two halves, returning i1. Same structure as test_fcmp_une, with
; the compare expected as setp.gtu (f16 or f32 depending on the prefix).
351 ; CHECK-LABEL: test_fcmp_ugt(
352 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ugt_param_0];
353 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ugt_param_1];
354 ; CHECK-F16: setp.gtu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
355 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
356 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
357 ; CHECK-NOF16: setp.gtu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
358 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
359 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
360 ; CHECK-NEXT: ret;
361 define i1 @test_fcmp_ugt(half %a, half %b) #0 {
362 %r = fcmp ugt half %a, %b
363 ret i1 %r
364 }
365
; fcmp uge on two halves, returning i1. Same structure as test_fcmp_une, with
; the compare expected as setp.geu (f16 or f32 depending on the prefix).
366 ; CHECK-LABEL: test_fcmp_uge(
367 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_uge_param_0];
368 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_uge_param_1];
369 ; CHECK-F16: setp.geu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
370 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
371 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
372 ; CHECK-NOF16: setp.geu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
373 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
374 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
375 ; CHECK-NEXT: ret;
376 define i1 @test_fcmp_uge(half %a, half %b) #0 {
377 %r = fcmp uge half %a, %b
378 ret i1 %r
379 }
380
381 ; CHECK-LABEL: test_fcmp_ult(
382 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ult_param_0];
383 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ult_param_1];
384 ; CHECK-F16: setp.ltu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
385 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
386 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
387 ; CHECK-NOF16: setp.ltu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
388 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
389 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
390 ; CHECK-NEXT: ret;
391 define i1 @test_fcmp_ult(half %a, half %b) #0 {
392 %r = fcmp ult half %a, %b
393 ret i1 %r
394 }
395
396 ; CHECK-LABEL: test_fcmp_ule(
397 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ule_param_0];
398 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ule_param_1];
399 ; CHECK-F16: setp.leu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
400 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
401 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
402 ; CHECK-NOF16: setp.leu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
403 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
404 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
405 ; CHECK-NEXT: ret;
406 define i1 @test_fcmp_ule(half %a, half %b) #0 {
407 %r = fcmp ule half %a, %b
408 ret i1 %r
409 }
410
411
412 ; CHECK-LABEL: test_fcmp_uno(
413 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_uno_param_0];
414 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_uno_param_1];
415 ; CHECK-F16: setp.nan.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
416 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
417 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
418 ; CHECK-NOF16: setp.nan.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
419 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
420 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
421 ; CHECK-NEXT: ret;
422 define i1 @test_fcmp_uno(half %a, half %b) #0 {
423 %r = fcmp uno half %a, %b
424 ret i1 %r
425 }
426
427 ; CHECK-LABEL: test_fcmp_one(
428 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_one_param_0];
429 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_one_param_1];
430 ; CHECK-F16: setp.ne.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
431 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
432 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
433 ; CHECK-NOF16: setp.ne.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
434 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
435 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
436 ; CHECK-NEXT: ret;
437 define i1 @test_fcmp_one(half %a, half %b) #0 {
438 %r = fcmp one half %a, %b
439 ret i1 %r
440 }
441
442 ; CHECK-LABEL: test_fcmp_oeq(
443 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_oeq_param_0];
444 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_oeq_param_1];
445 ; CHECK-F16: setp.eq.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
446 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
447 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
448 ; CHECK-NOF16: setp.eq.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
449 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
450 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
451 ; CHECK-NEXT: ret;
452 define i1 @test_fcmp_oeq(half %a, half %b) #0 {
453 %r = fcmp oeq half %a, %b
454 ret i1 %r
455 }
456
457 ; CHECK-LABEL: test_fcmp_ogt(
458 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ogt_param_0];
459 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ogt_param_1];
460 ; CHECK-F16: setp.gt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
461 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
462 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
463 ; CHECK-NOF16: setp.gt.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
464 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
465 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
466 ; CHECK-NEXT: ret;
467 define i1 @test_fcmp_ogt(half %a, half %b) #0 {
468 %r = fcmp ogt half %a, %b
469 ret i1 %r
470 }
471
472 ; CHECK-LABEL: test_fcmp_oge(
473 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_oge_param_0];
474 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_oge_param_1];
475 ; CHECK-F16: setp.ge.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
476 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
477 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
478 ; CHECK-NOF16: setp.ge.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
479 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
480 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
481 ; CHECK-NEXT: ret;
482 define i1 @test_fcmp_oge(half %a, half %b) #0 {
483 %r = fcmp oge half %a, %b
484 ret i1 %r
485 }
486
; Ordered less-than compare of two half values; the i1 result is returned
; widened to a 32-bit value (selp.u32 1/0). With native f16 the compare is done
; directly on the halves, otherwise both operands are first converted to f32.
487 ; CHECK-LABEL: test_fcmp_olt(
488 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_olt_param_0];
489 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_olt_param_1];
490 ; CHECK-F16: setp.lt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
491 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
492 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
493 ; CHECK-NOF16: setp.lt.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
494 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
495 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
496 ; CHECK-NEXT: ret;
497 define i1 @test_fcmp_olt(half %a, half %b) #0 {
498 %r = fcmp olt half %a, %b
499 ret i1 %r
500 }
501
; Ordered less-than-or-equal compare of two half values; the i1 result is
; returned widened to a 32-bit value (selp.u32 1/0). With native f16 the compare
; is done directly on the halves, otherwise both operands are converted to f32.
502 ; CHECK-LABEL: test_fcmp_ole(
503 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ole_param_0];
504 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ole_param_1];
505 ; CHECK-F16: setp.le.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
506 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
507 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
508 ; CHECK-NOF16: setp.le.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
509 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
510 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
511 ; CHECK-NEXT: ret;
512 define i1 @test_fcmp_ole(half %a, half %b) #0 {
513 %r = fcmp ole half %a, %b
514 ret i1 %r
515 }
516
517 ; CHECK-LABEL: test_fcmp_ord(
518 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ord_param_0];
519 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ord_param_1];
520 ; CHECK-F16: setp.num.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
521 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
522 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
523 ; CHECK-NOF16: setp.num.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
524 ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
525 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
526 ; CHECK-NEXT: ret;
527 define i1 @test_fcmp_ord(half %a, half %b) #0 {
528 %r = fcmp ord half %a, %b
529 ret i1 %r
530 }
531
; Conditional branch on a half comparison. The IR condition is `fcmp uge`, but
; the expected PTX evaluates the opposite predicate (setp.lt) and branches on
; it: the store through %p1 (the `then` block) is expected on the fall-through
; path and the store through %p2 (the `else` block) after the branch label.
532 ; CHECK-LABEL: test_br_cc(
533 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_br_cc_param_0];
534 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_br_cc_param_1];
535 ; CHECK-DAG: ld.param.u64 %[[C:rd[0-9]+]], [test_br_cc_param_2];
536 ; CHECK-DAG: ld.param.u64 %[[D:rd[0-9]+]], [test_br_cc_param_3];
537 ; CHECK-F16: setp.lt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
538 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
539 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
540 ; CHECK-NOF16: setp.lt.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
541 ; CHECK-NEXT: @[[PRED]] bra [[LABEL:LBB.*]];
542 ; CHECK: st.u32 [%[[C]]],
543 ; CHECK: [[LABEL]]:
544 ; CHECK: st.u32 [%[[D]]],
545 ; CHECK: ret;
546 define void @test_br_cc(half %a, half %b, i32* %p1, i32* %p2) #0 {
547 %c = fcmp uge half %a, %b
548 br i1 %c, label %then, label %else
549 then:
550 store i32 0, i32* %p1
551 ret void
552 else:
553 store i32 0, i32* %p2
554 ret void
555 }
556
; Loop-carried half value. %r is a phi of the load done before the loop and the
; load done in the previous iteration; the loop repeats while @test_dummy
; returns true, and the value held by the phi is returned on exit. The checks
; expect the phi to show up as a mov.b16 at the top of the loop body.
557 ; CHECK-LABEL: test_phi(
558 ; CHECK: ld.param.u64 %[[P1:rd[0-9]+]], [test_phi_param_0];
559 ; CHECK: ld.b16 {{%h[0-9]+}}, [%[[P1]]];
560 ; CHECK: [[LOOP:LBB[0-9_]+]]:
561 ; CHECK: mov.b16 [[R:%h[0-9]+]], [[AB:%h[0-9]+]];
562 ; CHECK: ld.b16 [[AB:%h[0-9]+]], [%[[P1]]];
563 ; CHECK: {
564 ; CHECK: st.param.b64 [param0+0], %[[P1]];
565 ; CHECK: call.uni (retval0),
566 ; CHECK-NEXT: test_dummy
567 ; CHECK: }
568 ; CHECK: setp.eq.b32 [[PRED:%p[0-9]+]], %r{{[0-9]+}}, 1;
569 ; CHECK: @[[PRED]] bra [[LOOP]];
570 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
571 ; CHECK: ret;
572 define half @test_phi(half* %p1) #0 {
573 entry:
574 %a = load half, half* %p1
575 br label %loop
576 loop:
577 %r = phi half [%a, %entry], [%b, %loop]
578 %b = load half, half* %p1
579 %c = call i1 @test_dummy(half* %p1)
580 br i1 %c, label %loop, label %return
581 return:
582 ret half %r
583 }
; External callee used as the loop condition in test_phi.
584 declare i1 @test_dummy(half* %p1) #0
585
586 ; CHECK-LABEL: test_fptosi_i32(
587 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptosi_i32_param_0];
588 ; CHECK: cvt.rzi.s32.f16 [[R:%r[0-9]+]], [[A]];
589 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
590 ; CHECK: ret;
591 define i32 @test_fptosi_i32(half %a) #0 {
592 %r = fptosi half %a to i32
593 ret i32 %r
594 }
595
596 ; CHECK-LABEL: test_fptosi_i64(
597 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptosi_i64_param_0];
598 ; CHECK: cvt.rzi.s64.f16 [[R:%rd[0-9]+]], [[A]];
599 ; CHECK: st.param.b64 [func_retval0+0], [[R]];
600 ; CHECK: ret;
601 define i64 @test_fptosi_i64(half %a) #0 {
602 %r = fptosi half %a to i64
603 ret i64 %r
604 }
605
606 ; CHECK-LABEL: test_fptoui_i32(
607 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptoui_i32_param_0];
608 ; CHECK: cvt.rzi.u32.f16 [[R:%r[0-9]+]], [[A]];
609 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
610 ; CHECK: ret;
611 define i32 @test_fptoui_i32(half %a) #0 {
612 %r = fptoui half %a to i32
613 ret i32 %r
614 }
615
616 ; CHECK-LABEL: test_fptoui_i64(
617 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptoui_i64_param_0];
618 ; CHECK: cvt.rzi.u64.f16 [[R:%rd[0-9]+]], [[A]];
619 ; CHECK: st.param.b64 [func_retval0+0], [[R]];
620 ; CHECK: ret;
621 define i64 @test_fptoui_i64(half %a) #0 {
622 %r = fptoui half %a to i64
623 ret i64 %r
624 }
625
626 ; CHECK-LABEL: test_uitofp_i32(
627 ; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_uitofp_i32_param_0];
628 ; CHECK: cvt.rn.f16.u32 [[R:%h[0-9]+]], [[A]];
629 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
630 ; CHECK: ret;
631 define half @test_uitofp_i32(i32 %a) #0 {
632 %r = uitofp i32 %a to half
633 ret half %r
634 }
635
636 ; CHECK-LABEL: test_uitofp_i64(
637 ; CHECK: ld.param.u64 [[A:%rd[0-9]+]], [test_uitofp_i64_param_0];
638 ; CHECK: cvt.rn.f16.u64 [[R:%h[0-9]+]], [[A]];
639 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
640 ; CHECK: ret;
641 define half @test_uitofp_i64(i64 %a) #0 {
642 %r = uitofp i64 %a to half
643 ret half %r
644 }
645
646 ; CHECK-LABEL: test_sitofp_i32(
647 ; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_sitofp_i32_param_0];
648 ; CHECK: cvt.rn.f16.s32 [[R:%h[0-9]+]], [[A]];
649 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
650 ; CHECK: ret;
651 define half @test_sitofp_i32(i32 %a) #0 {
652 %r = sitofp i32 %a to half
653 ret half %r
654 }
655
656 ; CHECK-LABEL: test_sitofp_i64(
657 ; CHECK: ld.param.u64 [[A:%rd[0-9]+]], [test_sitofp_i64_param_0];
658 ; CHECK: cvt.rn.f16.s64 [[R:%h[0-9]+]], [[A]];
659 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
660 ; CHECK: ret;
661 define half @test_sitofp_i64(i64 %a) #0 {
662 %r = sitofp i64 %a to half
663 ret half %r
664 }
665
666 ; CHECK-LABEL: test_uitofp_i32_fadd(
667 ; CHECK-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_uitofp_i32_fadd_param_0];
668 ; CHECK-DAG: cvt.rn.f16.u32 [[C:%h[0-9]+]], [[A]];
669 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_uitofp_i32_fadd_param_1];
670 ; CHECK-F16: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[C]];
671 ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
672 ; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
673 ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], [[C32]];
674 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
675 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
676 ; CHECK: ret;
677 define half @test_uitofp_i32_fadd(i32 %a, half %b) #0 {
678 %c = uitofp i32 %a to half
679 %r = fadd half %b, %c
680 ret half %r
681 }
682
; Signed i32 -> half conversion whose result feeds an fadd with a half argument.
; NOTE(review): the four XCHECK-NOF16 lines below carry a prefix that is
; presumably not passed to FileCheck, so the f32-promoted path is not verified
; for this function (test_uitofp_i32_fadd does verify it) - confirm whether
; that is intentional before relying on this test for the no-f16 configuration.
683 ; CHECK-LABEL: test_sitofp_i32_fadd(
684 ; CHECK-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_sitofp_i32_fadd_param_0];
685 ; CHECK-DAG: cvt.rn.f16.s32 [[C:%h[0-9]+]], [[A]];
686 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_sitofp_i32_fadd_param_1];
687 ; CHECK-F16: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[C]];
688 ; XCHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
689 ; XCHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
690 ; XCHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], [[C32]];
691 ; XCHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
692 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
693 ; CHECK: ret;
694 define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 {
695 %c = sitofp i32 %a to half
696 %r = fadd half %b, %c
697 ret half %r
698 }
699
700 ; CHECK-LABEL: test_fptrunc_float(
701 ; CHECK: ld.param.f32 [[A:%f[0-9]+]], [test_fptrunc_float_param_0];
702 ; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[A]];
703 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
704 ; CHECK: ret;
705 define half @test_fptrunc_float(float %a) #0 {
706 %r = fptrunc float %a to half
707 ret half %r
708 }
709
710 ; CHECK-LABEL: test_fptrunc_double(
711 ; CHECK: ld.param.f64 [[A:%fd[0-9]+]], [test_fptrunc_double_param_0];
712 ; CHECK: cvt.rn.f16.f64 [[R:%h[0-9]+]], [[A]];
713 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
714 ; CHECK: ret;
715 define half @test_fptrunc_double(double %a) #0 {
716 %r = fptrunc double %a to half
717 ret half %r
718 }
719
720 ; CHECK-LABEL: test_fpext_float(
721 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fpext_float_param_0];
722 ; CHECK: cvt.f32.f16 [[R:%f[0-9]+]], [[A]];
723 ; CHECK: st.param.f32 [func_retval0+0], [[R]];
724 ; CHECK: ret;
725 define float @test_fpext_float(half %a) #0 {
726 %r = fpext half %a to float
727 ret float %r
728 }
729
730 ; CHECK-LABEL: test_fpext_double(
731 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fpext_double_param_0];
732 ; CHECK: cvt.f64.f16 [[R:%fd[0-9]+]], [[A]];
733 ; CHECK: st.param.f64 [func_retval0+0], [[R]];
734 ; CHECK: ret;
735 define double @test_fpext_double(half %a) #0 {
736 %r = fpext half %a to double
737 ret double %r
738 }
739
740
741 ; CHECK-LABEL: test_bitcast_halftoi16(
742 ; CHECK: ld.param.b16 [[AH:%h[0-9]+]], [test_bitcast_halftoi16_param_0];
743 ; CHECK: mov.b16 [[AS:%rs[0-9]+]], [[AH]]
744 ; CHECK: cvt.u32.u16 [[R:%r[0-9]+]], [[AS]]
745 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
746 ; CHECK: ret;
747 define i16 @test_bitcast_halftoi16(half %a) #0 {
748 %r = bitcast half %a to i16
749 ret i16 %r
750 }
751
752 ; CHECK-LABEL: test_bitcast_i16tohalf(
753 ; CHECK: ld.param.u16 [[AS:%rs[0-9]+]], [test_bitcast_i16tohalf_param_0];
754 ; CHECK: mov.b16 [[AH:%h[0-9]+]], [[AS]]
755 ; CHECK: st.param.b16 [func_retval0+0], [[AH]];
756 ; CHECK: ret;
757 define half @test_bitcast_i16tohalf(i16 %a) #0 {
758 %r = bitcast i16 %a to half
759 ret half %r
760 }
761
762
763 declare half @llvm.sqrt.f16(half %a) #0
764 declare half @llvm.powi.f16(half %a, i32 %b) #0
765 declare half @llvm.sin.f16(half %a) #0
766 declare half @llvm.cos.f16(half %a) #0
767 declare half @llvm.pow.f16(half %a, half %b) #0
768 declare half @llvm.exp.f16(half %a) #0
769 declare half @llvm.exp2.f16(half %a) #0
770 declare half @llvm.log.f16(half %a) #0
771 declare half @llvm.log10.f16(half %a) #0
772 declare half @llvm.log2.f16(half %a) #0
773 declare half @llvm.fma.f16(half %a, half %b, half %c) #0
774 declare half @llvm.fabs.f16(half %a) #0
775 declare half @llvm.minnum.f16(half %a, half %b) #0
776 declare half @llvm.maxnum.f16(half %a, half %b) #0
777 declare half @llvm.copysign.f16(half %a, half %b) #0
778 declare half @llvm.floor.f16(half %a) #0
779 declare half @llvm.ceil.f16(half %a) #0
780 declare half @llvm.trunc.f16(half %a) #0
781 declare half @llvm.rint.f16(half %a) #0
782 declare half @llvm.nearbyint.f16(half %a) #0
783 declare half @llvm.round.f16(half %a) #0
784 declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0
785
786 ; CHECK-LABEL: test_sqrt(
787 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_sqrt_param_0];
788 ; CHECK: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
789 ; CHECK: sqrt.rn.f32 [[RF:%f[0-9]+]], [[AF]];
790 ; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
791 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
792 ; CHECK: ret;
793 define half @test_sqrt(half %a) #0 {
794 %r = call half @llvm.sqrt.f16(half %a)
795 ret half %r
796 }
797
798 ;;; Can't do this yet: requires libcall.
799 ; XCHECK-LABEL: test_powi(
800 ;define half @test_powi(half %a, i32 %b) #0 {
801 ; %r = call half @llvm.powi.f16(half %a, i32 %b)
802 ; ret half %r
803 ;}
804
; llvm.sin on half: the checks expect the value to be widened to f32, evaluated
; with sin.approx.f32 and narrowed back to f16. The function carries attribute
; group #1 ("unsafe-fp-math") in addition to #0.
; NOTE(review): presumably the approximate instruction is only selected under
; unsafe-fp-math - confirm.
805 ; CHECK-LABEL: test_sin(
806 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_sin_param_0];
807 ; CHECK: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
808 ; CHECK: sin.approx.f32 [[RF:%f[0-9]+]], [[AF]];
809 ; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
810 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
811 ; CHECK: ret;
812 define half @test_sin(half %a) #0 #1 {
813 %r = call half @llvm.sin.f16(half %a)
814 ret half %r
815 }
816
; llvm.cos on half: the checks expect the value to be widened to f32, evaluated
; with cos.approx.f32 and narrowed back to f16. The function carries attribute
; group #1 ("unsafe-fp-math") in addition to #0.
; NOTE(review): presumably the approximate instruction is only selected under
; unsafe-fp-math - confirm.
817 ; CHECK-LABEL: test_cos(
818 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_cos_param_0];
819 ; CHECK: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
820 ; CHECK: cos.approx.f32 [[RF:%f[0-9]+]], [[AF]];
821 ; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
822 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
823 ; CHECK: ret;
824 define half @test_cos(half %a) #0 #1 {
825 %r = call half @llvm.cos.f16(half %a)
826 ret half %r
827 }
828
829 ;;; Can't do this yet: requires libcall.
830 ; XCHECK-LABEL: test_pow(
831 ;define half @test_pow(half %a, half %b) #0 {
832 ; %r = call half @llvm.pow.f16(half %a, half %b)
833 ; ret half %r
834 ;}
835
836 ;;; Can't do this yet: requires libcall.
837 ; XCHECK-LABEL: test_exp(
838 ;define half @test_exp(half %a) #0 {
839 ; %r = call half @llvm.exp.f16(half %a)
840 ; ret half %r
841 ;}
842
843 ;;; Can't do this yet: requires libcall.
844 ; XCHECK-LABEL: test_exp2(
845 ;define half @test_exp2(half %a) #0 {
846 ; %r = call half @llvm.exp2.f16(half %a)
847 ; ret half %r
848 ;}
849
850 ;;; Can't do this yet: requires libcall.
851 ; XCHECK-LABEL: test_log(
852 ;define half @test_log(half %a) #0 {
853 ; %r = call half @llvm.log.f16(half %a)
854 ; ret half %r
855 ;}
856
857 ;;; Can't do this yet: requires libcall.
858 ; XCHECK-LABEL: test_log10(
859 ;define half @test_log10(half %a) #0 {
860 ; %r = call half @llvm.log10.f16(half %a)
861 ; ret half %r
862 ;}
863
864 ;;; Can't do this yet: requires libcall.
865 ; XCHECK-LABEL: test_log2(
866 ;define half @test_log2(half %a) #0 {
867 ; %r = call half @llvm.log2.f16(half %a)
868 ; ret half %r
869 ;}
870
871 ; CHECK-LABEL: test_fma(
872 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fma_param_0];
873 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fma_param_1];
874 ; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_fma_param_2];
875 ; CHECK-F16: fma.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]], [[C]];
876 ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
877 ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
878 ; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
879 ; CHECK-NOF16-NEXT: fma.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]];
880 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
881 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
882 ; CHECK: ret
883 define half @test_fma(half %a, half %b, half %c) #0 {
884 %r = call half @llvm.fma.f16(half %a, half %b, half %c)
885 ret half %r
886 }
887
888 ; CHECK-LABEL: test_fabs(
889 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fabs_param_0];
890 ; CHECK: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
891 ; CHECK: abs.f32 [[RF:%f[0-9]+]], [[AF]];
892 ; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
893 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
894 ; CHECK: ret;
895 define half @test_fabs(half %a) #0 {
896 %r = call half @llvm.fabs.f16(half %a)
897 ret half %r
898 }
899
900 ; CHECK-LABEL: test_minnum(
901 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_minnum_param_0];
902 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_minnum_param_1];
903 ; CHECK-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
904 ; CHECK-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
905 ; CHECK: min.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]];
906 ; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
907 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
908 ; CHECK: ret;
909 define half @test_minnum(half %a, half %b) #0 {
910 %r = call half @llvm.minnum.f16(half %a, half %b)
911 ret half %r
912 }
913
914 ; CHECK-LABEL: test_maxnum(
915 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_maxnum_param_0];
916 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_maxnum_param_1];
917 ; CHECK-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
918 ; CHECK-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
919 ; CHECK: max.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]];
920 ; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
921 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
922 ; CHECK: ret;
923 define half @test_maxnum(half %a, half %b) #0 {
924 %r = call half @llvm.maxnum.f16(half %a, half %b)
925 ret half %r
926 }
927
928 ; CHECK-LABEL: test_copysign(
929 ; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_param_0];
930 ; CHECK-DAG: ld.param.b16 [[BH:%h[0-9]+]], [test_copysign_param_1];
931 ; CHECK-DAG: mov.b16 [[AS:%rs[0-9]+]], [[AH]];
932 ; CHECK-DAG: mov.b16 [[BS:%rs[0-9]+]], [[BH]];
933 ; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AS]], 32767;
934 ; CHECK-DAG: and.b16 [[BX:%rs[0-9]+]], [[BS]], -32768;
935 ; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX]];
936 ; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]];
937 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
938 ; CHECK: ret;
939 define half @test_copysign(half %a, half %b) #0 {
940 %r = call half @llvm.copysign.f16(half %a, half %b)
941 ret half %r
942 }
943
944 ; CHECK-LABEL: test_copysign_f32(
945 ; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_f32_param_0];
946 ; CHECK-DAG: ld.param.f32 [[BF:%f[0-9]+]], [test_copysign_f32_param_1];
947 ; CHECK-DAG: mov.b16 [[A:%rs[0-9]+]], [[AH]];
948 ; CHECK-DAG: mov.b32 [[B:%r[0-9]+]], [[BF]];
949 ; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[A]], 32767;
950 ; CHECK-DAG: and.b32 [[BX0:%r[0-9]+]], [[B]], -2147483648;
951 ; CHECK-DAG: shr.u32 [[BX1:%r[0-9]+]], [[BX0]], 16;
952 ; CHECK-DAG: cvt.u16.u32 [[BX2:%rs[0-9]+]], [[BX1]];
953 ; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX2]];
954 ; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]];
955 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
956 ; CHECK: ret;
957 define half @test_copysign_f32(half %a, float %b) #0 {
958 %tb = fptrunc float %b to half
959 %r = call half @llvm.copysign.f16(half %a, half %tb)
960 ret half %r
961 }
962
963 ; CHECK-LABEL: test_copysign_f64(
964 ; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_f64_param_0];
965 ; CHECK-DAG: ld.param.f64 [[BD:%fd[0-9]+]], [test_copysign_f64_param_1];
966 ; CHECK-DAG: mov.b16 [[A:%rs[0-9]+]], [[AH]];
967 ; CHECK-DAG: mov.b64 [[B:%rd[0-9]+]], [[BD]];
968 ; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[A]], 32767;
969 ; CHECK-DAG: and.b64 [[BX0:%rd[0-9]+]], [[B]], -9223372036854775808;
970 ; CHECK-DAG: shr.u64 [[BX1:%rd[0-9]+]], [[BX0]], 48;
971 ; CHECK-DAG: cvt.u16.u64 [[BX2:%rs[0-9]+]], [[BX1]];
972 ; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX2]];
973 ; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]];
974 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
975 ; CHECK: ret;
976 define half @test_copysign_f64(half %a, double %b) #0 {
977 %tb = fptrunc double %b to half
978 %r = call half @llvm.copysign.f16(half %a, half %tb)
979 ret half %r
980 }
981
982 ; CHECK-LABEL: test_copysign_extended(
983 ; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_extended_param_0];
984 ; CHECK-DAG: ld.param.b16 [[BH:%h[0-9]+]], [test_copysign_extended_param_1];
985 ; CHECK-DAG: mov.b16 [[AS:%rs[0-9]+]], [[AH]];
986 ; CHECK-DAG: mov.b16 [[BS:%rs[0-9]+]], [[BH]];
987 ; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AS]], 32767;
988 ; CHECK-DAG: and.b16 [[BX:%rs[0-9]+]], [[BS]], -32768;
989 ; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX]];
990 ; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]];
991 ; CHECK: cvt.f32.f16 [[XR:%f[0-9]+]], [[R]];
992 ; CHECK: st.param.f32 [func_retval0+0], [[XR]];
993 ; CHECK: ret;
994 define float @test_copysign_extended(half %a, half %b) #0 {
995 %r = call half @llvm.copysign.f16(half %a, half %b)
996 %xr = fpext half %r to float
997 ret float %xr
998 }
999
1000 ; CHECK-LABEL: test_floor(
1001 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_floor_param_0];
1002 ; CHECK: cvt.rmi.f16.f16 [[R:%h[0-9]+]], [[A]];
1003 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
1004 ; CHECK: ret;
1005 define half @test_floor(half %a) #0 {
1006 %r = call half @llvm.floor.f16(half %a)
1007 ret half %r
1008 }
1009
1010 ; CHECK-LABEL: test_ceil(
1011 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_ceil_param_0];
1012 ; CHECK: cvt.rpi.f16.f16 [[R:%h[0-9]+]], [[A]];
1013 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
1014 ; CHECK: ret;
1015 define half @test_ceil(half %a) #0 {
1016 %r = call half @llvm.ceil.f16(half %a)
1017 ret half %r
1018 }
1019
1020 ; CHECK-LABEL: test_trunc(
1021 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_trunc_param_0];
1022 ; CHECK: cvt.rzi.f16.f16 [[R:%h[0-9]+]], [[A]];
1023 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
1024 ; CHECK: ret;
1025 define half @test_trunc(half %a) #0 {
1026 %r = call half @llvm.trunc.f16(half %a)
1027 ret half %r
1028 }
1029
1030 ; CHECK-LABEL: test_rint(
1031 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_rint_param_0];
1032 ; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]];
1033 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
1034 ; CHECK: ret;
1035 define half @test_rint(half %a) #0 {
1036 %r = call half @llvm.rint.f16(half %a)
1037 ret half %r
1038 }
1039
1040 ; CHECK-LABEL: test_nearbyint(
1041 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_nearbyint_param_0];
1042 ; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]];
1043 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
1044 ; CHECK: ret;
1045 define half @test_nearbyint(half %a) #0 {
1046 %r = call half @llvm.nearbyint.f16(half %a)
1047 ret half %r
1048 }
1049
; llvm.round on half, expected to lower to a single f16->f16 cvt.
; NOTE(review): the expected instruction is the same cvt.rni.f16.f16 that
; test_rint and test_nearbyint expect. llvm.round is specified to round halfway
; cases away from zero, whereas the .rni modifier is documented as
; round-to-nearest-even - verify that this expectation is intended.
1050 ; CHECK-LABEL: test_round(
1051 ; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_round_param_0];
1052 ; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]];
1053 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
1054 ; CHECK: ret;
1055 define half @test_round(half %a) #0 {
1056 %r = call half @llvm.round.f16(half %a)
1057 ret half %r
1058 }
1059
1060 ; CHECK-LABEL: test_fmuladd(
1061 ; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fmuladd_param_0];
1062 ; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fmuladd_param_1];
1063 ; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_fmuladd_param_2];
1064 ; CHECK-F16: fma.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]], [[C]];
1065 ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
1066 ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
1067 ; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
1068 ; CHECK-NOF16-NEXT: fma.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]];
1069 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
1070 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
1071 ; CHECK: ret;
1072 define half @test_fmuladd(half %a, half %b, half %c) #0 {
1073 %r = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
1074 ret half %r
1075 }
1076
1077 attributes #0 = { nounwind }
1078 attributes #1 = { "unsafe-fp-math" = "true" }
None ; ## Full FP16 support enabled by default.
1 ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
2 ; RUN: -O0 -disable-post-ra -disable-fp-elim \
3 ; RUN: | FileCheck -check-prefixes CHECK,CHECK-F16 %s
4 ; ## FP16 support explicitly disabled.
5 ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
6 ; RUN: -O0 -disable-post-ra -disable-fp-elim --nvptx-no-f16-math \
7 ; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOF16 %s
8 ; ## FP16 is not supported by hardware.
9 ; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 -asm-verbose=false \
10 ; RUN: -disable-post-ra -disable-fp-elim \
11 ; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOF16 %s
12
13 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
14
; Returns the constant vector <1.0, 2.0>. The expected immediate 1073757184 is
; 0x40003C00: half 1.0 (0x3C00) in the low 16 bits (element 0) and half 2.0
; (0x4000) in the high 16 bits (element 1).
15 ; CHECK-LABEL: test_ret_const(
16 ; CHECK: mov.u32 [[T:%r[0-9]+]], 1073757184;
17 ; CHECK: mov.b32 [[R:%hh[0-9]+]], [[T]];
18 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
19 ; CHECK-NEXT: ret;
20 define <2 x half> @test_ret_const() #0 {
21 ret <2 x half> <half 1.0, half 2.0>
22 }
23
24 ; CHECK-LABEL: test_extract_0(
25 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_0_param_0];
26 ; CHECK: mov.b32 {[[R:%h[0-9]+]], %tmp_hi}, [[A]];
27 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
28 ; CHECK: ret;
29 define half @test_extract_0(<2 x half> %a) #0 {
30 %e = extractelement <2 x half> %a, i32 0
31 ret half %e
32 }
33
34 ; CHECK-LABEL: test_extract_1(
35 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_1_param_0];
36 ; CHECK: mov.b32 {%tmp_lo, [[R:%h[0-9]+]]}, [[A]];
37 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
38 ; CHECK: ret;
39 define half @test_extract_1(<2 x half> %a) #0 {
40 %e = extractelement <2 x half> %a, i32 1
41 ret half %e
42 }
43
44 ; CHECK-LABEL: test_extract_i(
45 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_i_param_0];
46 ; CHECK-DAG: ld.param.u64 [[IDX:%rd[0-9]+]], [test_extract_i_param_1];
47 ; CHECK-DAG: setp.eq.s64 [[PRED:%p[0-9]+]], [[IDX]], 0;
48 ; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[A]];
49 ; CHECK: selp.b16 [[R:%h[0-9]+]], [[E0]], [[E1]], [[PRED]];
50 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
51 ; CHECK: ret;
52 define half @test_extract_i(<2 x half> %a, i64 %idx) #0 {
53 %e = extractelement <2 x half> %a, i64 %idx
54 ret half %e
55 }
56
57 ; CHECK-LABEL: test_fadd(
58 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fadd_param_0];
59 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fadd_param_1];
60 ;
61 ; CHECK-F16-NEXT: add.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]];
62 ;
63 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
64 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
65 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
66 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
67 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
68 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
69 ; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
70 ; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
71 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
72 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
73 ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
74 ;
75 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
76 ; CHECK-NEXT: ret;
77 define <2 x half> @test_fadd(<2 x half> %a, <2 x half> %b) #0 {
78 %r = fadd <2 x half> %a, %b
79 ret <2 x half> %r
80 }
81
82 ; Check that we can lower fadd with immediate arguments.
; Here the constant <1.0, 2.0> is the first operand. With native f16 it is
; materialized as the packed immediate 1073757184 (0x40003C00); without it each
; lane is promoted to f32 and the constants appear as 0f3F800000 (1.0) and
; 0f40000000 (2.0).
83 ; CHECK-LABEL: test_fadd_imm_0(
84 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fadd_imm_0_param_0];
85 ;
86 ; CHECK-F16: mov.u32 [[I:%r[0-9]+]], 1073757184;
87 ; CHECK-F16: mov.b32 [[IHH:%hh[0-9]+]], [[I]];
88 ; CHECK-F16: add.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[IHH]];
89 ;
90 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
91 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
92 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
93 ; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], 0f3F800000;
94 ; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], 0f40000000;
95 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
96 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
97 ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
98 ;
99 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
100 ; CHECK-NEXT: ret;
101 define <2 x half> @test_fadd_imm_0(<2 x half> %a) #0 {
102 %r = fadd <2 x half> <half 1.0, half 2.0>, %a
103 ret <2 x half> %r
104 }
105
106 ; CHECK-LABEL: test_fadd_imm_1(
107 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fadd_imm_1_param_0];
108 ;
109 ; CHECK-F16: mov.u32 [[I:%r[0-9+]]], 1073757184;
110 ; CHECK-F16: mov.b32 [[IHH:%hh[0-9+]]], [[I]];
111 ; CHECK-F16: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[IHH]];
112 ;
113 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
114 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
115 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
116 ; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], 0f3F800000;
117 ; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], 0f40000000;
118 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
119 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
120 ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
121 ;
122 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
123 ; CHECK-NEXT: ret;
124 define <2 x half> @test_fadd_imm_1(<2 x half> %a) #0 {
125 %r = fadd <2 x half> %a, <half 1.0, half 2.0>
126 ret <2 x half> %r
127 }
128
129 ; CHECK-LABEL: test_fsub(
130 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fsub_param_0];
131 ;
132 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fsub_param_1];
133 ; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]];
134 ;
135 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
136 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
137 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
138 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
139 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
140 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
141 ; CHECK-NOF16-DAG: sub.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
142 ; CHECK-NOF16-DAG: sub.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
143 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
144 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
145 ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
146 ;
147 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
148 ; CHECK-NEXT: ret;
149 define <2 x half> @test_fsub(<2 x half> %a, <2 x half> %b) #0 {
150 %r = fsub <2 x half> %a, %b
151 ret <2 x half> %r
152 }
153
154 ; CHECK-LABEL: test_fneg(
155 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fneg_param_0];
156 ;
157 ; CHECK-F16: mov.u32 [[I0:%r[0-9+]]], 0;
158 ; CHECK-F16: mov.b32 [[IHH0:%hh[0-9+]]], [[I0]];
159 ; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%hh[0-9]+]], [[IHH0]], [[A]];
160 ;
161 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
162 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
163 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
164 ; CHECK-NOF16-DAG: mov.f32 [[Z:%f[0-9]+]], 0f00000000;
165 ; CHECK-NOF16-DAG: sub.rn.f32 [[FR0:%f[0-9]+]], [[Z]], [[FA0]];
166 ; CHECK-NOF16-DAG: sub.rn.f32 [[FR1:%f[0-9]+]], [[Z]], [[FA1]];
167 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
168 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
169 ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
170 ;
171 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
172 ; CHECK-NEXT: ret;
173 define <2 x half> @test_fneg(<2 x half> %a) #0 {
174 %r = fsub <2 x half> <half 0.0, half 0.0>, %a
175 ret <2 x half> %r
176 }
177
178 ; CHECK-LABEL: test_fmul(
179 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fmul_param_0];
180 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fmul_param_1];
181 ; CHECK-F16-NEXT: mul.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]];
182 ;
183 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
184 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
185 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
186 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
187 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
188 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
189 ; CHECK-NOF16-DAG: mul.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
190 ; CHECK-NOF16-DAG: mul.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
191 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
192 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
193 ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
194 ;
195 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
196 ; CHECK-NEXT: ret;
197 define <2 x half> @test_fmul(<2 x half> %a, <2 x half> %b) #0 {
198 %r = fmul <2 x half> %a, %b
199 ret <2 x half> %r
200 }
201
202 ; CHECK-LABEL: test_fdiv(
203 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fdiv_param_0];
204 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fdiv_param_1];
205 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
206 ; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
207 ; CHECK-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]];
208 ; CHECK-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]];
209 ; CHECK-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]];
210 ; CHECK-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]];
211 ; CHECK-DAG: div.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
212 ; CHECK-DAG: div.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
213 ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]];
214 ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]];
215 ; CHECK-NEXT: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
216 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
217 ; CHECK-NEXT: ret;
218 define <2 x half> @test_fdiv(<2 x half> %a, <2 x half> %b) #0 {
219 %r = fdiv <2 x half> %a, %b
220 ret <2 x half> %r
221 }
222
223 ; CHECK-LABEL: test_frem(
224 ; -- Load two 16x2 inputs and split them into f16 elements
225 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_frem_param_0];
226 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_frem_param_1];
227 ; -- Split into elements
228 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
229 ; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
230 ; -- promote to f32.
231 ; CHECK-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]];
232 ; CHECK-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]];
233 ; CHECK-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]];
234 ; CHECK-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]];
235 ; -- frem(a[0],b[0]).
236 ; CHECK-DAG: div.rn.f32 [[FD0:%f[0-9]+]], [[FA0]], [[FB0]];
237 ; CHECK-DAG: cvt.rmi.f32.f32 [[DI0:%f[0-9]+]], [[FD0]];
238 ; CHECK-DAG: mul.f32 [[RI0:%f[0-9]+]], [[DI0]], [[FB0]];
239 ; CHECK-DAG: sub.f32 [[RF0:%f[0-9]+]], [[FA0]], [[RI0]];
240 ; -- frem(a[1],b[1]).
241 ; CHECK-DAG: div.rn.f32 [[FD1:%f[0-9]+]], [[FA1]], [[FB1]];
242 ; CHECK-DAG: cvt.rmi.f32.f32 [[DI1:%f[0-9]+]], [[FD1]];
243 ; CHECK-DAG: mul.f32 [[RI1:%f[0-9]+]], [[DI1]], [[FB1]];
244 ; CHECK-DAG: sub.f32 [[RF1:%f[0-9]+]], [[FA1]], [[RI1]];
245 ; -- convert back to f16.
246 ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
247 ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
248 ; -- merge into f16x2 and return it.
249 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
250 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
251 ; CHECK-NEXT: ret;
252 define <2 x half> @test_frem(<2 x half> %a, <2 x half> %b) #0 {
253 %r = frem <2 x half> %a, %b
254 ret <2 x half> %r
255 }
256
257 ; CHECK-LABEL: .func test_ldst_v2f16(
258 ; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v2f16_param_0];
259 ; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v2f16_param_1];
260 ; CHECK-DAG: ld.b32 [[E:%hh[0-9]+]], [%[[A]]]
261 ; CHECK: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[E]];
262 ; CHECK-DAG: st.v2.b16 [%[[B]]], {[[E0]], [[E1]]};
263 ; CHECK: ret;
264 define void @test_ldst_v2f16(<2 x half>* %a, <2 x half>* %b) {
265 %t1 = load <2 x half>, <2 x half>* %a
266 store <2 x half> %t1, <2 x half>* %b, align 16
267 ret void
268 }
269
270 ; CHECK-LABEL: .func test_ldst_v3f16(
271 ; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v3f16_param_0];
272 ; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v3f16_param_1];
273 ; -- v3 is inconvenient to capture as it's lowered as ld.b64 + fair
274 ; number of bitshifting instructions that may change at llvm's whim.
275 ; So we only verify that we only issue correct number of writes using
276 ; correct offset, but not the values we write.
277 ; CHECK-DAG: ld.u64
278 ; CHECK-DAG: st.u32 [%[[B]]],
279 ; CHECK-DAG: st.b16 [%[[B]]+4],
280 ; CHECK: ret;
281 define void @test_ldst_v3f16(<3 x half>* %a, <3 x half>* %b) {
282 %t1 = load <3 x half>, <3 x half>* %a
283 store <3 x half> %t1, <3 x half>* %b, align 16
284 ret void
285 }
286
287 ; CHECK-LABEL: .func test_ldst_v4f16(
288 ; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v4f16_param_0];
289 ; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v4f16_param_1];
290 ; CHECK-DAG: ld.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [%[[A]]];
291 ; CHECK-DAG: st.v4.b16 [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]};
292 ; CHECK: ret;
293 define void @test_ldst_v4f16(<4 x half>* %a, <4 x half>* %b) {
294 %t1 = load <4 x half>, <4 x half>* %a
295 store <4 x half> %t1, <4 x half>* %b, align 16
296 ret void
297 }
298
299 ; CHECK-LABEL: .func test_ldst_v8f16(
300 ; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v8f16_param_0];
301 ; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v8f16_param_1];
302 ; CHECK-DAG: ld.v4.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [%[[A]]];
303 ; CHECK-DAG: st.v4.b32 [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]};
304 ; CHECK: ret;
305 define void @test_ldst_v8f16(<8 x half>* %a, <8 x half>* %b) {
306 %t1 = load <8 x half>, <8 x half>* %a
307 store <8 x half> %t1, <8 x half>* %b, align 16
308 ret void
309 }
310
311 declare <2 x half> @test_callee(<2 x half> %a, <2 x half> %b) #0
312
313 ; CHECK-LABEL: test_call(
314 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_call_param_0];
315 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_call_param_1];
316 ; CHECK: {
317 ; CHECK-DAG: .param .align 4 .b8 param0[4];
318 ; CHECK-DAG: .param .align 4 .b8 param1[4];
319 ; CHECK-DAG: st.param.b32 [param0+0], [[A]];
320 ; CHECK-DAG: st.param.b32 [param1+0], [[B]];
321 ; CHECK-DAG: .param .align 4 .b8 retval0[4];
322 ; CHECK: call.uni (retval0),
323 ; CHECK-NEXT: test_callee,
324 ; CHECK: );
325 ; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0];
326 ; CHECK-NEXT: }
327 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
328 ; CHECK-NEXT: ret;
329 define <2 x half> @test_call(<2 x half> %a, <2 x half> %b) #0 {
330 %r = call <2 x half> @test_callee(<2 x half> %a, <2 x half> %b)
331 ret <2 x half> %r
332 }
333
334 ; CHECK-LABEL: test_call_flipped(
335 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_call_flipped_param_0];
336 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_call_flipped_param_1];
337 ; CHECK: {
338 ; CHECK-DAG: .param .align 4 .b8 param0[4];
339 ; CHECK-DAG: .param .align 4 .b8 param1[4];
340 ; CHECK-DAG: st.param.b32 [param0+0], [[B]];
341 ; CHECK-DAG: st.param.b32 [param1+0], [[A]];
342 ; CHECK-DAG: .param .align 4 .b8 retval0[4];
343 ; CHECK: call.uni (retval0),
344 ; CHECK-NEXT: test_callee,
345 ; CHECK: );
346 ; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0];
347 ; CHECK-NEXT: }
348 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
349 ; CHECK-NEXT: ret;
350 define <2 x half> @test_call_flipped(<2 x half> %a, <2 x half> %b) #0 {
351 %r = call <2 x half> @test_callee(<2 x half> %b, <2 x half> %a)
352 ret <2 x half> %r
353 }
354
355 ; CHECK-LABEL: test_tailcall_flipped(
356 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_tailcall_flipped_param_0];
357 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_tailcall_flipped_param_1];
358 ; CHECK: {
359 ; CHECK-DAG: .param .align 4 .b8 param0[4];
360 ; CHECK-DAG: .param .align 4 .b8 param1[4];
361 ; CHECK-DAG: st.param.b32 [param0+0], [[B]];
362 ; CHECK-DAG: st.param.b32 [param1+0], [[A]];
363 ; CHECK-DAG: .param .align 4 .b8 retval0[4];
364 ; CHECK: call.uni (retval0),
365 ; CHECK-NEXT: test_callee,
366 ; CHECK: );
367 ; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0];
368 ; CHECK-NEXT: }
369 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
370 ; CHECK-NEXT: ret;
371 define <2 x half> @test_tailcall_flipped(<2 x half> %a, <2 x half> %b) #0 {
372 %r = tail call <2 x half> @test_callee(<2 x half> %b, <2 x half> %a)
373 ret <2 x half> %r
374 }
375
376 ; CHECK-LABEL: test_select(
377 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_param_0];
378 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_param_1];
379 ; CHECK-DAG: ld.param.u8 [[C:%rs[0-9]+]], [test_select_param_2]
380 ; CHECK-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1;
381 ; CHECK-NEXT: selp.b32 [[R:%hh[0-9]+]], [[A]], [[B]], [[PRED]];
382 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
383 ; CHECK-NEXT: ret;
384 define <2 x half> @test_select(<2 x half> %a, <2 x half> %b, i1 zeroext %c) #0 {
385 %r = select i1 %c, <2 x half> %a, <2 x half> %b
386 ret <2 x half> %r
387 }
388
389 ; CHECK-LABEL: test_select_cc(
390 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_cc_param_0];
391 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_cc_param_1];
392 ; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_select_cc_param_2];
393 ; CHECK-DAG: ld.param.b32 [[D:%hh[0-9]+]], [test_select_cc_param_3];
394 ;
395 ; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[C]], [[D]]
396 ;
397 ; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]]
398 ; CHECK-NOF16-DAG: mov.b32 {[[D0:%h[0-9]+]], [[D1:%h[0-9]+]]}, [[D]]
399 ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF0:%f[0-9]+]], [[D0]];
400 ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF0:%f[0-9]+]], [[C0]];
401 ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF1:%f[0-9]+]], [[D1]];
402 ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF1:%f[0-9]+]], [[C1]];
403 ; CHECK-NOF16-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[CF0]], [[DF0]]
404 ; CHECK-NOF16-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[CF1]], [[DF1]]
405 ;
406 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
407 ; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
408 ; CHECK-DAG: selp.b16 [[R0:%h[0-9]+]], [[A0]], [[B0]], [[P0]];
409 ; CHECK-DAG: selp.b16 [[R1:%h[0-9]+]], [[A1]], [[B1]], [[P1]];
410 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
411 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
412 ; CHECK-NEXT: ret;
413 define <2 x half> @test_select_cc(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x half> %d) #0 {
414 %cc = fcmp une <2 x half> %c, %d
415 %r = select <2 x i1> %cc, <2 x half> %a, <2 x half> %b
416 ret <2 x half> %r
417 }
418
419 ; CHECK-LABEL: test_select_cc_f32_f16(
420 ; CHECK-DAG: ld.param.v2.f32 {[[A0:%f[0-9]+]], [[A1:%f[0-9]+]]}, [test_select_cc_f32_f16_param_0];
421 ; CHECK-DAG: ld.param.v2.f32 {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_select_cc_f32_f16_param_1];
422 ; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_select_cc_f32_f16_param_2];
423 ; CHECK-DAG: ld.param.b32 [[D:%hh[0-9]+]], [test_select_cc_f32_f16_param_3];
424 ;
425 ; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[C]], [[D]]
426 ; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]]
427 ; CHECK-NOF16-DAG: mov.b32 {[[D0:%h[0-9]+]], [[D1:%h[0-9]+]]}, [[D]]
428 ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF0:%f[0-9]+]], [[D0]];
429 ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF0:%f[0-9]+]], [[C0]];
430 ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF1:%f[0-9]+]], [[D1]];
431 ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF1:%f[0-9]+]], [[C1]];
432 ; CHECK-NOF16-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[CF0]], [[DF0]]
433 ; CHECK-NOF16-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[CF1]], [[DF1]]
434 ;
435 ; CHECK-DAG: selp.f32 [[R0:%f[0-9]+]], [[A0]], [[B0]], [[P0]];
436 ; CHECK-DAG: selp.f32 [[R1:%f[0-9]+]], [[A1]], [[B1]], [[P1]];
437 ; CHECK-NEXT: st.param.v2.f32 [func_retval0+0], {[[R0]], [[R1]]};
438 ; CHECK-NEXT: ret;
439 define <2 x float> @test_select_cc_f32_f16(<2 x float> %a, <2 x float> %b,
440 <2 x half> %c, <2 x half> %d) #0 {
441 %cc = fcmp une <2 x half> %c, %d
442 %r = select <2 x i1> %cc, <2 x float> %a, <2 x float> %b
443 ret <2 x float> %r
444 }
445
446 ; CHECK-LABEL: test_select_cc_f16_f32(
447 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_cc_f16_f32_param_0];
448 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_cc_f16_f32_param_1];
449 ; CHECK-DAG: ld.param.v2.f32 {[[C0:%f[0-9]+]], [[C1:%f[0-9]+]]}, [test_select_cc_f16_f32_param_2];
450 ; CHECK-DAG: ld.param.v2.f32 {[[D0:%f[0-9]+]], [[D1:%f[0-9]+]]}, [test_select_cc_f16_f32_param_3];
451 ; CHECK-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[C0]], [[D0]]
452 ; CHECK-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[C1]], [[D1]]
453 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
454 ; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
455 ; CHECK-DAG: selp.b16 [[R0:%h[0-9]+]], [[A0]], [[B0]], [[P0]];
456 ; CHECK-DAG: selp.b16 [[R1:%h[0-9]+]], [[A1]], [[B1]], [[P1]];
457 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
458 ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
459 ; CHECK-NEXT: ret;
460 define <2 x half> @test_select_cc_f16_f32(<2 x half> %a, <2 x half> %b,
461 <2 x float> %c, <2 x float> %d) #0 {
462 %cc = fcmp une <2 x float> %c, %d
463 %r = select <2 x i1> %cc, <2 x half> %a, <2 x half> %b
464 ret <2 x half> %r
465 }
466
467 ; CHECK-LABEL: test_fcmp_une(
468 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_une_param_0];
469 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_une_param_1];
470 ; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
471 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
472 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
473 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
474 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
475 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
476 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
477 ; CHECK-NOF16-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
478 ; CHECK-NOF16-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
479 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
480 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
481 ; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
482 ; CHECK-NEXT: ret;
483 define <2 x i1> @test_fcmp_une(<2 x half> %a, <2 x half> %b) #0 {
484 %r = fcmp une <2 x half> %a, %b
485 ret <2 x i1> %r
486 }
487
488 ; CHECK-LABEL: test_fcmp_ueq(
489 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ueq_param_0];
490 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ueq_param_1];
491 ; CHECK-F16: setp.equ.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
492 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
493 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
494 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
495 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
496 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
497 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
498 ; CHECK-NOF16-DAG: setp.equ.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
499 ; CHECK-NOF16-DAG: setp.equ.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
500 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
501 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
502 ; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
503 ; CHECK-NEXT: ret;
504 define <2 x i1> @test_fcmp_ueq(<2 x half> %a, <2 x half> %b) #0 {
505 %r = fcmp ueq <2 x half> %a, %b
506 ret <2 x i1> %r
507 }
508
509 ; CHECK-LABEL: test_fcmp_ugt(
510 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ugt_param_0];
511 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ugt_param_1];
512 ; CHECK-F16: setp.gtu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
513 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
514 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
515 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
516 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
517 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
518 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
519 ; CHECK-NOF16-DAG: setp.gtu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
520 ; CHECK-NOF16-DAG: setp.gtu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
521 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
522 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
523 ; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
524 ; CHECK-NEXT: ret;
525 define <2 x i1> @test_fcmp_ugt(<2 x half> %a, <2 x half> %b) #0 {
526 %r = fcmp ugt <2 x half> %a, %b
527 ret <2 x i1> %r
528 }
529
530 ; CHECK-LABEL: test_fcmp_uge(
531 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_uge_param_0];
532 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_uge_param_1];
533 ; CHECK-F16: setp.geu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
534 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
535 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
536 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
537 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
538 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
539 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
540 ; CHECK-NOF16-DAG: setp.geu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
541 ; CHECK-NOF16-DAG: setp.geu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
542 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
543 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
544 ; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
545 ; CHECK-NEXT: ret;
546 define <2 x i1> @test_fcmp_uge(<2 x half> %a, <2 x half> %b) #0 {
547 %r = fcmp uge <2 x half> %a, %b
548 ret <2 x i1> %r
549 }
550
551 ; CHECK-LABEL: test_fcmp_ult(
552 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ult_param_0];
553 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ult_param_1];
554 ; CHECK-F16: setp.ltu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
555 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
556 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
557 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
558 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
559 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
560 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
561 ; CHECK-NOF16-DAG: setp.ltu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
562 ; CHECK-NOF16-DAG: setp.ltu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
563 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
564 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
565 ; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
566 ; CHECK-NEXT: ret;
567 define <2 x i1> @test_fcmp_ult(<2 x half> %a, <2 x half> %b) #0 {
568 %r = fcmp ult <2 x half> %a, %b
569 ret <2 x i1> %r
570 }
571
572 ; CHECK-LABEL: test_fcmp_ule(
573 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ule_param_0];
574 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ule_param_1];
575 ; CHECK-F16: setp.leu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
576 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
577 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
578 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
579 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
580 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
581 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
582 ; CHECK-NOF16-DAG: setp.leu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
583 ; CHECK-NOF16-DAG: setp.leu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
584 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
585 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
586 ; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
587 ; CHECK-NEXT: ret;
588 define <2 x i1> @test_fcmp_ule(<2 x half> %a, <2 x half> %b) #0 {
589 %r = fcmp ule <2 x half> %a, %b
590 ret <2 x i1> %r
591 }
592
593
594 ; CHECK-LABEL: test_fcmp_uno(
595 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_uno_param_0];
596 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_uno_param_1];
597 ; CHECK-F16: setp.nan.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
598 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
599 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
600 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
601 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
602 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
603 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
604 ; CHECK-NOF16-DAG: setp.nan.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
605 ; CHECK-NOF16-DAG: setp.nan.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
606 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
607 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
608 ; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
609 ; CHECK-NEXT: ret;
610 define <2 x i1> @test_fcmp_uno(<2 x half> %a, <2 x half> %b) #0 {
611 %r = fcmp uno <2 x half> %a, %b
612 ret <2 x i1> %r
613 }
614
615 ; CHECK-LABEL: test_fcmp_one(
616 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_one_param_0];
617 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_one_param_1];
618 ; CHECK-F16: setp.ne.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
619 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
620 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
621 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
622 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
623 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
624 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
625 ; CHECK-NOF16-DAG: setp.ne.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
626 ; CHECK-NOF16-DAG: setp.ne.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
627 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
628 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
629 ; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
630 ; CHECK-NEXT: ret;
631 define <2 x i1> @test_fcmp_one(<2 x half> %a, <2 x half> %b) #0 {
632 %r = fcmp one <2 x half> %a, %b
633 ret <2 x i1> %r
634 }
635
636 ; CHECK-LABEL: test_fcmp_oeq(
637 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_oeq_param_0];
638 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_oeq_param_1];
639 ; CHECK-F16: setp.eq.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
640 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
641 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
642 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
643 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
644 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
645 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
646 ; CHECK-NOF16-DAG: setp.eq.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
647 ; CHECK-NOF16-DAG: setp.eq.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
648 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
649 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
650 ; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
651 ; CHECK-NEXT: ret;
652 define <2 x i1> @test_fcmp_oeq(<2 x half> %a, <2 x half> %b) #0 {
653 %r = fcmp oeq <2 x half> %a, %b
654 ret <2 x i1> %r
655 }
656
657 ; CHECK-LABEL: test_fcmp_ogt(
658 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ogt_param_0];
659 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ogt_param_1];
660 ; CHECK-F16: setp.gt.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
661 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
662 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
663 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
664 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
665 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
666 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
667 ; CHECK-NOF16-DAG: setp.gt.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
668 ; CHECK-NOF16-DAG: setp.gt.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
669 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
670 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
671 ; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
672 ; CHECK-NEXT: ret;
673 define <2 x i1> @test_fcmp_ogt(<2 x half> %a, <2 x half> %b) #0 {
674 %r = fcmp ogt <2 x half> %a, %b
675 ret <2 x i1> %r
676 }
677
678 ; CHECK-LABEL: test_fcmp_oge(
679 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_oge_param_0];
680 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_oge_param_1];
681 ; CHECK-F16: setp.ge.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
682 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
683 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
684 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
685 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
686 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
687 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
688 ; CHECK-NOF16-DAG: setp.ge.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
689 ; CHECK-NOF16-DAG: setp.ge.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
690 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
691 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
692 ; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
693 ; CHECK-NEXT: ret;
694 define <2 x i1> @test_fcmp_oge(<2 x half> %a, <2 x half> %b) #0 {
695 %r = fcmp oge <2 x half> %a, %b
696 ret <2 x i1> %r
697 }
698
699 ; CHECK-LABEL: test_fcmp_olt(
700 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_olt_param_0];
701 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_olt_param_1];
702 ; CHECK-F16: setp.lt.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
703 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
704 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
705 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
706 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
707 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
708 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
709 ; CHECK-NOF16-DAG: setp.lt.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
710 ; CHECK-NOF16-DAG: setp.lt.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
711 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
712 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
713 ; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
714 ; CHECK-NEXT: ret;
715 define <2 x i1> @test_fcmp_olt(<2 x half> %a, <2 x half> %b) #0 {
716 %r = fcmp olt <2 x half> %a, %b
717 ret <2 x i1> %r
718 }
719
720 ; XCHECK-LABEL: test_fcmp_ole(
721 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ole_param_0];
722 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ole_param_1];
723 ; CHECK-F16: setp.le.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
724 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
725 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
726 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
727 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
728 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
729 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
730 ; CHECK-NOF16-DAG: setp.le.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
731 ; CHECK-NOF16-DAG: setp.le.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
732 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
733 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
734 ; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
735 ; CHECK-NEXT: ret;
736 define <2 x i1> @test_fcmp_ole(<2 x half> %a, <2 x half> %b) #0 {
737 %r = fcmp ole <2 x half> %a, %b
738 ret <2 x i1> %r
739 }
740
741 ; CHECK-LABEL: test_fcmp_ord(
742 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ord_param_0];
743 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ord_param_1];
744 ; CHECK-F16: setp.num.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
745 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
746 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
747 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
748 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
749 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
750 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
751 ; CHECK-NOF16-DAG: setp.num.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
752 ; CHECK-NOF16-DAG: setp.num.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
753 ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
754 ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
755 ; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
756 ; CHECK-NEXT: ret;
757 define <2 x i1> @test_fcmp_ord(<2 x half> %a, <2 x half> %b) #0 {
758 %r = fcmp ord <2 x half> %a, %b
759 ret <2 x i1> %r
760 }
761
762 ; CHECK-LABEL: test_fptosi_i32(
763 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptosi_i32_param_0];
764 ; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
765 ; CHECK-DAG: cvt.rzi.s32.f16 [[R0:%r[0-9]+]], [[A0]];
766 ; CHECK-DAG: cvt.rzi.s32.f16 [[R1:%r[0-9]+]], [[A1]];
767 ; CHECK: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]}
768 ; CHECK: ret;
769 define <2 x i32> @test_fptosi_i32(<2 x half> %a) #0 {
770 %r = fptosi <2 x half> %a to <2 x i32>
771 ret <2 x i32> %r
772 }
773
774 ; CHECK-LABEL: test_fptosi_i64(
775 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptosi_i64_param_0];
776 ; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
777 ; CHECK-DAG: cvt.rzi.s64.f16 [[R0:%rd[0-9]+]], [[A0]];
778 ; CHECK-DAG: cvt.rzi.s64.f16 [[R1:%rd[0-9]+]], [[A1]];
779 ; CHECK: st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]}
780 ; CHECK: ret;
781 define <2 x i64> @test_fptosi_i64(<2 x half> %a) #0 {
782 %r = fptosi <2 x half> %a to <2 x i64>
783 ret <2 x i64> %r
784 }
785
786 ; CHECK-LABEL: test_fptoui_2xi32(
787 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptoui_2xi32_param_0];
788 ; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
789 ; CHECK-DAG: cvt.rzi.u32.f16 [[R0:%r[0-9]+]], [[A0]];
790 ; CHECK-DAG: cvt.rzi.u32.f16 [[R1:%r[0-9]+]], [[A1]];
791 ; CHECK: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]}
792 ; CHECK: ret;
793 define <2 x i32> @test_fptoui_2xi32(<2 x half> %a) #0 {
794 %r = fptoui <2 x half> %a to <2 x i32>
795 ret <2 x i32> %r
796 }
797
798 ; CHECK-LABEL: test_fptoui_2xi64(
799 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptoui_2xi64_param_0];
800 ; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
801 ; CHECK-DAG: cvt.rzi.u64.f16 [[R0:%rd[0-9]+]], [[A0]];
802 ; CHECK-DAG: cvt.rzi.u64.f16 [[R1:%rd[0-9]+]], [[A1]];
803 ; CHECK: st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]}
804 ; CHECK: ret;
805 define <2 x i64> @test_fptoui_2xi64(<2 x half> %a) #0 {
806 %r = fptoui <2 x half> %a to <2 x i64>
807 ret <2 x i64> %r
808 }
809
810 ; CHECK-LABEL: test_uitofp_2xi32(
811 ; CHECK: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_uitofp_2xi32_param_0];
812 ; CHECK-DAG: cvt.rn.f16.u32 [[R0:%h[0-9]+]], [[A0]];
813 ; CHECK-DAG: cvt.rn.f16.u32 [[R1:%h[0-9]+]], [[A1]];
814 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
815 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
816 ; CHECK: ret;
817 define <2 x half> @test_uitofp_2xi32(<2 x i32> %a) #0 {
818 %r = uitofp <2 x i32> %a to <2 x half>
819 ret <2 x half> %r
820 }
821
822 ; CHECK-LABEL: test_uitofp_2xi64(
823 ; CHECK: ld.param.v2.u64 {[[A0:%rd[0-9]+]], [[A1:%rd[0-9]+]]}, [test_uitofp_2xi64_param_0];
824 ; CHECK-DAG: cvt.rn.f32.u64 [[F0:%f[0-9]+]], [[A0]];
825 ; CHECK-DAG: cvt.rn.f32.u64 [[F1:%f[0-9]+]], [[A1]];
826 ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[F0]];
827 ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[F1]];
828 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
829 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
830 ; CHECK: ret;
831 define <2 x half> @test_uitofp_2xi64(<2 x i64> %a) #0 {
832 %r = uitofp <2 x i64> %a to <2 x half>
833 ret <2 x half> %r
834 }
835
836 ; CHECK-LABEL: test_sitofp_2xi32(
837 ; CHECK: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_sitofp_2xi32_param_0];
838 ; CHECK-DAG: cvt.rn.f16.s32 [[R0:%h[0-9]+]], [[A0]];
839 ; CHECK-DAG: cvt.rn.f16.s32 [[R1:%h[0-9]+]], [[A1]];
840 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
841 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
842 ; CHECK: ret;
843 define <2 x half> @test_sitofp_2xi32(<2 x i32> %a) #0 {
844 %r = sitofp <2 x i32> %a to <2 x half>
845 ret <2 x half> %r
846 }
847
848 ; CHECK-LABEL: test_sitofp_2xi64(
849 ; CHECK: ld.param.v2.u64 {[[A0:%rd[0-9]+]], [[A1:%rd[0-9]+]]}, [test_sitofp_2xi64_param_0];
850 ; CHECK-DAG: cvt.rn.f32.s64 [[F0:%f[0-9]+]], [[A0]];
851 ; CHECK-DAG: cvt.rn.f32.s64 [[F1:%f[0-9]+]], [[A1]];
852 ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[F0]];
853 ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[F1]];
854 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
855 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
856 ; CHECK: ret;
857 define <2 x half> @test_sitofp_2xi64(<2 x i64> %a) #0 {
858 %r = sitofp <2 x i64> %a to <2 x half>
859 ret <2 x half> %r
860 }
861
862 ; CHECK-LABEL: test_uitofp_2xi32_fadd(
863 ; CHECK-DAG: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_uitofp_2xi32_fadd_param_0];
864 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_uitofp_2xi32_fadd_param_1];
865 ; CHECK-DAG: cvt.rn.f16.u32 [[C0:%h[0-9]+]], [[A0]];
866 ; CHECK-DAG: cvt.rn.f16.u32 [[C1:%h[0-9]+]], [[A1]];
867
868 ; CHECK-F16-DAG: mov.b32 [[C:%hh[0-9]+]], {[[C0]], [[C1]]}
869 ; CHECK-F16-DAG: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[C]];
870 ;
871 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
872 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
873 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
874 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]]
875 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]]
876 ; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FB0]], [[FC0]];
877 ; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FB1]], [[FC1]];
878 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
879 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
880 ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
881 ;
882 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
883 ; CHECK: ret;
884 define <2 x half> @test_uitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 {
885 %c = uitofp <2 x i32> %a to <2 x half>
886 %r = fadd <2 x half> %b, %c
887 ret <2 x half> %r
888 }
889
890 ; CHECK-LABEL: test_sitofp_2xi32_fadd(
891 ; CHECK-DAG: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_sitofp_2xi32_fadd_param_0];
892 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_sitofp_2xi32_fadd_param_1];
893 ; CHECK-DAG: cvt.rn.f16.s32 [[C0:%h[0-9]+]], [[A0]];
894 ; CHECK-DAG: cvt.rn.f16.s32 [[C1:%h[0-9]+]], [[A1]];
895 ;
896 ; CHECK-F16-DAG: mov.b32 [[C:%hh[0-9]+]], {[[C0]], [[C1]]}
897 ; CHECK-F16-DAG: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[C]];
898 ;
899 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
900 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
901 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
902 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]]
903 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]]
904 ; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FB0]], [[FC0]];
905 ; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FB1]], [[FC1]];
906 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
907 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
908 ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
909 ;
910 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
911 ; CHECK: ret;
912 define <2 x half> @test_sitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 {
913 %c = sitofp <2 x i32> %a to <2 x half>
914 %r = fadd <2 x half> %b, %c
915 ret <2 x half> %r
916 }
917
918 ; CHECK-LABEL: test_fptrunc_2xfloat(
919 ; CHECK: ld.param.v2.f32 {[[A0:%f[0-9]+]], [[A1:%f[0-9]+]]}, [test_fptrunc_2xfloat_param_0];
920 ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[A0]];
921 ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[A1]];
922 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
923 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
924 ; CHECK: ret;
925 define <2 x half> @test_fptrunc_2xfloat(<2 x float> %a) #0 {
926 %r = fptrunc <2 x float> %a to <2 x half>
927 ret <2 x half> %r
928 }
929
930 ; CHECK-LABEL: test_fptrunc_2xdouble(
931 ; CHECK: ld.param.v2.f64 {[[A0:%fd[0-9]+]], [[A1:%fd[0-9]+]]}, [test_fptrunc_2xdouble_param_0];
932 ; CHECK-DAG: cvt.rn.f16.f64 [[R0:%h[0-9]+]], [[A0]];
933 ; CHECK-DAG: cvt.rn.f16.f64 [[R1:%h[0-9]+]], [[A1]];
934 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
935 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
936 ; CHECK: ret;
937 define <2 x half> @test_fptrunc_2xdouble(<2 x double> %a) #0 {
938 %r = fptrunc <2 x double> %a to <2 x half>
939 ret <2 x half> %r
940 }
941
942 ; CHECK-LABEL: test_fpext_2xfloat(
943 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fpext_2xfloat_param_0];
944 ; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
945 ; CHECK-DAG: cvt.f32.f16 [[R0:%f[0-9]+]], [[A0]];
946 ; CHECK-DAG: cvt.f32.f16 [[R1:%f[0-9]+]], [[A1]];
947 ; CHECK-NEXT: st.param.v2.f32 [func_retval0+0], {[[R0]], [[R1]]};
948 ; CHECK: ret;
949 define <2 x float> @test_fpext_2xfloat(<2 x half> %a) #0 {
950 %r = fpext <2 x half> %a to <2 x float>
951 ret <2 x float> %r
952 }
953
954 ; CHECK-LABEL: test_fpext_2xdouble(
955 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fpext_2xdouble_param_0];
956 ; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
957 ; CHECK-DAG: cvt.f64.f16 [[R0:%fd[0-9]+]], [[A0]];
958 ; CHECK-DAG: cvt.f64.f16 [[R1:%fd[0-9]+]], [[A1]];
959 ; CHECK-NEXT: st.param.v2.f64 [func_retval0+0], {[[R0]], [[R1]]};
960 ; CHECK: ret;
961 define <2 x double> @test_fpext_2xdouble(<2 x half> %a) #0 {
962 %r = fpext <2 x half> %a to <2 x double>
963 ret <2 x double> %r
964 }
965
966
967 ; CHECK-LABEL: test_bitcast_2xhalf_to_2xi16(
968 ; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_bitcast_2xhalf_to_2xi16_param_0];
969 ; CHECK-DAG: cvt.u16.u32 [[R0:%rs[0-9]+]], [[A]]
970 ; CHECK-DAG: shr.u32 [[AH:%r[0-9]+]], [[A]], 16
971 ; CHECK-DAG: cvt.u16.u32 [[R1:%rs[0-9]+]], [[AH]]
972 ; CHECK: st.param.v2.b16 [func_retval0+0], {[[R0]], [[R1]]}
973 ; CHECK: ret;
974 define <2 x i16> @test_bitcast_2xhalf_to_2xi16(<2 x half> %a) #0 {
975 %r = bitcast <2 x half> %a to <2 x i16>
976 ret <2 x i16> %r
977 }
978
979 ; CHECK-LABEL: test_bitcast_2xi16_to_2xhalf(
980 ; CHECK: ld.param.v2.u16 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [test_bitcast_2xi16_to_2xhalf_param_0];
981 ; CHECK-DAG: cvt.u32.u16 [[R0:%r[0-9]+]], [[RS0]];
982 ; CHECK-DAG: cvt.u32.u16 [[R1:%r[0-9]+]], [[RS1]];
983 ; CHECK-DAG: shl.b32 [[R1H:%r[0-9]+]], [[R1]], 16;
984 ; CHECK-DAG: or.b32 [[R1H0L:%r[0-9]+]], [[R0]], [[R1H]];
985 ; CHECK: mov.b32 [[R:%hh[0-9]+]], [[R1H0L]];
986 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
987 ; CHECK: ret;
988 define <2 x half> @test_bitcast_2xi16_to_2xhalf(<2 x i16> %a) #0 {
989 %r = bitcast <2 x i16> %a to <2 x half>
990 ret <2 x half> %r
991 }
992
993
994 declare <2 x half> @llvm.sqrt.f16(<2 x half> %a) #0
995 declare <2 x half> @llvm.powi.f16(<2 x half> %a, <2 x i32> %b) #0
996 declare <2 x half> @llvm.sin.f16(<2 x half> %a) #0
997 declare <2 x half> @llvm.cos.f16(<2 x half> %a) #0
998 declare <2 x half> @llvm.pow.f16(<2 x half> %a, <2 x half> %b) #0
999 declare <2 x half> @llvm.exp.f16(<2 x half> %a) #0
1000 declare <2 x half> @llvm.exp2.f16(<2 x half> %a) #0
1001 declare <2 x half> @llvm.log.f16(<2 x half> %a) #0
1002 declare <2 x half> @llvm.log10.f16(<2 x half> %a) #0
1003 declare <2 x half> @llvm.log2.f16(<2 x half> %a) #0
1004 declare <2 x half> @llvm.fma.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0
1005 declare <2 x half> @llvm.fabs.f16(<2 x half> %a) #0
1006 declare <2 x half> @llvm.minnum.f16(<2 x half> %a, <2 x half> %b) #0
1007 declare <2 x half> @llvm.maxnum.f16(<2 x half> %a, <2 x half> %b) #0
1008 declare <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b) #0
1009 declare <2 x half> @llvm.floor.f16(<2 x half> %a) #0
1010 declare <2 x half> @llvm.ceil.f16(<2 x half> %a) #0
1011 declare <2 x half> @llvm.trunc.f16(<2 x half> %a) #0
1012 declare <2 x half> @llvm.rint.f16(<2 x half> %a) #0
1013 declare <2 x half> @llvm.nearbyint.f16(<2 x half> %a) #0
1014 declare <2 x half> @llvm.round.f16(<2 x half> %a) #0
1015 declare <2 x half> @llvm.fmuladd.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0
1016
1017 ; CHECK-LABEL: test_sqrt(
1018 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_sqrt_param_0];
1019 ; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
1020 ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
1021 ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
1022 ; CHECK-DAG: sqrt.rn.f32 [[RF0:%f[0-9]+]], [[AF0]];
1023 ; CHECK-DAG: sqrt.rn.f32 [[RF1:%f[0-9]+]], [[AF1]];
1024 ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
1025 ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
1026 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
1027 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
1028 ; CHECK: ret;
1029 define <2 x half> @test_sqrt(<2 x half> %a) #0 {
1030 %r = call <2 x half> @llvm.sqrt.f16(<2 x half> %a)
1031 ret <2 x half> %r
1032 }
1033
1034 ;;; Can't do this yet: requires libcall.
1035 ; XCHECK-LABEL: test_powi(
1036 ;define <2 x half> @test_powi(<2 x half> %a, <2 x i32> %b) #0 {
1037 ; %r = call <2 x half> @llvm.powi.f16(<2 x half> %a, <2 x i32> %b)
1038 ; ret <2 x half> %r
1039 ;}
1040
1041 ; CHECK-LABEL: test_sin(
1042 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_sin_param_0];
1043 ; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
1044 ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
1045 ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
1046 ; CHECK-DAG: sin.approx.f32 [[RF0:%f[0-9]+]], [[AF0]];
1047 ; CHECK-DAG: sin.approx.f32 [[RF1:%f[0-9]+]], [[AF1]];
1048 ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
1049 ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
1050 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
1051 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
1052 ; CHECK: ret;
1053 define <2 x half> @test_sin(<2 x half> %a) #0 #1 {
1054 %r = call <2 x half> @llvm.sin.f16(<2 x half> %a)
1055 ret <2 x half> %r
1056 }
1057
1058 ; CHECK-LABEL: test_cos(
1059 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_cos_param_0];
1060 ; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
1061 ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
1062 ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
1063 ; CHECK-DAG: cos.approx.f32 [[RF0:%f[0-9]+]], [[AF0]];
1064 ; CHECK-DAG: cos.approx.f32 [[RF1:%f[0-9]+]], [[AF1]];
1065 ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
1066 ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
1067 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
1068 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
1069 ; CHECK: ret;
1070 define <2 x half> @test_cos(<2 x half> %a) #0 #1 {
1071 %r = call <2 x half> @llvm.cos.f16(<2 x half> %a)
1072 ret <2 x half> %r
1073 }
1074
1075 ;;; Can't do this yet: requires libcall.
1076 ; XCHECK-LABEL: test_pow(
1077 ;define <2 x half> @test_pow(<2 x half> %a, <2 x half> %b) #0 {
1078 ; %r = call <2 x half> @llvm.pow.f16(<2 x half> %a, <2 x half> %b)
1079 ; ret <2 x half> %r
1080 ;}
1081
1082 ;;; Can't do this yet: requires libcall.
1083 ; XCHECK-LABEL: test_exp(
1084 ;define <2 x half> @test_exp(<2 x half> %a) #0 {
1085 ; %r = call <2 x half> @llvm.exp.f16(<2 x half> %a)
1086 ; ret <2 x half> %r
1087 ;}
1088
1089 ;;; Can't do this yet: requires libcall.
1090 ; XCHECK-LABEL: test_exp2(
1091 ;define <2 x half> @test_exp2(<2 x half> %a) #0 {
1092 ; %r = call <2 x half> @llvm.exp2.f16(<2 x half> %a)
1093 ; ret <2 x half> %r
1094 ;}
1095
1096 ;;; Can't do this yet: requires libcall.
1097 ; XCHECK-LABEL: test_log(
1098 ;define <2 x half> @test_log(<2 x half> %a) #0 {
1099 ; %r = call <2 x half> @llvm.log.f16(<2 x half> %a)
1100 ; ret <2 x half> %r
1101 ;}
1102
1103 ;;; Can't do this yet: requires libcall.
1104 ; XCHECK-LABEL: test_log10(
1105 ;define <2 x half> @test_log10(<2 x half> %a) #0 {
1106 ; %r = call <2 x half> @llvm.log10.f16(<2 x half> %a)
1107 ; ret <2 x half> %r
1108 ;}
1109
1110 ;;; Can't do this yet: requires libcall.
1111 ; XCHECK-LABEL: test_log2(
1112 ;define <2 x half> @test_log2(<2 x half> %a) #0 {
1113 ; %r = call <2 x half> @llvm.log2.f16(<2 x half> %a)
1114 ; ret <2 x half> %r
1115 ;}
1116
1117 ; CHECK-LABEL: test_fma(
1118 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fma_param_0];
1119 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fma_param_1];
1120 ; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_fma_param_2];
1121 ;
1122 ; CHECK-F16: fma.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]], [[C]];
1123 ;
1124 ; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
1125 ; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
1126 ; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]]
1127 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
1128 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
1129 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]]
1130 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
1131 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
1132 ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]]
1133 ; CHECK-NOF16-DAG: fma.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]], [[FC0]];
1134 ; CHECK-NOF16-DAG: fma.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]], [[FC1]];
1135 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
1136 ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
1137 ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
1138
1139 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
1140 ; CHECK: ret
1141 define <2 x half> @test_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
1142 %r = call <2 x half> @llvm.fma.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
1143 ret <2 x half> %r
1144 }
1145
1146 ; CHECK-LABEL: test_fabs(
1147 ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fabs_param_0];
1148 ; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
1149 ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
1150 ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
1151 ; CHECK-DAG: abs.f32 [[RF0:%f[0-9]+]], [[AF0]];
1152 ; CHECK-DAG: abs.f32 [[RF1:%f[0-9]+]], [[AF1]];
1153 ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
1154 ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
1155 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
1156 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
1157 ; CHECK: ret;
1158 define <2 x half> @test_fabs(<2 x half> %a) #0 {
1159 %r = call <2 x half> @llvm.fabs.f16(<2 x half> %a)
1160 ret <2 x half> %r
1161 }
1162
1163 ; CHECK-LABEL: test_minnum(
1164 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_minnum_param_0];
1165 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_minnum_param_1];
1166 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
1167 ; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
1168 ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
1169 ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
1170 ; CHECK-DAG: cvt.f32.f16 [[BF0:%f[0-9]+]], [[B0]];
1171 ; CHECK-DAG: cvt.f32.f16 [[BF1:%f[0-9]+]], [[B1]];
1172 ; CHECK-DAG: min.f32 [[RF0:%f[0-9]+]], [[AF0]], [[BF0]];
1173 ; CHECK-DAG: min.f32 [[RF1:%f[0-9]+]], [[AF1]], [[BF1]];
1174 ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
1175 ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
1176 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
1177 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
1178 ; CHECK: ret;
1179 define <2 x half> @test_minnum(<2 x half> %a, <2 x half> %b) #0 {
1180 %r = call <2 x half> @llvm.minnum.f16(<2 x half> %a, <2 x half> %b)
1181 ret <2 x half> %r
1182 }
1183
1184 ; CHECK-LABEL: test_maxnum(
1185 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_maxnum_param_0];
1186 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_maxnum_param_1];
1187 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
1188 ; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
1189 ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
1190 ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
1191 ; CHECK-DAG: cvt.f32.f16 [[BF0:%f[0-9]+]], [[B0]];
1192 ; CHECK-DAG: cvt.f32.f16 [[BF1:%f[0-9]+]], [[B1]];
1193 ; CHECK-DAG: max.f32 [[RF0:%f[0-9]+]], [[AF0]], [[BF0]];
1194 ; CHECK-DAG: max.f32 [[RF1:%f[0-9]+]], [[AF1]], [[BF1]];
1195 ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]];
1196 ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]];
1197 ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
1198 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
1199 ; CHECK: ret;
1200 define <2 x half> @test_maxnum(<2 x half> %a, <2 x half> %b) #0 {
1201 %r = call <2 x half> @llvm.maxnum.f16(<2 x half> %a, <2 x half> %b)
1202 ret <2 x half> %r
1203 }
1204
1205 ; CHECK-LABEL: test_copysign(
1206 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_param_0];
1207 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_copysign_param_1];
1208 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
1209 ; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
1210 ; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]];
1211 ; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]];
1212 ; CHECK-DAG: mov.b16 [[BS0:%rs[0-9]+]], [[B0]];
1213 ; CHECK-DAG: mov.b16 [[BS1:%rs[0-9]+]], [[B1]];
1214 ; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[AS0]], 32767;
1215 ; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[AS1]], 32767;
1216 ; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[BS0]], -32768;
1217 ; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[BS1]], -32768;
1218 ; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AX0]], [[BX0]];
1219 ; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AX1]], [[BX1]];
1220 ; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]];
1221 ; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]];
1222 ; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
1223 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
1224 ; CHECK: ret;
1225 define <2 x half> @test_copysign(<2 x half> %a, <2 x half> %b) #0 {
1226 %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b)
1227 ret <2 x half> %r
1228 }
1229
1230 ; CHECK-LABEL: test_copysign_f32(
1231 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_f32_param_0];
1232 ; CHECK-DAG: ld.param.v2.f32 {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_copysign_f32_param_1];
1233 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
1234 ; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]];
1235 ; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]];
1236 ; CHECK-DAG: mov.b32 [[BI0:%r[0-9]+]], [[B0]];
1237 ; CHECK-DAG: mov.b32 [[BI1:%r[0-9]+]], [[B1]];
1238 ; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[AS0]], 32767;
1239 ; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[AS1]], 32767;
1240 ; CHECK-DAG: and.b32 [[BX0:%r[0-9]+]], [[BI0]], -2147483648;
1241 ; CHECK-DAG: and.b32 [[BX1:%r[0-9]+]], [[BI1]], -2147483648;
1242 ; CHECK-DAG: shr.u32 [[BY0:%r[0-9]+]], [[BX0]], 16;
1243 ; CHECK-DAG: shr.u32 [[BY1:%r[0-9]+]], [[BX1]], 16;
1244 ; CHECK-DAG: cvt.u16.u32 [[BZ0:%rs[0-9]+]], [[BY0]];
1245 ; CHECK-DAG: cvt.u16.u32 [[BZ1:%rs[0-9]+]], [[BY1]];
1246 ; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AI0]], [[BZ0]];
1247 ; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AI1]], [[BZ1]];
1248 ; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]];
1249 ; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]];
1250 ; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
1251 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
1252 ; CHECK: ret;
1253 define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 {
1254 %tb = fptrunc <2 x float> %b to <2 x half>
1255 %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %tb)
1256 ret <2 x half> %r
1257 }
1258
1259 ; CHECK-LABEL: test_copysign_f64(
1260 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_f64_param_0];
1261 ; CHECK-DAG: ld.param.v2.f64 {[[B0:%fd[0-9]+]], [[B1:%fd[0-9]+]]}, [test_copysign_f64_param_1];
1262 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
1263 ; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]];
1264 ; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]];
1265 ; CHECK-DAG: mov.b64 [[BI0:%rd[0-9]+]], [[B0]];
1266 ; CHECK-DAG: mov.b64 [[BI1:%rd[0-9]+]], [[B1]];
1267 ; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[AS0]], 32767;
1268 ; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[AS1]], 32767;
1269 ; CHECK-DAG: and.b64 [[BX0:%rd[0-9]+]], [[BI0]], -9223372036854775808;
1270 ; CHECK-DAG: and.b64 [[BX1:%rd[0-9]+]], [[BI1]], -9223372036854775808;
1271 ; CHECK-DAG: shr.u64 [[BY0:%rd[0-9]+]], [[BX0]], 48;
1272 ; CHECK-DAG: shr.u64 [[BY1:%rd[0-9]+]], [[BX1]], 48;
1273 ; CHECK-DAG: cvt.u16.u64 [[BZ0:%rs[0-9]+]], [[BY0]];
1274 ; CHECK-DAG: cvt.u16.u64 [[BZ1:%rs[0-9]+]], [[BY1]];
1275 ; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AI0]], [[BZ0]];
1276 ; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AI1]], [[BZ1]];
1277 ; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]];
1278 ; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]];
1279 ; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
1280 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
1281 ; CHECK: ret;
1282 define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 {
1283 %tb = fptrunc <2 x double> %b to <2 x half>
1284 %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %tb)
1285 ret <2 x half> %r
1286 }
1287
1288 ; CHECK-LABEL: test_copysign_extended(
1289 ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_extended_param_0];
1290 ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_copysign_extended_param_1];
1291 ; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
1292 ; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
1293 ; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]];
1294 ; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]];
1295 ; CHECK-DAG: mov.b16 [[BS0:%rs[0-9]+]], [[B0]];
1296 ; CHECK-DAG: mov.b16 [[BS1:%rs[0-9]+]], [[B1]];
1297 ; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[AS0]], 32767;
1298 ; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[AS1]], 32767;
1299 ; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[BS0]], -32768;
1300 ; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[BS1]], -32768;
1301 ; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AX0]], [[BX0]];
1302 ; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AX1]], [[BX1]];
1303 ; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]];
1304 ; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]];
1305 ; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
1306 ; CHECK: mov.b32 {[[RX0:%h[0-9]+]], [[RX1:%h[0-9]+]]}, [[R]]
1307 ; CHECK-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[RX0]];
1308 ; CHECK-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[RX1]];
1309 ; CHECK: st.param.v2.f32 [func_retval0+0], {[[XR0]], [[XR1]]};
1310 ; CHECK: ret;
1311 define <2 x float> @test_copysign_extended(<2 x half> %a, <2 x half> %b) #0 {
1312 %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b)
1313 %xr = fpext <2 x half> %r to <2 x float>
1314 ret <2 x float> %xr
1315 }