llvm.org GIT mirror llvm / 78e0f47
AMDGPU: Reduce code size with fcanonicalize (fneg x) When fcanonicalize is lowered to a mul, we can use -1.0 for free and avoid the cost of the bigger encoding for source modifiers. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@338244 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 1 year, 6 months ago
6 changed file(s) with 82 addition(s) and 48 deletion(s). Raw diff Collapse all Expand all
566566 int TWO_PI_INV = 0x3e22f983;
567567 int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
568568 int FP16_ONE = 0x3C00;
569 int FP16_NEG_ONE = 0xBC00;
569570 int V2FP16_ONE = 0x3C003C00;
570571 int FP32_ONE = 0x3f800000;
571572 int FP32_NEG_ONE = 0xbf800000;
13861386 >;
13871387
13881388 def : GCNPat<
1389 (fcanonicalize (f16 (fneg (VOP3Mods f16:$src, i32:$src_mods)))),
1390 (V_MUL_F16_e64 0, (i32 CONST.FP16_NEG_ONE), $src_mods, $src, 0, 0)
1391 >;
1392
1393 def : GCNPat<
13891394 (fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))),
13901395 (V_PK_MUL_F16 0, (i32 CONST.V2FP16_ONE), $src_mods, $src, DSTCLAMP.NONE)
13911396 >;
14091414 def : GCNPat<
14101415 (fcanonicalize (f32 (VOP3Mods f32:$src, i32:$src_mods))),
14111416 (V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), $src_mods, $src, 0, 0)
1417 >;
1418
1419 def : GCNPat<
1420 (fcanonicalize (f32 (fneg (VOP3Mods f32:$src, i32:$src_mods)))),
1421 (V_MUL_F32_e64 0, (i32 CONST.FP32_NEG_ONE), $src_mods, $src, 0, 0)
14121422 >;
14131423 }
14141424
224224 }
225225
226226 ; GCN-LABEL: test_no_fold_canonicalize_fneg_value_f32:
227 ; GCN-FLUSH: v_mul_f32_e64 v{{[0-9]+}}, 1.0, -v{{[0-9]+}}
227 ; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, -1.0, v{{[0-9]+}}
228228 ; GCN-DENORM: v_max_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
229229 define amdgpu_kernel void @test_no_fold_canonicalize_fneg_value_f32(float addrspace(1)* %arg) {
230230 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
None ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
1 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
0 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
1 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
22
33 declare half @llvm.fabs.f16(half) #0
44 declare half @llvm.canonicalize.f16(half) #0
99
1010 ; GCN-LABEL: {{^}}v_test_canonicalize_var_f16:
1111 ; GCN: v_max_f16_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
12 ; GCN: buffer_store_short [[REG]]
12 ; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
1313 define amdgpu_kernel void @v_test_canonicalize_var_f16(half addrspace(1)* %out) #1 {
1414 %val = load half, half addrspace(1)* %out
1515 %canonicalized = call half @llvm.canonicalize.f16(half %val)
1919
2020 ; GCN-LABEL: {{^}}s_test_canonicalize_var_f16:
2121 ; GCN: v_max_f16_e64 [[REG:v[0-9]+]], {{s[0-9]+}}, {{s[0-9]+}}
22 ; GCN: buffer_store_short [[REG]]
22 ; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
2323 define amdgpu_kernel void @s_test_canonicalize_var_f16(half addrspace(1)* %out, i16 zeroext %val.arg) #1 {
2424 %val = bitcast i16 %val.arg to half
2525 %canonicalized = call half @llvm.canonicalize.f16(half %val)
2929
3030 ; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_f16:
3131 ; GCN: v_max_f16_e64 [[REG:v[0-9]+]], |{{v[0-9]+}}|, |{{v[0-9]+}}|
32 ; GCN: buffer_store_short [[REG]]
32 ; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
3333 define amdgpu_kernel void @v_test_canonicalize_fabs_var_f16(half addrspace(1)* %out) #1 {
3434 %val = load half, half addrspace(1)* %out
3535 %val.fabs = call half @llvm.fabs.f16(half %val)
4040
4141 ; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_f16:
4242 ; GCN: v_max_f16_e64 [[REG:v[0-9]+]], -|{{v[0-9]+}}|, -|{{v[0-9]+}}|
43 ; GCN: buffer_store_short [[REG]]
43 ; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
4444 define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f16(half addrspace(1)* %out) #1 {
4545 %val = load half, half addrspace(1)* %out
4646 %val.fabs = call half @llvm.fabs.f16(half %val)
5252
5353 ; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_f16:
5454 ; GCN: v_max_f16_e64 [[REG:v[0-9]+]], -{{v[0-9]+}}, -{{v[0-9]+}}
55 ; GCN: buffer_store_short [[REG]]
55 ; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
5656 define amdgpu_kernel void @v_test_canonicalize_fneg_var_f16(half addrspace(1)* %out) #1 {
5757 %val = load half, half addrspace(1)* %out
5858 %val.fneg = fsub half -0.0, %val
6161 ret void
6262 }
6363
64 ; GCN-LABEL: {{^}}v_test_no_denormals_canonicalize_fneg_var_f16:
65 ; GCN: v_mul_f16_e32 [[REG:v[0-9]+]], -1.0, v{{[0-9]+}}
66 ; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
67 define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_var_f16(half addrspace(1)* %out) #2 {
68 %val = load half, half addrspace(1)* %out
69 %val.fneg = fsub half -0.0, %val
70 %canonicalized = call half @llvm.canonicalize.f16(half %val.fneg)
71 store half %canonicalized, half addrspace(1)* %out
72 ret void
73 }
74
75 ; GCN-LABEL: {{^}}v_test_no_denormals_canonicalize_fneg_fabs_var_f16:
76 ; GCN: v_mul_f16_e64 [[REG:v[0-9]+]], -1.0, |v{{[0-9]+}}|
77 ; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
78 define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_fabs_var_f16(half addrspace(1)* %out) #2 {
79 %val = load half, half addrspace(1)* %out
80 %val.fabs = call half @llvm.fabs.f16(half %val)
81 %val.fabs.fneg = fsub half -0.0, %val.fabs
82 %canonicalized = call half @llvm.canonicalize.f16(half %val.fabs.fneg)
83 store half %canonicalized, half addrspace(1)* %out
84 ret void
85 }
86
6487 ; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f16:
6588 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
66 ; GCN: buffer_store_short [[REG]]
89 ; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
6790 define amdgpu_kernel void @test_fold_canonicalize_p0_f16(half addrspace(1)* %out) #1 {
6891 %canonicalized = call half @llvm.canonicalize.f16(half 0.0)
6992 store half %canonicalized, half addrspace(1)* %out
7295
7396 ; GCN-LABEL: {{^}}test_fold_canonicalize_n0_f16:
7497 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff8000{{$}}
75 ; GCN: buffer_store_short [[REG]]
98 ; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
7699 define amdgpu_kernel void @test_fold_canonicalize_n0_f16(half addrspace(1)* %out) #1 {
77100 %canonicalized = call half @llvm.canonicalize.f16(half -0.0)
78101 store half %canonicalized, half addrspace(1)* %out
81104
82105 ; GCN-LABEL: {{^}}test_fold_canonicalize_p1_f16:
83106 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c00{{$}}
84 ; GCN: buffer_store_short [[REG]]
107 ; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
85108 define amdgpu_kernel void @test_fold_canonicalize_p1_f16(half addrspace(1)* %out) #1 {
86109 %canonicalized = call half @llvm.canonicalize.f16(half 1.0)
87110 store half %canonicalized, half addrspace(1)* %out
90113
91114 ; GCN-LABEL: {{^}}test_fold_canonicalize_n1_f16:
92115 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffbc00{{$}}
93 ; GCN: buffer_store_short [[REG]]
116 ; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
94117 define amdgpu_kernel void @test_fold_canonicalize_n1_f16(half addrspace(1)* %out) #1 {
95118 %canonicalized = call half @llvm.canonicalize.f16(half -1.0)
96119 store half %canonicalized, half addrspace(1)* %out
99122
100123 ; GCN-LABEL: {{^}}test_fold_canonicalize_literal_f16:
101124 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4c00{{$}}
102 ; GCN: buffer_store_short [[REG]]
125 ; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
103126 define amdgpu_kernel void @test_fold_canonicalize_literal_f16(half addrspace(1)* %out) #1 {
104127 %canonicalized = call half @llvm.canonicalize.f16(half 16.0)
105128 store half %canonicalized, half addrspace(1)* %out
108131
109132 ; GCN-LABEL: {{^}}test_default_denormals_fold_canonicalize_denormal0_f16:
110133 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff{{$}}
111 ; GCN: buffer_store_short [[REG]]
134 ; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
112135 define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #1 {
113136 %canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF)
114137 store half %canonicalized, half addrspace(1)* %out
117140
118141 ; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f16:
119142 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff{{$}}
120 ; GCN: buffer_store_short [[REG]]
143 ; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
121144 define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #3 {
122145 %canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF)
123146 store half %canonicalized, half addrspace(1)* %out
126149
127150 ; GCN-LABEL: {{^}}test_default_denormals_fold_canonicalize_denormal1_f16:
128151 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff83ff{{$}}
129 ; GCN: buffer_store_short [[REG]]
152 ; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
130153 define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #1 {
131154 %canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF)
132155 store half %canonicalized, half addrspace(1)* %out
135158
136159 ; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f16:
137160 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff83ff{{$}}
138 ; GCN: buffer_store_short [[REG]]
161 ; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
139162 define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #3 {
140163 %canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF)
141164 store half %canonicalized, half addrspace(1)* %out
144167
145168 ; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_f16:
146169 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7c00{{$}}
147 ; GCN: buffer_store_short [[REG]]
170 ; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
148171 define amdgpu_kernel void @test_fold_canonicalize_qnan_f16(half addrspace(1)* %out) #1 {
149172 %canonicalized = call half @llvm.canonicalize.f16(half 0xH7C00)
150173 store half %canonicalized, half addrspace(1)* %out
153176
154177 ; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_f16:
155178 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
156 ; GCN: buffer_store_short [[REG]]
179 ; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
157180 define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f16(half addrspace(1)* %out) #1 {
158181 %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 -1 to half))
159182 store half %canonicalized, half addrspace(1)* %out
162185
163186 ; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_f16:
164187 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
165 ; GCN: buffer_store_short [[REG]]
188 ; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
166189 define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f16(half addrspace(1)* %out) #1 {
167190 %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 -2 to half))
168191 store half %canonicalized, half addrspace(1)* %out
171194
172195 ; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_f16:
173196 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
174 ; GCN: buffer_store_short [[REG]]
197 ; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
175198 define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f16(half addrspace(1)* %out) #1 {
176199 %canonicalized = call half @llvm.canonicalize.f16(half 0xH7C01)
177200 store half %canonicalized, half addrspace(1)* %out
180203
181204 ; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_f16:
182205 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
183 ; GCN: buffer_store_short [[REG]]
206 ; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
184207 define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f16(half addrspace(1)* %out) #1 {
185208 %canonicalized = call half @llvm.canonicalize.f16(half 0xH7DFF)
186209 store half %canonicalized, half addrspace(1)* %out
189212
190213 ; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_f16:
191214 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
192 ; GCN: buffer_store_short [[REG]]
215 ; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
193216 define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f16(half addrspace(1)* %out) #1 {
194217 %canonicalized = call half @llvm.canonicalize.f16(half 0xHFDFF)
195218 store half %canonicalized, half addrspace(1)* %out
198221
199222 ; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_f16:
200223 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
201 ; GCN: buffer_store_short [[REG]]
224 ; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
202225 define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f16(half addrspace(1)* %out) #1 {
203226 %canonicalized = call half @llvm.canonicalize.f16(half 0xHFC01)
204227 store half %canonicalized, half addrspace(1)* %out
211234 ; VI-NOT: v_and_b32
212235
213236 ; GFX9: v_pk_max_f16 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+$}}
214 ; GFX9: buffer_store_dword [[REG]]
237 ; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
215238 define amdgpu_kernel void @v_test_canonicalize_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
216239 %tid = call i32 @llvm.amdgcn.workitem.id.x()
217240 %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
229252
230253 ; GFX9: v_and_b32_e32 [[ABS:v[0-9]+]], 0x7fff7fff, v{{[0-9]+}}
231254 ; GFX9: v_pk_max_f16 [[REG:v[0-9]+]], [[ABS]], [[ABS]]{{$}}
232 ; GCN: buffer_store_dword
255 ; GCN: {{flat|global}}_store_dword
233256 define amdgpu_kernel void @v_test_canonicalize_fabs_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
234257 %tid = call i32 @llvm.amdgcn.workitem.id.x()
235258 %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
247270
248271 ; GFX9: v_and_b32_e32 [[ABS:v[0-9]+]], 0x7fff7fff, v{{[0-9]+}}
249272 ; GFX9: v_pk_max_f16 [[REG:v[0-9]+]], [[ABS]], [[ABS]] neg_lo:[1,1] neg_hi:[1,1]{{$}}
250 ; GCN: buffer_store_dword
273 ; GCN: {{flat|global}}_store_dword
251274 define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
252275 %tid = call i32 @llvm.amdgcn.workitem.id.x()
253276 %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
265288 ; VI-NOT: 0xffff
266289
267290 ; GFX9: v_pk_max_f16 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} neg_lo:[1,1] neg_hi:[1,1]{{$}}
268 ; GFX9: buffer_store_dword [[REG]]
291 ; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
269292 define amdgpu_kernel void @v_test_canonicalize_fneg_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
270293 %tid = call i32 @llvm.amdgcn.workitem.id.x()
271294 %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
282305 ; VI-NOT: v_and_b32
283306
284307 ; GFX9: v_pk_max_f16 [[REG:v[0-9]+]], {{s[0-9]+}}, {{s[0-9]+$}}
285 ; GFX9: buffer_store_dword [[REG]]
308 ; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
286309 define amdgpu_kernel void @s_test_canonicalize_var_v2f16(<2 x half> addrspace(1)* %out, i32 zeroext %val.arg) #1 {
287310 %val = bitcast i32 %val.arg to <2 x half>
288311 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val)
292315
293316 ; GCN-LABEL: {{^}}test_fold_canonicalize_p0_v2f16:
294317 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
295 ; GCN: buffer_store_dword [[REG]]
318 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
296319 define amdgpu_kernel void @test_fold_canonicalize_p0_v2f16(<2 x half> addrspace(1)* %out) #1 {
297320 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> zeroinitializer)
298321 store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
301324
302325 ; GCN-LABEL: {{^}}test_fold_canonicalize_n0_v2f16:
303326 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000{{$}}
304 ; GCN: buffer_store_dword [[REG]]
327 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
305328 define amdgpu_kernel void @test_fold_canonicalize_n0_v2f16(<2 x half> addrspace(1)* %out) #1 {
306329 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half -0.0, half -0.0>)
307330 store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
310333
311334 ; GCN-LABEL: {{^}}test_fold_canonicalize_p1_v2f16:
312335 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c003c00{{$}}
313 ; GCN: buffer_store_dword [[REG]]
336 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
314337 define amdgpu_kernel void @test_fold_canonicalize_p1_v2f16(<2 x half> addrspace(1)* %out) #1 {
315338 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 1.0, half 1.0>)
316339 store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
319342
320343 ; GCN-LABEL: {{^}}test_fold_canonicalize_n1_v2f16:
321344 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbc00bc00{{$}}
322 ; GCN: buffer_store_dword [[REG]]
345 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
323346 define amdgpu_kernel void @test_fold_canonicalize_n1_v2f16(<2 x half> addrspace(1)* %out) #1 {
324347 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half -1.0, half -1.0>)
325348 store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
328351
329352 ; GCN-LABEL: {{^}}test_fold_canonicalize_literal_v2f16:
330353 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4c004c00{{$}}
331 ; GCN: buffer_store_dword [[REG]]
354 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
332355 define amdgpu_kernel void @test_fold_canonicalize_literal_v2f16(<2 x half> addrspace(1)* %out) #1 {
333356 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 16.0, half 16.0>)
334357 store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
337360
338361 ; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_v2f16:
339362 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff03ff{{$}}
340 ; GCN: buffer_store_dword [[REG]]
363 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
341364 define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_v2f16(<2 x half> addrspace(1)* %out) #1 {
342365 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH03FF, half 0xH03FF>)
343366 store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
346369
347370 ; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_v2f16:
348371 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff03ff{{$}}
349 ; GCN: buffer_store_dword [[REG]]
372 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
350373 define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_v2f16(<2 x half> addrspace(1)* %out) #3 {
351374 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH03FF, half 0xH03FF>)
352375 store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
355378
356379 ; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_v2f16:
357380 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x83ff83ff{{$}}
358 ; GCN: buffer_store_dword [[REG]]
381 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
359382 define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_v2f16(<2 x half> addrspace(1)* %out) #1 {
360383 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH83FF, half 0xH83FF>)
361384 store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
364387
365388 ; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_v2f16:
366389 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x83ff83ff{{$}}
367 ; GCN: buffer_store_dword [[REG]]
390 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
368391 define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_v2f16(<2 x half> addrspace(1)* %out) #3 {
369392 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH83FF, half 0xH83FF>)
370393 store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
373396
374397 ; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_v2f16:
375398 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7c007c00{{$}}
376 ; GCN: buffer_store_dword [[REG]]
399 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
377400 define amdgpu_kernel void @test_fold_canonicalize_qnan_v2f16(<2 x half> addrspace(1)* %out) #1 {
378401 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH7C00, half 0xH7C00>)
379402 store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
382405
383406 ; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_v2f16:
384407 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
385 ; GCN: buffer_store_dword [[REG]]
408 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
386409 define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_v2f16(<2 x half> addrspace(1)* %out) #1 {
387410 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> bitcast (i32 -1 to <2 x half>))
388411 store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
391414
392415 ; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_v2f16:
393416 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
394 ; GCN: buffer_store_dword [[REG]]
417 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
395418 define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_v2f16(<2 x half> addrspace(1)* %out) #1 {
396419 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half bitcast (i16 -2 to half), half bitcast (i16 -2 to half)>)
397420 store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
400423
401424 ; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_v2f16:
402425 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
403 ; GCN: buffer_store_dword [[REG]]
426 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
404427 define amdgpu_kernel void @test_fold_canonicalize_snan0_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
405428 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH7C01, half 0xH7C01>)
406429 store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
409432
410433 ; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_v2f16:
411434 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
412 ; GCN: buffer_store_dword [[REG]]
435 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
413436 define amdgpu_kernel void @test_fold_canonicalize_snan1_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
414437 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH7DFF, half 0xH7DFF>)
415438 store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
418441
419442 ; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_v2f16:
420443 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
421 ; GCN: buffer_store_dword [[REG]]
444 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
422445 define amdgpu_kernel void @test_fold_canonicalize_snan2_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
423446 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xHFDFF, half 0xHFDFF>)
424447 store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
427450
428451 ; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_v2f16:
429452 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e007e00{{$}}
430 ; GCN: buffer_store_dword [[REG]]
453 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
431454 define amdgpu_kernel void @test_fold_canonicalize_snan3_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
432455 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xHFC01, half 0xHFC01>)
433456 store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
3939 }
4040
4141 ; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_f32:
42 ; GCN: v_mul_f32_e64 [[REG:v[0-9]+]], 1.0, -|{{v[0-9]+}}|
42 ; GCN: v_mul_f32_e64 [[REG:v[0-9]+]], -1.0, |{{v[0-9]+}}|
4343 ; GCN: buffer_store_dword [[REG]]
4444 define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f32(float addrspace(1)* %out) #1 {
4545 %val = load float, float addrspace(1)* %out
5151 }
5252
5353 ; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_f32:
54 ; GCN: v_mul_f32_e64 [[REG:v[0-9]+]], 1.0, -{{v[0-9]+}}
54 ; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], -1.0, {{v[0-9]+}}
5555 ; GCN: buffer_store_dword [[REG]]
5656 define amdgpu_kernel void @v_test_canonicalize_fneg_var_f32(float addrspace(1)* %out) #1 {
5757 %val = load float, float addrspace(1)* %out
17291729
17301730 ; GCN-LABEL: {{^}}v_fneg_canonicalize_f32:
17311731 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1732 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], 1.0, -[[A]]
1732 ; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], -1.0, [[A]]
17331733 ; GCN: buffer_store_dword [[RESULT]]
17341734 define amdgpu_kernel void @v_fneg_canonicalize_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
17351735 %tid = call i32 @llvm.amdgcn.workitem.id.x()