llvm.org GIT mirror llvm / f3a691f
AMDGPU: Combine fp16/fp64 subtarget features The same control register controls both, and both are set to the same defaults. Keep the old names around as aliases. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292837 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 3 years ago
13 changed file(s) with 120 addition(s) and 66 deletion(s). Raw diff Collapse all Expand all
205205 // Subtarget Features (options and debugging)
206206 //===------------------------------------------------------------===//
207207
208 def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
209 "FP16Denormals",
210 "true",
211 "Enable half precision denormal handling"
212 >;
213
214208 // Some instructions do not support denormals despite this flag. Using
215209 // fp32 denormals also causes instructions to run at the double
216210 // precision rate for the device.
220214 "Enable single precision denormal handling"
221215 >;
222216
217 // Denormal handling for fp64 and fp16 is controlled by the same
218 // config register when fp16 is supported.
219 // TODO: Do we need a separate f16 setting when not legal?
220 def FeatureFP64FP16Denormals : SubtargetFeature<"fp64-fp16-denormals",
221 "FP64FP16Denormals",
222 "true",
223 "Enable double and half precision denormal handling",
224 [FeatureFP64]
225 >;
226
223227 def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
224 "FP64Denormals",
225 "true",
226 "Enable double precision denormal handling",
227 [FeatureFP64]
228 "FP64FP16Denormals",
229 "true",
230 "Enable double and half precision denormal handling",
231 [FeatureFP64, FeatureFP64FP16Denormals]
232 >;
233
234 def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
235 "FP64FP16Denormals",
236 "true",
237 "Enable half precision denormal handling",
238 [FeatureFP64FP16Denormals]
228239 >;
229240
230241 def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
4040 // for SI has the unhelpful behavior that it unsets everything else if you
4141 // disable it.
4242
43 SmallString<256> FullFS("+promote-alloca,+fp64-denormals,+load-store-opt,");
43 SmallString<256> FullFS("+promote-alloca,+fp64-fp16-denormals,+load-store-opt,");
4444 if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
4545 FullFS += "+flat-for-global,+unaligned-buffer-access,";
46
4647 FullFS += FS;
4748
4849 ParseSubtargetFeatures(GPU, FullFS);
5152 // denormals, but should be checked. Should we issue a warning somewhere
5253 // if someone tries to enable these?
5354 if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
54 FP16Denormals = false;
55 FP64FP16Denormals = false;
5556 FP32Denormals = false;
56 FP64Denormals = false;
5757 }
5858
5959 // Set defaults if needed.
7777 FastFMAF32(false),
7878 HalfRate64Ops(false),
7979
80 FP16Denormals(false),
8180 FP32Denormals(false),
82 FP64Denormals(false),
81 FP64FP16Denormals(false),
8382 FPExceptions(false),
8483 FlatForGlobal(false),
8584 UnalignedScratchAccess(false),
8080 bool HalfRate64Ops;
8181
8282 // Dynamically set bits that enable features.
83 bool FP16Denormals;
8483 bool FP32Denormals;
85 bool FP64Denormals;
84 bool FP64FP16Denormals;
8685 bool FPExceptions;
8786 bool FlatForGlobal;
8887 bool UnalignedScratchAccess;
281280 unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const;
282281
283282 bool hasFP16Denormals() const {
284 return FP16Denormals;
283 return FP64FP16Denormals;
285284 }
286285
287286 bool hasFP32Denormals() const {
289288 }
290289
291290 bool hasFP64Denormals() const {
292 return FP64Denormals;
291 return FP64FP16Denormals;
293292 }
294293
295294 bool hasFPExceptions() const {
5353 ret void
5454 }
5555
56 ; GCN-LABEL: {{^}}test_f16_f64_denormals:
57 ; GCN: FloatMode: 192
58 ; GCN: IeeeMode: 1
59 define void @test_f16_f64_denormals(half addrspace(1)* %out0, double addrspace(1)* %out1) #6 {
60 store half 0.0, half addrspace(1)* %out0
61 store double 0.0, double addrspace(1)* %out1
62 ret void
63 }
64
65 ; GCN-LABEL: {{^}}test_no_f16_f64_denormals:
66 ; GCN: FloatMode: 0
67 ; GCN: IeeeMode: 1
68 define void @test_no_f16_f64_denormals(half addrspace(1)* %out0, double addrspace(1)* %out1) #7 {
69 store half 0.0, half addrspace(1)* %out0
70 store double 0.0, double addrspace(1)* %out1
71 ret void
72 }
73
74 ; GCN-LABEL: {{^}}test_f32_f16_f64_denormals:
75 ; GCN: FloatMode: 240
76 ; GCN: IeeeMode: 1
77 define void @test_f32_f16_f64_denormals(half addrspace(1)* %out0, float addrspace(1)* %out1, double addrspace(1)* %out2) #8 {
78 store half 0.0, half addrspace(1)* %out0
79 store float 0.0, float addrspace(1)* %out1
80 store double 0.0, double addrspace(1)* %out2
81 ret void
82 }
83
5684 ; GCN-LABEL: {{^}}kill_gs_const:
5785 ; GCN: IeeeMode: 0
5886 define amdgpu_gs void @kill_gs_const() {
86114 attributes #2 = { nounwind "target-features"="+fp64-denormals" }
87115 attributes #3 = { nounwind "target-features"="+fp32-denormals" }
88116 attributes #4 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" }
89 attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-denormals" }
117 attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
118 attributes #6 = { nounwind "target-features"="+fp64-fp16-denormals" }
119 attributes #7 = { nounwind "target-features"="-fp64-fp16-denormals" }
120 attributes #8 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" }
6868 ret void
6969 }
7070
71 ; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f16:
72 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
73 ; GCN: buffer_store_short [[REG]]
74 define void @test_no_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #1 {
71 ; GCN-LABEL: {{^}}test_default_denormals_fold_canonicalize_denormal0_f16:
72 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff{{$}}
73 ; GCN: buffer_store_short [[REG]]
74 define void @test_default_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #1 {
7575 %canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF)
7676 store half %canonicalized, half addrspace(1)* %out
7777 ret void
8686 ret void
8787 }
8888
89 ; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f16:
90 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
91 ; GCN: buffer_store_short [[REG]]
92 define void @test_no_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #1 {
89 ; GCN-LABEL: {{^}}test_default_denormals_fold_canonicalize_denormal1_f16:
90 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff83ff{{$}}
91 ; GCN: buffer_store_short [[REG]]
92 define void @test_default_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #1 {
9393 %canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF)
9494 store half %canonicalized, half addrspace(1)* %out
9595 ret void
281281 }
282282
283283 ; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_v2f16:
284 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
284 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff03ff{{$}}
285285 ; GCN: buffer_store_dword [[REG]]
286286 define void @test_no_denormals_fold_canonicalize_denormal0_v2f16(<2 x half> addrspace(1)* %out) #1 {
287287 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> )
299299 }
300300
301301 ; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_v2f16:
302 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
302 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x83ff83ff{{$}}
303303 ; GCN: buffer_store_dword [[REG]]
304304 define void @test_no_denormals_fold_canonicalize_denormal1_v2f16(<2 x half> addrspace(1)* %out) #1 {
305305 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> )
381381
382382 attributes #0 = { nounwind readnone }
383383 attributes #1 = { nounwind }
384 attributes #2 = { nounwind "target-features"="-fp16-denormals,-fp16-denormals" }
385 attributes #3 = { nounwind "target-features"="+fp16-denormals,+fp64-denormals" }
384 attributes #2 = { nounwind "target-features"="-fp64-fp16-denormals" }
385 attributes #3 = { nounwind "target-features"="+fp64-fp16-denormals" }
346346
347347 attributes #0 = { nounwind readnone }
348348 attributes #1 = { nounwind }
349 attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-denormals" }
350 attributes #3 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" }
349 attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
350 attributes #3 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" }
0 ; XUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
3
24
35 ; Make sure (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) doesn't
46 ; add an instruction if the fadd has more than one use.
114116 ; VI: v_cndmask_b32_e32
115117 ; VI: v_add_f16_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, |v{{[0-9]+}}|
116118 ; VI: v_mul_f16_e64 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
117 ; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 1.0
119 ; VI-FLUSH: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 1.0
120 ; VI-DENORM: v_fma_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 1.0
118121 define void @multiple_fadd_use_test_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 {
119122 %x = bitcast i16 %x.arg to half
120123 %y = bitcast i16 %y.arg to half
135138
136139 ; GCN-LABEL: {{^}}multiple_use_fadd_fmac_f16:
137140 ; GCN-DAG: v_add_f16_e64 [[MUL2:v[0-9]+]], [[X:s[0-9]+]], s{{[0-9]+}}
138 ; GCN-DAG: v_mac_f16_e64 [[MAD:v[0-9]+]], [[X]], 2.0
141
142 ; VI-FLUSH-DAG: v_mac_f16_e64 [[MAD:v[0-9]+]], [[X]], 2.0
143 ; VI-DENORM-DAG: v_fma_f16 [[MAD:v[0-9]+]], [[X]], 2.0, v{{[0-9]+}}
144
139145 ; GCN-DAG: buffer_store_short [[MUL2]]
140146 ; GCN-DAG: buffer_store_short [[MAD]]
141147 ; GCN: s_endpgm
152158
153159 ; GCN-LABEL: {{^}}multiple_use_fadd_fmad_f16:
154160 ; GCN-DAG: v_add_f16_e64 [[MUL2:v[0-9]+]], |[[X:s[0-9]+]]|, |s{{[0-9]+}}|
155 ; GCN-DAG: v_mad_f16 [[MAD:v[0-9]+]], |[[X]]|, 2.0, v{{[0-9]+}}
161
162 ; VI-FLUSH-DAG: v_mad_f16 [[MAD:v[0-9]+]], |[[X]]|, 2.0, v{{[0-9]+}}
163 ; VI-DENORM-DAG: v_fma_f16 [[MAD:v[0-9]+]], |[[X]]|, 2.0, v{{[0-9]+}}
164
156165 ; GCN-DAG: buffer_store_short [[MUL2]]
157166 ; GCN-DAG: buffer_store_short [[MAD]]
158167 ; GCN: s_endpgm
169178 }
170179
171180 ; GCN-LABEL: {{^}}multiple_use_fadd_multi_fmad_f16:
172 ; GCN: v_mad_f16 {{v[0-9]+}}, |[[X:s[0-9]+]]|, 2.0, v{{[0-9]+}}
173 ; GCN: v_mad_f16 {{v[0-9]+}}, |[[X]]|, 2.0, v{{[0-9]+}}
181 ; VI-FLUSH: v_mad_f16 {{v[0-9]+}}, |[[X:s[0-9]+]]|, 2.0, v{{[0-9]+}}
182 ; VI-FLUSH: v_mad_f16 {{v[0-9]+}}, |[[X]]|, 2.0, v{{[0-9]+}}
183
184 ; VI-DENORM: v_fma_f16 {{v[0-9]+}}, |[[X:s[0-9]+]]|, 2.0, v{{[0-9]+}}
185 ; VI-DENORM: v_fma_f16 {{v[0-9]+}}, |[[X]]|, 2.0, v{{[0-9]+}}
186
174187 define void @multiple_use_fadd_multi_fmad_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 {
175188 %x = bitcast i16 %x.arg to half
176189 %y = bitcast i16 %y.arg to half
None ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
1 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
2 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s
3 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s
4
5 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
6 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
7 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s
8 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s
0 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
1 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
2 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s
3 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s
4
5 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
6 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
7 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s
8 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s
99
1010 declare i32 @llvm.amdgcn.workitem.id.x() #1
1111 declare half @llvm.fmuladd.f16(half, half, half) #1
6161
6262 attributes #0 = { nounwind "target-cpu"="kaveri" }
6363 attributes #1 = { nounwind "target-cpu"="fiji" }
64 attributes #2 = { nounwind "target-features"="-fp32-denormals,+fp64-denormals" }
65 attributes #3 = { nounwind "target-features"="+fp32-denormals,-fp64-denormals" }
66 attributes #4 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" }
67 attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-denormals" }
64 attributes #2 = { nounwind "target-features"="-fp32-denormals,+fp64-fp16-denormals" }
65 attributes #3 = { nounwind "target-features"="+fp32-denormals,-fp64-fp16-denormals" }
66 attributes #4 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" }
67 attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
None ; RUN: llc -march=amdgcn -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s
1 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
2 ; RUN: llc -march=amdgcn -mattr=+fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s
3 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s
0 ; RUN: llc -march=amdgcn -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s
1 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
2 ; RUN: llc -march=amdgcn -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s
3 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s
44
55 declare half @llvm.fmuladd.f16(half %a, half %b, half %c)
66 declare <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
0 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-FLUSH -check-prefix=GCN %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-DENORM -check-prefix=GCN %s
23
34 ; GCN-LABEL: {{^}}mac_vvv:
45 ; GCN: buffer_load_dword [[A:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0{{$}}
249250 ; FIXME: How is this not folded?
250251 ; SI: v_cvt_f32_f16_e32 v{{[0-9]+}}, 0x3c00
251252
252 ; VI: v_add_f16_e32 [[TMP2:v[0-9]+]], [[A]], [[A]]
253 ; VI: v_mad_f16 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0
253 ; VI-FLUSH: v_add_f16_e32 [[TMP2:v[0-9]+]], [[A]], [[A]]
254 ; VI-FLUSH: v_mad_f16 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0
254255 define void @fold_inline_imm_into_mac_src2_f16(half addrspace(1)* %out, half addrspace(1)* %a, half addrspace(1)* %b) #3 {
255256 bb:
256257 %tid = call i32 @llvm.amdgcn.workitem.id.x()
None ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
1 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
0 ; RUN: llc -march=amdgcn -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
1 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
22
33 ; GCN-LABEL: {{^}}mac_f16
44 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
603603 ret void
604604 }
605605
606 attributes #0 = {"unsafe-fp-math"="false"}
607 attributes #1 = {"unsafe-fp-math"="true"}
606 attributes #0 = { nounwind "unsafe-fp-math"="false" }
607 attributes #1 = { nounwind "unsafe-fp-math"="true" }
None ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
1 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
0 ; RUN: llc -march=amdgcn -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
1 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
22
33 ; GCN-LABEL: {{^}}madak_f16
44 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]