llvm.org GIT mirror llvm / a0ad797
AMDGPU: Conversions always produce canonical results. Not sure why this was checking for denormals for f16. My interpretation of the IEEE standard is that conversions should produce a canonical result, and the ISA manual says denormals are created when appropriate. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@339064 91177308-0d34-0410-b5e6-96231b3b80d8 (Matt Arsenault, 1 year, 6 months ago)
2 changed file(s) with 37 addition(s) and 8 deletion(s). Raw diff Collapse all Expand all
6774 6774   case ISD::FSQRT:
6775 6775   case ISD::FDIV:
6776 6776   case ISD::FREM:
     6777 + case ISD::FP_ROUND:
     6778 + case ISD::FP_EXTEND:
6777 6779   case AMDGPUISD::FMUL_LEGACY:
6778 6780   case AMDGPUISD::FMAD_FTZ:
6779 6781   return true;
6780      - case ISD::FP_ROUND:
6781      - return Op.getValueType().getScalarType() != MVT::f16 ||
6782      - Subtarget->hasFP16Denormals();
6783      -
6784      - case ISD::FP_EXTEND:
6785      - return Op.getOperand(0).getValueType().getScalarType() != MVT::f16 ||
6786      - Subtarget->hasFP16Denormals();
6787 6782
6788 6783   // It can/will be lowered or combined as a bit operation.
6789 6784   // Need to check their input recursively to handle.
214 214   ret void
215 215   }
216 216
    217 + ; GCN-LABEL: test_fold_canonicalize_fpextend_value_f32_f16_flushf16:
    218 + ; GCN: v_cvt_f32_f16_e32 [[V:v[0-9]+]], v{{[0-9]+}}
    219 + ; GCN-NOT: v_mul
    220 + ; GCN-NOT: v_max
    221 + ; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
    222 + define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16_flushf16(half addrspace(1)* %arg, float addrspace(1)* %out) #2 {
    223 + %id = tail call i32 @llvm.amdgcn.workitem.id.x()
    224 + %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
    225 + %load = load half, half addrspace(1)* %gep, align 2
    226 + %v = fpext half %load to float
    227 + %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
    228 + %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
    229 + store float %canonicalized, float addrspace(1)* %gep2, align 4
    230 + ret void
    231 + }
    232 +
217 233   ; GCN-LABEL: test_fold_canonicalize_fpround_value_f32_f64:
218 234   ; GCN: v_cvt_f32_f64_e32 [[V:v[0-9]+]], v[{{[0-9:]+}}]
219 235   ; GCN-NOT: v_mul
232 248
233 249   ; GCN-LABEL: test_fold_canonicalize_fpround_value_f16_f32:
234 250   ; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
    251 + ; GCN-NOT: v_max
    252 + ; GCN-NOT: v_mul
235 253   ; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
236     - ; GCN-NOT: 1.0
237 254   define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32(float addrspace(1)* %arg, half addrspace(1)* %out) {
    255 + %id = tail call i32 @llvm.amdgcn.workitem.id.x()
    256 + %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
    257 + %load = load float, float addrspace(1)* %gep, align 4
    258 + %v = fptrunc float %load to half
    259 + %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
    260 + %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
    261 + store half %canonicalized, half addrspace(1)* %gep2, align 2
    262 + ret void
    263 + }
    264 +
    265 + ; GCN-LABEL: test_fold_canonicalize_fpround_value_f16_f32_flushf16:
    266 + ; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
    267 + ; GCN-NOT: v_max
    268 + ; GCN-NOT: v_mul
    269 + ; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
    270 + define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32_flushf16(float addrspace(1)* %arg, half addrspace(1)* %out) #2 {
238 271   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
239 272   %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
240 273   %load = load float, float addrspace(1)* %gep, align 4
737 770
738 771   attributes #0 = { nounwind readnone }
739 772   attributes #1 = { "no-nans-fp-math"="true" }
    773 + attributes #2 = { "target-features"="-fp64-fp16-denormals" }