llvm.org GIT mirror llvm / 3447c8c
Merging r233075:
------------------------------------------------------------------------
r233075 | marek.olsak | 2015-03-24 09:40:08 -0400 (Tue, 24 Mar 2015) | 8 lines

R600/SI: Expand fract to floor, then only select V_FRACT on CI

V_FRACT is buggy on SI.

R600-specific code is left intact.

v2: drop the multiclass, use complex VOP3 patterns
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_36@236070 91177308-0d34-0410-b5e6-96231b3b80d8

Tom Stellard 5 years ago
5 changed file(s) with 74 addition(s) and 8 deletion(s). Raw diff Collapse all Expand all
874874 return LowerIntrinsicIABS(Op, DAG);
875875 case AMDGPUIntrinsic::AMDGPU_lrp:
876876 return LowerIntrinsicLRP(Op, DAG);
877 case AMDGPUIntrinsic::AMDGPU_fract:
878 case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
879 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
880877
881878 case AMDGPUIntrinsic::AMDGPU_clamp:
882879 case AMDGPUIntrinsic::AMDIL_clamp: // Legacy name.
837837 case Intrinsic::AMDGPU_rsq:
838838 // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
839839 return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
840
841 case AMDGPUIntrinsic::AMDGPU_fract:
842 case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
843 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
840844 }
841845 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
842846 break;
920920 Op.getOperand(1),
921921 Op.getOperand(2),
922922 Op.getOperand(3));
923
924 case AMDGPUIntrinsic::AMDGPU_fract:
925 case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
926 return DAG.getNode(ISD::FSUB, DL, VT, Op.getOperand(1),
927 DAG.getNode(ISD::FFLOOR, DL, VT, Op.getOperand(1)));
928
923929 default:
924930 return AMDGPUTargetLowering::LowerOperation(Op, DAG);
925931 }
32573257 (V_CNDMASK_B32_e64 $src0, $src1, $src2)
32583258 >;
32593259
3260 //===----------------------------------------------------------------------===//
3261 // Fract Patterns
3262 //===----------------------------------------------------------------------===//
3263
3264 let Predicates = [isCI] in {
3265
3266 // Convert (x - floor(x)) to fract(x)
3267 def : Pat <
3268 (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)),
3269 (f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))),
3270 (V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
3271 >;
3272
3273 // Convert (x + (-floor(x))) to fract(x)
3274 def : Pat <
3275 (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
3276 (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
3277 (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
3278 >;
3279
3280 } // End Predicates = [isCI]
3281
32603282 //============================================================================//
32613283 // Miscellaneous Optimization Patterns
32623284 //============================================================================//
0 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
0 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
1 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s
3 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
34
5 declare float @llvm.fabs.f32(float %Val)
46 declare float @llvm.AMDGPU.fract.f32(float) nounwind readnone
57
68 ; Legacy name
79 declare float @llvm.AMDIL.fraction.f32(float) nounwind readnone
810
911 ; FUNC-LABEL: {{^}}fract_f32:
10 ; SI: v_fract_f32
12 ; CI: v_fract_f32_e32 [[RESULT:v[0-9]+]], [[INPUT:v[0-9]+]]
13 ; SI: v_floor_f32_e32 [[FLR:v[0-9]+]], [[INPUT:v[0-9]+]]
14 ; SI: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[FLR]], [[INPUT]]
15 ; GCN: buffer_store_dword [[RESULT]]
1116 ; EG: FRACT
1217 define void @fract_f32(float addrspace(1)* %out, float addrspace(1)* %src) nounwind {
1318 %val = load float addrspace(1)* %src, align 4
1722 }
1823
1924 ; FUNC-LABEL: {{^}}fract_f32_legacy_amdil:
20 ; SI: v_fract_f32
25 ; CI: v_fract_f32_e32 [[RESULT:v[0-9]+]], [[INPUT:v[0-9]+]]
26 ; SI: v_floor_f32_e32 [[FLR:v[0-9]+]], [[INPUT:v[0-9]+]]
27 ; SI: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[FLR]], [[INPUT]]
28 ; GCN: buffer_store_dword [[RESULT]]
2129 ; EG: FRACT
2230 define void @fract_f32_legacy_amdil(float addrspace(1)* %out, float addrspace(1)* %src) nounwind {
2331 %val = load float addrspace(1)* %src, align 4
2533 store float %fract, float addrspace(1)* %out, align 4
2634 ret void
2735 }
36
37 ; FUNC-LABEL: {{^}}fract_f32_neg:
38 ; CI: v_fract_f32_e64 [[RESULT:v[0-9]+]], -[[INPUT:v[0-9]+]]
39 ; SI: v_floor_f32_e64 [[FLR:v[0-9]+]], -[[INPUT:v[0-9]+]]
40 ; SI: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[INPUT]], [[FLR]]
41 ; GCN: buffer_store_dword [[RESULT]]
42 ; EG: FRACT
43 define void @fract_f32_neg(float addrspace(1)* %out, float addrspace(1)* %src) nounwind {
44 %val = load float addrspace(1)* %src, align 4
45 %neg = fsub float 0.0, %val
46 %fract = call float @llvm.AMDGPU.fract.f32(float %neg) nounwind readnone
47 store float %fract, float addrspace(1)* %out, align 4
48 ret void
49 }
50
51 ; FUNC-LABEL: {{^}}fract_f32_neg_abs:
52 ; CI: v_fract_f32_e64 [[RESULT:v[0-9]+]], -|[[INPUT:v[0-9]+]]|
53 ; SI: v_floor_f32_e64 [[FLR:v[0-9]+]], -|[[INPUT:v[0-9]+]]|
54 ; SI: v_sub_f32_e64 [[RESULT:v[0-9]+]], -|[[INPUT]]|, [[FLR]]
55 ; GCN: buffer_store_dword [[RESULT]]
56 ; EG: FRACT
57 define void @fract_f32_neg_abs(float addrspace(1)* %out, float addrspace(1)* %src) nounwind {
58 %val = load float addrspace(1)* %src, align 4
59 %abs = call float @llvm.fabs.f32(float %val)
60 %neg = fsub float 0.0, %abs
61 %fract = call float @llvm.AMDGPU.fract.f32(float %neg) nounwind readnone
62 store float %fract, float addrspace(1)* %out, align 4
63 ret void
64 }