llvm.org GIT mirror llvm / 29ad750
Combine fcmp + select to fminnum / fmaxnum if no nans and legal. Also require unsafe FP math for now since there isn't a way to test for signed zeros. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225744 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 5 years ago
3 changed file(s) with 86 addition(s) and 11 deletion(s). Raw diff Collapse all Expand all
46164616 return SDValue();
46174617 }
46184618
4619
4620 /// \brief Generate Min/Max node
///
/// Given a comparison of \p LHS and \p RHS with predicate \p CC that selects
/// between \p True and \p False, build an ISD::FMINNUM or ISD::FMAXNUM node
/// of type \p VT when the select just picks the smaller or larger compare
/// operand. The ordered, unordered and unspecified-order variants of each
/// predicate are handled identically, and nothing here checks for NaNs or
/// signed zeros, so callers must already know those cases do not matter.
///
/// \returns the new node, or an empty SDValue if the select operands are not
/// the compare operands, \p CC is not a less/greater predicate, or
/// \p TLI reports the chosen opcode as not legal for \p VT.
4621 static SDValue combineMinNumMaxNum(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS,
4622 SDValue True, SDValue False,
4623 ISD::CondCode CC, const TargetLowering &TLI,
4624 SelectionDAG &DAG) {
4625 if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
4626 return SDValue(); // select values are not the compare operands (in either order)
4627
4628 switch (CC) {
4629 case ISD::SETOLT:
4630 case ISD::SETOLE:
4631 case ISD::SETLT:
4632 case ISD::SETLE:
4633 case ISD::SETULT:
4634 case ISD::SETULE: {
4635 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM; // "less" selecting LHS is a min; selecting RHS is a max
4636 if (TLI.isOperationLegal(Opcode, VT))
4637 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
4638 return SDValue(); // opcode not legal for VT on this target
4639 }
4640 case ISD::SETOGT:
4641 case ISD::SETOGE:
4642 case ISD::SETGT:
4643 case ISD::SETGE:
4644 case ISD::SETUGT:
4645 case ISD::SETUGE: {
4646 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM; // "greater" selecting LHS is a max; selecting RHS is a min
4647 if (TLI.isOperationLegal(Opcode, VT))
4648 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
4649 return SDValue(); // opcode not legal for VT on this target
4650 }
4651 default:
4652 return SDValue(); // any other predicate does not describe a min/max
4653 }
4654 }
4655
46194656 SDValue DAGCombiner::visitSELECT(SDNode *N) {
46204657 SDValue N0 = N->getOperand(0);
46214658 SDValue N1 = N->getOperand(1);
46954732
46964733 // fold selects based on a setcc into other things, such as min/max/abs
46974734 if (N0.getOpcode() == ISD::SETCC) {
4735 // select (fcmp lt x, y), x, y -> fminnum x, y
4736 // select (fcmp gt x, y), x, y -> fmaxnum x, y
4737 //
4738 // This is OK if we don't care about what happens if either operand is a
4739 // NaN.
4740 //
4741
4742 // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
4743 // no signed zeros as well as no nans.
4744 const TargetOptions &Options = DAG.getTarget().Options;
4745 if (Options.UnsafeFPMath &&
4746 VT.isFloatingPoint() && N0.hasOneUse() &&
4747 DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
4748 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
4749
4750 SDValue FMinMax =
4751 combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1),
4752 N1, N2, CC, TLI, DAG);
4753 if (FMinMax)
4754 return FMinMax;
4755 }
4756
46984757 if ((!LegalOperations &&
46994758 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
47004759 TLI.isOperationLegal(ISD::SELECT_CC, VT))
None ; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
0 ; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s
1 ; RUN: llc -enable-no-nans-fp-math -enable-unsafe-fp-math -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
12 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
3
4 ; FIXME: Should replace unsafe-fp-math with no signed zeros.
25
36 declare i32 @llvm.r600.read.tidig.x() #1
47
58 ; FUNC-LABEL: @test_fmax_legacy_uge_f32
69 ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
710 ; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
8 ; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
11 ; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
12 ; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
13
914 ; EG: MAX
1015 define void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
1116 %tid = call i32 @llvm.r600.read.tidig.x() #1
2429 ; FUNC-LABEL: @test_fmax_legacy_oge_f32
2530 ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
2631 ; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
27 ; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
32 ; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
33 ; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
2834 ; EG: MAX
2935 define void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
3036 %tid = call i32 @llvm.r600.read.tidig.x() #1
4349 ; FUNC-LABEL: @test_fmax_legacy_ugt_f32
4450 ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
4551 ; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
46 ; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
52 ; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
53 ; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
4754 ; EG: MAX
4855 define void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
4956 %tid = call i32 @llvm.r600.read.tidig.x() #1
6269 ; FUNC-LABEL: @test_fmax_legacy_ogt_f32
6370 ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
6471 ; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
65 ; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
72 ; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
73 ; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
6674 ; EG: MAX
6775 define void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
6876 %tid = call i32 @llvm.r600.read.tidig.x() #1
None ; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
0 ; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
1 ; RUN: llc -enable-no-nans-fp-math -enable-unsafe-fp-math -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
12 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
3
4 ; FIXME: Should replace unsafe-fp-math with no signed zeros.
25
36 declare i32 @llvm.r600.read.tidig.x() #1
47
58 ; FUNC-LABEL: @test_fmin_legacy_f32
69 ; EG: MIN *
7 ; SI: v_min_legacy_f32_e32
10 ; SI-SAFE: v_min_legacy_f32_e32
11 ; SI-NONAN: v_min_f32_e32
812 define void @test_fmin_legacy_f32(<4 x float> addrspace(1)* %out, <4 x float> inreg %reg0) #0 {
913 %r0 = extractelement <4 x float> %reg0, i32 0
1014 %r1 = extractelement <4 x float> %reg0, i32 1
1822 ; FUNC-LABEL: @test_fmin_legacy_ule_f32
1923 ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
2024 ; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
21 ; SI: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
25 ; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
26 ; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
2227 define void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
2328 %tid = call i32 @llvm.r600.read.tidig.x() #1
2429 %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
3641 ; FUNC-LABEL: @test_fmin_legacy_ole_f32
3742 ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
3843 ; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
39 ; SI: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
44 ; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
45 ; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
4046 define void @test_fmin_legacy_ole_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
4147 %tid = call i32 @llvm.r600.read.tidig.x() #1
4248 %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
5460 ; FUNC-LABEL: @test_fmin_legacy_olt_f32
5561 ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
5662 ; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
57 ; SI: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
63 ; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
64 ; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
5865 define void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
5966 %tid = call i32 @llvm.r600.read.tidig.x() #1
6067 %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
7279 ; FUNC-LABEL: @test_fmin_legacy_ult_f32
7380 ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
7481 ; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
75 ; SI: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
82 ; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
83 ; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
7684 define void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
7785 %tid = call i32 @llvm.r600.read.tidig.x() #1
7886 %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid