llvm.org GIT mirror llvm / 033a537
More FMA folding opportunities. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225380 91177308-0d34-0410-b5e6-96231b3b80d8 Olivier Sallenave 5 years ago
3 changed file(s) with 305 addition(s) and 1 deletion(s). Raw diff Collapse all Expand all
68776877 }
68786878 } // enable-unsafe-fp-math
68796879
6880
68816880 // FADD -> FMA combines:
68826881 if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
68836882 TLI.isFMAFasterThanFMulAndFAdd(VT) &&
68956894 (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
68966895 return DAG.getNode(ISD::FMA, SDLoc(N), VT,
68976896 N1.getOperand(0), N1.getOperand(1), N0);
6897
6898 // Remove FP_EXTEND when there is an opportunity to combine. This is
6899 // legal here since extra precision is allowed.
6900
6901 // fold (fadd (fpext (fmul x, y)), z) -> (fma x, y, z)
6902 if (N0.getOpcode() == ISD::FP_EXTEND) {
6903 SDValue N00 = N0.getOperand(0);
6904 if (N00.getOpcode() == ISD::FMUL)
6905 return DAG.getNode(ISD::FMA, SDLoc(N), VT,
6906 N00.getOperand(0), N00.getOperand(1), N1);
6907 }
6908
6909 // fold (fadd x, (fpext (fmul y, z))) -> (fma y, z, x)
6910 // Note: Commutes FADD operands.
6911 if (N1.getOpcode() == ISD::FP_EXTEND) {
6912 SDValue N10 = N1.getOperand(0);
6913 if (N10.getOpcode() == ISD::FMUL)
6914 return DAG.getNode(ISD::FMA, SDLoc(N), VT,
6915 N10.getOperand(0), N10.getOperand(1), N0);
6916 }
6917 }
6918
6919 // More folding opportunities when target permits.
6920 if (TLI.enableAggressiveFMAFusion(VT)) {
6921
6922 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
6923 if (N0.getOpcode() == ISD::FMA &&
6924 N0.getOperand(2).getOpcode() == ISD::FMUL)
6925 return DAG.getNode(ISD::FMA, SDLoc(N), VT,
6926 N0.getOperand(0), N0.getOperand(1),
6927 DAG.getNode(ISD::FMA, SDLoc(N), VT,
6928 N0.getOperand(2).getOperand(0),
6929 N0.getOperand(2).getOperand(1),
6930 N1));
6931
6932 // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
6933 if (N1->getOpcode() == ISD::FMA &&
6934 N1.getOperand(2).getOpcode() == ISD::FMUL)
6935 return DAG.getNode(ISD::FMA, SDLoc(N), VT,
6936 N1.getOperand(0), N1.getOperand(1),
6937 DAG.getNode(ISD::FMA, SDLoc(N), VT,
6938 N1.getOperand(2).getOperand(0),
6939 N1.getOperand(2).getOperand(1),
6940 N0));
68986941 }
68996942
69006943 return SDValue();
69877030 return DAG.getNode(ISD::FMA, dl, VT,
69887031 DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
69897032 DAG.getNode(ISD::FNEG, dl, VT, N1));
7033 }
7034
7035 // Remove FP_EXTEND when there is an opportunity to combine. This is
7036 // legal here since extra precision is allowed.
7037
7038 // fold (fsub (fpext (fmul x, y)), z) -> (fma x, y, (fneg z))
7039 if (N0.getOpcode() == ISD::FP_EXTEND) {
7040 SDValue N00 = N0.getOperand(0);
7041 if (N00.getOpcode() == ISD::FMUL)
7042 return DAG.getNode(ISD::FMA, SDLoc(N), VT,
7043 N00.getOperand(0),
7044 N00.getOperand(1),
7045 DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1));
7046 }
7047
7048 // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg y), z, x)
7049 // Note: Commutes FSUB operands.
7050 if (N1.getOpcode() == ISD::FP_EXTEND) {
7051 SDValue N10 = N1.getOperand(0);
7052 if (N10.getOpcode() == ISD::FMUL)
7053 return DAG.getNode(ISD::FMA, SDLoc(N), VT,
7054 DAG.getNode(ISD::FNEG, SDLoc(N), VT,
7055 N10.getOperand(0)),
7056 N10.getOperand(1),
7057 N0);
7058 }
7059
7060 // fold (fsub (fpext (fneg (fmul x, y))), z)
7061 // -> (fma (fneg x), y, (fneg z))
7062 if (N0.getOpcode() == ISD::FP_EXTEND) {
7063 SDValue N00 = N0.getOperand(0);
7064 if (N00.getOpcode() == ISD::FNEG) {
7065 SDValue N000 = N00.getOperand(0);
7066 if (N000.getOpcode() == ISD::FMUL) {
7067 return DAG.getNode(ISD::FMA, dl, VT,
7068 DAG.getNode(ISD::FNEG, dl, VT,
7069 N000.getOperand(0)),
7070 N000.getOperand(1),
7071 DAG.getNode(ISD::FNEG, dl, VT, N1));
7072 }
7073 }
7074 }
7075
7076 // fold (fsub (fneg (fpext (fmul x, y))), z)
7077 // -> (fma (fneg x), y, (fneg z))
7078 if (N0.getOpcode() == ISD::FNEG) {
7079 SDValue N00 = N0.getOperand(0);
7080 if (N00.getOpcode() == ISD::FP_EXTEND) {
7081 SDValue N000 = N00.getOperand(0);
7082 if (N000.getOpcode() == ISD::FMUL) {
7083 return DAG.getNode(ISD::FMA, dl, VT,
7084 DAG.getNode(ISD::FNEG, dl, VT,
7085 N000.getOperand(0)),
7086 N000.getOperand(1),
7087 DAG.getNode(ISD::FNEG, dl, VT, N1));
7088 }
7089 }
7090 }
7091
7092 // More folding opportunities when target permits.
7093 if (TLI.enableAggressiveFMAFusion(VT)) {
7094
7095 // fold (fsub (fma x, y, (fmul u, v)), z)
7096 // -> (fma x, y, (fma u, v, (fneg z)))
7097 if (N0.getOpcode() == ISD::FMA &&
7098 N0.getOperand(2).getOpcode() == ISD::FMUL)
7099 return DAG.getNode(ISD::FMA, SDLoc(N), VT,
7100 N0.getOperand(0), N0.getOperand(1),
7101 DAG.getNode(ISD::FMA, SDLoc(N), VT,
7102 N0.getOperand(2).getOperand(0),
7103 N0.getOperand(2).getOperand(1),
7104 DAG.getNode(ISD::FNEG, SDLoc(N), VT,
7105 N1)));
7106
7107 // fold (fsub x, (fma y, z, (fmul u, v)))
7108 // -> (fma (fneg y), z, (fma (fneg u), v, x))
7109 if (N1.getOpcode() == ISD::FMA &&
7110 N1.getOperand(2).getOpcode() == ISD::FMUL) {
7111 SDValue N20 = N1.getOperand(2).getOperand(0);
7112 SDValue N21 = N1.getOperand(2).getOperand(1);
7113 return DAG.getNode(ISD::FMA, SDLoc(N), VT,
7114 DAG.getNode(ISD::FNEG, SDLoc(N), VT,
7115 N1.getOperand(0)),
7116 N1.getOperand(1),
7117 DAG.getNode(ISD::FMA, SDLoc(N), VT,
7118 DAG.getNode(ISD::FNEG, SDLoc(N), VT,
7119 N20),
7120 N21, N0));
7121 }
69907122 }
69917123 }
69927124
0 ; RUN: llc < %s -march=ppc32 -fp-contract=fast -mattr=-vsx | FileCheck %s
1 ; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -fp-contract=fast -mattr=+vsx -mcpu=pwr7 | FileCheck -check-prefix=CHECK-VSX %s
2
3 define double @test_FMADD_ASSOC1(double %A, double %B, double %C,
4 double %D, double %E) {
5 %F = fmul double %A, %B ; [#uses=1]
6 %G = fmul double %C, %D ; [#uses=1]
7 %H = fadd double %F, %G ; [#uses=1]
8 %I = fadd double %H, %E ; [#uses=1]
9 ret double %I
10 ; CHECK-LABEL: test_FMADD_ASSOC1:
11 ; CHECK: fmadd
12 ; CHECK-NEXT: fmadd
13 ; CHECK-NEXT: blr
14
15 ; CHECK-VSX-LABEL: test_FMADD_ASSOC1:
16 ; CHECK-VSX: xsmaddmdp
17 ; CHECK-VSX-NEXT: xsmaddadp
18 ; CHECK-VSX-NEXT: fmr
19 ; CHECK-VSX-NEXT: blr
20 }
21
22 define double @test_FMADD_ASSOC2(double %A, double %B, double %C,
23 double %D, double %E) {
24 %F = fmul double %A, %B ; [#uses=1]
25 %G = fmul double %C, %D ; [#uses=1]
26 %H = fadd double %F, %G ; [#uses=1]
27 %I = fadd double %E, %H ; [#uses=1]
28 ret double %I
29 ; CHECK-LABEL: test_FMADD_ASSOC2:
30 ; CHECK: fmadd
31 ; CHECK-NEXT: fmadd
32 ; CHECK-NEXT: blr
33
34 ; CHECK-VSX-LABEL: test_FMADD_ASSOC2:
35 ; CHECK-VSX: xsmaddmdp
36 ; CHECK-VSX-NEXT: xsmaddadp
37 ; CHECK-VSX-NEXT: fmr
38 ; CHECK-VSX-NEXT: blr
39 }
40
41 define double @test_FMSUB_ASSOC1(double %A, double %B, double %C,
42 double %D, double %E) {
43 %F = fmul double %A, %B ; [#uses=1]
44 %G = fmul double %C, %D ; [#uses=1]
45 %H = fadd double %F, %G ; [#uses=1]
46 %I = fsub double %H, %E ; [#uses=1]
47 ret double %I
48 ; CHECK-LABEL: test_FMSUB_ASSOC1:
49 ; CHECK: fmsub
50 ; CHECK-NEXT: fmadd
51 ; CHECK-NEXT: blr
52
53 ; CHECK-VSX-LABEL: test_FMSUB_ASSOC1:
54 ; CHECK-VSX: xsmsubmdp
55 ; CHECK-VSX-NEXT: xsmaddadp
56 ; CHECK-VSX-NEXT: fmr
57 ; CHECK-VSX-NEXT: blr
58 }
59
60 define double @test_FMSUB_ASSOC2(double %A, double %B, double %C,
61 double %D, double %E) {
62 %F = fmul double %A, %B ; [#uses=1]
63 %G = fmul double %C, %D ; [#uses=1]
64 %H = fadd double %F, %G ; [#uses=1]
65 %I = fsub double %E, %H ; [#uses=1]
66 ret double %I
67 ; CHECK-LABEL: test_FMSUB_ASSOC2:
68 ; CHECK: fnmsub
69 ; CHECK-NEXT: fnmsub
70 ; CHECK-NEXT: blr
71
72 ; CHECK-VSX-LABEL: test_FMSUB_ASSOC2:
73 ; CHECK-VSX: xsnmsubmdp
74 ; CHECK-VSX-NEXT: xsnmsubadp
75 ; CHECK-VSX-NEXT: fmr
76 ; CHECK-VSX-NEXT: blr
77 }
78
0 ; RUN: llc < %s -march=ppc32 -fp-contract=fast -mattr=-vsx | FileCheck %s
1 ; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -fp-contract=fast -mattr=+vsx -mcpu=pwr7 | FileCheck -check-prefix=CHECK-VSX %s
2
3 define double @test_FMADD_EXT1(float %A, float %B, double %C) {
4 %D = fmul float %A, %B ; [#uses=1]
5 %E = fpext float %D to double ; [#uses=1]
6 %F = fadd double %E, %C ; [#uses=1]
7 ret double %F
8 ; CHECK-LABEL: test_FMADD_EXT1:
9 ; CHECK: fmadd
10 ; CHECK-NEXT: blr
11
12 ; CHECK-VSX-LABEL: test_FMADD_EXT1:
13 ; CHECK-VSX: xsmaddmdp
14 ; CHECK-VSX-NEXT: blr
15 }
16
17 define double @test_FMADD_EXT2(float %A, float %B, double %C) {
18 %D = fmul float %A, %B ; [#uses=1]
19 %E = fpext float %D to double ; [#uses=1]
20 %F = fadd double %C, %E ; [#uses=1]
21 ret double %F
22 ; CHECK-LABEL: test_FMADD_EXT2:
23 ; CHECK: fmadd
24 ; CHECK-NEXT: blr
25
26 ; CHECK-VSX-LABEL: test_FMADD_EXT2:
27 ; CHECK-VSX: xsmaddmdp
28 ; CHECK-VSX-NEXT: blr
29 }
30
31 define double @test_FMSUB_EXT1(float %A, float %B, double %C) {
32 %D = fmul float %A, %B ; [#uses=1]
33 %E = fpext float %D to double ; [#uses=1]
34 %F = fsub double %E, %C ; [#uses=1]
35 ret double %F
36 ; CHECK-LABEL: test_FMSUB_EXT1:
37 ; CHECK: fmsub
38 ; CHECK-NEXT: blr
39
40 ; CHECK-VSX-LABEL: test_FMSUB_EXT1:
41 ; CHECK-VSX: xsmsubmdp
42 ; CHECK-VSX-NEXT: blr
43 }
44
45 define double @test_FMSUB_EXT2(float %A, float %B, double %C) {
46 %D = fmul float %A, %B ; [#uses=1]
47 %E = fpext float %D to double ; [#uses=1]
48 %F = fsub double %C, %E ; [#uses=1]
49 ret double %F
50 ; CHECK-LABEL: test_FMSUB_EXT2:
51 ; CHECK: fnmsub
52 ; CHECK-NEXT: blr
53
54 ; CHECK-VSX-LABEL: test_FMSUB_EXT2:
55 ; CHECK-VSX: xsnmsubmdp
56 ; CHECK-VSX-NEXT: fmr
57 ; CHECK-VSX-NEXT: blr
58 }
59
60 define double @test_FMSUB_EXT3(float %A, float %B, double %C) {
61 %D = fmul float %A, %B ; [#uses=1]
62 %E = fsub float -0.000000e+00, %D ; [#uses=1]
63 %F = fpext float %E to double ; [#uses=1]
64 %G = fsub double %F, %C ; [#uses=1]
65 ret double %G
66 ; CHECK-LABEL: test_FMSUB_EXT3:
67 ; CHECK: fneg
68 ; CHECK-NEXT: fmsub
69 ; CHECK-NEXT: blr
70
71 ; CHECK-VSX-LABEL: test_FMSUB_EXT3:
72 ; CHECK-VSX: xsnegdp
73 ; CHECK-VSX-NEXT: xsmsubmdp
74 ; CHECK-VSX-NEXT: blr
75 }
76
77 define double @test_FMSUB_EXT4(float %A, float %B, double %C) {
78 %D = fmul float %A, %B ; [#uses=1]
79 %E = fpext float %D to double ; [#uses=1]
80 %F = fsub double -0.000000e+00, %E ; [#uses=1]
81 %G = fsub double %F, %C ; [#uses=1]
82 ret double %G
83 ; CHECK-LABEL: test_FMSUB_EXT4:
84 ; CHECK: fneg
85 ; CHECK-NEXT: fmsub
86 ; CHECK-NEXT: blr
87
88 ; CHECK-VSX-LABEL: test_FMSUB_EXT4:
89 ; CHECK-VSX: xsnegdp
90 ; CHECK-VSX-NEXT: xsmsubmdp
91 ; CHECK-VSX-NEXT: blr
92 }