llvm.org GIT mirror llvm / 40ddb2c
Partial fix to r225380 (More FMA folding opportunities). As pointed out by Aditya (and Owen), there are two things wrong with this code. First, it adds patterns that elide FP extends when forming FMAs, which might not be profitable on all targets (such patterns belong behind the pre-existing aggressive-FMA-formation flag); this change fixes that. Second, the resulting nodes might have operands of different types (the extensions need to be re-added); that will be fixed in the follow-up commit. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225485 91177308-0d34-0410-b5e6-96231b3b80d8 Hal Finkel 5 years ago
1 changed file(s) with 97 addition(s) and 98 deletion(s). Raw diff Collapse all Expand all
68976897 return DAG.getNode(ISD::FMA, SDLoc(N), VT,
68986898 N1.getOperand(0), N1.getOperand(1), N0);
68996899
6900 // Remove FP_EXTEND when there is an opportunity to combine. This is
6901 // legal here since extra precision is allowed.
6902
6903 // fold (fadd (fpext (fmul x, y)), z) -> (fma x, y, z)
6904 if (N0.getOpcode() == ISD::FP_EXTEND) {
6905 SDValue N00 = N0.getOperand(0);
6906 if (N00.getOpcode() == ISD::FMUL)
6900 // More folding opportunities when target permits.
6901 if (TLI.enableAggressiveFMAFusion(VT)) {
6902 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
6903 if (N0.getOpcode() == ISD::FMA &&
6904 N0.getOperand(2).getOpcode() == ISD::FMUL)
69076905 return DAG.getNode(ISD::FMA, SDLoc(N), VT,
6908 N00.getOperand(0), N00.getOperand(1), N1);
6909 }
6910
6911 // fold (fadd x, (fpext (fmul y, z))) -> (fma y, z, x)
6912 // Note: Commutes FADD operands.
6913 if (N1.getOpcode() == ISD::FP_EXTEND) {
6914 SDValue N10 = N1.getOperand(0);
6915 if (N10.getOpcode() == ISD::FMUL)
6906 N0.getOperand(0), N0.getOperand(1),
6907 DAG.getNode(ISD::FMA, SDLoc(N), VT,
6908 N0.getOperand(2).getOperand(0),
6909 N0.getOperand(2).getOperand(1),
6910 N1));
6911
6912 // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
6913 if (N1->getOpcode() == ISD::FMA &&
6914 N1.getOperand(2).getOpcode() == ISD::FMUL)
69166915 return DAG.getNode(ISD::FMA, SDLoc(N), VT,
6917 N10.getOperand(0), N10.getOperand(1), N0);
6918 }
6919 }
6920
6921 // More folding opportunities when target permits.
6922 if (TLI.enableAggressiveFMAFusion(VT)) {
6923
6924 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
6925 if (N0.getOpcode() == ISD::FMA &&
6926 N0.getOperand(2).getOpcode() == ISD::FMUL)
6927 return DAG.getNode(ISD::FMA, SDLoc(N), VT,
6928 N0.getOperand(0), N0.getOperand(1),
6929 DAG.getNode(ISD::FMA, SDLoc(N), VT,
6930 N0.getOperand(2).getOperand(0),
6931 N0.getOperand(2).getOperand(1),
6932 N1));
6933
6934 // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
6935 if (N1->getOpcode() == ISD::FMA &&
6936 N1.getOperand(2).getOpcode() == ISD::FMUL)
6937 return DAG.getNode(ISD::FMA, SDLoc(N), VT,
6938 N1.getOperand(0), N1.getOperand(1),
6939 DAG.getNode(ISD::FMA, SDLoc(N), VT,
6940 N1.getOperand(2).getOperand(0),
6941 N1.getOperand(2).getOperand(1),
6942 N0));
6916 N1.getOperand(0), N1.getOperand(1),
6917 DAG.getNode(ISD::FMA, SDLoc(N), VT,
6918 N1.getOperand(2).getOperand(0),
6919 N1.getOperand(2).getOperand(1),
6920 N0));
6921
6922 // Remove FP_EXTEND when there is an opportunity to combine. This is
6923 // legal here since extra precision is allowed.
6924
6925 // fold (fadd (fpext (fmul x, y)), z) -> (fma x, y, z)
6926 if (N0.getOpcode() == ISD::FP_EXTEND) {
6927 SDValue N00 = N0.getOperand(0);
6928 if (N00.getOpcode() == ISD::FMUL)
6929 return DAG.getNode(ISD::FMA, SDLoc(N), VT,
6930 N00.getOperand(0), N00.getOperand(1), N1);
6931 }
6932
6933 // fold (fadd x, (fpext (fmul y, z))) -> (fma y, z, x)
6934 // Note: Commutes FADD operands.
6935 if (N1.getOpcode() == ISD::FP_EXTEND) {
6936 SDValue N10 = N1.getOperand(0);
6937 if (N10.getOpcode() == ISD::FMUL)
6938 return DAG.getNode(ISD::FMA, SDLoc(N), VT,
6939 N10.getOperand(0), N10.getOperand(1), N0);
6940 }
6941 }
69436942 }
69446943
69456944 return SDValue();
70327031 return DAG.getNode(ISD::FMA, dl, VT,
70337032 DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
70347033 DAG.getNode(ISD::FNEG, dl, VT, N1));
7035 }
7036
7037 // Remove FP_EXTEND when there is an opportunity to combine. This is
7038 // legal here since extra precision is allowed.
7039
7040 // fold (fsub (fpext (fmul x, y)), z) -> (fma x, y, (fneg z))
7041 if (N0.getOpcode() == ISD::FP_EXTEND) {
7042 SDValue N00 = N0.getOperand(0);
7043 if (N00.getOpcode() == ISD::FMUL)
7044 return DAG.getNode(ISD::FMA, SDLoc(N), VT,
7045 N00.getOperand(0),
7046 N00.getOperand(1),
7047 DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1));
7048 }
7049
7050 // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg y), z, x)
7051 // Note: Commutes FSUB operands.
7052 if (N1.getOpcode() == ISD::FP_EXTEND) {
7053 SDValue N10 = N1.getOperand(0);
7054 if (N10.getOpcode() == ISD::FMUL)
7055 return DAG.getNode(ISD::FMA, SDLoc(N), VT,
7056 DAG.getNode(ISD::FNEG, SDLoc(N), VT,
7057 N10.getOperand(0)),
7058 N10.getOperand(1),
7059 N0);
7060 }
7061
7062 // fold (fsub (fpext (fneg (fmul x, y))), z)
7063 // -> (fma (fneg x), y, (fneg z))
7064 if (N0.getOpcode() == ISD::FP_EXTEND) {
7065 SDValue N00 = N0.getOperand(0);
7066 if (N00.getOpcode() == ISD::FNEG) {
7067 SDValue N000 = N00.getOperand(0);
7068 if (N000.getOpcode() == ISD::FMUL) {
7069 return DAG.getNode(ISD::FMA, dl, VT,
7070 DAG.getNode(ISD::FNEG, dl, VT,
7071 N000.getOperand(0)),
7072 N000.getOperand(1),
7073 DAG.getNode(ISD::FNEG, dl, VT, N1));
7074 }
7075 }
7076 }
7077
7078 // fold (fsub (fneg (fpext (fmul x, y))), z)
7079 // -> (fma (fneg x), y, (fneg z))
7080 if (N0.getOpcode() == ISD::FNEG) {
7081 SDValue N00 = N0.getOperand(0);
7082 if (N00.getOpcode() == ISD::FP_EXTEND) {
7083 SDValue N000 = N00.getOperand(0);
7084 if (N000.getOpcode() == ISD::FMUL) {
7085 return DAG.getNode(ISD::FMA, dl, VT,
7086 DAG.getNode(ISD::FNEG, dl, VT,
7087 N000.getOperand(0)),
7088 N000.getOperand(1),
7089 DAG.getNode(ISD::FNEG, dl, VT, N1));
7090 }
7091 }
70927034 }
70937035
70947036 // More folding opportunities when target permits.
71207062 DAG.getNode(ISD::FNEG, SDLoc(N), VT,
71217063 N20),
71227064 N21, N0));
7065 }
7066
7067 // Remove FP_EXTEND when there is an opportunity to combine. This is
7068 // legal here since extra precision is allowed.
7069
7070 // fold (fsub (fpext (fmul x, y)), z) -> (fma x, y, (fneg z))
7071 if (N0.getOpcode() == ISD::FP_EXTEND) {
7072 SDValue N00 = N0.getOperand(0);
7073 if (N00.getOpcode() == ISD::FMUL)
7074 return DAG.getNode(ISD::FMA, SDLoc(N), VT,
7075 N00.getOperand(0),
7076 N00.getOperand(1),
7077 DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1));
7078 }
7079
7080 // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg y), z, x)
7081 // Note: Commutes FSUB operands.
7082 if (N1.getOpcode() == ISD::FP_EXTEND) {
7083 SDValue N10 = N1.getOperand(0);
7084 if (N10.getOpcode() == ISD::FMUL)
7085 return DAG.getNode(ISD::FMA, SDLoc(N), VT,
7086 DAG.getNode(ISD::FNEG, SDLoc(N), VT,
7087 N10.getOperand(0)),
7088 N10.getOperand(1),
7089 N0);
7090 }
7091
7092 // fold (fsub (fpext (fneg (fmul x, y))), z)
7093 // -> (fma (fneg x), y, (fneg z))
7094 if (N0.getOpcode() == ISD::FP_EXTEND) {
7095 SDValue N00 = N0.getOperand(0);
7096 if (N00.getOpcode() == ISD::FNEG) {
7097 SDValue N000 = N00.getOperand(0);
7098 if (N000.getOpcode() == ISD::FMUL) {
7099 return DAG.getNode(ISD::FMA, dl, VT,
7100 DAG.getNode(ISD::FNEG, dl, VT,
7101 N000.getOperand(0)),
7102 N000.getOperand(1),
7103 DAG.getNode(ISD::FNEG, dl, VT, N1));
7104 }
7105 }
7106 }
7107
7108 // fold (fsub (fneg (fpext (fmul x, y))), z)
7109 // -> (fma (fneg x), y, (fneg z))
7110 if (N0.getOpcode() == ISD::FNEG) {
7111 SDValue N00 = N0.getOperand(0);
7112 if (N00.getOpcode() == ISD::FP_EXTEND) {
7113 SDValue N000 = N00.getOperand(0);
7114 if (N000.getOpcode() == ISD::FMUL) {
7115 return DAG.getNode(ISD::FMA, dl, VT,
7116 DAG.getNode(ISD::FNEG, dl, VT,
7117 N000.getOperand(0)),
7118 N000.getOperand(1),
7119 DAG.getNode(ISD::FNEG, dl, VT, N1));
7120 }
7121 }
71237122 }
71247123 }
71257124 }