llvm.org GIT mirror llvm / 4b4e622
Add fused multiply+add instructions from VFPv4. Patch by Ana Pazos! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148658 91177308-0d34-0410-b5e6-96231b3b80d8 Anton Korobeynikov 8 years ago
9 changed file(s) with 255 addition(s) and 28 deletion(s). Raw diff Collapse all Expand all
3131 def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true",
3232 "Enable VFP3 instructions",
3333 [FeatureVFP2]>;
34 def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
35 "Enable VFP4 instructions",
36 [FeatureVFP3]>;
3437 def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
3538 "Enable NEON instructions",
3639 [FeatureVFP3]>;
40 def FeatureNEONVFP4 : SubtargetFeature<"neon-vfpv4", "HasNEONVFPv4", "true",
41 "Enable NEON-VFP4 instructions",
42 [FeatureVFP4, FeatureNEON]>;
3743 def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true",
3844 "Enable Thumb2 instructions">;
3945 def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
718718
719719 if (Subtarget->hasNEON() && emitFPU) {
720720 /* NEON is not exactly a VFP architecture, but GAS emits one of
721 * neon/vfpv3/vfpv2 for .fpu parameters */
722 AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
721 * neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
722 if (Subtarget->hasNEONVFP4())
723 AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon-vfpv4");
724 else
725 AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
723726 /* If emitted for NEON, omit from VFP below, since you can have both
724727 * NEON and VFP in build attributes but only one .fpu */
725728 emitFPU = false;
726729 }
727730
731 /* VFPv4 + .fpu */
732 if (Subtarget->hasVFP4()) {
733 AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
734 ARMBuildAttrs::AllowFPv4A);
735 if (emitFPU)
736 AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv4");
737
728738 /* VFPv3 + .fpu */
729 if (Subtarget->hasVFP3()) {
739 } else if (Subtarget->hasVFP3()) {
730740 AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
731741 ARMBuildAttrs::AllowFPv3A);
732742 if (emitFPU)
178178 AssemblerPredicate<"FeatureVFP2">;
179179 def HasVFP3 : Predicate<"Subtarget->hasVFP3()">,
180180 AssemblerPredicate<"FeatureVFP3">;
181 def HasVFP4 : Predicate<"Subtarget->hasVFP4()">,
182 AssemblerPredicate<"FeatureVFP4">;
183 def NoVFP4 : Predicate<"!Subtarget->hasVFP4()">;
181184 def HasNEON : Predicate<"Subtarget->hasNEON()">,
182185 AssemblerPredicate<"FeatureNEON">;
186 def HasNEONVFP4 : Predicate<"Subtarget->hasNEONVFP4()">,
187 AssemblerPredicate<"FeatureNEONVFP4">;
188 def NoNEONVFP4 : Predicate<"!Subtarget->hasNEONVFP4()">;
183189 def HasFP16 : Predicate<"Subtarget->hasFP16()">,
184190 AssemblerPredicate<"FeatureFP16">;
185191 def HasDivide : Predicate<"Subtarget->hasDivide()">,
38963896 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
38973897 def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
38983898 v2f32, fmul_su, fadd_mlx>,
3899 Requires<[HasNEON, UseFPVMLx]>;
3899 Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
39003900 def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
39013901 v4f32, fmul_su, fadd_mlx>,
3902 Requires<[HasNEON, UseFPVMLx]>;
3902 Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
39033903 defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
39043904 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
39053905 def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
39543954 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
39553955 def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
39563956 v2f32, fmul_su, fsub_mlx>,
3957 Requires<[HasNEON, UseFPVMLx]>;
3957 Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
39583958 def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
39593959 v4f32, fmul_su, fsub_mlx>,
3960 Requires<[HasNEON, UseFPVMLx]>;
3960 Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
39613961 defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
39623962 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
39633963 def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
40054005 defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
40064006 "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
40074007 defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
4008
4009
4010 // Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
4011 def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
4012 v2f32, fmul_su, fadd_mlx>,
4013 Requires<[HasNEONVFP4]>;
4014
4015 def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
4016 v4f32, fmul_su, fadd_mlx>,
4017 Requires<[HasNEONVFP4]>;
4018
4019 // Fused Vector Multiply Subtract (floating-point)
4020 def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
4021 v2f32, fmul_su, fsub_mlx>,
4022 Requires<[HasNEONVFP4]>;
4023 def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
4024 v4f32, fmul_su, fsub_mlx>,
4025 Requires<[HasNEONVFP4]>;
40084026
40094027 // Vector Subtract Operations.
40104028
53575375 def : N3VSPat;
53585376 def : N3VSPat;
53595377 def : N3VSMulOpPat,
5360 Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
5378 Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEONVFP4]>;
53615379 def : N3VSMulOpPat,
5362 Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
5380 Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEONVFP4]>;
5381 def : N3VSMulOpPat,
5382 Requires<[HasNEONVFP4, UseNEONForFP]>;
5383 def : N3VSMulOpPat,
5384 Requires<[HasNEONVFP4, UseNEONForFP]>;
53635385 def : N2VSPat;
53645386 def : N2VSPat;
53655387 def : N3VSPat;
919919 [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
920920 (f64 DPR:$Ddin)))]>,
921921 RegConstraint<"$Ddin = $Dd">,
922 Requires<[HasVFP2,UseFPVMLx]>;
922 Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
923923
924924 def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
925925 (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
927927 [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
928928 SPR:$Sdin))]>,
929929 RegConstraint<"$Sdin = $Sd">,
930 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
930 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> {
931931 // Some single precision VFP instructions may be executed on both NEON and
932932 // VFP pipelines on A8.
933933 let D = VFPNeonA8Domain;
935935
936936 def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
937937 (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
938 Requires<[HasVFP2,UseFPVMLx]>;
938 Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
939939 def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
940940 (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
941 Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx]>;
941 Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,NoVFP4]>;
942942
943943 def VMLSD : ADbI<0b11100, 0b00, 1, 0,
944944 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
946946 [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
947947 (f64 DPR:$Ddin)))]>,
948948 RegConstraint<"$Ddin = $Dd">,
949 Requires<[HasVFP2,UseFPVMLx]>;
949 Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
950950
951951 def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
952952 (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
954954 [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
955955 SPR:$Sdin))]>,
956956 RegConstraint<"$Sdin = $Sd">,
957 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
957 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> {
958958 // Some single precision VFP instructions may be executed on both NEON and
959959 // VFP pipelines on A8.
960960 let D = VFPNeonA8Domain;
962962
963963 def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
964964 (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
965 Requires<[HasVFP2,UseFPVMLx]>;
965 Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
966966 def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
967967 (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
968 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
968 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>;
969969
970970 def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
971971 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
973973 [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
974974 (f64 DPR:$Ddin)))]>,
975975 RegConstraint<"$Ddin = $Dd">,
976 Requires<[HasVFP2,UseFPVMLx]>;
976 Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
977977
978978 def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
979979 (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
981981 [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
982982 SPR:$Sdin))]>,
983983 RegConstraint<"$Sdin = $Sd">,
984 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
984 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> {
985985 // Some single precision VFP instructions may be executed on both NEON and
986986 // VFP pipelines on A8.
987987 let D = VFPNeonA8Domain;
989989
990990 def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
991991 (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
992 Requires<[HasVFP2,UseFPVMLx]>;
992 Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
993993 def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
994994 (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
995 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
995 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>;
996996
997997 def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
998998 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
10001000 [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
10011001 (f64 DPR:$Ddin)))]>,
10021002 RegConstraint<"$Ddin = $Dd">,
1003 Requires<[HasVFP2,UseFPVMLx]>;
1003 Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
10041004
10051005 def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
10061006 (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
10071007 IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
10081008 [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
10091009 RegConstraint<"$Sdin = $Sd">,
1010 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
1010 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> {
10111011 // Some single precision VFP instructions may be executed on both NEON and
10121012 // VFP pipelines on A8.
10131013 let D = VFPNeonA8Domain;
10151015
10161016 def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
10171017 (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
1018 Requires<[HasVFP2,UseFPVMLx]>;
1018 Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
10191019 def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
10201020 (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
1021 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
1022
1021 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>;
1022
1023 //===----------------------------------------------------------------------===//
1024 // Fused FP Multiply-Accumulate Operations.
1025 //
1026 def VFMAD : ADbI<0b11101, 0b10, 0, 0,
1027 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
1028 IIC_fpFMAC64, "vfma", ".f64\t$Dd, $Dn, $Dm",
1029 [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
1030 (f64 DPR:$Ddin)))]>,
1031 RegConstraint<"$Ddin = $Dd">,
1032 Requires<[HasVFP4]>;
1033
1034 def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
1035 (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
1036 IIC_fpFMAC32, "vfma", ".f32\t$Sd, $Sn, $Sm",
1037 [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
1038 SPR:$Sdin))]>,
1039 RegConstraint<"$Sdin = $Sd">,
1040 Requires<[HasVFP4,DontUseNEONForFP]> {
1041 // Some single precision VFP instructions may be executed on both NEON and
1042 // VFP pipelines.
1043 }
1044
1045 def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
1046 (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>,
1047 Requires<[HasVFP4]>;
1048 def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
1049 (VFMAS SPR:$dstin, SPR:$a, SPR:$b)>,
1050 Requires<[HasVFP4,DontUseNEONForFP]>;
1051
1052 def VFMSD : ADbI<0b11101, 0b10, 1, 0,
1053 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
1054 IIC_fpFMAC64, "vfms", ".f64\t$Dd, $Dn, $Dm",
1055 [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
1056 (f64 DPR:$Ddin)))]>,
1057 RegConstraint<"$Ddin = $Dd">,
1058 Requires<[HasVFP4]>;
1059
1060 def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
1061 (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
1062 IIC_fpFMAC32, "vfms", ".f32\t$Sd, $Sn, $Sm",
1063 [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
1064 SPR:$Sdin))]>,
1065 RegConstraint<"$Sdin = $Sd">,
1066 Requires<[HasVFP4,DontUseNEONForFP]> {
1067 // Some single precision VFP instructions may be executed on both NEON and
1068 // VFP pipelines.
1069 }
1070
1071 def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
1072 (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>,
1073 Requires<[HasVFP4]>;
1074 def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
1075 (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>,
1076 Requires<[HasVFP4,DontUseNEONForFP]>;
1077
1078 def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
1079 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
1080 IIC_fpFMAC64, "vfnma", ".f64\t$Dd, $Dn, $Dm",
1081 [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
1082 (f64 DPR:$Ddin)))]>,
1083 RegConstraint<"$Ddin = $Dd">,
1084 Requires<[HasVFP4]>;
1085
1086 def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
1087 (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
1088 IIC_fpFMAC32, "vfnma", ".f32\t$Sd, $Sn, $Sm",
1089 [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
1090 SPR:$Sdin))]>,
1091 RegConstraint<"$Sdin = $Sd">,
1092 Requires<[HasVFP4,DontUseNEONForFP]> {
1093 // Some single precision VFP instructions may be executed on both NEON and
1094 // VFP pipelines.
1095 }
1096
1097 def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
1098 (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>,
1099 Requires<[HasVFP4]>;
1100 def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
1101 (VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>,
1102 Requires<[HasVFP4,DontUseNEONForFP]>;
1103
1104 def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
1105 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
1106 IIC_fpFMAC64, "vfnms", ".f64\t$Dd, $Dn, $Dm",
1107 [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
1108 (f64 DPR:$Ddin)))]>,
1109 RegConstraint<"$Ddin = $Dd">,
1110 Requires<[HasVFP4]>;
1111
1112 def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
1113 (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
1114 IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm",
1115 [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
1116 RegConstraint<"$Sdin = $Sd">,
1117 Requires<[HasVFP4,DontUseNEONForFP]> {
1118 // Some single precision VFP instructions may be executed on both NEON and
1119 // VFP pipelines.
1120 }
1121
1122 def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
1123 (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>,
1124 Requires<[HasVFP4]>;
1125 def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
1126 (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>,
1127 Requires<[HasVFP4,DontUseNEONForFP]>;
10231128
10241129 //===----------------------------------------------------------------------===//
10251130 // FP Conditional moves.
117117 def IIC_fpMUL64 : InstrItinClass;
118118 def IIC_fpMAC32 : InstrItinClass;
119119 def IIC_fpMAC64 : InstrItinClass;
120 def IIC_fpFMAC32 : InstrItinClass;
121 def IIC_fpFMAC64 : InstrItinClass;
120122 def IIC_fpDIV32 : InstrItinClass;
121123 def IIC_fpDIV64 : InstrItinClass;
122124 def IIC_fpSQRT32 : InstrItinClass;
207209 def IIC_VPERMQ3 : InstrItinClass;
208210 def IIC_VMACD : InstrItinClass;
209211 def IIC_VMACQ : InstrItinClass;
212 def IIC_VFMACD : InstrItinClass;
213 def IIC_VFMACQ : InstrItinClass;
210214 def IIC_VRECSD : InstrItinClass;
211215 def IIC_VRECSQ : InstrItinClass;
212216 def IIC_VCNTiD : InstrItinClass;
4646 , HasV7Ops(false)
4747 , HasVFPv2(false)
4848 , HasVFPv3(false)
49 , HasVFPv4(false)
4950 , HasNEON(false)
51 , HasNEONVFPv4(false)
5052 , UseNEONForSinglePrecisionFP(false)
5153 , SlowFPVMLx(false)
5254 , HasVMLxForwarding(false)
4444 bool HasV6T2Ops;
4545 bool HasV7Ops;
4646
47 /// HasVFPv2, HasVFPv3, HasNEON - Specify what floating point ISAs are
48 /// supported.
47 /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON, HasNEONVFPv4 - Specify what
48 /// floating point ISAs are supported.
4949 bool HasVFPv2;
5050 bool HasVFPv3;
51 bool HasVFPv4;
5152 bool HasNEON;
53 bool HasNEONVFPv4;
5254
5355 /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
5456 /// specified. Use the method useNEONForSinglePrecisionFP() to
196198
197199 bool hasVFP2() const { return HasVFPv2; }
198200 bool hasVFP3() const { return HasVFPv3; }
201 bool hasVFP4() const { return HasVFPv4; }
199202 bool hasNEON() const { return HasNEON; }
203 bool hasNEONVFP4() const { return HasNEONVFPv4; }
200204 bool useNEONForSinglePrecisionFP() const {
201205 return hasNEON() && UseNEONForSinglePrecisionFP; }
202206
0 ; RUN: llc < %s -march=arm -mattr=+neon-vfpv4 | FileCheck %s
1 ; Check generated fused MAC and MLS.
2
3 define double @fusedMACTest1(double %d1, double %d2, double %d3) nounwind readnone noinline {
4 ;CHECK: fusedMACTest1:
5 ;CHECK: vfma.f64
6 %1 = fmul double %d1, %d2
7 %2 = fadd double %1, %d3
8 ret double %2
9 }
10
11 define float @fusedMACTest2(float %f1, float %f2, float %f3) nounwind readnone noinline {
12 ;CHECK: fusedMACTest2:
13 ;CHECK: vfma.f32
14 %1 = fmul float %f1, %f2
15 %2 = fadd float %1, %f3
16 ret float %2
17 }
18
19 define double @fusedMACTest3(double %d1, double %d2, double %d3) nounwind readnone noinline {
20 ;CHECK: fusedMACTest3:
21 ;CHECK: vfms.f64
22 %1 = fmul double %d2, %d3
23 %2 = fsub double %d1, %1
24 ret double %2
25 }
26
27 define float @fusedMACTest4(float %f1, float %f2, float %f3) nounwind readnone noinline {
28 ;CHECK: fusedMACTest4:
29 ;CHECK: vfms.f32
30 %1 = fmul float %f2, %f3
31 %2 = fsub float %f1, %1
32 ret float %2
33 }
34
35 define double @fusedMACTest5(double %d1, double %d2, double %d3) nounwind readnone noinline {
36 ;CHECK: fusedMACTest5:
37 ;CHECK: vfnma.f64
38 %1 = fmul double %d1, %d2
39 %2 = fsub double -0.0, %1
40 %3 = fsub double %2, %d3
41 ret double %3
42 }
43
44 define float @fusedMACTest6(float %f1, float %f2, float %f3) nounwind {
45 ;CHECK: fusedMACTest6:
46 ;CHECK: vfnma.f32
47 %1 = fmul float %f1, %f2
48 %2 = fsub float -0.0, %1
49 %3 = fsub float %2, %f3
50 ret float %3
51 }
52
53 define double @fusedMACTest7(double %d1, double %d2, double %d3) nounwind {
54 ;CHECK: fusedMACTest7:
55 ;CHECK: vfnms.f64
56 %1 = fmul double %d1, %d2
57 %2 = fsub double %1, %d3
58 ret double %2
59 }
60
61 define float @fusedMACTest8(float %f1, float %f2, float %f3) nounwind {
62 ;CHECK: fusedMACTest8:
63 ;CHECK: vfnms.f32
64 %1 = fmul float %f1, %f2
65 %2 = fsub float %1, %f3
66 ret float %2
67 }