llvm.org GIT mirror llvm / bee78fe
Clean up ARM fused multiply + add/sub support some more: rename some isel predicates.

Also remove NEON2 since it's not really useful and it is confusing. If NEON + VFP4 implies NEON2 but NEON2 doesn't imply NEON + VFP4, what does it really mean?

rdar://10139676

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154480 91177308-0d34-0410-b5e6-96231b3b80d8

Evan Cheng, 8 years ago
8 changed file(s) with 53 addition(s) and 61 deletion(s). Raw diff Collapse all Expand all
3737 def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
3838 "Enable NEON instructions",
3939 [FeatureVFP3]>;
40 def FeatureNEON2 : SubtargetFeature<"neon2", "HasNEON2", "true",
41 "Enable Advanced SIMD2 instructions",
42 [FeatureNEON]>;
4340 def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true",
4441 "Enable Thumb2 instructions">;
4542 def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
727727 if (Subtarget->hasNEON() && emitFPU) {
728728 /* NEON is not exactly a VFP architecture, but GAS emit one of
729729 * neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
730 if (Subtarget->hasNEON2())
730 if (Subtarget->hasVFP4())
731731 AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
732732 "neon-vfpv4");
733733 else
180180 AssemblerPredicate<"FeatureVFP3">;
181181 def HasVFP4 : Predicate<"Subtarget->hasVFP4()">,
182182 AssemblerPredicate<"FeatureVFP4">;
183 def NoVFP4 : Predicate<"!Subtarget->hasVFP4()">;
184183 def HasNEON : Predicate<"Subtarget->hasNEON()">,
185184 AssemblerPredicate<"FeatureNEON">;
186 def HasNEON2 : Predicate<"Subtarget->hasNEON2()">,
187 AssemblerPredicate<"FeatureNEON,FeatureVFP4">;
188 def NoNEON2 : Predicate<"!Subtarget->hasNEON2()">;
189185 def HasFP16 : Predicate<"Subtarget->hasFP16()">,
190186 AssemblerPredicate<"FeatureFP16">;
191187 def HasDivide : Predicate<"Subtarget->hasDivide()">,
220216 def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
221217 def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
222218
223 // Allow more precision in FP computation
224 def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">;
219 // Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
220 // But only select them if more precision in FP computation is allowed.
221 def UseFusedMAC : Predicate<"!TM.Options.NoExcessFPPrecision">;
222 def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4()">;
225223
226224 //===----------------------------------------------------------------------===//
227225 // ARM Flag Definitions.
40044004 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
40054005 def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
40064006 v2f32, fmul_su, fadd_mlx>,
4007 Requires<[HasNEON, UseFPVMLx, NoNEON2]>;
4007 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
40084008 def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
40094009 v4f32, fmul_su, fadd_mlx>,
4010 Requires<[HasNEON, UseFPVMLx, NoNEON2]>;
4010 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
40114011 defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
40124012 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
40134013 def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
40624062 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
40634063 def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
40644064 v2f32, fmul_su, fsub_mlx>,
4065 Requires<[HasNEON, UseFPVMLx, NoNEON2]>;
4065 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
40664066 def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
40674067 v4f32, fmul_su, fsub_mlx>,
4068 Requires<[HasNEON, UseFPVMLx, NoNEON2]>;
4068 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
40694069 defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
40704070 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
40714071 def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
41174117 // Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
41184118 def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
41194119 v2f32, fmul_su, fadd_mlx>,
4120 Requires<[HasNEON2,FPContractions]>;
4120 Requires<[HasVFP4,UseFusedMAC]>;
41214121
41224122 def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
41234123 v4f32, fmul_su, fadd_mlx>,
4124 Requires<[HasNEON2,FPContractions]>;
4124 Requires<[HasVFP4,UseFusedMAC]>;
41254125
41264126 // Fused Vector Multiply Subtract (floating-point)
41274127 def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
41284128 v2f32, fmul_su, fsub_mlx>,
4129 Requires<[HasNEON2,FPContractions]>;
4129 Requires<[HasVFP4,UseFusedMAC]>;
41304130 def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
41314131 v4f32, fmul_su, fsub_mlx>,
4132 Requires<[HasNEON2,FPContractions]>;
4132 Requires<[HasVFP4,UseFusedMAC]>;
41334133
41344134 // Match @llvm.fma.* intrinsics
41354135 def : Pat<(fma (v2f32 DPR:$src1), (v2f32 DPR:$Vn), (v2f32 DPR:$Vm)),
41364136 (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
4137 Requires<[HasNEON2]>;
4137 Requires<[HasVFP4]>;
41384138 def : Pat<(fma (v4f32 QPR:$src1), (v4f32 QPR:$Vn), (v4f32 QPR:$Vm)),
41394139 (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
4140 Requires<[HasNEON2]>;
4140 Requires<[HasVFP4]>;
41414141
41424142 // Vector Subtract Operations.
41434143
54915491 def : N3VSPat;
54925492 def : N3VSPat;
54935493 def : N3VSMulOpPat,
5494 Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEON2]>;
5494 Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
54955495 def : N3VSMulOpPat,
5496 Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEON2]>;
5496 Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
54975497 def : N3VSMulOpPat,
5498 Requires<[HasNEON2, UseNEONForFP, FPContractions]>;
5498 Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
54995499 def : N3VSMulOpPat,
5500 Requires<[HasNEON2, UseNEONForFP, FPContractions]>;
5500 Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
55015501 def : N2VSPat;
55025502 def : N2VSPat;
55035503 def : N3VSPat;
949949 [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
950950 (f64 DPR:$Ddin)))]>,
951951 RegConstraint<"$Ddin = $Dd">,
952 Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
952 Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
953953
954954 def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
955955 (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
957957 [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
958958 SPR:$Sdin))]>,
959959 RegConstraint<"$Sdin = $Sd">,
960 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> {
960 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
961961 // Some single precision VFP instructions may be executed on both NEON and
962962 // VFP pipelines on A8.
963963 let D = VFPNeonA8Domain;
965965
966966 def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
967967 (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
968 Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
968 Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
969969 def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
970970 (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
971 Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,NoVFP4]>;
971 Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,DontUseFusedMAC]>;
972972
973973 def VMLSD : ADbI<0b11100, 0b00, 1, 0,
974974 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
976976 [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
977977 (f64 DPR:$Ddin)))]>,
978978 RegConstraint<"$Ddin = $Dd">,
979 Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
979 Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
980980
981981 def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
982982 (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
984984 [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
985985 SPR:$Sdin))]>,
986986 RegConstraint<"$Sdin = $Sd">,
987 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> {
987 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
988988 // Some single precision VFP instructions may be executed on both NEON and
989989 // VFP pipelines on A8.
990990 let D = VFPNeonA8Domain;
992992
993993 def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
994994 (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
995 Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
995 Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
996996 def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
997997 (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
998 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>;
998 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
999999
10001000 def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
10011001 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
10031003 [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
10041004 (f64 DPR:$Ddin)))]>,
10051005 RegConstraint<"$Ddin = $Dd">,
1006 Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
1006 Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
10071007
10081008 def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
10091009 (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
10111011 [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
10121012 SPR:$Sdin))]>,
10131013 RegConstraint<"$Sdin = $Sd">,
1014 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> {
1014 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
10151015 // Some single precision VFP instructions may be executed on both NEON and
10161016 // VFP pipelines on A8.
10171017 let D = VFPNeonA8Domain;
10191019
10201020 def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
10211021 (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
1022 Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
1022 Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
10231023 def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
10241024 (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
1025 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>;
1025 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
10261026
10271027 def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
10281028 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
10301030 [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
10311031 (f64 DPR:$Ddin)))]>,
10321032 RegConstraint<"$Ddin = $Dd">,
1033 Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
1033 Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
10341034
10351035 def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
10361036 (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
10371037 IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
10381038 [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
10391039 RegConstraint<"$Sdin = $Sd">,
1040 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> {
1040 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
10411041 // Some single precision VFP instructions may be executed on both NEON and
10421042 // VFP pipelines on A8.
10431043 let D = VFPNeonA8Domain;
10451045
10461046 def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
10471047 (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
1048 Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
1048 Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
10491049 def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
10501050 (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
1051 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>;
1051 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
10521052
10531053 //===----------------------------------------------------------------------===//
10541054 // Fused FP Multiply-Accumulate Operations.
10591059 [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
10601060 (f64 DPR:$Ddin)))]>,
10611061 RegConstraint<"$Ddin = $Dd">,
1062 Requires<[HasVFP4,FPContractions]>;
1062 Requires<[HasVFP4,UseFusedMAC]>;
10631063
10641064 def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
10651065 (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
10671067 [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
10681068 SPR:$Sdin))]>,
10691069 RegConstraint<"$Sdin = $Sd">,
1070 Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> {
1070 Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
10711071 // Some single precision VFP instructions may be executed on both NEON and
10721072 // VFP pipelines.
10731073 }
10741074
10751075 def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
10761076 (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>,
1077 Requires<[HasVFP4,FPContractions]>;
1077 Requires<[HasVFP4,UseFusedMAC]>;
10781078 def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
10791079 (VFMAS SPR:$dstin, SPR:$a, SPR:$b)>,
1080 Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>;
1080 Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
10811081
10821082 // Match @llvm.fma.* intrinsics
10831083 def : Pat<(fma (f64 DPR:$Ddin), (f64 DPR:$Dn), (f64 DPR:$Dm)),
10931093 [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
10941094 (f64 DPR:$Ddin)))]>,
10951095 RegConstraint<"$Ddin = $Dd">,
1096 Requires<[HasVFP4,FPContractions]>;
1096 Requires<[HasVFP4,UseFusedMAC]>;
10971097
10981098 def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
10991099 (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
11011101 [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
11021102 SPR:$Sdin))]>,
11031103 RegConstraint<"$Sdin = $Sd">,
1104 Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> {
1104 Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
11051105 // Some single precision VFP instructions may be executed on both NEON and
11061106 // VFP pipelines.
11071107 }
11081108
11091109 def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
11101110 (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>,
1111 Requires<[HasVFP4,FPContractions]>;
1111 Requires<[HasVFP4,UseFusedMAC]>;
11121112 def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
11131113 (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>,
1114 Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>;
1114 Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
11151115
11161116 def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
11171117 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
11191119 [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
11201120 (f64 DPR:$Ddin)))]>,
11211121 RegConstraint<"$Ddin = $Dd">,
1122 Requires<[HasVFP4,FPContractions]>;
1122 Requires<[HasVFP4,UseFusedMAC]>;
11231123
11241124 def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
11251125 (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
11271127 [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
11281128 SPR:$Sdin))]>,
11291129 RegConstraint<"$Sdin = $Sd">,
1130 Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> {
1130 Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
11311131 // Some single precision VFP instructions may be executed on both NEON and
11321132 // VFP pipelines.
11331133 }
11341134
11351135 def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
11361136 (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>,
1137 Requires<[HasVFP4,FPContractions]>;
1137 Requires<[HasVFP4,UseFusedMAC]>;
11381138 def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
11391139 (VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>,
1140 Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>;
1140 Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
11411141
11421142 // Match @llvm.fma.* intrinsics
11431143 def : Pat<(fneg (fma (f64 DPR:$Ddin), (f64 DPR:$Dn), (f64 DPR:$Dm))),
11531153 [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
11541154 (f64 DPR:$Ddin)))]>,
11551155 RegConstraint<"$Ddin = $Dd">,
1156 Requires<[HasVFP4,FPContractions]>;
1156 Requires<[HasVFP4,UseFusedMAC]>;
11571157
11581158 def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
11591159 (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
11601160 IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm",
11611161 [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
11621162 RegConstraint<"$Sdin = $Sd">,
1163 Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> {
1163 Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
11641164 // Some single precision VFP instructions may be executed on both NEON and
11651165 // VFP pipelines.
11661166 }
11671167
11681168 def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
11691169 (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>,
1170 Requires<[HasVFP4,FPContractions]>;
1170 Requires<[HasVFP4,UseFusedMAC]>;
11711171 def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
11721172 (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>,
1173 Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>;
1173 Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
11741174
11751175 //===----------------------------------------------------------------------===//
11761176 // FP Conditional moves.
4747 , HasVFPv3(false)
4848 , HasVFPv4(false)
4949 , HasNEON(false)
50 , HasNEON2(false)
5150 , UseNEONForSinglePrecisionFP(false)
5251 , SlowFPVMLx(false)
5352 , HasVMLxForwarding(false)
4444 bool HasV6T2Ops;
4545 bool HasV7Ops;
4646
47 /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON, HasNEON2 - Specify what
47 /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON - Specify what
4848 /// floating point ISAs are supported.
4949 bool HasVFPv2;
5050 bool HasVFPv3;
5151 bool HasVFPv4;
5252 bool HasNEON;
53 bool HasNEON2;
5453
5554 /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
5655 /// specified. Use the method useNEONForSinglePrecisionFP() to
204203 bool hasVFP3() const { return HasVFPv3; }
205204 bool hasVFP4() const { return HasVFPv4; }
206205 bool hasNEON() const { return HasNEON; }
207 bool hasNEON2() const { return HasNEON2 || (HasNEON && HasVFPv4); }
208206 bool useNEONForSinglePrecisionFP() const {
209207 return hasNEON() && UseNEONForSinglePrecisionFP; }
210208
0 @ RUN: llvm-mc < %s -triple armv7-unknown-unknown -show-encoding -mattr=+neon,+vfp4 | FileCheck %s --check-prefix=ARM
11 @ RUN: llvm-mc < %s -triple thumbv7-unknown-unknown -show-encoding -mattr=+neon,+vfp4 | FileCheck %s --check-prefix=THUMB
22
3 @ ARM: vfma.f64 d16, d18, d17 @ encoding: [0xa1,0x0b,0xe2,0xee]
3 @ ARM: vfma.f64 d16, d18, d17 @ encoding: [0xa1,0x0b,0xe2,0xee]
44 @ THUMB: vfma.f64 d16, d18, d17 @ encoding: [0xe2,0xee,0xa1,0x0b]
55 vfma.f64 d16, d18, d17
66