llvm.org GIT mirror llvm / 42a83f2
Initial support for single-precision FP using NEON. Added "neonfp" attribute to enable. Added patterns for some binary FP operations. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78081 91177308-0d34-0410-b5e6-96231b3b80d8 David Goodwin 10 years ago
16 changed file(s) with 168 addition(s) and 16 deletion(s). Raw diff Collapse all Expand all
3131 "ARM v6t2">;
3232 def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A",
3333 "ARM v7A">;
34 def FeatureNEONFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
35 "true",
36 "Use NEON for single-precision FP">;
3437 def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2",
3538 "Enable VFP2 instructions">;
3639 def FeatureVFP3 : SubtargetFeature<"vfp3", "ARMFPUType", "VFPv3",
10791079 let Inst{11-8} = 0b1010;
10801080 }
10811081
1082 // Single precision, binary, if NEON is not used for single-precision FP
1083 // Same as ASbI except not available when NEON is used for single-precision FP
1084 class ASbIn opcod, dag oops, dag iops, string opc,
1085 string asm, list pattern>
1086 : ASbI {
1087 list Predicates = [HasVFP2,DontUseNEONForFP];
1088 }
1089
10821090 // VFP conversion instructions
10831091 class AVConv1I opcod1, bits<4> opcod2, bits<4> opcod3,
10841092 dag oops, dag iops, string opc, string asm, list pattern>
12191227 class NVDup opcod1, bits<4> opcod2, bits<2> opcod3,
12201228 dag oops, dag iops, string opc, string asm, list pattern>
12211229 : NVLaneOp;
1230
1231 // NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON
1232 // for single-precision FP.
1233 class NEONFPPat : Pat {
1234 list Predicates = [HasNEON,UseNEONForFP];
1235 }
103103 def HasVFP2 : Predicate<"Subtarget->hasVFP2()">;
104104 def HasVFP3 : Predicate<"Subtarget->hasVFP3()">;
105105 def HasNEON : Predicate<"Subtarget->hasNEON()">;
106 def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
107 def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
106108 def IsThumb : Predicate<"Subtarget->isThumb()">;
107109 def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
108110 def IsThumb2 : Predicate<"Subtarget->isThumb2()">;
281281 [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
282282 let isCommutable = Commutable;
283283 }
284
285 // Basic 3-register operations, scalar single-precision
286 class N3VDs
287 : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
288 (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0),
289 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)),
290 arm_ssubreg_0)>;
284291
285292 // Basic 3-register intrinsics, both double- and quad-register.
286293 class N3VDInt op21_20, bits<4> op11_8, bit op4,
317324 !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
318325 [(set QPR:$dst, (Ty (OpNode QPR:$src1,
319326 (Ty (MulOp QPR:$src2, QPR:$src3)))))]>;
327
328 // Multiply-Add/Sub operations, scalar single-precision
329 class N3VDMulOps
330 : NEONFPPat<(f32 (OpNode SPR:$acc,
331 (f32 (MulNode SPR:$a, SPR:$b)))),
332 (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$acc, arm_ssubreg_0),
333 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0),
334 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)),
335 arm_ssubreg_0)>;
320336
321337 // Neon 3-argument intrinsics, both double- and quad-register.
322338 // The destination register is also used as the first source operand register.
885901 // VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
886902 defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn.i", int_arm_neon_vraddhn, 1>;
887903
904 // Vector Add Operations used for single-precision FP
905 def : N3VDs;
906
888907 // Vector Multiply Operations.
889908
890909 // VMUL : Vector Multiply (integer, polynomial and floating-point)
907926 // VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
908927 defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, "vqdmull.s", int_arm_neon_vqdmull, 1>;
909928
929 // Vector Multiply Operations used for single-precision FP
930 def : N3VDs;
931
910932 // Vector Multiply-Accumulate and Multiply-Subtract Operations.
911933
912934 // VMLA : Vector Multiply Accumulate (integer and floating-point)
927949 defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl.u", int_arm_neon_vmlslu>;
928950 // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
929951 defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl.s", int_arm_neon_vqdmlsl>;
952
953 // Vector Multiply-Accumulate/Subtract used for single-precision FP
954 def : N3VDMulOps;
930955
931956 // Vector Subtract Operations.
932957
950975 defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn.i", int_arm_neon_vsubhn, 0>;
951976 // VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
952977 defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn.i", int_arm_neon_vrsubhn, 0>;
978
979 // Vector Sub Operations used for single-precision FP
980 def : N3VDs;
953981
954982 // Vector Comparisons.
955983
9797 "faddd", " $dst, $a, $b",
9898 [(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>;
9999
100 def FADDS : ASbI<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
101 "fadds", " $dst, $a, $b",
102 [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>;
100 def FADDS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
101 "fadds", " $dst, $a, $b",
102 [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>;
103103
104104 // These are encoded as unary instructions.
105105 let Defs = [FPSCR] in {
124124 "fmuld", " $dst, $a, $b",
125125 [(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>;
126126
127 def FMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
128 "fmuls", " $dst, $a, $b",
129 [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>;
127 def FMULS : ASbIn<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
128 "fmuls", " $dst, $a, $b",
129 [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>;
130130
131131 def FNMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
132132 "fnmuld", " $dst, $a, $b",
153153 let Inst{6} = 1;
154154 }
155155
156 def FSUBS : ASbI<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
157 "fsubs", " $dst, $a, $b",
158 [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]> {
156 def FSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
157 "fsubs", " $dst, $a, $b",
158 [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]> {
159159 let Inst{6} = 1;
160160 }
161161
316316 [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>,
317317 RegConstraint<"$dstin = $dst">;
318318
319 def FMACS : ASbI<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
320 "fmacs", " $dst, $a, $b",
321 [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
322 RegConstraint<"$dstin = $dst">;
319 def FMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
320 "fmacs", " $dst, $a, $b",
321 [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
322 RegConstraint<"$dstin = $dst">;
323323
324324 def FMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
325325 "fmscd", " $dst, $a, $b",
338338 let Inst{6} = 1;
339339 }
340340
341 def FNMACS : ASbI<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
342 "fnmacs", " $dst, $a, $b",
341 def FNMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
342 "fnmacs", " $dst, $a, $b",
343343 [(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
344344 RegConstraint<"$dstin = $dst"> {
345345 let Inst{6} = 1;
2424 bool isThumb)
2525 : ARMArchVersion(V4T)
2626 , ARMFPUType(None)
27 , UseNEONForSinglePrecisionFP(false)
2728 , IsThumb(isThumb)
2829 , ThumbMode(Thumb1)
2930 , IsR9Reserved(ReserveR9)
4040
4141 /// ARMFPUType - Floating Point Unit type.
4242 ARMFPEnum ARMFPUType;
43
44 /// UseNEONForSinglePrecisionFP - If true and NEON is available, use NEON for single-precision FP.
45 bool UseNEONForSinglePrecisionFP;
4346
4447 /// IsThumb - True if we are in thumb mode, false if in ARM mode.
4548 bool IsThumb;
97100 bool hasVFP2() const { return ARMFPUType >= VFPv2; }
98101 bool hasVFP3() const { return ARMFPUType >= VFPv3; }
99102 bool hasNEON() const { return ARMFPUType >= NEON; }
100
103 bool useNEONForSinglePrecisionFP() const {
104 return hasNEON() && UseNEONForSinglePrecisionFP; }
105
101106 bool isTargetDarwin() const { return TargetType == isDarwin; }
102107 bool isTargetELF() const { return TargetType == isELF; }
103108
0 ; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
1 ; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {vadd.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
2 ; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
3
4 define float @test(float %a, float %b) {
5 entry:
6 %0 = fadd float %a, %b
7 ret float %0
8 }
9
0 ; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
1 ; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
2 ; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
3
4 define float @test(float %a, float %b) {
5 entry:
6 %0 = fdiv float %a, %b
7 ret float %0
8 }
9
0 ; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
1 ; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {vmla.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
2 ; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
3
4 define float @test(float %acc, float %a, float %b) {
5 entry:
6 %0 = fmul float %a, %b
7 %1 = fadd float %acc, %0
8 ret float %1
9 }
10
0 ; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
1 ; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
2 ; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
3
4 define float @test(float %acc, float %a, float %b) {
5 entry:
6 %0 = fmul float %a, %b
7 %1 = fsub float %0, %acc
8 ret float %1
9 }
10
0 ; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
1 ; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
2 ; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
3
4 define float @test(float %a, float %b) {
5 entry:
6 %0 = fmul float %a, %b
7 ret float %0
8 }
9
0 ; XFAIL: *
1 ; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fnmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
2 ; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {vmls.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
3 ; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fnmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
4
5 define float @test(float %acc, float %a, float %b) {
6 entry:
7 %0 = fmul float %a, %b
8 %1 = fsub float %acc, %0
9 ret float %1
10 }
11
0 ; XFAIL: *
1 ; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fnmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
2 ; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {fnmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
3 ; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fnmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
4
5 define float @test(float %acc, float %a, float %b) {
6 entry:
7 %0 = fmul float %a, %b
8 %1 = fsub float 0.0, %0
9 %2 = fsub float %1, %acc
10 ret float %2
11 }
12
0 ; XFAIL: *
1 ; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fnmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
2 ; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {fnmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
3 ; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fnmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
4
5 define float @test(float %a, float %b) {
6 entry:
7 %0 = fmul float %a, %b
8 %1 = fsub float 0.0, %0
9 ret float %1
10 }
11
0 ; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fsubs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
1 ; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {vsub.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
2 ; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fsubs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
3
4 define float @test(float %a, float %b) {
5 entry:
6 %0 = fsub float %a, %b
7 ret float %0
8 }
9