llvm.org GIT mirror llvm / 529916c
Add some missing isel predicates on def : pat patterns to avoid generating VFP vmla / vmls (they cause stalls). Disabling them in isel is probably not the right solution; I'll look into a proper solution next. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@118922 91177308-0d34-0410-b5e6-96231b3b80d8 Evan Cheng 9 years ago
8 changed file(s) with 211 addition(s) and 120 deletion(s). Raw diff Collapse all Expand all
167167 [ArchV7A, ProcA8,
168168 FeatureHasSlowVMLx, FeatureT2XtPk]>;
169169 def : Processor<"cortex-a9", CortexA9Itineraries,
170 [ArchV7A, ProcA9, FeatureT2XtPk]>;
170 [ArchV7A, ProcA9,
171 FeatureHasSlowVMLx, FeatureT2XtPk]>;
171172
172173 // V7M Processors.
173174 def : ProcNoItin<"cortex-m3", [ArchV7M]>;
15761576 let Inst{4} = op4;
15771577 }
15781578
1579 // Double precision, binary, VML[AS] (for additional predicate)
1580 class ADbI_vmlX opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
1581 dag iops, InstrItinClass itin, string opc, string asm,
1582 list pattern>
1583 : VFPAI {
1584 // Instruction operands.
1585 bits<5> Dd;
1586 bits<5> Dn;
1587 bits<5> Dm;
1588
1589 // Encode instruction operands.
1590 let Inst{19-16} = Dn{3-0};
1591 let Inst{7} = Dn{4};
1592 let Inst{15-12} = Dd{3-0};
1593 let Inst{22} = Dd{4};
1594 let Inst{3-0} = Dm{3-0};
1595 let Inst{5} = Dm{4};
1596
1597 let Inst{27-23} = opcod1;
1598 let Inst{21-20} = opcod2;
1599 let Inst{11-9} = 0b101;
1600 let Inst{8} = 1; // Double precision
1601 let Inst{6} = op6;
1602 let Inst{4} = op4;
1603 list Predicates = [HasVFP2, UseVMLx];
1604 }
1605
16061579 // Single precision, unary
16071580 class ASuI opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
16081581 bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
737737 // FP FMA Operations.
738738 //
739739
740 def VMLAD : ADbI_vmlX<0b11100, 0b00, 0, 0,
741 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
742 IIC_fpMAC64, "vmla", ".f64\t$Dd, $Dn, $Dm",
743 [(set DPR:$Dd, (fadd (fmul DPR:$Dn, DPR:$Dm),
744 (f64 DPR:$Ddin)))]>,
745 RegConstraint<"$Ddin = $Dd">;
740 def VMLAD : ADbI<0b11100, 0b00, 0, 0,
741 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
742 IIC_fpMAC64, "vmla", ".f64\t$Dd, $Dn, $Dm",
743 [(set DPR:$Dd, (fadd (fmul DPR:$Dn, DPR:$Dm),
744 (f64 DPR:$Ddin)))]>,
745 RegConstraint<"$Ddin = $Dd">,
746 Requires<[HasVFP2,UseVMLx]>;
746747
747748 def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
748749 (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
749750 IIC_fpMAC32, "vmla", ".f32\t$Sd, $Sn, $Sm",
750751 [(set SPR:$Sd, (fadd (fmul SPR:$Sn, SPR:$Sm),
751752 SPR:$Sdin))]>,
752 RegConstraint<"$Sdin = $Sd">;
753 RegConstraint<"$Sdin = $Sd">,
754 Requires<[HasVFP2,DontUseNEONForFP,UseVMLx]>;
753755
754756 def : Pat<(fadd DPR:$dstin, (fmul DPR:$a, (f64 DPR:$b))),
755 (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
757 (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
758 Requires<[HasVFP2,UseVMLx]>;
756759 def : Pat<(fadd SPR:$dstin, (fmul SPR:$a, SPR:$b)),
757 (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
758
759 def VMLSD : ADbI_vmlX<0b11100, 0b00, 1, 0,
760 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
761 IIC_fpMAC64, "vmls", ".f64\t$Dd, $Dn, $Dm",
762 [(set DPR:$Dd, (fadd (fneg (fmul DPR:$Dn,DPR:$Dm)),
763 (f64 DPR:$Ddin)))]>,
764 RegConstraint<"$Ddin = $Dd">;
760 (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
761 Requires<[HasVFP2,DontUseNEONForFP, UseVMLx]>;
762
763 def VMLSD : ADbI<0b11100, 0b00, 1, 0,
764 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
765 IIC_fpMAC64, "vmls", ".f64\t$Dd, $Dn, $Dm",
766 [(set DPR:$Dd, (fadd (fneg (fmul DPR:$Dn,DPR:$Dm)),
767 (f64 DPR:$Ddin)))]>,
768 RegConstraint<"$Ddin = $Dd">,
769 Requires<[HasVFP2,UseVMLx]>;
765770
766771 def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
767772 (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
768773 IIC_fpMAC32, "vmls", ".f32\t$Sd, $Sn, $Sm",
769774 [(set SPR:$Sd, (fadd (fneg (fmul SPR:$Sn, SPR:$Sm)),
770775 SPR:$Sdin))]>,
771 RegConstraint<"$Sdin = $Sd">;
776 RegConstraint<"$Sdin = $Sd">,
777 Requires<[HasVFP2,DontUseNEONForFP,UseVMLx]>;
772778
773779 def : Pat<(fsub DPR:$dstin, (fmul DPR:$a, (f64 DPR:$b))),
774 (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
780 (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
781 Requires<[HasVFP2,UseVMLx]>;
775782 def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)),
776 (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
777
778 def VNMLAD : ADbI_vmlX<0b11100, 0b01, 1, 0,
779 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
780 IIC_fpMAC64, "vnmla", ".f64\t$Dd, $Dn, $Dm",
781 [(set DPR:$Dd,(fsub (fneg (fmul DPR:$Dn,DPR:$Dm)),
782 (f64 DPR:$Ddin)))]>,
783 RegConstraint<"$Ddin = $Dd">;
783 (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
784 Requires<[HasVFP2,DontUseNEONForFP,UseVMLx]>;
785
786 def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
787 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
788 IIC_fpMAC64, "vnmla", ".f64\t$Dd, $Dn, $Dm",
789 [(set DPR:$Dd,(fsub (fneg (fmul DPR:$Dn,DPR:$Dm)),
790 (f64 DPR:$Ddin)))]>,
791 RegConstraint<"$Ddin = $Dd">,
792 Requires<[HasVFP2,UseVMLx]>;
784793
785794 def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
786795 (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
787796 IIC_fpMAC32, "vnmla", ".f32\t$Sd, $Sn, $Sm",
788797 [(set SPR:$Sd, (fsub (fneg (fmul SPR:$Sn, SPR:$Sm)),
789798 SPR:$Sdin))]>,
790 RegConstraint<"$Sdin = $Sd">;
799 RegConstraint<"$Sdin = $Sd">,
800 Requires<[HasVFP2,DontUseNEONForFP,UseVMLx]>;
791801
792802 def : Pat<(fsub (fneg (fmul DPR:$a, (f64 DPR:$b))), DPR:$dstin),
793 (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
803 (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
804 Requires<[HasVFP2,UseVMLx]>;
794805 def : Pat<(fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin),
795 (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
796
797 def VNMLSD : ADbI_vmlX<0b11100, 0b01, 0, 0,
798 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
799 IIC_fpMAC64, "vnmls", ".f64\t$Dd, $Dn, $Dm",
800 [(set DPR:$Dd, (fsub (fmul DPR:$Dn, DPR:$Dm),
801 (f64 DPR:$Ddin)))]>,
802 RegConstraint<"$Ddin = $Dd">;
806 (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
807 Requires<[HasVFP2,DontUseNEONForFP,UseVMLx]>;
808
809 def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
810 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
811 IIC_fpMAC64, "vnmls", ".f64\t$Dd, $Dn, $Dm",
812 [(set DPR:$Dd, (fsub (fmul DPR:$Dn, DPR:$Dm),
813 (f64 DPR:$Ddin)))]>,
814 RegConstraint<"$Ddin = $Dd">,
815 Requires<[HasVFP2,UseVMLx]>;
803816
804817 def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
805818 (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
806819 IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
807820 [(set SPR:$Sd, (fsub (fmul SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
808 RegConstraint<"$Sdin = $Sd">;
821 RegConstraint<"$Sdin = $Sd">,
822 Requires<[HasVFP2,DontUseNEONForFP,UseVMLx]>;
809823
810824 def : Pat<(fsub (fmul DPR:$a, (f64 DPR:$b)), DPR:$dstin),
811 (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
825 (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
826 Requires<[HasVFP2,UseVMLx]>;
812827 def : Pat<(fsub (fmul SPR:$a, SPR:$b), SPR:$dstin),
813 (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
828 (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
829 Requires<[HasVFP2,DontUseNEONForFP,UseVMLx]>;
814830
815831
816832 //===----------------------------------------------------------------------===//
0 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
1 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
2 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
3 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
1 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
2 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
43
5 define float @test(float %acc, float %a, float %b) {
4 define float @t1(float %acc, float %a, float %b) {
65 entry:
6 ; VFP2: t1:
7 ; VFP2: vmla.f32
8
9 ; NEON: t1:
10 ; NEON: vmla.f32
11
12 ; A8: t1:
13 ; A8: vmul.f32
14 ; A8: vadd.f32
715 %0 = fmul float %a, %b
816 %1 = fadd float %acc, %0
917 ret float %1
1018 }
1119
12 ; VFP2: test:
13 ; VFP2: vmla.f32 s2, s1, s0
20 define double @t2(double %acc, double %a, double %b) {
21 entry:
22 ; VFP2: t2:
23 ; VFP2: vmla.f64
1424
15 ; NFP1: test:
16 ; NFP1: vmul.f32 d0, d1, d0
17 ; NFP0: test:
18 ; NFP0: vmla.f32 s2, s1, s0
25 ; NEON: t2:
26 ; NEON: vmla.f64
1927
20 ; CORTEXA8: test:
21 ; CORTEXA8: vmul.f32 d0, d1, d0
22 ; CORTEXA9: test:
23 ; CORTEXA9: vmla.f32 s2, s1, s0
28 ; A8: t2:
29 ; A8: vmul.f64
30 ; A8: vadd.f64
31 %0 = fmul double %a, %b
32 %1 = fadd double %acc, %0
33 ret double %1
34 }
35
36 define float @t3(float %acc, float %a, float %b) {
37 entry:
38 ; VFP2: t3:
39 ; VFP2: vmla.f32
40
41 ; NEON: t3:
42 ; NEON: vmla.f32
43
44 ; A8: t3:
45 ; A8: vmul.f32
46 ; A8: vadd.f32
47 %0 = fmul float %a, %b
48 %1 = fadd float %0, %acc
49 ret float %1
50 }
0 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
1 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
2 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
3 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
1 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
2 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
43
5 define float @test(float %acc, float %a, float %b) {
4 define float @t1(float %acc, float %a, float %b) {
65 entry:
6 ; VFP2: t1:
7 ; VFP2: vnmls.f32
8
9 ; NEON: t1:
10 ; NEON: vnmls.f32
11
12 ; A8: t1:
13 ; A8: vmul.f32
14 ; A8: vsub.f32
715 %0 = fmul float %a, %b
816 %1 = fsub float %0, %acc
917 ret float %1
1018 }
1119
12 ; VFP2: test:
13 ; VFP2: vnmls.f32 s2, s1, s0
20 define double @t2(double %acc, double %a, double %b) {
21 entry:
22 ; VFP2: t2:
23 ; VFP2: vnmls.f64
1424
15 ; NFP1: test:
16 ; NFP1: vnmls.f32 s2, s1, s0
17 ; NFP0: test:
18 ; NFP0: vnmls.f32 s2, s1, s0
25 ; NEON: t2:
26 ; NEON: vnmls.f64
1927
20 ; CORTEXA8: test:
21 ; CORTEXA8: vnmls.f32 s2, s1, s0
22 ; CORTEXA9: test:
23 ; CORTEXA9: vnmls.f32 s2, s1, s0
28 ; A8: t2:
29 ; A8: vmul.f64
30 ; A8: vsub.f64
31 %0 = fmul double %a, %b
32 %1 = fsub double %0, %acc
33 ret double %1
34 }
0 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
11 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
2 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEONFP
2 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
33
4 define float @test(float %acc, float %a, float %b) {
4 define float @t1(float %acc, float %a, float %b) {
55 entry:
6 ; VFP2: t1:
67 ; VFP2: vmls.f32
8
9 ; NEON: t1:
710 ; NEON: vmls.f32
811
9 ; NEONFP-NOT: vmls
10 ; NEONFP-NOT: vmov.f32
11 ; NEONFP: vmul.f32
12 ; NEONFP: vsub.f32
13 ; NEONFP: vmov
14
12 ; A8: t1:
13 ; A8: vmul.f32
14 ; A8: vsub.f32
1515 %0 = fmul float %a, %b
1616 %1 = fsub float %acc, %0
1717 ret float %1
1818 }
1919
20 define double @t2(double %acc, double %a, double %b) {
21 entry:
22 ; VFP2: t2:
23 ; VFP2: vmls.f64
24
25 ; NEON: t2:
26 ; NEON: vmls.f64
27
28 ; A8: t2:
29 ; A8: vmul.f64
30 ; A8: vsub.f64
31 %0 = fmul double %a, %b
32 %1 = fsub double %acc, %0
33 ret double %1
34 }
None ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
1 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
2 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
3 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
0 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
1 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
2 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
43
5 define float @test1(float %acc, float %a, float %b) nounwind {
6 ; CHECK: vnmla.f32 s{{.*}}, s{{.*}}, s{{.*}}
4 define float @t1(float %acc, float %a, float %b) nounwind {
75 entry:
6 ; VFP2: t1:
7 ; VFP2: vnmla.f32
8
9 ; NEON: t1:
10 ; NEON: vnmla.f32
11
12 ; A8: t1:
13 ; A8: vnmul.f32 s0, s1, s0
14 ; A8: vsub.f32 d0, d0, d1
815 %0 = fmul float %a, %b
916 %1 = fsub float -0.0, %0
1017 %2 = fsub float %1, %acc
1118 ret float %2
1219 }
1320
14 define float @test2(float %acc, float %a, float %b) nounwind {
15 ; CHECK: vnmla.f32 s{{.*}}, s{{.*}}, s{{.*}}
21 define float @t2(float %acc, float %a, float %b) nounwind {
1622 entry:
23 ; VFP2: t2:
24 ; VFP2: vnmla.f32
25
26 ; NEON: t2:
27 ; NEON: vnmla.f32
28
29 ; A8: t2:
30 ; A8: vnmul.f32 s0, s1, s0
31 ; A8: vsub.f32 d0, d0, d1
1732 %0 = fmul float %a, %b
1833 %1 = fmul float -1.0, %0
1934 %2 = fsub float %1, %acc
2035 ret float %2
2136 }
2237
38 define double @t3(double %acc, double %a, double %b) nounwind {
39 entry:
40 ; VFP2: t3:
41 ; VFP2: vnmla.f64
42
43 ; NEON: t3:
44 ; NEON: vnmla.f64
45
46 ; A8: t3:
47 ; A8: vnmul.f64 d16, d16, d17
48 ; A8: vsub.f64 d16, d16, d17
49 %0 = fmul double %a, %b
50 %1 = fsub double -0.0, %0
51 %2 = fsub double %1, %acc
52 ret double %2
53 }
54
55 define double @t4(double %acc, double %a, double %b) nounwind {
56 entry:
57 ; VFP2: t4:
58 ; VFP2: vnmla.f64
59
60 ; NEON: t4:
61 ; NEON: vnmla.f64
62
63 ; A8: t4:
64 ; A8: vnmul.f64 d16, d16, d17
65 ; A8: vsub.f64 d16, d16, d17
66 %0 = fmul double %a, %b
67 %1 = fmul double -1.0, %0
68 %2 = fsub double %1, %acc
69 ret double %2
70 }
33 ; constant offset addressing, so that each of the following stores
44 ; uses the same register.
55
6 ; CHECK: vstr.32 s{{.*}}, [lr, #-128]
7 ; CHECK: vstr.32 s{{.*}}, [lr, #-96]
8 ; CHECK: vstr.32 s{{.*}}, [lr, #-64]
9 ; CHECK: vstr.32 s{{.*}}, [lr, #-32]
10 ; CHECK: vstr.32 s{{.*}}, [lr]
11 ; CHECK: vstr.32 s{{.*}}, [lr, #32]
12 ; CHECK: vstr.32 s{{.*}}, [lr, #64]
13 ; CHECK: vstr.32 s{{.*}}, [lr, #96]
6 ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-128]
7 ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-96]
8 ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-64]
9 ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-32]
10 ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}]
11 ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #32]
12 ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #64]
13 ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #96]
1414
1515 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
1616
626626 ; in a register.
627627
628628 ; CHECK: @ %bb24
629 ; CHECK: subs{{.*}} [[REGISTER:(r[0-9]+)|(lr)]], #1
629 ; CHECK: subs{{.*}} {{(r[0-9]+)|(lr)}}, #1
630630 ; CHECK: bne.w
631631
632632 %92 = icmp eq i32 %tmp81, %indvar78 ; [#uses=1]