llvm.org GIT mirror llvm / 3382a84
Avoid NEON SP-FP unless unsafe-math or Darwin. NEON is not IEEE 754 compliant, so we should avoid lowering single-precision floating-point operations with NEON unless unsafe-math is turned on. The equivalent VFP instructions are IEEE 754 compliant, but on some cores they are much slower, so some archs/OSs, such as Swift and Darwin, might still request NEON single-precision FP to be on by default. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177651 91177308-0d34-0410-b5e6-96231b3b80d8 Renato Golin 6 years ago
12 changed file(s) with 143 addition(s) and 20 deletion(s). Raw diff Collapse all Expand all
142142 // ARM processor families.
143143 def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5",
144144 "Cortex-A5 ARM processors",
145 [FeatureSlowFPBrcc, FeatureNEONForFP,
146 FeatureHasSlowFPVMLx, FeatureVMLxForwarding,
147 FeatureT2XtPk]>;
145 [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
146 FeatureVMLxForwarding, FeatureT2XtPk]>;
148147 def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
149148 "Cortex-A8 ARM processors",
150 [FeatureSlowFPBrcc, FeatureNEONForFP,
151 FeatureHasSlowFPVMLx, FeatureVMLxForwarding,
152 FeatureT2XtPk]>;
149 [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
150 FeatureVMLxForwarding, FeatureT2XtPk]>;
153151 def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
154152 "Cortex-A9 ARM processors",
155153 [FeatureVMLxForwarding,
1818 #include "llvm/IR/Function.h"
1919 #include "llvm/Support/CommandLine.h"
2020 #include "llvm/Target/TargetInstrInfo.h"
21 #include "llvm/Target/TargetOptions.h"
2122
2223 #define GET_SUBTARGETINFO_TARGET_DESC
2324 #define GET_SUBTARGETINFO_CTOR
4142 cl::desc("Disallow all unaligned memory accesses"));
4243
4344 ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
44 const std::string &FS)
45 const std::string &FS, const TargetOptions &Options)
4546 : ARMGenSubtargetInfo(TT, CPU, FS)
4647 , ARMProcFamily(Others)
4748 , stackAlignment(4)
4849 , CPUString(CPU)
4950 , TargetTriple(TT)
51 , Options(Options)
5052 , TargetABI(ARM_ABI_APCS) {
5153 initializeEnvironment();
5254 resetSubtargetFeatures(CPU, FS);
9193 AllowsUnalignedMem = false;
9294 Thumb2DSP = false;
9395 UseNaClTrap = false;
96 UnsafeFPMath = false;
9497 }
9598
9699 void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
161164 // configuration.
162165 if (!StrictAlign && hasV6Ops() && isTargetDarwin())
163166 AllowsUnalignedMem = true;
167
168 // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default.
169 uint64_t Bits = getFeatureBits();
170 if ((Bits & ARM::ProcA5 || Bits & ARM::ProcA8) && // Where this matters
171 (Options.UnsafeFPMath || isTargetDarwin()))
172 UseNEONForSinglePrecisionFP = true;
164173 }
165174
166175 /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
2525 namespace llvm {
2626 class GlobalValue;
2727 class StringRef;
28 class TargetOptions;
2829
2930 class ARMSubtarget : public ARMGenSubtargetInfo {
3031 protected:
158159 /// NaCl TRAP instruction is generated instead of the regular TRAP.
159160 bool UseNaClTrap;
160161
162 /// Target machine allowed unsafe FP math (such as use of NEON fp)
163 bool UnsafeFPMath;
164
161165 /// stackAlignment - The minimum alignment known to hold of the stack frame on
162166 /// entry to the function and which must be maintained by every function.
163167 unsigned stackAlignment;
173177
174178 /// Selected instruction itineraries (one entry per itinerary class.)
175179 InstrItineraryData InstrItins;
180
181 /// Options passed via command line that could influence the target
182 const TargetOptions &Options;
176183
177184 public:
178185 enum {
188195 /// of the specified triple.
189196 ///
190197 ARMSubtarget(const std::string &TT, const std::string &CPU,
191 const std::string &FS);
198 const std::string &FS, const TargetOptions &Options);
192199
193200 /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
194201 /// that still makes it profitable to inline the call.
4747 Reloc::Model RM, CodeModel::Model CM,
4848 CodeGenOpt::Level OL)
4949 : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
50 Subtarget(TT, CPU, FS),
50 Subtarget(TT, CPU, FS, Options),
5151 JITInfo(),
5252 InstrItins(Subtarget.getInstrItineraryData()) {
5353 // Default to soft float ABI
0 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
11 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
22 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
3 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U
34 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
45
56 define float @test(float %a, float %b) {
1718 ; NFP0: vadd.f32 s
1819
1920 ; CORTEXA8: test:
20 ; CORTEXA8: vadd.f32 d
21 ; CORTEXA8: vadd.f32 s
22 ; CORTEXA8U: test:
23 ; CORTEXA8U: vadd.f32 d
2124 ; CORTEXA9: test:
22 ; CORTEXA9: vadd.f32 s{{.}}, s{{.}}, s{{.}}
25 ; CORTEXA9: vadd.f32 s
0 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
11 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
22 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
3 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U
34 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
45
56 define float @test(float %a, float %b) {
1718 ; NFP0: vmul.f32 s
1819
1920 ; CORTEXA8: test:
20 ; CORTEXA8: vmul.f32 d
21 ; CORTEXA8: vmul.f32 s
22 ; CORTEXA8U: test:
23 ; CORTEXA8U: vmul.f32 d
2124 ; CORTEXA9: test:
22 ; CORTEXA9: vmul.f32 s{{.}}, s{{.}}, s{{.}}
25 ; CORTEXA9: vmul.f32 s
2326
2427 ; VFP2: test2
2528 define float @test2(float %a) nounwind {
0 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
11 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
22 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
3 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U
34 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
45
56 define float @test1(float* %a) {
2122 ; NFP0: vneg.f32 s{{.*}}, s{{.*}}
2223
2324 ; CORTEXA8: test1:
24 ; CORTEXA8: vneg.f32 d{{.*}}, d{{.*}}
25 ; CORTEXA8: vneg.f32 s{{.*}}, s{{.*}}
26
27 ; CORTEXA8U: test1:
28 ; CORTEXA8U: vneg.f32 d{{.*}}, d{{.*}}
2529
2630 ; CORTEXA9: test1:
2731 ; CORTEXA9: vneg.f32 s{{.*}}, s{{.*}}
4549 ; NFP0: vneg.f32 s{{.*}}, s{{.*}}
4650
4751 ; CORTEXA8: test2:
48 ; CORTEXA8: vneg.f32 d{{.*}}, d{{.*}}
52 ; CORTEXA8: vneg.f32 s{{.*}}, s{{.*}}
53
54 ; CORTEXA8U: test2:
55 ; CORTEXA8U: vneg.f32 d{{.*}}, d{{.*}}
4956
5057 ; CORTEXA9: test2:
5158 ; CORTEXA9: vneg.f32 s{{.*}}, s{{.*}}
0 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
11 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
22 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
3 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=A8U
34 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=A8
45
56 define float @t1(float %acc, float %a, float %b) nounwind {
1011 ; NEON: t1:
1112 ; NEON: vnmla.f32
1213
14 ; A8U: t1:
15 ; A8U: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
16 ; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
17
1318 ; A8: t1:
1419 ; A8: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
15 ; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
20 ; A8: vsub.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
1621 %0 = fmul float %a, %b
1722 %1 = fsub float -0.0, %0
1823 %2 = fsub float %1, %acc
2732 ; NEON: t2:
2833 ; NEON: vnmla.f32
2934
35 ; A8U: t2:
36 ; A8U: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}
37 ; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
38
3039 ; A8: t2:
3140 ; A8: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}
32 ; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
41 ; A8: vsub.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
3342 %0 = fmul float %a, %b
3443 %1 = fmul float -1.0, %0
3544 %2 = fsub float %1, %acc
4352
4453 ; NEON: t3:
4554 ; NEON: vnmla.f64
55
56 ; A8U: t3:
57 ; A8U: vnmul.f64 d
58 ; A8U: vsub.f64 d
4659
4760 ; A8: t3:
4861 ; A8: vnmul.f64 d
6174 ; NEON: t4:
6275 ; NEON: vnmla.f64
6376
77 ; A8U: t4:
78 ; A8U: vnmul.f64 d
79 ; A8U: vsub.f64 d
80
6481 ; A8: t4:
6582 ; A8: vnmul.f64 d
6683 ; A8: vsub.f64 d
0 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
11 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=VFP2
2 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEON
2 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=NEON
3 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=VFP2
34 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=VFP2
45
56 define i32 @test1(float %a, float %b) {
0 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
1 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=NFP1U
12 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NFP1
23 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
34
89 }
910
1011 ; VFP2: vsub.f32 s
11 ; NFP1: vsub.f32 d
12 ; NFP1U: vsub.f32 d
13 ; NFP1: vsub.f32 s
1214 ; NFP0: vsub.f32 s
0 ; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 | FileCheck %s -check-prefix=LINUXA5
1 ; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a8 | FileCheck %s -check-prefix=LINUXA8
2 ; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a9 | FileCheck %s -check-prefix=LINUXA9
3 ; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 | FileCheck %s -check-prefix=LINUXA15
4 ; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift | FileCheck %s -check-prefix=LINUXSWIFT
5
6 ; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA5
7 ; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA8
8 ; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a9 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA9
9 ; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA15
10 ; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFESWIFT
11
12 ; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a5 | FileCheck %s -check-prefix=DARWINA5
13 ; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=DARWINA8
14 ; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=DARWINA9
15 ; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a15 | FileCheck %s -check-prefix=DARWINA15
16 ; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=swift | FileCheck %s -check-prefix=DARWINSWIFT
17
18 ; This test makes sure that, unless unsafe-math is selected, we do not lower single-precision
19 ; FP ops to VMUL.f32 D* (i.e. NEON), since NEON is not fully IEEE 754 compliant.
20
21 @.str = private unnamed_addr constant [12 x i8] c"S317\09%.5g \0A\00", align 1
22
23 ; CHECK-LINUXA5: main:
24 ; CHECK-LINUXA8: main:
25 ; CHECK-LINUXA9: main:
26 ; CHECK-LINUXA15: main:
27 ; CHECK-LINUXSWIFT: main:
28 ; CHECK-UNSAFEA5: main:
29 ; CHECK-UNSAFEA8: main:
30 ; CHECK-UNSAFEA9: main:
31 ; CHECK-UNSAFEA15: main:
32 ; CHECK-UNSAFESWIFT: main:
33 ; CHECK-DARWINA5: main:
34 ; CHECK-DARWINA8: main:
35 ; CHECK-DARWINA9: main:
36 ; CHECK-DARWINA15: main:
37 ; CHECK-DARWINSWIFT: main:
38 define i32 @main() {
39 entry:
40 br label %for.body
41
42 for.body: ; preds = %for.body, %entry
43 %i.04 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
44 %q.03 = phi float [ 1.000000e+00, %entry ], [ %mul, %for.body ]
45 %mul = fmul float %q.03, 0x3FEFAE1480000000
46 ; CHECK-LINUXA5: vmul.f32 s{{[0-9]*}}
47 ; CHECK-LINUXA8: vmul.f32 s{{[0-9]*}}
48 ; CHECK-LINUXA9: vmul.f32 s{{[0-9]*}}
49 ; CHECK-LINUXA15: vmul.f32 s{{[0-9]*}}
50 ; Swift is *always* unsafe
51 ; CHECK-LINUXSWIFT: vmul.f32 d{{[0-9]*}}
52
53 ; CHECK-UNSAFEA5: vmul.f32 d{{[0-9]*}}
54 ; CHECK-UNSAFEA8: vmul.f32 d{{[0-9]*}}
55 ; A9 and A15 don't need this
56 ; CHECK-UNSAFEA9: vmul.f32 s{{[0-9]*}}
57 ; CHECK-UNSAFEA15: vmul.f32 s{{[0-9]*}}
58 ; CHECK-UNSAFESWIFT: vmul.f32 d{{[0-9]*}}
59
60 ; CHECK-DARWINA5: vmul.f32 d{{[0-9]*}}
61 ; CHECK-DARWINA8: vmul.f32 d{{[0-9]*}}
62 ; CHECK-DARWINA9: vmul.f32 s{{[0-9]*}}
63 ; CHECK-DARWINA15: vmul.f32 s{{[0-9]*}}
64 ; CHECK-DARWINSWIFT: vmul.f32 d{{[0-9]*}}
65 %conv = fpext float %mul to double
66 %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), double %conv) #1
67 %inc = add nsw i32 %i.04, 1
68 %exitcond = icmp eq i32 %inc, 16000
69 br i1 %exitcond, label %for.end, label %for.body
70
71 for.end: ; preds = %for.body
72 ret i32 0
73 }
74
75 declare i32 @printf(i8* nocapture, ...)
None ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
0 ; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s
11
22 define float @fmin_ole(float %x) nounwind {
33 ;CHECK: fmin_ole: