llvm.org GIT mirror llvm / 68f374b
[ARM] Inline callee if its target-features are a subset of the caller Summary: Similar to X86, it should be safe to inline callees if their target-features are a subset of the caller. As some subtarget features provide different instructions depending on whether they are set or unset (e.g. ThumbMode and ModeSoftFloat), we use a whitelist of target-features describing hardware capabilities only. Reviewers: kristof.beyls, rengolin, t.p.northover, SjoerdMeijer, peter.smith, silviu.baranga, efriedma Reviewed By: SjoerdMeijer, efriedma Subscribers: dschuff, efriedma, aemerson, sdardis, javed.absar, arichardson, eraman, llvm-commits Differential Revision: https://reviews.llvm.org/D34697 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@307889 91177308-0d34-0410-b5e6-96231b3b80d8 Florian Hahn 3 years ago
4 changed file(s) with 116 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
1313 using namespace llvm;
1414
1515 #define DEBUG_TYPE "armtti"
16
17 bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
18 const Function *Callee) const {
19 const TargetMachine &TM = getTLI()->getTargetMachine();
20 const FeatureBitset &CallerBits =
21 TM.getSubtargetImpl(*Caller)->getFeatureBits();
22 const FeatureBitset &CalleeBits =
23 TM.getSubtargetImpl(*Callee)->getFeatureBits();
24
25 // To inline a callee, all features not in the whitelist must match exactly.
26 bool MatchExact = (CallerBits & ~InlineFeatureWhitelist) ==
27 (CalleeBits & ~InlineFeatureWhitelist);
28 // For features in the whitelist, the callee's features must be a subset of
29 // the callers'.
30 bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeatureWhitelist) ==
31 (CalleeBits & InlineFeatureWhitelist);
32 return MatchExact && MatchSubset;
33 }
1634
1735 int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
1836 assert(Ty->isIntegerTy());
3232 const ARMSubtarget *ST;
3333 const ARMTargetLowering *TLI;
3434
35 // Currently the following features are excluded from InlineFeatureWhitelist.
36 // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureVFPOnlySP, FeatureD16
37 // Depending on whether they are set or unset, different
38 // instructions/registers are available. For example, inlining a callee with
39 // -thumb-mode in a caller with +thumb-mode, may cause the assembler to
40 // fail if the callee uses ARM only instructions, e.g. in inline asm.
41 const FeatureBitset InlineFeatureWhitelist = {
42 ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
43 ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
44 ARM::FeatureFullFP16, ARM::FeatureHWDivThumb,
45 ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
46 ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
47 ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
48 ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
49 ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
50 ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
51 ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
52 ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
53 ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
54 ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
55 ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
56 ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
57 ARM::FeatureHasSlowFPVMLx, ARM::FeatureVMLxForwarding,
58 ARM::FeaturePref32BitThumb, ARM::FeatureAvoidPartialCPSR,
59 ARM::FeatureCheapPredicableCPSR, ARM::FeatureAvoidMOVsShOp,
60 ARM::FeatureHasRetAddrStack, ARM::FeatureHasNoBranchPredictor,
61 ARM::FeatureDSP, ARM::FeatureMP, ARM::FeatureVirtualization,
62 ARM::FeatureMClass, ARM::FeatureRClass, ARM::FeatureAClass,
63 ARM::FeatureNaClTrap, ARM::FeatureStrictAlign, ARM::FeatureLongCalls,
64 ARM::FeatureExecuteOnly, ARM::FeatureReserveR9, ARM::FeatureNoMovt,
65 ARM::FeatureNoNegativeImmediates
66 };
67
3568 const ARMSubtarget *getST() const { return ST; }
3669 const ARMTargetLowering *getTLI() const { return TLI; }
3770
3972 explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
4073 : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
4174 TLI(ST->getTargetLowering()) {}
75
/// Returns true if \p Callee may be inlined into \p Caller. The definition
/// compares the subtarget feature bits of both functions: features outside
/// InlineFeatureWhitelist must match exactly, and the callee's whitelisted
/// features must be a subset of the caller's.
76 bool areInlineCompatible(const Function *Caller,
77 const Function *Callee) const;
4278
4379 bool enableInterleavedAccessVectorization() { return true; }
4480
0 ; RUN: opt < %s -mtriple=arm-unknown-linux-gnu -S -inline | FileCheck %s
1 ; RUN: opt < %s -mtriple=arm-unknown-linux-gnu -S -passes='cgscc(inline)' | FileCheck %s
2 ; Check that we only inline when we have compatible target attributes.
3 ; ARM has implemented a target attribute that will verify that the attribute
4 ; sets are compatible.
5
; Base callee (attributes #0: +dsp,+neon). @baz is only declared, so its call
; always remains in @foo. The other functions use the presence of this @baz
; call in their own body as the marker that @foo was inlined into them.
6 define i32 @foo() #0 {
7 entry:
8 %call = call i32 (...) @baz()
9 ret i32 %call
10 ; CHECK-LABEL: foo
11 ; CHECK: call i32 (...) @baz()
12 }
13 declare i32 @baz(...) #0
14
; Caller has #1 (+dsp,+neon,+fp16); callee @foo has #0 (+dsp,+neon). The
; callee's features are a subset of the caller's, so the test expects @foo to
; be inlined: the @baz call shows up directly in @bar.
15 define i32 @bar() #1 {
16 entry:
17 %call = call i32 @foo()
18 ret i32 %call
19 ; CHECK-LABEL: bar
20 ; CHECK: call i32 (...) @baz()
21 }
22
; Caller has #0 (+dsp,+neon); callee @bar has #1, which additionally enables
; +fp16. The callee's features are not a subset of the caller's, so the test
; expects the call to @bar to stay.
23 define i32 @qux() #0 {
24 entry:
25 %call = call i32 @bar()
26 ret i32 %call
27 ; CHECK-LABEL: qux
28 ; CHECK: call i32 @bar()
29 }
30
; Caller has #2, which sets +thumb-mode; callee @foo (#0) does not. Thumb mode
; is listed as excluded from the inline whitelist, so it has to match exactly;
; the test expects the call to @foo to stay.
31 define i32 @thumb_fn() #2 {
32 entry:
33 %call = call i32 @foo()
34 ret i32 %call
35 ; CHECK-LABEL: thumb_fn
36 ; CHECK: call i32 @foo
37 }
38
; Caller has #3 (+dsp,+neon,+strict-align); callee @foo has #0 (+dsp,+neon).
; +strict-align presumably corresponds to the whitelisted
; ARM::FeatureStrictAlign, so the callee's features are a subset and the test
; expects @foo to be inlined: the @baz call shows up directly here.
39 define i32 @strict_align() #3 {
40 entry:
41 %call = call i32 @foo()
42 ret i32 %call
43 ; CHECK-LABEL: strict_align
44 ; CHECK: call i32 (...) @baz()
45 }
46
; Caller has #4, which sets +soft-float; callee @foo (#0) does not. Soft-float
; mode is listed as excluded from the inline whitelist, so it has to match
; exactly; the test expects the call to @foo to stay.
define i32 @soft_float_fn() #4 {
entry:
  %call = call i32 @foo()
  ret i32 %call
; CHECK-LABEL: soft_float_fn
; CHECK: call i32 @foo
}
54
55 attributes #0 = { "target-cpu"="generic" "target-features"="+dsp,+neon" }
56 attributes #1 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+fp16" }
57 attributes #2 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+fp16,+thumb-mode" }
58 attributes #3 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+strict-align" }
59 attributes #4 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+fp16,+soft-float" }
# Mark the tests in this directory as unsupported when 'ARM' is not among the
# targets listed in config.root.targets.
if 'ARM' not in config.root.targets:
    config.unsupported = True