llvm.org GIT mirror llvm / 9c7e6b2
[AMDGPU] Constrain the AMDGPU inliner on the maximum number of basic blocks in a caller function (compile-time performance)
Differential revision: https://reviews.llvm.org/D62917
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362789 91177308-0d34-0410-b5e6-96231b3b80d8
Valery Pykhtin, 4 months ago
2 changed file(s) with 49 addition(s) and 2 deletion(s). Raw diff Collapse all Expand all
4848 ArgAllocaCutoff("amdgpu-inline-arg-alloca-cutoff", cl::Hidden, cl::init(256),
4949 cl::desc("Maximum alloca size to use for inline cost"));
5050
51 // Inliner constraint to achieve reasonable compilation time
52 static cl::opt<size_t>
53 MaxBB("amdgpu-inline-max-bb", cl::Hidden, cl::init(300),
54 cl::desc("Maximum BB number allowed in a function after inlining"
55 " (compile time constraint)"));
56
5157 namespace {
5258
5359 class AMDGPUInliner : public LegacyInlinerBase {
207213 return ACT->getAssumptionCache(F);
208214 };
209215
210 return llvm::getInlineCost(cast<CallBase>(*CS.getInstruction()), Callee,
216 auto IC = llvm::getInlineCost(cast<CallBase>(*CS.getInstruction()), Callee,
211217 LocalParams, TTI, GetAssumptionCache, None, PSI,
212218 RemarksEnabled ? &ORE : nullptr);
213 }
219
220 if (IC && !IC.isAlways()) {
221 // Single BB does not increase total BB amount, thus subtract 1
222 size_t Size = Caller->size() + Callee->size() - 1;
223 if (MaxBB && Size > MaxBB)
224 return llvm::InlineCost::getNever("max number of bb exceeded");
225 }
226 return IC;
227 }
0 ; RUN: opt -mtriple=amdgcn-- --amdgpu-inline -S -amdgpu-inline-max-bb=2 %s | FileCheck %s --check-prefix=NOINL
1 ; RUN: opt -mtriple=amdgcn-- --amdgpu-inline -S -amdgpu-inline-max-bb=3 %s | FileCheck %s --check-prefix=INL
2
3 define i32 @callee(i32 %x) {
4 entry:
5 %cc = icmp eq i32 %x, 1
6 br i1 %cc, label %ret_res, label %mulx
7
8 mulx:
9 %mul1 = mul i32 %x, %x
10 %mul2 = mul i32 %mul1, %x
11 %mul3 = mul i32 %mul1, %mul2
12 %mul4 = mul i32 %mul3, %mul2
13 %mul5 = mul i32 %mul4, %mul3
14 br label %ret_res
15
16 ret_res:
17 %r = phi i32 [ %mul5, %mulx ], [ %x, %entry ]
18 ret i32 %r
19 }
20
21 ; INL-LABEL: @caller
22 ; NOINL-LABEL: @caller
23 ; INL: mul i32
24 ; INL-NOT: call i32
25 ; NOINL-NOT: mul i32
26 ; NOINL: call i32
27
28 define amdgpu_kernel void @caller(i32 %x) {
29 %res = call i32 @callee(i32 %x)
30 store volatile i32 %res, i32 addrspace(1)* undef
31 ret void
32 }