llvm.org GIT mirror llvm / 5993a0e
remove inalloca parameters in globalopt and simplify argpromotion

Summary:
Inalloca parameters require special handling in some optimizations. This change causes globalopt to strip the inalloca attribute from function parameters when it is safe to do so, removes the special handling for inallocas from argpromotion, and replaces it with a simple check that causes argpromotion to skip functions that receive inallocas (for when the pass is invoked on code that didn't run through globalopt first). This also avoids a case where argpromotion would incorrectly try to pass an inalloca in a register. Fixes PR41658.

Reviewers: rnk, efriedma
Reviewed By: rnk
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D61286

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@359743 91177308-0d34-0410-b5e6-96231b3b80d8

Bob Haarman 4 months ago
5 changed file(s) with 76 addition(s) and 34 deletion(s). Raw diff Collapse all Expand all
565565 /// This method limits promotion of aggregates to only promote up to three
566566 /// elements of the aggregate in order to avoid exploding the number of
567567 /// arguments passed in.
568 static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
569 AAResults &AAR, unsigned MaxElements) {
568 static bool isSafeToPromoteArgument(Argument *Arg, bool isByVal, AAResults &AAR,
569 unsigned MaxElements) {
570570 using GEPIndicesSet = std::set<IndicesVector>;
571571
572572 // Quick exit for unused arguments
588588 //
589589 // This set will contain all sets of indices that are loaded in the entry
590590 // block, and thus are safe to unconditionally load in the caller.
591 //
592 // This optimization is also safe for InAlloca parameters, because it verifies
593 // that the address isn't captured.
594591 GEPIndicesSet SafeToUnconditionallyLoad;
595592
596593 // This set contains all the sets of indices that we are planning to promote.
598595 GEPIndicesSet ToPromote;
599596
600597 // If the pointer is always valid, any load with first index 0 is valid.
601 if (isByValOrInAlloca || allCallersPassInValidPointerForArgument(Arg))
598 if (isByVal || allCallersPassInValidPointerForArgument(Arg))
602599 SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
603600
604601 // First, iterate the entry block and mark loads of (geps of) arguments as
655652 // TODO: This runs the above loop over and over again for dead GEPs
656653 // Couldn't we just increment the UI iterator earlier and erase the
657654 // use?
658 return isSafeToPromoteArgument(Arg, isByValOrInAlloca, AAR,
659 MaxElements);
655 return isSafeToPromoteArgument(Arg, isByVal, AAR, MaxElements);
660656 }
661657
662658 // Ensure that all of the indices are constants.
855851 if (F->isVarArg())
856852 return nullptr;
857853
854 // Don't transform functions that receive inallocas, as the transformation may
855 // not be safe depending on calling convention.
856 if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
857 return nullptr;
858
858859 // First check: see if there are any pointer arguments! If not, quick exit.
859860 SmallVector<Argument *, 16> PointerArgs;
860861 for (Argument &I : F->args())
913914
914915 // If this is a byval argument, and if the aggregate type is small, just
915916 // pass the elements, which is always safe, if the passed value is densely
916 // packed or if we can prove the padding bytes are never accessed. This does
917 // not apply to inalloca.
917 // packed or if we can prove the padding bytes are never accessed.
918918 bool isSafeToPromote =
919919 PtrArg->hasByValAttr() &&
920920 (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
965965 }
966966
967967 // Otherwise, see if we can promote the pointer to its value.
968 if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR,
968 if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValAttr(), AAR,
969969 MaxElements))
970970 ArgsToPromote.insert(PtrArg);
971971 }
20892089 }
20902090 }
20912091
2092 static AttributeList StripNest(LLVMContext &C, AttributeList Attrs) {
2093 // There can be at most one attribute set with a nest attribute.
2094 unsigned NestIndex;
2095 if (Attrs.hasAttrSomewhere(Attribute::Nest, &NestIndex))
2096 return Attrs.removeAttribute(C, NestIndex, Attribute::Nest);
2092 static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs,
2093 Attribute::AttrKind A) {
2094 unsigned AttrIndex;
2095 if (Attrs.hasAttrSomewhere(A, &AttrIndex))
2096 return Attrs.removeAttribute(C, AttrIndex, A);
20972097 return Attrs;
20982098 }
20992099
2100 static void RemoveNestAttribute(Function *F) {
2101 F->setAttributes(StripNest(F->getContext(), F->getAttributes()));
2100 static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
2101 F->setAttributes(StripAttr(F->getContext(), F->getAttributes(), A));
21022102 for (User *U : F->users()) {
21032103 if (isa<BlockAddress>(U))
21042104 continue;
21052105 CallSite CS(cast<Instruction>(U));
2106 CS.setAttributes(StripNest(F->getContext(), CS.getAttributes()));
2106 CS.setAttributes(StripAttr(F->getContext(), CS.getAttributes(), A));
21072107 }
21082108 }
21092109
21162116
21172117 // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
21182118 if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
2119 return false;
2120
2121 // Don't break the invariant that the inalloca parameter is the only parameter
2122 // passed in memory.
2123 // FIXME: GlobalOpt should remove inalloca when possible and hoist the dynamic
2124 // alloca it uses to the entry block if possible.
2125 if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
21262119 return false;
21272120
21282121 // FIXME: Change CC for the whole chain of musttail calls when possible.
22862279 if (!F->hasLocalLinkage())
22872280 continue;
22882281
2282 // If we have an inalloca parameter that we can safely remove the
2283 // inalloca attribute from, do so. This unlocks optimizations that
2284 // wouldn't be safe in the presence of inalloca.
2285 // FIXME: We should also hoist alloca affected by this to the entry
2286 // block if possible.
2287 if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
2288 !F->hasAddressTaken()) {
2289 RemoveAttribute(F, Attribute::InAlloca);
2290 Changed = true;
2291 }
2292
22892293 if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) {
22902294 NumInternalFunc++;
22912295 TargetTransformInfo &TTI = GetTTI(*F);
23182322 !F->hasAddressTaken()) {
23192323 // The function is not used by a trampoline intrinsic, so it is safe
23202324 // to remove the 'nest' attribute.
2321 RemoveNestAttribute(F);
2325 RemoveAttribute(F, Attribute::Nest);
23222326 ++NumNestRemoved;
23232327 Changed = true;
23242328 }
0 ; In PR41658, argpromotion put an inalloca in a position that per the
1 ; calling convention is passed in a register. This test verifies that
2 ; we don't do that anymore. It also verifies that the combination of
3 ; globalopt and argpromotion is able to optimize the call safely.
4 ;
5 ; RUN: opt -S -argpromotion %s | FileCheck --check-prefix=THIS %s
6 ; RUN: opt -S -globalopt -argpromotion %s | FileCheck --check-prefix=OPT %s
7 ; THIS: define internal x86_thiscallcc void @internalfun(%struct.a* %this, <{ %struct.a
8 ; OPT: define internal fastcc void @internalfun(<{ %struct.a }>*)
9
10 target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
11 target triple = "i386-pc-windows-msvc19.11.0"
12
13 %struct.a = type { i8 }
14
15 define internal x86_thiscallcc void @internalfun(%struct.a* %this, <{ %struct.a }>* inalloca) {
16 entry:
17 %a = getelementptr inbounds <{ %struct.a }>, <{ %struct.a }>* %0, i32 0, i32 0
18 %argmem = alloca inalloca <{ %struct.a }>, align 4
19 %1 = getelementptr inbounds <{ %struct.a }>, <{ %struct.a }>* %argmem, i32 0, i32 0
20 %call = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* %1, %struct.a* dereferenceable(1) %a)
21 call void @ext(<{ %struct.a }>* inalloca %argmem)
22 ret void
23 }
24
25 ; This is here to ensure @internalfun is live.
26 define void @exportedfun(%struct.a* %a) {
27 %inalloca.save = tail call i8* @llvm.stacksave()
28 %argmem = alloca inalloca <{ %struct.a }>, align 4
29 call x86_thiscallcc void @internalfun(%struct.a* %a, <{ %struct.a }>* inalloca %argmem)
30 call void @llvm.stackrestore(i8* %inalloca.save)
31 ret void
32 }
33
34 declare x86_thiscallcc %struct.a* @copy_ctor(%struct.a* returned, %struct.a* dereferenceable(1))
35 declare void @ext(<{ %struct.a }>* inalloca)
36 declare i8* @llvm.stacksave()
37 declare void @llvm.stackrestore(i8*)
None ; RUN: opt %s -argpromotion -sroa -S | FileCheck %s
1 ; RUN: opt %s -passes='argpromotion,function(sroa)' -S | FileCheck %s
0 ; RUN: opt %s -globalopt -argpromotion -sroa -S | FileCheck %s
1 ; RUN: opt %s -passes='module(globalopt),cgscc(argpromotion),function(sroa)' -S | FileCheck %s
22
33 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
44
1414 %r = add i32 %a, %b
1515 ret i32 %r
1616 }
17 ; CHECK-LABEL: define internal i32 @f
17 ; CHECK-LABEL: define internal fastcc i32 @f
1818 ; CHECK-NOT: load
1919 ; CHECK: ret
2020
3434
3535 ; Argpromote can't promote %a because of the icmp use.
3636 define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b) nounwind {
37 ; CHECK: define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b)
37 ; CHECK: define internal fastcc i1 @g(%struct.ss* %a, %struct.ss* %b)
3838 entry:
3939 %c = icmp eq %struct.ss* %a, %b
4040 ret i1 %c
4444 entry:
4545 %S = alloca inalloca %struct.ss
4646 %c = call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S)
47 ; CHECK: call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S)
47 ; CHECK: call fastcc i1 @g(%struct.ss* %S, %struct.ss* %S)
4848 ret i32 0
4949 }
2626 }
2727
2828 define internal i32 @inalloca(i32* inalloca %p) {
29 ; CHECK-LABEL: define internal i32 @inalloca(i32* inalloca %p)
29 ; CHECK-LABEL: define internal fastcc i32 @inalloca(i32* %p)
3030 %rv = load i32, i32* %p
3131 ret i32 %rv
3232 }
5151 ; CHECK: call fastcc i32 @g
5252 ; CHECK: call coldcc i32 @h
5353 ; CHECK: call i32 @j
54 ; CHECK: call i32 @inalloca(i32* inalloca %args)
54 ; CHECK: call fastcc i32 @inalloca(i32* %args)