llvm.org GIT mirror llvm / 03fddb7
Teach TailRecursionElimination to handle certain cases of nocapture escaping allocas. Without the changes introduced by this patch, if TRE saw any allocas at all, TRE would not perform TRE *or* mark callsites with the tail marker. Because TRE runs after mem2reg, this inadequacy is not a death sentence. But given a callsite A without escaping alloca arguments, A may not be able to have the tail marker placed on it due to a separate callsite B having a write-back parameter passed in via an argument with the nocapture attribute. Assume that B is the only other callsite besides A and B only has nocapture escaping alloca arguments (*NOTE* B may have other arguments that are not passed allocas). In this case not marking A with the tail marker is unnecessarily conservative since: 1. By assumption A has no escaping alloca arguments itself so it cannot access the caller's stack via its arguments. 2. Since all of B's escaping alloca arguments are passed as parameters with the nocapture attribute, we know that B does not stash said escaping allocas in a manner that outlives B itself and thus could be accessed indirectly by A. With the changes introduced by this patch: 1. If we see any escaping allocas passed as a capturing argument, we do nothing and bail early. 2. If we do not see any escaping allocas passed as captured arguments but we do see escaping allocas passed as nocapture arguments: i. We do not perform TRE to avoid PR962 since the code generator produces significantly worse code for the dynamic allocas that would be created by the TRE algorithm. ii. If the function does not call a function that returns twice (e.g. setjmp), mark call sites without escaping allocas with the tail marker. *NOTE* This excludes functions with escaping nocapture allocas. 3. If we do not see any escaping allocas at all (whether captured or not): i. If we do not have usage of setjmp, mark all callsites with the tail marker.
ii. If there are no dynamic/variable sized allocas in the function, attempt to perform TRE on all callsites in the function. Based on a patch by Nick Lewycky. rdar://14324281. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186057 91177308-0d34-0410-b5e6-96231b3b80d8 Michael Gottesman 6 years ago
3 changed file(s) with 176 addition(s) and 94 deletion(s). Raw diff Collapse all Expand all
5252 #define DEBUG_TYPE "tailcallelim"
5353 #include "llvm/Transforms/Scalar.h"
5454 #include "llvm/ADT/STLExtras.h"
55 #include "llvm/ADT/SmallPtrSet.h"
5556 #include "llvm/ADT/Statistic.h"
5657 #include "llvm/Analysis/CaptureTracking.h"
5758 #include "llvm/Analysis/InlineCost.h"
6869 #include "llvm/Support/CFG.h"
6970 #include "llvm/Support/CallSite.h"
7071 #include "llvm/Support/Debug.h"
72 #include "llvm/Support/ValueHandle.h"
7173 #include "llvm/Support/raw_ostream.h"
7274 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
7375 #include "llvm/Transforms/Utils/Local.h"
128130 AU.addRequired();
129131 }
130132
131 /// AllocaMightEscapeToCalls - Return true if this alloca may be accessed by
132 /// callees of this function. We only do very simple analysis right now, this
133 /// could be expanded in the future to use mod/ref information for particular
134 /// call sites if desired.
135 static bool AllocaMightEscapeToCalls(AllocaInst *AI) {
136 // FIXME: do simple 'address taken' analysis.
137 return true;
138 }
139
140 /// CheckForEscapingAllocas - Scan the specified basic block for alloca
141 /// instructions. If it contains any that might be accessed by calls, return
142 /// true.
143 static bool CheckForEscapingAllocas(BasicBlock *BB,
144 bool &CannotTCETailMarkedCall) {
145 bool RetVal = false;
146 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
147 if (AllocaInst *AI = dyn_cast(I)) {
148 RetVal |= AllocaMightEscapeToCalls(AI);
149
150 // If this alloca is in the body of the function, or if it is a variable
151 // sized allocation, we cannot tail call eliminate calls marked 'tail'
152 // with this mechanism.
153 if (BB != &BB->getParent()->getEntryBlock() ||
154 !isa(AI->getArraySize()))
155 CannotTCETailMarkedCall = true;
156 }
157 return RetVal;
158 }
133 /// CanTRE - Return true if the given alloca does not prevent tail recursion
134 /// elimination: it must be in the function's entry block and must not be a
135 /// variable sized allocation. Otherwise return false.
136 static bool CanTRE(AllocaInst *AI) {
137 // Because of PR962, we don't TRE allocas outside the entry block.
138
139 // If this alloca is in the body of the function, or if it is a variable
140 // sized allocation, we cannot tail call eliminate calls marked 'tail'
141 // with this mechanism.
142 BasicBlock *BB = AI->getParent();
143 return BB == &BB->getParent()->getEntryBlock() &&
144 isa(AI->getArraySize());
145 }
146
147 struct AllocaCaptureTracker : public CaptureTracker {
148 AllocaCaptureTracker() : Captured(false) {}
149
150 void tooManyUses() { Captured = true; }
151
152 bool shouldExplore(Use *U) {
153 Value *V = U->getUser();
154 if (isa(V) || isa(V))
155 UsesAlloca.insert(V);
156 return true;
157 }
158
159 bool captured(Use *U) {
160 if (isa(U->getUser()))
161 return false;
162 Captured = true;
163 return true;
164 }
165
166 bool Captured;
167 SmallPtrSet UsesAlloca;
168 };
159169
160170 bool TailCallElim::runOnFunction(Function &F) {
161171 // If this function is a varargs function, we won't be able to PHI the args
167177 bool TailCallsAreMarkedTail = false;
168178 SmallVector ArgumentPHIs;
169179 bool MadeChange = false;
170 bool FunctionContainsEscapingAllocas = false;
171
172 // CannotTCETailMarkedCall - If true, we cannot perform TCE on tail calls
180
181 // CanTRETailMarkedCall - If false, we cannot perform TRE on tail calls
173182 // marked with the 'tail' attribute, because doing so would cause the stack
174 // size to increase (real TCE would deallocate variable sized allocas, TCE
183 // size to increase (real TRE would deallocate variable sized allocas, TRE
175184 // doesn't).
176 bool CannotTCETailMarkedCall = false;
177
178 // Loop over the function, looking for any returning blocks, and keeping track
179 // of whether this function has any non-trivially used allocas.
180 for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
181 if (FunctionContainsEscapingAllocas && CannotTCETailMarkedCall)
182 break;
183
184 FunctionContainsEscapingAllocas |=
185 CheckForEscapingAllocas(BB, CannotTCETailMarkedCall);
186 }
187
188 /// FIXME: The code generator produces really bad code when an 'escaping
189 /// alloca' is changed from being a static alloca to being a dynamic alloca.
190 /// Until this is resolved, disable this transformation if that would ever
191 /// happen. This bug is PR962.
192 if (FunctionContainsEscapingAllocas)
193 return false;
194
195 // Second pass, change any tail calls to loops.
196 for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
197 if (ReturnInst *Ret = dyn_cast(BB->getTerminator())) {
198 bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
199 ArgumentPHIs,CannotTCETailMarkedCall);
200 if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
201 Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
202 TailCallsAreMarkedTail, ArgumentPHIs,
203 CannotTCETailMarkedCall);
204 MadeChange |= Change;
185 bool CanTRETailMarkedCall = true;
186
187 // Find calls that can be marked tail.
188 AllocaCaptureTracker ACT;
189 for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB) {
190 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
191 if (AllocaInst *AI = dyn_cast(I)) {
192 CanTRETailMarkedCall &= CanTRE(AI);
193 PointerMayBeCaptured(AI, &ACT);
194 // If any allocas are captured, exit.
195 if (ACT.Captured)
196 return false;
197 }
198 }
199 }
200
201 // Second pass, change any tail recursive calls to loops.
202 //
203 // FIXME: The code generator produces really bad code when an 'escaping
204 // alloca' is changed from being a static alloca to being a dynamic alloca.
205 // Until this is resolved, disable this transformation if that would ever
206 // happen. This bug is PR962.
207 if (ACT.UsesAlloca.empty()) {
208 for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
209 if (ReturnInst *Ret = dyn_cast(BB->getTerminator())) {
210 bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
211 ArgumentPHIs, !CanTRETailMarkedCall);
212 if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
213 Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
214 TailCallsAreMarkedTail, ArgumentPHIs,
215 !CanTRETailMarkedCall);
216 MadeChange |= Change;
217 }
205218 }
206219 }
207220
222235 }
223236 }
224237
225 // Finally, if this function contains no non-escaping allocas, or calls
226 // setjmp, mark all calls in the function as eligible for tail calls
227 //(there is no stack memory for them to access).
228 if (!FunctionContainsEscapingAllocas && !F.callsFunctionThatReturnsTwice())
229 for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
230 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
238 // At this point, we know that the function does not have any captured
239 // allocas. If additionally the function does not call setjmp, mark all calls
240 // in the function that do not access stack memory with the tail keyword. This
241 // implies ensuring that there does not exist any path from a call that takes
242 // in an alloca but does not capture it and the call which we wish to mark
243 // with "tail".
244 if (!F.callsFunctionThatReturnsTwice()) {
245 for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
246 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
231247 if (CallInst *CI = dyn_cast(I)) {
232 CI->setTailCall();
233 MadeChange = true;
248 if (!ACT.UsesAlloca.count(CI)) {
249 CI->setTailCall();
250 MadeChange = true;
251 }
234252 }
253 }
254 }
255 }
235256
236257 return MadeChange;
237258 }
11
22 declare void @noarg()
33 declare void @use(i32*)
4 declare void @use_nocapture(i32* nocapture)
5 declare void @use2_nocapture(i32* nocapture, i32* nocapture)
46
57 ; Trivial case. Mark @noarg with tail call.
68 define void @test0() {
5658 ret i32 0
5759 }
5860
61 ; Make sure that a nocapture pointer does not stop adding a tail call marker to
62 ; an unrelated call and additionally that we do not mark the nocapture call with
63 ; a tail call.
64 ;
65 ; rdar://14324281
66 define void @test4() {
67 ; CHECK: void @test4
68 ; CHECK-NOT: tail call void @use_nocapture
69 ; CHECK: tail call void @noarg()
70 ; CHECK: ret void
71 %a = alloca i32
72 call void @use_nocapture(i32* %a)
73 call void @noarg()
74 ret void
75 }
76
77 ; Make sure that we do not perform TRE even with a nocapture use. This is due to
78 ; bad codegen caused by PR962.
79 ;
80 ; rdar://14324281.
81 define i32* @test5(i32* nocapture %A, i1 %cond) {
82 ; CHECK: i32* @test5
83 ; CHECK-NOT: tailrecurse:
84 ; CHECK: ret i32* null
85 %B = alloca i32
86 br i1 %cond, label %cond_true, label %cond_false
87 cond_true:
88 call i32* @test5(i32* %B, i1 false)
89 ret i32* null
90 cond_false:
91 call void @use2_nocapture(i32* %A, i32* %B)
92 call void @noarg()
93 ret i32* null
94 }
95
96 ; PR14143: Make sure that we do not mark calls that are passed allocas as nocapture arguments with tail.
97 ;
98 ; rdar://14324281.
99 define void @test6(i32* %a, i32* %b) {
100 ; CHECK: @test6
101 ; CHECK-NOT: tail call
102 ; CHECK: ret void
103 %c = alloca [100 x i8], align 16
104 %tmp = bitcast [100 x i8]* %c to i32*
105 call void @use2_nocapture(i32* %b, i32* %tmp)
106 ret void
107 }
108
109 ; PR14143: Make sure that we do not mark calls that are passed allocas as nocapture arguments with tail.
110 ;
111 ; rdar://14324281
112 define void @test7(i32* %a, i32* %b) nounwind uwtable {
113 entry:
114 ; CHECK: @test7
115 ; CHECK-NOT: tail call
116 ; CHECK: ret void
117 %c = alloca [100 x i8], align 16
118 %0 = bitcast [100 x i8]* %c to i32*
119 call void @use2_nocapture(i32* %0, i32* %a)
120 call void @use2_nocapture(i32* %b, i32* %0)
121 ret void
122 }
123
124 ; If we have a mix of escaping captured/non-captured allocas, ensure that we do
125 ; not do anything including marking callsites with the tail call marker.
126 ;
127 ; rdar://14324281.
128 define i32* @test8(i32* nocapture %A, i1 %cond) {
129 ; CHECK: i32* @test8
130 ; CHECK-NOT: tailrecurse:
131 ; CHECK-NOT: tail call
132 ; CHECK: ret i32* null
133 %B = alloca i32
134 %B2 = alloca i32
135 br i1 %cond, label %cond_true, label %cond_false
136 cond_true:
137 call void @use(i32* %B2)
138 call i32* @test8(i32* %B, i1 false)
139 ret i32* null
140 cond_false:
141 call void @use2_nocapture(i32* %A, i32* %B)
142 call void @noarg()
143 ret i32* null
144 }
+0
-25
test/Transforms/TailCallElim/nocapture.ll less more
None ; RUN: opt -tailcallelim -S < %s | FileCheck %s
1 ; XFAIL: *
2
3 declare void @use(i8* nocapture, i8* nocapture)
4
5 define i8* @foo(i8* nocapture %A, i1 %cond) {
6 ; CHECK: tailrecurse:
7 ; CHECK: %A.tr = phi i8* [ %A, %0 ], [ %B, %cond_true ]
8 ; CHECK: %cond.tr = phi i1 [ %cond, %0 ], [ false, %cond_true ]
9 %B = alloca i8
10 ; CHECK: %B = alloca i8
11 br i1 %cond, label %cond_true, label %cond_false
12 ; CHECK: br i1 %cond.tr, label %cond_true, label %cond_false
13 cond_true:
14 ; CHECK: cond_true:
15 ; CHECK: br label %tailrecurse
16 call i8* @foo(i8* %B, i1 false)
17 ret i8* null
18 cond_false:
19 ; CHECK: cond_false
20 call void @use(i8* %A, i8* %B)
21 ; CHECK: tail call void @use(i8* %A.tr, i8* %B)
22 ret i8* null
23 ; CHECK: ret i8* null
24 }