llvm.org GIT mirror llvm / 80a961a
[WinEH] Optimize WinEH state stores

32-bit x86 Windows targets use a linked list of nodes allocated on the stack, referenced via thread-local storage. The personality routine interprets one of the fields in the node as a 'state number' which indicates where the personality routine should transfer control. State transitions are possible only before call-sites which may throw exceptions. Our previous scheme had us update the state number before all call-sites which may throw.

Instead, we can try to minimize the number of times we need to store by reasoning about the nearest store which dominates the current call-site. If the last store agrees with the current call-site, then we know that the state update is redundant and can be elided.

This is largely straightforward: an RPO walk of the blocks allows us to correctly forward-propagate the information when the function is a DAG. Currently, loops are not handled optimally and may trigger superfluous state stores.

Differential Revision: http://reviews.llvm.org/D16763

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@261122 91177308-0d34-0410-b5e6-96231b3b80d8

David Majnemer, 4 years ago
3 changed file(s) with 249 addition(s) and 34 deletion(s). Raw diff Collapse all Expand all
1414 //===----------------------------------------------------------------------===//
1515
1616 #include "X86.h"
17 #include "llvm/ADT/PostOrderIterator.h"
18 #include "llvm/Analysis/CFG.h"
1719 #include "llvm/Analysis/EHPersonalities.h"
1820 #include "llvm/CodeGen/MachineModuleInfo.h"
1921 #include "llvm/CodeGen/WinEHFuncInfo.h"
22 #include "llvm/IR/CallSite.h"
23 #include "llvm/IR/Function.h"
2024 #include "llvm/IR/Instructions.h"
2125 #include "llvm/IR/IntrinsicInst.h"
2226 #include "llvm/IR/IRBuilder.h"
2327 #include "llvm/IR/Module.h"
2428 #include "llvm/Pass.h"
29 #include "llvm/Support/Debug.h"
30 #include <deque>
2531
2632 using namespace llvm;
2733
3238 }
3339
3440 namespace {
41 const int OverdefinedState = INT_MIN;
42
3543 class WinEHStatePass : public FunctionPass {
3644 public:
3745 static char ID; // Pass identification, replacement for typeid.
8189 // Per-function state
8290 EHPersonality Personality = EHPersonality::Unknown;
8391 Function *PersonalityFn = nullptr;
92 bool UseStackGuard = false;
93 int ParentBaseState;
8494
8595 /// The stack allocation containing all EH data, including the link in the
8696 /// fs:00 chain and the current state.
169179 // Reset per-function state.
170180 PersonalityFn = nullptr;
171181 Personality = EHPersonality::Unknown;
182 UseStackGuard = false;
172183 return true;
173184 }
174185
246257 // Struct type of RegNode. Used for GEPing.
247258 Type *RegNodeTy;
248259
249 StringRef PersonalityName = PersonalityFn->getName();
250260 IRBuilder<> Builder(&F->getEntryBlock(), F->getEntryBlock().begin());
251261 Type *Int8PtrType = Builder.getInt8PtrTy();
252262 if (Personality == EHPersonality::MSVC_CXX) {
258268 Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
259269 // TryLevel = -1
260270 StateFieldIndex = 2;
261 insertStateNumberStore(&*Builder.GetInsertPoint(), -1);
271 ParentBaseState = -1;
272 insertStateNumberStore(&*Builder.GetInsertPoint(), ParentBaseState);
262273 // Handler = __ehhandler$F
263274 Function *Trampoline = generateLSDAInEAXThunk(F);
264275 Link = Builder.CreateStructGEP(RegNodeTy, RegNode, 1);
266277 } else if (Personality == EHPersonality::MSVC_X86SEH) {
267278 // If _except_handler4 is in use, some additional guard checks and prologue
268279 // stuff is required.
269 bool UseStackGuard = (PersonalityName == "_except_handler4");
270280 RegNodeTy = getSEHRegistrationType();
271281 RegNode = Builder.CreateAlloca(RegNodeTy);
272282 // SavedESP = llvm.stacksave()
275285 Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
276286 // TryLevel = -2 / -1
277287 StateFieldIndex = 4;
278 insertStateNumberStore(&*Builder.GetInsertPoint(), UseStackGuard ? -2 : -1);
288 StringRef PersonalityName = PersonalityFn->getName();
289 UseStackGuard = (PersonalityName == "_except_handler4");
290 ParentBaseState = UseStackGuard ? -2 : -1;
291 insertStateNumberStore(&*Builder.GetInsertPoint(), ParentBaseState);
279292 // ScopeTable = llvm.x86.seh.lsda(F)
280293 Value *FI8 = Builder.CreateBitCast(F, Int8PtrType);
281294 Value *LSDA = Builder.CreateCall(
387400 Builder.CreateStore(Next, FSZero);
388401 }
389402
403 // Figure out what state we should assign calls in this block.
404 static int getBaseStateForBB(DenseMap &BlockColors,
405 WinEHFuncInfo &FuncInfo, BasicBlock *BB) {
406 int BaseState = -1;
407 auto &BBColors = BlockColors[BB];
408
409 assert(BBColors.size() == 1 && "multi-color BB not removed by preparation");
410 BasicBlock *FuncletEntryBB = BBColors.front();
411 if (auto *FuncletPad =
412 dyn_cast(FuncletEntryBB->getFirstNonPHI())) {
413 auto BaseStateI = FuncInfo.FuncletBaseStateMap.find(FuncletPad);
414 if (BaseStateI != FuncInfo.FuncletBaseStateMap.end())
415 BaseState = BaseStateI->second;
416 }
417
418 return BaseState;
419 }
420
421 // Calculate the state a call-site is in.
422 static int getStateForCallSite(DenseMap &BlockColors,
423 WinEHFuncInfo &FuncInfo, CallSite CS) {
424 if (auto *II = dyn_cast(CS.getInstruction())) {
425 // Look up the state number of the EH pad this unwinds to.
426 assert(FuncInfo.InvokeStateMap.count(II) && "invoke has no state!");
427 return FuncInfo.InvokeStateMap[II];
428 }
429 // Possibly throwing call instructions have no actions to take after
430 // an unwind. Ensure they are in the -1 state.
431 return getBaseStateForBB(BlockColors, FuncInfo, CS.getParent());
432 }
433
434 // Calculate the intersection of all the FinalStates for a BasicBlock's
435 // predecessor.
436 static int getPredState(DenseMap &FinalStates, Function &F,
437 int ParentBaseState, BasicBlock *BB) {
438 // The entry block has no predecessors but we know that the prologue always
439 // sets us up with a fixed state.
440 if (&F.getEntryBlock() == BB)
441 return ParentBaseState;
442
443 // This is an EH Pad, conservatively report this basic block as overdefined.
444 if (BB->isEHPad())
445 return OverdefinedState;
446
447 int CommonState = OverdefinedState;
448 for (BasicBlock *PredBB : predecessors(BB)) {
449 // We didn't manage to get a state for one of these predecessors,
450 // conservatively report this basic block as overdefined.
451 auto PredEndState = FinalStates.find(PredBB);
452 if (PredEndState == FinalStates.end())
453 return OverdefinedState;
454
455 // This code is reachable via exceptional control flow,
456 // conservatively report this basic block as overdefined.
457 if (isa(PredBB->getTerminator()))
458 return OverdefinedState;
459
460 int PredState = PredEndState->second;
461 assert(PredState != OverdefinedState &&
462 "overdefined BBs shouldn't be in FinalStates");
463 if (CommonState == OverdefinedState)
464 CommonState = PredState;
465
466 // At least two predecessors have different FinalStates,
467 // conservatively report this basic block as overdefined.
468 if (CommonState != PredState)
469 return OverdefinedState;
470 }
471
472 return CommonState;
473 };
474
475 static bool isStateStoreNeeded(EHPersonality Personality, CallSite CS) {
476 if (!CS)
477 return false;
478
479 if (isAsynchronousEHPersonality(Personality))
480 return !CS.doesNotAccessMemory();
481
482 return !CS.doesNotThrow();
483 }
484
390485 void WinEHStatePass::addStateStores(Function &F, WinEHFuncInfo &FuncInfo) {
391486 // Mark the registration node. The backend needs to know which alloca it is so
392487 // that it can recover the original frame pointer.
404499
405500 // Iterate all the instructions and emit state number stores.
406501 DenseMap BlockColors = colorEHFunclets(F);
407 for (BasicBlock &BB : F) {
408 // Figure out what state we should assign calls in this block.
409 int BaseState = -1;
410 auto &BBColors = BlockColors[&BB];
411
412 assert(BBColors.size() == 1 &&
413 "multi-color BB not removed by preparation");
502 ReversePostOrderTraversal RPOT(&F);
503
504 // InitialStates yields the state of the first call-site for a BasicBlock.
505 DenseMap InitialStates;
506 // FinalStates yields the state of the last call-site for a BasicBlock.
507 DenseMap FinalStates;
508 // Worklist used to revisit BasicBlocks with indeterminate
509 // Initial/Final-States.
510 std::deque Worklist;
511 // Fill in InitialStates and FinalStates for BasicBlocks with call-sites.
512 for (BasicBlock *BB : RPOT) {
513 int InitialState = OverdefinedState;
514 int FinalState;
515 if (&F.getEntryBlock() == BB)
516 InitialState = FinalState = ParentBaseState;
517 for (Instruction &I : *BB) {
518 CallSite CS(&I);
519 if (!isStateStoreNeeded(Personality, CS))
520 continue;
521
522 int State = getStateForCallSite(BlockColors, FuncInfo, CS);
523 if (InitialState == OverdefinedState)
524 InitialState = State;
525 FinalState = State;
526 }
527 // No call-sites in this basic block? That's OK, we will come back to these
528 // in a later pass.
529 if (InitialState == OverdefinedState) {
530 Worklist.push_back(BB);
531 continue;
532 }
533 DEBUG(dbgs() << "X86WinEHState: " << BB->getName()
534 << " InitialState=" << InitialState << '\n');
535 DEBUG(dbgs() << "X86WinEHState: " << BB->getName()
536 << " FinalState=" << FinalState << '\n');
537 InitialStates.insert({BB, InitialState});
538 FinalStates.insert({BB, FinalState});
539 }
540
541 // Try to fill-in InitialStates and FinalStates which have no call-sites.
542 while (!Worklist.empty()) {
543 BasicBlock *BB = Worklist.front();
544 Worklist.pop_front();
545 // This BasicBlock has already been figured out, nothing more we can do.
546 if (InitialStates.count(BB) != 0)
547 continue;
548
549 int PredState = getPredState(FinalStates, F, ParentBaseState, BB);
550 if (PredState == OverdefinedState)
551 continue;
552
553 // We successfully inferred this BasicBlock's state via its predecessors;
554 // enqueue its successors to see if we can infer their states.
555 InitialStates.insert({BB, PredState});
556 FinalStates.insert({BB, PredState});
557 for (BasicBlock *SuccBB : successors(BB))
558 Worklist.push_back(SuccBB);
559 }
560
561 // Finally, insert state stores before call-sites which transition us to a new
562 // state.
563 for (BasicBlock *BB : RPOT) {
564 auto &BBColors = BlockColors[BB];
414565 BasicBlock *FuncletEntryBB = BBColors.front();
415 if (auto *FuncletPad =
416 dyn_cast(FuncletEntryBB->getFirstNonPHI())) {
417 // We do not support nesting funclets within cleanuppads.
418 if (isa(FuncletPad))
566 if (isa(FuncletEntryBB->getFirstNonPHI()))
567 continue;
568
569 int PrevState = getPredState(FinalStates, F, ParentBaseState, BB);
570 DEBUG(dbgs() << "X86WinEHState: " << BB->getName()
571 << " PrevState=" << PrevState << '\n');
572
573 for (Instruction &I : *BB) {
574 CallSite CS(&I);
575 if (!isStateStoreNeeded(Personality, CS))
419576 continue;
420577
421 auto BaseStateI = FuncInfo.FuncletBaseStateMap.find(FuncletPad);
422 if (BaseStateI != FuncInfo.FuncletBaseStateMap.end())
423 BaseState = BaseStateI->second;
424 }
425
426 for (Instruction &I : BB) {
427 if (auto *CI = dyn_cast(&I)) {
428 // Possibly throwing call instructions have no actions to take after
429 // an unwind. Ensure they are in the -1 state.
430 if (CI->doesNotThrow())
431 continue;
432 insertStateNumberStore(CI, BaseState);
433 } else if (auto *II = dyn_cast(&I)) {
434 // Look up the state number of the landingpad this unwinds to.
435 assert(FuncInfo.InvokeStateMap.count(II) && "invoke has no state!");
436 int State = FuncInfo.InvokeStateMap[II];
437 insertStateNumberStore(II, State);
438 }
578 int State = getStateForCallSite(BlockColors, FuncInfo, CS);
579 if (State != PrevState)
580 insertStateNumberStore(&I, State);
581 PrevState = State;
439582 }
440583 }
441584 }
2727 ; CHECK: entry:
2828 ; CHECK: store i32 -1
2929 ; CHECK: call void @g(i32 3)
30 ; CHECK-NEXT: call void @g(i32 4)
31 ; CHECK-NEXT: call void @g(i32 5)
3032 call void @g(i32 3)
33 call void @g(i32 4)
34 call void @g(i32 5)
3135 store i32 0, i32* %tmp, align 4
3236 %0 = bitcast i32* %tmp to i8*
3337 ; CHECK: store i32 0
5357 ; CHECK: catch.3:
5458 ; CHECK: store i32 3
5559 ; CHECK: call void @g(i32 1)
60 ; CHECK-NEXT: call void @g(i32 2)
61 ; CHECK-NEXT: call void @g(i32 3)
5662 call void @g(i32 1)
63 call void @g(i32 2)
64 call void @g(i32 3)
5765 catchret from %2 to label %try.cont
5866
5967 try.cont: ; preds = %catch.3
6068 ; CHECK: try.cont:
6169 ; CHECK: store i32 1
6270 ; CHECK: call void @g(i32 2)
71 ; CHECK-NEXT: call void @g(i32 3)
72 ; CHECK-NEXT: call void @g(i32 4)
6373 call void @g(i32 2)
74 call void @g(i32 3)
75 call void @g(i32 4)
6476 unreachable
6577
6678 unreachable: ; preds = %catch
110122 ; CHECK: try.cont:
111123 ; CHECK: store i32 1
112124 ; CHECK: call void @dtor()
125 ; CHECK-NEXT: call void @dtor()
126 ; CHECK-NEXT: call void @dtor()
127 call void @dtor() #3 [ "funclet"(token %1) ]
128 call void @dtor() #3 [ "funclet"(token %1) ]
113129 call void @dtor() #3 [ "funclet"(token %1) ]
114130 catchret from %1 to label %try.cont4
115131
130146 unreachable
131147 }
132148
149 ; CHECK-LABEL: define void @required_state_store(
150 define void @required_state_store(i1 zeroext %cond) personality i32 (...)* @_except_handler3 {
151 entry:
152 %__exception_code = alloca i32, align 4
153 call void (...) @llvm.localescape(i32* nonnull %__exception_code)
154 ; CHECK: store i32 -1
155 ; CHECK: call void @g(i32 0)
156 call void @g(i32 0)
157 br i1 %cond, label %if.then, label %if.end
158
159 if.then: ; preds = %entry
160 ; CHECK: store i32 0
161 ; CHECK-NEXT: invoke void @g(i32 1)
162 invoke void @g(i32 1)
163 to label %if.end unwind label %catch.dispatch
164
165 catch.dispatch: ; preds = %if.then
166 %0 = catchswitch within none [label %__except.ret] unwind to caller
167
168 __except.ret: ; preds = %catch.dispatch
169 %1 = catchpad within %0 [i8* bitcast (i32 ()* @"\01?filt$0@0@required_state_store@@" to i8*)]
170 catchret from %1 to label %if.end
171
172 if.end: ; preds = %if.then, %__except.ret, %entry
173 ; CHECK: store i32 -1
174 ; CHECK-NEXT: call void @dtor()
175 call void @dtor()
176 ret void
177 }
178
179 define internal i32 @"\01?filt$0@0@required_state_store@@"() {
180 entry:
181 %0 = tail call i8* @llvm.frameaddress(i32 1)
182 %1 = tail call i8* @llvm.x86.seh.recoverfp(i8* bitcast (void (i1)* @required_state_store to i8*), i8* %0)
183 %2 = tail call i8* @llvm.localrecover(i8* bitcast (void (i1)* @required_state_store to i8*), i8* %1, i32 0)
184 %__exception_code = bitcast i8* %2 to i32*
185 %3 = getelementptr inbounds i8, i8* %0, i32 -20
186 %4 = bitcast i8* %3 to { i32*, i8* }**
187 %5 = load { i32*, i8* }*, { i32*, i8* }** %4, align 4
188 %6 = getelementptr inbounds { i32*, i8* }, { i32*, i8* }* %5, i32 0, i32 0
189 %7 = load i32*, i32** %6, align 4
190 %8 = load i32, i32* %7, align 4
191 store i32 %8, i32* %__exception_code, align 4
192 ret i32 1
193 }
194
133195 declare void @g(i32) #0
134196
135197 declare void @dtor()
137199 declare x86_stdcallcc void @_CxxThrowException(i8*, %eh.ThrowInfo*)
138200
139201 declare i32 @__CxxFrameHandler3(...)
202
203 declare i8* @llvm.frameaddress(i32)
204
205 declare i8* @llvm.x86.seh.recoverfp(i8*, i8*)
206
207 declare i8* @llvm.localrecover(i8*, i8*, i32)
208
209 declare void @llvm.localescape(...)
210
211 declare i32 @_except_handler3(...)
140212
141213 attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
142214 attributes #1 = { noreturn }
5050 ; CHECK: "?dtor$2@?0?passes_two@4HA":
5151 ; CHECK: pushl %ebp
5252 ; CHECK: subl $8, %esp
53 ; CHECK: addl $12, %ebp
53 ; CHECK: addl $16, %ebp
5454 ; CHECK: {{movl|leal}} -{{[0-9]+}}(%ebp), %ecx
5555 ; CHECK: calll "??1A@@QAE@XZ"
5656 ; CHECK: addl $8, %esp