llvm.org GIT mirror llvm / 2f7322b
[ShrinkWrap] Add (a simplified version of) shrink-wrapping.

This patch introduces a new pass that computes the safe points at which to insert the prologue and epilogue of the function. The interest is to find safe points that are cheaper than the entry and exit blocks.

As an example, and to avoid introducing regressions, this patch also implements the required bits to enable the shrink-wrapping pass for AArch64.

** Context **

Currently we insert the prologue and epilogue of the method/function in the entry and exit blocks. Although this is correct, we can do a better job when those are not immediately required and insert them at less frequently executed places. The job of the shrink-wrapping pass is to identify such places.

** Motivating example **

Let us consider the following function that performs a call in only one branch of an if:

define i32 @f(i32 %a, i32 %b) {
  %tmp = alloca i32, align 4
  %tmp2 = icmp slt i32 %a, %b
  br i1 %tmp2, label %true, label %false

true:
  store i32 %a, i32* %tmp, align 4
  %tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
  br label %false

false:
  %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
  ret i32 %tmp.0
}

On AArch64 this code generates (removing the cfi directives to ease readability):

_f: ; @f
; BB#0:
  stp x29, x30, [sp, #-16]!
  mov x29, sp
  sub sp, sp, #16 ; =16
  cmp w0, w1
  b.ge LBB0_2
; BB#1: ; %true
  stur w0, [x29, #-4]
  sub x1, x29, #4 ; =4
  mov w0, wzr
  bl _doSomething
LBB0_2: ; %false
  mov sp, x29
  ldp x29, x30, [sp], #16
  ret

With shrink-wrapping we could generate:

_f: ; @f
; BB#0:
  cmp w0, w1
  b.ge LBB0_2
; BB#1: ; %true
  stp x29, x30, [sp, #-16]!
  mov x29, sp
  sub sp, sp, #16 ; =16
  stur w0, [x29, #-4]
  sub x1, x29, #4 ; =4
  mov w0, wzr
  bl _doSomething
  add sp, x29, #16 ; =16
  ldp x29, x30, [sp], #16
LBB0_2: ; %false
  ret

Therefore, we would pay the overhead of setting up/destroying the frame only if we actually do the call.
** Proposed Solution **

This patch introduces a new machine pass that performs the shrink-wrapping analysis (see the comments at the beginning of ShrinkWrap.cpp for more details). It then stores the safe save and restore points into the MachineFrameInfo attached to the MachineFunction. This information is then used by the PrologEpilogInserter (PEI) to place the related code at the right place. This pass runs right before the PEI.

Unlike the original paper of Chow from PLDI’88, this implementation of shrink-wrapping does not use expensive data-flow analysis and does not need hacks to properly avoid frequently executed points. Instead, it relies on dominance and loop properties.

The pass is off by default and each target can opt in by setting the EnableShrinkWrap boolean to true in their derived class of TargetPassConfig. This setting can also be overridden on the command line by using -enable-shrink-wrap.

Before you try out the pass for your target, make sure you properly fix your emitProlog/emitEpilog/adjustForXXX methods to cope with basic blocks that are not necessarily the entry block.

** Design Decisions **

1. ShrinkWrap is its own pass right now. It could frankly be merged into PEI, but for debugging and clarity I thought it was best to have its own file.
2. Right now, we only support one save point and one restore point. At some point we can expand this to several save points and restore points; the impacted components would then be:
   - The pass itself: New algorithm needed.
   - MachineFrameInfo: Hold a list or set of Save/Restore points instead of one pointer.
   - PEI: Should loop over the save points and restore points.

Anyhow, at least for this first iteration, I do not believe it is interesting to support the complex cases. We should revisit that when we have motivating examples.
Differential Revision: http://reviews.llvm.org/D9210 <rdar://problem/3201744> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@236507 91177308-0d34-0410-b5e6-96231b3b80d8 Quentin Colombet 5 years ago
42 changed file(s) with 1173 addition(s) and 118 deletion(s). Raw diff Collapse all Expand all
245245 /// True if this is a varargs function that contains a musttail call.
246246 bool HasMustTailInVarArgFunc;
247247
248 /// Not null, if shrink-wrapping found a better place for the prologue.
249 MachineBasicBlock *Save;
250 /// Not null, if shrink-wrapping found a better place for the epilogue.
251 MachineBasicBlock *Restore;
252
253 /// Check if there exists a path from \p MBB leading to the basic
254 /// block with a SavePoint (a.k.a. prologue).
255 bool isBeforeSavePoint(const MachineFunction &MF,
256 const MachineBasicBlock &MBB) const;
257
248258 public:
249259 explicit MachineFrameInfo(unsigned StackAlign, bool isStackRealign,
250260 bool RealignOpt)
268278 HasInlineAsmWithSPAdjust = false;
269279 HasVAStart = false;
270280 HasMustTailInVarArgFunc = false;
281 Save = nullptr;
282 Restore = nullptr;
271283 }
272284
273285 /// hasStackObjects - Return true if there are any stack objects in this
596608
597609 void setCalleeSavedInfoValid(bool v) { CSIValid = v; }
598610
611 MachineBasicBlock *getSavePoint() const { return Save; }
612 void setSavePoint(MachineBasicBlock *NewSave) { Save = NewSave; }
613 MachineBasicBlock *getRestorePoint() const { return Restore; }
614 void setRestorePoint(MachineBasicBlock *NewRestore) { Restore = NewRestore; }
615
599616 /// getPristineRegs - Return a set of physical registers that are pristine on
600617 /// entry to the MBB.
601618 ///
119119 /// Default setting for -enable-tail-merge on this target.
120120 bool EnableTailMerge;
121121
122 /// Default setting for -enable-shrink-wrap on this target.
123 bool EnableShrinkWrap;
124
122125 public:
123126 TargetPassConfig(TargetMachine *tm, PassManagerBase &pm);
124127 // Dummy constructor.
178181 /// Return true if the optimized regalloc pipeline is enabled.
179182 bool getOptimizeRegAlloc() const;
180183
184 /// Return true if shrink wrapping is enabled.
185 bool getEnableShrinkWrap() const;
186
181187 /// Return true if the default global register allocator is in use and
182188 /// has not be overriden on the command line with '-regalloc=...'
183189 bool usingDefaultRegAlloc() const;
424430 /// SpillPlacement analysis. Suggest optimal placement of spill code between
425431 /// basic blocks.
426432 extern char &SpillPlacementID;
433
434 /// ShrinkWrap pass. Look for the best place to insert save and restore
435 /// instructions and update the MachineFrameInfo with that information.
436 extern char &ShrinkWrapID;
427437
428438 /// VirtRegRewriter pass. Rewrite virtual registers to physical registers as
429439 /// assigned in VirtRegMap.
246246 void initializeSROA_SSAUpPass(PassRegistry&);
247247 void initializeScalarEvolutionAliasAnalysisPass(PassRegistry&);
248248 void initializeScalarEvolutionPass(PassRegistry&);
249 void initializeShrinkWrapPass(PassRegistry &);
249250 void initializeSimpleInlinerPass(PassRegistry&);
250251 void initializeShadowStackGCLoweringPass(PassRegistry&);
251252 void initializeRegisterCoalescerPass(PassRegistry&);
129129
130130 /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
131131 /// the function.
132 virtual void emitPrologue(MachineFunction &MF) const = 0;
132 virtual void emitPrologue(MachineFunction &MF,
133 MachineBasicBlock &MBB) const = 0;
133134 virtual void emitEpilogue(MachineFunction &MF,
134135 MachineBasicBlock &MBB) const = 0;
135136
136137 /// Adjust the prologue to have the function use segmented stacks. This works
137138 /// by adding a check even before the "normal" function prologue.
138 virtual void adjustForSegmentedStacks(MachineFunction &MF) const { }
139 virtual void adjustForSegmentedStacks(MachineFunction &MF,
140 MachineBasicBlock &PrologueMBB) const {}
139141
140142 /// Adjust the prologue to add Erlang Run-Time System (ERTS) specific code in
141143 /// the assembly prologue to explicitly handle the stack.
142 virtual void adjustForHiPEPrologue(MachineFunction &MF) const { }
144 virtual void adjustForHiPEPrologue(MachineFunction &MF,
145 MachineBasicBlock &PrologueMBB) const {}
143146
144147 /// Adjust the prologue to add an allocation at a fixed offset from the frame
145148 /// pointer.
146 virtual void adjustForFrameAllocatePrologue(MachineFunction &MF) const { }
149 virtual void
150 adjustForFrameAllocatePrologue(MachineFunction &MF,
151 MachineBasicBlock &PrologueMBB) const {}
147152
148153 /// spillCalleeSavedRegisters - Issues instruction(s) to spill all callee
149154 /// saved registers and returns true if it isn't possible / profitable to do
9292 ScheduleDAGInstrs.cpp
9393 ScheduleDAGPrinter.cpp
9494 ScoreboardHazardRecognizer.cpp
95 ShrinkWrap.cpp
9596 ShadowStackGC.cpp
9697 ShadowStackGCLowering.cpp
9798 SjLjEHPrepare.cpp
6060 initializePostRASchedulerPass(Registry);
6161 initializeProcessImplicitDefsPass(Registry);
6262 initializeRegisterCoalescerPass(Registry);
63 initializeShrinkWrapPass(Registry);
6364 initializeSlotIndexesPass(Registry);
6465 initializeStackColoringPass(Registry);
6566 initializeStackMapLivenessPass(Registry);
599599 for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR)
600600 BV.set(*CSR);
601601
602 // The entry MBB always has all CSRs pristine.
603 if (MBB == &MF->front())
602 // Each MBB before the save point has all CSRs pristine.
603 if (isBeforeSavePoint(*MF, *MBB))
604604 return BV;
605605
606606 // On other MBBs the saved CSRs are not pristine.
610610 BV.reset(I->getReg());
611611
612612 return BV;
613 }
614
615 // Note: We could use some sort of caching mechanism, but we lack the ability
616 // to know when the cache is invalid, i.e., the CFG changed.
617 // Assuming we have that, we can simply compute all the set of MBBs
618 // that are before the save point.
619 bool MachineFrameInfo::isBeforeSavePoint(const MachineFunction &MF,
620 const MachineBasicBlock &MBB) const {
621 // Early exit if shrink-wrapping did not kick.
622 if (!Save)
623 return &MBB == &MF.front();
624
625 // Starting from MBB, check if there is a path leading to Save that do
626 // not cross Restore.
627 SmallPtrSet Visited;
628 SmallVector WorkList;
629 WorkList.push_back(&MBB);
630 Visited.insert(&MBB);
631 do {
632 const MachineBasicBlock *CurBB = WorkList.pop_back_val();
633 // By construction, the region that is after the save point is
634 // dominated by the Save and post-dominated by the Restore.
635 // If we do not reach Restore and still reach Save, this
636 // means MBB is before Save.
637 if (CurBB == Save)
638 return true;
639 if (CurBB == Restore)
640 continue;
641 // Enqueue all the successors not already visited.
642 for (MachineBasicBlock *SuccBB : CurBB->successors())
643 if (Visited.insert(SuccBB).second)
644 WorkList.push_back(SuccBB);
645 } while (!WorkList.empty());
646 return false;
613647 }
614648
615649 unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
5151 static cl::opt DisableMachineCSE("disable-machine-cse", cl::Hidden,
5252 cl::desc("Disable Machine Common Subexpression Elimination"));
5353 static cl::opt
54 OptimizeRegAlloc("optimize-regalloc", cl::Hidden,
54 EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden,
55 cl::desc("enable the shrink-wrapping pass"));
56 static cl::opt OptimizeRegAlloc(
57 "optimize-regalloc", cl::Hidden,
5558 cl::desc("Enable optimized register allocation compilation path."));
5659 static cl::opt DisablePostRAMachineLICM("disable-postra-machine-licm",
5760 cl::Hidden,
205208 // Out of line constructor provides default values for pass options and
206209 // registers all common codegen passes.
207210 TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
208 : ImmutablePass(ID), PM(&pm), StartAfter(nullptr), StopAfter(nullptr),
209 Started(true), Stopped(false), AddingMachinePasses(false), TM(tm),
210 Impl(nullptr), Initialized(false), DisableVerify(false),
211 EnableTailMerge(true) {
211 : ImmutablePass(ID), PM(&pm), StartAfter(nullptr), StopAfter(nullptr),
212 Started(true), Stopped(false), AddingMachinePasses(false), TM(tm),
213 Impl(nullptr), Initialized(false), DisableVerify(false),
214 EnableTailMerge(true), EnableShrinkWrap(false) {
212215
213216 Impl = new PassConfigImpl();
214217
523526 addPostRegAlloc();
524527
525528 // Insert prolog/epilog code. Eliminate abstract frame index references...
529 if (getEnableShrinkWrap())
530 addPass(&ShrinkWrapID);
526531 addPass(&PrologEpilogCodeInserterID);
527532
528533 /// Add passes that optimize machine instructions after register allocation.
596601 // Clean-up the dead code that may have been generated by peephole
597602 // rewriting.
598603 addPass(&DeadMachineInstructionElimID);
604 }
605
606 bool TargetPassConfig::getEnableShrinkWrap() const {
607 switch (EnableShrinkWrapOpt) {
608 case cl::BOU_UNSET:
609 return EnableShrinkWrap && getOptLevel() != CodeGenOpt::None;
610 // If EnableShrinkWrap is set, it takes precedence on whatever the
611 // target sets. The rational is that we assume we want to test
612 // something related to shrink-wrapping.
613 case cl::BOU_TRUE:
614 return true;
615 case cl::BOU_FALSE:
616 return false;
617 }
618 llvm_unreachable("Invalid shrink-wrapping state");
599619 }
600620
601621 //===---------------------------------------------------------------------===//
7070 // stack frame indexes.
7171 unsigned MinCSFrameIndex, MaxCSFrameIndex;
7272
73 // Entry and return blocks of the current function.
74 MachineBasicBlock *EntryBlock;
75 SmallVector ReturnBlocks;
73 // Save and Restore blocks of the current function.
74 MachineBasicBlock *SaveBlock;
75 SmallVector RestoreBlocks;
7676
7777 // Flag to control whether to use the register scavenger to resolve
7878 // frame index materialization registers. Set according to
132132
133133 /// Compute the set of return blocks
134134 void PEI::calculateSets(MachineFunction &Fn) {
135 // Sets used to compute spill, restore placement sets.
136 const std::vector &CSI =
137 Fn.getFrameInfo()->getCalleeSavedInfo();
138
139 // If no CSRs used, we are done.
140 if (CSI.empty())
135 const MachineFrameInfo *MFI = Fn.getFrameInfo();
136
137 // Even when we do not change any CSR, we still want to insert the
138 // prologue and epilogue of the function.
139 // So set the save points for those.
140
141 // Use the points found by shrink-wrapping, if any.
142 if (MFI->getSavePoint()) {
143 SaveBlock = MFI->getSavePoint();
144 assert(MFI->getRestorePoint() && "Both restore and save must be set");
145 RestoreBlocks.push_back(MFI->getRestorePoint());
141146 return;
147 }
142148
143149 // Save refs to entry and return blocks.
144 EntryBlock = Fn.begin();
150 SaveBlock = Fn.begin();
145151 for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end();
146152 MBB != E; ++MBB)
147153 if (isReturnBlock(MBB))
148 ReturnBlocks.push_back(MBB);
154 RestoreBlocks.push_back(MBB);
149155
150156 return;
151157 }
225231 }
226232
227233 delete RS;
228 ReturnBlocks.clear();
234 RestoreBlocks.clear();
229235 return true;
230236 }
231237
371377 MFI->setCalleeSavedInfo(CSI);
372378 }
373379
380 /// Helper function to update the liveness information for the callee-saved
381 /// registers.
382 static void updateLiveness(MachineFunction &MF) {
383 MachineFrameInfo *MFI = MF.getFrameInfo();
384 // Visited will contain all the basic blocks that are in the region
385 // where the callee saved registers are alive:
386 // - Anything that is not Save or Restore -> LiveThrough.
387 // - Save -> LiveIn.
388 // - Restore -> LiveOut.
389 // The live-out is not attached to the block, so no need to keep
390 // Restore in this set.
391 SmallPtrSet Visited;
392 SmallVector WorkList;
393 MachineBasicBlock *Entry = &MF.front();
394 MachineBasicBlock *Save = MFI->getSavePoint();
395
396 if (!Save)
397 Save = Entry;
398
399 if (Entry != Save) {
400 WorkList.push_back(Entry);
401 Visited.insert(Entry);
402 }
403 Visited.insert(Save);
404
405 MachineBasicBlock *Restore = MFI->getRestorePoint();
406 if (Restore)
407 // By construction Restore cannot be visited, otherwise it
408 // means there exists a path to Restore that does not go
409 // through Save.
410 WorkList.push_back(Restore);
411
412 while (!WorkList.empty()) {
413 const MachineBasicBlock *CurBB = WorkList.pop_back_val();
414 // By construction, the region that is after the save point is
415 // dominated by the Save and post-dominated by the Restore.
416 if (CurBB == Save)
417 continue;
418 // Enqueue all the successors not already visited.
419 // Those are by construction either before Save or after Restore.
420 for (MachineBasicBlock *SuccBB : CurBB->successors())
421 if (Visited.insert(SuccBB).second)
422 WorkList.push_back(SuccBB);
423 }
424
425 const std::vector &CSI = MFI->getCalleeSavedInfo();
426
427 for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
428 for (MachineBasicBlock *MBB : Visited)
429 // Add the callee-saved register as live-in.
430 // It's killed at the spill.
431 MBB->addLiveIn(CSI[i].getReg());
432 }
433 }
434
374435 /// insertCSRSpillsAndRestores - Insert spill and restore code for
375436 /// callee saved registers used in the function.
376437 ///
391452 MachineBasicBlock::iterator I;
392453
393454 // Spill using target interface.
394 I = EntryBlock->begin();
395 if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) {
455 I = SaveBlock->begin();
456 if (!TFI->spillCalleeSavedRegisters(*SaveBlock, I, CSI, TRI)) {
396457 for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
397 // Add the callee-saved register as live-in.
398 // It's killed at the spill.
399 EntryBlock->addLiveIn(CSI[i].getReg());
400
401458 // Insert the spill to the stack frame.
402459 unsigned Reg = CSI[i].getReg();
403460 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
404 TII.storeRegToStackSlot(*EntryBlock, I, Reg, true, CSI[i].getFrameIdx(),
461 TII.storeRegToStackSlot(*SaveBlock, I, Reg, true, CSI[i].getFrameIdx(),
405462 RC, TRI);
406463 }
407464 }
465 // Update the live-in information of all the blocks up to the save point.
466 updateLiveness(Fn);
408467
409468 // Restore using target interface.
410 for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) {
411 MachineBasicBlock *MBB = ReturnBlocks[ri];
469 for (MachineBasicBlock *MBB : RestoreBlocks) {
412470 I = MBB->end();
413 --I;
414471
415472 // Skip over all terminator instructions, which are part of the return
416473 // sequence.
720777 const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
721778
722779 // Add prologue to the function...
723 TFI.emitPrologue(Fn);
724
725 // Add epilogue to restore the callee-save registers in each exiting block
726 for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
727 // If last instruction is a return instruction, add an epilogue
728 if (!I->empty() && I->back().isReturn())
729 TFI.emitEpilogue(Fn, *I);
730 }
780 TFI.emitPrologue(Fn, *SaveBlock);
781
782 // Add epilogue to restore the callee-save registers in each exiting block.
783 for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
784 TFI.emitEpilogue(Fn, *RestoreBlock);
731785
732786 // Emit additional code that is required to support segmented stacks, if
733787 // we've been asked for it. This, when linked with a runtime with support
734788 // for segmented stacks (libgcc is one), will result in allocating stack
735789 // space in small chunks instead of one large contiguous block.
736790 if (Fn.shouldSplitStack())
737 TFI.adjustForSegmentedStacks(Fn);
791 TFI.adjustForSegmentedStacks(Fn, *SaveBlock);
738792
739793 // Emit additional code that is required to explicitly handle the stack in
740794 // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The
742796 // different conditional check and another BIF for allocating more stack
743797 // space.
744798 if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE)
745 TFI.adjustForHiPEPrologue(Fn);
799 TFI.adjustForHiPEPrologue(Fn, *SaveBlock);
746800 }
747801
748802 /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
0 //===-- ShrinkWrap.cpp - Compute safe point for prolog/epilog insertion ---===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass looks for safe point where the prologue and epilogue can be
10 // inserted.
11 // The safe point for the prologue (resp. epilogue) is called Save
12 // (resp. Restore).
13 // A point is safe for prologue (resp. epilogue) if and only if
14 // it 1) dominates (resp. post-dominates) all the frame related operations and
15 // between 2) two executions of the Save (resp. Restore) point there is an
16 // execution of the Restore (resp. Save) point.
17 //
18 // For instance, the following points are safe:
19 // for (int i = 0; i < 10; ++i) {
20 // Save
21 // ...
22 // Restore
23 // }
24 // Indeed, the execution looks like Save -> Restore -> Save -> Restore ...
25 // And the following points are not:
26 // for (int i = 0; i < 10; ++i) {
27 // Save
28 // ...
29 // }
30 // for (int i = 0; i < 10; ++i) {
31 // ...
32 // Restore
33 // }
34 // Indeed, the execution looks like Save -> Save -> ... -> Restore -> Restore.
35 //
36 // This pass also ensures that the safe points are 3) cheaper than the regular
37 // entry and exits blocks.
38 //
39 // Property #1 is ensured via the use of MachineDominatorTree and
40 // MachinePostDominatorTree.
41 // Property #2 is ensured via property #1 and MachineLoopInfo, i.e., both
42 // points must be in the same loop.
43 // Property #3 is ensured via the MachineBlockFrequencyInfo.
44 //
45 // If this pass finds points matching all these properties, then
46 // MachineFrameInfo is updated with that information.
47 //===----------------------------------------------------------------------===//
48 #include "llvm/ADT/Statistic.h"
49 // To check for profitability.
50 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
51 // For property #1 for Save.
52 #include "llvm/CodeGen/MachineDominators.h"
53 #include "llvm/CodeGen/MachineFunctionPass.h"
54 // To record the result of the analysis.
55 #include "llvm/CodeGen/MachineFrameInfo.h"
56 // For property #2.
57 #include "llvm/CodeGen/MachineLoopInfo.h"
58 // For property #1 for Restore.
59 #include "llvm/CodeGen/MachinePostDominators.h"
60 #include "llvm/CodeGen/Passes.h"
61 // To know about callee-saved.
62 #include "llvm/CodeGen/RegisterClassInfo.h"
63 #include "llvm/Support/Debug.h"
64 // To know about frame setup operation.
65 #include "llvm/Target/TargetInstrInfo.h"
66 // To access TargetInstrInfo.
67 #include "llvm/Target/TargetSubtargetInfo.h"
68
69 #define DEBUG_TYPE "shrink-wrap"
70
71 using namespace llvm;
72
73 STATISTIC(NumFunc, "Number of functions");
74 STATISTIC(NumCandidates, "Number of shrink-wrapping candidates");
75 STATISTIC(NumCandidatesDropped,
76 "Number of shrink-wrapping candidates dropped because of frequency");
77
78 namespace {
79 /// \brief Class to determine where the safe point to insert the
80 /// prologue and epilogue are.
81 /// Unlike the paper from Fred C. Chow, PLDI'88, that introduces the
82 /// shrink-wrapping term for prologue/epilogue placement, this pass
83 /// does not rely on expensive data-flow analysis. Instead we use the
84 /// dominance properties and loop information to decide which point
85 /// are safe for such insertion.
86 class ShrinkWrap : public MachineFunctionPass {
87 /// Hold callee-saved information.
88 RegisterClassInfo RCI;
89 MachineDominatorTree *MDT;
90 MachinePostDominatorTree *MPDT;
91 /// Current safe point found for the prologue.
92 /// The prologue will be inserted before the first instruction
93 /// in this basic block.
94 MachineBasicBlock *Save;
95 /// Current safe point found for the epilogue.
96 /// The epilogue will be inserted before the first terminator instruction
97 /// in this basic block.
98 MachineBasicBlock *Restore;
99 /// Hold the information of the basic block frequency.
100 /// Use to check the profitability of the new points.
101 MachineBlockFrequencyInfo *MBFI;
102 /// Hold the loop information. Used to determine if Save and Restore
103 /// are in the same loop.
104 MachineLoopInfo *MLI;
105 /// Frequency of the Entry block.
106 uint64_t EntryFreq;
107 /// Current opcode for frame setup.
108 int FrameSetupOpcode;
109 /// Current opcode for frame destroy.
110 int FrameDestroyOpcode;
111 /// Entry block.
112 const MachineBasicBlock *Entry;
113
114 /// \brief Check if \p MI uses or defines a callee-saved register or
115 /// a frame index. If this is the case, this means \p MI must happen
116 /// after Save and before Restore.
117 bool useOrDefCSROrFI(const MachineInstr &MI) const;
118
119 /// \brief Update the Save and Restore points such that \p MBB is in
120 /// the region that is dominated by Save and post-dominated by Restore
121 /// and Save and Restore still match the safe point definition.
122 /// Such point may not exist and Save and/or Restore may be null after
123 /// this call.
124 void updateSaveRestorePoints(MachineBasicBlock &MBB);
125
126 /// \brief Initialize the pass for \p MF.
127 void init(MachineFunction &MF) {
128 RCI.runOnMachineFunction(MF);
129 MDT = &getAnalysis();
130 MPDT = &getAnalysis();
131 Save = nullptr;
132 Restore = nullptr;
133 MBFI = &getAnalysis();
134 MLI = &getAnalysis();
135 EntryFreq = MBFI->getEntryFreq();
136 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
137 FrameSetupOpcode = TII.getCallFrameSetupOpcode();
138 FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
139 Entry = &MF.front();
140
141 ++NumFunc;
142 }
143
144 /// Check whether or not Save and Restore points are still interesting for
145 /// shrink-wrapping.
146 bool ArePointsInteresting() const { return Save != Entry && Save && Restore; }
147
148 public:
149 static char ID;
150
151 ShrinkWrap() : MachineFunctionPass(ID) {
152 initializeShrinkWrapPass(*PassRegistry::getPassRegistry());
153 }
154
155 void getAnalysisUsage(AnalysisUsage &AU) const override {
156 AU.setPreservesAll();
157 AU.addRequired();
158 AU.addRequired();
159 AU.addRequired();
160 AU.addRequired();
161 MachineFunctionPass::getAnalysisUsage(AU);
162 }
163
164 const char *getPassName() const override {
165 return "Shrink Wrapping analysis";
166 }
167
168 /// \brief Perform the shrink-wrapping analysis and update
169 /// the MachineFrameInfo attached to \p MF with the results.
170 bool runOnMachineFunction(MachineFunction &MF) override;
171 };
172 } // End anonymous namespace.
173
174 char ShrinkWrap::ID = 0;
175 char &llvm::ShrinkWrapID = ShrinkWrap::ID;
176
177 INITIALIZE_PASS_BEGIN(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false,
178 false)
179 INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
180 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
181 INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
182 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
183 INITIALIZE_PASS_END(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false, false)
184
185 bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI) const {
186 if (MI.getOpcode() == FrameSetupOpcode ||
187 MI.getOpcode() == FrameDestroyOpcode) {
188 DEBUG(dbgs() << "Frame instruction: " << MI << '\n');
189 return true;
190 }
191 for (const MachineOperand &MO : MI.operands()) {
192 bool UseCSR = false;
193 if (MO.isReg()) {
194 unsigned PhysReg = MO.getReg();
195 if (!PhysReg)
196 continue;
197 assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
198 "Unallocated register?!");
199 UseCSR = RCI.getLastCalleeSavedAlias(PhysReg);
200 }
201 // TODO: Handle regmask more accurately.
202 // For now, be conservative about them.
203 if (UseCSR || MO.isFI() || MO.isRegMask()) {
204 DEBUG(dbgs() << "Use or define CSR(" << UseCSR << ") or FI(" << MO.isFI()
205 << "): " << MI << '\n');
206 return true;
207 }
208 }
209 return false;
210 }
211
212 /// \brief Helper function to find the immediate (post) dominator.
213 template
214 MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs,
215 DominanceAnalysis &Dom) {
216 MachineBasicBlock *IDom = &Block;
217 for (MachineBasicBlock *BB : BBs) {
218 IDom = Dom.findNearestCommonDominator(IDom, BB);
219 if (!IDom)
220 break;
221 }
222 return IDom;
223 }
224
225 void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) {
226 // Get rid of the easy cases first.
227 if (!Save)
228 Save = &MBB;
229 else
230 Save = MDT->findNearestCommonDominator(Save, &MBB);
231
232 if (!Save) {
233 DEBUG(dbgs() << "Found a block that is not reachable from Entry\n");
234 return;
235 }
236
237 if (!Restore)
238 Restore = &MBB;
239 else
240 Restore = MPDT->findNearestCommonDominator(Restore, &MBB);
241
242 // Make sure we would be able to insert the restore code before the
243 // terminator.
244 if (Restore == &MBB) {
245 for (const MachineInstr &Terminator : MBB.terminators()) {
246 if (!useOrDefCSROrFI(Terminator))
247 continue;
248 // One of the terminator needs to happen before the restore point.
249 if (MBB.succ_empty()) {
250 Restore = nullptr;
251 break;
252 }
253 // Look for a restore point that post-dominates all the successors.
254 // The immediate post-dominator is what we are looking for.
255 Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT);
256 break;
257 }
258 }
259
260 if (!Restore) {
261 DEBUG(dbgs() << "Restore point needs to be spanned on several blocks\n");
262 return;
263 }
264
265 // Make sure Save and Restore are suitable for shrink-wrapping:
266 // 1. all path from Save needs to lead to Restore before exiting.
267 // 2. all path to Restore needs to go through Save from Entry.
268 // We achieve that by making sure that:
269 // A. Save dominates Restore.
270 // B. Restore post-dominates Save.
271 // C. Save and Restore are in the same loop.
272 bool SaveDominatesRestore = false;
273 bool RestorePostDominatesSave = false;
274 while (Save && Restore &&
275 (!(SaveDominatesRestore = MDT->dominates(Save, Restore)) ||
276 !(RestorePostDominatesSave = MPDT->dominates(Restore, Save)) ||
277 MLI->getLoopFor(Save) != MLI->getLoopFor(Restore))) {
278 // Fix (A).
279 if (!SaveDominatesRestore) {
280 Save = MDT->findNearestCommonDominator(Save, Restore);
281 continue;
282 }
283 // Fix (B).
284 if (!RestorePostDominatesSave)
285 Restore = MPDT->findNearestCommonDominator(Restore, Save);
286
287 // Fix (C).
288 if (Save && Restore && Save != Restore &&
289 MLI->getLoopFor(Save) != MLI->getLoopFor(Restore)) {
290 if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore))
291 // Push Save outside of this loop.
292 Save = FindIDom<>(*Save, Save->predecessors(), *MDT);
293 else
294 // Push Restore outside of this loop.
295 Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT);
296 }
297 }
298 }
299
// Entry point of the pass: look for a (Save, Restore) pair of basic blocks
// and, when a suitable pair is found, record it in the MachineFrameInfo of MF.
// Every path below returns false; the visible code only records the chosen
// blocks and does not edit any instruction (presumably why no change is
// reported -- NOTE(review): confirm against the pass contract).
300 bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
301 if (MF.empty())
302 return false;
303 DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
304
305 init(MF);
306
// Phase 1: visit every block and, for each block that holds at least one
// instruction flagged by useOrDefCSROrFI, fold that block into the
// Save/Restore candidates through updateSaveRestorePoints.
307 for (MachineBasicBlock &MBB : MF) {
308 DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' ' << MBB.getName()
309 << '\n');
310
311 for (const MachineInstr &MI : MBB) {
312 if (!useOrDefCSROrFI(MI))
313 continue;
314 // Save (resp. restore) point must dominate (resp. post dominate)
315 // MI. Look for the proper basic block for those.
316 updateSaveRestorePoints(MBB);
317 // If we are at a point where we cannot improve the placement of
318 // save/restore instructions, just give up.
319 if (!ArePointsInteresting()) {
320 DEBUG(dbgs() << "No Shrink wrap candidate found\n");
321 return false;
322 }
323 // No need to look for other instructions, this basic block
324 // will already be part of the handled region.
325 break;
326 }
327 }
328 if (!ArePointsInteresting()) {
329 // If the points are not interesting at this point, then they must be null
330 // because it means we did not encounter any frame/CSR related code.
331 // Otherwise, we would have returned from the previous loop.
332 assert(!Save && !Restore && "We miss a shrink-wrap opportunity?!");
333 DEBUG(dbgs() << "Nothing to shrink-wrap\n");
334 return false;
335 }
336
337 DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq
338 << '\n');
339
// Phase 2: cost check. As long as Save or Restore has a block frequency
// (per MBFI) above EntryFreq, move the offending point to the block returned
// by FindIDom and let updateSaveRestorePoints re-adjust the pair.
340 do {
341 DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: "
342 << Save->getNumber() << ' ' << Save->getName() << ' '
343 << MBFI->getBlockFreq(Save).getFrequency() << "\nRestore: "
344 << Restore->getNumber() << ' ' << Restore->getName() << ' '
345 << MBFI->getBlockFreq(Restore).getFrequency() << '\n');
346
// IsSaveCheap is assigned inside the condition. When it is false the &&
// short-circuits, so the Restore frequency is only compared once Save is
// known to be cheap. Both being cheap ends the search.
347 bool IsSaveCheap;
348 if ((IsSaveCheap = EntryFreq >= MBFI->getBlockFreq(Save).getFrequency()) &&
349 EntryFreq >= MBFI->getBlockFreq(Restore).getFrequency())
350 break;
351 DEBUG(dbgs() << "New points are too expensive\n");
352 MachineBasicBlock *NewBB;
353 if (!IsSaveCheap) {
// Save is expensive: hoist it using the dominator tree over its predecessors.
// A null result leaves Save null and exits the loop.
354 Save = FindIDom<>(*Save, Save->predecessors(), *MDT);
355 if (!Save)
356 break;
357 NewBB = Save;
358 } else {
359 // Restore is expensive.
// Sink it using the post-dominator tree over its successors; a null result
// leaves Restore null and exits the loop.
360 Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT);
361 if (!Restore)
362 break;
363 NewBB = Restore;
364 }
365 updateSaveRestorePoints(*NewBB);
366 } while (Save && Restore);
367
// The cost-driven moves may have made the pair uninteresting (or null);
// count the dropped candidate and leave MachineFrameInfo untouched.
368 if (!ArePointsInteresting()) {
369 ++NumCandidatesDropped;
370 return false;
371 }
372
373 DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: " << Save->getNumber()
374 << ' ' << Save->getName() << "\nRestore: "
375 << Restore->getNumber() << ' ' << Restore->getName() << '\n');
376
// Publish the final pair on the function's frame info and count the success.
377 MachineFrameInfo *MFI = MF.getFrameInfo();
378 MFI->setSavePoint(Save);
379 MFI->setRestorePoint(Restore);
380 ++NumCandidates;
381 return false;
382 }
274274 MBBI->getOpcode() == AArch64::STPDpre;
275275 }
276276
277 void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
278 MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
277 void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
278 MachineBasicBlock &MBB) const {
279279 MachineBasicBlock::iterator MBBI = MBB.begin();
280280 const MachineFrameInfo *MFI = MF.getFrameInfo();
281281 const Function *Fn = MF.getFunction();
538538 void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
539539 MachineBasicBlock &MBB) const {
540540 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
541 assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
542541 MachineFrameInfo *MFI = MF.getFrameInfo();
543542 const AArch64InstrInfo *TII =
544543 static_cast(MF.getSubtarget().getInstrInfo());
545544 const AArch64RegisterInfo *RegInfo = static_cast(
546545 MF.getSubtarget().getRegisterInfo());
547 DebugLoc DL = MBBI->getDebugLoc();
548 unsigned RetOpcode = MBBI->getOpcode();
549
546 DebugLoc DL;
547 bool IsTailCallReturn = false;
548 if (MBB.end() != MBBI) {
549 DL = MBBI->getDebugLoc();
550 unsigned RetOpcode = MBBI->getOpcode();
551 IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
552 RetOpcode == AArch64::TCRETURNri;
553 }
550554 int NumBytes = MFI->getStackSize();
551555 const AArch64FunctionInfo *AFI = MF.getInfo();
552556
558562 // Initial and residual are named for consistency with the prologue. Note that
559563 // in the epilogue, the residual adjustment is executed first.
560564 uint64_t ArgumentPopSize = 0;
561 if (RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri) {
565 if (IsTailCallReturn) {
562566 MachineOperand &StackAdjust = MBBI->getOperand(1);
563567
564568 // For a tail-call in a callee-pops-arguments environment, some or all of
603607
604608 unsigned NumRestores = 0;
605609 // Move past the restores of the callee-saved registers.
606 MachineBasicBlock::iterator LastPopI = MBBI;
610 MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
607611 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
608612 if (LastPopI != MBB.begin()) {
609613 do {
3333
3434 /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
3535 /// the function.
36 void emitPrologue(MachineFunction &MF) const override;
36 void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
3737 void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
3838
3939 int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
277277 }
278278 }
279279
280 void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
281 MachineBasicBlock &MBB = MF.front();
280 void ARMFrameLowering::emitPrologue(MachineFunction &MF,
281 MachineBasicBlock &MBB) const {
282 assert(&MBB == &MF.front() && "Shrink-wrapping not yet implemented");
282283 MachineBasicBlock::iterator MBBI = MBB.begin();
283284 MachineFrameInfo *MFI = MF.getFrameInfo();
284285 ARMFunctionInfo *AFI = MF.getInfo();
18601861 // ARM can be found at [1].
18611862 //
18621863 // [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
1863 void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
1864 void ARMFrameLowering::adjustForSegmentedStacks(
1865 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
18641866 unsigned Opcode;
18651867 unsigned CFIIndex;
18661868 const ARMSubtarget *ST = &MF.getSubtarget();
18731875 if (!ST->isTargetAndroid() && !ST->isTargetLinux())
18741876 report_fatal_error("Segmented stacks not supported on this platform.");
18751877
1876 MachineBasicBlock &prologueMBB = MF.front();
1878 assert(&PrologueMBB == &MF.front() && "Shrink-wrapping not yet implemented");
18771879 MachineFrameInfo *MFI = MF.getFrameInfo();
18781880 MachineModuleInfo &MMI = MF.getMMI();
18791881 MCContext &Context = MMI.getContext();
19011903 MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
19021904 MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();
19031905
1904 for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
1905 e = prologueMBB.livein_end();
1906 for (MachineBasicBlock::livein_iterator i = PrologueMBB.livein_begin(),
1907 e = PrologueMBB.livein_end();
19061908 i != e; ++i) {
19071909 AllocMBB->addLiveIn(*i);
19081910 GetMBB->addLiveIn(*i);
21552157 .addCFIIndex(CFIIndex);
21562158
21572159 // Organizing MBB lists
2158 PostStackMBB->addSuccessor(&prologueMBB);
2160 PostStackMBB->addSuccessor(&PrologueMBB);
21592161
21602162 AllocMBB->addSuccessor(PostStackMBB);
21612163
2727
2828 /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
2929 /// the function.
30 void emitPrologue(MachineFunction &MF) const override;
30 void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
3131 void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
3232
3333 void fixTCReturn(MachineFunction &MF, MachineBasicBlock &MBB) const;
5454 void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
5555 RegScavenger *RS) const override;
5656
57 void adjustForSegmentedStacks(MachineFunction &MF) const override;
57 void adjustForSegmentedStacks(MachineFunction &MF,
58 MachineBasicBlock &MBB) const override;
5859
5960 private:
6061 void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
8181 MBB.erase(I);
8282 }
8383
84 void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
85 MachineBasicBlock &MBB = MF.front();
84 void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
85 MachineBasicBlock &MBB) const {
86 assert(&MBB == &MF.front() && "Shrink-wrapping not yet implemented");
8687 MachineBasicBlock::iterator MBBI = MBB.begin();
8788 MachineFrameInfo *MFI = MF.getFrameInfo();
8889 ARMFunctionInfo *AFI = MF.getInfo();
2626
2727 /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
2828 /// the function.
29 void emitPrologue(MachineFunction &MF) const override;
29 void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
3030 void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
3131
3232 bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
2222
2323 bool BPFFrameLowering::hasFP(const MachineFunction &MF) const { return true; }
2424
25 void BPFFrameLowering::emitPrologue(MachineFunction &MF) const {}
25 void BPFFrameLowering::emitPrologue(MachineFunction &MF,
26 MachineBasicBlock &MBB) const {}
2627
2728 void BPFFrameLowering::emitEpilogue(MachineFunction &MF,
2829 MachineBasicBlock &MBB) const {}
2323 explicit BPFFrameLowering(const BPFSubtarget &sti)
2424 : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0) {}
2525
26 void emitPrologue(MachineFunction &MF) const override;
26 void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
2727 void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
2828
2929 bool hasFP(const MachineFunction &MF) const override;
343343 EpilogB = PDomB;
344344 }
345345
346
347346 /// Perform most of the PEI work here:
348347 /// - saving/restoring of the callee-saved registers,
349348 /// - stack frame creation and destruction.
350349 /// Normally, this work is distributed among various functions, but doing it
351350 /// in one place allows shrink-wrapping of the stack frame.
352 void HexagonFrameLowering::emitPrologue(MachineFunction &MF) const {
351 void HexagonFrameLowering::emitPrologue(MachineFunction &MF,
352 MachineBasicBlock &MBB) const {
353353 auto &HST = static_cast(MF.getSubtarget());
354354 auto &HRI = *HST.getRegisterInfo();
355355
356 assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
356357 MachineFrameInfo *MFI = MF.getFrameInfo();
357358 const std::vector &CSI = MFI->getCalleeSavedInfo();
358359
2525 // All of the prolog/epilog functionality, including saving and restoring
2626 // callee-saved registers is handled in emitPrologue. This is to have the
2727 // logic for shrink-wrapping in one place.
28 void emitPrologue(MachineFunction &MF) const override;
28 void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const
29 override;
2930 void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
3031 override {}
3132 bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
3838 return !MF.getFrameInfo()->hasVarSizedObjects();
3939 }
4040
41 void MSP430FrameLowering::emitPrologue(MachineFunction &MF) const {
42 MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
41 void MSP430FrameLowering::emitPrologue(MachineFunction &MF,
42 MachineBasicBlock &MBB) const {
43 assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
4344 MachineFrameInfo *MFI = MF.getFrameInfo();
4445 MSP430MachineFunctionInfo *MSP430FI = MF.getInfo();
4546 const MSP430InstrInfo &TII =
2626
2727 /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
2828 /// the function.
29 void emitPrologue(MachineFunction &MF) const override;
29 void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
3030 void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
3131
3232 void eliminateCallFramePseudoInstr(MachineFunction &MF,
3131 Mips16FrameLowering::Mips16FrameLowering(const MipsSubtarget &STI)
3232 : MipsFrameLowering(STI, STI.stackAlignment()) {}
3333
34 void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const {
35 MachineBasicBlock &MBB = MF.front();
34 void Mips16FrameLowering::emitPrologue(MachineFunction &MF,
35 MachineBasicBlock &MBB) const {
36 assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
3637 MachineFrameInfo *MFI = MF.getFrameInfo();
3738 const Mips16InstrInfo &TII =
3839 *static_cast(STI.getInstrInfo());
2222
2323 /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
2424 /// the function.
25 void emitPrologue(MachineFunction &MF) const override;
25 void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
2626 void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
2727
2828 bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
363363 MipsSEFrameLowering::MipsSEFrameLowering(const MipsSubtarget &STI)
364364 : MipsFrameLowering(STI, STI.stackAlignment()) {}
365365
366 void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const {
367 MachineBasicBlock &MBB = MF.front();
366 void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
367 MachineBasicBlock &MBB) const {
368 assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
368369 MachineFrameInfo *MFI = MF.getFrameInfo();
369370 MipsFunctionInfo *MipsFI = MF.getInfo();
370371
2323
2424 /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
2525 /// the function.
26 void emitPrologue(MachineFunction &MF) const override;
26 void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
2727 void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
2828
2929 bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
3030
3131 bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { return true; }
3232
33 void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
33 void NVPTXFrameLowering::emitPrologue(MachineFunction &MF,
34 MachineBasicBlock &MBB) const {
3435 if (MF.getFrameInfo()->hasStackObjects()) {
35 MachineBasicBlock &MBB = MF.front();
36 assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
3637 // Insert "mov.u32 %SP, %Depot"
3738 MachineBasicBlock::iterator MBBI = MBB.begin();
3839 // This instruction really occurs before first instruction
2222 explicit NVPTXFrameLowering();
2323
2424 bool hasFP(const MachineFunction &MF) const override;
25 void emitPrologue(MachineFunction &MF) const override;
25 void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
2626 void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
2727
2828 void
6767 }
6868
6969 // Add function prolog/epilog
70 TFI.emitPrologue(MF);
70 TFI.emitPrologue(MF, MF.front());
7171
7272 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
7373 // If last instruction is a return instruction, add an epilogue
554554 }
555555 }
556556
557 void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
558 MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
557 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
558 MachineBasicBlock &MBB) const {
559 assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
559560 MachineBasicBlock::iterator MBBI = MBB.begin();
560561 MachineFrameInfo *MFI = MF.getFrameInfo();
561562 const PPCInstrInfo &TII =
3737
3838 /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
3939 /// the function.
40 void emitPrologue(MachineFunction &MF) const override;
40 void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
4141 void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
4242
4343 bool hasFP(const MachineFunction &MF) const override;
9898 NumEntries = 0;
9999 return nullptr;
100100 }
101 void
102 AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const {
103 }
101 void AMDGPUFrameLowering::emitPrologue(MachineFunction &MF,
102 MachineBasicBlock &MBB) const {}
104103 void
105104 AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF,
106105 MachineBasicBlock &MBB) const {
3636 int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
3737 const SpillSlot *
3838 getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
39 void emitPrologue(MachineFunction &MF) const override;
39 void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
4040 void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
4141 bool hasFP(const MachineFunction &MF) const override;
4242 };
8181 .addReg(SP::O6).addReg(SP::G1);
8282 }
8383
84 void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
84 void SparcFrameLowering::emitPrologue(MachineFunction &MF,
85 MachineBasicBlock &MBB) const {
8586 SparcMachineFunctionInfo *FuncInfo = MF.getInfo();
8687
87 MachineBasicBlock &MBB = MF.front();
88 assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
8889 MachineFrameInfo *MFI = MF.getFrameInfo();
8990 const SparcInstrInfo &TII =
9091 *static_cast(MF.getSubtarget().getInstrInfo());
2525
2626 /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
2727 /// the function.
28 void emitPrologue(MachineFunction &MF) const override;
28 void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
2929 void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
3030
3131 void
308308 }
309309 }
310310
311 void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const {
312 MachineBasicBlock &MBB = MF.front();
311 void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
312 MachineBasicBlock &MBB) const {
313 assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
313314 MachineFrameInfo *MFFrame = MF.getFrameInfo();
314315 auto *ZII =
315316 static_cast(MF.getSubtarget().getInstrInfo());
3939 override;
4040 void processFunctionBeforeFrameFinalized(MachineFunction &MF,
4141 RegScavenger *RS) const override;
42 void emitPrologue(MachineFunction &MF) const override;
42 void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
4343 void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
4444 bool hasFP(const MachineFunction &MF) const override;
4545 int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
564564 - for 32-bit code, substitute %e?? registers for %r??
565565 */
566566
567 void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
568 MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
567 void X86FrameLowering::emitPrologue(MachineFunction &MF,
568 MachineBasicBlock &MBB) const {
569 assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
569570 MachineBasicBlock::iterator MBBI = MBB.begin();
570571 MachineFrameInfo *MFI = MF.getFrameInfo();
571572 const Function *Fn = MF.getFunction();
15891590 // limit.
15901591 static const uint64_t kSplitStackAvailable = 256;
15911592
1592 void
1593 X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
1594 MachineBasicBlock &prologueMBB = MF.front();
1593 void X86FrameLowering::adjustForSegmentedStacks(
1594 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
1595 assert(&PrologueMBB == &MF.front() &&
1596 "Shrink-wrapping is not implemented yet");
15951597 MachineFrameInfo *MFI = MF.getFrameInfo();
15961598 const X86Subtarget &STI = MF.getSubtarget();
15971599 const TargetInstrInfo &TII = *STI.getInstrInfo();
16331635 // The MOV R10, RAX needs to be in a different block, since the RET we emit in
16341636 // allocMBB needs to be last (terminating) instruction.
16351637
1636 for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
1637 e = prologueMBB.livein_end(); i != e; i++) {
1638 for (MachineBasicBlock::livein_iterator i = PrologueMBB.livein_begin(),
1639 e = PrologueMBB.livein_end();
1640 i != e; i++) {
16381641 allocMBB->addLiveIn(*i);
16391642 checkMBB->addLiveIn(*i);
16401643 }
17481751
17491752 // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
17501753 // It jumps to normal execution of the function body.
1751 BuildMI(checkMBB, DL, TII.get(X86::JA_1)).addMBB(&prologueMBB);
1754 BuildMI(checkMBB, DL, TII.get(X86::JA_1)).addMBB(&PrologueMBB);
17521755
17531756 // On 32 bit we first push the arguments size and then the frame size. On 64
17541757 // bit, we pass the stack frame size in r10 and the argument size in r11.
18151818 else
18161819 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
18171820
1818 allocMBB->addSuccessor(&prologueMBB);
1821 allocMBB->addSuccessor(&PrologueMBB);
18191822
18201823 checkMBB->addSuccessor(allocMBB);
1821 checkMBB->addSuccessor(&prologueMBB);
1824 checkMBB->addSuccessor(&PrologueMBB);
18221825
18231826 #ifdef XDEBUG
18241827 MF.verify();
18401843 /// call inc_stack # doubles the stack space
18411844 /// temp0 = sp - MaxStack
18421845 /// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
1843 void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
1846 void X86FrameLowering::adjustForHiPEPrologue(
1847 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
18441848 const X86Subtarget &STI = MF.getSubtarget();
18451849 const TargetInstrInfo &TII = *STI.getInstrInfo();
18461850 MachineFrameInfo *MFI = MF.getFrameInfo();
19091913 // If the stack frame needed is larger than the guaranteed then runtime checks
19101914 // and calls to "inc_stack_0" BIF should be inserted in the assembly prologue.
19111915 if (MaxStack > Guaranteed) {
1912 MachineBasicBlock &prologueMBB = MF.front();
1916 assert(&PrologueMBB == &MF.front() &&
1917 "Shrink-wrapping is not implemented yet");
19131918 MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
19141919 MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
19151920
1916 for (MachineBasicBlock::livein_iterator I = prologueMBB.livein_begin(),
1917 E = prologueMBB.livein_end(); I != E; I++) {
1921 for (MachineBasicBlock::livein_iterator I = PrologueMBB.livein_begin(),
1922 E = PrologueMBB.livein_end();
1923 I != E; I++) {
19181924 stackCheckMBB->addLiveIn(*I);
19191925 incStackMBB->addLiveIn(*I);
19201926 }
19501956 // SPLimitOffset is in a fixed heap location (pointed by BP).
19511957 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
19521958 .addReg(ScratchReg), PReg, false, SPLimitOffset);
1953 BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_1)).addMBB(&prologueMBB);
1959 BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_1)).addMBB(&PrologueMBB);
19541960
19551961 // Create new MBB for IncStack:
19561962 BuildMI(incStackMBB, DL, TII.get(CALLop)).
19611967 .addReg(ScratchReg), PReg, false, SPLimitOffset);
19621968 BuildMI(incStackMBB, DL, TII.get(X86::JLE_1)).addMBB(incStackMBB);
19631969
1964 stackCheckMBB->addSuccessor(&prologueMBB, 99);
1970 stackCheckMBB->addSuccessor(&PrologueMBB, 99);
19651971 stackCheckMBB->addSuccessor(incStackMBB, 1);
1966 incStackMBB->addSuccessor(&prologueMBB, 99);
1972 incStackMBB->addSuccessor(&PrologueMBB, 99);
19671973 incStackMBB->addSuccessor(incStackMBB, 1);
19681974 }
19691975 #ifdef XDEBUG
3434
3535 /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
3636 /// the function.
37 void emitPrologue(MachineFunction &MF) const override;
37 void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
3838 void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
3939
40 void adjustForSegmentedStacks(MachineFunction &MF) const override;
40 void adjustForSegmentedStacks(MachineFunction &MF,
41 MachineBasicBlock &PrologueMBB) const override;
4142
42 void adjustForHiPEPrologue(MachineFunction &MF) const override;
43 void adjustForHiPEPrologue(MachineFunction &MF,
44 MachineBasicBlock &PrologueMBB) const override;
4345
4446 void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
4547 RegScavenger *RS = nullptr) const override;
219219 MF.getFrameInfo()->hasVarSizedObjects();
220220 }
221221
222 void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
223 MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
222 void XCoreFrameLowering::emitPrologue(MachineFunction &MF,
223 MachineBasicBlock &MBB) const {
224 assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
224225 MachineBasicBlock::iterator MBBI = MBB.begin();
225226 MachineFrameInfo *MFI = MF.getFrameInfo();
226227 MachineModuleInfo *MMI = &MF.getMMI();
2626
2727 /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
2828 /// the function.
29 void emitPrologue(MachineFunction &MF) const override;
29 void emitPrologue(MachineFunction &MF,
30 MachineBasicBlock &MBB) const override;
3031 void emitEpilogue(MachineFunction &MF,
3132 MachineBasicBlock &MBB) const override;
3233
0 ; RUN: llc %s -o - -enable-shrink-wrap=true | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
1 ; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
2 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
3 target triple = "arm64-apple-ios"
4
5
6 ; Initial motivating example: Simple diamond with a call just on one side.
7 ; CHECK-LABEL: foo:
8 ;
9 ; Compare the arguments and jump to exit.
10 ; No prologue needed.
11 ; ENABLE: cmp w0, w1
12 ; ENABLE-NEXT: b.ge [[EXIT_LABEL:LBB[0-9_]+]]
13 ;
14 ; Prologue code.
15 ; CHECK: stp [[SAVE_SP:x[0-9]+]], [[CSR:x[0-9]+]], [sp, #-16]!
16 ; CHECK-NEXT: mov [[SAVE_SP]], sp
17 ; CHECK-NEXT: sub sp, sp, #16
18 ;
19 ; Compare the arguments and jump to exit.
20 ; After the prologue is set.
21 ; DISABLE: cmp w0, w1
22 ; DISABLE-NEXT: b.ge [[EXIT_LABEL:LBB[0-9_]+]]
23 ;
24 ; Store %a in the alloca.
25 ; CHECK: stur w0, {{\[}}[[SAVE_SP]], #-4]
26 ; Set the alloca address in the second argument.
27 ; CHECK-NEXT: sub x1, [[SAVE_SP]], #4
28 ; Set the first argument to zero.
29 ; CHECK-NEXT: mov w0, wzr
30 ; CHECK-NEXT: bl _doSomething
31 ;
32 ; Without shrink-wrapping, epilogue is in the exit block.
33 ; DISABLE: [[EXIT_LABEL]]:
34 ; Epilogue code.
35 ; CHECK-NEXT: mov sp, [[SAVE_SP]]
36 ; CHECK-NEXT: ldp [[SAVE_SP]], [[CSR]], [sp], #16
37 ;
38 ; With shrink-wrapping, exit block is a simple return.
39 ; ENABLE: [[EXIT_LABEL]]:
40 ; CHECK-NEXT: ret
; Test input for the diamond case: only the %true block stores to the alloca
; and calls @doSomething; %false merges both paths through a phi and returns.
41 define i32 @foo(i32 %a, i32 %b) {
42 %tmp = alloca i32, align 4
43 %tmp2 = icmp slt i32 %a, %b
44 br i1 %tmp2, label %true, label %false
45
46 true:
47 store i32 %a, i32* %tmp, align 4
48 %tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
49 br label %false
50
51 false:
52 %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
53 ret i32 %tmp.0
54 }
55
56 ; Function Attrs: optsize
57 declare i32 @doSomething(i32, i32*)
58
59
60 ; Check that we do not perform the restore inside the loop whereas the save
61 ; is outside.
62 ; CHECK-LABEL: freqSaveAndRestoreOutsideLoop:
63 ;
64 ; Shrink-wrapping allows the prologue to be skipped in the else case.
65 ; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
66 ;
67 ; Prologue code.
68 ; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]!
69 ; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16]
70 ; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16
71 ;
72 ; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
73 ;
74 ; CHECK: mov [[SUM:w[0-9]+]], wzr
75 ; CHECK-NEXT: movz [[IV:w[0-9]+]], #0xa
76 ;
77 ; Next BB.
78 ; CHECK: [[LOOP:LBB[0-9_]+]]: ; %for.body
79 ; CHECK: bl _something
80 ; CHECK-NEXT: add [[SUM]], w0, [[SUM]]
81 ; CHECK-NEXT: sub [[IV]], [[IV]], #1
82 ; CHECK-NEXT: cbnz [[IV]], [[LOOP]]
83 ;
84 ; Next BB.
85 ; Copy SUM, shifted left by 3, into the return register.
86 ; CHECK: lsl w0, [[SUM]], #3
87 ;
88 ; Jump to epilogue.
89 ; DISABLE: b [[EPILOG_BB:LBB[0-9_]+]]
90 ;
91 ; DISABLE: [[ELSE_LABEL]]: ; %if.else
92 ; Shift second argument by one and store into returned register.
93 ; DISABLE: lsl w0, w1, #1
94 ; DISABLE: [[EPILOG_BB]]: ; %if.end
95 ;
96 ; Epilogue code.
97 ; CHECK: ldp [[CSR3]], [[CSR4]], [sp, #16]
98 ; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32
99 ; CHECK-NEXT: ret
100 ;
101 ; ENABLE: [[ELSE_LABEL]]: ; %if.else
102 ; Shift second argument by one and store into returned register.
103 ; ENABLE: lsl w0, w1, #1
104 ; ENABLE: ret
; Test input: when %cond is zero control goes straight to %if.else, which
; makes no call; otherwise a 10-iteration loop calls @something and sums the
; results, and %for.end shifts the sum left by 3. Both paths join in %if.end.
105 define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
106 entry:
107 %tobool = icmp eq i32 %cond, 0
108 br i1 %tobool, label %if.else, label %for.body
109
110 for.body: ; preds = %entry, %for.body
111 %i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
112 %sum.04 = phi i32 [ %add, %for.body ], [ 0, %entry ]
113 %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
114 %add = add nsw i32 %call, %sum.04
115 %inc = add nuw nsw i32 %i.05, 1
116 %exitcond = icmp eq i32 %inc, 10
117 br i1 %exitcond, label %for.end, label %for.body
118
119 for.end: ; preds = %for.body
120 %shl = shl i32 %add, 3
121 br label %if.end
122
123 if.else: ; preds = %entry
124 %mul = shl nsw i32 %N, 1
125 br label %if.end
126
127 if.end: ; preds = %if.else, %for.end
128 %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
129 ret i32 %sum.1
130 }
131
132 declare i32 @something(...)
133
134 ; Check that we do not perform the shrink-wrapping inside the loop even
135 ; though that would be legal. The cost model must prevent that.
136 ; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2:
137 ; Prologue code.
138 ; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]!
139 ; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16]
140 ; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16
141 ; CHECK: mov [[SUM:w[0-9]+]], wzr
142 ; CHECK-NEXT: movz [[IV:w[0-9]+]], #0xa
143 ; Next BB.
144 ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
145 ; CHECK: bl _something
146 ; CHECK-NEXT: add [[SUM]], w0, [[SUM]]
147 ; CHECK-NEXT: sub [[IV]], [[IV]], #1
148 ; CHECK-NEXT: cbnz [[IV]], [[LOOP_LABEL]]
149 ; Next BB.
150 ; CHECK: ; %for.end
151 ; CHECK: mov w0, [[SUM]]
152 ; CHECK-NEXT: ldp [[CSR3]], [[CSR4]], [sp, #16]
153 ; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32
154 ; CHECK-NEXT: ret
; Test input: the entry block branches unconditionally into a 10-iteration
; loop that calls @something and sums the results; %for.end returns the sum.
; The only call sits inside the loop. %cond is not used by the body.
155 define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
156 entry:
157 br label %for.body
158
159 for.body: ; preds = %for.body, %entry
160 %i.04 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
161 %sum.03 = phi i32 [ 0, %entry ], [ %add, %for.body ]
162 %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
163 %add = add nsw i32 %call, %sum.03
164 %inc = add nuw nsw i32 %i.04, 1
165 %exitcond = icmp eq i32 %inc, 10
166 br i1 %exitcond, label %for.end, label %for.body
167
168 for.end: ; preds = %for.body
169 ret i32 %add
170 }
171
172 ; Check with a more complex case that we do not have save within the loop and
173 ; restore outside.
174 ; CHECK-LABEL: loopInfoSaveOutsideLoop:
175 ;
176 ; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
177 ;
178 ; Prologue code.
179 ; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]!
180 ; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16]
181 ; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16
182 ;
183 ; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
184 ;
185 ; CHECK: mov [[SUM:w[0-9]+]], wzr
186 ; CHECK-NEXT: movz [[IV:w[0-9]+]], #0xa
187 ;
188 ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
189 ; CHECK: bl _something
190 ; CHECK-NEXT: add [[SUM]], w0, [[SUM]]
191 ; CHECK-NEXT: sub [[IV]], [[IV]], #1
192 ; CHECK-NEXT: cbnz [[IV]], [[LOOP_LABEL]]
193 ; Next BB.
194 ; CHECK: bl _somethingElse
195 ; CHECK-NEXT: lsl w0, [[SUM]], #3
196 ;
197 ; Jump to epilogue.
198 ; DISABLE: b [[EPILOG_BB:LBB[0-9_]+]]
199 ;
200 ; DISABLE: [[ELSE_LABEL]]: ; %if.else
201 ; Shift second argument by one and store into returned register.
202 ; DISABLE: lsl w0, w1, #1
203 ; DISABLE: [[EPILOG_BB]]: ; %if.end
204 ; Epilogue code.
205 ; CHECK-NEXT: ldp [[CSR3]], [[CSR4]], [sp, #16]
206 ; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32
207 ; CHECK-NEXT: ret
208 ;
209 ; ENABLE: [[ELSE_LABEL]]: ; %if.else
210 ; Shift second argument by one and store into returned register.
211 ; ENABLE: lsl w0, w1, #1
212 ; ENABLE: ret
; Test input: same shape as @freqSaveAndRestoreOutsideLoop, except that
; %for.end (after the loop) also calls @somethingElse before shifting the sum.
; The %if.else path still makes no call.
213 define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
214 entry:
215 %tobool = icmp eq i32 %cond, 0
216 br i1 %tobool, label %if.else, label %for.body
217
218 for.body: ; preds = %entry, %for.body
219 %i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
220 %sum.04 = phi i32 [ %add, %for.body ], [ 0, %entry ]
221 %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
222 %add = add nsw i32 %call, %sum.04
223 %inc = add nuw nsw i32 %i.05, 1
224 %exitcond = icmp eq i32 %inc, 10
225 br i1 %exitcond, label %for.end, label %for.body
226
227 for.end: ; preds = %for.body
228 tail call void bitcast (void (...)* @somethingElse to void ()*)()
229 %shl = shl i32 %add, 3
230 br label %if.end
231
232 if.else: ; preds = %entry
233 %mul = shl nsw i32 %N, 1
234 br label %if.end
235
236 if.end: ; preds = %if.else, %for.end
237 %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
238 ret i32 %sum.1
239 }
240
241 declare void @somethingElse(...)
242
243 ; Check with a more complex case that we do not place the restore point within
244 ; the loop while the save point is outside of it.
245 ; CHECK-LABEL: loopInfoRestoreOutsideLoop:
246 ;
247 ; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
248 ;
249 ; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]!
250 ; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16]
251 ; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16
252 ;
253 ; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
254 ;
255 ; CHECK: bl _somethingElse
256 ; CHECK-NEXT: mov [[SUM:w[0-9]+]], wzr
257 ; CHECK-NEXT: movz [[IV:w[0-9]+]], #0xa
258 ;
259 ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
260 ; CHECK: bl _something
261 ; CHECK-NEXT: add [[SUM]], w0, [[SUM]]
262 ; CHECK-NEXT: sub [[IV]], [[IV]], #1
263 ; CHECK-NEXT: cbnz [[IV]], [[LOOP_LABEL]]
264 ; Next BB.
265 ; CHECK: lsl w0, [[SUM]], #3
266 ;
267 ; Jump to epilogue.
268 ; DISABLE: b [[EPILOG_BB:LBB[0-9_]+]]
269 ;
270 ; DISABLE: [[ELSE_LABEL]]: ; %if.else
271 ; Shift second argument by one and store into returned register.
272 ; DISABLE: lsl w0, w1, #1
273 ; DISABLE: [[EPILOG_BB]]: ; %if.end
274 ; Epilogue code.
275 ; CHECK: ldp [[CSR3]], [[CSR4]], [sp, #16]
276 ; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32
277 ; CHECK-NEXT: ret
278 ;
279 ; ENABLE: [[ELSE_LABEL]]: ; %if.else
280 ; Shift second argument by one and store into returned register.
281 ; ENABLE: lsl w0, w1, #1
282 ; ENABLE: ret
; Test input. When %cond is zero the function only computes %N << 1 (if.else).
; Otherwise it calls @somethingElse once before the loop (if.then), calls
; @something ten times in for.body while summing the results, and returns the
; sum shifted left by 3. Unlike @loopInfoSaveOutsideLoop, the extra call comes
; before the loop and for.end contains no call.
; Attribute group #0 is defined outside the part of the file visible here.
283 define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 {
284 entry:
; %cond == 0 bypasses every call.
285 %tobool = icmp eq i32 %cond, 0
286 br i1 %tobool, label %if.else, label %if.then
287
288 if.then: ; preds = %entry
; Call placed ahead of the loop, in the loop's only outside predecessor.
289 tail call void bitcast (void (...)* @somethingElse to void ()*)()
290 br label %for.body
291
292 for.body: ; preds = %for.body, %if.then
; %i.05 is the induction variable and %sum.04 the running sum; both start at 0.
293 %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ]
294 %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
295 %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
296 %add = add nsw i32 %call, %sum.04
297 %inc = add nuw nsw i32 %i.05, 1
; Leave the loop once the counter reaches 10.
298 %exitcond = icmp eq i32 %inc, 10
299 br i1 %exitcond, label %for.end, label %for.body
300
301 for.end: ; preds = %for.body
; Only arithmetic after the loop: no call in this block.
302 %shl = shl i32 %add, 3
303 br label %if.end
304
305 if.else: ; preds = %entry
; Call-free path: the result is %N * 2.
306 %mul = shl nsw i32 %N, 1
307 br label %if.end
308
309 if.end: ; preds = %if.else, %for.end
; Merge the results of the two paths and return.
310 %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
311 ret i32 %sum.1
312 }
313
314 ; Check that we handle a function with no frame information correctly.
315 ; CHECK-LABEL: emptyFrame:
316 ; CHECK: ; %entry
317 ; CHECK-NEXT: mov w0, wzr
318 ; CHECK-NEXT: ret
; Test input: takes no arguments, makes no calls and allocates nothing; it
; simply returns the constant 0.
319 define i32 @emptyFrame() {
320 entry:
321 ret i32 0
322 }
323
324 ; Check that we handle a variadic function correctly.
325 ; CHECK-LABEL: variadicFunc:
326 ;
327 ; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
328 ;
329 ; Prologue code.
330 ; CHECK: sub sp, sp, #16
331 ; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
332 ;
333 ; Sum is merged with the returned register.
334 ; CHECK: mov [[SUM:w0]], wzr
335 ; CHECK-NEXT: add [[VA_BASE:x[0-9]+]], sp, #16
336 ; CHECK-NEXT: str [[VA_BASE]], [sp, #8]
337 ; CHECK-NEXT: cmp w1, #1
338 ; CHECK-NEXT: b.lt [[IFEND_LABEL:LBB[0-9_]+]]
339 ;
340 ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
341 ; CHECK: ldr [[VA_ADDR:x[0-9]+]], [sp, #8]
342 ; CHECK-NEXT: add [[NEXT_VA_ADDR:x[0-9]+]], [[VA_ADDR]], #8
343 ; CHECK-NEXT: str [[NEXT_VA_ADDR]], [sp, #8]
344 ; CHECK-NEXT: ldr [[VA_VAL:w[0-9]+]], {{\[}}[[VA_ADDR]]]
345 ; CHECK-NEXT: add [[SUM]], [[SUM]], [[VA_VAL]]
346 ; CHECK-NEXT: sub w1, w1, #1
347 ; CHECK-NEXT: cbnz w1, [[LOOP_LABEL]]
348 ;
349 ; DISABLE-NEXT: b [[IFEND_LABEL]]
350 ; DISABLE: [[ELSE_LABEL]]: ; %if.else
351 ; DISABLE: lsl w0, w1, #1
352 ;
353 ; CHECK: [[IFEND_LABEL]]:
354 ; Epilogue code.
355 ; CHECK: add sp, sp, #16
356 ; CHECK-NEXT: ret
357 ;
358 ; ENABLE: [[ELSE_LABEL]]: ; %if.else
359 ; ENABLE: lsl w0, w1, #1
360 ; ENABLE-NEXT: ret
; Test input: a variadic function. When %cond is zero it returns %count << 1
; (if.else). Otherwise it starts a va_list, reads %count i32 variable
; arguments with va_arg while summing them, ends the va_list and returns the
; sum. The loop is skipped when %count is not greater than 0.
; Attribute group #0 is defined outside the part of the file visible here.
361 define i32 @variadicFunc(i32 %cond, i32 %count, ...) #0 {
362 entry:
; Stack slot that holds the va_list pointer used by va_start/va_arg/va_end.
363 %ap = alloca i8*, align 8
364 %tobool = icmp eq i32 %cond, 0
365 br i1 %tobool, label %if.else, label %if.then
366
367 if.then: ; preds = %entry
368 %ap1 = bitcast i8** %ap to i8*
369 call void @llvm.va_start(i8* %ap1)
; Enter the loop only when there is at least one argument to read.
370 %cmp6 = icmp sgt i32 %count, 0
371 br i1 %cmp6, label %for.body, label %for.end
372
373 for.body: ; preds = %if.then, %for.body
; %i.08 counts the arguments consumed so far; %sum.07 is the running sum.
374 %i.08 = phi i32 [ %inc, %for.body ], [ 0, %if.then ]
375 %sum.07 = phi i32 [ %add, %for.body ], [ 0, %if.then ]
; Fetch the next variable argument as an i32.
376 %0 = va_arg i8** %ap, i32
377 %add = add nsw i32 %sum.07, %0
378 %inc = add nuw nsw i32 %i.08, 1
; Stop after %count arguments have been read.
379 %exitcond = icmp eq i32 %inc, %count
380 br i1 %exitcond, label %for.end, label %for.body
381
382 for.end: ; preds = %for.body, %if.then
; The sum is 0 when the loop was skipped.
383 %sum.0.lcssa = phi i32 [ 0, %if.then ], [ %add, %for.body ]
384 call void @llvm.va_end(i8* %ap1)
385 br label %if.end
386
387 if.else: ; preds = %entry
; Path that never touches the variable arguments: the result is %count * 2.
388 %mul = shl nsw i32 %count, 1
389 br label %if.end
390
391 if.end: ; preds = %if.else, %for.end
; Merge the results of the two paths and return.
392 %sum.1 = phi i32 [ %sum.0.lcssa, %for.end ], [ %mul, %if.else ]
393 ret i32 %sum.1
394 }
395
; Intrinsic called by @variadicFunc on its va_list slot before reading any
; variable argument.
396 declare void @llvm.va_start(i8*)
397
; Intrinsic called by @variadicFunc on its va_list slot once it is done reading
; variable arguments.
398 declare void @llvm.va_end(i8*)
399
400 ; Check that we handle inline asm correctly.
401 ; CHECK-LABEL: inlineAsm:
402 ;
403 ; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
404 ;
405 ; Prologue code.
406 ; Make sure we save the CSR used in the inline asm: x19.
407 ; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x19]], [sp, #-16]!
408 ;
409 ; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
410 ;
411 ; CHECK: movz [[IV:w[0-9]+]], #0xa
412 ;
413 ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
414 ; Inline asm statement.
415 ; CHECK: add x19, x19, #1
416 ; CHECK: sub [[IV]], [[IV]], #1
417 ; CHECK-NEXT: cbnz [[IV]], [[LOOP_LABEL]]
418 ; Next BB.
419 ; CHECK: mov w0, wzr
420 ; Epilogue code.
421 ; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #16
422 ; CHECK-NEXT: ret
423 ; Next BB.
424 ; CHECK: [[ELSE_LABEL]]: ; %if.else
425 ; CHECK-NEXT: lsl w0, w1, #1
426 ; Epilogue code.
427 ; DISABLE-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #16
428 ; CHECK-NEXT: ret
; Test input. When %cond is zero the function returns %N << 1 (if.else).
; Otherwise it runs a ten-iteration loop whose body is an inline asm statement
; that increments x19 and lists x19 as clobbered, then returns 0. There are no
; calls; the inline asm clobber is the only explicit use of x19 in the IR.
429 define i32 @inlineAsm(i32 %cond, i32 %N) {
430 entry:
; %cond == 0 bypasses the loop and its inline asm.
431 %tobool = icmp eq i32 %cond, 0
432 br i1 %tobool, label %if.else, label %for.body
433
434 for.body: ; preds = %entry, %for.body
435 %i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
; The "~{x19}" constraint declares x19 as clobbered by the asm.
436 tail call void asm sideeffect "add x19, x19, #1", "~{x19}"()
437 %inc = add nuw nsw i32 %i.03, 1
; Leave the loop once the counter reaches 10.
438 %exitcond = icmp eq i32 %inc, 10
439 br i1 %exitcond, label %if.end, label %for.body
440
441 if.else: ; preds = %entry
; Path without inline asm: the result is %N * 2.
442 %mul = shl nsw i32 %N, 1
443 br label %if.end
444
445 if.end: ; preds = %for.body, %if.else
; The loop path contributes the constant 0.
446 %sum.0 = phi i32 [ %mul, %if.else ], [ 0, %for.body ]
447 ret i32 %sum.0
448 }
449
450 ; Check that we handle calls to variadic functions correctly.
451 ; CHECK-LABEL: callVariadicFunc:
452 ;
453 ; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
454 ;
455 ; Prologue code.
456 ; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-16]!
457 ; CHECK-NEXT: mov [[NEW_SP:x[0-9]+]], sp
458 ; CHECK-NEXT: sub sp, sp, #48
459 ;
460 ; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
461 ; Setup of the varargs.
462 ; CHECK: stp x1, x1, [sp, #32]
463 ; CHECK-NEXT: stp x1, x1, [sp, #16]
464 ; CHECK-NEXT: stp x1, x1, [sp]
465 ; CHECK-NEXT: mov w0, w1
466 ; CHECK-NEXT: bl _someVariadicFunc
467 ; CHECK-NEXT: lsl w0, w0, #3
468 ;
469 ; DISABLE: b [[IFEND_LABEL:LBB[0-9_]+]]
470 ; DISABLE: [[ELSE_LABEL]]: ; %if.else
471 ; DISABLE-NEXT: lsl w0, w1, #1
472 ; DISABLE: [[IFEND_LABEL]]: ; %if.end
473 ;
474 ; Epilogue code.
475 ; CHECK: mov sp, [[NEW_SP]]
476 ; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #16
477 ; CHECK-NEXT: ret
478 ;
479 ; ENABLE: [[ELSE_LABEL]]: ; %if.else
480 ; ENABLE-NEXT: lsl w0, w1, #1
481 ; ENABLE-NEXT: ret
; Test input. When %cond is zero the function returns %N << 1 (if.else).
; Otherwise it calls the variadic @someVariadicFunc with %N as the fixed
; argument followed by six more copies of %N as variable arguments, and
; returns the call result shifted left by 3.
482 define i32 @callVariadicFunc(i32 %cond, i32 %N) {
483 entry:
; %cond == 0 bypasses the call.
484 %tobool = icmp eq i32 %cond, 0
485 br i1 %tobool, label %if.else, label %if.then
486
487 if.then: ; preds = %entry
; Seven i32 operands in total: one fixed parameter plus six variable ones.
488 %call = tail call i32 (i32, ...) @someVariadicFunc(i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N)
489 %shl = shl i32 %call, 3
490 br label %if.end
491
492 if.else: ; preds = %entry
; Call-free path: the result is %N * 2.
493 %mul = shl nsw i32 %N, 1
494 br label %if.end
495
496 if.end: ; preds = %if.else, %if.then
; Merge the results of the two paths and return.
497 %sum.0 = phi i32 [ %shl, %if.then ], [ %mul, %if.else ]
498 ret i32 %sum.0
499 }
500
; External variadic callee used by @callVariadicFunc: one fixed i32 parameter
; followed by variable arguments, returning i32.
501 declare i32 @someVariadicFunc(i32, ...)