llvm.org GIT mirror llvm / c05d306
Add a new optimization pass: Stack Coloring, that merges disjoint static allocations (allocas). Allocas are known to be disjoint if they are marked by disjoint lifetime markers (@llvm.lifetime.XXX intrinsics). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163299 91177308-0d34-0410-b5e6-96231b3b80d8 Nadav Rotem 7 years ago
22 changed file(s) with 1058 addition(s) and 17 deletion(s). Raw diff Collapse all Expand all
498498
499499
500500
501

Stack Coloring - We have implemented a new optimization pass

502 to merge stack objects which are used in disjoint areas of the code.
503 This optimization reduces the required stack space significantly, in cases
504 where it is clear to the optimizer that the stack slot is not shared.
505 We use the lifetime markers to tell the codegen that a certain alloca
506 is used within a region.

507
501508

We have put a significant amount of work into the code generator

502509 infrastructure, which allows us to implement more aggressive algorithms and
503510 make it run faster:

310310 return !(*this == RHS);
311311 }
312312
313 // Intersection, union, disjoint union.
313 /// Intersection, union, disjoint union.
314314 BitVector &operator&=(const BitVector &RHS) {
315315 unsigned ThisWords = NumBitWords(size());
316316 unsigned RHSWords = NumBitWords(RHS.size());
327327 return *this;
328328 }
329329
330 // reset - Reset bits that are set in RHS. Same as *this &= ~RHS.
330 /// reset - Reset bits that are set in RHS. Same as *this &= ~RHS.
331331 BitVector &reset(const BitVector &RHS) {
332332 unsigned ThisWords = NumBitWords(size());
333333 unsigned RHSWords = NumBitWords(RHS.size());
335335 for (i = 0; i != std::min(ThisWords, RHSWords); ++i)
336336 Bits[i] &= ~RHS.Bits[i];
337337 return *this;
338 }
339
340 /// test - Check if (This - RHS) is non-zero: returns true if any bit is set
341 /// in This but not in RHS. Same result as reset(RHS) followed by any().
342 bool test(const BitVector &RHS) const {
343 unsigned ThisWords = NumBitWords(size());
344 unsigned RHSWords = NumBitWords(RHS.size());
345 unsigned i;
// Words covered by both vectors: look for a bit set here and clear in RHS.
346 for (i = 0; i != std::min(ThisWords, RHSWords); ++i)
347 if ((Bits[i] & ~RHS.Bits[i]) != 0)
348 return true;
349
// Words of This past the end of RHS: any set bit there cannot be in RHS.
350 for (; i != ThisWords ; ++i)
351 if (Bits[i] != 0)
352 return true;
353
354 return false;
338355 }
339356
340357 BitVector &operator|=(const BitVector &RHS) {
635635 ATOMIC_LOAD_MAX,
636636 ATOMIC_LOAD_UMIN,
637637 ATOMIC_LOAD_UMAX,
638
639 /// This corresponds to the llvm.lifetime.* intrinsics. The first operand
640 /// is the chain and the second operand is the alloca pointer.
641 LIFETIME_START, LIFETIME_END,
638642
639643 /// BUILTIN_OP_END - This must be the last enum value in this list.
640644 /// The target-specific pre-isel opcode values start here.
2727 class MachineBasicBlock;
2828 class TargetFrameLowering;
2929 class BitVector;
30 class Value;
3031
3132 /// The CalleeSavedInfo class tracks the information need to locate where a
3233 /// callee saved register is in the current frame.
102103 // protector.
103104 bool MayNeedSP;
104105
106 /// Alloca - If this stack object is originated from an Alloca instruction
107 /// this value saves the original IR allocation. Can be NULL.
108 const Value *Alloca;
109
105110 // PreAllocated - If true, the object was mapped into the local frame
106111 // block and doesn't need additional handling for allocation beyond that.
107112 bool PreAllocated;
108113
109114 StackObject(uint64_t Sz, unsigned Al, int64_t SP, bool IM,
110 bool isSS, bool NSP)
115 bool isSS, bool NSP, const Value *Val)
111116 : SPOffset(SP), Size(Sz), Alignment(Al), isImmutable(IM),
112 isSpillSlot(isSS), MayNeedSP(NSP), PreAllocated(false) {}
117 isSpillSlot(isSS), MayNeedSP(NSP), Alloca(Val), PreAllocated(false) {}
113118 };
114119
115120 /// Objects - The list of stack objects allocated...
361366 ensureMaxAlignment(Align);
362367 }
363368
369 /// getObjectAllocation - Return the IR value (Alloca) recorded for the
370 /// specified stack object, or 0 (null) if none was recorded for it.
371 const Value* getObjectAllocation(int ObjectIdx) const {
// ObjectIdx is offset by NumFixedObjects before indexing into Objects.
372 assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
373 "Invalid Object Idx!");
374 return Objects[ObjectIdx+NumFixedObjects].Alloca;
375 }
376
364377 /// NeedsStackProtector - Returns true if the object may need stack
365378 /// protectors.
366379 bool MayNeedStackProtector(int ObjectIdx) const {
481494 /// a nonnegative identifier to represent it.
482495 ///
483496 int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS,
484 bool MayNeedSP = false) {
497 bool MayNeedSP = false, const Value *Alloca = 0) {
485498 assert(Size != 0 && "Cannot allocate zero size stack objects!");
486 Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP));
499 Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP,
500 Alloca));
487501 int Index = (int)Objects.size() - NumFixedObjects - 1;
488502 assert(Index >= 0 && "Bad frame index!");
489503 ensureMaxAlignment(Alignment);
515529 ///
516530 int CreateVariableSizedObject(unsigned Alignment) {
517531 HasVarSizedObjects = true;
518 Objects.push_back(StackObject(0, Alignment, 0, false, false, true));
532 Objects.push_back(StackObject(0, Alignment, 0, false, false, true, 0));
519533 ensureMaxAlignment(Alignment);
520534 return (int)Objects.size()-NumFixedObjects-1;
521535 }
403403 /// inserting cmov instructions.
404404 extern char &EarlyIfConverterID;
405405
406 /// StackColoring - This pass performs stack coloring and merging.
407 /// It merges disjoint allocas to reduce the stack size.
408 extern char &StackColoringID;
409
406410 /// IfConverter - This pass performs machine code if conversion.
407411 extern char &IfConverterID;
408412
231231 void initializeSlotIndexesPass(PassRegistry&);
232232 void initializeSpillPlacementPass(PassRegistry&);
233233 void initializeStackProtectorPass(PassRegistry&);
234 void initializeStackColoringPass(PassRegistry&);
234235 void initializeStackSlotColoringPass(PassRegistry&);
235236 void initializeStripDeadDebugInfoPass(PassRegistry&);
236237 void initializeStripDeadPrototypesPassPass(PassRegistry&);
744744 let InOperandList = (ins variable_ops);
745745 let AsmString = "BUNDLE";
746746 }
// LIFETIME_START - Marker instruction with no outputs and a single input
// operand ($id); it is declared as never having side effects.
747 def LIFETIME_START : Instruction {
748 let OutOperandList = (outs);
749 let InOperandList = (ins i32imm:$id);
750 let AsmString = "LIFETIME_START";
751 let neverHasSideEffects = 1;
752 }
// LIFETIME_END - Marker instruction with no outputs and a single input
// operand ($id); it is declared as never having side effects.
753 def LIFETIME_END : Instruction {
754 let OutOperandList = (outs);
755 let InOperandList = (ins i32imm:$id);
756 let AsmString = "LIFETIME_END";
757 let neverHasSideEffects = 1;
758 }
747759 }
748760
749761 //===----------------------------------------------------------------------===//
8686 /// BUNDLE - This instruction represents an instruction bundle. Instructions
8787 /// which immediately follow a BUNDLE instruction which are marked with
8888 /// 'InsideBundle' flag are inside the bundle.
89 BUNDLE
89 BUNDLE = 14,
90
91 /// Lifetime markers.
92 LIFETIME_START = 15,
93 LIFETIME_END = 16
9094 };
9195 } // end namespace TargetOpcode
9296 } // end namespace llvm
9494 SplitKit.cpp
9595 StackProtector.cpp
9696 StackSlotColoring.cpp
97 StackColoring.cpp
9798 StrongPHIElimination.cpp
9899 TailDuplication.cpp
99100 TargetFrameLoweringImpl.cpp
5555 initializeRegisterCoalescerPass(Registry);
5656 initializeSlotIndexesPass(Registry);
5757 initializeStackProtectorPass(Registry);
58 initializeStackColoringPass(Registry);
5859 initializeStackSlotColoringPass(Registry);
5960 initializeStrongPHIEliminationPass(Registry);
6061 initializeTailDuplicatePassPass(Registry);
457457 unsigned StackAlign = TFI.getStackAlignment();
458458 unsigned Align = MinAlign(SPOffset, StackAlign);
459459 Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
460 /*isSS*/false, false));
460 /*isSS*/ false,
461 /*NeedSP*/ false,
462 /*Alloca*/ 0));
461463 return -++NumFixedObjects;
462464 }
463465
528528 // instructions dead.
529529 addPass(&OptimizePHIsID);
530530
531 // This pass merges large allocas. StackSlotColoring is a different pass
532 // which merges spill slots.
533 addPass(&StackColoringID);
534
531535 // If the target requests it, assign local variables to stack slots relative
532536 // to one another and simplify frame index references where possible.
533537 addPass(&LocalStackSlotAllocationID);
9696 cast(Ty)->getElementType()->isIntegerTy(8)));
9797 StaticAllocaMap[AI] =
9898 MF->getFrameInfo()->CreateStackObject(TySize, Align, false,
99 MayNeedSP);
99 MayNeedSP, AI);
100100 }
101101
102102 for (; BB != EB; ++BB)
872872 break;
873873 }
874874
875 case ISD::LIFETIME_START:
876 case ISD::LIFETIME_END: {
877 unsigned TarOp = (Node->getOpcode() == ISD::LIFETIME_START) ?
878 TargetOpcode::LIFETIME_START : TargetOpcode::LIFETIME_END;
879
880 FrameIndexSDNode *FI = dyn_cast(Node->getOperand(1));
881 BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp))
882 .addFrameIndex(FI->getIndex());
883 break;
884 }
885
875886 case ISD::INLINEASM: {
876887 unsigned NumOps = Node->getNumOperands();
877888 if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
655655 break;
656656 case ISD::MERGE_VALUES:
657657 case ISD::TokenFactor:
658 case ISD::LIFETIME_START:
659 case ISD::LIFETIME_END:
658660 case ISD::CopyToReg:
659661 case ISD::CopyFromReg:
660662 case ISD::EH_LABEL:
41604160 assert((Opcode == ISD::INTRINSIC_VOID ||
41614161 Opcode == ISD::INTRINSIC_W_CHAIN ||
41624162 Opcode == ISD::PREFETCH ||
4163 Opcode == ISD::LIFETIME_START ||
4164 Opcode == ISD::LIFETIME_END ||
41634165 (Opcode <= INT_MAX &&
41644166 (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
41654167 "Opcode is not a memory-accessing opcode!");
1818 #include "llvm/ADT/SmallSet.h"
1919 #include "llvm/Analysis/AliasAnalysis.h"
2020 #include "llvm/Analysis/ConstantFolding.h"
21 #include "llvm/Analysis/ValueTracking.h"
2122 #include "llvm/Constants.h"
2223 #include "llvm/CallingConv.h"
2324 #include "llvm/DebugInfo.h"
52145215 rw==1)); /* write */
52155216 return 0;
52165217 }
5217
5218 case Intrinsic::lifetime_start:
5219 case Intrinsic::lifetime_end: {
5220 SDValue Ops[2];
5221 AllocaInst *LifetimeObject =dyn_cast_or_null(
5222 GetUnderlyingObject(I.getArgOperand(1), TD));
5223 // Could not find an Alloca.
5224 if (!LifetimeObject)
5225 return 0;
5226
5227 int FI = FuncInfo.StaticAllocaMap[LifetimeObject];
5228 Ops[0] = getRoot();
5229 Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true);
5230 bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
5231 unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END);
5232
5233 Res = DAG.getNode(Opcode, dl, MVT::Other, Ops, 2);
5234 DAG.setRoot(Res);
5235 return 0;
5236 }
52185237 case Intrinsic::invariant_start:
5219 case Intrinsic::lifetime_start:
52205238 // Discard region information.
52215239 setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
52225240 return 0;
52235241 case Intrinsic::invariant_end:
5224 case Intrinsic::lifetime_end:
52255242 // Discard region information.
52265243 return 0;
52275244 case Intrinsic::donothing:
266266 case ISD::STACKRESTORE: return "stackrestore";
267267 case ISD::TRAP: return "trap";
268268 case ISD::DEBUGTRAP: return "debugtrap";
269 case ISD::LIFETIME_START: return "lifetime.start";
270 case ISD::LIFETIME_END: return "lifetime.end";
269271
270272 // Bit manipulation
271273 case ISD::BSWAP: return "bswap";
18001800 User->getOpcode() == ISD::HANDLENODE) // Root of the graph.
18011801 continue;
18021802
1803 if (User->getOpcode() == ISD::CopyToReg ||
1804 User->getOpcode() == ISD::CopyFromReg ||
1805 User->getOpcode() == ISD::INLINEASM ||
1806 User->getOpcode() == ISD::EH_LABEL) {
1803 unsigned UserOpcode = User->getOpcode();
1804 if (UserOpcode == ISD::CopyToReg ||
1805 UserOpcode == ISD::CopyFromReg ||
1806 UserOpcode == ISD::INLINEASM ||
1807 UserOpcode == ISD::EH_LABEL ||
1808 UserOpcode == ISD::LIFETIME_START ||
1809 UserOpcode == ISD::LIFETIME_END) {
18071810 // If their node ID got reset to -1 then they've already been selected.
18081811 // Treat them like a MachineOpcode.
18091812 if (User->getNodeId() == -1)
22192222 case ISD::CopyFromReg:
22202223 case ISD::CopyToReg:
22212224 case ISD::EH_LABEL:
2225 case ISD::LIFETIME_START:
2226 case ISD::LIFETIME_END:
22222227 NodeToMatch->setNodeId(-1); // Mark selected.
22232228 return 0;
22242229 case ISD::AssertSext:
0 //===-- StackColoring.cpp -------------------------------------------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements the stack-coloring optimization that looks for
10 // lifetime marker machine instructions (LIFETIME_START and LIFETIME_END),
11 // which represent the possible lifetime of stack slots. It attempts to
12 // merge disjoint stack slots and reduce the used stack space.
13 // NOTE: This pass is not StackSlotColoring, which optimizes spill slots.
14 //
15 // TODO: In the future we plan to improve stack coloring in the following ways:
16 // 1. Allow merging multiple small slots into a single larger slot at different
17 // offsets.
18 // 2. Merge this pass with StackSlotColoring and allow merging of allocas with
19 // spill slots.
20 //
21 //===----------------------------------------------------------------------===//
22
23 #define DEBUG_TYPE "stackcoloring"
24 #include "MachineTraceMetrics.h"
25 #include "llvm/Function.h"
26 #include "llvm/Module.h"
27 #include "llvm/ADT/BitVector.h"
28 #include "llvm/Analysis/Dominators.h"
29 #include "llvm/Analysis/ValueTracking.h"
30 #include "llvm/ADT/DepthFirstIterator.h"
31 #include "llvm/ADT/PostOrderIterator.h"
32 #include "llvm/ADT/SetVector.h"
33 #include "llvm/ADT/SmallPtrSet.h"
34 #include "llvm/ADT/SparseSet.h"
35 #include "llvm/ADT/Statistic.h"
36 #include "llvm/CodeGen/LiveInterval.h"
37 #include "llvm/CodeGen/MachineLoopInfo.h"
38 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
39 #include "llvm/CodeGen/MachineDominators.h"
40 #include "llvm/CodeGen/MachineBasicBlock.h"
41 #include "llvm/CodeGen/MachineFunctionPass.h"
42 #include "llvm/CodeGen/MachineLoopInfo.h"
43 #include "llvm/CodeGen/MachineModuleInfo.h"
44 #include "llvm/CodeGen/MachineRegisterInfo.h"
45 #include "llvm/CodeGen/MachineFrameInfo.h"
46 #include "llvm/CodeGen/MachineMemOperand.h"
47 #include "llvm/CodeGen/Passes.h"
48 #include "llvm/CodeGen/SlotIndexes.h"
49 #include "llvm/DebugInfo.h"
50 #include "llvm/MC/MCInstrItineraries.h"
51 #include "llvm/Target/TargetInstrInfo.h"
52 #include "llvm/Target/TargetRegisterInfo.h"
53 #include "llvm/Support/CommandLine.h"
54 #include "llvm/Support/Debug.h"
55 #include "llvm/Support/raw_ostream.h"
56
57 using namespace llvm;
58
59 static cl::opt
60 DisableColoring("no-stack-coloring",
61 cl::init(false), cl::Hidden,
62 cl::desc("Suppress stack coloring"));
63
64 STATISTIC(NumMarkerSeen, "Number of life markers found.");
65 STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
66 STATISTIC(StackSlotMerged, "Number of stack slot merged.");
67
68 //===----------------------------------------------------------------------===//
69 // StackColoring Pass
70 //===----------------------------------------------------------------------===//
71
72 namespace {
73 /// StackColoring - A machine pass for merging disjoint stack allocations,
74 /// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
75 class StackColoring : public MachineFunctionPass {
76 MachineFrameInfo *MFI;
77 MachineFunction *MF;
78
79 /// Liveness information for a single basic block.
80 /// Each bit in a BitVector represents the liveness property
81 /// for a different stack slot.
82 struct BlockLifetimeInfo {
83 /// Slots recorded as BEGINning in this block (filled by collectMarkers).
84 BitVector Begin;
85 /// Slots recorded as ENDing in this block (filled by collectMarkers).
86 BitVector End;
87 /// Slots that are marked as LIVE_IN, coming into this basic block.
88 BitVector LiveIn;
89 /// Slots that are marked as LIVE_OUT, coming out of this basic block.
90 BitVector LiveOut;
91 };
92
93 /// Maps each basic block to its per-slot liveness bit vectors.
94 DenseMap BlockLiveness;
95
96 /// Maps basic blocks to a serial number.
97 DenseMap BasicBlocks;
98 /// Maps serial numbers to basic blocks (indexed by the serial number).
99 SmallVector BasicBlockNumbering;
100
101 /// The live interval built for each slot, indexed by slot number.
102 SmallVector Intervals;
103 /// Allocator for the VNInfo values used when constructing LiveIntervals.
104 VNInfo::Allocator VNInfoAllocator;
105 /// SlotIndex analysis object.
106 SlotIndexes* Indexes;
107
108 /// The list of lifetime markers found. These markers are to be removed
109 /// once the coloring is done.
110 SmallVector Markers;
111
112 /// SlotSizeSorter - A sort predicate that orders stack slots by
113 /// decreasing object size.
114 struct SlotSizeSorter {
115 MachineFrameInfo *MFI;
116 SlotSizeSorter(MachineFrameInfo *mfi) : MFI(mfi) { }
117 bool operator()(int LHS, int RHS) {
118 // We use -1 to denote an uninteresting slot. Place these slots at the end.
119 if (LHS == -1) return false;
120 if (RHS == -1) return true;
121 // Sort according to size, larger objects first.
122 return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
123 }
124 };
125
126 public:
127 static char ID;
128 StackColoring() : MachineFunctionPass(ID) {
129 initializeStackColoringPass(*PassRegistry::getPassRegistry());
130 }
131 void getAnalysisUsage(AnalysisUsage &AU) const;
132 bool runOnMachineFunction(MachineFunction &MF);
133
134 private:
135 /// Debug helper: prints the per-block liveness sets.
136 void dump();
137
138 /// Removes all of the lifetime marker instructions from the function.
139 /// \returns true if any markers were removed.
140 bool removeAllMarkers();
141
142 /// Scan the machine function and find all of the lifetime markers.
143 /// Record the findings in the per-block Begin and End vectors.
144 /// \returns the number of markers found.
145 unsigned collectMarkers(unsigned NumSlot);
146
147 /// Perform the dataflow calculation and calculate the lifetime for each of
148 /// the slots, based on the Begin/End vectors. Fills in the LiveIn and
149 /// LiveOut vectors of BlockLiveness, which represent which stack slots are
150 /// live coming in and out of each block.
151 void calculateLocalLiveness();
152
153 /// Construct the LiveIntervals for the slots.
154 void calculateLiveIntervals(unsigned NumSlots);
155
156 /// Go over the machine function and change instructions which use stack
157 /// slots to use the joint slots.
158 void remapInstructions(DenseMap &SlotRemap);
159
160 /// Map entries which point to other entries to their destination.
161 /// A->B->C becomes A->C.
162 void expungeSlotMap(DenseMap &SlotRemap, unsigned NumSlots);
163 };
164 } // end anonymous namespace
165
166 char StackColoring::ID = 0;
167 char &llvm::StackColoringID = StackColoring::ID;
168
169 INITIALIZE_PASS_BEGIN(StackColoring,
170 "stack-coloring", "Merge disjoint stack slots", false, false)
171 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
172 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
173 INITIALIZE_PASS_END(StackColoring,
174 "stack-coloring", "Merge disjoint stack slots", false, false)
175
/// Declares the analyses this pass requires and preserves, then defers to
/// MachineFunctionPass for the rest.
/// NOTE(review): the template arguments of addRequired/addPreserved are
/// missing in this copy of the source; presumably they name
/// MachineDominatorTree and SlotIndexes (the INITIALIZE_PASS_DEPENDENCY
/// entries) - confirm against the original file.
176 void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
177 AU.addRequired();
178 AU.addPreserved();
179 AU.addRequired();
180 MachineFunctionPass::getAnalysisUsage(AU);
181 }
182
/// Debug helper. Walks the function's blocks with a depth-first iterator and,
/// for each block, emits DEBUG output with sections labelled BEGIN, END,
/// LIVE_IN and LIVEOUT, looping over the corresponding BlockLiveness vector.
/// NOTE(review): the per-bit print statements are truncated in this copy of
/// the source, so what exactly is printed for each bit cannot be confirmed.
183 void StackColoring::dump() {
184 for (df_iterator FI = df_begin(MF), FE = df_end(MF);
185 FI != FE; ++FI) {
186 unsigned Num = BasicBlocks[*FI];
187 DEBUG(dbgs()<<"Inspecting block #"<getName()<<"]\n");
// NOTE(review): Num is overwritten with 0 right after the line above.
188 Num = 0;
189 DEBUG(dbgs()<<"BEGIN : {");
190 for (unsigned i=0; i < BlockLiveness[*FI].Begin.size(); ++i)
191 DEBUG(dbgs()<
192 DEBUG(dbgs()<<"}\n");
193
194 DEBUG(dbgs()<<"END : {");
195 for (unsigned i=0; i < BlockLiveness[*FI].End.size(); ++i)
196 DEBUG(dbgs()<
197
198 DEBUG(dbgs()<<"}\n");
199
200 DEBUG(dbgs()<<"LIVE_IN: {");
201 for (unsigned i=0; i < BlockLiveness[*FI].LiveIn.size(); ++i)
202 DEBUG(dbgs()<
203
204 DEBUG(dbgs()<<"}\n");
205 DEBUG(dbgs()<<"LIVEOUT: {");
206 for (unsigned i=0; i < BlockLiveness[*FI].LiveOut.size(); ++i)
207 DEBUG(dbgs()<
208 DEBUG(dbgs()<<"}\n");
209 }
210 }
211
/// Scans the function for LIFETIME_START / LIFETIME_END instructions.
/// Every visited block gets a serial number and Begin/End bit vectors sized
/// to NumSlot; every marker is appended to Markers and recorded in its
/// block's Begin or End vector.
/// \param NumSlot the number of bits to allocate in each Begin/End vector.
/// \returns the number of markers found (also added to NumMarkerSeen).
212 unsigned StackColoring::collectMarkers(unsigned NumSlot) {
213 unsigned MarkersFound = 0;
214 // Scan the function to find all lifetime markers.
215 // NOTE: We use a depth-first iteration (df_iterator) over the function to
216 // obtain a deterministic numbering; the liveness dataflow computation later
217 // walks the blocks in this same numbering order.
218 for (df_iterator FI = df_begin(MF), FE = df_end(MF);
219 FI != FE; ++FI) {
220
221 // Assign a serial number to this basic block.
222 BasicBlocks[*FI] = BasicBlockNumbering.size();
223 BasicBlockNumbering.push_back(*FI);
224
225 BlockLiveness[*FI].Begin.resize(NumSlot);
226 BlockLiveness[*FI].End.resize(NumSlot);
227
228 for (MachineBasicBlock::iterator BI = (*FI)->begin(), BE = (*FI)->end();
229 BI != BE; ++BI) {
230
// Only the two lifetime marker opcodes are of interest here.
231 if (BI->getOpcode() != TargetOpcode::LIFETIME_START &&
232 BI->getOpcode() != TargetOpcode::LIFETIME_END)
233 continue;
234
235 Markers.push_back(BI);
236
237 bool IsStart = BI->getOpcode() == TargetOpcode::LIFETIME_START;
// Operand 0 of a marker holds the index of the stack slot it refers to.
238 MachineOperand &MI = BI->getOperand(0);
239 unsigned Slot = MI.getIndex();
240
241 MarkersFound++;
242
243 const Value* Allocation = MFI->getObjectAllocation(Slot);
244 if (Allocation) {
245 DEBUG(dbgs()<<"Found lifetime marker for allocation: "<<
246 Allocation->getName()<<"\n");
247 }
248
249 if (IsStart) {
250 BlockLiveness[*FI].Begin.set(Slot);
251 } else {
252 if (BlockLiveness[*FI].Begin.test(Slot)) {
253 // Allocas that start and end within a single block are handled
254 // specially when computing the LiveIntervals to avoid pessimizing
255 // the liveness propagation.
256 BlockLiveness[*FI].Begin.reset(Slot);
257 } else {
258 BlockLiveness[*FI].End.set(Slot);
259 }
260 }
261 }
262 }
263
264 // Update statistics.
265 NumMarkerSeen += MarkersFound;
266 return MarkersFound;
267 }
268
269 void StackColoring::calculateLocalLiveness() {
270 // Iterate to a fixed point over the numbered blocks to solve for global
271 // liveness. Each visit propagates forward (predecessors' LiveOut into this
272 // block's live-in) and backward (successors' LiveIn into its live-out), using
273 // the block's Begin and End sets. The result is stored per block in the
274 // LiveIn/LiveOut bit vectors of BlockLiveness.
// BBSet is the worklist for the current sweep; initially every numbered block.
275 SmallPtrSet BBSet(BasicBlockNumbering.begin(),
276 BasicBlockNumbering.end());
// NOTE(review): NumSSMIters is incremented but not read in this function.
277 unsigned NumSSMIters = 0;
278 bool changed = true;
279 while (changed) {
280 changed = false;
281 ++NumSSMIters;
282
// Blocks to revisit in the next sweep.
283 SmallPtrSet NextBBSet;
284
285 for (SmallVector::iterator
286 PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end();
287 PI != PE; ++PI) {
288
289 MachineBasicBlock *BB = *PI;
290 if (!BBSet.count(BB)) continue;
291
292 BitVector LocalLiveIn;
293 BitVector LocalLiveOut;
294
295 // Forward propagation from begins to ends.
// (The inner PI/PE below shadow the outer loop's iterators.)
296 for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
297 PE = BB->pred_end(); PI != PE; ++PI)
298 LocalLiveIn |= BlockLiveness[*PI].LiveOut;
299 LocalLiveIn |= BlockLiveness[BB].End;
300 LocalLiveIn.reset(BlockLiveness[BB].Begin);
301
302 // Reverse propagation from ends to begins.
303 for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
304 SE = BB->succ_end(); SI != SE; ++SI)
305 LocalLiveOut |= BlockLiveness[*SI].LiveIn;
306 LocalLiveOut |= BlockLiveness[BB].Begin;
307 LocalLiveOut.reset(BlockLiveness[BB].End);
308
// Merge the two directions so both local sets hold the union.
309 LocalLiveIn |= LocalLiveOut;
310 LocalLiveOut |= LocalLiveIn;
311
312 // After adopting the live bits, we need to turn-off the bits which
313 // are de-activated in this block.
314 LocalLiveOut.reset(BlockLiveness[BB].End);
315 LocalLiveIn.reset(BlockLiveness[BB].Begin);
316
// test() is true when the local set has a bit not yet in the stored set;
// in that case record it and queue the predecessors for another visit.
317 if (LocalLiveIn.test(BlockLiveness[BB].LiveIn)) {
318 changed = true;
319 BlockLiveness[BB].LiveIn |= LocalLiveIn;
320
321 for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
322 PE = BB->pred_end(); PI != PE; ++PI)
323 NextBBSet.insert(*PI);
324 }
325
// Same for live-out, queueing the successors.
326 if (LocalLiveOut.test(BlockLiveness[BB].LiveOut)) {
327 changed = true;
328 BlockLiveness[BB].LiveOut |= LocalLiveOut;
329
330 for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
331 SE = BB->succ_end(); SI != SE; ++SI)
332 NextBBSet.insert(*SI);
333 }
334 }
335
336 BBSet = NextBBSet;
337 }// while changed.
338 }
339
/// Builds the per-slot live ranges in Intervals from the block liveness sets
/// and the collected lifetime markers.
/// \param NumSlots the number of slots; Starts/Finishes are sized to it and
/// Intervals[0..NumSlots) are updated.
340 void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
// Per-block scratch arrays: the start and end index of each slot's range.
341 SmallVector Starts;
342 SmallVector Finishes;
343
344 // For each block, find which slots are active within this block
345 // and update the live intervals.
346 for (MachineFunction::iterator MBB = MF->begin(), MBBe = MF->end();
347 MBB != MBBe; ++MBB) {
348 Starts.clear();
349 Starts.resize(NumSlots);
350 Finishes.clear();
351 Finishes.resize(NumSlots);
352
// Slots live into or out of the block start out covering the whole block.
353 BitVector Alive = BlockLiveness[MBB].LiveIn;
354 Alive |= BlockLiveness[MBB].LiveOut;
355
356 if (Alive.any()) {
357 for (int pos = Alive.find_first(); pos != -1;
358 pos = Alive.find_next(pos)) {
359 Starts[pos] = Indexes->getMBBStartIdx(MBB);
360 Finishes[pos] = Indexes->getMBBEndIdx(MBB);
361 }
362 }
363
// Markers located in this block narrow the range to the marker's index.
// NOTE(review): the full Markers list is rescanned for every block.
364 for (SmallVector::iterator it = Markers.begin(),
365 e = Markers.end(); it != e; ++it) {
366 MachineInstr *MI = *it;
367 assert((MI->getOpcode() == TargetOpcode::LIFETIME_START ||
368 MI->getOpcode() == TargetOpcode::LIFETIME_END) &&
369 "Invalid Lifetime marker");
370
371 if (MI->getParent() == MBB) {
372 bool IsStart = MI->getOpcode() == TargetOpcode::LIFETIME_START;
373 MachineOperand &Mo = MI->getOperand(0);
374 int Slot = Mo.getIndex();
375 assert(Slot >= 0 && "Invalid slot");
376 if (IsStart) {
377 Starts[Slot] = Indexes->getInstructionIndex(MI);
378 } else {
379 Finishes[Slot] = Indexes->getInstructionIndex(MI);
380 }
381 }
382 }
383
384 for (unsigned i = 0; i < NumSlots; ++i) {
385 assert(!!Starts[i] == !!Finishes[i] && "Unmatched range");
// Equal start and finish (including both unset): nothing to add.
386 if (Starts[i] == Finishes[i])
387 continue;
388
389 assert(Starts[i] && Finishes[i] && "Invalid interval");
390 VNInfo *ValNum = Intervals[i]->getValNumInfo(0);
391 SlotIndex S = Starts[i];
392 SlotIndex F = Finishes[i];
393 if (S < F) {
394 // We have a single consecutive region.
395 Intervals[i]->addRange(LiveRange(S, F, ValNum));
396 } else {
397 // We have two non consecutive regions. This happens when
398 // LIFETIME_START appears after the LIFETIME_END marker.
399 SlotIndex NewStart = Indexes->getMBBStartIdx(MBB);
400 SlotIndex NewFin = Indexes->getMBBEndIdx(MBB);
401 Intervals[i]->addRange(LiveRange(NewStart, F, ValNum));
402 Intervals[i]->addRange(LiveRange(S, NewFin, ValNum));
403 }
404 }
405 }
406 }
407
/// Erases every instruction recorded in Markers from its parent and then
/// empties the Markers list.
/// \returns true if at least one marker was erased (the erased count is
/// converted to bool on return).
408 bool StackColoring::removeAllMarkers() {
409 unsigned Count = 0;
410 for (unsigned i = 0; i < Markers.size(); ++i) {
411 Markers[i]->eraseFromParent();
412 Count++;
413 }
414 Markers.clear();
415
416 DEBUG(dbgs()<<"Removed "<
417 return Count;
418 }
419
/// Rewrites every reference to a merged stack slot so that it uses the slot
/// it was merged into. Three kinds of reference are updated: variable debug
/// info entries, the IR values attached to machine memory operands, and
/// frame-index operands of machine instructions.
/// \param SlotRemap maps an old slot index to its replacement slot index.
420 void StackColoring::remapInstructions(DenseMap &SlotRemap) {
// Counters for the debug output at the end of the function.
421 unsigned FixedInstr = 0;
422 unsigned FixedMemOp = 0;
423 unsigned FixedDbg = 0;
424 MachineModuleInfo *MMI = &MF->getMMI();
425
426 // Remap debug information that refers to stack slots.
427 MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
428 for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
429 VE = VMap.end(); VI != VE; ++VI) {
430 const MDNode *Var = VI->first;
431 if (!Var) continue;
// VP.first is used as the slot index of this debug entry.
432 std::pair &VP = VI->second;
433 if (SlotRemap.count(VP.first)) {
434 DEBUG(dbgs()<<"Remapping debug info for ["<getName()<<"].\n");
435 VP.first = SlotRemap[VP.first];
436 FixedDbg++;
437 }
438 }
439
440 // Keep a list of *allocas* which need to be remapped.
// Built from SlotRemap: allocation of the old slot -> allocation of the new.
441 DenseMap Allocas;
442 for (DenseMap::iterator it = SlotRemap.begin(),
443 e = SlotRemap.end(); it != e; ++it) {
444 const Value* From = MFI->getObjectAllocation(it->first);
445 const Value* To = MFI->getObjectAllocation(it->second);
446 assert(To && From && "Invalid allocation object");
447 Allocas[From] = To;
448 }
449
450 // Remap all instructions to the new stack slots.
451 MachineFunction::iterator BB, BBE;
452 MachineBasicBlock::iterator I, IE;
453 for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB)
454 for (I = BB->begin(), IE = BB->end(); I != IE; ++I) {
455
456 // Update the MachineMemOperand to use the new alloca.
457 for (MachineInstr::mmo_iterator MM = I->memoperands_begin(),
458 E = I->memoperands_end(); MM != E; ++MM) {
459 MachineMemOperand *MMO = *MM;
460
461 const Value *V = MMO->getValue();
462
463 if (!V)
464 continue;
465
466 // Climb up and find the original alloca.
467 V = GetUnderlyingObject(V);
468 // If we did not find one, or if the one that we found is not in our
469 // map, then move on.
470 if (!V || !Allocas.count(V))
471 continue;
472
473 MMO->setValue(Allocas[V]);
474 FixedMemOp++;
475 }
476
477 // Update all of the machine instruction operands.
478 for (unsigned i = 0 ; i < I->getNumOperands(); ++i) {
479 MachineOperand &MO = I->getOperand(i);
480
// Only frame-index operands refer to stack slots.
481 if (!MO.isFI())
482 continue;
483 int FromSlot = MO.getIndex();
484
485 // Don't touch arguments.
486 if (FromSlot<0)
487 continue;
488
489 // Only look at mapped slots.
490 if (!SlotRemap.count(FromSlot))
491 continue;
492
493 // Fix the machine instructions.
494 int ToSlot = SlotRemap[FromSlot];
495 MO.setIndex(ToSlot);
496 FixedInstr++;
497 }
498 }
499
500 DEBUG(dbgs()<<"Fixed "<
501 DEBUG(dbgs()<<"Fixed "<
502 DEBUG(dbgs()<<"Fixed "<
503 }
504
/// Collapses chains in the remap table so that every remapped slot points
/// directly at its final destination (A->B->C becomes A->C).
/// \param SlotRemap the old-slot to new-slot map, updated in place.
/// \param NumSlots slot indices 0..NumSlots-1 are examined as chain starts.
505 void StackColoring::expungeSlotMap(DenseMap &SlotRemap,
506 unsigned NumSlots) {
507 // Walk every slot index and shorten its remap chain, if it has one.
508 for (unsigned i=0; i < NumSlots; ++i) {
509 // If we are remapping slot i.
510 if (SlotRemap.count(i)) {
511 int Target = SlotRemap[i];
512 // As long as our target is mapped to something else, follow it.
// NOTE(review): this loop relies on the map containing no cycles.
513 while (SlotRemap.count(Target)) {
514 Target = SlotRemap[Target];
515 SlotRemap[i] = Target;
516 }
517 }
518 }
519 }
520
521 bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
522 DEBUG(dbgs() << "********** Stack Coloring **********\n"
523 << "********** Function: "
524 << ((Value*)Func.getFunction())->getName() << '\n');
525 MF = &Func;
526 MFI = MF->getFrameInfo();
527 Indexes = &getAnalysis();
528 BlockLiveness.clear();
529 BasicBlocks.clear();
530 BasicBlockNumbering.clear();
531 Markers.clear();
532 Intervals.clear();
533 VNInfoAllocator.Reset();
534
535 unsigned NumSlots = MFI->getObjectIndexEnd();
536
537 // If there are no stack slots then there are no markers to remove.
538 if (!NumSlots)
539 return false;
540
541 SmallVector SortedSlots;
542
543 SortedSlots.reserve(NumSlots);
544 Intervals.reserve(NumSlots);
545
546 unsigned NumMarkers = collectMarkers(NumSlots);
547
548 unsigned TotalSize = 0;
549 DEBUG(dbgs()<<"Found "<
550 DEBUG(dbgs()<<"Slot structure:\n");
551
552 for (int i=0; i < MFI->getObjectIndexEnd(); ++i) {
553 DEBUG(dbgs()<<"Slot #"<getObjectSize(i)<<" bytes.\n");
554 TotalSize += MFI->getObjectSize(i);
555 }
556
557 DEBUG(dbgs()<<"Total Stack size: "<
558
559 // Don't continue if there are not enough lifetime markers, or the
560 // stack is too small, or we are told not to optimize the slots.
561 if (NumMarkers < 2 || TotalSize < 16 || DisableColoring) {
562 DEBUG(dbgs()<<"Will not try to merge slots.\n");
563 return removeAllMarkers();
564 }
565
566 for (unsigned i=0; i < NumSlots; ++i) {
567 LiveInterval *LI = new LiveInterval(i, 0);
568 Intervals.push_back(LI);
569 LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
570 SortedSlots.push_back(i);
571 }
572
573 // Calculate the liveness of each block.
574 calculateLocalLiveness();
575
576 // Propagate the liveness information.
577 calculateLiveIntervals(NumSlots);
578
579 // Maps old slots to new slots.
580 DenseMap SlotRemap;
581 unsigned RemovedSlots = 0;
582 unsigned ReducedSize = 0;
583
584 // Do not bother looking at empty intervals.
585 for (unsigned I = 0; I < NumSlots; ++I) {
586 if (Intervals[SortedSlots[I]]->empty())
587 SortedSlots[I] = -1;
588 }
589
590 // This is a simple greedy algorithm for merging allocas. First, sort the
591 // slots, placing the largest slots first. Next, perform an n^2 scan and look
592 // for disjoint slots. When you find disjoint slots, merge the smaller one
593 // into the bigger one and update the live interval. Remove the small alloca
594 // and continue.
595
596 // Sort the slots according to their size. Place unused slots at the end.
597 std::sort(SortedSlots.begin(), SortedSlots.end(), SlotSizeSorter(MFI));
598
599 bool Chanded = true;
600 while (Chanded) {
601 Chanded = false;
602 for (unsigned I = 0; I < NumSlots; ++I) {
603 if (SortedSlots[I] == -1)
604 continue;
605
606 for (unsigned J=0; J < NumSlots; ++J) {
607 if (SortedSlots[J] == -1)
608 continue;
609
610 int FirstSlot = SortedSlots[I];
611 int SecondSlot = SortedSlots[J];
612 LiveInterval *First = Intervals[FirstSlot];
613 LiveInterval *Second = Intervals[SecondSlot];
614 assert (!First->empty() && !Second->empty() && "Found an empty range");
615
616 // Merge disjoint slots.
617 if (!First->overlaps(*Second)) {
618 Chanded = true;
619 First->MergeRangesInAsValue(*Second, First->getValNumInfo(0));
620 SlotRemap[SecondSlot] = FirstSlot;
621 SortedSlots[J] = -1;
622 DEBUG(dbgs()<<"Merging #"<
623 unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot),
624 MFI->getObjectAlignment(SecondSlot));
625
626 assert(MFI->getObjectSize(FirstSlot) >=
627 MFI->getObjectSize(SecondSlot) &&
628 "Merging a small object into a larger one");
629
630 RemovedSlots+=1;
631 ReducedSize += MFI->getObjectSize(SecondSlot);
632 MFI->setObjectAlignment(FirstSlot, MaxAlignment);
633 MFI->RemoveStackObject(SecondSlot);
634 }
635 }
636 }
637 }// While changed.
638
639 // Record statistics.
640 StackSpaceSaved += ReducedSize;
641 StackSlotMerged += RemovedSlots;
642 DEBUG(dbgs()<<"Merge "<
643 ReducedSize<<" bytes\n");
644
645 // Scan the entire function and update all machine operands that use frame
646 // indices to use the remapped frame index.
647 expungeSlotMap(SlotRemap, NumSlots);
648 remapInstructions(SlotRemap);
649
650 // Release the intervals.
651 for (unsigned I = 0; I < NumSlots; ++I) {
652 delete Intervals[I];
653 }
654
655 return removeAllMarkers();
656 }
0 ; RUN: llc < %s | FileCheck %s --check-prefix=YESCOLOR
1 ; RUN: llc -no-stack-coloring < %s | FileCheck %s --check-prefix=NOCOLOR
2
3 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
4 target triple = "x86_64-apple-macosx10.8.0"
5
6 ;YESCOLOR: subq $136, %rsp
7 ;NOCOLOR: subq $264, %rsp
8
9
10 define i32 @myCall_w2(i32 %in) {
11 entry:
12 %a = alloca [17 x i8*], align 8
13 %a2 = alloca [16 x i8*], align 8
14 %b = bitcast [17 x i8*]* %a to i8*
15 %b2 = bitcast [16 x i8*]* %a2 to i8*
16 call void @llvm.lifetime.start(i64 -1, i8* %b)
17 %t1 = call i32 @foo(i32 %in, i8* %b)
18 %t2 = call i32 @foo(i32 %in, i8* %b)
19 call void @llvm.lifetime.end(i64 -1, i8* %b)
20 call void @llvm.lifetime.start(i64 -1, i8* %b2)
21 %t3 = call i32 @foo(i32 %in, i8* %b2)
22 %t4 = call i32 @foo(i32 %in, i8* %b2)
23 call void @llvm.lifetime.end(i64 -1, i8* %b2)
24 %t5 = add i32 %t1, %t2
25 %t6 = add i32 %t3, %t4
26 %t7 = add i32 %t5, %t6
27 ret i32 %t7
28 }
29
30
31 ;YESCOLOR: subq $272, %rsp
32 ;NOCOLOR: subq $272, %rsp
33
34 define i32 @myCall2_no_merge(i32 %in, i1 %d) {
35 entry:
36 %a = alloca [17 x i8*], align 8
37 %a2 = alloca [16 x i8*], align 8
38 %b = bitcast [17 x i8*]* %a to i8*
39 %b2 = bitcast [16 x i8*]* %a2 to i8*
40 call void @llvm.lifetime.start(i64 -1, i8* %b)
41 %t1 = call i32 @foo(i32 %in, i8* %b)
42 %t2 = call i32 @foo(i32 %in, i8* %b)
43 br i1 %d, label %bb2, label %bb3
44 bb2:
45 call void @llvm.lifetime.start(i64 -1, i8* %b2)
46 %t3 = call i32 @foo(i32 %in, i8* %b2)
47 %t4 = call i32 @foo(i32 %in, i8* %b2)
48 call void @llvm.lifetime.end(i64 -1, i8* %b2)
49 %t5 = add i32 %t1, %t2
50 %t6 = add i32 %t3, %t4
51 %t7 = add i32 %t5, %t6
52 call void @llvm.lifetime.end(i64 -1, i8* %b)
53 ret i32 %t7
54 bb3:
55 call void @llvm.lifetime.end(i64 -1, i8* %b)
56 ret i32 0
57 }
58
59 ;YESCOLOR: subq $144, %rsp
60 ;NOCOLOR: subq $272, %rsp
61
62 define i32 @myCall2_w2(i32 %in, i1 %d) {
63 entry:
64 %a = alloca [17 x i8*], align 8
65 %a2 = alloca [16 x i8*], align 8
66 %b = bitcast [17 x i8*]* %a to i8*
67 %b2 = bitcast [16 x i8*]* %a2 to i8*
68 call void @llvm.lifetime.start(i64 -1, i8* %b)
69 %t1 = call i32 @foo(i32 %in, i8* %b)
70 %t2 = call i32 @foo(i32 %in, i8* %b)
71 call void @llvm.lifetime.end(i64 -1, i8* %b)
72 br i1 %d, label %bb2, label %bb3
73 bb2:
74 call void @llvm.lifetime.start(i64 -1, i8* %b2)
75 %t3 = call i32 @foo(i32 %in, i8* %b2)
76 %t4 = call i32 @foo(i32 %in, i8* %b2)
77 call void @llvm.lifetime.end(i64 -1, i8* %b2)
78 %t5 = add i32 %t1, %t2
79 %t6 = add i32 %t3, %t4
80 %t7 = add i32 %t5, %t6
81 ret i32 %t7
82 bb3:
83 ret i32 0
84 }
85 ;YESCOLOR: subq $208, %rsp
86 ;NOCOLOR: subq $400, %rsp
87
88
89
90
91 define i32 @myCall_w4(i32 %in) {
92 entry:
93 %a1 = alloca [14 x i8*], align 8
94 %a2 = alloca [13 x i8*], align 8
95 %a3 = alloca [12 x i8*], align 8
96 %a4 = alloca [11 x i8*], align 8
97 %b1 = bitcast [14 x i8*]* %a1 to i8*
98 %b2 = bitcast [13 x i8*]* %a2 to i8*
99 %b3 = bitcast [12 x i8*]* %a3 to i8*
100 %b4 = bitcast [11 x i8*]* %a4 to i8*
101 call void @llvm.lifetime.start(i64 -1, i8* %b4)
102 call void @llvm.lifetime.start(i64 -1, i8* %b1)
103 %t1 = call i32 @foo(i32 %in, i8* %b1)
104 %t2 = call i32 @foo(i32 %in, i8* %b1)
105 call void @llvm.lifetime.end(i64 -1, i8* %b1)
106 call void @llvm.lifetime.start(i64 -1, i8* %b2)
107 %t9 = call i32 @foo(i32 %in, i8* %b2)
108 %t8 = call i32 @foo(i32 %in, i8* %b2)
109 call void @llvm.lifetime.end(i64 -1, i8* %b2)
110 call void @llvm.lifetime.start(i64 -1, i8* %b3)
111 %t3 = call i32 @foo(i32 %in, i8* %b3)
112 %t4 = call i32 @foo(i32 %in, i8* %b3)
113 call void @llvm.lifetime.end(i64 -1, i8* %b3)
114 %t11 = call i32 @foo(i32 %in, i8* %b4)
115 call void @llvm.lifetime.end(i64 -1, i8* %b4)
116 %t5 = add i32 %t1, %t2
117 %t6 = add i32 %t3, %t4
118 %t7 = add i32 %t5, %t6
119 ret i32 %t7
120 }
121
122 ;YESCOLOR: subq $112, %rsp
123 ;NOCOLOR: subq $400, %rsp
124
125 define i32 @myCall2_w4(i32 %in) {
126 entry:
127 %a1 = alloca [14 x i8*], align 8
128 %a2 = alloca [13 x i8*], align 8
129 %a3 = alloca [12 x i8*], align 8
130 %a4 = alloca [11 x i8*], align 8
131 %b1 = bitcast [14 x i8*]* %a1 to i8*
132 %b2 = bitcast [13 x i8*]* %a2 to i8*
133 %b3 = bitcast [12 x i8*]* %a3 to i8*
134 %b4 = bitcast [11 x i8*]* %a4 to i8*
135 call void @llvm.lifetime.start(i64 -1, i8* %b1)
136 %t1 = call i32 @foo(i32 %in, i8* %b1)
137 %t2 = call i32 @foo(i32 %in, i8* %b1)
138 call void @llvm.lifetime.end(i64 -1, i8* %b1)
139 call void @llvm.lifetime.start(i64 -1, i8* %b2)
140 %t9 = call i32 @foo(i32 %in, i8* %b2)
141 %t8 = call i32 @foo(i32 %in, i8* %b2)
142 call void @llvm.lifetime.end(i64 -1, i8* %b2)
143 call void @llvm.lifetime.start(i64 -1, i8* %b3)
144 %t3 = call i32 @foo(i32 %in, i8* %b3)
145 %t4 = call i32 @foo(i32 %in, i8* %b3)
146 call void @llvm.lifetime.end(i64 -1, i8* %b3)
147 br i1 undef, label %bb2, label %bb3
148 bb2:
149 call void @llvm.lifetime.start(i64 -1, i8* %b4)
150 %t11 = call i32 @foo(i32 %in, i8* %b4)
151 call void @llvm.lifetime.end(i64 -1, i8* %b4)
152 %t5 = add i32 %t1, %t2
153 %t6 = add i32 %t3, %t4
154 %t7 = add i32 %t5, %t6
155 ret i32 %t7
156 bb3:
157 ret i32 0
158 }
159
160
161 ;YESCOLOR: subq $144, %rsp
162 ;NOCOLOR: subq $272, %rsp
163
164
165 define i32 @myCall2_noend(i32 %in, i1 %d) {
166 entry:
167 %a = alloca [17 x i8*], align 8
168 %a2 = alloca [16 x i8*], align 8
169 %b = bitcast [17 x i8*]* %a to i8*
170 %b2 = bitcast [16 x i8*]* %a2 to i8*
171 call void @llvm.lifetime.start(i64 -1, i8* %b)
172 %t1 = call i32 @foo(i32 %in, i8* %b)
173 %t2 = call i32 @foo(i32 %in, i8* %b)
174 call void @llvm.lifetime.end(i64 -1, i8* %b)
175 br i1 %d, label %bb2, label %bb3
176 bb2:
177 call void @llvm.lifetime.start(i64 -1, i8* %b2)
178 %t3 = call i32 @foo(i32 %in, i8* %b2)
179 %t4 = call i32 @foo(i32 %in, i8* %b2)
180 %t5 = add i32 %t1, %t2
181 %t6 = add i32 %t3, %t4
182 %t7 = add i32 %t5, %t6
183 ret i32 %t7
184 bb3:
185 ret i32 0
186 }
187
188 ;YESCOLOR: subq $144, %rsp
189 ;NOCOLOR: subq $272, %rsp
190 define i32 @myCall2_noend2(i32 %in, i1 %d) {
191 entry:
192 %a = alloca [17 x i8*], align 8
193 %a2 = alloca [16 x i8*], align 8
194 %b = bitcast [17 x i8*]* %a to i8*
195 %b2 = bitcast [16 x i8*]* %a2 to i8*
196 call void @llvm.lifetime.start(i64 -1, i8* %b)
197 %t1 = call i32 @foo(i32 %in, i8* %b)
198 %t2 = call i32 @foo(i32 %in, i8* %b)
199 br i1 %d, label %bb2, label %bb3
200 bb2:
201 call void @llvm.lifetime.end(i64 -1, i8* %b)
202 call void @llvm.lifetime.start(i64 -1, i8* %b2)
203 %t3 = call i32 @foo(i32 %in, i8* %b2)
204 %t4 = call i32 @foo(i32 %in, i8* %b2)
205 %t5 = add i32 %t1, %t2
206 %t6 = add i32 %t3, %t4
207 %t7 = add i32 %t5, %t6
208 ret i32 %t7
209 bb3:
210 ret i32 0
211 }
212
213
214 ;YESCOLOR: subq $144, %rsp
215 ;NOCOLOR: subq $272, %rsp
216 define i32 @myCall2_nostart(i32 %in, i1 %d) {
217 entry:
218 %a = alloca [17 x i8*], align 8
219 %a2 = alloca [16 x i8*], align 8
220 %b = bitcast [17 x i8*]* %a to i8*
221 %b2 = bitcast [16 x i8*]* %a2 to i8*
222 %t1 = call i32 @foo(i32 %in, i8* %b)
223 %t2 = call i32 @foo(i32 %in, i8* %b)
224 call void @llvm.lifetime.end(i64 -1, i8* %b)
225 br i1 %d, label %bb2, label %bb3
226 bb2:
227 call void @llvm.lifetime.start(i64 -1, i8* %b2)
228 %t3 = call i32 @foo(i32 %in, i8* %b2)
229 %t4 = call i32 @foo(i32 %in, i8* %b2)
230 %t5 = add i32 %t1, %t2
231 %t6 = add i32 %t3, %t4
232 %t7 = add i32 %t5, %t6
233 ret i32 %t7
234 bb3:
235 ret i32 0
236 }
237
238 ; Adapted from the test in Transforms/Inline/array_merge.ll
239 ;YESCOLOR: subq $816, %rsp
240 ;NOCOLOR: subq $1616, %rsp
241 define void @array_merge() nounwind ssp {
242 entry:
243 %A.i1 = alloca [100 x i32], align 4
244 %B.i2 = alloca [100 x i32], align 4
245 %A.i = alloca [100 x i32], align 4
246 %B.i = alloca [100 x i32], align 4
247 %0 = bitcast [100 x i32]* %A.i to i8*
248 call void @llvm.lifetime.start(i64 -1, i8* %0) nounwind
249 %1 = bitcast [100 x i32]* %B.i to i8*
250 call void @llvm.lifetime.start(i64 -1, i8* %1) nounwind
251 call void @bar([100 x i32]* %A.i, [100 x i32]* %B.i) nounwind
252 call void @llvm.lifetime.end(i64 -1, i8* %0) nounwind
253 call void @llvm.lifetime.end(i64 -1, i8* %1) nounwind
254 %2 = bitcast [100 x i32]* %A.i1 to i8*
255 call void @llvm.lifetime.start(i64 -1, i8* %2) nounwind
256 %3 = bitcast [100 x i32]* %B.i2 to i8*
257 call void @llvm.lifetime.start(i64 -1, i8* %3) nounwind
258 call void @bar([100 x i32]* %A.i1, [100 x i32]* %B.i2) nounwind
259 call void @llvm.lifetime.end(i64 -1, i8* %2) nounwind
260 call void @llvm.lifetime.end(i64 -1, i8* %3) nounwind
261 ret void
262 }
263
264 declare void @bar([100 x i32]* , [100 x i32]*) nounwind
265
266 declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
267
268 declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
269
270 declare i32 @foo(i32, i8*)
271
299299 "REG_SEQUENCE",
300300 "COPY",
301301 "BUNDLE",
302 "LIFETIME_START",
303 "LIFETIME_END",
302304 0
303305 };
304306 const DenseMap &Insts = getInstructions();