llvm.org GIT mirror llvm / 3b53c4e
Implement unrolling of multiblock loops. This significantly improves the utility of the LoopUnroll pass. Also, add a testcase for multiblock-loop unrolling. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@29859 91177308-0d34-0410-b5e6-96231b3b80d8 Owen Anderson 13 years ago
2 changed file(s) with 149 addition(s) and 133 deletion(s). Raw diff Collapse all Expand all
1010 // been canonicalized by the -indvars pass, allowing it to determine the trip
1111 // counts of loops easily.
1212 //
13 // This pass is currently extremely limited. It only currently only unrolls
14 // single basic block loops that execute a constant number of times.
13 // This pass will unroll multi-block loops only if they contain no non-unrolled
14 // subloops. The process of unrolling can produce extraneous basic blocks
15 // linked with unconditional branches. This will be corrected in the future.
1516 //
1617 //===----------------------------------------------------------------------===//
1718
5253 ///
5354 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
5455 AU.addRequiredID(LoopSimplifyID);
56 AU.addRequiredID(LCSSAID);
5557 AU.addRequired();
58 AU.addPreservedID(LCSSAID);
5659 AU.addPreserved();
5760 }
5861 };
124127 for (unsigned i = 0, e = SubLoops.size(); i != e; ++i)
125128 Changed |= visitLoop(SubLoops[i]);
126129
127 // We only handle single basic block loops right now.
128 if (L->getBlocks().size() != 1)
129 return Changed;
130
131 BasicBlock *BB = L->getHeader();
132 BranchInst *BI = dyn_cast(BB->getTerminator());
130 BasicBlock* Header = L->getHeader();
131 BasicBlock* LatchBlock = L->getLoopLatch();
132
133 BranchInst *BI = dyn_cast(LatchBlock->getTerminator());
133134 if (BI == 0) return Changed; // Must end in a conditional branch
134135
135136 ConstantInt *TripCountC = dyn_cast_or_null(L->getTripCount());
140141 return Changed; // More than 2^32 iterations???
141142
142143 unsigned LoopSize = ApproximateLoopSize(L);
143 DEBUG(std::cerr << "Loop Unroll: F[" << BB->getParent()->getName()
144 << "] Loop %" << BB->getName() << " Loop Size = " << LoopSize
145 << " Trip Count = " << TripCountFull << " - ");
144 DEBUG(std::cerr << "Loop Unroll: F[" << Header->getParent()->getName()
145 << "] Loop %" << Header->getName() << " Loop Size = "
146 << LoopSize << " Trip Count = " << TripCountFull << " - ");
146147 uint64_t Size = (uint64_t)LoopSize*TripCountFull;
147148 if (Size > UnrollThreshold) {
148149 DEBUG(std::cerr << "TOO LARGE: " << Size << ">" << UnrollThreshold << "\n");
150151 }
151152 DEBUG(std::cerr << "UNROLLING!\n");
152153
154 std::vector LoopBlocks = L->getBlocks();
155
153156 unsigned TripCount = (unsigned)TripCountFull;
154157
155 BasicBlock *LoopExit = BI->getSuccessor(L->contains(BI->getSuccessor(0)));
156
157 // Create a new basic block to temporarily hold all of the cloned code.
158 BasicBlock *NewBlock = new BasicBlock();
158 BasicBlock *LoopExit = BI->getSuccessor(L->contains(BI->getSuccessor(0)));
159159
160160 // For the first iteration of the loop, we should use the precloned values for
161161 // PHI nodes. Insert associations now.
162162 std::map LastValueMap;
163163 std::vector OrigPHINode;
164 for (BasicBlock::iterator I = BB->begin(); isa(I); ++I) {
164 for (BasicBlock::iterator I = Header->begin(); isa(I); ++I) {
165165 PHINode *PN = cast(I);
166166 OrigPHINode.push_back(PN);
167 if (Instruction *I =dyn_cast(PN->getIncomingValueForBlock(BB)))
168 if (I->getParent() == BB)
167 if (Instruction *I =
168 dyn_cast(PN->getIncomingValueForBlock(LatchBlock)))
169 if (L->contains(I->getParent()))
169170 LastValueMap[I] = I;
170171 }
171172
172173 // Remove the exit branch from the loop
173 BB->getInstList().erase(BI);
174 LatchBlock->getInstList().erase(BI);
175
176 std::vector Headers;
177 std::vector Latches;
178 Headers.push_back(Header);
179 Latches.push_back(LatchBlock);
174180
175181 assert(TripCount != 0 && "Trip count of 0 is impossible!");
176182 for (unsigned It = 1; It != TripCount; ++It) {
177183 char SuffixBuffer[100];
178184 sprintf(SuffixBuffer, ".%d", It);
179 std::map ValueMap;
180 BasicBlock *New = CloneBasicBlock(BB, ValueMap, SuffixBuffer);
181
182 // Loop over all of the PHI nodes in the block, changing them to use the
183 // incoming values from the previous block.
184 for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
185 PHINode *NewPHI = cast(ValueMap[OrigPHINode[i]]);
186 Value *InVal = NewPHI->getIncomingValueForBlock(BB);
187 if (Instruction *InValI = dyn_cast(InVal))
188 if (InValI->getParent() == BB)
189 InVal = LastValueMap[InValI];
190 ValueMap[OrigPHINode[i]] = InVal;
191 New->getInstList().erase(NewPHI);
192 }
193
194 for (BasicBlock::iterator I = New->begin(), E = New->end(); I != E; ++I)
195 RemapInstruction(I, ValueMap);
196
197 // Now that all of the instructions are remapped, splice them into the end
198 // of the NewBlock.
199 NewBlock->getInstList().splice(NewBlock->end(), New->getInstList());
200 delete New;
201
202 // LastValue map now contains values from this iteration.
203 std::swap(LastValueMap, ValueMap);
204 }
205
206 // If there was more than one iteration, replace any uses of values computed
207 // in the loop with values computed during the last iteration of the loop.
208 if (TripCount != 1) {
209 std::set Users;
210 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
211 Users.insert(I->use_begin(), I->use_end());
212
213 // We don't want to reprocess entries with PHI nodes in them. For this
214 // reason, we look at each operand of each user exactly once, performing the
215 // substitution exactly once.
216 for (std::set::iterator UI = Users.begin(), E = Users.end(); UI != E;
217 ++UI) {
218 Instruction *I = cast(*UI);
219 if (I->getParent() != BB && I->getParent() != NewBlock)
185
186 std::vector NewBlocks;
187
188 for (std::vector::iterator BB = LoopBlocks.begin(),
189 E = LoopBlocks.end(); BB != E; ++BB) {
190 std::map ValueMap;
191 BasicBlock *New = CloneBasicBlock(*BB, ValueMap, SuffixBuffer);
192 Header->getParent()->getBasicBlockList().push_back(New);
193
194 // Loop over all of the PHI nodes in the block, changing them to use the
195 // incoming values from the previous block.
196 if (*BB == Header)
197 for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
198 PHINode *NewPHI = cast(ValueMap[OrigPHINode[i]]);
199 Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
200 if (Instruction *InValI = dyn_cast(InVal))
201 if (It > 1 && L->contains(InValI->getParent()))
202 InVal = LastValueMap[InValI];
203 ValueMap[OrigPHINode[i]] = InVal;
204 New->getInstList().erase(NewPHI);
205 }
206
207 // Update our running map of newest clones
208 LastValueMap[*BB] = New;
209 for (std::map::iterator VI = ValueMap.begin(),
210 VE = ValueMap.end(); VI != VE; ++VI)
211 LastValueMap[VI->first] = VI->second;
212
213 L->addBasicBlockToLoop(New, *LI);
214
215 // Add phi entries for newly created values to all exit blocks except
216 // the successor of the latch block. The successor of the latch block will
217 // be updated specially after unrolling all the way.
218 if (*BB != LatchBlock)
219 for (Value::use_iterator UI = (*BB)->use_begin(), UE = (*BB)->use_end();
220 UI != UE; ++UI) {
221 Instruction* UseInst = cast(*UI);
222 if (isa(UseInst) && !L->contains(UseInst->getParent())) {
223 PHINode* phi = cast(UseInst);
224 Value* Incoming = phi->getIncomingValueForBlock(*BB);
225 if (isa(Incoming))
226 Incoming = LastValueMap[Incoming];
227
228 phi->addIncoming(Incoming, New);
229 }
230 }
231
232 // Keep track of new headers and latches as we create them, so that
233 // we can insert the proper branches later.
234 if (*BB == Header)
235 Headers.push_back(New);
236 if (*BB == LatchBlock)
237 Latches.push_back(New);
238
239 NewBlocks.push_back(New);
240 }
241
242 // Remap all instructions in the most recent iteration
243 for (unsigned i = 0; i < NewBlocks.size(); ++i)
244 for (BasicBlock::iterator I = NewBlocks[i]->begin(),
245 E = NewBlocks[i]->end(); I != E; ++I)
220246 RemapInstruction(I, LastValueMap);
221 }
222 }
223
224 // Now that we cloned the block as many times as we needed, stitch the new
225 // code into the original block and delete the temporary block.
226 BB->getInstList().splice(BB->end(), NewBlock->getInstList());
227 delete NewBlock;
247 }
248
249 // Insert the branches that link the different iterations together
250 for (unsigned i = 0; i < Latches.size()-1; ++i)
251 new BranchInst(Headers[i+1], Latches[i]);
252
253 // Finally, add an unconditional branch to the block to continue into the exit
254 // block.
255 new BranchInst(LoopExit, Latches[Latches.size()-1]);
256
257 // Update PHI nodes that reference the final latch block
258 if (TripCount > 1) {
259 std::set Users;
260 for (Value::use_iterator UI = LatchBlock->use_begin(),
261 UE = LatchBlock->use_end(); UI != UE; ++UI)
262 if (PHINode* phi = dyn_cast(*UI))
263 Users.insert(phi);
264
265 for (std::set::iterator SI = Users.begin(), SE = Users.end();
266 SI != SE; ++SI) {
267 Value* InVal = (*SI)->getIncomingValueForBlock(LatchBlock);
268 if (isa(InVal))
269 InVal = LastValueMap[InVal];
270 (*SI)->removeIncomingValue(LatchBlock, false);
271 (*SI)->addIncoming(InVal, cast(LastValueMap[LatchBlock]));
272 }
273 }
228274
229275 // Now loop over the PHI nodes in the original block, setting them to their
230276 // incoming values.
232278 for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
233279 PHINode *PN = OrigPHINode[i];
234280 PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
235 BB->getInstList().erase(PN);
236 }
237
238 // Finally, add an unconditional branch to the block to continue into the exit
239 // block.
240 new BranchInst(LoopExit, BB);
281 Header->getInstList().erase(PN);
282 }
241283
242284 // At this point, the code is well formed. We now do a quick sweep over the
243285 // inserted code, doing constant propagation and dead code elimination as we
244286 // go.
245 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
246 Instruction *Inst = I++;
247
248 if (isInstructionTriviallyDead(Inst))
249 BB->getInstList().erase(Inst);
250 else if (Constant *C = ConstantFoldInstruction(Inst)) {
251 Inst->replaceAllUsesWith(C);
252 BB->getInstList().erase(Inst);
253 }
254 }
287 const std::vector &NewLoopBlocks = L->getBlocks();
288 for (std::vector::const_iterator BB = NewLoopBlocks.begin(),
289 E = NewLoopBlocks.end(); BB != E; ++BB)
290 for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
291 Instruction *Inst = I++;
292
293 if (isInstructionTriviallyDead(Inst))
294 (*BB)->getInstList().erase(Inst);
295 else if (Constant *C = ConstantFoldInstruction(Inst)) {
296 Inst->replaceAllUsesWith(C);
297 (*BB)->getInstList().erase(Inst);
298 }
299 }
255300
256301 // Update the loop information for this loop.
257302 Loop *Parent = L->getParentLoop();
258303
259304 // Move all of the basic blocks in the loop into the parent loop.
260 LI->changeLoopFor(BB, Parent);
305 for (std::vector::const_iterator BB = NewLoopBlocks.begin(),
306 E = NewLoopBlocks.end(); BB != E; ++BB)
307 LI->changeLoopFor(*BB, Parent);
261308
262309 // Remove the loop from the parent.
263310 if (Parent)
265312 else
266313 delete LI->removeLoop(std::find(LI->begin(), LI->end(), L));
267314
268 // Remove single-entry Phis from the exit block.
269 for (BasicBlock::iterator ExitInstr = LoopExit->begin();
270 PHINode* PN = dyn_cast(ExitInstr); ++ExitInstr) {
271 assert(PN->getNumIncomingValues() == 1
272 && "Block should only have one pred, so Phi's must be single entry");
273 PN->replaceAllUsesWith(PN->getOperand(0));
274 PN->eraseFromParent();
275 }
276
277 // FIXME: Should update dominator analyses
278
279 // Now that everything is up-to-date that will be, we fold the loop block into
280 // the preheader and exit block, updating our analyses as we go.
281 LoopExit->getInstList().splice(LoopExit->begin(), BB->getInstList(),
282 BB->getInstList().begin(),
283 prior(BB->getInstList().end()));
284 LoopExit->getInstList().splice(LoopExit->begin(), Preheader->getInstList(),
285 Preheader->getInstList().begin(),
286 prior(Preheader->getInstList().end()));
287
288 // Make all other blocks in the program branch to LoopExit now instead of
289 // Preheader.
290 Preheader->replaceAllUsesWith(LoopExit);
291
292 Function *F = LoopExit->getParent();
293 if (Parent) {
294 // Otherwise, if this is a sub-loop, and the preheader was the loop header
295 // of the parent loop, move the exit block to be the new parent loop header.
296 if (Parent->getHeader() == Preheader) {
297 assert(Parent->contains(LoopExit) &&
298 "Exit block isn't contained in parent?");
299 Parent->moveToHeader(LoopExit);
300 }
301 } else {
302 // If the preheader was the entry block of this function, move the exit
303 // block to be the new entry of the function.
304 if (Preheader == &F->front())
305 F->getBasicBlockList().splice(F->begin(),
306 F->getBasicBlockList(), LoopExit);
307 }
308
309 // Remove BB and LoopExit from our analyses.
310 LI->removeBlock(Preheader);
311 LI->removeBlock(BB);
312
313 // Actually delete the blocks now.
314 F->getBasicBlockList().erase(Preheader);
315 F->getBasicBlockList().erase(BB);
316
317315 ++NumUnrolled;
318316 return true;
319317 }
0 ; RUN: llvm-as < %s | opt -loop-unroll | llvm-dis | grep "bb72.2"
1
2 void %vorbis_encode_noisebias_setup() {
3 entry:
4 br label %cond_true.outer
5
6 cond_true.outer:
7 %indvar1.ph = phi uint [ 0, %entry ], [ %indvar.next2, %bb72 ]
8 br label %bb72
9
10 bb72:
11 %indvar.next2 = add uint %indvar1.ph, 1
12 %exitcond3 = seteq uint %indvar.next2, 3
13 br bool %exitcond3, label %cond_true138, label %cond_true.outer
14
15 cond_true138:
16 ret void
17 }