llvm.org GIT mirror llvm / 1dfa6e7
[PM/Unswitch] Teach the new simple loop unswitch to handle loop invariant PHI inputs and to rewrite PHI nodes during the actual unswitching. The checking is quite easy, but rewriting the PHI nodes is somewhat surprisingly challenging. This should handle both branches and switches. I think this is now a full featured trivial unswitcher, and more full featured than the trivial cases in the old pass while still being (IMO) somewhat simpler in how it works. Next up is to verify its correctness in more widespread testing, and then to add non-trivial unswitching. Thanks to Davide and Sanjoy for the excellent review. There is one remaining question that I may address in a follow-up patch (see the review thread for details) but it isn't related to the functionality specifically. Differential Revision: https://reviews.llvm.org/D32699 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@302867 91177308-0d34-0410-b5e6-96231b3b80d8 Chandler Carruth 3 years ago
2 changed file(s) with 337 addition(s) and 23 deletion(s). Raw diff Collapse all Expand all
5454 /// Update the dominator tree after removing one exiting predecessor of a loop
5555 /// exit block.
5656 static void updateLoopExitIDom(BasicBlock *LoopExitBB, Loop &L,
57 DominatorTree &DT) {
57 DominatorTree &DT) {
5858 assert(pred_begin(LoopExitBB) != pred_end(LoopExitBB) &&
5959 "Cannot have empty predecessors of the loop exit block if we split "
6060 "off a block to unswitch!");
136136 }
137137 }
138138
139 /// Check that all the LCSSA PHI nodes in the loop exit block have trivial
140 /// incoming values along this edge.
141 static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
142 BasicBlock &ExitBB) {
143 for (Instruction &I : ExitBB) {
144 auto *PN = dyn_cast(&I);
145 if (!PN)
146 // No more PHIs to check.
147 return true;
148
149 // If the incoming value for this edge isn't loop invariant the unswitch
150 // won't be trivial.
151 if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
152 return false;
153 }
154 llvm_unreachable("Basic blocks should never be empty!");
155 }
156
157 /// Rewrite the PHI nodes in an unswitched loop exit basic block.
158 ///
159 /// Requires that the loop exit and unswitched basic block are the same, and
160 /// that the exiting block was a unique predecessor of that block. Rewrites the
161 /// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
162 /// PHI nodes from the old preheader that now contains the unswitched
163 /// terminator.
164 static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB,
165 BasicBlock &OldExitingBB,
166 BasicBlock &OldPH) {
167 for (Instruction &I : UnswitchedBB) {
168 auto *PN = dyn_cast(&I);
169 if (!PN)
170 // No more PHIs to check.
171 break;
172
173 // When the loop exit is directly unswitched we just need to update the
174 // incoming basic block. We loop to handle weird cases with repeated
175 // incoming blocks, but expect to typically only have one operand here.
176 for (auto i : llvm::seq(0, PN->getNumOperands())) {
177 assert(PN->getIncomingBlock(i) == &OldExitingBB &&
178 "Found incoming block different from unique predecessor!");
179 PN->setIncomingBlock(i, &OldPH);
180 }
181 }
182 }
183
184 /// Rewrite the PHI nodes in the loop exit basic block and the split off
185 /// unswitched block.
186 ///
187 /// Because the exit block remains an exit from the loop, this rewrites the
188 /// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
189 /// nodes into the unswitched basic block to select between the value in the
190 /// old preheader and the loop exit.
191 static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
192 BasicBlock &UnswitchedBB,
193 BasicBlock &OldExitingBB,
194 BasicBlock &OldPH) {
195 assert(&ExitBB != &UnswitchedBB &&
196 "Must have different loop exit and unswitched blocks!");
197 Instruction *InsertPt = &*UnswitchedBB.begin();
198 for (Instruction &I : ExitBB) {
199 auto *PN = dyn_cast(&I);
200 if (!PN)
201 // No more PHIs to check.
202 break;
203
204 auto *NewPN = PHINode::Create(PN->getType(), /*NumReservedValues*/ 2,
205 PN->getName() + ".split", InsertPt);
206
207 // Walk backwards over the old PHI node's inputs to minimize the cost of
208 // removing each one. We have to do this weird loop manually so that we
209 // create the same number of new incoming edges in the new PHI as we expect
210 // each case-based edge to be included in the unswitched switch in some
211 // cases.
212 // FIXME: This is really, really gross. It would be much cleaner if LLVM
213 // allowed us to create a single entry for a predecessor block without
214 // having separate entries for each "edge" even though these edges are
215 // required to produce identical results.
216 for (int i = PN->getNumIncomingValues() - 1; i >= 0; --i) {
217 if (PN->getIncomingBlock(i) != &OldExitingBB)
218 continue;
219
220 Value *Incoming = PN->removeIncomingValue(i);
221 NewPN->addIncoming(Incoming, &OldPH);
222 }
223
224 // Now replace the old PHI with the new one and wire the old one in as an
225 // input to the new one.
226 PN->replaceAllUsesWith(NewPN);
227 NewPN->addIncoming(PN, &ExitBB);
228 }
229 }
230
139231 /// Unswitch a trivial branch if the condition is loop invariant.
140232 ///
141233 /// This routine should only be called when loop code leading to the branch has
186278 assert(L.contains(ContinueBB) &&
187279 "Cannot have both successors exit and still be in the loop!");
188280
189 // If the loop exit block contains phi nodes, this isn't trivial.
190 // FIXME: We should examine the PHI to determine whether or not we can handle
191 // it trivially.
192 if (isa(LoopExitBB->begin()))
281 auto *ParentBB = BI.getParent();
282 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB))
193283 return false;
194284
195285 DEBUG(dbgs() << " unswitching trivial branch when: " << CondVal
208298 BasicBlock *UnswitchedBB;
209299 if (BasicBlock *PredBB = LoopExitBB->getUniquePredecessor()) {
210300 (void)PredBB;
211 assert(PredBB == BI.getParent() && "A branch's parent is't a predecessor!");
301 assert(PredBB == BI.getParent() &&
302 "A branch's parent isn't a predecessor!");
212303 UnswitchedBB = LoopExitBB;
213304 } else {
214305 UnswitchedBB = SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI);
215306 }
216
217 BasicBlock *ParentBB = BI.getParent();
218307
219308 // Now splice the branch to gate reaching the new preheader and re-point its
220309 // successors.
227316 // Create a new unconditional branch that will continue the loop as a new
228317 // terminator.
229318 BranchInst::Create(ContinueBB, ParentBB);
319
320 // Rewrite the relevant PHI nodes.
321 if (UnswitchedBB == LoopExitBB)
322 rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
323 else
324 rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
325 *ParentBB, *OldPH);
230326
231327 // Now we need to update the dominator tree.
232328 updateDTAfterUnswitch(UnswitchedBB, OldPH, DT);
277373 if (!L.isLoopInvariant(LoopCond))
278374 return false;
279375
376 auto *ParentBB = SI.getParent();
377
280378 // FIXME: We should compute this once at the start and update it!
281379 SmallVector ExitBlocks;
282380 L.getExitBlocks(ExitBlocks);
286384 SmallVector ExitCaseIndices;
287385 for (auto Case : SI.cases()) {
288386 auto *SuccBB = Case.getCaseSuccessor();
289 if (ExitBlockSet.count(SuccBB) && !isa(SuccBB->begin()))
387 if (ExitBlockSet.count(SuccBB) &&
388 areLoopExitPHIsLoopInvariant(L, *ParentBB, *SuccBB))
290389 ExitCaseIndices.push_back(Case.getCaseIndex());
291390 }
292391 BasicBlock *DefaultExitBB = nullptr;
293392 if (ExitBlockSet.count(SI.getDefaultDest()) &&
294 !isa(SI.getDefaultDest()->begin()) &&
393 areLoopExitPHIsLoopInvariant(L, *ParentBB, *SI.getDefaultDest()) &&
295394 !isa(SI.getDefaultDest()->getTerminator()))
296395 DefaultExitBB = SI.getDefaultDest();
297396 else if (ExitCaseIndices.empty())
329428 if (CommonSuccBB) {
330429 SI.setDefaultDest(CommonSuccBB);
331430 } else {
332 BasicBlock *ParentBB = SI.getParent();
333431 BasicBlock *UnreachableBB = BasicBlock::Create(
334432 ParentBB->getContext(),
335433 Twine(ParentBB->getName()) + ".unreachable_default",
357455 // Now add the unswitched switch.
358456 auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
359457
360 // Split any exit blocks with remaining in-loop predecessors. We walk in
361 // reverse so that we split in the same order as the cases appeared. This is
362 // purely for convenience of reading the resulting IR, but it doesn't cost
363 // anything really.
458 // Rewrite the IR for the unswitched basic blocks. This requires two steps.
459 // First, we split any exit blocks with remaining in-loop predecessors. Then
460 // we update the PHIs in one of two ways depending on if there was a split.
461 // We walk in reverse so that we split in the same order as the cases
462 // appeared. This is purely for convenience of reading the resulting IR, but
463 // it doesn't cost anything really.
464 SmallPtrSet UnswitchedExitBBs;
364465 SmallDenseMap SplitExitBBMap;
365466 // Handle the default exit if necessary.
366467 // FIXME: It'd be great if we could merge this with the loop below but LLVM's
367468 // ranges aren't quite powerful enough yet.
368 if (DefaultExitBB && !pred_empty(DefaultExitBB)) {
369 auto *SplitBB =
370 SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI);
371 updateLoopExitIDom(DefaultExitBB, L, DT);
372 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
469 if (DefaultExitBB) {
470 if (pred_empty(DefaultExitBB)) {
471 UnswitchedExitBBs.insert(DefaultExitBB);
472 rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
473 } else {
474 auto *SplitBB =
475 SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI);
476 rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
477 *ParentBB, *OldPH);
478 updateLoopExitIDom(DefaultExitBB, L, DT);
479 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
480 }
373481 }
374482 // Note that we must use a reference in the for loop so that we update the
375483 // container.
376484 for (auto &CasePair : reverse(ExitCases)) {
377485 // Grab a reference to the exit block in the pair so that we can update it.
378 BasicBlock *&ExitBB = CasePair.second;
486 BasicBlock *ExitBB = CasePair.second;
379487
380488 // If this case is the last edge into the exit block, we can simply reuse it
381489 // as it will no longer be a loop exit. No mapping necessary.
382 if (pred_empty(ExitBB))
490 if (pred_empty(ExitBB)) {
491 // Only rewrite once.
492 if (UnswitchedExitBBs.insert(ExitBB).second)
493 rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
383494 continue;
495 }
384496
385497 // Otherwise we need to split the exit block so that we retain an exit
386498 // block from the loop and a target for the unswitched condition.
388500 if (!SplitExitBB) {
389501 // If this is the first time we see this, do the split and remember it.
390502 SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI);
503 rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
504 *ParentBB, *OldPH);
391505 updateLoopExitIDom(ExitBB, L, DT);
392506 }
393 ExitBB = SplitExitBB;
507 // Update the case pair to point to the split block.
508 CasePair.second = SplitExitBB;
394509 }
395510
396511 // Now add the unswitched cases. We do this in reverse order as we built them
182182 ; CHECK: [[UNREACHABLE]]:
183183 ; CHECK-NEXT: unreachable
184184 }
185
186 ; This test contains a trivially unswitchable branch with an LCSSA phi node in
187 ; a loop exit block.
188 define i32 @test5(i1 %cond1, i32 %x, i32 %y) {
189 ; CHECK-LABEL: @test5(
190 entry:
191 br label %loop_begin
192 ; CHECK-NEXT: entry:
193 ; CHECK-NEXT: br i1 %{{.*}}, label %entry.split, label %loop_exit
194 ;
195 ; CHECK: entry.split:
196 ; CHECK-NEXT: br label %loop_begin
197
198 loop_begin:
199 br i1 %cond1, label %latch, label %loop_exit
200 ; CHECK: loop_begin:
201 ; CHECK-NEXT: br label %latch
202
203 latch:
204 call void @some_func() noreturn nounwind
205 br label %loop_begin
206 ; CHECK: latch:
207 ; CHECK-NEXT: call
208 ; CHECK-NEXT: br label %loop_begin
209
210 loop_exit:
211 %result1 = phi i32 [ %x, %loop_begin ]
212 %result2 = phi i32 [ %y, %loop_begin ]
213 %result = add i32 %result1, %result2
214 ret i32 %result
215 ; CHECK: loop_exit:
216 ; CHECK-NEXT: %[[R1:.*]] = phi i32 [ %x, %entry ]
217 ; CHECK-NEXT: %[[R2:.*]] = phi i32 [ %y, %entry ]
218 ; CHECK-NEXT: %[[R:.*]] = add i32 %[[R1]], %[[R2]]
219 ; CHECK-NEXT: ret i32 %[[R]]
220 }
221
222 ; This test contains a trivially unswitchable branch with a real phi node in LCSSA
223 ; position in a shared exit block where a different path through the loop
224 ; produces a non-invariant input to the PHI node.
225 define i32 @test6(i32* %var, i1 %cond1, i1 %cond2, i32 %x, i32 %y) {
226 ; CHECK-LABEL: @test6(
227 entry:
228 br label %loop_begin
229 ; CHECK-NEXT: entry:
230 ; CHECK-NEXT: br i1 %{{.*}}, label %entry.split, label %loop_exit.split
231 ;
232 ; CHECK: entry.split:
233 ; CHECK-NEXT: br label %loop_begin
234
235 loop_begin:
236 br i1 %cond1, label %continue, label %loop_exit
237 ; CHECK: loop_begin:
238 ; CHECK-NEXT: br label %continue
239
240 continue:
241 %var_val = load i32, i32* %var
242 br i1 %cond2, label %latch, label %loop_exit
243 ; CHECK: continue:
244 ; CHECK-NEXT: load
245 ; CHECK-NEXT: br i1 %cond2, label %latch, label %loop_exit
246
247 latch:
248 call void @some_func() noreturn nounwind
249 br label %loop_begin
250 ; CHECK: latch:
251 ; CHECK-NEXT: call
252 ; CHECK-NEXT: br label %loop_begin
253
254 loop_exit:
255 %result1 = phi i32 [ %x, %loop_begin ], [ %var_val, %continue ]
256 %result2 = phi i32 [ %var_val, %continue ], [ %y, %loop_begin ]
257 %result = add i32 %result1, %result2
258 ret i32 %result
259 ; CHECK: loop_exit:
260 ; CHECK-NEXT: %[[R1:.*]] = phi i32 [ %var_val, %continue ]
261 ; CHECK-NEXT: %[[R2:.*]] = phi i32 [ %var_val, %continue ]
262 ; CHECK-NEXT: br label %loop_exit.split
263 ;
264 ; CHECK: loop_exit.split:
265 ; CHECK-NEXT: %[[R1S:.*]] = phi i32 [ %x, %entry ], [ %[[R1]], %loop_exit ]
266 ; CHECK-NEXT: %[[R2S:.*]] = phi i32 [ %y, %entry ], [ %[[R2]], %loop_exit ]
267 ; CHECK-NEXT: %[[R:.*]] = add i32 %[[R1S]], %[[R2S]]
268 ; CHECK-NEXT: ret i32 %[[R]]
269 }
270
271 ; This test contains a trivially unswitchable switch with an LCSSA phi node in
272 ; a loop exit block.
273 define i32 @test7(i32 %cond1, i32 %x, i32 %y) {
274 ; CHECK-LABEL: @test7(
275 entry:
276 br label %loop_begin
277 ; CHECK-NEXT: entry:
278 ; CHECK-NEXT: switch i32 %cond1, label %entry.split [
279 ; CHECK-NEXT: i32 0, label %loop_exit
280 ; CHECK-NEXT: i32 1, label %loop_exit
281 ; CHECK-NEXT: ]
282 ;
283 ; CHECK: entry.split:
284 ; CHECK-NEXT: br label %loop_begin
285
286 loop_begin:
287 switch i32 %cond1, label %latch [
288 i32 0, label %loop_exit
289 i32 1, label %loop_exit
290 ]
291 ; CHECK: loop_begin:
292 ; CHECK-NEXT: br label %latch
293
294 latch:
295 call void @some_func() noreturn nounwind
296 br label %loop_begin
297 ; CHECK: latch:
298 ; CHECK-NEXT: call
299 ; CHECK-NEXT: br label %loop_begin
300
301 loop_exit:
302 %result1 = phi i32 [ %x, %loop_begin ], [ %x, %loop_begin ]
303 %result2 = phi i32 [ %y, %loop_begin ], [ %y, %loop_begin ]
304 %result = add i32 %result1, %result2
305 ret i32 %result
306 ; CHECK: loop_exit:
307 ; CHECK-NEXT: %[[R1:.*]] = phi i32 [ %x, %entry ], [ %x, %entry ]
308 ; CHECK-NEXT: %[[R2:.*]] = phi i32 [ %y, %entry ], [ %y, %entry ]
309 ; CHECK-NEXT: %[[R:.*]] = add i32 %[[R1]], %[[R2]]
310 ; CHECK-NEXT: ret i32 %[[R]]
311 }
312
313 ; This test contains a trivially unswitchable switch with a real phi node in
314 ; LCSSA position in a shared exit block where a different path through the loop
315 ; produces a non-invariant input to the PHI node.
316 define i32 @test8(i32* %var, i32 %cond1, i32 %cond2, i32 %x, i32 %y) {
317 ; CHECK-LABEL: @test8(
318 entry:
319 br label %loop_begin
320 ; CHECK-NEXT: entry:
321 ; CHECK-NEXT: switch i32 %cond1, label %entry.split [
322 ; CHECK-NEXT: i32 0, label %loop_exit.split
323 ; CHECK-NEXT: i32 1, label %loop_exit2
324 ; CHECK-NEXT: i32 2, label %loop_exit.split
325 ; CHECK-NEXT: ]
326 ;
327 ; CHECK: entry.split:
328 ; CHECK-NEXT: br label %loop_begin
329
330 loop_begin:
331 switch i32 %cond1, label %continue [
332 i32 0, label %loop_exit
333 i32 1, label %loop_exit2
334 i32 2, label %loop_exit
335 ]
336 ; CHECK: loop_begin:
337 ; CHECK-NEXT: br label %continue
338
339 continue:
340 %var_val = load i32, i32* %var
341 switch i32 %cond2, label %latch [
342 i32 0, label %loop_exit
343 ]
344 ; CHECK: continue:
345 ; CHECK-NEXT: load
346 ; CHECK-NEXT: switch i32 %cond2, label %latch [
347 ; CHECK-NEXT: i32 0, label %loop_exit
348 ; CHECK-NEXT: ]
349
350 latch:
351 call void @some_func() noreturn nounwind
352 br label %loop_begin
353 ; CHECK: latch:
354 ; CHECK-NEXT: call
355 ; CHECK-NEXT: br label %loop_begin
356
357 loop_exit:
358 %result1.1 = phi i32 [ %x, %loop_begin ], [ %x, %loop_begin ], [ %var_val, %continue ]
359 %result1.2 = phi i32 [ %var_val, %continue ], [ %y, %loop_begin ], [ %y, %loop_begin ]
360 %result1 = add i32 %result1.1, %result1.2
361 ret i32 %result1
362 ; CHECK: loop_exit:
363 ; CHECK-NEXT: %[[R1:.*]] = phi i32 [ %var_val, %continue ]
364 ; CHECK-NEXT: %[[R2:.*]] = phi i32 [ %var_val, %continue ]
365 ; CHECK-NEXT: br label %loop_exit.split
366 ;
367 ; CHECK: loop_exit.split:
368 ; CHECK-NEXT: %[[R1S:.*]] = phi i32 [ %x, %entry ], [ %x, %entry ], [ %[[R1]], %loop_exit ]
369 ; CHECK-NEXT: %[[R2S:.*]] = phi i32 [ %y, %entry ], [ %y, %entry ], [ %[[R2]], %loop_exit ]
370 ; CHECK-NEXT: %[[R:.*]] = add i32 %[[R1S]], %[[R2S]]
371 ; CHECK-NEXT: ret i32 %[[R]]
372
373 loop_exit2:
374 %result2.1 = phi i32 [ %x, %loop_begin ]
375 %result2.2 = phi i32 [ %y, %loop_begin ]
376 %result2 = add i32 %result2.1, %result2.2
377 ret i32 %result2
378 ; CHECK: loop_exit2:
379 ; CHECK-NEXT: %[[R1:.*]] = phi i32 [ %x, %entry ]
380 ; CHECK-NEXT: %[[R2:.*]] = phi i32 [ %y, %entry ]
381 ; CHECK-NEXT: %[[R:.*]] = add i32 %[[R1]], %[[R2]]
382 ; CHECK-NEXT: ret i32 %[[R]]
383 }