llvm.org GIT mirror llvm / 131a1e5
[LoopPredication] Allow predication of loop invariant computations (within the loop) The purpose of this patch is to eliminate a pass ordering dependence between LoopPredication and LICM. To understand the purpose, consider the following snippet of code inside some loop 'L' with IV 'i' A = _a.length; guard (i < A) a = _a[i] B = _b.length; guard (i < B); b = _b[i]; ... Z = _z.length; guard (i < Z) z = _z[i] accum += a + b + ... + z; Today, we need LICM to hoist the length loads, LoopPredication to make the guards loop invariant, and TrivialUnswitch to eliminate the loop invariant guard to establish must execute for the next length load. Today, if we can't prove speculation safety, we'd have to iterate these three passes 26 times to reduce this example down to the minimal form. Using the fact that the array lengths are known to be invariant, we can short circuit this iteration. By forming the loop invariant form of all the guards at once, we remove the need for LoopPredication from the iterative cycle. At the moment, we'd still have to iterate LICM and TrivialUnswitch; we'll leave that part for later. As a secondary benefit, this allows LoopPred to expose peeling opportunities in a much more obvious manner. See the udiv test changes as an example. If the udiv was not hoistable (i.e. we couldn't prove speculation safety) this would be an example where peeling becomes obviously profitable whereas it wasn't before. A couple of subtleties in the implementation: - SCEV's isSafeToExpand guarantees speculation safety (i.e. lets us expand at a new point). It is not a precondition for expansion if we know the SCEV corresponds to a Value which dominates the requested expansion point. - SCEV's isLoopInvariant returns true for expressions which compute the same value across all iterations executed, regardless of where the original Value is located. (i.e. it can be in the loop) This implies we have a speculation burden to prove before expanding them outside loops. 
- invariant_loads and AA->pointsToConstantMemory are two cases that SCEV currently does not handle, but meets the SCEV definition of invariance. I plan to sink this part into SCEV once this has baked for a bit. Differential Revision: https://reviews.llvm.org/D60093 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358684 91177308-0d34-0410-b5e6-96231b3b80d8 Philip Reames 5 months ago
4 changed file(s) with 99 addition(s) and 26 deletion(s). Raw diff Collapse all Expand all
177177
178178 #include "llvm/Transforms/Scalar/LoopPredication.h"
179179 #include "llvm/ADT/Statistic.h"
180 #include "llvm/Analysis/AliasAnalysis.h"
180181 #include "llvm/Analysis/BranchProbabilityInfo.h"
181182 #include "llvm/Analysis/GuardUtils.h"
182183 #include "llvm/Analysis/LoopInfo.h"
245246 }
246247 };
247248
249 AliasAnalysis *AA;
248250 ScalarEvolution *SE;
249251 BranchProbabilityInfo *BPI;
250252
274276 /// passed to SCEVExpander!
275277 Instruction *findInsertPt(Instruction *User, ArrayRef Ops);
276278
277 bool CanExpand(const SCEV* S);
279 /// Return true if the value is known to produce a single fixed value across
280 /// all iterations on which it executes. Note that this does not imply
281 /// speculation safety. That must be established seperately.
282 bool isLoopInvariantValue(const SCEV* S);
283
278284 Value *expandCheck(SCEVExpander &Expander, Instruction *Guard,
279285 ICmpInst::Predicate Pred, const SCEV *LHS,
280286 const SCEV *RHS);
317323 Optional generateLoopLatchCheck(Type *RangeCheckType);
318324
319325 public:
320 LoopPredication(ScalarEvolution *SE, BranchProbabilityInfo *BPI)
321 : SE(SE), BPI(BPI){};
326 LoopPredication(AliasAnalysis *AA, ScalarEvolution *SE,
327 BranchProbabilityInfo *BPI)
328 : AA(AA), SE(SE), BPI(BPI){};
322329 bool runOnLoop(Loop *L);
323330 };
324331
340347 auto *SE = &getAnalysis().getSE();
341348 BranchProbabilityInfo &BPI =
342349 getAnalysis().getBPI();
343 LoopPredication LP(SE, &BPI);
350 auto *AA = &getAnalysis().getAAResults();
351 LoopPredication LP(AA, SE, &BPI);
344352 return LP.runOnLoop(L);
345353 }
346354 };
366374 AM.getResult(L, AR).getManager();
367375 Function *F = L.getHeader()->getParent();
368376 auto *BPI = FAM.getCachedResult(*F);
369 LoopPredication LP(&AR.SE, BPI);
377 LoopPredication LP(&AR.AA, &AR.SE, BPI);
370378 if (!LP.runOnLoop(&L))
371379 return PreservedAnalyses::all();
372380
461469
462470 Instruction *LoopPredication::findInsertPt(Instruction *Use,
463471 ArrayRef Ops) {
472 // Subtlety: SCEV considers things to be invariant if the value produced is
473 // the same across iterations. This is not the same as being able to
474 // evaluate outside the loop, which is what we actually need here.
464475 for (const SCEV *Op : Ops)
465 if (!SE->isLoopInvariant(Op, L))
476 if (!SE->isLoopInvariant(Op, L) ||
477 !isSafeToExpandAt(Op, Preheader->getTerminator(), *SE))
466478 return Use;
467479 return Preheader->getTerminator();
468480 }
469481
470
471 bool LoopPredication::CanExpand(const SCEV* S) {
472 return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE);
482 bool LoopPredication::isLoopInvariantValue(const SCEV* S) {
483 // Handling expressions which produce invariant results, but *haven't* yet
484 // been removed from the loop serves two important purposes.
485 // 1) Most importantly, it resolves a pass ordering cycle which would
486 // otherwise need us to iteration licm, loop-predication, and either
487 // loop-unswitch or loop-peeling to make progress on examples with lots of
488 // predicable range checks in a row. (Since, in the general case, we can't
489 // hoist the length checks until the dominating checks have been discharged
490 // as we can't prove doing so is safe.)
491 // 2) As a nice side effect, this exposes the value of peeling or unswitching
492 // much more obviously in the IR. Otherwise, the cost modeling for other
493 // transforms would end up needing to duplicate all of this logic to model a
494 // check which becomes predictable based on a modeled peel or unswitch.
495 //
496 // The cost of doing so in the worst case is an extra fill from the stack in
497 // the loop to materialize the loop invariant test value instead of checking
498 // against the original IV which is presumable in a register inside the loop.
499 // Such cases are presumably rare, and hint at missing oppurtunities for
500 // other passes.
501
502 if (SE->isLoopInvariant(S, L))
503 // Note: This the SCEV variant, so the original Value* may be within the
504 // loop even though SCEV has proven it is loop invariant.
505 return true;
506
507 // Handle a particular important case which SCEV doesn't yet know about which
508 // shows up in range checks on arrays with immutable lengths.
509 // TODO: This should be sunk inside SCEV.
510 if (const SCEVUnknown *U = dyn_cast(S))
511 if (const auto *LI = dyn_cast(U->getValue()))
512 if (LI->isUnordered())
513 if (AA->pointsToConstantMemory(LI->getOperand(0)) ||
514 LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
515 return true;
516 return false;
473517 }
474518
475519 Optional LoopPredication::widenICmpRangeCheckIncrementingLoop(
486530 const SCEV *GuardLimit = RangeCheck.Limit;
487531 const SCEV *LatchStart = LatchCheck.IV->getStart();
488532 const SCEV *LatchLimit = LatchCheck.Limit;
533 // Subtlety: We need all the values to be *invariant* across all iterations,
534 // but we only need to check expansion safety for those which *aren't*
535 // already guaranteed to dominate the guard.
536 if (!isLoopInvariantValue(GuardStart) ||
537 !isLoopInvariantValue(GuardLimit) ||
538 !isLoopInvariantValue(LatchStart) ||
539 !isLoopInvariantValue(LatchLimit)) {
540 LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
541 return None;
542 }
543 if (!isSafeToExpandAt(LatchStart, Guard, *SE) ||
544 !isSafeToExpandAt(LatchLimit, Guard, *SE)) {
545 LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
546 return None;
547 }
489548
490549 // guardLimit - guardStart + latchStart - 1
491550 const SCEV *RHS =
492551 SE->getAddExpr(SE->getMinusSCEV(GuardLimit, GuardStart),
493552 SE->getMinusSCEV(LatchStart, SE->getOne(Ty)));
494 if (!CanExpand(GuardStart) || !CanExpand(GuardLimit) ||
495 !CanExpand(LatchLimit) || !CanExpand(RHS)) {
496 LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
497 return None;
498 }
499553 auto LimitCheckPred =
500554 ICmpInst::getFlippedStrictnessPredicate(LatchCheck.Pred);
501555
517571 auto *Ty = RangeCheck.IV->getType();
518572 const SCEV *GuardStart = RangeCheck.IV->getStart();
519573 const SCEV *GuardLimit = RangeCheck.Limit;
574 const SCEV *LatchStart = LatchCheck.IV->getStart();
520575 const SCEV *LatchLimit = LatchCheck.Limit;
521 if (!CanExpand(GuardStart) || !CanExpand(GuardLimit) ||
522 !CanExpand(LatchLimit)) {
576 // Subtlety: We need all the values to be *invariant* across all iterations,
577 // but we only need to check expansion safety for those which *aren't*
578 // already guaranteed to dominate the guard.
579 if (!isLoopInvariantValue(GuardStart) ||
580 !isLoopInvariantValue(GuardLimit) ||
581 !isLoopInvariantValue(LatchStart) ||
582 !isLoopInvariantValue(LatchLimit)) {
583 LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
584 return None;
585 }
586 if (!isSafeToExpandAt(LatchStart, Guard, *SE) ||
587 !isSafeToExpandAt(LatchLimit, Guard, *SE)) {
523588 LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
524589 return None;
525590 }
15001500 ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
15011501 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
15021502 ; CHECK-NEXT: [[LENGTH_UDIV:%.*]] = udiv i32 [[LENGTH:%.*]], [[DIVIDER:%.*]]
1503 ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH_UDIV]]
1504 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
1503 ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule i32 [[N]], [[LENGTH_UDIV]]
1504 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 0, [[LENGTH_UDIV]]
1505 ; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]]
1506 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP2]], i32 9) [ "deopt"() ]
15051507 ; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64
15061508 ; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]]
15071509 ; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4
18071807 ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER]] ]
18081808 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
18091809 ; CHECK-NEXT: [[LENGTH_UDIV:%.*]] = udiv i32 [[LENGTH:%.*]], [[DIVIDER:%.*]]
1810 ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH_UDIV]]
1811 ; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
1812 ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WITHIN_BOUNDS]], [[WIDENABLE_COND]]
1813 ; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0
1810 ; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
1811 ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule i32 [[N]], [[LENGTH_UDIV]]
1812 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 0, [[LENGTH_UDIV]]
1813 ; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]]
1814 ; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[WIDENABLE_COND]]
1815 ; CHECK-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0
18141816 ; CHECK: deopt:
18151817 ; CHECK-NEXT: [[DEOPTCALL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32(i32 9) [ "deopt"() ]
18161818 ; CHECK-NEXT: ret i32 [[DEOPTCALL]]
7777 ; CHECK-NEXT: [[UNKNOWN:%.*]] = load volatile i1, i1* @UNKNOWN
7878 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[UNKNOWN]]) [ "deopt"() ]
7979 ; CHECK-NEXT: [[LEN:%.*]] = load i32, i32* [[LENGTH:%.*]], align 4, !invariant.load !0
80 ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LEN]]
81 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
80 ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule i32 [[N]], [[LEN]]
81 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 0, [[LEN]]
82 ; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]]
83 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP2]], i32 9) [ "deopt"() ]
8284 ; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64
8385 ; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]]
8486 ; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4
263265 ; CHECK-NEXT: [[UNKNOWN:%.*]] = load volatile i1, i1* @UNKNOWN
264266 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[UNKNOWN]]) [ "deopt"() ]
265267 ; CHECK-NEXT: [[LEN:%.*]] = load i32, i32* @Length, align 4
266 ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LEN]]
267 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
268 ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule i32 [[N]], [[LEN]]
269 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 0, [[LEN]]
270 ; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]]
271 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP2]], i32 9) [ "deopt"() ]
268272 ; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64
269273 ; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]]
270274 ; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4