llvm.org GIT mirror: llvm / bdc9234
[PSCEV] Create AddRec for Phis in cases of possible integer overflow, using runtime checks

Extend the SCEVPredicateRewriter to work a bit harder when it encounters an UnknownSCEV for a Phi node; try to build an AddRecurrence also for Phi nodes whose update chain involves casts that can be ignored under the proper runtime overflow test. This is one step towards addressing PR30654.

Differential revision: http://reviews.llvm.org/D30041

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@308299 91177308-0d34-0410-b5e6-96231b3b80d8

Author: Dorit Nuzman
3 changed files with 651 additions and 28 deletions.
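For context, the source-level pattern this patch lets SCEV recognize as an induction (under runtime predicates) is a loop whose narrow induction variable is truncated and re-extended on every update. A minimal C-level sketch, mirroring the doit1 test case added below in this commit:

    // Mirrors the doit1 test added in this commit: 'p' is a char, so each
    // update truncates the 32-bit sum back to 8 bits, and reading 'p' sign-
    // extends it again. The i32 PHI for 'p' is an add-recurrence only if the
    // trunc/sext round trip never changes the value, which is exactly what
    // the new runtime overflow/equality predicates guarantee.
    int a[250];
    void doit1(int n, int step) {
      char p = 0;
      for (int i = 0; i < n; i++) {
        a[i] = p;     // implicit sext of the 8-bit 'p' to i32
        p = p + step; // i32 add, implicitly truncated back to 8 bits in 'p'
      }
    }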
236236 };
237237
238238 /// This class represents an assumption that two SCEV expressions are equal,
239 /// and this can be checked at run-time. We assume that the left hand side is
240 /// a SCEVUnknown and the right hand side a constant.
239 /// and this can be checked at run-time.
241240 class SCEVEqualPredicate final : public SCEVPredicate {
242 /// We assume that LHS == RHS, where LHS is a SCEVUnknown and RHS a
243 /// constant.
244 const SCEVUnknown *LHS;
245 const SCEVConstant *RHS;
241 /// We assume that LHS == RHS.
242 const SCEV *LHS;
243 const SCEV *RHS;
246244
247245 public:
248 SCEVEqualPredicate(const FoldingSetNodeIDRef ID, const SCEVUnknown *LHS,
249 const SCEVConstant *RHS);
246 SCEVEqualPredicate(const FoldingSetNodeIDRef ID, const SCEV *LHS,
247 const SCEV *RHS);
250248
251249 /// Implementation of the SCEVPredicate interface
252250 bool implies(const SCEVPredicate *N) const override;
255253 const SCEV *getExpr() const override;
256254
257255 /// Returns the left hand side of the equality.
258 const SCEVUnknown *getLHS() const { return LHS; }
256 const SCEV *getLHS() const { return LHS; }
259257
260258 /// Returns the right hand side of the equality.
261 const SCEVConstant *getRHS() const { return RHS; }
259 const SCEV *getRHS() const { return RHS; }
262260
263261 /// Methods for support type inquiry through isa, cast, and dyn_cast:
264262 static bool classof(const SCEVPredicate *P) {
12401238 SmallVector<const SCEV *, 4> NewOp(Operands.begin(), Operands.end());
12411239 return getAddRecExpr(NewOp, L, Flags);
12421240 }
1241
1242 /// Checks if \p SymbolicPHI can be rewritten as an AddRecExpr under some
1243 /// Predicates. If successful return these <AddRecExpr, Predicates>;
1244 /// The function is intended to be called from PSCEV (the caller will decide
1245 /// whether to actually add the predicates and carry out the rewrites).
1246 Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
1247 createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI);
1248
12431249 /// Returns an expression for a GEP
12441250 ///
12451251 /// \p GEP The GEP. The indices contained in the GEP itself are ignored,
16741680 return F.getParent()->getDataLayout();
16751681 }
16761682
1677 const SCEVPredicate *getEqualPredicate(const SCEVUnknown *LHS,
1678 const SCEVConstant *RHS);
1683 const SCEVPredicate *getEqualPredicate(const SCEV *LHS, const SCEV *RHS);
16791684
16801685 const SCEVPredicate *
16811686 getWrapPredicate(const SCEVAddRecExpr *AR,
16911696 SmallPtrSetImpl<const SCEVPredicate *> &Preds);
16921697
16931698 private:
1699 /// Similar to createAddRecFromPHI, but with the additional flexibility of
1700 /// suggesting runtime overflow checks in case casts are encountered.
1701 /// If successful, the analysis records that for this loop, \p SymbolicPHI,
1702 /// which is the UnknownSCEV currently representing the PHI, can be rewritten
1703 /// into an AddRec, assuming some predicates; the function then returns the
1704 /// AddRec and the predicates as a pair, and caches this pair in
1705 /// PredicatedSCEVRewrites.
1706 /// If the analysis is not successful, a mapping from the \p SymbolicPHI to
1707 /// itself (with no predicates) is recorded, and None is returned instead of
1708 /// a rewrite pair.
1709 Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
1710 createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI);
1711
16941712 /// Compute the backedge taken count knowing the interval difference, the
16951713 /// stride and presence of the equality in the comparison.
16961714 const SCEV *computeBECount(const SCEV *Delta, const SCEV *Stride,
17211739 FoldingSet<SCEVPredicate> UniquePreds;
17221740 BumpPtrAllocator SCEVAllocator;
17231741
1742 /// Cache tentative mappings from UnknownSCEVs in a Loop to a SCEV expression
1743 /// they can be rewritten into under certain predicates.
1744 DenseMap<std::pair<const SCEVUnknown *, const Loop *>,
1745 std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
1746 PredicatedSCEVRewrites;
1747
17241748 /// The head of a linked list of all SCEVUnknown values that have been
17251749 /// allocated. This is used by releaseMemory to locate them all and call
17261750 /// their destructors.
41724172 return None;
41734173 }
41744174
4175 /// Helper function to createAddRecFromPHIWithCasts. We have a phi
4176 /// node whose symbolic (unknown) SCEV is \p SymbolicPHI, which is updated via
4177 /// the loop backedge by a SCEVAddExpr, possibly also with a few casts on the
4178 /// way. This function checks if \p Op, an operand of this SCEVAddExpr,
4179 /// follows one of the following patterns:
4180 /// Op == (SExt ix (Trunc iy (%SymbolicPHI) to ix) to iy)
4181 /// Op == (ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy)
4182 /// If the SCEV expression of \p Op conforms with one of the expected patterns
4183 /// we return the type of the truncation operation, and indicate whether the
4184 /// truncated type should be treated as signed/unsigned by setting
4185 /// \p Signed to true/false, respectively.
4186 static Type *isSimpleCastedPHI(const SCEV *Op, const SCEVUnknown *SymbolicPHI,
4187 bool &Signed, ScalarEvolution &SE) {
4188
4189 // The case where Op == SymbolicPHI (that is, with no type conversions on
4190 // the way) is handled by the regular add recurrence creating logic and
4191 // would have already been triggered in createAddRecForPHI. Reaching it here
4192 // means that createAddRecFromPHI had failed for this PHI before (e.g.,
4193 // because one of the other operands of the SCEVAddExpr updating this PHI is
4194 // not invariant).
4195 //
4196 // Here we look for the case where Op = (ext(trunc(SymbolicPHI))), and in
4197 // this case predicates that allow us to prove that Op == SymbolicPHI will
4198 // be added.
4199 if (Op == SymbolicPHI)
4200 return nullptr;
4201
4202 unsigned SourceBits = SE.getTypeSizeInBits(SymbolicPHI->getType());
4203 unsigned NewBits = SE.getTypeSizeInBits(Op->getType());
4204 if (SourceBits != NewBits)
4205 return nullptr;
4206
4207 const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(Op);
4208 const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(Op);
4209 if (!SExt && !ZExt)
4210 return nullptr;
4211 const SCEVTruncateExpr *Trunc =
4212 SExt ? dyn_cast<SCEVTruncateExpr>(SExt->getOperand())
4213 : dyn_cast<SCEVTruncateExpr>(ZExt->getOperand());
4214 if (!Trunc)
4215 return nullptr;
4216 const SCEV *X = Trunc->getOperand();
4217 if (X != SymbolicPHI)
4218 return nullptr;
4219 Signed = SExt ? true : false;
4220 return Trunc->getType();
4221 }
4222
4223 static const Loop *isIntegerLoopHeaderPHI(const PHINode *PN, LoopInfo &LI) {
4224 if (!PN->getType()->isIntegerTy())
4225 return nullptr;
4226 const Loop *L = LI.getLoopFor(PN->getParent());
4227 if (!L || L->getHeader() != PN->getParent())
4228 return nullptr;
4229 return L;
4230 }
4231
4232 // Analyze \p SymbolicPHI, a SCEV expression of a phi node, and check if the
4233 // computation that updates the phi follows the following pattern:
4234 // (SExt/ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) + InvariantAccum
4235 // which corresponds to a phi->trunc->sext/zext->add->phi update chain.
4236 // If so, try to see if it can be rewritten as an AddRecExpr under some
4237 // Predicates. If successful, return them as a pair. Also cache the results
4238 // of the analysis.
4239 //
4240 // Example usage scenario:
4241 // Say the Rewriter is called for the following SCEV:
4242 // 8 * ((sext i32 (trunc i64 %X to i32) to i64) + %Step)
4243 // where:
4244 // %X = phi i64 (%Start, %BEValue)
4245 // It will visitMul->visitAdd->visitSExt->visitTrunc->visitUnknown(%X),
4246 // and call this function with %SymbolicPHI = %X.
4247 //
4248 // The analysis will find that the value coming around the backedge has
4249 // the following SCEV:
4250 // BEValue = ((sext i32 (trunc i64 %X to i32) to i64) + %Step)
4251 // Upon concluding that this matches the desired pattern, the function
4252 // will return the pair {NewAddRec, SmallPredsVec} where:
4253 // NewAddRec = {%Start,+,%Step}
4254 // SmallPredsVec = {P1, P2, P3} as follows:
4255 // P1(WrapPred): AR: {trunc(%Start),+,(trunc %Step)} Flags: <nssw>
4256 // P2(EqualPred): %Start == (sext i32 (trunc i64 %Start to i32) to i64)
4257 // P3(EqualPred): %Step == (sext i32 (trunc i64 %Step to i32) to i64)
4258 // The returned pair means that SymbolicPHI can be rewritten into NewAddRec
4259 // under the predicates {P1,P2,P3}.
4260 // This predicated rewrite will be cached in PredicatedSCEVRewrites:
4261 // PredicatedSCEVRewrites[{%X,L}] = {NewAddRec, {P1,P2,P3}}
4262 //
4263 // TODO's:
4264 //
4265 // 1) Extend the Induction descriptor to also support inductions that involve
4266 // casts: When needed (namely, when we are called in the context of the
4267 // vectorizer induction analysis), a Set of cast instructions will be
4268 // populated by this method, and provided back to isInductionPHI. This is
4269 // needed to allow the vectorizer to properly record them to be ignored by
4270 // the cost model and to avoid vectorizing them (otherwise these casts,
4271 // which are redundant under the runtime overflow checks, will be
4272 // vectorized, which can be costly).
4273 //
4274 // 2) Support additional induction/PHISCEV patterns: We also want to support
4275 // inductions where the sext-trunc / zext-trunc operations (partly) occur
4276 // after the induction update operation (the induction increment):
4277 //
4278 // (Trunc iy (SExt/ZExt ix (%SymbolicPHI + InvariantAccum) to iy) to ix)
4279 // which corresponds to a phi->add->trunc->sext/zext->phi update chain.
4280 //
4281 // (Trunc iy ((SExt/ZExt ix (%SymbolicPhi) to iy) + InvariantAccum) to ix)
4282 // which corresponds to a phi->trunc->add->sext/zext->phi update chain.
4283 //
4284 // 3) Outline common code with createAddRecFromPHI to avoid duplication.
4285 //
4286 Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
4287 ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI) {
4288 SmallVector<const SCEVPredicate *, 3> Predicates;
4289
4290 // *** Part1: Analyze if we have a phi-with-cast pattern for which we can
4291 // return an AddRec expression under some predicate.
4292
4293 auto *PN = cast<PHINode>(SymbolicPHI->getValue());
4294 const Loop *L = isIntegerLoopHeaderPHI(PN, LI);
4295 assert (L && "Expecting an integer loop header phi");
4296
4297 // The loop may have multiple entrances or multiple exits; we can analyze
4298 // this phi as an addrec if it has a unique entry value and a unique
4299 // backedge value.
4300 Value *BEValueV = nullptr, *StartValueV = nullptr;
4301 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
4302 Value *V = PN->getIncomingValue(i);
4303 if (L->contains(PN->getIncomingBlock(i))) {
4304 if (!BEValueV) {
4305 BEValueV = V;
4306 } else if (BEValueV != V) {
4307 BEValueV = nullptr;
4308 break;
4309 }
4310 } else if (!StartValueV) {
4311 StartValueV = V;
4312 } else if (StartValueV != V) {
4313 StartValueV = nullptr;
4314 break;
4315 }
4316 }
4317 if (!BEValueV || !StartValueV)
4318 return None;
4319
4320 const SCEV *BEValue = getSCEV(BEValueV);
4321
4322 // If the value coming around the backedge is an add with the symbolic
4323 // value we just inserted, possibly with casts that we can ignore under
4324 // an appropriate runtime guard, then we found a simple induction variable!
4325 const auto *Add = dyn_cast<SCEVAddExpr>(BEValue);
4326 if (!Add)
4327 return None;
4328
4329 // If there is a single occurrence of the symbolic value, possibly
4330 // casted, replace it with a recurrence.
4331 unsigned FoundIndex = Add->getNumOperands();
4332 Type *TruncTy = nullptr;
4333 bool Signed;
4334 for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
4335 if ((TruncTy =
4336 isSimpleCastedPHI(Add->getOperand(i), SymbolicPHI, Signed, *this)))
4337 if (FoundIndex == e) {
4338 FoundIndex = i;
4339 break;
4340 }
4341
4342 if (FoundIndex == Add->getNumOperands())
4343 return None;
4344
4345 // Create an add with everything but the specified operand.
4346 SmallVector<const SCEV *, 8> Ops;
4347 for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
4348 if (i != FoundIndex)
4349 Ops.push_back(Add->getOperand(i));
4350 const SCEV *Accum = getAddExpr(Ops);
4351
4352 // The runtime checks will not be valid if the step amount is
4353 // varying inside the loop.
4354 if (!isLoopInvariant(Accum, L))
4355 return None;
4356
4357
4358 // *** Part2: Create the predicates
4359
4360 // Analysis was successful: we have a phi-with-cast pattern for which we
4361 // can return an AddRec expression under the following predicates:
4362 //
4363 // P1: A Wrap predicate that guarantees that Trunc(Start) + i*Trunc(Accum)
4364 // fits within the truncated type (does not overflow) for i = 0 to n-1.
4365 // P2: An Equal predicate that guarantees that
4366 // Start = (Ext ix (Trunc iy (Start) to ix) to iy)
4367 // P3: An Equal predicate that guarantees that
4368 // Accum = (Ext ix (Trunc iy (Accum) to ix) to iy)
4369 //
4370 // As we next prove, the above predicates guarantee that:
4371 // Start + i*Accum = (Ext ix (Trunc iy ( Start + i*Accum ) to ix) to iy)
4372 //
4373 //
4374 // More formally, we want to prove that:
4375 // Expr(i+1) = Start + (i+1) * Accum
4376 // = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum
4377 //
4378 // Given that:
4379 // 1) Expr(0) = Start
4380 // 2) Expr(1) = Start + Accum
4381 // = (Ext ix (Trunc iy (Start) to ix) to iy) + Accum :: from P2
4382 // 3) Induction hypothesis (step i):
4383 // Expr(i) = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum
4384 //
4385 // Proof:
4386 // Expr(i+1) =
4387 // = Start + (i+1)*Accum
4388 // = (Start + i*Accum) + Accum
4389 // = Expr(i) + Accum
4390 // = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum + Accum
4391 // :: from step i
4392 //
4393 // = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy) + Accum + Accum
4394 //
4395 // = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy)
4396 // + (Ext ix (Trunc iy (Accum) to ix) to iy)
4397 // + Accum :: from P3
4398 //
4399 // = (Ext ix (Trunc iy ((Start + (i-1)*Accum) + Accum) to ix) to iy)
4400 // + Accum :: from P1: Ext(x)+Ext(y)=>Ext(x+y)
4401 //
4402 // = (Ext ix (Trunc iy (Start + i*Accum) to ix) to iy) + Accum
4403 // = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum
4404 //
4405 // By induction, the same applies to all iterations 1<=i
4406 //
4407
4408 // Create a truncated addrec for which we will add a no overflow check (P1).
4409 const SCEV *StartVal = getSCEV(StartValueV);
4410 const SCEV *PHISCEV =
4411 getAddRecExpr(getTruncateExpr(StartVal, TruncTy),
4412 getTruncateExpr(Accum, TruncTy), L, SCEV::FlagAnyWrap);
4413 const auto *AR = cast<SCEVAddRecExpr>(PHISCEV);
4414
4415 SCEVWrapPredicate::IncrementWrapFlags AddedFlags =
4416 Signed ? SCEVWrapPredicate::IncrementNSSW
4417 : SCEVWrapPredicate::IncrementNUSW;
4418 const SCEVPredicate *AddRecPred = getWrapPredicate(AR, AddedFlags);
4419 Predicates.push_back(AddRecPred);
4420
4421 // Create the Equal Predicates P2,P3:
4422 auto AppendPredicate = [&](const SCEV *Expr) -> void {
4423 assert (isLoopInvariant(Expr, L) && "Expr is expected to be invariant");
4424 const SCEV *TruncatedExpr = getTruncateExpr(Expr, TruncTy);
4425 const SCEV *ExtendedExpr =
4426 Signed ? getSignExtendExpr(TruncatedExpr, Expr->getType())
4427 : getZeroExtendExpr(TruncatedExpr, Expr->getType());
4428 if (Expr != ExtendedExpr &&
4429 !isKnownPredicate(ICmpInst::ICMP_EQ, Expr, ExtendedExpr)) {
4430 const SCEVPredicate *Pred = getEqualPredicate(Expr, ExtendedExpr);
4431 DEBUG (dbgs() << "Added Predicate: " << *Pred);
4432 Predicates.push_back(Pred);
4433 }
4434 };
4435
4436 AppendPredicate(StartVal);
4437 AppendPredicate(Accum);
4438
4439 // *** Part3: Predicates are ready. Now go ahead and create the new addrec in
4440 // which the casts had been folded away. The caller can rewrite SymbolicPHI
4441 // into NewAR if it will also add the runtime overflow checks specified in
4442 // Predicates.
4443 auto *NewAR = getAddRecExpr(StartVal, Accum, L, SCEV::FlagAnyWrap);
4444
4445 std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> PredRewrite =
4446 std::make_pair(NewAR, Predicates);
4447 // Remember the result of the analysis for this SCEV at this location.
4448 PredicatedSCEVRewrites[{SymbolicPHI, L}] = PredRewrite;
4449 return PredRewrite;
4450 }
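As a standalone illustration (not part of the patch) of why the equality predicates P2/P3 built above are needed, the following self-contained C++ snippet shows a 32-bit step that does not survive the trunc-to-i8/sign-extend round trip; for such a value the casts in the PHI update chain cannot be ignored, and the corresponding runtime check is meant to fail:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int32_t Step = 300;                        // loop-invariant increment
      int8_t Narrow = static_cast<int8_t>(Step); // trunc i32 300 to i8 -> 44 (two's complement)
      int32_t RoundTrip = Narrow;                // sext i8 44 to i32 -> 44
      // P3-style check: Step == sext(trunc(Step))? Here 300 != 44, so the
      // rewrite of the phi to {Start,+,Step} would be unsound for this input.
      std::printf("Step=%d sext(trunc(Step))=%d P3 holds: %s\n",
                  static_cast<int>(Step), static_cast<int>(RoundTrip),
                  Step == RoundTrip ? "yes" : "no");
      return 0;
    }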
4451
4452 Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
4453 ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI) {
4454
4455 auto *PN = cast<PHINode>(SymbolicPHI->getValue());
4456 const Loop *L = isIntegerLoopHeaderPHI(PN, LI);
4457 if (!L)
4458 return None;
4459
4460 // Check to see if we already analyzed this PHI.
4461 auto I = PredicatedSCEVRewrites.find({SymbolicPHI, L});
4462 if (I != PredicatedSCEVRewrites.end()) {
4463 std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> Rewrite =
4464 I->second;
4465 // Analysis was done before and failed to create an AddRec:
4466 if (Rewrite.first == SymbolicPHI)
4467 return None;
4468 // Analysis was done before and succeeded to create an AddRec under
4469 // a predicate:
4470 assert(isa<SCEVAddRecExpr>(Rewrite.first) && "Expected an AddRec");
4471 assert(!(Rewrite.second).empty() && "Expected to find Predicates");
4472 return Rewrite;
4473 }
4474
4475 Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
4476 Rewrite = createAddRecFromPHIWithCastsImpl(SymbolicPHI);
4477
4478 // Record in the cache that the analysis failed
4479 if (!Rewrite) {
4480 SmallVector<const SCEVPredicate *, 3> Predicates;
4481 PredicatedSCEVRewrites[{SymbolicPHI, L}] = {SymbolicPHI, Predicates};
4482 return None;
4483 }
4484
4485 return Rewrite;
4486 }
4487
41754488 /// A helper function for createAddRecFromPHI to handle simple cases.
41764489 ///
41774490 /// This function tries to find an AddRec expression for the simplest (yet most
59026215
59036216 RemoveLoopFromBackedgeMap(BackedgeTakenCounts);
59046217 RemoveLoopFromBackedgeMap(PredicatedBackedgeTakenCounts);
6218
6219 // Drop information about predicated SCEV rewrites for this loop.
6220 for (auto I = PredicatedSCEVRewrites.begin();
6221 I != PredicatedSCEVRewrites.end();) {
6222 std::pair<const SCEVUnknown *, const Loop *> Entry = I->first;
6223 if (Entry.second == L)
6224 PredicatedSCEVRewrites.erase(I++);
6225 else
6226 ++I;
6227 }
59056228
59066229 // Drop information about expressions based on loop-header PHIs.
59076230 SmallVector<Instruction *, 16> Worklist;
1006110384 UniqueSCEVs(std::move(Arg.UniqueSCEVs)),
1006210385 UniquePreds(std::move(Arg.UniquePreds)),
1006310386 SCEVAllocator(std::move(Arg.SCEVAllocator)),
10387 PredicatedSCEVRewrites(std::move(Arg.PredicatedSCEVRewrites)),
1006410388 FirstUnknown(Arg.FirstUnknown) {
1006510389 Arg.FirstUnknown = nullptr;
1006610390 }
1046110785 HasRecMap.erase(S);
1046210786 MinTrailingZerosCache.erase(S);
1046310787
10788 for (auto I = PredicatedSCEVRewrites.begin();
10789 I != PredicatedSCEVRewrites.end();) {
10790 std::pair<const SCEVUnknown *, const Loop *> Entry = I->first;
10791 if (Entry.first == S)
10792 PredicatedSCEVRewrites.erase(I++);
10793 else
10794 ++I;
10795 }
10796
1046410797 auto RemoveSCEVFromBackedgeMap =
1046510798 [S, this](DenseMap<const Loop *, BackedgeTakenInfo> &Map) {
1046610799 for (auto I = Map.begin(), E = Map.end(); I != E;) {
1062010953 AU.addRequiredTransitive();
1062110954 }
1062210955
10623 const SCEVPredicate *
10624 ScalarEvolution::getEqualPredicate(const SCEVUnknown *LHS,
10625 const SCEVConstant *RHS) {
10956 const SCEVPredicate *ScalarEvolution::getEqualPredicate(const SCEV *LHS,
10957 const SCEV *RHS) {
1062610958 FoldingSetNodeID ID;
10959 assert(LHS->getType() == RHS->getType() &&
10960 "Type mismatch between LHS and RHS");
1062710961 // Unique this node based on the arguments
1062810962 ID.AddInteger(SCEVPredicate::P_Equal);
1062910963 ID.AddPointer(LHS);
1068611020 if (IPred->getLHS() == Expr)
1068711021 return IPred->getRHS();
1068811022 }
10689
10690 return Expr;
11023 return convertToAddRecWithPreds(Expr);
1069111024 }
1069211025
1069311026 const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
1072311056 }
1072411057
1072511058 private:
11059 bool addOverflowAssumption(const SCEVPredicate *P) {
11060 if (!NewPreds) {
11061 // Check if we've already made this assumption.
11062 return Pred && Pred->implies(P);
11063 }
11064 NewPreds->insert(P);
11065 return true;
11066 }
11067
1072611068 bool addOverflowAssumption(const SCEVAddRecExpr *AR,
1072711069 SCEVWrapPredicate::IncrementWrapFlags AddedFlags) {
1072811070 auto *A = SE.getWrapPredicate(AR, AddedFlags);
10729 if (!NewPreds) {
10730 // Check if we've already made this assumption.
10731 return Pred && Pred->implies(A);
10732 }
10733 NewPreds->insert(A);
10734 return true;
10735 }
10736
11071 return addOverflowAssumption(A);
11072 }
11073
11074 // If \p Expr represents a PHINode, we try to see if it can be represented
11075 // as an AddRec, possibly under a predicate (PHISCEVPred). If it is possible
11076 // to add this predicate as a runtime overflow check, we return the AddRec.
11077 // If \p Expr does not meet these conditions (is not a PHI node, or we
11078 // couldn't create an AddRec for it, or couldn't add the predicate), we just
11079 // return \p Expr.
11080 const SCEV *convertToAddRecWithPreds(const SCEVUnknown *Expr) {
11081 if (!isa<PHINode>(Expr->getValue()))
11082 return Expr;
11083 Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
11084 PredicatedRewrite = SE.createAddRecFromPHIWithCasts(Expr);
11085 if (!PredicatedRewrite)
11086 return Expr;
11087 for (auto *P : PredicatedRewrite->second){
11088 if (!addOverflowAssumption(P))
11089 return Expr;
11090 }
11091 return PredicatedRewrite->first;
11092 }
11093
1073711094 SmallPtrSetImpl<const SCEVPredicate *> *NewPreds;
1073811095 SCEVUnionPredicate *Pred;
1073911096 const Loop *L;
1077011127 : FastID(ID), Kind(Kind) {}
1077111128
1077211129 SCEVEqualPredicate::SCEVEqualPredicate(const FoldingSetNodeIDRef ID,
10773 const SCEVUnknown *LHS,
10774 const SCEVConstant *RHS)
10775 : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) {}
11130 const SCEV *LHS, const SCEV *RHS)
11131 : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) {
11132 assert(LHS->getType() == RHS->getType() && "LHS and RHS types don't match");
11133 assert(LHS != RHS && "LHS and RHS are the same SCEV");
11134 }
1077611135
1077711136 bool SCEVEqualPredicate::implies(const SCEVPredicate *N) const {
1077811137 const auto *Op = dyn_cast<SCEVEqualPredicate>(N);
0 ; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s 2>&1 | FileCheck %s
1
2 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
3
4 ; Check that the vectorizer identifies the %p.09 phi,
5 ; as an induction variable, despite the potential overflow
6 ; due to the truncation from 32bit to 8bit.
7 ; SCEV will detect the pattern "sext(trunc(%p.09)) + %step"
8 ; and generate the required runtime checks under which
9 ; we can assume no overflow. We check here that we generate
10 ; exactly two runtime checks:
11 ; 1) an overflow check:
12 ;    {0,+,(trunc i32 %step to i8)}<%for.body> Added Flags: <nssw>
13 ; 2) an equality check verifying that the step of the induction
14 ; is equal to sext(trunc(step)):
15 ; Equal predicate: %step == (sext i8 (trunc i32 %step to i8) to i32)
16 ;
17 ; See also pr30654.
18 ;
19 ; int a[N];
20 ; void doit1(int n, int step) {
21 ; int i;
22 ; char p = 0;
23 ; for (i = 0; i < n; i++) {
24 ; a[i] = p;
25 ; p = p + step;
26 ; }
27 ; }
28 ;
29
30 ; CHECK-LABEL: @doit1
31 ; CHECK: vector.scevcheck
32 ; CHECK: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
33 ; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
34 ; CHECK: %[[TEST:[0-9]+]] = or i1 {{.*}}, %mul.overflow
35 ; CHECK: %[[NTEST:[0-9]+]] = or i1 false, %[[TEST]]
36 ; CHECK: %ident.check = icmp ne i32 {{.*}}, %{{.*}}
37 ; CHECK: %{{.*}} = or i1 %[[NTEST]], %ident.check
38 ; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
39 ; CHECK: vector.body:
40 ; CHECK: <4 x i32>
41
42 @a = common local_unnamed_addr global [250 x i32] zeroinitializer, align 16
43
44 ; Function Attrs: norecurse nounwind uwtable
45 define void @doit1(i32 %n, i32 %step) local_unnamed_addr {
46 entry:
47 %cmp7 = icmp sgt i32 %n, 0
48 br i1 %cmp7, label %for.body.preheader, label %for.end
49
50 for.body.preheader:
51 %wide.trip.count = zext i32 %n to i64
52 br label %for.body
53
54 for.body:
55 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
56 %p.09 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
57 %sext = shl i32 %p.09, 24
58 %conv = ashr exact i32 %sext, 24
59 %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
60 store i32 %conv, i32* %arrayidx, align 4
61 %add = add nsw i32 %conv, %step
62 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
63 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
64 br i1 %exitcond, label %for.end.loopexit, label %for.body
65
66 for.end.loopexit:
67 br label %for.end
68
69 for.end:
70 ret void
71 }
72
73 ; Same as above, but for checking the SCEV "zext(trunc(%p.09)) + %step".
74 ; Here we expect the following two predicates to be added for runtime checking:
75 ; 1) {0,+,(trunc i32 %step to i8)}<%for.body> Added Flags: <nusw>
76 ; 2) Equal predicate: %step == (zext i8 (trunc i32 %step to i8) to i32)
77 ;
78 ; int a[N];
79 ; void doit2(int n, int step) {
80 ; int i;
81 ; unsigned char p = 0;
82 ; for (i = 0; i < n; i++) {
83 ; a[i] = p;
84 ; p = p + step;
85 ; }
86 ; }
87 ;
88
89 ; CHECK-LABEL: @doit2
90 ; CHECK: vector.scevcheck
91 ; CHECK: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
92 ; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
93 ; CHECK: %[[TEST:[0-9]+]] = or i1 {{.*}}, %mul.overflow
94 ; CHECK: %[[NTEST:[0-9]+]] = or i1 false, %[[TEST]]
95 ; CHECK: %ident.check = icmp ne i32 {{.*}}, %{{.*}}
96 ; CHECK: %{{.*}} = or i1 %[[NTEST]], %ident.check
97 ; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
98 ; CHECK: vector.body:
99 ; CHECK: <4 x i32>
100
101 ; Function Attrs: norecurse nounwind uwtable
102 define void @doit2(i32 %n, i32 %step) local_unnamed_addr {
103 entry:
104 %cmp7 = icmp sgt i32 %n, 0
105 br i1 %cmp7, label %for.body.preheader, label %for.end
106
107 for.body.preheader:
108 %wide.trip.count = zext i32 %n to i64
109 br label %for.body
110
111 for.body:
112 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
113 %p.09 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
114 %conv = and i32 %p.09, 255
115 %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
116 store i32 %conv, i32* %arrayidx, align 4
117 %add = add nsw i32 %conv, %step
118 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
119 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
120 br i1 %exitcond, label %for.end.loopexit, label %for.body
121
122 for.end.loopexit:
123 br label %for.end
124
125 for.end:
126 ret void
127 }
128
129 ; Here we check that the same phi scev analysis would fail
130 ; to create the runtime checks because the step is not invariant.
131 ; As a result vectorization will fail.
132 ;
133 ; int a[N];
134 ; void doit3(int n, int step) {
135 ; int i;
136 ; char p = 0;
137 ; for (i = 0; i < n; i++) {
138 ; a[i] = p;
139 ; p = p + step;
140 ; step += 2;
141 ; }
142 ; }
143 ;
144
145 ; CHECK-LABEL: @doit3
146 ; CHECK-NOT: vector.scevcheck
147 ; CHECK-NOT: vector.body:
148 ; CHECK-LABEL: for.body:
149
150 ; Function Attrs: norecurse nounwind uwtable
151 define void @doit3(i32 %n, i32 %step) local_unnamed_addr {
152 entry:
153 %cmp9 = icmp sgt i32 %n, 0
154 br i1 %cmp9, label %for.body.preheader, label %for.end
155
156 for.body.preheader:
157 %wide.trip.count = zext i32 %n to i64
158 br label %for.body
159
160 for.body:
161 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
162 %p.012 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
163 %step.addr.010 = phi i32 [ %add3, %for.body ], [ %step, %for.body.preheader ]
164 %sext = shl i32 %p.012, 24
165 %conv = ashr exact i32 %sext, 24
166 %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
167 store i32 %conv, i32* %arrayidx, align 4
168 %add = add nsw i32 %conv, %step.addr.010
169 %add3 = add nsw i32 %step.addr.010, 2
170 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
171 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
172 br i1 %exitcond, label %for.end.loopexit, label %for.body
173
174 for.end.loopexit:
175 br label %for.end
176
177 for.end:
178 ret void
179 }
180
181
182 ; Lastly, we also check the case where we can tell at compile time that
183 ; the step of the induction is equal to sext(trunc(step)), in which case
184 ; we don't have to check this equality at runtime (we only need the
185 ; runtime overflow check). Therefore only the following overflow predicate
186 ; will be added for runtime checking:
187 ;    {0,+,%cstep}<%for.body> Added Flags: <nssw>
188 ;
189 ; a[N];
190 ; void doit4(int n, char cstep) {
191 ; int i;
192 ; char p = 0;
193 ; int istep = cstep;
194 ; for (i = 0; i < n; i++) {
195 ; a[i] = p;
196 ; p = p + istep;
197 ; }
198 ; }
199
200 ; CHECK-LABEL: @doit4
201 ; CHECK: vector.scevcheck
202 ; CHECK: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
203 ; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
204 ; CHECK: %{{.*}} = or i1 {{.*}}, %mul.overflow
205 ; CHECK-NOT: %ident.check = icmp ne i32 {{.*}}, %{{.*}}
206 ; CHECK-NOT: %{{.*}} = or i1 %{{.*}}, %ident.check
207 ; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
208 ; CHECK: vector.body:
209 ; CHECK: <4 x i32>
210
211 ; Function Attrs: norecurse nounwind uwtable
212 define void @doit4(i32 %n, i8 signext %cstep) local_unnamed_addr {
213 entry:
214 %conv = sext i8 %cstep to i32
215 %cmp10 = icmp sgt i32 %n, 0
216 br i1 %cmp10, label %for.body.preheader, label %for.end
217
218 for.body.preheader:
219 %wide.trip.count = zext i32 %n to i64
220 br label %for.body
221
222 for.body:
223 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
224 %p.011 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
225 %sext = shl i32 %p.011, 24
226 %conv2 = ashr exact i32 %sext, 24
227 %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
228 store i32 %conv2, i32* %arrayidx, align 4
229 %add = add nsw i32 %conv2, %conv
230 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
231 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
232 br i1 %exitcond, label %for.end.loopexit, label %for.body
233
234 for.end.loopexit:
235 br label %for.end
236
237 for.end:
238 ret void
239 }