llvm.org GIT mirror llvm / d6d7595
[LFTR] Fix post-inc pointer IV with truncated exit count (PR41998)

Fixes https://bugs.llvm.org/show_bug.cgi?id=41998.

Usually when we have a truncated exit count we'll truncate the IV when comparing against the limit, in which case exit count overflow in post-inc form doesn't matter. However, for pointer IVs we don't do that, so we have to be careful about incrementing the IV in the wide type.

I'm fixing this by removing the IVCount variable (which was ExitCount or ExitCount+1) and replacing it with a UsePostInc flag, and then moving the actual limit adjustment to the individual cases (which are: pointer IV where we add to the wide type, integer IV where we add to the narrow type, and constant integer IV where we add to the wide type).

Differential Revision: https://reviews.llvm.org/D63686

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364709 91177308-0d34-0410-b5e6-96231b3b80d8

Nikita Popov, 2 months ago
3 changed file(s) with 46 addition(s) and 45 deletion(s). Raw diff Collapse all Expand all
22992299
23002300 /// Insert an IR expression which computes the value held by the IV IndVar
23012301 /// (which must be a loop counter w/unit stride) after the backedge of loop L
2302 /// is taken IVCount times.
2302 /// is taken ExitCount times.
23032303 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2304 const SCEV *IVCount, Loop *L,
2304 const SCEV *ExitCount, bool UsePostInc, Loop *L,
23052305 SCEVExpander &Rewriter, ScalarEvolution *SE) {
23062306 assert(isLoopCounter(IndVar, L, SE));
23072307 const SCEVAddRecExpr *AR = cast(SE->getSCEV(IndVar));
23082308 const SCEV *IVInit = AR->getStart();
23092309
2310 // IVInit may be a pointer while IVCount is an integer when FindLoopCounter
2311 // finds a valid pointer IV. Sign extend BECount in order to materialize a
2310 // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
2311 // finds a valid pointer IV. Zero extend ExitCount in order to materialize a
23122312 // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
23132313 // the existing GEPs whenever possible.
2314 if (IndVar->getType()->isPointerTy() && !IVCount->getType()->isPointerTy()) {
2314 if (IndVar->getType()->isPointerTy() &&
2315 !ExitCount->getType()->isPointerTy()) {
23152316 // IVOffset will be the new GEP offset that is interpreted by GEP as a
2316 // signed value. IVCount on the other hand represents the loop trip count,
2317 // signed value. ExitCount on the other hand represents the loop trip count,
23172318 // which is an unsigned value. FindLoopCounter only allows induction
23182319 // variables that have a positive unit stride of one. This means we don't
23192320 // have to handle the case of negative offsets (yet) and just need to zero
2320 // extend IVCount.
2321 // extend ExitCount.
23212322 Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2322 const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy);
2323 const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2324 if (UsePostInc)
2325 IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
23232326
23242327 // Expand the code for the iteration count.
23252328 assert(SE->isLoopInvariant(IVOffset, L) &&
23402343 return Builder.CreateGEP(GEPBase->getType()->getPointerElementType(),
23412344 GEPBase, GEPOffset, "lftr.limit");
23422345 } else {
2343 // In any other case, convert both IVInit and IVCount to integers before
2346 // In any other case, convert both IVInit and ExitCount to integers before
23442347 // comparing. This may result in SCEV expansion of pointers, but in practice
23452348 // SCEV will fold the pointer arithmetic away as such:
23462349 // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
23482351 // Valid Cases: (1) both integers is most common; (2) both may be pointers
23492352 // for simple memset-style loops.
23502353 //
2351 // IVInit integer and IVCount pointer would only occur if a canonical IV
2354 // IVInit integer and ExitCount pointer would only occur if a canonical IV
23522355 // were generated on top of case #2, which is not expected.
23532356
23542357 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2355 // For unit stride, IVCount = Start + BECount with 2's complement overflow.
2358 // For unit stride, IVLimit = Start + ExitCount with 2's complement
2359 // overflow.
23562360 const SCEV *IVInit = AR->getStart();
23572361
23582362 // For integer IVs, truncate the IV before computing IVInit + ExitCount.
23592363 if (SE->getTypeSizeInBits(IVInit->getType())
2360 > SE->getTypeSizeInBits(IVCount->getType()))
2361 IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
2362
2363 const SCEV *IVLimit = SE->getAddExpr(IVInit, IVCount);
2364
2364 > SE->getTypeSizeInBits(ExitCount->getType()))
2365 IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2366
2367 const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2368
2369 if (UsePostInc)
2370 IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2371
23652372 // Expand the code for the iteration count.
23662373 BranchInst *BI = cast(ExitingBB->getTerminator());
23672374 IRBuilder<> Builder(BI);
23702377 // Ensure that we generate the same type as IndVar, or a smaller integer
23712378 // type. In the presence of null pointer values, we have an integer type
23722379 // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2373 Type *LimitTy = IVCount->getType()->isPointerTy() ?
2374 IndVar->getType() : IVCount->getType();
2380 Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2381 IndVar->getType() : ExitCount->getType();
23752382 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
23762383 }
23772384 }
23902397 Instruction * const IncVar =
23912398 cast(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
23922399
2393 // Initialize CmpIndVar and IVCount to their preincremented values.
2400 // Initialize CmpIndVar to the preincremented IV.
23942401 Value *CmpIndVar = IndVar;
2395 const SCEV *IVCount = ExitCount;
2402 bool UsePostInc = false;
23962403
23972404 // If the exiting block is the same as the backedge block, we prefer to
23982405 // compare against the post-incremented value, otherwise we must compare
24112418 SafeToPostInc =
24122419 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
24132420 }
2421
24142422 if (SafeToPostInc) {
2415 // Add one to the "backedge-taken" count to get the trip count.
2416 // This addition may overflow, which is valid as long as the comparison
2417 // is truncated to ExitCount->getType().
2418 IVCount = SE->getAddExpr(ExitCount,
2419 SE->getOne(ExitCount->getType()));
2420 // The BackedgeTaken expression contains the number of times that the
2421 // backedge branches to the loop header. This is one less than the
2422 // number of times the loop executes, so use the incremented indvar.
2423 UsePostInc = true;
24232424 CmpIndVar = IncVar;
24242425 }
24252426 }
24442445 BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
24452446 }
24462447
2447 Value *ExitCnt = genLoopLimit(IndVar, ExitingBB, IVCount, L, Rewriter, SE);
2448 Value *ExitCnt = genLoopLimit(
2449 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
24482450 assert(ExitCnt->getType()->isPointerTy() ==
24492451 IndVar->getType()->isPointerTy() &&
24502452 "genLoopLimit missed a cast");
24652467 Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
24662468
24672469 // LFTR can ignore IV overflow and truncate to the width of
2468 // BECount. This avoids materializing the add(zext(add)) expression.
2470 // ExitCount. This avoids materializing the add(zext(add)) expression.
24692471 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
24702472 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
24712473 if (CmpIndVarSize > ExitCntSize) {
24722474 const SCEVAddRecExpr *AR = cast(SE->getSCEV(IndVar));
24732475 const SCEV *ARStart = AR->getStart();
24742476 const SCEV *ARStep = AR->getStepRecurrence(*SE);
2475 // For constant IVCount, avoid truncation.
2476 if (isa(ARStart) && isa(IVCount)) {
2477 // For constant ExitCount, avoid truncation.
2478 if (isa(ARStart) && isa(ExitCount)) {
24772479 const APInt &Start = cast(ARStart)->getAPInt();
2478 APInt Count = cast(IVCount)->getAPInt();
2479 // Note that the post-inc value of ExitCount may have overflowed
2480 // above such that IVCount is now zero.
2481 if (IVCount != ExitCount && Count == 0) {
2482 Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize);
2480 APInt Count = cast(ExitCount)->getAPInt();
2481 Count = Count.zext(CmpIndVarSize);
2482 if (UsePostInc)
24832483 ++Count;
2484 }
2485 else
2486 Count = Count.zext(CmpIndVarSize);
24872484 APInt NewLimit;
24882485 if (cast(ARStep)->getValue()->isNegative())
24892486 NewLimit = Start - Count;
25282525 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
25292526 << "\n"
25302527 << " RHS:\t" << *ExitCnt << "\n"
2531 << " IVCount:\t" << *IVCount << "\n"
2528 << "ExitCount:\t" << *ExitCount << "\n"
25322529 << " was: " << *BI->getCondition() << "\n");
25332530
25342531 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
145145 ; PTR64-NEXT: [[GUARD:%.*]] = icmp ult i32 0, [[CNT]]
146146 ; PTR64-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]]
147147 ; PTR64: preheader:
148 ; PTR64-NEXT: [[TMP1:%.*]] = zext i32 [[CNT]] to i64
149 ; PTR64-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8* null, i64 [[TMP1]]
148 ; PTR64-NEXT: [[TMP1:%.*]] = add i32 [[EI]], -1
149 ; PTR64-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[BI]]
150 ; PTR64-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
151 ; PTR64-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
152 ; PTR64-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8* null, i64 [[TMP4]]
150153 ; PTR64-NEXT: br label [[LOOP:%.*]]
151154 ; PTR64: loop:
152155 ; PTR64-NEXT: [[P_01_US_US:%.*]] = phi i8* [ null, [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ]
4141 ; CHECK-LABEL: @test_ptr(
4242 ; CHECK-NEXT: entry:
4343 ; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[START:%.*]] to i3
44 ; CHECK-NEXT: [[TMP1:%.*]] = sub i3 0, [[TMP0]]
44 ; CHECK-NEXT: [[TMP1:%.*]] = sub i3 -1, [[TMP0]]
4545 ; CHECK-NEXT: [[TMP2:%.*]] = zext i3 [[TMP1]] to i64
46 ; CHECK-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8* getelementptr inbounds ([256 x i8], [256 x i8]* @data, i64 0, i64 0), i64 [[TMP2]]
46 ; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
47 ; CHECK-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8* getelementptr inbounds ([256 x i8], [256 x i8]* @data, i64 0, i64 0), i64 [[TMP3]]
4748 ; CHECK-NEXT: br label [[LOOP:%.*]]
4849 ; CHECK: loop:
4950 ; CHECK-NEXT: [[P:%.*]] = phi i8* [ getelementptr inbounds ([256 x i8], [256 x i8]* @data, i64 0, i64 0), [[ENTRY:%.*]] ], [ [[P_INC:%.*]], [[LOOP]] ]