llvm.org GIT mirror llvm / 7d4d116
[SCEV] Apply NSW and NUW flags via poison value analysis

Summary:
Make Scalar Evolution able to propagate NSW and NUW flags from instructions
to SCEVs in some cases. This is based on reasoning about when poison from
instructions with these flags would trigger undefined behavior. This gives a
13% speed-up on some Eigen3-based Google-internal microbenchmarks for NVPTX.

There does not seem to be clear agreement about when poison should be
considered to propagate through instructions. In this analysis, poison
propagates only in cases where that should be uncontroversial.

This change makes LSR able to create induction variables for expressions
like &ptr[i + offset] for loops like this:

  for (int i = 0; i < limit; ++i) {
    sum += ptr[i + offset];
  }

Here ptr is a 64-bit pointer and offset is a 32-bit integer. For NVPTX, LSR
currently creates an induction variable for i + offset instead, which is not
as fast; improving this situation is what brings the 13% speed-up mentioned
above.

There are more details in this discussion on llvmdev:
June: http://lists.cs.uiuc.edu/pipermail/llvmdev/2015-June/thread.html#87234
July: http://lists.cs.uiuc.edu/pipermail/llvmdev/2015-July/thread.html#87392

Patch by Bjarke Roune

Reviewers: eliben, atrick, sanjoy

Subscribers: majnemer, hfinkel, jingyue, meheff, llvm-commits

Differential Revision: http://reviews.llvm.org/D11212

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@243460 91177308-0d34-0410-b5e6-96231b3b80d8

Committed by Jingyue Wu
8 changed files with 721 additions and 29 deletions.
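The motivating pattern from the summary, as a compilable sketch (the function and its names are illustrative, not part of the patch):

    // Illustrative only: the loop from the commit summary. With this patch,
    // the nsw on the 32-bit index arithmetic lets SCEV distribute the sign
    // extension into the add recurrence, so LSR can build a single 64-bit
    // pointer induction variable for &ptr[i + offset].
    float sum_with_offset(const float *ptr, int offset, int limit) {
      float sum = 0.0f;
      for (int i = 0; i < limit; ++i)
        sum += ptr[i + offset];
      return sum;
    }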
565565 /// forgetMemoizedResults - Drop memoized information computed for S.
566566 void forgetMemoizedResults(const SCEV *S);
567567
568 /// Return an existing SCEV for V if there is one, otherwise return nullptr.
569 const SCEV *getExistingSCEV(Value *V);
570
568571 /// Return false iff given SCEV contains a SCEVUnknown with NULL value-
569572 /// pointer.
570573 bool checkValidity(const SCEV *S) const;
593596 /// way around.
594597 bool isMonotonicPredicate(const SCEVAddRecExpr *LHS,
595598 ICmpInst::Predicate Pred, bool &Increasing);
599
600 // Return SCEV no-wrap flags that can be proven based on reasoning
601 // about how poison produced from no-wrap flags on this value
602 // (e.g. a nuw add) would trigger undefined behavior on overflow.
603 SCEV::NoWrapFlags getNoWrapFlagsFromUB(const Value *V);
596604
597605 public:
598606 static char ID; // Pass identification, replacement for typeid
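A minimal sketch of the intended contract of getNoWrapFlagsFromUB (illustrative names only; the real call site is in createSCEV further down in this diff):

    // Only stamp the instruction's no-wrap flags onto the SCEV when the
    // poison-based analysis says that is safe.
    SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(V);
    if (Flags != SCEV::FlagAnyWrap)
      Result = getAddExpr(getSCEV(BO->getOperand(0)),
                          getSCEV(BO->getOperand(1)), Flags);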
2929 class DominatorTree;
3030 class TargetLibraryInfo;
3131 class LoopInfo;
32 class Loop;
3233
3334 /// Determine which bits of V are known to be either zero or one and return
3435 /// them in the KnownZero/KnownOne bit sets.
287288 AssumptionCache *AC,
288289 const Instruction *CxtI,
289290 const DominatorTree *DT);
290
291
292 /// Return true if this function can prove that the instruction I will
293 /// always transfer execution to one of its successors (including the next
294 /// instruction that follows within a basic block). E.g. this is not
295 /// guaranteed for function calls that could loop infinitely.
296 ///
297 /// In other words, this function returns false for instructions that may
298 /// transfer execution or fail to transfer execution in a way that is not
299 /// captured in the CFG nor in the sequence of instructions within a basic
300 /// block.
301 ///
302 /// Undefined behavior is assumed not to happen, so e.g. division is
303 /// guaranteed to transfer execution to the following instruction even
304 /// though division by zero might cause undefined behavior.
305 bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I);
306
307 /// Return true if this function can prove that the instruction I
308 /// is executed for every iteration of the loop L.
309 ///
310 /// Note that this currently only considers the loop header.
311 bool isGuaranteedToExecuteForEveryIteration(const Instruction *I,
312 const Loop *L);
313
314 /// Return true if this function can prove that I is guaranteed to yield
315 /// full-poison (all bits poison) if at least one of its operands is
316 /// full-poison (all bits poison).
317 ///
318 /// The exact rules for how poison propagates through instructions have
319 /// not been settled as of 2015-07-10, so this function is conservative
320 /// and only considers poison to be propagated in uncontroversial
321 /// cases. There is no attempt to track values that may be only partially
322 /// poison.
323 bool propagatesFullPoison(const Instruction *I);
324
325 /// Return either nullptr or an operand of I such that I will trigger
326 /// undefined behavior if I is executed and that operand has a full-poison
327 /// value (all bits poison).
328 const Value *getGuaranteedNonFullPoisonOp(const Instruction *I);
329
330 /// Return true if this function can prove that if PoisonI is executed
331 /// and yields a full-poison value (all bits poison), then that will
332 /// trigger undefined behavior.
333 ///
334 /// Note that this currently only considers the basic block that is
335 /// the parent of I.
336 bool isKnownNotFullPoison(const Instruction *PoisonI);
337
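Taken together, these helpers support the argument used by getNoWrapFlagsFromUB below; a minimal sketch of how they compose (names hypothetical, assuming the headers from this tree):

    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/Analysis/ValueTracking.h"
    using namespace llvm;

    // Sketch: may an add's nsw flag be trusted throughout its loop?
    bool canTransferNSW(const Instruction *BinOp, const Loop *L) {
      // If BinOp yielded full poison, undefined behavior would follow
      // (e.g. the poison reaches a load address in the same block)...
      if (!isKnownNotFullPoison(BinOp))
        return false;
      // ...and BinOp runs on every iteration, so it can never wrap
      // anywhere in L without triggering that undefined behavior.
      return isGuaranteedToExecuteForEveryIteration(BinOp, L);
    }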
291338 /// \brief Specific patterns of select instructions we can match.
292339 enum SelectPatternFlavor {
293340 SPF_UNKNOWN = 0,
29352935 // FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP
29362936 // instruction to its SCEV, because the Instruction may be guarded by control
29372937 // flow and the no-overflow bits may not be valid for the expression in any
2938 // context.
2938 // context. This can be fixed similarly to how these flags are handled for
2939 // adds.
29392940 SCEV::NoWrapFlags Wrap = InBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
29402941
29412942 const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
33143315 const SCEV *ScalarEvolution::getSCEV(Value *V) {
33153316 assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
33163317
3318 const SCEV *S = getExistingSCEV(V);
3319 if (S == nullptr) {
3320 S = createSCEV(V);
3321 ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
3322 }
3323 return S;
3324 }
3325
3326 const SCEV *ScalarEvolution::getExistingSCEV(Value *V) {
3327 assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
3328
33173329 ValueExprMapType::iterator I = ValueExprMap.find_as(V);
33183330 if (I != ValueExprMap.end()) {
33193331 const SCEV *S = I->second;
33203332 if (checkValidity(S))
33213333 return S;
3322 else
3323 ValueExprMap.erase(I);
3324 }
3325 const SCEV *S = createSCEV(V);
3326
3327 // The process of creating a SCEV for V may have caused other SCEVs
3328 // to have been created, so it's necessary to insert the new entry
3329 // from scratch, rather than trying to remember the insert position
3330 // above.
3331 ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
3332 return S;
3334 ValueExprMap.erase(I);
3335 }
3336 return nullptr;
33333337 }
33343338
33353339 /// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
40884092 return setRange(S, SignHint, ConservativeResult);
40894093 }
40904094
4091 /// createSCEV - We know that there is no SCEV for the specified value.
4092 /// Analyze the expression.
4095 SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) {
4096 const BinaryOperator *BinOp = cast<BinaryOperator>(V);
4097
4098 // Return early if there are no flags to propagate to the SCEV.
4099 SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
4100 if (BinOp->hasNoUnsignedWrap())
4101 Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
4102 if (BinOp->hasNoSignedWrap())
4103 Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
4104 if (Flags == SCEV::FlagAnyWrap) {
4105 return SCEV::FlagAnyWrap;
4106 }
4107
4108 // Here we check that BinOp is in the header of the innermost loop
4109 // containing BinOp, since we only deal with instructions in the loop
4110 // header. The actual loop we need to check later will come from an add
4111 // recurrence, but getting that requires computing the SCEV of the operands,
4112 // which can be expensive. This check we can do cheaply to rule out some
4113 // cases early.
4114 Loop *innermostContainingLoop = LI->getLoopFor(BinOp->getParent());
4115 if (innermostContainingLoop == nullptr ||
4116 innermostContainingLoop->getHeader() != BinOp->getParent())
4117 return SCEV::FlagAnyWrap;
4118
4119 // Only proceed if we can prove that BinOp does not yield poison.
4120 if (!isKnownNotFullPoison(BinOp)) return SCEV::FlagAnyWrap;
4121
4122 // At this point we know that if V is executed, then it does not wrap
4123 // according to at least one of NSW or NUW. If V is not executed, then we do
4124 // not know if the calculation that V represents would wrap. Multiple
4125 // instructions can map to the same SCEV. If we apply NSW or NUW from V to
4126 // the SCEV, we must guarantee no wrapping for that SCEV also when it is
4127 // derived from other instructions that map to the same SCEV. We cannot make
4128 // that guarantee for cases where V is not executed. So we need to find the
4129 // loop that V is considered in relation to and prove that V is executed for
4130 // every iteration of that loop. That implies that the value that V
4131 // calculates does not wrap anywhere in the loop, so then we can apply the
4132 // flags to the SCEV.
4133 //
4134 // We check isLoopInvariant to disambiguate in case we are adding two
4135 // recurrences from different loops, so that we know which loop to prove
4136 // that V is executed in.
4137 for (int OpIndex = 0; OpIndex < 2; ++OpIndex) {
4138 const SCEV *Op = getSCEV(BinOp->getOperand(OpIndex));
4139 if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
4140 const int OtherOpIndex = 1 - OpIndex;
4141 const SCEV *OtherOp = getSCEV(BinOp->getOperand(OtherOpIndex));
4142 if (isLoopInvariant(OtherOp, AddRec->getLoop()) &&
4143 isGuaranteedToExecuteForEveryIteration(BinOp, AddRec->getLoop()))
4144 return Flags;
4145 }
4146 }
4147 return SCEV::FlagAnyWrap;
4148 }
4149
4150 /// createSCEV - We know that there is no SCEV for the specified value. Analyze
4151 /// the expression.
40934152 ///
40944153 const SCEV *ScalarEvolution::createSCEV(Value *V) {
40954154 if (!isSCEVable(V->getType()))
41264185 // Instead, gather up all the operands and make a single getAddExpr call.
41274186 // LLVM IR canonical form means we need only traverse the left operands.
41284187 //
4129 // Don't apply this instruction's NSW or NUW flags to the new
4130 // expression. The instruction may be guarded by control flow that the
4131 // no-wrap behavior depends on. Non-control-equivalent instructions can be
4132 // mapped to the same SCEV expression, and it would be incorrect to transfer
4133 // NSW/NUW semantics to those operations.
4188 // FIXME: Expand this handling of NSW and NUW to other instructions, like
4189 // sub and mul.
41344190 SmallVector<const SCEV *, 4> AddOps;
4135 AddOps.push_back(getSCEV(U->getOperand(1)));
4136 for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) {
4137 unsigned Opcode = Op->getValueID() - Value::InstructionVal;
4138 if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
4191 for (Value *Op = U;; Op = U->getOperand(0)) {
4192 U = dyn_cast<Operator>(Op);
4193 unsigned Opcode = U ? U->getOpcode() : 0;
4194 if (!U || (Opcode != Instruction::Add && Opcode != Instruction::Sub)) {
4195 assert(Op != V && "V should be an add");
4196 AddOps.push_back(getSCEV(Op));
41394197 break;
4140 U = cast<Operator>(Op);
4198 }
4199
4200 if (auto *OpSCEV = getExistingSCEV(Op)) {
4201 AddOps.push_back(OpSCEV);
4202 break;
4203 }
4204
4205 // If a NUW or NSW flag can be applied to the SCEV for this
4206 // addition, then compute the SCEV for this addition by itself
4207 // with a separate call to getAddExpr. We need to do that
4208 // instead of pushing the operands of the addition onto AddOps,
4209 // since the flags are only known to apply to this particular
4210 // addition - they may not apply to other additions that can be
4211 // formed with operands from AddOps.
4212 //
4213 // FIXME: Expand this to sub instructions.
4214 if (Opcode == Instruction::Add && isa<OverflowingBinaryOperator>(U)) {
4215 SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
4216 if (Flags != SCEV::FlagAnyWrap) {
4217 AddOps.push_back(getAddExpr(getSCEV(U->getOperand(0)),
4218 getSCEV(U->getOperand(1)), Flags));
4219 break;
4220 }
4221 }
4222
41414223 const SCEV *Op1 = getSCEV(U->getOperand(1));
41424224 if (Opcode == Instruction::Sub)
41434225 AddOps.push_back(getNegativeSCEV(Op1));
41444226 else
41454227 AddOps.push_back(Op1);
41464228 }
4147 AddOps.push_back(getSCEV(U->getOperand(0)));
41484229 return getAddExpr(AddOps);
41494230 }
4231
41504232 case Instruction::Mul: {
4151 // Don't transfer NSW/NUW for the same reason as AddExpr.
4233 // FIXME: Transfer NSW/NUW as in AddExpr.
41524234 SmallVector<const SCEV *, 4> MulOps;
41534235 MulOps.push_back(getSCEV(U->getOperand(1)));
41544236 for (Value *Op = U->getOperand(0);
33153315 return OverflowResult::MayOverflow;
33163316 }
33173317
3318 bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
3319 // FIXME: This conservative implementation can be relaxed. E.g. most
3320 // atomic operations are guaranteed to terminate on most platforms
3321 // and most functions terminate.
3322
3323 return !I->isAtomic() && // atomics may never succeed on some platforms
3324 !isa<CallInst>(I) && // could throw and might not terminate
3325 !isa<InvokeInst>(I) && // might not terminate and could throw to
3326 // non-successor (see bug 24185 for details).
3327 !isa<ResumeInst>(I) && // has no successors
3328 !isa<ReturnInst>(I); // has no successors
3329 }
3330
3331 bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
3332 const Loop *L) {
3333 // The loop header is guaranteed to be executed for every iteration.
3334 //
3335 // FIXME: Relax this constraint to cover all basic blocks that are
3336 // guaranteed to be executed at every iteration.
3337 if (I->getParent() != L->getHeader()) return false;
3338
3339 for (const Instruction &LI : *L->getHeader()) {
3340 if (&LI == I) return true;
3341 if (!isGuaranteedToTransferExecutionToSuccessor(&LI)) return false;
3342 }
3343 llvm_unreachable("Instruction not contained in its own parent basic block.");
3344 }
3345
3346 bool llvm::propagatesFullPoison(const Instruction *I) {
3347 switch (I->getOpcode()) {
3348 case Instruction::Add:
3349 case Instruction::Sub:
3350 case Instruction::Xor:
3351 case Instruction::Trunc:
3352 case Instruction::BitCast:
3353 case Instruction::AddrSpaceCast:
3354 // These operations all propagate poison unconditionally. Note that poison
3355 // is not any particular value, so xor or subtraction of poison with
3356 // itself still yields poison, not zero.
3357 return true;
3358
3359 case Instruction::AShr:
3360 case Instruction::SExt:
3361 // For these operations, one bit of the input is replicated across
3362 // multiple output bits. A replicated poison bit is still poison.
3363 return true;
3364
3365 case Instruction::Shl: {
3366 // Left shift *by* a poison value is poison. The number of
3367 // positions to shift is unsigned, so no negative values are
3368 // possible there. Left shift by zero places preserves poison. So
3369 // it only remains to consider left shift of poison by a positive
3370 // number of places.
3371 //
3372 // A left shift by a positive number of places leaves the lowest order bit
3373 // non-poisoned. However, if such a shift has a no-wrap flag, then we can
3374 // make the poison operand violate that flag, yielding a fresh full-poison
3375 // value.
3376 auto *OBO = cast<OverflowingBinaryOperator>(I);
3377 return OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap();
3378 }
3379
3380 case Instruction::Mul: {
3381 // A multiplication by zero yields a non-poison zero result, so we need to
3382 // rule out zero as an operand. Conservatively, multiplication by a
3383 // non-zero constant is not multiplication by zero.
3384 //
3385 // Multiplication by a non-zero constant can leave some bits
3386 // non-poisoned. For example, a multiplication by 2 leaves the lowest
3387 // order bit unpoisoned. So we need to consider that.
3388 //
3389 // Multiplication by 1 preserves poison. If the multiplication has a
3390 // no-wrap flag, then we can make the poison operand violate that flag
3391 // when multiplied by any integer other than 0 and 1.
3392 auto *OBO = cast<OverflowingBinaryOperator>(I);
3393 if (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) {
3394 for (Value *V : OBO->operands()) {
3395 if (auto *CI = dyn_cast<ConstantInt>(V)) {
3396 // A ConstantInt cannot yield poison, so we can assume that it is
3397 // the other operand that is poison.
3398 return !CI->isZero();
3399 }
3400 }
3401 }
3402 return false;
3403 }
3404
3405 case Instruction::GetElementPtr:
3406 // A GEP implicitly represents a sequence of additions, subtractions,
3407 // truncations, sign extensions and multiplications. The multiplications
3408 // are by the non-zero sizes of some set of types, so we do not have to be
3409 // concerned with multiplication by zero. If the GEP is in-bounds, then
3410 // these operations are implicitly no-signed-wrap so poison is propagated
3411 // by the arguments above for Add, Sub, Trunc, SExt and Mul.
3412 return cast<GEPOperator>(I)->isInBounds();
3413
3414 default:
3415 return false;
3416 }
3417 }
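A usage sketch for propagatesFullPoison (a hypothetical standalone program, not part of the patch; it assumes this tree's headers and the AsmParser library):

    #include "llvm/AsmParser/Parser.h"
    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/SourceMgr.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      SMDiagnostic Err;
      // mul nuw by the non-zero constant 2 falls under the Mul case above:
      // a poison %a can be chosen to violate nuw, so poison propagates.
      std::unique_ptr<Module> M = parseAssemblyString(
          "define i32 @f(i32 %a) {\n"
          "  %m = mul nuw i32 %a, 2\n"
          "  ret i32 %m\n"
          "}\n",
          Err, Ctx);
      const Instruction &Mul = M->getFunction("f")->getEntryBlock().front();
      return propagatesFullPoison(&Mul) ? 0 : 1; // expect exit code 0
    }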
3418
3419 const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) {
3420 switch (I->getOpcode()) {
3421 case Instruction::Store:
3422 return cast<StoreInst>(I)->getPointerOperand();
3423
3424 case Instruction::Load:
3425 return cast<LoadInst>(I)->getPointerOperand();
3426
3427 case Instruction::AtomicCmpXchg:
3428 return cast<AtomicCmpXchgInst>(I)->getPointerOperand();
3429
3430 case Instruction::AtomicRMW:
3431 return cast<AtomicRMWInst>(I)->getPointerOperand();
3432
3433 case Instruction::UDiv:
3434 case Instruction::SDiv:
3435 case Instruction::URem:
3436 case Instruction::SRem:
3437 return I->getOperand(1);
3438
3439 default:
3440 return nullptr;
3441 }
3442 }
3443
3444 bool llvm::isKnownNotFullPoison(const Instruction *PoisonI) {
3445 // We currently only look for uses of poison values within the same basic
3446 // block, as that makes it easier to guarantee that the uses will be
3447 // executed given that PoisonI is executed.
3448 //
3449 // FIXME: Expand this to consider uses beyond the same basic block. To do
3450 // this, look out for the distinction between post-dominance and strong
3451 // post-dominance.
3452 const BasicBlock *BB = PoisonI->getParent();
3453
3454 // Set of instructions that we have proved will yield poison if PoisonI
3455 // does.
3456 SmallSet<const Value *, 16> YieldsPoison;
3457 YieldsPoison.insert(PoisonI);
3458
3459 for (const Instruction *I = PoisonI, *E = BB->end(); I != E;
3460 I = I->getNextNode()) {
3461 if (I != PoisonI) {
3462 const Value *NotPoison = getGuaranteedNonFullPoisonOp(I);
3463 if (NotPoison != nullptr && YieldsPoison.count(NotPoison)) return true;
3464 if (!isGuaranteedToTransferExecutionToSuccessor(I)) return false;
3465 }
3466
3467 // Mark poison that propagates from I through uses of I.
3468 if (YieldsPoison.count(I)) {
3469 for (const User *User : I->users()) {
3470 const Instruction *UserI = cast<Instruction>(User);
3471 if (UserI->getParent() == BB && propagatesFullPoison(UserI))
3472 YieldsPoison.insert(User);
3473 }
3474 }
3475 }
3476 return false;
3477 }
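And similarly for isKnownNotFullPoison (again hypothetical, reusing the parsing boilerplate from the previous sketch): the nsw add below feeds an inbounds GEP whose result is loaded in the same block, so full poison from the add would reach the load's pointer operand and trigger undefined behavior.

    // Poison from %idx flows through the inbounds GEP (propagatesFullPoison)
    // into the load's pointer operand (getGuaranteedNonFullPoisonOp).
    std::unique_ptr<Module> M = parseAssemblyString(
        "define float @g(float* %p, i32 %a, i32 %b) {\n"
        "  %idx = add nsw i32 %a, %b\n"
        "  %q = getelementptr inbounds float, float* %p, i32 %idx\n"
        "  %v = load float, float* %q, align 4\n"
        "  ret float %v\n"
        "}\n",
        Err, Ctx);
    const Instruction &Add = M->getFunction("g")->getEntryBlock().front();
    bool Proved = isKnownNotFullPoison(&Add); // expect: true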
3478
33183479 static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred,
33193480 Value *CmpLHS, Value *CmpRHS,
33203481 Value *TrueVal, Value *FalseVal,
1010 ; AddRec: {{{(56 + (8 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
1111 ; CHECK: Base offset: %A
1212 ; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes.
13 ; CHECK: ArrayRef[{3,+,1}<%for.i>][{-4,+,1}<%for.j>][{7,+,1}<nw><%for.k>]
13 ; CHECK: ArrayRef[{3,+,1}<%for.i>][{-4,+,1}<%for.j>][{7,+,1}<nuw><nw><%for.k>]
1414
1515 define void @foo(i64 %n, i64 %m, i64 %o, double* %A) {
1616 entry:
1010 ; AddRec: {{{((8 * ((((%m * %p) + %q) * %o) + %r)) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
1111 ; CHECK: Base offset: %A
1212 ; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes.
13 ; CHECK: ArrayRef[{%p,+,1}<%for.i>][{%q,+,1}<%for.j>][{%r,+,1}<nw><%for.k>]
13 ; CHECK: ArrayRef[{%p,+,1}<%for.i>][{%q,+,1}<%for.j>][{%r,+,1}<nsw><%for.k>]
1414
1515 define void @foo(i64 %n, i64 %m, i64 %o, double* %A, i64 %p, i64 %q, i64 %r) {
1616 entry:
0 ; RUN: opt < %s -S -analyze -scalar-evolution | FileCheck %s
1
2 ; Positive and negative tests for inferring flags like nsw from
3 ; reasoning about how a poison value from overflow would trigger
4 ; undefined behavior.
5
6 define void @foo() {
7 ret void
8 }
9
10 ; Example where an add should get the nsw flag, so that a sext can be
11 ; distributed over the add.
12 define void @test-add-nsw(float* %input, i32 %offset, i32 %numIterations) {
13 ; CHECK-LABEL: @test-add-nsw
14 entry:
15 br label %loop
16 loop:
17 %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
18
19 ; CHECK: %index32 =
20 ; CHECK: --> {%offset,+,1}<nsw>
21 %index32 = add nsw i32 %i, %offset
22
23 ; CHECK: %index64 =
24 ; CHECK: --> {(sext i32 %offset to i64),+,1}<nsw>
25 %index64 = sext i32 %index32 to i64
26
27 %ptr = getelementptr inbounds float, float* %input, i64 %index64
28 %nexti = add nsw i32 %i, 1
29 %f = load float, float* %ptr, align 4
30 call void @foo()
31 %exitcond = icmp eq i32 %nexti, %numIterations
32 br i1 %exitcond, label %exit, label %loop
33 exit:
34 ret void
35 }
36
37 ; Example where an add should get the nuw flag.
38 define void @test-add-nuw(float* %input, i32 %offset, i32 %numIterations) {
39 ; CHECK-LABEL: @test-add-nuw
40 entry:
41 br label %loop
42 loop:
43 %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
44
45 ; CHECK: %index32 =
46 ; CHECK: --> {%offset,+,1}<nuw>
47 %index32 = add nuw i32 %i, %offset
48
49 %ptr = getelementptr inbounds float, float* %input, i32 %index32
50 %nexti = add nuw i32 %i, 1
51 %f = load float, float* %ptr, align 4
52 %exitcond = icmp eq i32 %nexti, %numIterations
53 br i1 %exitcond, label %exit, label %loop
54
55 exit:
56 ret void
57 }
58
59 ; With no load to trigger UB from poison, we cannot infer nsw.
60 define void @test-add-no-load(float* %input, i32 %offset, i32 %numIterations) {
61 ; CHECK-LABEL: @test-add-no-load
62 entry:
63 br label %loop
64 loop:
65 %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
66
67 ; CHECK: %index32 =
68 ; CHECK: --> {%offset,+,1}
69 %index32 = add nsw i32 %i, %offset
70
71 %ptr = getelementptr inbounds float, float* %input, i32 %index32
72 %nexti = add nuw i32 %i, 1
73 %exitcond = icmp eq i32 %nexti, %numIterations
74 br i1 %exitcond, label %exit, label %loop
75
76 exit:
77 ret void
78 }
79
80 ; The current code is only supposed to look at the loop header, so
81 ; it should not infer nsw in this case, as that would require looking
82 ; outside the loop header.
83 define void @test-add-not-header(float* %input, i32 %offset, i32 %numIterations) {
84 ; CHECK-LABEL: @test-add-not-header
85 entry:
86 br label %loop
87 loop:
88 %i = phi i32 [ %nexti, %loop2 ], [ 0, %entry ]
89 br label %loop2
90 loop2:
91
92 ; CHECK: %index32 =
93 ; CHECK: --> {%offset,+,1}
94 %index32 = add nsw i32 %i, %offset
95
96 %ptr = getelementptr inbounds float, float* %input, i32 %index32
97 %nexti = add nsw i32 %i, 1
98 %f = load float, float* %ptr, align 4
99 %exitcond = icmp eq i32 %nexti, %numIterations
100 br i1 %exitcond, label %exit, label %loop
101 exit:
102 ret void
103 }
104
105 ; Same thing as test-add-not-header, but in this case only the load
106 ; instruction is outside the loop header.
107 define void @test-add-not-header2(float* %input, i32 %offset, i32 %numIterations) {
108 ; CHECK-LABEL: @test-add-not-header2
109 entry:
110 br label %loop
111 loop:
112 %i = phi i32 [ %nexti, %loop2 ], [ 0, %entry ]
113
114 ; CHECK: %index32 =
115 ; CHECK: --> {%offset,+,1}
116 %index32 = add nsw i32 %i, %offset
117
118 %ptr = getelementptr inbounds float, float* %input, i32 %index32
119 %nexti = add nsw i32 %i, 1
120 br label %loop2
121 loop2:
122 %f = load float, float* %ptr, align 4
123 %exitcond = icmp eq i32 %nexti, %numIterations
124 br i1 %exitcond, label %exit, label %loop
125 exit:
126 ret void
127 }
128
129 ; The call instruction makes it not guaranteed that the add will be
130 ; executed, since it could run forever or throw an exception, so we
131 ; cannot assume that the UB is realized.
132 define void @test-add-call(float* %input, i32 %offset, i32 %numIterations) {
133 ; CHECK-LABEL: @test-add-call
134 entry:
135 br label %loop
136 loop:
137 %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
138
139 ; CHECK: %index32 =
140 ; CHECK: --> {%offset,+,1}
141 call void @foo()
142 %index32 = add nsw i32 %i, %offset
143
144 %ptr = getelementptr inbounds float, float* %input, i32 %index32
145 %nexti = add nsw i32 %i, 1
146 %f = load float, float* %ptr, align 4
147 %exitcond = icmp eq i32 %nexti, %numIterations
148 br i1 %exitcond, label %exit, label %loop
149 exit:
150 ret void
151 }
152
153 ; Same issue as test-add-call, but this time the call is between the
154 ; producer of poison and the load that consumes it.
155 define void @test-add-call2(float* %input, i32 %offset, i32 %numIterations) {
156 ; CHECK-LABEL: @test-add-call2
157 entry:
158 br label %loop
159 loop:
160 %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
161
162 ; CHECK: %index32 =
163 ; CHECK: --> {%offset,+,1}
164 %index32 = add nsw i32 %i, %offset
165
166 %ptr = getelementptr inbounds float, float* %input, i32 %index32
167 %nexti = add nsw i32 %i, 1
168 call void @foo()
169 %f = load float, float* %ptr, align 4
170 %exitcond = icmp eq i32 %nexti, %numIterations
171 br i1 %exitcond, label %exit, label %loop
172 exit:
173 ret void
174 }
175
176 ; Without inbounds, GEP does not propagate poison in the very
177 ; conservative approach used here.
178 define void @test-add-no-inbounds(float* %input, i32 %offset, i32 %numIterations) {
179 ; CHECK-LABEL: @test-add-no-inbounds
180 entry:
181 br label %loop
182 loop:
183 %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
184
185 ; CHECK: %index32 =
186 ; CHECK: --> {%offset,+,1}
187 %index32 = add nsw i32 %i, %offset
188
189 %ptr = getelementptr float, float* %input, i32 %index32
190 %nexti = add nsw i32 %i, 1
191 %f = load float, float* %ptr, align 4
192 %exitcond = icmp eq i32 %nexti, %numIterations
193 br i1 %exitcond, label %exit, label %loop
194 exit:
195 ret void
196 }
197
198 ; Multiplication by a non-zero constant propagates poison if there is
199 ; a nuw or nsw flag on the multiplication.
200 define void @test-add-mul-propagates(float* %input, i32 %offset, i32 %numIterations) {
201 ; CHECK-LABEL: @test-add-mul-propagates
202 entry:
203 br label %loop
204 loop:
205 %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
206
207 ; CHECK: %index32 =
208 ; CHECK: --> {%offset,+,1}<nsw>
209 %index32 = add nsw i32 %i, %offset
210
211 %indexmul = mul nuw i32 %index32, 2
212 %ptr = getelementptr inbounds float, float* %input, i32 %indexmul
213 %nexti = add nsw i32 %i, 1
214 %f = load float, float* %ptr, align 4
215 %exitcond = icmp eq i32 %nexti, %numIterations
216 br i1 %exitcond, label %exit, label %loop
217 exit:
218 ret void
219 }
220
221 ; Multiplication by a non-constant should not propagate poison in the
222 ; very conservative approach used here.
223 define void @test-add-mul-no-propagation(float* %input, i32 %offset, i32 %numIterations) {
224 ; CHECK-LABEL: @test-add-mul-no-propagation
225 entry:
226 br label %loop
227 loop:
228 %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
229
230 ; CHECK: %index32 =
231 ; CHECK: --> {%offset,+,1}
232 %index32 = add nsw i32 %i, %offset
233
234 %indexmul = mul nsw i32 %index32, %offset
235 %ptr = getelementptr inbounds float, float* %input, i32 %indexmul
236 %nexti = add nsw i32 %i, 1
237 %f = load float, float* %ptr, align 4
238 %exitcond = icmp eq i32 %nexti, %numIterations
239 br i1 %exitcond, label %exit, label %loop
240 exit:
241 ret void
242 }
243
244 ; Multiplication by a non-zero constant does not propagate poison
245 ; without a no-wrap flag.
246 define void @test-add-mul-no-propagation2(float* %input, i32 %offset, i32 %numIterations) {
247 ; CHECK-LABEL: @test-add-mul-no-propagation2
248 entry:
249 br label %loop
250 loop:
251 %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
252
253 ; CHECK: %index32 =
254 ; CHECK: --> {%offset,+,1}
255 %index32 = add nsw i32 %i, %offset
256
257 %indexmul = mul i32 %index32, 2
258 %ptr = getelementptr inbounds float, float* %input, i32 %indexmul
259 %nexti = add nsw i32 %i, 1
260 %f = load float, float* %ptr, align 4
261 %exitcond = icmp eq i32 %nexti, %numIterations
262 br i1 %exitcond, label %exit, label %loop
263 exit:
264 ret void
265 }
266
267 ; Division by poison triggers UB.
268 define void @test-add-div(float* %input, i32 %offset, i32 %numIterations) {
269 ; CHECK-LABEL: @test-add-div
270 entry:
271 br label %loop
272 loop:
273 %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
274
275 ; CHECK: %j =
276 ; CHECK: --> {%offset,+,1}<nsw>
277 %j = add nsw i32 %i, %offset
278
279 %q = sdiv i32 %numIterations, %j
280 %nexti = add nsw i32 %i, 1
281 %exitcond = icmp eq i32 %nexti, %numIterations
282 br i1 %exitcond, label %exit, label %loop
283 exit:
284 ret void
285 }
286
287 ; Remainder of poison by non-poison divisor does not trigger UB.
288 define void @test-add-div2(float* %input, i32 %offset, i32 %numIterations) {
289 ; CHECK-LABEL: @test-add-div2
290 entry:
291 br label %loop
292 loop:
293 %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
294
295 ; CHECK: %j =
296 ; CHECK: --> {%offset,+,1}
297 %j = add nsw i32 %i, %offset
298
299 %q = sdiv i32 %j, %numIterations
300 %nexti = add nsw i32 %i, 1
301 %exitcond = icmp eq i32 %nexti, %numIterations
302 br i1 %exitcond, label %exit, label %loop
303 exit:
304 ret void
305 }
306
307 ; Store to poison address triggers UB.
308 define void @test-add-store(float* %input, i32 %offset, i32 %numIterations) {
309 ; CHECK-LABEL: @test-add-store
310 entry:
311 br label %loop
312 loop:
313 %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
314
315 ; CHECK: %index32 =
316 ; CHECK: --> {%offset,+,1}<nsw>
317 %index32 = add nsw i32 %i, %offset
318
319 %ptr = getelementptr inbounds float, float* %input, i32 %index32
320 %nexti = add nsw i32 %i, 1
321 store float 1.0, float* %ptr, align 4
322 %exitcond = icmp eq i32 %nexti, %numIterations
323 br i1 %exitcond, label %exit, label %loop
324 exit:
325 ret void
326 }
327
328 ; Three sequential adds where the middle add should have nsw. There is
329 ; a special case for sequential adds and this test covers that. We have to
330 ; put the final add first in the program since otherwise the special case
331 ; is not triggered, hence the strange basic block ordering.
332 define void @test-add-twice(float* %input, i32 %offset, i32 %numIterations) {
333 ; CHECK-LABEL: @test-add-twice
334 entry:
335 br label %loop
336 loop2:
337 ; CHECK: %seq =
338 ; CHECK: --> {(2 + %offset),+,1}
339 %seq = add nsw nuw i32 %index32, 1
340 %exitcond = icmp eq i32 %nexti, %numIterations
341 br i1 %exitcond, label %exit, label %loop
342
343 loop:
344 %i = phi i32 [ %nexti, %loop2 ], [ 0, %entry ]
345
346 %j = add nsw i32 %i, 1
347 ; CHECK: %index32 =
348 ; CHECK: --> {(1 + %offset),+,1}<nsw>
349 %index32 = add nsw i32 %j, %offset
350
351 %ptr = getelementptr inbounds float, float* %input, i32 %index32
352 %nexti = add nsw i32 %i, 1
353 store float 1.0, float* %ptr, align 4
354 br label %loop2
355 exit:
356 ret void
357 }
0 ; RUN: opt -loop-reduce -S < %s | FileCheck %s
1
2 target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
3 target triple = "nvptx64-unknown-unknown"
4
5 ; LSR was previously unable to generate a float* induction variable in
6 ; these cases because scalar evolution did not propagate nsw from an
7 ; instruction to the SCEV, which prevented the sext from being
8 ; distributed into the corresponding addrec.
9
10 define float @testadd(float* %input, i32 %offset, i32 %numIterations) {
11 ; CHECK-LABEL: @testadd
12 ; CHECK: sext i32 %offset to i64
13 ; CHECK: loop:
14 ; CHECK-DAG: phi float*
15 ; CHECK-DAG: phi i32
16 ; CHECK-NOT: sext
17
18 entry:
19 br label %loop
20
21 loop:
22 %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
23 %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
24 %index32 = add nuw nsw i32 %i, %offset
25 %index64 = sext i32 %index32 to i64
26 %ptr = getelementptr inbounds float, float* %input, i64 %index64
27 %addend = load float, float* %ptr, align 4
28 %nextsum = fadd float %sum, %addend
29 %nexti = add nuw nsw i32 %i, 1
30 %exitcond = icmp eq i32 %nexti, %numIterations
31 br i1 %exitcond, label %exit, label %loop
32
33 exit:
34 ret float %nextsum
35 }