llvm.org GIT mirror llvm / 5c1bd30
[SCEV] Properly solve quadratic equations Differential Revision: https://reviews.llvm.org/D48283 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@338758 91177308-0d34-0410-b5e6-96231b3b80d8 Krzysztof Parzyszek 2 years ago
7 changed file(s) with 1097 addition(s) and 117 deletion(s). Raw diff Collapse all Expand all
3030
3131 template class SmallVectorImpl;
3232 template class ArrayRef;
33 template class Optional;
3334
3435 class APInt;
3536
21652166 /// Return A sign-divided by B, rounded by the given rounding mode.
21662167 APInt RoundingSDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
21672168
2169 /// Let q(n) = An^2 + Bn + C, and BW = bit width of the value range
2170 /// (e.g. 32 for i32).
2171 /// This function finds the smallest number n, such that
2172 /// (a) n >= 0 and q(n) = 0, or
2173 /// (b) n >= 1 and q(n-1) and q(n), when evaluated in the set of all
2174 /// integers, belong to two different intervals [Rk, Rk+R),
2175 /// where R = 2^BW, and k is an integer.
2176 /// The idea here is to find when q(n) "overflows" 2^BW, while at the
2177 /// same time "allowing" subtraction. In unsigned modulo arithmetic a
2178 /// subtraction (treated as addition of negated numbers) would always
2179 /// count as an overflow, but here we want to allow values to decrease
2180 /// and increase as long as they are within the same interval.
2181 /// Specifically, adding of two negative numbers should not cause an
2182 /// overflow (as long as the magnitude does not exceed the bit width).
2183 /// On the other hand, given a positive number, adding a negative
2184 /// number to it can give a negative result, which would cause the
2185 /// value to go from [-2^BW, 0) to [0, 2^BW). In that sense, zero is
2186 /// treated as a special case of an overflow.
2187 ///
2188 /// This function returns None if after finding k that minimizes the
2189 /// positive solution to q(n) = kR, both solutions are contained between
2190 /// two consecutive integers.
2191 ///
2192 /// There are cases where q(n) > T, and q(n+1) < T (assuming evaluation
2193 /// in arithmetic modulo 2^BW, and treating the values as signed) by the
2194 /// virtue of *signed* overflow. This function will *not* find such an n,
2195 /// however it may find a value of n satisfying the inequalities due to
2196 /// an *unsigned* overflow (if the values are treated as unsigned).
2197 /// To find a solution for a signed overflow, treat it as a problem of
2198 /// finding an unsigned overflow with a range width of BW-1.
2199 ///
2200 /// The returned value may have a different bit width from the input
2201 /// coefficients.
2202 Optional SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2203 unsigned RangeWidth);
21682204 } // End of APIntOps namespace
21692205
21702206 // See friend declaration above. This additional declaration is required in
83438343 return SE.getUDivExactExpr(SE.getMulExpr(B, SE.getConstant(I)), D);
83448344 }
83458345
8346 /// Find the roots of the quadratic equation for the given quadratic chrec
8347 /// {L,+,M,+,N}. This returns either the two roots (which might be the same) or
8348 /// two SCEVCouldNotCompute objects.
8349 static Optional>
8350 SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
8346 /// For a given quadratic addrec, generate coefficients of the corresponding
8347 /// quadratic equation, multiplied by a common value to ensure that they are
8348 /// integers.
8349 /// The returned value is a tuple { A, B, C, M, BitWidth }, where
8350 /// Ax^2 + Bx + C is the quadratic function, M is the value that A, B and C
8351 /// were multiplied by, and BitWidth is the bit width of the original addrec
8352 /// coefficients.
8353 /// This function returns None if the addrec coefficients are not compile-
8354 /// time constants.
8355 static Optional>
8356 GetQuadraticEquation(const SCEVAddRecExpr *AddRec) {
83518357 assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
83528358 const SCEVConstant *LC = dyn_cast(AddRec->getOperand(0));
83538359 const SCEVConstant *MC = dyn_cast(AddRec->getOperand(1));
83548360 const SCEVConstant *NC = dyn_cast(AddRec->getOperand(2));
8361 LLVM_DEBUG(dbgs() << __func__ << ": analyzing quadratic addrec: "
8362 << *AddRec << '\n');
83558363
83568364 // We currently can only solve this if the coefficients are constants.
8357 if (!LC || !MC || !NC)
8365 if (!LC || !MC || !NC) {
8366 LLVM_DEBUG(dbgs() << __func__ << ": coefficients are not constant\n");
83588367 return None;
8359
8360 uint32_t BitWidth = LC->getAPInt().getBitWidth();
8361 const APInt &L = LC->getAPInt();
8362 const APInt &M = MC->getAPInt();
8363 const APInt &N = NC->getAPInt();
8364 APInt Two(BitWidth, 2);
8365
8366 // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C
8367
8368 // The A coefficient is N/2
8369 APInt A = N.sdiv(Two);
8370
8371 // The B coefficient is M-N/2
8372 APInt B = M;
8373 B -= A; // A is the same as N/2.
8374
8375 // The C coefficient is L.
8376 const APInt& C = L;
8377
8378 // Compute the B^2-4ac term.
8379 APInt SqrtTerm = B;
8380 SqrtTerm *= B;
8381 SqrtTerm -= 4 * (A * C);
8382
8383 if (SqrtTerm.isNegative()) {
8384 // The loop is provably infinite.
8368 }
8369
8370 APInt L = LC->getAPInt();
8371 APInt M = MC->getAPInt();
8372 APInt N = NC->getAPInt();
8373 assert(!N.isNullValue() && "This is not a quadratic addrec");
8374
8375 unsigned BitWidth = LC->getAPInt().getBitWidth();
8376 unsigned NewWidth = BitWidth + 1;
8377 LLVM_DEBUG(dbgs() << __func__ << ": addrec coeff bw: "
8378 << BitWidth << '\n');
8379 // The sign-extension (as opposed to a zero-extension) here matches the
8380 // extension used in SolveQuadraticEquationWrap (with the same motivation).
8381 N = N.sext(NewWidth);
8382 M = M.sext(NewWidth);
8383 L = L.sext(NewWidth);
8384
8385 // The increments are M, M+N, M+2N, ..., so the accumulated values are
8386 // L+M, (L+M)+(M+N), (L+M)+(M+N)+(M+2N), ..., that is,
8387 // L+M, L+2M+N, L+3M+3N, ...
8388 // After n iterations the accumulated value Acc is L + nM + n(n-1)/2 N.
8389 //
8390 // The equation Acc = 0 is then
8391 // L + nM + n(n-1)/2 N = 0, or 2L + 2M n + n(n-1) N = 0.
8392 // In a quadratic form it becomes:
8393 // N n^2 + (2M-N) n + 2L = 0.
8394
8395 APInt A = N;
8396 APInt B = 2 * M - A;
8397 APInt C = 2 * L;
8398 APInt T = APInt(NewWidth, 2);
8399 LLVM_DEBUG(dbgs() << __func__ << ": equation " << A << "x^2 + " << B
8400 << "x + " << C << ", coeff bw: " << NewWidth
8401 << ", multiplied by " << T << '\n');
8402 return std::make_tuple(A, B, C, T, BitWidth);
8403 }
8404
8405 /// Helper function to compare optional APInts:
8406 /// (a) if X and Y both exist, return min(X, Y),
8407 /// (b) if neither X nor Y exist, return None,
8408 /// (c) if exactly one of X and Y exists, return that value.
8409 static Optional MinOptional(Optional X, Optional Y) {
8410 if (X.hasValue() && Y.hasValue()) {
8411 unsigned W = std::max(X->getBitWidth(), Y->getBitWidth());
8412 APInt XW = X->sextOrSelf(W);
8413 APInt YW = Y->sextOrSelf(W);
8414 return XW.slt(YW) ? *X : *Y;
8415 }
8416 if (!X.hasValue() && !Y.hasValue())
83858417 return None;
8386 }
8387
8388 // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest
8389 // integer value or else APInt::sqrt() will assert.
8390 APInt SqrtVal = SqrtTerm.sqrt();
8391
8392 // Compute the two solutions for the quadratic formula.
8393 // The divisions must be performed as signed divisions.
8394 APInt NegB = -std::move(B);
8395 APInt TwoA = std::move(A);
8396 TwoA <<= 1;
8397 if (TwoA.isNullValue())
8418 return X.hasValue() ? *X : *Y;
8419 }
8420
8421 /// Helper function to truncate an optional APInt to a given BitWidth.
8422 /// When solving addrec-related equations, it is preferable to return a value
8423 /// that has the same bit width as the original addrec's coefficients. If the
8424 /// solution fits in the original bit width, truncate it (except for i1).
8425 /// Returning a value of a different bit width may inhibit some optimizations.
8426 ///
8427 /// In general, a solution to a quadratic equation generated from an addrec
8428 /// may require BW+1 bits, where BW is the bit width of the addrec's
8429 /// coefficients. The reason is that the coefficients of the quadratic
8430 /// equation are BW+1 bits wide (to avoid truncation when converting from
8431 /// the addrec to the equation).
8432 static Optional TruncIfPossible(Optional X, unsigned BitWidth) {
8433 if (!X.hasValue())
83988434 return None;
8399
8400 LLVMContext &Context = SE.getContext();
8401
8402 ConstantInt *Solution1 =
8403 ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA));
8404 ConstantInt *Solution2 =
8405 ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA));
8406
8407 return std::make_pair(cast(SE.getConstant(Solution1)),
8408 cast(SE.getConstant(Solution2)));
8435 unsigned W = X->getBitWidth();
8436 if (BitWidth > 1 && BitWidth < W && X->isIntN(BitWidth))
8437 return X->trunc(BitWidth);
8438 return X;
8439 }
8440
8441 /// Let c(n) be the value of the quadratic chrec {L,+,M,+,N} after n
8442 /// iterations. The values L, M, N are assumed to be signed, and they
8443 /// should all have the same bit widths.
8444 /// Find the least n >= 0 such that c(n) = 0 in the arithmetic modulo 2^BW,
8445 /// where BW is the bit width of the addrec's coefficients.
8446 /// If the calculated value is a BW-bit integer (for BW > 1), it will be
8447 /// returned as such, otherwise the bit width of the returned value may
8448 /// be greater than BW.
8449 ///
8450 /// This function returns None if
8451 /// (a) the addrec coefficients are not constant, or
8452 /// (b) SolveQuadraticEquationWrap was unable to find a solution. For cases
8453 /// like x^2 = 5, no integer solutions exist, in other cases an integer
8454 /// solution may exist, but SolveQuadraticEquationWrap may fail to find it.
8455 static Optional
8456 SolveQuadraticAddRecExact(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
8457 APInt A, B, C, M;
8458 unsigned BitWidth;
8459 auto T = GetQuadraticEquation(AddRec);
8460 if (!T.hasValue())
8461 return None; // addrec coefficients are not compile-time constants
8462 // T holds { A, B, C, multiplier, BitWidth }; the coefficients are BitWidth+1 bits wide, which is the range width passed to the solver below.
8463 std::tie(A, B, C, M, BitWidth) = *T;
8464 LLVM_DEBUG(dbgs() << __func__ << ": solving for unsigned overflow\n");
8465 Optional X = APIntOps::SolveQuadraticEquationWrap(A, B, C, BitWidth+1);
8466 if (!X.hasValue())
8467 return None;
8468 // The solver can also return a point where the value merely wraps without being zero, so re-evaluate the addrec at X and accept only an exact zero.
8469 ConstantInt *CX = ConstantInt::get(SE.getContext(), *X);
8470 ConstantInt *V = EvaluateConstantChrecAtConstant(AddRec, CX, SE);
8471 if (!V->isZero())
8472 return None;
8473 // Hand back the solution in the addrec's own bit width when it fits.
8474 return TruncIfPossible(X, BitWidth);
8475 }
8476
8477 /// Let c(n) be the value of the quadratic chrec {0,+,M,+,N} after n
8478 /// iterations. The values M, N are assumed to be signed, and they
8479 /// should all have the same bit widths.
8480 /// Find the least n such that c(n) does not belong to the given range,
8481 /// while c(n-1) does.
8482 ///
8483 /// This function returns None if
8484 /// (a) the addrec coefficients are not constant, or
8485 /// (b) SolveQuadraticEquationWrap was unable to find a solution for the
8486 /// bounds of the range.
8487 static Optional
8488 SolveQuadraticAddRecRange(const SCEVAddRecExpr *AddRec,
8489 const ConstantRange &Range, ScalarEvolution &SE) {
8490 assert(AddRec->getOperand(0)->isZero() &&
8491 "Starting value of addrec should be 0");
8492 LLVM_DEBUG(dbgs() << __func__ << ": solving boundary crossing for range "
8493 << Range << ", addrec " << *AddRec << '\n');
8494 // This case is handled in getNumIterationsInRange. Here we can assume that
8495 // we start in the range.
8496 assert(Range.contains(APInt(SE.getTypeSizeInBits(AddRec->getType()), 0)) &&
8497 "Addrec's initial value should be in range");
8498
8499 APInt A, B, C, M;
8500 unsigned BitWidth;
8501 auto T = GetQuadraticEquation(AddRec);
8502 if (!T.hasValue())
8503 return None;
8504
8505 // Be careful about the return value: there can be two reasons for not
8506 // returning an actual number. First, if no solutions to the equations
8507 // were found, and second, if the solutions don't leave the given range.
8508 // The first case means that the actual solution is "unknown", the second
8509 // means that it's known, but not valid. If the solution is unknown, we
8510 // cannot make any conclusions.
8511 // Return a pair: the optional solution and a flag indicating if the
8512 // solution was found.
8513 auto SolveForBoundary = [&](APInt Bound) -> std::pair,bool> {
8514 // Solve for signed overflow and unsigned overflow, pick the lower
8515 // solution.
8516 LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: checking boundary "
8517 << Bound << " (before multiplying by " << M << ")\n");
8518 Bound *= M; // The quadratic equation multiplier.
8519
8520 Optional SO = None;
8521 if (BitWidth > 1) {
8522 LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: solving for "
8523 "signed overflow\n");
8524 SO = APIntOps::SolveQuadraticEquationWrap(A, B, -Bound, BitWidth);
8525 }
8526 LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: solving for "
8527 "unsigned overflow\n");
8528 Optional UO = APIntOps::SolveQuadraticEquationWrap(A, B, -Bound,
8529 BitWidth+1);
8530
8531 auto LeavesRange = [&] (const APInt &X) {
8532 ConstantInt *C0 = ConstantInt::get(SE.getContext(), X);
8533 ConstantInt *V0 = EvaluateConstantChrecAtConstant(AddRec, C0, SE);
8534 if (Range.contains(V0->getValue()))
8535 return false;
8536 // X should be at least 1, so X-1 is non-negative.
8537 ConstantInt *C1 = ConstantInt::get(SE.getContext(), X-1);
8538 ConstantInt *V1 = EvaluateConstantChrecAtConstant(AddRec, C1, SE);
8539 if (Range.contains(V1->getValue()))
8540 return true;
8541 return false;
8542 };
8543
8544 // If SolveQuadraticEquationWrap returns None, it means that there can
8545 // be a solution, but the function failed to find it. We cannot treat it
8546 // as "no solution".
8547 if (!SO.hasValue() || !UO.hasValue())
8548 return { None, false };
8549
8550 // Check the smaller value first to see if it leaves the range.
8551 // At this point, both SO and UO must have values.
8552 Optional Min = MinOptional(SO, UO);
8553 if (LeavesRange(*Min))
8554 return { Min, true };
8555 Optional Max = Min == SO ? UO : SO;
8556 if (LeavesRange(*Max))
8557 return { Max, true };
8558
8559 // Solutions were found, but were eliminated, hence the "true".
8560 return { None, true };
8561 };
8562
8563 std::tie(A, B, C, M, BitWidth) = *T;
8564 // Lower bound is inclusive, subtract 1 to represent the exiting value.
8565 APInt Lower = Range.getLower().sextOrSelf(A.getBitWidth()) - 1;
8566 APInt Upper = Range.getUpper().sextOrSelf(A.getBitWidth());
8567 auto SL = SolveForBoundary(Lower);
8568 auto SU = SolveForBoundary(Upper);
8569 // If any of the solutions was unknown, no meaningful conclusions can
8570 // be made.
8571 if (!SL.second || !SU.second)
8572 return None;
8573
8574 // Claim: The correct solution is not some value between Min and Max.
8575 //
8576 // Justification: Assuming that Min and Max are different values, one of
8577 // them is when the first signed overflow happens, the other is when the
8578 // first unsigned overflow happens. Crossing the range boundary is only
8579 // possible via an overflow (treating 0 as a special case of it, modeling
8580 // an overflow as crossing k*2^W for some k).
8581 //
8582 // The interesting case here is when Min was eliminated as an invalid
8583 // solution, but Max was not. The argument is that if there was another
8584 // overflow between Min and Max, it would also have been eliminated if
8585 // it was considered.
8586 //
8587 // For a given boundary, it is possible to have two overflows of the same
8588 // type (signed/unsigned) without having the other type in between: this
8589 // can happen when the vertex of the parabola is between the iterations
8590 // corresponding to the overflows. This is only possible when the two
8591 // overflows cross k*2^W for the same k. In such case, if the second one
8592 // left the range (and was the first one to do so), the first overflow
8593 // would have to enter the range, which would mean that either we had left
8594 // the range before or that we started outside of it. Both of these cases
8595 // are contradictions.
8596 //
8597 // Claim: In the case where SolveForBoundary returns None, the correct
8598 // solution is not some value between the Max for this boundary and the
8599 // Min of the other boundary.
8600 //
8601 // Justification: Assume that we had such Max_A and Min_B corresponding
8602 // to range boundaries A and B and such that Max_A < Min_B. If there was
8603 // a solution between Max_A and Min_B, it would have to be caused by an
8604 // overflow corresponding to either A or B. It cannot correspond to B,
8605 // since Min_B is the first occurrence of such an overflow. If it
8606 // corresponded to A, it would have to be either a signed or an unsigned
8607 // overflow that is larger than both eliminated overflows for A. But
8608 // between the eliminated overflows and this overflow, the values would
8609 // cover the entire value space, thus crossing the other boundary, which
8610 // is a contradiction.
8611
8612 return TruncIfPossible(MinOptional(SL.first, SU.first), BitWidth);
84098613 }
84108614
84118615 ScalarEvolution::ExitLimit
84408644 // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
84418645 // the quadratic equation to solve it.
84428646 if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
8443 if (auto Roots = SolveQuadraticEquation(AddRec, *this)) {
8444 const SCEVConstant *R1 = Roots->first;
8445 const SCEVConstant *R2 = Roots->second;
8446 // Pick the smallest positive root value.
8447 if (ConstantInt *CB = dyn_cast(ConstantExpr::getICmp(
8448 CmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) {
8449 if (!CB->getZExtValue())
8450 std::swap(R1, R2); // R1 is the minimum root now.
8451
8452 // We can only use this value if the chrec ends up with an exact zero
8453 // value at this index. When solving for "X*X != 5", for example, we
8454 // should not accept a root of 2.
8455 const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
8456 if (Val->isZero())
8457 // We found a quadratic root!
8458 return ExitLimit(R1, R1, false, Predicates);
8459 }
8647 // We can only use this value if the chrec ends up with an exact zero
8648 // value at this index. When solving for "X*X != 5", for example, we
8649 // should not accept a root of 2.
8650 if (auto S = SolveQuadraticAddRecExact(AddRec, *this)) {
8651 const auto *R = cast(getConstant(S.getValue()));
8652 return ExitLimit(R, R, false, Predicates);
84608653 }
84618654 return getCouldNotCompute();
84628655 }
1056410757 ConstantInt::get(SE.getContext(), ExitVal - 1), SE)->getValue()) &&
1056510758 "Linear scev computation is off in a bad way!");
1056610759 return SE.getConstant(ExitValue);
10567 } else if (isQuadratic()) {
10568 // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the
10569 // quadratic equation to solve it. To do this, we must frame our problem in
10570 // terms of figuring out when zero is crossed, instead of when
10571 // Range.getUpper() is crossed.
10572 SmallVector NewOps(op_begin(), op_end());
10573 NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
10574 const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(), FlagAnyWrap);
10575
10576 // Next, solve the constructed addrec
10577 if (auto Roots =
10578 SolveQuadraticEquation(cast(NewAddRec), SE)) {
10579 const SCEVConstant *R1 = Roots->first;
10580 const SCEVConstant *R2 = Roots->second;
10581 // Pick the smallest positive root value.
10582 if (ConstantInt *CB = dyn_cast(ConstantExpr::getICmp(
10583 ICmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) {
10584 if (!CB->getZExtValue())
10585 std::swap(R1, R2); // R1 is the minimum root now.
10586
10587 // Make sure the root is not off by one. The returned iteration should
10588 // not be in the range, but the previous one should be. When solving
10589 // for "X*X < 5", for example, we should not return a root of 2.
10590 ConstantInt *R1Val =
10591 EvaluateConstantChrecAtConstant(this, R1->getValue(), SE);
10592 if (Range.contains(R1Val->getValue())) {
10593 // The next iteration must be out of the range...
10594 ConstantInt *NextVal =
10595 ConstantInt::get(SE.getContext(), R1->getAPInt() + 1);
10596
10597 R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
10598 if (!Range.contains(R1Val->getValue()))
10599 return SE.getConstant(NextVal);
10600 return SE.getCouldNotCompute(); // Something strange happened
10601 }
10602
10603 // If R1 was not in the range, then it is a good return value. Make
10604 // sure that R1-1 WAS in the range though, just in case.
10605 ConstantInt *NextVal =
10606 ConstantInt::get(SE.getContext(), R1->getAPInt() - 1);
10607 R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
10608 if (Range.contains(R1Val->getValue()))
10609 return R1;
10610 return SE.getCouldNotCompute(); // Something strange happened
10611 }
10612 }
10760 }
10761
10762 if (isQuadratic()) {
10763 if (auto S = SolveQuadraticAddRecRange(this, Range, SE))
10764 return SE.getConstant(S.getValue());
1061310765 }
1061410766
1061510767 return SE.getCouldNotCompute();
1515 #include "llvm/ADT/ArrayRef.h"
1616 #include "llvm/ADT/FoldingSet.h"
1717 #include "llvm/ADT/Hashing.h"
18 #include "llvm/ADT/Optional.h"
1819 #include "llvm/ADT/SmallString.h"
1920 #include "llvm/ADT/StringRef.h"
2021 #include "llvm/Config/llvm-config.h"
27062707 }
27072708 llvm_unreachable("Unknown APInt::Rounding enum");
27082709 }
2710
2711 Optional
2712 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2713 unsigned RangeWidth) {
2714 unsigned CoeffWidth = A.getBitWidth();
2715 assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2716 assert(RangeWidth <= CoeffWidth &&
2717 "Value range width should be less than coefficient width");
2718 assert(RangeWidth > 1 && "Value range bit width should be > 1");
2719
2720 LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2721 << "x + " << C << ", rw:" << RangeWidth << '\n');
2722
2723 // Identify 0 as a (non)solution immediately.
2724 if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2725 LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2726 return APInt(CoeffWidth, 0);
2727 }
2728
2729 // The result of APInt arithmetic has the same bit width as the operands,
2730 // so it can actually lose high bits. A product of two n-bit integers needs
2731 // 2n-1 bits to represent the full value.
2732 // The operation done below (on quadratic coefficients) that can produce
2733 // the largest value is the evaluation of the equation during bisection,
2734 // which needs 3 times the bitwidth of the coefficient, so the total number
2735 // of required bits is 3n.
2736 //
2737 // The purpose of this extension is to simulate the set Z of all integers,
2738 // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2739 // and negative numbers (not so much in a modulo arithmetic). The method
2740 // used to solve the equation is based on the standard formula for real
2741 // numbers, and uses the concepts of "positive" and "negative" with their
2742 // usual meanings.
2743 CoeffWidth *= 3;
2744 A = A.sext(CoeffWidth);
2745 B = B.sext(CoeffWidth);
2746 C = C.sext(CoeffWidth);
2747
2748 // Make A > 0 for simplicity. Negate cannot overflow at this point because
2749 // the bit width has increased.
2750 if (A.isNegative()) {
2751 A.negate();
2752 B.negate();
2753 C.negate();
2754 }
2755
2756 // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2757 // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2758 // and R = 2^BitWidth.
2759 // Since we're trying not only to find exact solutions, but also values
2760 // that "wrap around", such a set will always have a solution, i.e. an x
2761 // that satisfies at least one of the equations, or such that |q(x)|
2762 // exceeds kR, while |q(x-1)| for the same k does not.
2763 //
2764 // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2765 // positive solution n (in the above sense), and also such that the n
2766 // will be the least among all solutions corresponding to k = 0, 1, ...
2767 // (more precisely, the least element in the set
2768 // { n(k) | k is such that a solution n(k) exists }).
2769 //
2770 // Consider the parabola (over real numbers) that corresponds to the
2771 // quadratic equation. Since A > 0, the arms of the parabola will point
2772 // up. Picking different values of k will shift it up and down by R.
2773 //
2774 // We want to shift the parabola in such a way as to reduce the problem
2775 // of solving q(x) = kR to solving shifted_q(x) = 0.
2776 // (The interesting solutions are the ceilings of the real number
2777 // solutions.)
2778 APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2779 APInt TwoA = 2 * A;
2780 APInt SqrB = B * B;
2781 bool PickLow;
2782
2783 auto RoundUp = [] (const APInt &V, const APInt &A) {
2784 assert(A.isStrictlyPositive());
2785 APInt T = V.abs().urem(A);
2786 if (T.isNullValue())
2787 return V;
2788 return V.isNegative() ? V+T : V+(A-T);
2789 };
2790
2791 // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2792 // iff B is positive.
2793 if (B.isNonNegative()) {
2794 // If B >= 0, the vertex is at a negative location (or at 0), so in
2795 // order to have a non-negative solution we need to pick k that makes
2796 // C-kR negative. To satisfy all the requirements for the solution
2797 // that we are looking for, it needs to be closest to 0 of all k.
2798 C = C.srem(R);
2799 if (C.isStrictlyPositive())
2800 C -= R;
2801 // Pick the greater solution.
2802 PickLow = false;
2803 } else {
2804 // If B < 0, the vertex is at a positive location. For any solution
2805 // to exist, the discriminant must be non-negative. This means that
2806 // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2807 // lower bound on values of k: kR >= C - B^2/4A.
2808 APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2809 // Round LowkR up (towards +inf) to the nearest kR.
2810 LowkR = RoundUp(LowkR, R);
2811
2812 // If there exists k meeting the condition above, and such that
2813 // C-kR > 0, there will be two positive real number solutions of
2814 // q(x) = kR. Out of all such values of k, pick the one that makes
2815 // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2816 // In other words, find maximum k such that LowkR <= kR < C.
2817 if (C.sgt(LowkR)) {
2818 // If LowkR < C, then such a k is guaranteed to exist because
2819 // LowkR itself is a multiple of R.
2820 C -= -RoundUp(-C, R); // C = C - RoundDown(C, R)
2821 // Pick the smaller solution.
2822 PickLow = true;
2823 } else {
2824 // If C-kR < 0 for all potential k's, it means that one solution
2825 // will be negative, while the other will be positive. The positive
2826 // solution will shift towards 0 if the parabola is moved up.
2827 // Pick the kR closest to the lower bound (i.e. make C-kR closest
2828 // to 0, or in other words, out of all parabolas that have solutions,
2829 // pick the one that is the farthest "up").
2830 // Since LowkR is itself a multiple of R, simply take C-LowkR.
2831 C -= LowkR;
2832 // Pick the greater solution.
2833 PickLow = false;
2834 }
2835 }
2836
2837 LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2838 << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2839
2840 APInt D = SqrB - 4*A*C;
2841 assert(D.isNonNegative() && "Negative discriminant");
2842 APInt SQ = D.sqrt();
2843
2844 APInt Q = SQ * SQ;
2845 bool InexactSQ = Q != D;
2846 // The calculated SQ may actually be greater than the exact (non-integer)
2847 // value. If that's the case, decrement SQ to get a value that is lower.
2848 if (Q.sgt(D))
2849 SQ -= 1;
2850
2851 APInt X;
2852 APInt Rem;
2853
2854 // SQ is rounded down (i.e. SQ * SQ <= D), so the roots may be inexact.
2855 // When using the quadratic formula directly, the calculated low root
2856 // may be greater than the exact one, since we would be subtracting SQ.
2857 // To make sure that the calculated root is not greater than the exact
2858 // one, subtract SQ+1 when calculating the low root (for inexact value
2859 // of SQ).
2860 if (PickLow)
2861 APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
2862 else
2863 APInt::sdivrem(-B + SQ, TwoA, X, Rem);
2864
2865 // The updated coefficients should be such that the (exact) solution is
2866 // positive. Since APInt division rounds towards 0, the calculated one
2867 // can be 0, but cannot be negative.
2868 assert(X.isNonNegative() && "Solution should be non-negative");
2869
2870 if (!InexactSQ && Rem.isNullValue()) {
2871 LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
2872 return X;
2873 }
2874
2875 assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
2876 // The exact value of the square root of D should be between SQ and SQ+1.
2877 // This implies that the solution should be between that corresponding to
2878 // SQ (i.e. X) and that corresponding to SQ+1.
2879 //
2880 // The calculated X cannot be greater than the exact (real) solution.
2881 // Actually it must be strictly less than the exact solution, while
2882 // X+1 will be greater than or equal to it.
2883
2884 APInt VX = (A*X + B)*X + C;
2885 APInt VY = VX + TwoA*X + A + B;
2886 bool SignChange = VX.isNegative() != VY.isNegative() ||
2887 VX.isNullValue() != VY.isNullValue();
2888 // If the sign did not change between X and X+1, X is not a valid solution.
2889 // This could happen when the actual (exact) roots don't have an integer
2890 // between them, so they would both be contained between X and X+1.
2891 if (!SignChange) {
2892 LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
2893 return None;
2894 }
2895
2896 X += 1;
2897 LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
2898 return X;
2899 }
0 ; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s
1
2 target triple = "x86_64-unknown-linux-gnu"
3
4 ; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'f0':
5 ; CHECK-NEXT: Classifying expressions for: @f0
6 ; CHECK-NEXT: %v0 = phi i16 [ 2, %b0 ], [ %v2, %b1 ]
7 ; CHECK-NEXT: --> {2,+,1}<%b1> U: [2,4) S: [2,4) Exits: 3 LoopDispositions: { %b1: Computable }
8 ; CHECK-NEXT: %v1 = phi i16 [ 1, %b0 ], [ %v3, %b1 ]
9 ; CHECK-NEXT: --> {1,+,2,+,1}<%b1> U: full-set S: full-set Exits: 3 LoopDispositions: { %b1: Computable }
10 ; CHECK-NEXT: %v2 = add nsw i16 %v0, 1
11 ; CHECK-NEXT: --> {3,+,1}<%b1> U: [3,5) S: [3,5) Exits: 4 LoopDispositions: { %b1: Computable }
12 ; CHECK-NEXT: %v3 = add nsw i16 %v1, %v0
13 ; CHECK-NEXT: --> {3,+,3,+,1}<%b1> U: full-set S: full-set Exits: 6 LoopDispositions: { %b1: Computable }
14 ; CHECK-NEXT: %v4 = and i16 %v3, 1
15 ; CHECK-NEXT: --> (zext i1 {true,+,true,+,true}<%b1> to i16) U: [0,2) S: [0,2) Exits: 0 LoopDispositions: { %b1: Computable }
16 ; CHECK-NEXT: Determining loop execution counts for: @f0
17 ; CHECK-NEXT: Loop %b1: backedge-taken count is 1
18 ; CHECK-NEXT: Loop %b1: max backedge-taken count is 1
19 ; CHECK-NEXT: Loop %b1: Predicated backedge-taken count is 1
20 ; CHECK-NEXT: Predicates:
21 ; CHECK-EMPTY:
22 ; CHECK-NEXT: Loop %b1: Trip multiple is 2
23 define void @f0() {
24 b0:
25 br label %b1
26
27 b1: ; preds = %b1, %b0
28 %v0 = phi i16 [ 2, %b0 ], [ %v2, %b1 ]
29 %v1 = phi i16 [ 1, %b0 ], [ %v3, %b1 ]
30 %v2 = add nsw i16 %v0, 1
31 %v3 = add nsw i16 %v1, %v0
32 %v4 = and i16 %v3, 1
33 %v5 = icmp ne i16 %v4, 0
34 br i1 %v5, label %b1, label %b2
35
36 b2: ; preds = %b1
37 ret void
38 }
39
40 @g0 = common dso_local global i16 0, align 2
41 @g1 = common dso_local global i32 0, align 4
42 @g2 = common dso_local global i32* null, align 8
43
44 ; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'f1':
45 ; CHECK-NEXT: Classifying expressions for: @f1
46 ; CHECK-NEXT: %v0 = phi i16 [ 0, %b0 ], [ %v3, %b1 ]
47 ; CHECK-NEXT: --> {0,+,3,+,1}<%b1> U: full-set S: full-set Exits: 7 LoopDispositions: { %b1: Computable }
48 ; CHECK-NEXT: %v1 = phi i32 [ 3, %b0 ], [ %v6, %b1 ]
49 ; CHECK-NEXT: --> {3,+,1}<%b1> U: [3,6) S: [3,6) Exits: 5 LoopDispositions: { %b1: Computable }
50 ; CHECK-NEXT: %v2 = trunc i32 %v1 to i16
51 ; CHECK-NEXT: --> {3,+,1}<%b1> U: [3,6) S: [3,6) Exits: 5 LoopDispositions: { %b1: Computable }
52 ; CHECK-NEXT: %v3 = add i16 %v0, %v2
53 ; CHECK-NEXT: --> {3,+,4,+,1}<%b1> U: full-set S: full-set Exits: 12 LoopDispositions: { %b1: Computable }
54 ; CHECK-NEXT: %v4 = and i16 %v3, 1
55 ; CHECK-NEXT: --> (zext i1 {true,+,false,+,true}<%b1> to i16) U: [0,2) S: [0,2) Exits: 0 LoopDispositions: { %b1: Computable }
56 ; CHECK-NEXT: %v6 = add nuw nsw i32 %v1, 1
57 ; CHECK-NEXT: --> {4,+,1}<%b1> U: [4,7) S: [4,7) Exits: 6 LoopDispositions: { %b1: Computable }
58 ; CHECK-NEXT: %v7 = phi i32 [ %v1, %b1 ]
59 ; CHECK-NEXT: --> %v7 U: [3,6) S: [3,6)
60 ; CHECK-NEXT: %v8 = phi i16 [ %v3, %b1 ]
61 ; CHECK-NEXT: --> %v8 U: full-set S: full-set
62 ; CHECK-NEXT: Determining loop execution counts for: @f1
63 ; CHECK-NEXT: Loop %b3: Unpredictable backedge-taken count.
64 ; CHECK-NEXT: Loop %b3: Unpredictable max backedge-taken count.
65 ; CHECK-NEXT: Loop %b3: Unpredictable predicated backedge-taken count.
66 ; CHECK-NEXT: Loop %b1: backedge-taken count is 2
67 ; CHECK-NEXT: Loop %b1: max backedge-taken count is 2
68 ; CHECK-NEXT: Loop %b1: Predicated backedge-taken count is 2
69 ; CHECK-NEXT: Predicates:
70 ; CHECK-EMPTY:
71 ; CHECK-NEXT: Loop %b1: Trip multiple is 3
; @f1: loop %b1 adds a truncated i32 counter into an i16 accumulator and exits
; to %b2 once bit 0 of the sum is clear; %b3 then loops forever.
72 define void @f1() #0 {
73 b0:
74 store i16 0, i16* @g0, align 2
75 store i32* @g1, i32** @g2, align 8 ; @g2 now holds the address of @g1
76 br label %b1
77
78 b1: ; preds = %b1, %b0
79 %v0 = phi i16 [ 0, %b0 ], [ %v3, %b1 ] ; accumulator: 0 on entry, then %v3
80 %v1 = phi i32 [ 3, %b0 ], [ %v6, %b1 ] ; counter: 3 on entry, then %v1 + 1
81 %v2 = trunc i32 %v1 to i16
82 %v3 = add i16 %v0, %v2
83 %v4 = and i16 %v3, 1 ; keep only bit 0 of %v3
84 %v5 = icmp eq i16 %v4, 0
85 %v6 = add nuw nsw i32 %v1, 1
86 br i1 %v5, label %b2, label %b1 ; exit once bit 0 is clear
87
88 b2: ; preds = %b1
89 %v7 = phi i32 [ %v1, %b1 ] ; single-incoming phi: counter value at exit
90 %v8 = phi i16 [ %v3, %b1 ] ; single-incoming phi: accumulator value at exit
91 store i32 %v7, i32* @g1, align 4
92 store i16 %v8, i16* @g0, align 2
93 br label %b3
94
95 b3: ; preds = %b3, %b2
96 br label %b3 ; unconditional self-branch: never leaves %b3
97 }
98
99 attributes #0 = { nounwind uwtable "target-cpu"="x86-64" }
0 ; RUN: opt -analyze -scalar-evolution -S < %s | FileCheck %s
1
2 ; The exit value from this loop was originally calculated as 0.
3 ; The actual exit condition is 256*256 == 0 (in i16).
4
5 ; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'f0':
6 ; CHECK-NEXT: Classifying expressions for: @f0
7 ; CHECK-NEXT: %v1 = phi i16 [ 0, %b0 ], [ %v2, %b1 ]
8 ; CHECK-NEXT: --> {0,+,-1}<%b1> U: [-255,1) S: [-255,1) Exits: -255 LoopDispositions: { %b1: Computable }
9 ; CHECK-NEXT: %v2 = add i16 %v1, -1
10 ; CHECK-NEXT: --> {-1,+,-1}<%b1> U: [-256,0) S: [-256,0) Exits: -256 LoopDispositions: { %b1: Computable }
11 ; CHECK-NEXT: %v3 = mul i16 %v2, %v2
12 ; CHECK-NEXT: --> {1,+,3,+,2}<%b1> U: full-set S: full-set Exits: 0 LoopDispositions: { %b1: Computable }
13 ; CHECK-NEXT: %v5 = phi i16 [ %v2, %b1 ]
14 ; CHECK-NEXT: --> %v5 U: [-256,0) S: [-256,0)
15 ; CHECK-NEXT: %v6 = phi i16 [ %v3, %b1 ]
16 ; CHECK-NEXT: --> %v6 U: full-set S: full-set
17 ; CHECK-NEXT: %v7 = sext i16 %v5 to i32
18 ; CHECK-NEXT: --> (sext i16 %v5 to i32) U: [-256,0) S: [-256,0)
19 ; CHECK-NEXT: Determining loop execution counts for: @f0
20 ; CHECK-NEXT: Loop %b1: backedge-taken count is 255
21 ; CHECK-NEXT: Loop %b1: max backedge-taken count is 255
22 ; CHECK-NEXT: Loop %b1: Predicated backedge-taken count is 255
23 ; CHECK-NEXT: Predicates:
24 ; CHECK-EMPTY:
25 ; CHECK-NEXT: Loop %b1: Trip multiple is 256
26
27
28 @g0 = global i32 0, align 4
29 @g1 = global i16 0, align 2
30
; @f0: counts %v1 down from 0 in i16 and leaves the loop when the square of
; the decremented value compares equal to 0 in i16.
31 define signext i32 @f0() {
32 b0:
33 br label %b1
34
35 b1: ; preds = %b1, %b0
36 %v1 = phi i16 [ 0, %b0 ], [ %v2, %b1 ] ; 0 on entry, then %v1 - 1
37 %v2 = add i16 %v1, -1
38 %v3 = mul i16 %v2, %v2 ; i16 multiply (no nsw/nuw flags)
39 %v4 = icmp eq i16 %v3, 0
40 br i1 %v4, label %b2, label %b1 ; exit when the i16 square is 0
41
42 b2: ; preds = %b1
43 %v5 = phi i16 [ %v2, %b1 ] ; single-incoming phi: %v2 at exit
44 %v6 = phi i16 [ %v3, %b1 ] ; single-incoming phi: %v3 at exit
45 %v7 = sext i16 %v5 to i32
46 store i32 %v7, i32* @g0, align 4
47 store i16 %v6, i16* @g1, align 2
48 ret i32 0
49 }
50
0 ; RUN: opt -analyze -scalar-evolution -S -debug-only=scalar-evolution,apint < %s 2>&1 | FileCheck %s
1 ; REQUIRES: asserts
2
3 ; Use the following template to get a chrec {L,+,M,+,N}.
4 ;
5 ; define signext i32 @func() {
6 ; entry:
7 ; br label %loop
8 ;
9 ; loop:
10 ; %ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ]
11 ; %inc = phi i32 [ X, %entry ], [ %inc1, %loop ]
12 ; %acc = phi i32 [ Y, %entry ], [ %acc1, %loop ]
13 ; %ivr1 = add i32 %ivr, %inc
14 ; %inc1 = add i32 %inc, Z ; M = inc1 = inc + N = X + N
15 ; %acc1 = add i32 %acc, %inc ; L = acc1 = X + Y
16 ; %and = and i32 %acc1, 2^W-1 ; iW
17 ; %cond = icmp eq i32 %and, 0
18 ; br i1 %cond, label %exit, label %loop
19 ;
20 ; exit:
21 ; %rv = phi i32 [ %acc1, %loop ]
22 ; ret i32 %rv
23 ; }
24 ;
25 ; From
26 ; X + Y = L
27 ; X + Z = M
28 ; Z = N
29 ; get
30 ; X = M - N
31 ; Y = N - M + L
32 ; Z = N
33
34 ; The connection between the chrec coefficients {L,+,M,+,N} and the quadratic
35 ; coefficients is that the quadratic equation is N x^2 + (2M-N) x + 2L = 0,
36 ; where the equation was multiplied by 2 to make the coefficient at x^2 an
37 ; integer (the actual equation is N/2 x^2 + (M-N/2) x + L = 0).
38
39 ; Quadratic equation: 2x^2 + 2x + 4 in i4, solution (wrap): 4
40 ; {14,+,14,+,14} -> X=0, Y=14, Z=14
41 ;
42 ; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'test01'
43 ; CHECK: GetQuadraticEquation: analyzing quadratic addrec: {-2,+,-2,+,-2}<%loop>
44 ; CHECK: GetQuadraticEquation: addrec coeff bw: 4
45 ; CHECK: GetQuadraticEquation: equation -2x^2 + -2x + -4, coeff bw: 5, multiplied by 2
46 ; CHECK: SolveQuadraticAddRecExact: solving for unsigned overflow
47 ; CHECK: SolveQuadraticEquationWrap: solving -2x^2 + -2x + -4, rw:5
48 ; CHECK: SolveQuadraticEquationWrap: updated coefficients 2x^2 + 2x + -28, rw:5
49 ; CHECK: SolveQuadraticEquationWrap: solution (wrap): 4
50 ; CHECK: Loop %loop: Unpredictable backedge-taken count
; test01: instance of the template above with X=0, Y=14, Z=14 and a 4-bit
; mask (15) applied before the exit comparison.
51 define signext i32 @test01() {
52 entry:
53 br label %loop
54
55 loop:
56 %ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ] ; running sum of %inc; only fed back into itself
57 %inc = phi i32 [ 0, %entry ], [ %inc1, %loop ] ; X = 0, stepped by Z each iteration
58 %acc = phi i32 [ 14, %entry ], [ %acc1, %loop ] ; Y = 14, grows by the current %inc
59 %ivr1 = add i32 %ivr, %inc
60 %inc1 = add i32 %inc, 14 ; Z = 14
61 %acc1 = add i32 %acc, %inc
62 %and = and i32 %acc1, 15 ; keep the low 4 bits
63 %cond = icmp eq i32 %and, 0
64 br i1 %cond, label %exit, label %loop ; leave once the masked value is 0
65
66 exit:
67 %rv = phi i32 [ %acc1, %loop ]
68 ret i32 %rv ; returns the final %acc1
69 }
70
71 ; Quadratic equation: 1x^2 + -73x + -146 in i32, solution (wrap): 75
72 ; {-72,+,-36,+,1} -> X=-37, Y=-35, Z=1
73 ;
74 ; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'test02':
75 ; CHECK: GetQuadraticEquation: analyzing quadratic addrec: {0,+,-36,+,1}<%loop>
76 ; CHECK: GetQuadraticEquation: addrec coeff bw: 32
77 ; CHECK: GetQuadraticEquation: equation 1x^2 + -73x + 0, coeff bw: 33, multiplied by 2
78 ; CHECK: SolveQuadraticAddRecRange: solving for signed overflow
79 ; CHECK: SolveQuadraticEquationWrap: solving 1x^2 + -73x + 4294967154, rw:32
80 ; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + -73x + -142, rw:32
81 ; CHECK: SolveQuadraticEquationWrap: solution (wrap): 75
82 ; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow
83 ; CHECK: SolveQuadraticEquationWrap: solving 1x^2 + -73x + 4294967154, rw:33
84 ; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + -73x + -4294967438, rw:33
85 ; CHECK: SolveQuadraticEquationWrap: solution (wrap): 65573
86 ; CHECK: SolveQuadraticAddRecRange: solving for signed overflow
87 ; CHECK: SolveQuadraticEquationWrap: solving 1x^2 + -73x + -146, rw:32
88 ; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + -73x + -146, rw:32
89 ; CHECK: SolveQuadraticEquationWrap: solution (wrap): 75
90 ; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow
91 ; CHECK: SolveQuadraticEquationWrap: solving 1x^2 + -73x + -146, rw:33
92 ; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + -73x + -146, rw:33
93 ; CHECK: SolveQuadraticEquationWrap: solution (wrap): 75
94 ; CHECK: Loop %loop: backedge-taken count is 75
; test02: instance of the template above with X=-37, Y=-35, Z=1; the exit
; test is a signed "greater than 0" comparison on the full 32-bit value.
95 define signext i32 @test02() {
96 entry:
97 br label %loop
98
99 loop:
100 %ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ] ; running sum of %inc; only fed back into itself
101 %inc = phi i32 [ -37, %entry ], [ %inc1, %loop ] ; X = -37, stepped by Z each iteration
102 %acc = phi i32 [ -35, %entry ], [ %acc1, %loop ] ; Y = -35, grows by the current %inc
103 %ivr1 = add i32 %ivr, %inc
104 %inc1 = add i32 %inc, 1 ; Z = 1
105 %acc1 = add i32 %acc, %inc
106 %and = and i32 %acc1, -1 ; mask is all ones, so %and has the same value as %acc1
107 %cond = icmp sgt i32 %and, 0
108 br i1 %cond, label %exit, label %loop ; leave once the value is positive (signed)
109
110 exit:
111 %rv = phi i32 [ %acc1, %loop ]
112 ret i32 %rv ; returns the final %acc1
113 }
114
115 ; Quadratic equation: 2x^2 - 4x + 34 in i4, solution (exact): 1.
116 ; {17,+,-1,+,2} -> X=-3, Y=20, Z=2
117 ;
118 ; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'test03':
119 ; CHECK: GetQuadraticEquation: analyzing quadratic addrec: {1,+,-1,+,2}<%loop>
120 ; CHECK: GetQuadraticEquation: addrec coeff bw: 4
121 ; CHECK: GetQuadraticEquation: equation 2x^2 + -4x + 2, coeff bw: 5, multiplied by 2
122 ; CHECK: SolveQuadraticAddRecExact: solving for unsigned overflow
123 ; CHECK: SolveQuadraticEquationWrap: solving 2x^2 + -4x + 2, rw:5
124 ; CHECK: SolveQuadraticEquationWrap: updated coefficients 2x^2 + -4x + 2, rw:5
125 ; CHECK: SolveQuadraticEquationWrap: solution (root): 1
126 ; CHECK: Loop %loop: backedge-taken count is 1
; test03: instance of the template above with X=-3, Y=20, Z=2 and a 4-bit
; mask (15) applied before the exit comparison.
127 define signext i32 @test03() {
128 entry:
129 br label %loop
130
131 loop:
132 %ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ] ; running sum of %inc; only fed back into itself
133 %inc = phi i32 [ -3, %entry ], [ %inc1, %loop ] ; X = -3, stepped by Z each iteration
134 %acc = phi i32 [ 20, %entry ], [ %acc1, %loop ] ; Y = 20, grows by the current %inc
135 %ivr1 = add i32 %ivr, %inc
136 %inc1 = add i32 %inc, 2 ; Z = 2
137 %acc1 = add i32 %acc, %inc
138 %and = and i32 %acc1, 15 ; keep the low 4 bits
139 %cond = icmp eq i32 %and, 0
140 br i1 %cond, label %exit, label %loop ; leave once the masked value is 0
141
142 exit:
143 %rv = phi i32 [ %acc1, %loop ]
144 ret i32 %rv ; returns the final %acc1
145 }
146
147 ; Quadratic equation 4x^2 + 2x + 2 in i16, solution (wrap): 181
148 ; {1,+,3,+,4} -> X=-1, Y=2, Z=4 (i16)
149 ;
150 ; This is an example where the returned solution is the first time an
151 ; unsigned wrap occurs, whereas the actual exit condition occurs much
152 ; later. The number of iterations returned by SolveQuadraticEquationWrap
153 ; is 181, but the loop will iterate 37174 times.
154 ;
155 ; Here is C code corresponding to this case; it calculates the number
156 ; of iterations:
157 ;
158 ; int test04() {
159 ; int c = 0;
160 ; int ivr = 0;
161 ; int inc = -1;
162 ; int acc = 2;
163 ;
164 ; while (acc & 0xffff) {
165 ; c++;
166 ; ivr += inc;
167 ; inc += 4;
168 ; acc += inc;
169 ; }
170 ;
171 ; return c;
172 ; }
173 ;
174
175 ; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'test04':
176 ; CHECK: GetQuadraticEquation: analyzing quadratic addrec: {0,+,3,+,4}<%loop>
177 ; CHECK: GetQuadraticEquation: addrec coeff bw: 16
178 ; CHECK: GetQuadraticEquation: equation 4x^2 + 2x + 0, coeff bw: 17, multiplied by 2
179 ; CHECK: SolveQuadraticAddRecRange: solving for signed overflow
180 ; CHECK: SolveQuadraticEquationWrap: solving 4x^2 + 2x + 2, rw:16
181 ; CHECK: SolveQuadraticEquationWrap: updated coefficients 4x^2 + 2x + -65534, rw:16
182 ; CHECK: SolveQuadraticEquationWrap: solution (wrap): 128
183 ; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow
184 ; CHECK: SolveQuadraticEquationWrap: solving 4x^2 + 2x + 2, rw:17
185 ; CHECK: SolveQuadraticEquationWrap: updated coefficients 4x^2 + 2x + -131070, rw:17
186 ; CHECK: SolveQuadraticEquationWrap: solution (wrap): 181
187 ; CHECK: SolveQuadraticAddRecRange: solving for signed overflow
188 ; CHECK: SolveQuadraticEquationWrap: solving 4x^2 + 2x + 2, rw:16
189 ; CHECK: SolveQuadraticEquationWrap: updated coefficients 4x^2 + 2x + -65534, rw:16
190 ; CHECK: SolveQuadraticEquationWrap: solution (wrap): 128
191 ; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow
192 ; CHECK: SolveQuadraticEquationWrap: solving 4x^2 + 2x + 2, rw:17
193 ; CHECK: SolveQuadraticEquationWrap: updated coefficients 4x^2 + 2x + -131070, rw:17
194 ; CHECK: SolveQuadraticEquationWrap: solution (wrap): 181
195 ; CHECK: GetQuadraticEquation: analyzing quadratic addrec: {1,+,3,+,4}<%loop>
196 ; CHECK: GetQuadraticEquation: addrec coeff bw: 16
197 ; CHECK: GetQuadraticEquation: equation 4x^2 + 2x + 2, coeff bw: 17, multiplied by 2
198 ; CHECK: SolveQuadraticAddRecExact: solving for unsigned overflow
199 ; CHECK: SolveQuadraticEquationWrap: solving 4x^2 + 2x + 2, rw:17
200 ; CHECK: SolveQuadraticEquationWrap: updated coefficients 4x^2 + 2x + -131070, rw:17
201 ; CHECK: SolveQuadraticEquationWrap: solution (wrap): 181
202 ; CHECK: Loop %loop: Unpredictable backedge-taken count.
; test04: instance of the template above with X=-1, Y=2, Z=4; instead of an
; "and" mask, the accumulator is truncated to i16 before the exit comparison.
203 define signext i32 @test04() {
204 entry:
205 br label %loop
206
207 loop:
208 %ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ] ; running sum of %inc; only fed back into itself
209 %inc = phi i32 [ -1, %entry ], [ %inc1, %loop ] ; X = -1, stepped by Z each iteration
210 %acc = phi i32 [ 2, %entry ], [ %acc1, %loop ] ; Y = 2, grows by the current %inc
211 %ivr1 = add i32 %ivr, %inc
212 %inc1 = add i32 %inc, 4 ; Z = 4
213 %acc1 = add i32 %acc, %inc
214 %and = trunc i32 %acc1 to i16 ; keep the low 16 bits
215 %cond = icmp eq i16 %and, 0
216 br i1 %cond, label %exit, label %loop ; leave once the low 16 bits are 0
217
218 exit:
219 %rv = phi i32 [ %acc1, %loop ]
220 ret i32 %rv ; returns the final %acc1
221 }
222
223 ; A case with signed arithmetic, but unsigned comparison.
224
225 ; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'test05':
226 ; CHECK: GetQuadraticEquation: analyzing quadratic addrec: {0,+,-1,+,-1}<%loop>
227 ; CHECK: GetQuadraticEquation: addrec coeff bw: 32
228 ; CHECK: GetQuadraticEquation: equation -1x^2 + -1x + 0, coeff bw: 33, multiplied by 2
229 ; CHECK: SolveQuadraticAddRecRange: solving for signed overflow
230 ; CHECK: SolveQuadraticEquationWrap: solving -1x^2 + -1x + 4, rw:32
231 ; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + 1x + -4, rw:32
232 ; CHECK: SolveQuadraticEquationWrap: solution (wrap): 2
233 ; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow
234 ; CHECK: SolveQuadraticEquationWrap: solving -1x^2 + -1x + 4, rw:33
235 ; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + 1x + -4, rw:33
236 ; CHECK: SolveQuadraticEquationWrap: solution (wrap): 2
237 ; CHECK: SolveQuadraticAddRecRange: solving for signed overflow
238 ; CHECK: SolveQuadraticEquationWrap: solving -1x^2 + -1x + -2, rw:32
239 ; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + 1x + -4294967294, rw:32
240 ; CHECK: SolveQuadraticEquationWrap: solution (wrap): 65536
241 ; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow
242 ; CHECK: SolveQuadraticEquationWrap: solving -1x^2 + -1x + -2, rw:33
243 ; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + 1x + -8589934590, rw:33
244 ; CHECK: SolveQuadraticEquationWrap: solution (wrap): 92682
245 ; CHECK: Loop %loop: backedge-taken count is 2
246
; test05: instance of the template above with X=0, Y=-1, Z=-1; the exit test
; is an unsigned "less than or equal" comparison against the constant -3.
247 define signext i32 @test05() {
248 entry:
249 br label %loop
250
251 loop:
252 %ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ] ; running sum of %inc; only fed back into itself
253 %inc = phi i32 [ 0, %entry ], [ %inc1, %loop ] ; X = 0, stepped by Z each iteration
254 %acc = phi i32 [ -1, %entry ], [ %acc1, %loop ] ; Y = -1, grows by the current %inc
255 %ivr1 = add i32 %ivr, %inc
256 %inc1 = add i32 %inc, -1 ; Z = -1
257 %acc1 = add i32 %acc, %inc
258 %and = and i32 %acc1, -1 ; mask is all ones, so %and has the same value as %acc1
259 %cond = icmp ule i32 %and, -3 ; unsigned comparison; -3 is the bit pattern 0xFFFFFFFD
260 br i1 %cond, label %exit, label %loop
261
262 exit:
263 %rv = phi i32 [ %acc1, %loop ]
264 ret i32 %rv ; returns the final %acc1
265 }
266
267 ; A test that used to crash with one of the earlier versions of the code.
268
269 ; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'test06':
270 ; CHECK: GetQuadraticEquation: analyzing quadratic addrec: {0,+,-99999,+,1}<%loop>
271 ; CHECK: GetQuadraticEquation: addrec coeff bw: 32
272 ; CHECK: GetQuadraticEquation: equation 1x^2 + -199999x + 0, coeff bw: 33, multiplied by 2
273 ; CHECK: SolveQuadraticAddRecRange: solving for signed overflow
274 ; CHECK: SolveQuadraticEquationWrap: solving 1x^2 + -199999x + -4294967294, rw:32
275 ; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + -199999x + 2, rw:32
276 ; CHECK: SolveQuadraticEquationWrap: solution (wrap): 1
277 ; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow
278 ; CHECK: SolveQuadraticEquationWrap: solving 1x^2 + -199999x + -4294967294, rw:33
279 ; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + -199999x + 4294967298, rw:33
280 ; CHECK: SolveQuadraticEquationWrap: solution (wrap): 24469
281 ; CHECK: SolveQuadraticAddRecRange: solving for signed overflow
282 ; CHECK: SolveQuadraticEquationWrap: solving 1x^2 + -199999x + -12, rw:32
283 ; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + -199999x + 4294967284, rw:32
284 ; CHECK: SolveQuadraticEquationWrap: solution (wrap): 24469
285 ; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow
286 ; CHECK: SolveQuadraticEquationWrap: solving 1x^2 + -199999x + -12, rw:33
287 ; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + -199999x + 8589934580, rw:33
288 ; CHECK: SolveQuadraticEquationWrap: solution (wrap): 62450
289 ; CHECK: Loop %loop: backedge-taken count is 24469
; test06: instance of the template above with X=-100000, Y=100000, Z=1; the
; exit test is a signed "greater than 5" comparison on the full 32-bit value.
290 define signext i32 @test06() {
291 entry:
292 br label %loop
293
294 loop:
295 %ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ] ; running sum of %inc; only fed back into itself
296 %inc = phi i32 [ -100000, %entry ], [ %inc1, %loop ] ; X = -100000, stepped by Z each iteration
297 %acc = phi i32 [ 100000, %entry ], [ %acc1, %loop ] ; Y = 100000, grows by the current %inc
298 %ivr1 = add i32 %ivr, %inc
299 %inc1 = add i32 %inc, 1 ; Z = 1
300 %acc1 = add i32 %acc, %inc
301 %and = and i32 %acc1, -1 ; mask is all ones, so %and has the same value as %acc1
302 %cond = icmp sgt i32 %and, 5
303 br i1 %cond, label %exit, label %loop ; leave once the value exceeds 5 (signed)
304
305 exit:
306 %rv = phi i32 [ %acc1, %loop ]
307 ret i32 %rv ; returns the final %acc1
308 }
309
310 ; The equation
311 ; 532052752x^2 + -450429774x + 71188414 = 0
312 ; has two real solutions (rounded to two decimal places): 0.21 and 0.64.
313 ; Since there is no integer between them, there is no integer n that either
314 ; solves the equation exactly, or makes its value change sign between n and n+1.
315
316 ; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'test07':
317 ; CHECK: GetQuadraticEquation: analyzing quadratic addrec: {0,+,40811489,+,532052752}<%loop>
318 ; CHECK: GetQuadraticEquation: addrec coeff bw: 32
319 ; CHECK: GetQuadraticEquation: equation 532052752x^2 + -450429774x + 0, coeff bw: 33, multiplied by 2
320 ; CHECK: SolveQuadraticAddRecRange: solving for signed overflow
321 ; CHECK: SolveQuadraticEquationWrap: solving 532052752x^2 + -450429774x + 71188414, rw:32
322 ; CHECK: SolveQuadraticEquationWrap: updated coefficients 532052752x^2 + -450429774x + 71188414, rw:32
323 ; CHECK: SolveQuadraticEquationWrap: no valid solution
324 ; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow
325 ; CHECK: SolveQuadraticEquationWrap: solving 532052752x^2 + -450429774x + 71188414, rw:33
326 ; CHECK: SolveQuadraticEquationWrap: updated coefficients 532052752x^2 + -450429774x + 71188414, rw:33
327 ; CHECK: SolveQuadraticEquationWrap: no valid solution
328 ; CHECK: SolveQuadraticAddRecRange: solving for signed overflow
329 ; CHECK: SolveQuadraticEquationWrap: solving 532052752x^2 + -450429774x + 71188414, rw:32
330 ; CHECK: SolveQuadraticEquationWrap: updated coefficients 532052752x^2 + -450429774x + 71188414, rw:32
331 ; CHECK: SolveQuadraticEquationWrap: no valid solution
332 ; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow
333 ; CHECK: SolveQuadraticEquationWrap: solving 532052752x^2 + -450429774x + 71188414, rw:33
334 ; CHECK: SolveQuadraticEquationWrap: updated coefficients 532052752x^2 + -450429774x + 71188414, rw:33
335 ; CHECK: SolveQuadraticEquationWrap: no valid solution
336 ; CHECK: GetQuadraticEquation: analyzing quadratic addrec: {35594207,+,40811489,+,532052752}<%loop>
337 ; CHECK: GetQuadraticEquation: addrec coeff bw: 32
338 ; CHECK: GetQuadraticEquation: equation 532052752x^2 + -450429774x + 71188414, coeff bw: 33, multiplied by 2
339 ; CHECK: SolveQuadraticAddRecExact: solving for unsigned overflow
340 ; CHECK: SolveQuadraticEquationWrap: solving 532052752x^2 + -450429774x + 71188414, rw:33
341 ; CHECK: SolveQuadraticEquationWrap: updated coefficients 532052752x^2 + -450429774x + 71188414, rw:33
342 ; CHECK: SolveQuadraticEquationWrap: no valid solution
343 ; CHECK: Loop %loop: Unpredictable backedge-taken count.
; test07: instance of the template above with X=-491241263, Y=526835470,
; Z=532052752; the exit test compares the full 32-bit value against 0.
344 define signext i32 @test07() {
345 entry:
346 br label %loop
347
348 loop:
349 %ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ] ; running sum of %inc; only fed back into itself
350 %inc = phi i32 [ -491241263, %entry ], [ %inc1, %loop ] ; X, stepped by Z each iteration
351 %acc = phi i32 [ 526835470, %entry ], [ %acc1, %loop ] ; Y, grows by the current %inc
352 %ivr1 = add i32 %ivr, %inc
353 %inc1 = add i32 %inc, 532052752 ; Z
354 %acc1 = add i32 %acc, %inc
355 %and = and i32 %acc1, -1 ; mask is all ones, so %and has the same value as %acc1
356 %cond = icmp eq i32 %and, 0
357 br i1 %cond, label %exit, label %loop ; leave once the value is 0
358
359 exit:
360 %rv = phi i32 [ %acc1, %loop ]
361 ret i32 %rv ; returns the final %acc1
362 }
363
99 #include "llvm/ADT/APInt.h"
1010 #include "llvm/ADT/ArrayRef.h"
1111 #include "llvm/ADT/SmallString.h"
12 #include "llvm/ADT/Twine.h"
1213 #include "gtest/gtest.h"
1314 #include
1415
23562357 }
23572358 }
23582359
2360 TEST(APIntTest, SolveQuadraticEquationWrap) {
2361 // Verify that "Solution" is the first non-negative integer that solves
2362 // Ax^2 + Bx + C = "0 or overflow", i.e. that it is a correct solution
2363 // as calculated by SolveQuadraticEquationWrap.
2364 auto Validate = [] (int A, int B, int C, unsigned Width, int Solution) {
2365 int Mask = (1 << Width) - 1;
2366
2367 // Solution should be non-negative.
2368 EXPECT_GE(Solution, 0);
2369
2370 auto OverflowBits = [] (int64_t V, unsigned W) {
2371 return V & -(1 << W);
2372 };
2373
2374 int64_t Over0 = OverflowBits(C, Width);
2375
2376 auto IsZeroOrOverflow = [&] (int X) {
2377 int64_t ValueAtX = A*X*X + B*X + C;
2378 int64_t OverX = OverflowBits(ValueAtX, Width);
2379 return (ValueAtX & Mask) == 0 || OverX != Over0;
2380 };
2381
2382 auto EquationToString = [&] (const char *X_str) {
2383 return Twine(A) + Twine(X_str) + Twine("^2 + ") + Twine(B) +
2384 Twine(X_str) + Twine(" + ") + Twine(C) + Twine(", bitwidth: ") +
2385 Twine(Width);
2386 };
2387
2388 auto IsSolution = [&] (const char *X_str, int X) {
2389 if (IsZeroOrOverflow(X))
2390 return ::testing::AssertionSuccess()
2391 << X << " is a solution of " << EquationToString(X_str);
2392 return ::testing::AssertionFailure()
2393 << X << " is not an expected solution of "
2394 << EquationToString(X_str);
2395 };
2396
2397 auto IsNotSolution = [&] (const char *X_str, int X) {
2398 if (!IsZeroOrOverflow(X))
2399 return ::testing::AssertionSuccess()
2400 << X << " is not a solution of " << EquationToString(X_str);
2401 return ::testing::AssertionFailure()
2402 << X << " is an unexpected solution of "
2403 << EquationToString(X_str);
2404 };
2405
2406 // This is the important part: make sure that there is no solution that
2407 // is less than the calculated one.
2408 if (Solution > 0) {
2409 for (int X = 1; X < Solution-1; ++X)
2410 EXPECT_PRED_FORMAT1(IsNotSolution, X);
2411 }
2412
2413 // Verify that the calculated solution is indeed a solution.
2414 EXPECT_PRED_FORMAT1(IsSolution, Solution);
2415 };
2416
2417 // Generate all possible quadratic equations with Width-bit wide integer
2418 // coefficients, get the solution from SolveQuadraticEquationWrap, and
2419 // verify that the solution is correct.
2420 auto Iterate = [&] (unsigned Width) {
2421 assert(1 < Width && Width < 32);
2422 int Low = -(1 << (Width-1));
2423 int High = (1 << (Width-1));
2424
2425 for (int A = Low; A != High; ++A) {
2426 if (A == 0)
2427 continue;
2428 for (int B = Low; B != High; ++B) {
2429 for (int C = Low; C != High; ++C) {
2430 Optional S = APIntOps::SolveQuadraticEquationWrap(
2431 APInt(Width, A), APInt(Width, B),
2432 APInt(Width, C), Width);
2433 if (S.hasValue())
2434 Validate(A, B, C, Width, S->getSExtValue());
2435 }
2436 }
2437 }
2438 };
2439
2440 // Test all widths in [2..6].
2441 for (unsigned i = 2; i <= 6; ++i)
2442 Iterate(i);
2443 }
2444
23592445 } // end anonymous namespace