llvm.org GIT mirror llvm / 5026b2c
split delinearization pass in 3 steps. To compute the dimensions of the array in a unique way, we split the delinearization analysis into three steps: - find parametric terms in all memory access functions - compute the array dimensions from the set of terms - compute the delinearized access functions for each dimension The first step is executed on all the memory access functions so that we gather all the patterns in which an array is accessed. The second step reduces all this information into a unique description of the sizes of the array. The third step delinearizes each memory access function following the common description of the shape of the array computed in step 2. This rewrite of the delinearization pass also solves a problem we had with the previous implementation: because the previous algorithm worked by induction on the structure of the SCEV, it would not correctly recognize the shape of the array when the memory access did not follow the nesting of the loops: for example, see polly/test/ScopInfo/multidim_only_ivs_3d_reverse.ll ; void foo(long n, long m, long o, double A[n][m][o]) { ; ; for (long i = 0; i < n; i++) ; for (long j = 0; j < m; j++) ; for (long k = 0; k < o; k++) ; A[i][k][j] = 1.0; Starting with this patch we no longer delinearize access functions that do not contain parameters, for example in test/Analysis/DependenceAnalysis/GCD.ll ;; for (long int i = 0; i < 100; i++) ;; for (long int j = 0; j < 100; j++) { ;; A[2*i - 4*j] = i; ;; *B++ = A[6*i + 8*j]; these accesses will not be delinearized as the upper bounds of the loops are constants, and their access functions do not contain SCEVUnknown parameters. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@208232 91177308-0d34-0410-b5e6-96231b3b80d8 Sebastian Pop 6 years ago
16 changed file(s) with 651 addition(s) and 570 deletion(s). Raw diff Collapse all Expand all
356356 return S->getSCEVType() == scAddRecExpr;
357357 }
358358
359 /// Splits the SCEV into two vectors of SCEVs representing the subscripts
360 /// and sizes of an array access. Returns the remainder of the
359 /// Collect parametric terms occurring in step expressions.
360 void collectParametricTerms(ScalarEvolution &SE,
361 SmallVectorImpl &Terms) const;
362
363 /// Compute the array dimensions Sizes from the set of Terms extracted from
364 /// the memory access function of this SCEVAddRecExpr.
365 void findArrayDimensions(ScalarEvolution &SE,
366 SmallVectorImpl &Terms,
367 SmallVectorImpl &Sizes) const;
368
369 /// Return in Subscripts the access functions for each dimension in Sizes.
370 const SCEV *
371 computeAccessFunctions(ScalarEvolution &SE,
372 SmallVectorImpl &Subscripts,
373 SmallVectorImpl &Sizes) const;
374
375 /// Split this SCEVAddRecExpr into two vectors of SCEVs representing the
376 /// subscripts and sizes of an array access. Returns the remainder of the
361377 /// delinearization that is the offset start of the array.
378 ///
379 /// The delinearization is a 3 step process: the first two steps compute the
380 /// sizes of each subscript and the third step computes the access functions
381 /// for the delinearized array:
382 ///
383 /// 1. Find the terms in the step functions
384 /// 2. Compute the array size
385 /// 3. Compute the access function: divide the SCEV by the array size
386 /// starting with the innermost dimensions found in step 2. The Quotient
387 /// is the SCEV to be divided in the next step of the recursion. The
388 /// Remainder is the subscript of the innermost dimension. Loop over all
389 /// array dimensions computed in step 2.
390 ///
391 /// To compute a uniform array size for several memory accesses to the same
392 /// object, one can collect in step 1 all the step terms for all the memory
393 /// accesses, and compute in step 2 a unique array shape. This guarantees
394 /// that the array shape will be the same across all memory accesses.
395 ///
396 /// FIXME: We could derive the result of steps 1 and 2 from a description of
397 /// the array shape given in metadata.
398 ///
399 /// Example:
400 ///
401 /// A[][n][m]
402 ///
403 /// for i
404 /// for j
405 /// for k
406 /// A[j+k][2i][5i] =
407 ///
408 /// The initial SCEV:
409 ///
410 /// A[{{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k]
411 ///
412 /// 1. Find the different terms in the step functions:
413 /// -> [2*m, 5, n*m, n*m]
414 ///
415 /// 2. Compute the array size: sort and unique them
416 /// -> [n*m, 2*m, 5]
417 /// find the GCD of all the terms = 1
418 /// divide by the GCD and erase constant terms
419 /// -> [n*m, 2*m]
420 /// GCD = m
421 /// divide by GCD -> [n, 2]
422 /// remove constant terms
423 /// -> [n]
424 /// size of the array is A[unknown][n][m]
425 ///
426 /// 3. Compute the access function
427 /// a. Divide {{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k by the innermost size m
428 /// Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k
429 /// Remainder: {{{0,+,5}_i, +, 0}_j, +, 0}_k
430 /// The remainder is the subscript of the innermost array dimension: [5i].
431 ///
432 /// b. Divide Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k by next outer size n
433 /// Quotient: {{{0,+,0}_i, +, 1}_j, +, 1}_k
434 /// Remainder: {{{0,+,2}_i, +, 0}_j, +, 0}_k
435 /// The Remainder is the subscript of the next array dimension: [2i].
436 ///
437 /// The subscript of the outermost dimension is the Quotient: [j+k].
438 ///
439 /// Overall, we have: A[][n][m], and the access function: A[j+k][2i][5i].
362440 const SCEV *delinearize(ScalarEvolution &SE,
363441 SmallVectorImpl &Subscripts,
364442 SmallVectorImpl &Sizes) const;
108108
109109 SmallVector Subscripts, Sizes;
110110 const SCEV *Res = AR->delinearize(*SE, Subscripts, Sizes);
111 int Size = Subscripts.size();
112 if (Res == AR || Size == 0) {
111 if (Res == AR || Subscripts.size() == 0 || Sizes.size() == 0 ||
112 Subscripts.size() != Sizes.size()) {
113113 O << "failed to delinearize\n";
114114 continue;
115115 }
116116 O << "Base offset: " << *Res << "\n";
117117 O << "ArrayDecl[UnknownSize]";
118 int Size = Subscripts.size();
118119 for (int i = 0; i < Size - 1; i++)
119120 O << "[" << *Sizes[i] << "]";
120121 O << " with elements of " << *Sizes[Size - 1] << " bytes.\n";
31873187 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine())
31883188 return false;
31893189
3190 SmallVector SrcSubscripts, DstSubscripts, SrcSizes, DstSizes;
3191 const SCEV *RemainderS = SrcAR->delinearize(*SE, SrcSubscripts, SrcSizes);
3192 const SCEV *RemainderD = DstAR->delinearize(*SE, DstSubscripts, DstSizes);
3193
3194 int size = SrcSubscripts.size();
3190 // First step: collect parametric terms in both array references.
3191 SmallVector Terms;
3192 SrcAR->collectParametricTerms(*SE, Terms);
3193 DstAR->collectParametricTerms(*SE, Terms);
3194
3195 // Second step: find subscript sizes.
3196 SmallVector Sizes;
3197 SrcAR->findArrayDimensions(*SE, Terms, Sizes);
3198
3199 // Third step: compute the access functions for each subscript.
3200 SmallVector SrcSubscripts, DstSubscripts;
3201 const SCEV *RemainderS = SrcAR->computeAccessFunctions(*SE, SrcSubscripts, Sizes);
3202 const SCEV *RemainderD = DstAR->computeAccessFunctions(*SE, DstSubscripts, Sizes);
3203
31953204 // Fail when there is only a subscript: that's a linearized access function.
3196 if (size < 2)
3205 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 ||
3206 SrcSubscripts.size() != DstSubscripts.size())
31973207 return false;
3198
3199 int dstSize = DstSubscripts.size();
3200 // Fail when the number of subscripts in Src and Dst differ.
3201 if (size != dstSize)
3202 return false;
3203
3204 // Fail when the size of any of the subscripts in Src and Dst differs: the
3205 // dependence analysis assumes that elements in the same array have same size.
3206 // SCEV delinearization does not have a context based on which it would decide
3207 // globally the size of subscripts that would best fit all the array accesses.
3208 for (int i = 0; i < size; ++i)
3209 if (SrcSizes[i] != DstSizes[i])
3210 return false;
32113208
32123209 // When the difference in remainders is different than a constant it might be
32133210 // that the base address of the arrays is not the same.
32153212 if (!isa(DiffRemainders))
32163213 return false;
32173214
3218 // Normalize the last dimension: integrate the size of the "scalar dimension"
3219 // and the remainder of the delinearization.
3220 DstSubscripts[size-1] = SE->getMulExpr(DstSubscripts[size-1],
3221 DstSizes[size-1]);
3222 SrcSubscripts[size-1] = SE->getMulExpr(SrcSubscripts[size-1],
3223 SrcSizes[size-1]);
3224 SrcSubscripts[size-1] = SE->getAddExpr(SrcSubscripts[size-1], RemainderS);
3225 DstSubscripts[size-1] = SE->getAddExpr(DstSubscripts[size-1], RemainderD);
3226
3227 #ifndef NDEBUG
3228 DEBUG(errs() << "\nSrcSubscripts: ");
3229 for (int i = 0; i < size; i++)
3230 DEBUG(errs() << *SrcSubscripts[i]);
3231 DEBUG(errs() << "\nDstSubscripts: ");
3232 for (int i = 0; i < size; i++)
3233 DEBUG(errs() << *DstSubscripts[i]);
3234 #endif
3215 int size = SrcSubscripts.size();
3216
3217 DEBUG({
3218 dbgs() << "\nSrcSubscripts: ";
3219 for (int i = 0; i < size; i++)
3220 dbgs() << *SrcSubscripts[i];
3221 dbgs() << "\nDstSubscripts: ";
3222 for (int i = 0; i < size; i++)
3223 dbgs() << *DstSubscripts[i];
3224 });
32353225
32363226 // The delinearization transforms a single-subscript MIV dependence test into
32373227 // a multi-subscript SIV dependence test that is easier to compute. So we
68146814 return SE.getCouldNotCompute();
68156815 }
68166816
6817 namespace {
6818 // Traversal visitor that records the step of every SCEVAddRecExpr it is asked to follow.
6819 struct SCEVCollectStrides {
6820 ScalarEvolution &SE;
6821 SmallVectorImpl &Strides;
6822
6823 SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl &S)
6824 : SE(SE), Strides(S) {}
6825
6826 bool follow(const SCEV *S) {
6827 if (const SCEVAddRecExpr *AR = dyn_cast(S))
6828 Strides.push_back(AR->getStepRecurrence(SE));
6829 return true;
6830 }
6831 bool isDone() const { return false; }
6832 };
6833
6834 // Collect all SCEVUnknown and SCEVMulExpr expressions. NOTE(review): follow() tests three expression kinds, one more than listed here -- confirm which.
6835 struct SCEVCollectTerms {
6836 SmallVectorImpl &Terms;
6837
6838 SCEVCollectTerms(SmallVectorImpl &T)
6839 : Terms(T) {}
6840
6841 bool follow(const SCEV *S) {
6842 if (isa(S) || isa(S) || isa(S)) {
6843 Terms.push_back(S);
6844
6845 // Stop recursion: once we collected a term, do not walk its operands.
6846 return false;
6847 }
6848
6849 // Not a term of interest: keep looking.
6850 return true;
6851 }
6852 bool isDone() const { return false; }
6853 };
6854 }
6855
6856 /// First step of delinearization: collect the step of every add recurrence followed while walking this SCEVAddRecExpr, then append to Terms the terms collected from each of those steps. Terms is only appended to, never cleared.
6857 void SCEVAddRecExpr::collectParametricTerms(
6858 ScalarEvolution &SE, SmallVectorImpl &Terms) const {
6859 SmallVector Strides;
6860 SCEVCollectStrides StrideCollector(SE, Strides);
6861 visitAll(this, StrideCollector);
6862
6863 DEBUG({
6864 dbgs() << "Strides:\n";
6865 for (const SCEV *S : Strides)
6866 dbgs() << *S << "\n";
6867 });
6868
6869 for (const SCEV *S : Strides) {
6870 SCEVCollectTerms TermCollector(Terms);
6871 visitAll(S, TermCollector);
6872 }
6873
6874 DEBUG({
6875 dbgs() << "Terms:\n";
6876 for (const SCEV *T : Terms)
6877 dbgs() << *T << "\n";
6878 });
6879 }
6880
68176881 static const APInt srem(const SCEVConstant *C1, const SCEVConstant *C2) {
68186882 APInt A = C1->getValue()->getValue();
68196883 APInt B = C2->getValue()->getValue();
68436907 }
68446908
68456909 namespace {
6846 struct SCEVGCD : public SCEVVisitor {
6910 struct FindSCEVSize {
6911 int Size;
6912 FindSCEVSize() : Size(0) {}
6913
6914 bool follow(const SCEV *S) {
6915 ++Size;
6916 // Keep looking at all operands of S.
6917 return true;
6918 }
6919 bool isDone() const {
6920 return false;
6921 }
6922 };
6923 }
6924
6925 // Returns the size of the SCEV S, measured as the number of expressions FindSCEVSize is asked to follow during a visitAll traversal of S.
6926 static inline int sizeOfSCEV(const SCEV *S) {
6927 FindSCEVSize F;
6928 SCEVTraversal ST(F);
6929 ST.visitAll(S);
6930 return F.Size;
6931 }
6932
6933 namespace {
6934
6935 struct SCEVDivision : public SCEVVisitor {
68476936 public:
6848 // Pattern match Step into Start. When Step is a multiply expression, find
6849 // the largest subexpression of Step that appears in Start. When Start is an
6850 // add expression, try to match Step in the subexpressions of Start, non
6851 // matching subexpressions are returned under Remainder.
6852 static const SCEV *findGCD(ScalarEvolution &SE, const SCEV *Start,
6853 const SCEV *Step, const SCEV **Remainder) {
6854 assert(Remainder && "Remainder should not be NULL");
6855 SCEVGCD R(SE, Step, SE.getConstant(Step->getType(), 0));
6856 const SCEV *Res = R.visit(Start);
6857 *Remainder = R.Remainder;
6858 return Res;
6859 }
6860
6861 SCEVGCD(ScalarEvolution &S, const SCEV *G, const SCEV *R)
6862 : SE(S), GCD(G), Remainder(R) {
6863 Zero = SE.getConstant(GCD->getType(), 0);
6864 One = SE.getConstant(GCD->getType(), 1);
6865 }
6866
6867 const SCEV *visitConstant(const SCEVConstant *Constant) {
6868 if (GCD == Constant || Constant == Zero)
6869 return GCD;
6870
6871 if (const SCEVConstant *CGCD = dyn_cast(GCD)) {
6872 const SCEV *Res = SE.getConstant(gcd(Constant, CGCD));
6873 if (Res != One)
6874 return Res;
6875
6876 Remainder = SE.getConstant(srem(Constant, CGCD));
6877 Constant = cast(SE.getMinusSCEV(Constant, Remainder));
6878 Res = SE.getConstant(gcd(Constant, CGCD));
6879 return Res;
6880 }
6881
6882 // When GCD is not a constant, it could be that the GCD is an Add, Mul,
6883 // AddRec, etc., in which case we want to find out how many times the
6884 // Constant divides the GCD: we then return that as the new GCD.
6885 const SCEV *Rem = Zero;
6886 const SCEV *Res = findGCD(SE, GCD, Constant, &Rem);
6887
6888 if (Res == One || Rem != Zero) {
6889 Remainder = Constant;
6890 return One;
6891 }
6892
6893 assert(isa(Res) && "Res should be a constant");
6894 Remainder = SE.getConstant(srem(Constant, cast(Res)));
6895 return Res;
6896 }
6897
6898 const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
6899 if (GCD != Expr)
6900 Remainder = Expr;
6901 return GCD;
6902 }
6903
6904 const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
6905 if (GCD != Expr)
6906 Remainder = Expr;
6907 return GCD;
6908 }
6909
6910 const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
6911 if (GCD != Expr)
6912 Remainder = Expr;
6913 return GCD;
6914 }
6915
6916 const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
6917 if (GCD == Expr)
6918 return GCD;
6919
6920 for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
6921 const SCEV *Rem = Zero;
6922 const SCEV *Res = findGCD(SE, Expr->getOperand(e - 1 - i), GCD, &Rem);
6923
6924 // FIXME: There may be ambiguous situations: for instance,
6925 // GCD(-4 + (3 * %m), 2 * %m) where 2 divides -4 and %m divides (3 * %m).
6926 // The order in which the AddExpr is traversed computes a different GCD
6927 // and Remainder.
6928 if (Res != One)
6929 GCD = Res;
6930 if (Rem != Zero)
6931 Remainder = SE.getAddExpr(Remainder, Rem);
6932 }
6933
6934 return GCD;
6935 }
6936
6937 const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
6938 if (GCD == Expr)
6939 return GCD;
6940
6941 for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
6942 if (Expr->getOperand(i) == GCD)
6943 return GCD;
6944 }
6945
6946 // If we have not returned yet, it means that GCD is not part of Expr.
6947 const SCEV *PartialGCD = One;
6948 for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
6949 const SCEV *Rem = Zero;
6950 const SCEV *Res = findGCD(SE, Expr->getOperand(i), GCD, &Rem);
6951 if (Rem != Zero)
6952 // GCD does not divide Expr->getOperand(i).
6937 // Computes the Quotient and Remainder of the division of Numerator by
6938 // Denominator. Both results are written through the out-parameters Quotient and Remainder, which must be non-null; the values they point to on entry are never read, so callers may pass the address of uninitialized locals.
6939 static void divide(ScalarEvolution &SE, const SCEV *Numerator,
6940 const SCEV *Denominator, const SCEV **Quotient,
6941 const SCEV **Remainder) {
6942 assert(Numerator && Denominator && Quotient && Remainder &&
6943 "Uninitialized SCEV");
6944
6945 SCEVDivision D(SE, Numerator, Denominator);
6946
6947 // Check for the trivial case here to avoid having to check for it in the
6948 // rest of the code.
6949 if (Numerator == Denominator) {
6950 *Quotient = D.One;
6951 *Remainder = D.Zero;
6952 return;
6953 }
6954
6955 if (Numerator == D.Zero) {
6956 *Quotient = D.Zero;
6957 *Remainder = D.Zero;
6958 return;
6959 }
6960
6961 // Split the Denominator when it is a product: divide successively by each of its operands.
6962 if (const SCEVMulExpr *T = dyn_cast(Denominator)) {
6963 const SCEV *Q, *R;
6964 *Quotient = Numerator;
6965 for (const SCEV *Op : T->operands()) {
6966 divide(SE, *Quotient, Op, &Q, &R);
6967 *Quotient = Q;
6968
6969 // Bail out when the Numerator is not divisible by one of the terms of
6970 // the Denominator.
6971 if (R != D.Zero) {
6972 *Quotient = D.Zero;
6973 *Remainder = Numerator;
6974 return;
6975 }
6976 }
6977 *Remainder = D.Zero;
6978 return;
6979 }
6980
6981 D.visit(Numerator);
6982 *Quotient = D.Quotient;
6983 *Remainder = D.Remainder;
6984 }
6985
6986 SCEVDivision(ScalarEvolution &S, const SCEV *Numerator, const SCEV *Denominator)
6987 : SE(S), Denominator(Denominator) {
6988 Zero = SE.getConstant(Denominator->getType(), 0);
6989 One = SE.getConstant(Denominator->getType(), 1);
6990
6991 // By default, we don't know how to divide Expr by Denominator.
6992 // Providing the default here simplifies the rest of the code.
6993 Quotient = Zero;
6994 Remainder = Numerator;
6995 }
6996
6997 // Except in the trivial case described above, we do not know how to divide
6998 // Expr by Denominator for the following functions with empty implementation.
6999 void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {}
7000 void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {}
7001 void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {}
7002 void visitUDivExpr(const SCEVUDivExpr *Numerator) {}
7003 void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {}
7004 void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {}
7005 void visitUnknown(const SCEVUnknown *Numerator) {}
7006 void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {}
7007
7008 void visitConstant(const SCEVConstant *Numerator) {
7009 if (const SCEVConstant *D = dyn_cast(Denominator)) {
7010 Quotient = SE.getConstant(sdiv(Numerator, D));
7011 Remainder = SE.getConstant(srem(Numerator, D));
7012 return;
7013 }
7014 }
7015
7016 void visitAddRecExpr(const SCEVAddRecExpr *Numerator) {
7017 const SCEV *StartQ, *StartR, *StepQ, *StepR;
7018 assert(Numerator->isAffine() && "Numerator should be affine");
7019 divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR);
7020 divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR);
7021 Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(),
7022 Numerator->getNoWrapFlags());
7023 Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(),
7024 Numerator->getNoWrapFlags());
7025 }
7026
7027 void visitAddExpr(const SCEVAddExpr *Numerator) {
7028 SmallVector Qs, Rs;
7029 for (const SCEV *Op : Numerator->operands()) {
7030 const SCEV *Q, *R;
7031 divide(SE, Op, Denominator, &Q, &R);
7032 Qs.push_back(Q);
7033 Rs.push_back(R);
7034 }
7035
7036 if (Qs.size() == 1) {
7037 Quotient = Qs[0];
7038 Remainder = Rs[0];
7039 return;
7040 }
7041
7042 Quotient = SE.getAddExpr(Qs);
7043 Remainder = SE.getAddExpr(Rs);
7044 }
7045
7046 void visitMulExpr(const SCEVMulExpr *Numerator) {
7047 SmallVector Qs;
7048
7049 bool FoundDenominatorTerm = false;
7050 for (const SCEV *Op : Numerator->operands()) {
7051 if (FoundDenominatorTerm) {
7052 Qs.push_back(Op);
69537053 continue;
6954
6955 if (Res == GCD)
6956 return GCD;
6957 PartialGCD = SE.getMulExpr(PartialGCD, Res);
6958 if (PartialGCD == GCD)
6959 return GCD;
6960 }
6961
6962 if (PartialGCD != One)
6963 return PartialGCD;
6964
6965 // Failed to find a PartialGCD: set the Remainder to the full expression,
6966 // and return the GCD.
6967 Remainder = Expr;
6968 const SCEVMulExpr *Mul = dyn_cast(GCD);
6969 if (!Mul)
6970 return GCD;
6971
6972 // When the GCD is a multiply expression, try to decompose it:
6973 // this occurs when Step does not divide the Start expression
6974 // as in: {(-4 + (3 * %m)),+,(2 * %m)}
6975 for (int i = 0, e = Mul->getNumOperands(); i < e; ++i) {
6976 const SCEV *Rem = Zero;
6977 const SCEV *Res = findGCD(SE, Expr, Mul->getOperand(i), &Rem);
6978 if (Rem == Zero) {
6979 Remainder = Rem;
6980 return Res;
69817054 }
6982 }
6983
6984 return GCD;
6985 }
6986
6987 const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
6988 if (GCD != Expr)
6989 Remainder = Expr;
6990 return GCD;
6991 }
6992
6993 const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
6994 if (GCD == Expr)
6995 return GCD;
6996
6997 if (!Expr->isAffine()) {
6998 Remainder = Expr;
6999 return GCD;
7000 }
7001
7002 const SCEV *Rem = Zero;
7003 const SCEV *Res = findGCD(SE, Expr->getOperand(0), GCD, &Rem);
7004 if (Res == One || Res->isAllOnesValue()) {
7005 Remainder = Expr;
7006 return GCD;
7007 }
7008
7009 if (Rem != Zero)
7010 Remainder = SE.getAddExpr(Remainder, Rem);
7011
7012 Rem = Zero;
7013 Res = findGCD(SE, Expr->getOperand(1), Res, &Rem);
7014 if (Rem != Zero || Res == One || Res->isAllOnesValue()) {
7015 Remainder = Expr;
7016 return GCD;
7017 }
7018
7019 return Res;
7020 }
7021
7022 const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
7023 if (GCD != Expr)
7024 Remainder = Expr;
7025 return GCD;
7026 }
7027
7028 const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
7029 if (GCD != Expr)
7030 Remainder = Expr;
7031 return GCD;
7032 }
7033
7034 const SCEV *visitUnknown(const SCEVUnknown *Expr) {
7035 if (GCD != Expr)
7036 Remainder = Expr;
7037 return GCD;
7038 }
7039
7040 const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
7041 return One;
7055
7056 // Check whether Denominator divides one of the product operands.
7057 const SCEV *Q, *R;
7058 divide(SE, Op, Denominator, &Q, &R);
7059 if (R != Zero) {
7060 Qs.push_back(Op);
7061 continue;
7062 }
7063 FoundDenominatorTerm = true;
7064 Qs.push_back(Q);
7065 }
7066
7067 if (FoundDenominatorTerm) {
7068 Remainder = Zero;
7069 if (Qs.size() == 1)
7070 Quotient = Qs[0];
7071 else
7072 Quotient = SE.getMulExpr(Qs);
7073 return;
7074 }
7075
7076 if (!isa(Denominator)) {
7077 Quotient = Zero;
7078 Remainder = Numerator;
7079 return;
7080 }
7081
7082 // The Remainder is obtained by replacing Denominator by 0 in Numerator.
7083 ValueToValueMap RewriteMap;
7084 RewriteMap[cast(Denominator)->getValue()] =
7085 cast(Zero)->getValue();
7086 Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
7087
7088 // Quotient is (Numerator - Remainder) divided by Denominator.
7089 const SCEV *Q, *R;
7090 const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder);
7091 if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) {
7092 // This SCEV does not seem to simplify: fail the division here.
7093 Quotient = Zero;
7094 Remainder = Numerator;
7095 return;
7096 }
7097 divide(SE, Diff, Denominator, &Q, &R);
7098 assert(R == Zero &&
7099 "(Numerator - Remainder) should evenly divide Denominator");
7100 Quotient = Q;
70427101 }
70437102
70447103 private:
70457104 ScalarEvolution &SE;
7046 const SCEV *GCD, *Remainder, *Zero, *One;
7105 const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One;
70477106 };
7048
7049 struct SCEVDivision : public SCEVVisitor {
7050 public:
7051 // Remove from Start all multiples of Step.
7052 static const SCEV *divide(ScalarEvolution &SE, const SCEV *Start,
7053 const SCEV *Step) {
7054 SCEVDivision D(SE, Step);
7055 const SCEV *Rem = D.Zero;
7056 (void)Rem;
7057 // The division is guaranteed to succeed: Step should divide Start with no
7058 // remainder.
7059 assert(Step == SCEVGCD::findGCD(SE, Start, Step, &Rem) && Rem == D.Zero &&
7060 "Step should divide Start with no remainder.");
7061 return D.visit(Start);
7062 }
7063
7064 SCEVDivision(ScalarEvolution &S, const SCEV *G) : SE(S), GCD(G) {
7065 Zero = SE.getConstant(GCD->getType(), 0);
7066 One = SE.getConstant(GCD->getType(), 1);
7067 }
7068
7069 const SCEV *visitConstant(const SCEVConstant *Constant) {
7070 if (GCD == Constant)
7071 return One;
7072
7073 if (const SCEVConstant *CGCD = dyn_cast(GCD))
7074 return SE.getConstant(sdiv(Constant, CGCD));
7075 return Constant;
7076 }
7077
7078 const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
7079 if (GCD == Expr)
7080 return One;
7081 return Expr;
7082 }
7083
7084 const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
7085 if (GCD == Expr)
7086 return One;
7087 return Expr;
7088 }
7089
7090 const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
7091 if (GCD == Expr)
7092 return One;
7093 return Expr;
7094 }
7095
7096 const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
7097 if (GCD == Expr)
7098 return One;
7099
7100 SmallVector Operands;
7101 for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
7102 Operands.push_back(divide(SE, Expr->getOperand(i), GCD));
7103
7104 if (Operands.size() == 1)
7105 return Operands[0];
7106 return SE.getAddExpr(Operands);
7107 }
7108
7109 const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
7110 if (GCD == Expr)
7111 return One;
7112
7113 bool FoundGCDTerm = false;
7114 for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
7115 if (Expr->getOperand(i) == GCD)
7116 FoundGCDTerm = true;
7117
7118 SmallVector Operands;
7119 if (FoundGCDTerm) {
7120 FoundGCDTerm = false;
7121 for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
7122 if (FoundGCDTerm)
7123 Operands.push_back(Expr->getOperand(i));
7124 else if (Expr->getOperand(i) == GCD)
7125 FoundGCDTerm = true;
7126 else
7127 Operands.push_back(Expr->getOperand(i));
7128 }
7129 } else {
7130 const SCEV *PartialGCD = One;
7131 for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
7132 if (PartialGCD == GCD) {
7133 Operands.push_back(Expr->getOperand(i));
7134 continue;
7135 }
7136
7137 const SCEV *Rem = Zero;
7138 const SCEV *Res = SCEVGCD::findGCD(SE, Expr->getOperand(i), GCD, &Rem);
7139 if (Rem == Zero) {
7140 PartialGCD = SE.getMulExpr(PartialGCD, Res);
7141 Operands.push_back(divide(SE, Expr->getOperand(i), Res));
7142 } else {
7143 Operands.push_back(Expr->getOperand(i));
7144 }
7145 }
7146 }
7147
7148 if (Operands.size() == 1)
7149 return Operands[0];
7150 return SE.getMulExpr(Operands);
7151 }
7152
7153 const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
7154 if (GCD == Expr)
7155 return One;
7156 return Expr;
7157 }
7158
7159 const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
7160 if (GCD == Expr)
7161 return One;
7162
7163 assert(Expr->isAffine() && "Expr should be affine");
7164
7165 const SCEV *Start = divide(SE, Expr->getStart(), GCD);
7166 const SCEV *Step = divide(SE, Expr->getStepRecurrence(SE), GCD);
7167
7168 return SE.getAddRecExpr(Start, Step, Expr->getLoop(),
7169 Expr->getNoWrapFlags());
7170 }
7171
7172 const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
7173 if (GCD == Expr)
7174 return One;
7175 return Expr;
7176 }
7177
7178 const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
7179 if (GCD == Expr)
7180 return One;
7181 return Expr;
7182 }
7183
7184 const SCEV *visitUnknown(const SCEVUnknown *Expr) {
7185 if (GCD == Expr)
7186 return One;
7187 return Expr;
7188 }
7189
7190 const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
7191 return Expr;
7192 }
7193
7194 private:
7195 ScalarEvolution &SE;
7196 const SCEV *GCD, *Zero, *One;
7107 }
7108
7109 // Find the Greatest Common Divisor of A and B: the integer gcd for two constants, the product of the per-operand GCDs when A (else B) is a SCEVMulExpr, otherwise B (else A) when it divides the other with a zero remainder; returns 1 in every other case.
7110 static const SCEV *
7111 findGCD(ScalarEvolution &SE, const SCEV *A, const SCEV *B) {
7112
7113 if (const SCEVConstant *CA = dyn_cast(A))
7114 if (const SCEVConstant *CB = dyn_cast(B))
7115 return SE.getConstant(gcd(CA, CB));
7116
7117 const SCEV *One = SE.getConstant(A->getType(), 1);
7118 if (isa(A) && isa(B))
7119 return One;
7120 if (isa(A) && isa(B))
7121 return One;
7122
7123 const SCEV *Q, *R;
7124 if (const SCEVMulExpr *M = dyn_cast(A)) {
7125 SmallVector Qs;
7126 for (const SCEV *Op : M->operands())
7127 Qs.push_back(findGCD(SE, Op, B));
7128 return SE.getMulExpr(Qs);
7129 }
7130 if (const SCEVMulExpr *M = dyn_cast(B)) {
7131 SmallVector Qs;
7132 for (const SCEV *Op : M->operands())
7133 Qs.push_back(findGCD(SE, A, Op));
7134 return SE.getMulExpr(Qs);
7135 }
7136
7137 const SCEV *Zero = SE.getConstant(A->getType(), 0);
7138 SCEVDivision::divide(SE, A, B, &Q, &R);
7139 if (R == Zero)
7140 return B;
7141
7142 SCEVDivision::divide(SE, B, A, &Q, &R);
7143 if (R == Zero)
7144 return A;
7145
7146 return One;
7147 }
7148
7149 // Find the Greatest Common Divisor of all the SCEVs in Terms by folding the pairwise findGCD over them, starting from Terms[0]; Terms must not be empty (asserted).
7150 static const SCEV *
7151 findGCD(ScalarEvolution &SE, SmallVectorImpl &Terms) {
7152 assert(Terms.size() > 0 && "Terms vector is empty");
7153
7154 const SCEV *GCD = Terms[0];
7155 for (const SCEV *T : Terms)
7156 GCD = findGCD(SE, GCD, T);
7157
7158 return GCD;
7159 }
7160
7161 static void findArrayDimensionsRec(ScalarEvolution &SE,
7162 SmallVectorImpl &Terms,
7163 SmallVectorImpl &Sizes,
7164 const SCEV *Zero, const SCEV *One) {
7165 // The GCD of all Terms is the size of the innermost dimension still to be found. Terms is rewritten in place below.
7166 const SCEV *GCD = findGCD(SE, Terms);
7167
7168 // End of recursion: with a single term left, filter the operands of a product and record what remains as the last size.
7169 if (Terms.size() == 1) {
7170 if (const SCEVMulExpr *M = dyn_cast(GCD)) {
7171 SmallVector Qs;
7172 for (const SCEV *Op : M->operands())
7173 if (!isa(Op))
7174 Qs.push_back(Op);
7175
7176 GCD = SE.getMulExpr(Qs);
7177 }
7178
7179 Sizes.push_back(GCD);
7180 return;
7181 }
7182
7183 for (unsigned I = 0; I < Terms.size(); ++I) {
7184 // Normalize the terms before the next call to findArrayDimensionsRec: every term is replaced by its quotient by GCD.
7185 const SCEV *Q, *R;
7186 SCEVDivision::divide(SE, Terms[I], GCD, &Q, &R);
7187 assert(R == Zero && "GCD does not evenly divide one of the terms");
7188 Terms[I] = Q;
7189 }
7190
7191 // Remove all SCEVConstants. The GCD of this level is appended to Sizes only after the recursive call, so sizes found deeper in the recursion come first.
7192 for (unsigned I = 0; I < Terms.size();)
7193 if (isa(Terms[I]))
7194 Terms.erase(Terms.begin() + I);
7195 else
7196 ++I;
7197
7198 if (Terms.size() > 0)
7199 findArrayDimensionsRec(SE, Terms, Sizes, Zero, One);
7200 Sizes.push_back(GCD);
7201 }
7202
7203 namespace {
7204 struct FindParameter {
7205 bool FoundParameter;
7206 FindParameter() : FoundParameter(false) {}
7207
7208 bool follow(const SCEV *S) {
7209 if (isa(S)) {
7210 FoundParameter = true;
7211 // Stop recursion: we found a parameter.
7212 return false;
7213 }
7214 // Keep looking.
7215 return true;
7216 }
7217 bool isDone() const {
7218 // Stop recursion if we have found a parameter.
7219 return FoundParameter;
7220 }
71977221 };
7222 }
7223
7224 // Returns true when S contains at least one SCEVUnknown parameter.
7225 static inline bool
7226 containsParameters(const SCEV *S) {
7227 FindParameter F;
7228 SCEVTraversal ST(F);
7229 ST.visitAll(S);
7230
7231 return F.FoundParameter;
7232 }
7233
7234 // Returns true when at least one of the SCEVs in Terms contains a SCEVUnknown parameter.
7235 static inline bool
7236 containsParameters(SmallVectorImpl &Terms) {
7237 for (const SCEV *T : Terms)
7238 if (containsParameters(T))
7239 return true;
7240 return false;
7241 }
7242
7243 // Return the number of product terms in S: the operand count when S is a SCEVMulExpr, and 1 for any other expression.
7244 static inline int numberOfTerms(const SCEV *S) {
7245 if (const SCEVMulExpr *Expr = dyn_cast(S))
7246 return Expr->getNumOperands();
7247 return 1;
7248 }
7249
7250 /// Second step of delinearization: compute the array dimensions Sizes from the
7251 /// set of Terms extracted from the memory access function of this SCEVAddRec. Terms is deduplicated, reordered and then consumed in place; Sizes is left untouched when Terms has fewer than two entries or contains no parameter.
7252 void SCEVAddRecExpr::findArrayDimensions(
7253 ScalarEvolution &SE, SmallVectorImpl &Terms,
7254 SmallVectorImpl &Sizes) const {
7255
7256 if (Terms.size() < 2)
7257 return;
7258
7259 // Early return when Terms do not contain parameters: we do not delinearize
7260 // non parametric SCEVs.
7261 if (!containsParameters(Terms))
7262 return;
7263
7264 DEBUG({
7265 dbgs() << "Terms:\n";
7266 for (const SCEV *T : Terms)
7267 dbgs() << *T << "\n";
7268 });
7269
7270 // Remove duplicates: sort by pointer value so that identical SCEV pointers become adjacent for std::unique.
7271 std::sort(Terms.begin(), Terms.end());
7272 Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end());
7273
7274 // Put larger terms first, i.e. the terms with the most product operands as counted by numberOfTerms.
7275 std::sort(Terms.begin(), Terms.end(), [](const SCEV *LHS, const SCEV *RHS) {
7276 return numberOfTerms(LHS) > numberOfTerms(RHS);
7277 });
7278
7279 DEBUG({
7280 dbgs() << "Terms after sorting:\n";
7281 for (const SCEV *T : Terms)
7282 dbgs() << *T << "\n";
7283 });
7284
7285 const SCEV *Zero = SE.getConstant(this->getType(), 0);
7286 const SCEV *One = SE.getConstant(this->getType(), 1);
7287 findArrayDimensionsRec(SE, Terms, Sizes, Zero, One);
7288
7289 DEBUG({
7290 dbgs() << "Sizes:\n";
7291 for (const SCEV *S : Sizes)
7292 dbgs() << *S << "\n";
7293 });
7294 }
7295
7296 /// Third step of delinearization: compute the access functions for the
7297 /// Subscripts based on the dimensions in Sizes.
7298 const SCEV *SCEVAddRecExpr::computeAccessFunctions(
7299 ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &Subscripts,
7300 SmallVectorImpl<const SCEV *> &Sizes) const {
7301 // Early exit in case this SCEV is not an affine multivariate function.
7302 const SCEV *Zero = SE.getConstant(this->getType(), 0);
7303 if (!this->isAffine())
7304 return Zero;
7305
7306 const SCEV *Res = this, *Remainder = Zero;
7307 int Last = Sizes.size() - 1;
7308 for (int i = Last; i >= 0; i--) {
7309 const SCEV *Q, *R;
7310 SCEVDivision::divide(SE, Res, Sizes[i], &Q, &R);
7311
7312 DEBUG({
7313 dbgs() << "Res: " << *Res << "\n";
7314 dbgs() << "Sizes[i]: " << *Sizes[i] << "\n";
7315 dbgs() << "Res divided by Sizes[i]:\n";
7316 dbgs() << "Quotient: " << *Q << "\n";
7317 dbgs() << "Remainder: " << *R << "\n";
7318 });
7319
7320 Res = Q;
7321
7322 if (i == Last) {
7323 // Do not record the last subscript corresponding to the size of elements
7324 // in the array.
7325 Remainder = R;
7326 continue;
7327 }
7328
7329 // Record the access function for the current subscript.
7330 Subscripts.push_back(R);
7331 }
7332
7333 // Also push in last position the remainder of the last division: it will be
7334 // the access function of the innermost dimension.
7335 Subscripts.push_back(Res);
7336
7337 std::reverse(Subscripts.begin(), Subscripts.end());
7338
7339 DEBUG({
7340 dbgs() << "Subscripts:\n";
7341 for (const SCEV *S : Subscripts)
7342 dbgs() << *S << "\n";
7343 });
7344 return Remainder;
71987345 }
71997346
72007347 /// Splits the SCEV into two vectors of SCEVs representing the subscripts and
72507397 SCEVAddRecExpr::delinearize(ScalarEvolution &SE,
72517398 SmallVectorImpl<const SCEV *> &Subscripts,
72527399 SmallVectorImpl<const SCEV *> &Sizes) const {
7253 // Early exit in case this SCEV is not an affine multivariate function.
7254 if (!this->isAffine())
7255 return this;
7256
7257 const SCEV *Start = this->getStart();
7258 const SCEV *Step = this->getStepRecurrence(SE);
7259
7260 // Build the SCEV representation of the canonical induction variable in the
7261 // loop of this SCEV.
7262 const SCEV *Zero = SE.getConstant(this->getType(), 0);
7263 const SCEV *One = SE.getConstant(this->getType(), 1);
7264 const SCEV *IV =
7265 SE.getAddRecExpr(Zero, One, this->getLoop(), this->getNoWrapFlags());
7266
7267 DEBUG(dbgs() << "(delinearize: " << *this << "\n");
7268
7269 // When the stride of this SCEV is 1, do not compute the GCD: the size of this
7270 // subscript is 1, and this same SCEV for the access function.
7271 const SCEV *Remainder = Zero;
7272 const SCEV *GCD = One;
7273
7274 // Find the GCD and Remainder of the Start and Step coefficients of this SCEV.
7275 if (Step != One && !Step->isAllOnesValue())
7276 GCD = SCEVGCD::findGCD(SE, Start, Step, &Remainder);
7277
7278 DEBUG(dbgs() << "GCD: " << *GCD << "\n");
7279 DEBUG(dbgs() << "Remainder: " << *Remainder << "\n");
7280
7281 const SCEV *Quotient = Start;
7282 if (GCD != One && !GCD->isAllOnesValue())
7283 // As findGCD computed Remainder, GCD divides "Start - Remainder." The
7284 // Quotient is then this SCEV without Remainder, scaled down by the GCD. The
7285 // Quotient is what will be used in the next subscript delinearization.
7286 Quotient = SCEVDivision::divide(SE, SE.getMinusSCEV(Start, Remainder), GCD);
7287
7288 DEBUG(dbgs() << "Quotient: " << *Quotient << "\n");
7289
7290 const SCEV *Rem = Quotient;
7291 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Quotient))
7292 // Recursively call delinearize on the Quotient until there are no more
7293 // multiples that can be recognized.
7294 Rem = AR->delinearize(SE, Subscripts, Sizes);
7295
7296 // Scale up the canonical induction variable IV by whatever remains from the
7297 // Step after division by the GCD: the GCD is the size of all the sub-array.
7298 if (Step != One && !Step->isAllOnesValue() && GCD != One &&
7299 !GCD->isAllOnesValue() && Step != GCD) {
7300 Step = SCEVDivision::divide(SE, Step, GCD);
7301 IV = SE.getMulExpr(IV, Step);
7302 }
7303 // The access function in the current subscript is computed as the canonical
7304 // induction variable IV (potentially scaled up by the step) and offset by
7305 // Rem, the offset of delinearization in the sub-array.
7306 const SCEV *Index = SE.getAddExpr(IV, Rem);
7307
7308 // Record the access function and the size of the current subscript.
7309 Subscripts.push_back(Index);
7310 Sizes.push_back(GCD);
7311
7312 #ifndef NDEBUG
7313 int Size = Sizes.size();
7314 DEBUG(dbgs() << "succeeded to delinearize " << *this << "\n");
7315 DEBUG(dbgs() << "ArrayDecl[UnknownSize]");
7316 for (int i = 0; i < Size - 1; i++)
7317 DEBUG(dbgs() << "[" << *Sizes[i] << "]");
7318 DEBUG(dbgs() << " with elements of " << *Sizes[Size - 1] << " bytes.\n");
7319
7320 DEBUG(dbgs() << "ArrayRef");
7321 for (int i = 0; i < Size; i++)
7322 DEBUG(dbgs() << "[" << *Subscripts[i] << "]");
7323 DEBUG(dbgs() << "\n)\n");
7324 #endif
7400 // First step: collect parametric terms.
7401 SmallVector<const SCEV *, 4> Terms;
7402 collectParametricTerms(SE, Terms);
7403
7404 // Second step: find subscript sizes.
7405 findArrayDimensions(SE, Terms, Sizes);
7406
7407 // Third step: compute the access functions for each subscript.
7408 const SCEV *Remainder = computeAccessFunctions(SE, Subscripts, Sizes);
7409
7410 DEBUG({
7411 dbgs() << "succeeded to delinearize " << *this << "\n";
7412 dbgs() << "ArrayDecl[UnknownSize]";
7413 for (const SCEV *S : Sizes)
7414 dbgs() << "[" << *S << "]";
7415
7416 dbgs() << "ArrayRef";
7417 for (const SCEV *S : Sizes)
7418 dbgs() << "[" << *S << "]";
7419 dbgs() << "\n";
7420 });
73257421
73267422 return Remainder;
73277423 }
1111 ; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(i32) bytes.
1212 ; CHECK: ArrayRef[{3,+,2}<%for.i>][{-4,+,3}<%for.j>][{7,+,5}<%for.k>]
1313
14 ; AddRec: {{(8 + ((4 + (12 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(12 * %o)}<%for.j>
15 ; CHECK: Base offset: %A
16 ; CHECK: ArrayDecl[UnknownSize][%o] with elements of sizeof(i32) bytes.
17 ; CHECK: ArrayRef[{(1 + (3 * %m)),+,(2 * %m)}<%for.i>][{2,+,(3 * %o)}<%for.j>]
18
19 ; AddRec: {(8 + ((-8 + (24 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>
20 ; CHECK: Base offset: %A
21 ; CHECK: ArrayDecl[UnknownSize] with elements of 2 bytes.
22 ; CHECK: ArrayRef[{((1 + ((-1 + (3 * %m)) * %o)) * sizeof(i32)),+,(%m * %o * sizeof(i32))}<%for.i>]
23
24 ; Function Attrs: nounwind uwtable
2514 define void @foo(i64 %n, i64 %m, i64 %o, i32* nocapture %A) #0 {
2615 entry:
2716 %cmp32 = icmp sgt i64 %n, 0
2929 ; CHECK: Base offset: %a.base
3030 ; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)] with elements of sizeof(float) bytes.
3131 ; CHECK: ArrayRef[{1,+,1}<%for.i>][{1,+,1}<%for.j>][{1,+,1}<%for.k>]
32
33 ; AddRec: {{(-4 + (4 * (sext i32 (-1 + %p.deps) to i64)) + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))}<%for.j>
34 ; CHECK: Base offset: %a.base
35 ; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.deps to i64)] with elements of sizeof(float) bytes.
36 ; CHECK: ArrayRef[{(1 + (sext i32 %a.cols to i64)),+,(sext i32 %a.cols to i64)}<%for.i>][{(-1 + (sext i32 (-1 + %p.deps) to i64)),+,(sext i32 %a.deps to i64)}<%for.j>]
37
38 ; AddRec: {(-4 + (4 * (sext i32 (-1 + %p.deps) to i64)) + ((sext i32 %a.deps to i64) * (-4 + (4 * (sext i32 (-1 + %p.cols) to i64)) + (4 * (sext i32 %a.cols to i64)))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>
39 ; CHECK: Base offset: %a.base
40 ; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(float) bytes.
41 ; CHECK: ArrayRef[{(-1 + (sext i32 (-1 + %p.deps) to i64) + ((sext i32 %a.deps to i64) * (-1 + (sext i32 (-1 + %p.cols) to i64) + (sext i32 %a.cols to i64)))),+,((sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>]
4232
4333 %struct.Mat = type { float*, i32, i32, i32, i32 }
4434
2929 ; CHECK: Base offset: %a.base
3030 ; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)] with elements of sizeof(float) bytes.
3131 ; CHECK: ArrayRef[{1,+,1}<%for.i>][{1,+,1}<%for.j>][{1,+,1}<%for.k>]
32
33 ; AddRec: {{(-4 + (4 * (sext i32 (-1 + %p.deps) to i64)) + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))}<%for.j>
34 ; CHECK: Base offset: %a.base
35 ; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.deps to i64)] with elements of sizeof(float) bytes.
36 ; CHECK: ArrayRef[{(1 + (sext i32 %a.cols to i64)),+,(sext i32 %a.cols to i64)}<%for.i>][{(-1 + (sext i32 (-1 + %p.deps) to i64)),+,(sext i32 %a.deps to i64)}<%for.j>]
37
38 ; AddRec: {(-4 + (4 * (sext i32 (-1 + %p.deps) to i64)) + ((sext i32 %a.deps to i64) * (-4 + (4 * (sext i32 (-1 + %p.cols) to i64)) + (4 * (sext i32 %a.cols to i64)))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>
39 ; CHECK: Base offset: %a.base
40 ; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(float) bytes.
41 ; CHECK: ArrayRef[{(-1 + (sext i32 (-1 + %p.deps) to i64) + ((sext i32 %a.deps to i64) * (-1 + (sext i32 (-1 + %p.cols) to i64) + (sext i32 %a.cols to i64)))),+,((sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>]
4232
4333 %struct.Mat = type { float*, i32, i32, i32, i32 }
4434
1111 ; CHECK: Base offset: %A
1212 ; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
1313 ; CHECK: ArrayRef[{3,+,1}<%for.i>][{-4,+,1}<%for.j>][{7,+,1}<%for.k>]
14
15 ; AddRec: {{(48 + ((-24 + (24 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>
16 ; CHECK: Base offset: %A
17 ; CHECK: ArrayDecl[UnknownSize][%o] with elements of sizeof(double) bytes.
18 ; CHECK: ArrayRef[{(-3 + (3 * %m)),+,%m}<%for.i>][{6,+,%o}<%for.j>]
19
20 ; AddRec: {(48 + ((-32 + (32 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>
21 ; CHECK: Base offset: %A
22 ; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes.
23 ; CHECK: ArrayRef[{(6 + ((-4 + (4 * %m)) * %o)),+,(%m * %o)}<%for.i>]
2414
2515 define void @foo(i64 %n, i64 %m, i64 %o, double* %A) {
2616 entry:
1111 ; CHECK: Base offset: %A
1212 ; CHECK: ArrayDecl[UnknownSize][%m][(%o + %p)] with elements of sizeof(double) bytes.
1313 ; CHECK: ArrayRef[{3,+,1}<%for.cond4.preheader.lr.ph.us>][{-4,+,1}<%for.body6.lr.ph.us.us>][{7,+,1}<%for.body6.us.us>]
14
15 ; AddRec: {{(48 + (8 * %o) + (8 * (-4 + (3 * %m)) * (%o + %p)) + %A),+,(8 * (%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>,+,(8 * (%o + %p))}<%for.body6.lr.ph.us.us>
16 ; CHECK: Base offset: %A
17 ; CHECK: ArrayDecl[UnknownSize][(%o + %p)] with elements of sizeof(double) bytes.
18 ; CHECK: ArrayRef[{(-4 + (3 * %m)),+,%m}<%for.cond4.preheader.lr.ph.us>][{(6 + %o),+,(%o + %p)}<%for.body6.lr.ph.us.us>]
19
20 ; AddRec: {(48 + (8 * %o) + ((-40 + (32 * %m)) * (%o + %p)) + %A),+,(8 * (%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>
21 ; CHECK: Base offset: %A
22 ; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes.
23 ; CHECK: ArrayRef[{(6 + ((-5 + (4 * %m)) * (%o + %p)) + %o),+,((%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>]
2414
2515 define void @foo(i64 %n, i64 %m, i64 %o, i64 %p, double* nocapture %A) nounwind uwtable {
2616 entry:
1111 ; CHECK: Base offset: %A
1212 ; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
1313 ; CHECK: ArrayRef[{%p,+,1}<%for.i>][{%q,+,1}<%for.j>][{%r,+,1}<%for.k>]
14
15 ; AddRec: {{(-8 + (8 * ((((%m * %p) + %q) * %o) + %r)) + (8 * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>
16 ; CHECK: Base offset: %A
17 ; CHECK: ArrayDecl[UnknownSize][%o] with elements of sizeof(double) bytes.
18 ; CHECK: ArrayRef[{(1 + (%m * %p) + %q),+,%m}<%for.i>][{(-1 + %r),+,%o}<%for.j>]
19
20 ; AddRec: {(-8 + (8 * ((((%m * %p) + %q) * %o) + %r)) + (8 * %m * %o) + %A),+,(8 * %m * %o)}<%for.i>
21 ; CHECK: Base offset: %A
22 ; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes.
23 ; CHECK: ArrayRef[{(-1 + ((((1 + %p) * %m) + %q) * %o) + %r),+,(%m * %o)}<%for.i>]
2414
2515 define void @foo(i64 %n, i64 %m, i64 %o, double* %A, i64 %p, i64 %q, i64 %r) {
2616 entry:
1111 ; CHECK: Base offset: %A
1212 ; CHECK: ArrayDecl[UnknownSize][%m] with elements of sizeof(double) bytes.
1313 ; CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>]
14
15 ; AddRec: {(-8 + (8 * %m) + %A),+,(8 * %m)}<%for.i>
16 ; CHECK: Base offset: %A
17 ; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes.
18 ; CHECK: ArrayRef[{(-1 + %m),+,%m}<%for.i>]
1914
2015 define void @foo(i64 %n, i64 %m, double* %A) {
2116 entry:
0 ; RUN: opt < %s -analyze -delinearize | FileCheck %s
1 ; XFAIL: *
2 ; We do not recognize anymore variable size arrays.
13
24 ; extern void bar(long n, long m, double A[n][m]);
35 ;
1111 ; CHECK: Base offset: %A
1212 ; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
1313 ; CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
14
15 ; AddRec: {{(-8 + (8 * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>
16 ; CHECK: Base offset: %A
17 ; CHECK: ArrayDecl[UnknownSize][(%m * %o)] with elements of sizeof(double) bytes.
18 ; CHECK: ArrayRef[{0,+,1}<%for.i>][{(-1 + %o),+,%o}<%for.j>]
19
20 ; AddRec: {(-8 + (8 * %m * %o) + %A),+,(8 * %m * %o)}<%for.i>
21 ; CHECK: Base offset: %A
22 ; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes.
23 ; CHECK: ArrayRef[{(-1 + (%m * %o)),+,(%m * %o)}<%for.i>]
2414
2515 define void @foo(i64 %n, i64 %m, i64 %o, double* %A) {
2616 entry:
1010 ; CHECK: Base offset: %A
1111 ; CHECK: ArrayDecl[UnknownSize][(zext i32 %m to i64)][(zext i32 %o to i64)] with elements of 8 bytes.
1212 ; CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
13
14 ; AddRec: {{((8 * (zext i32 (-1 + %o) to i64)) + %A),+,(8 * (zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i>,+,(8 * (zext i32 %o to i64))}<%for.j>
15 ; CHECK: Base offset: %A
16 ; CHECK: ArrayDecl[UnknownSize][((zext i32 %m to i64) * (zext i32 %o to i64))] with elements of 8 bytes.
17 ; CHECK: ArrayRef[{0,+,1}<%for.i>][{(zext i32 (-1 + %o) to i64),+,(zext i32 %o to i64)}<%for.j>]
18
19 ; AddRec: {((8 * (zext i32 (-1 + %o) to i64)) + (8 * (zext i32 (-1 + %m) to i64) * (zext i32 %o to i64)) + %A),+,(8 * (zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i>
20 ; CHECK: Base offset: %A
21 ; CHECK: ArrayDecl[UnknownSize] with elements of 8 bytes.
22 ; CHECK: ArrayRef[{((zext i32 (-1 + %o) to i64) + ((zext i32 (-1 + %m) to i64) * (zext i32 %o to i64))),+,((zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i>]
2313
2414 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
2515 target triple = "x86_64-unknown-linux-gnu"
2323
2424 ; DELIN: 'Dependence Analysis' for function 'banerjee0':
2525 ; DELIN: da analyze - none!
26 ; DELIN: da analyze - consistent flow [0 1]!
26 ; DELIN: da analyze - flow [<= <>]!
2727 ; DELIN: da analyze - confused!
2828 ; DELIN: da analyze - none!
2929 ; DELIN: da analyze - confused!
8282 ; CHECK: da analyze - output [* *]!
8383
8484 ; DELIN: 'Dependence Analysis' for function 'banerjee1':
85 ; DELIN: da analyze - none
86 ; DELIN: da analyze - consistent flow [0 1]!
87 ; DELIN: da analyze - confused!
88 ; DELIN: da analyze - none
85 ; DELIN: da analyze - output [* *]!
86 ; DELIN: da analyze - flow [* <>]!
87 ; DELIN: da analyze - confused!
88 ; DELIN: da analyze - input [* *]!
8989 ; DELIN: da analyze - confused!
9090 ; DELIN: da analyze - output [* *]!
9191
217217
218218 ; DELIN: 'Dependence Analysis' for function 'banerjee3':
219219 ; DELIN: da analyze - none!
220 ; DELIN: da analyze - consistent flow [-9 -9]!
220 ; DELIN: da analyze - flow [> >]!
221221 ; DELIN: da analyze - confused!
222222 ; DELIN: da analyze - none!
223223 ; DELIN: da analyze - confused!
335335
336336 ; DELIN: 'Dependence Analysis' for function 'banerjee5':
337337 ; DELIN: da analyze - none!
338 ; DELIN: da analyze - consistent flow [9 9]!
338 ; DELIN: da analyze - flow [< <]!
339339 ; DELIN: da analyze - confused!
340340 ; DELIN: da analyze - none!
341341 ; DELIN: da analyze - confused!
394394
395395 ; DELIN: 'Dependence Analysis' for function 'banerjee6':
396396 ; DELIN: da analyze - none!
397 ; DELIN: da analyze - consistent flow [0 -9]!
397 ; DELIN: da analyze - flow [=> <>]!
398398 ; DELIN: da analyze - confused!
399399 ; DELIN: da analyze - none!
400400 ; DELIN: da analyze - confused!
453453
454454 ; DELIN: 'Dependence Analysis' for function 'banerjee7':
455455 ; DELIN: da analyze - none!
456 ; DELIN: da analyze - consistent flow [-1 0]!
456 ; DELIN: da analyze - flow [> <=]!
457457 ; DELIN: da analyze - confused!
458458 ; DELIN: da analyze - none!
459459 ; DELIN: da analyze - confused!
512512
513513 ; DELIN: 'Dependence Analysis' for function 'banerjee8':
514514 ; DELIN: da analyze - none!
515 ; DELIN: da analyze - consistent flow [-1 -1]!
515 ; DELIN: da analyze - flow [> <>]!
516516 ; DELIN: da analyze - confused!
517517 ; DELIN: da analyze - none!
518518 ; DELIN: da analyze - confused!
570570 ; CHECK: da analyze - none!
571571
572572 ; DELIN: 'Dependence Analysis' for function 'banerjee9':
573 ; DELIN: da analyze - none!
573 ; DELIN: da analyze - output [* *]!
574574 ; DELIN: da analyze - flow [<= =|<]!
575575 ; DELIN: da analyze - confused!
576576 ; DELIN: da analyze - none!
749749
750750 ; DELIN: 'Dependence Analysis' for function 'banerjee12':
751751 ; DELIN: da analyze - none!
752 ; DELIN: da analyze - consistent flow [0 -11]!
752 ; DELIN: da analyze - flow [= <>]!
753753 ; DELIN: da analyze - confused!
754754 ; DELIN: da analyze - none!
755755 ; DELIN: da analyze - confused!
2323 ; CHECK: da analyze - none!
2424
2525 ; DELIN: 'Dependence Analysis' for function 'gcd0'
26 ; DELIN: da analyze - none!
26 ; DELIN: da analyze - output [* *]!
2727 ; DELIN: da analyze - flow [=> *|<]!
2828 ; DELIN: da analyze - confused!
29 ; DELIN: da analyze - none!
29 ; DELIN: da analyze - input [* *]!
3030 ; DELIN: da analyze - confused!
3131 ; DELIN: da analyze - none!
3232
8484 ; CHECK: da analyze - none!
8585
8686 ; DELIN: 'Dependence Analysis' for function 'gcd1'
87 ; DELIN: da analyze - none!
88 ; DELIN: da analyze - none!
89 ; DELIN: da analyze - confused!
90 ; DELIN: da analyze - none!
87 ; DELIN: da analyze - output [* *]!
88 ; DELIN: da analyze - none!
89 ; DELIN: da analyze - confused!
90 ; DELIN: da analyze - input [* *]!
9191 ; DELIN: da analyze - confused!
9292 ; DELIN: da analyze - none!
9393
146146 ; CHECK: da analyze - none!
147147
148148 ; DELIN: 'Dependence Analysis' for function 'gcd2'
149 ; DELIN: da analyze - none!
150 ; DELIN: da analyze - none!
151 ; DELIN: da analyze - confused!
152 ; DELIN: da analyze - none!
149 ; DELIN: da analyze - output [* *]!
150 ; DELIN: da analyze - none!
151 ; DELIN: da analyze - confused!
152 ; DELIN: da analyze - input [* *]!
153153 ; DELIN: da analyze - confused!
154154 ; DELIN: da analyze - none!
155155
268268 ; CHECK: da analyze - none!
269269
270270 ; DELIN: 'Dependence Analysis' for function 'gcd4'
271 ; DELIN: da analyze - output [* *]!
272 ; DELIN: da analyze - none!
273 ; DELIN: da analyze - confused!
274 ; DELIN: da analyze - input [* *]!
271 ; DELIN: da analyze - none!
272 ; DELIN: da analyze - none!
273 ; DELIN: da analyze - confused!
274 ; DELIN: da analyze - none!
275275 ; DELIN: da analyze - confused!
276276 ; DELIN: da analyze - none!
277277
338338 ; CHECK: da analyze - none!
339339
340340 ; DELIN: 'Dependence Analysis' for function 'gcd5'
341 ; DELIN: da analyze - output [* *]!
341 ; DELIN: da analyze - none!
342342 ; DELIN: da analyze - flow [<> *]!
343343 ; DELIN: da analyze - confused!
344 ; DELIN: da analyze - input [* *]!
344 ; DELIN: da analyze - none!
345345 ; DELIN: da analyze - confused!
346346 ; DELIN: da analyze - none!
347347
410410
411411 ; DELIN: 'Dependence Analysis' for function 'gcd6'
412412 ; DELIN: da analyze - none!
413 ; DELIN: da analyze - none!
413 ; DELIN: da analyze - flow [=> =>|<]!
414414 ; DELIN: da analyze - confused!
415415 ; DELIN: da analyze - none!
416416 ; DELIN: da analyze - confused!