llvm.org GIT mirror llvm / 87cb71d
SCEV: Allow simple AddRec * Parameter products in delinearization This patch also allows the -delinearize pass to delinearize expressions that do not have an outermost SCEVAddRec expression. The SCEV::delinearize infrastructure allowed this since r240952, but the -delinearize pass was not updated yet. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@250018 91177308-0d34-0410-b5e6-96231b3b80d8 Tobias Grosser 5 years ago
3 changed file(s) with 143 addition(s) and 13 deletion(s). Raw diff Collapse all Expand all
101101 if (!BasePointer)
102102 break;
103103 AccessFn = SE->getMinusSCEV(AccessFn, BasePointer);
104 const SCEVAddRecExpr *AR = dyn_cast(AccessFn);
105
106 // Do not try to delinearize memory accesses that are not AddRecs.
107 if (!AR)
108 break;
109
110104
111105 O << "\n";
112106 O << "Inst:" << *Inst << "\n";
113107 O << "In Loop with Header: " << L->getHeader()->getName() << "\n";
114 O << "AddRec: " << *AR << "\n";
108 O << "AccessFunction: " << *AccessFn << "\n";
115109
116110 SmallVector Subscripts, Sizes;
117 SE->delinearize(AR, Subscripts, Sizes, SE->getElementSize(Inst));
111 SE->delinearize(AccessFn, Subscripts, Sizes, SE->getElementSize(Inst));
118112 if (Subscripts.size() == 0 || Sizes.size() == 0 ||
119113 Subscripts.size() != Sizes.size()) {
120114 O << "failed to delinearize\n";
83068306 }
83078307 bool isDone() const { return false; }
83088308 };
8309 }
8310
8311 /// Find parametric terms in this SCEVAddRecExpr.
8309
8310 // Check if a SCEV contains an AddRecExpr.
8311 struct SCEVHasAddRec {
8312 bool &ContainsAddRec; // Result flag held by reference; written by the constructor and by follow().
8313
8314 SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) {
8315 ContainsAddRec = false; // Reset the flag before any expression is visited.
8316 }
8317
8318 bool follow(const SCEV *S) {
8319 if (isa(S)) { // NOTE(review): the isa<> template argument is missing in this rendering; presumably SCEVAddRecExpr - confirm.
8320 ContainsAddRec = true;
8321
8322 // Stop recursion: the flag is set, so there is no need to walk the operands.
8323 return false;
8324 }
8325
8326 // Keep looking.
8327 return true;
8328 }
8329 bool isDone() const { return false; }
8330 };
8331
8332 // Find factors that are multiplied with an expression that (possibly as a
8333 // subexpression) contains an AddRecExpr. In the expression:
8334 //
8335 // 8 * (100 + %p * %q * (%a + {0, +, 1}_loop))
8336 //
8337 // "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)"
8338 // that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array size
8339 // parameters as they form a product with an induction variable.
8340 //
8341 // This collector expects all array size parameters to be in the same MulExpr.
8342 // It might be necessary to later add support for collecting parameters that are
8343 // spread over different nested MulExpr.
8344 struct SCEVCollectAddRecMultiplies {
8345 SmallVectorImpl &Terms; // Output list; follow() appends one product per collected multiplication.
8346 ScalarEvolution &SE;
8347
8348 SCEVCollectAddRecMultiplies(SmallVectorImpl &T, ScalarEvolution &SE)
8349 : Terms(T), SE(SE) {}
8350
8351 bool follow(const SCEV *S) {
8352 if (auto *Mul = dyn_cast(S)) { // NOTE(review): the dyn_cast<> template argument is missing in this rendering; presumably SCEVMulExpr - confirm.
8353 bool HasAddRec = false;
8354 SmallVector Operands; // Operands of Mul that pass the isa<> check below.
8355 for (auto Op : Mul->operands()) {
8356 if (isa(Op)) { // NOTE(review): the isa<> template argument is missing in this rendering; presumably SCEVUnknown - confirm.
8357 Operands.push_back(Op);
8358 } else {
8359 bool ContainsAddRec;
8360 SCEVHasAddRec ContiansAddRec(ContainsAddRec); // The constructor sets ContainsAddRec to false before the walk.
8361 visitAll(Op, ContiansAddRec);
8362 HasAddRec |= ContainsAddRec; // Becomes true once any non-collected operand contains an AddRec.
8363 }
8364 }
8365 if (Operands.size() == 0) // No factor was gathered from this product: keep walking into its operands.
8366 return true;
8367
8368 if (!HasAddRec) // Factors were gathered but no other operand contains an AddRec: collect nothing, do not descend.
8369 return false;
8370
8371 Terms.push_back(SE.getMulExpr(Operands)); // Record the product of the gathered factors as a single term.
8372 // Stop recursion: once we collected a term, do not walk its operands.
8373 return false;
8374 }
8375
8376 // Keep looking.
8377 return true;
8378 }
8379 bool isDone() const { return false; }
8380 };
8381 }
8382
8383 /// Find parametric terms in this SCEVAddRecExpr. We look for parameters in
8384 /// two places:
8385 /// 1) The strides of AddRec expressions.
8386 /// 2) Unknowns that are multiplied with AddRec expressions.
83128387 void ScalarEvolution::collectParametricTerms(const SCEV *Expr,
83138388 SmallVectorImpl &Terms) {
83148389 SmallVector Strides;
83318406 for (const SCEV *T : Terms)
83328407 dbgs() << *T << "\n";
83338408 });
8409
8410 SCEVCollectAddRecMultiplies MulCollector(Terms, *this);
8411 visitAll(Expr, MulCollector);
83348412 }
83358413
83368414 static bool findArrayDimensionsRec(ScalarEvolution &SE,
84918569
84928570 ScalarEvolution &SE = *const_cast(this);
84938571
8494 // Divide all terms by the element size.
8572 // Try to divide all terms by the element size. If a term is not divisible by
8573 // the element size, proceed with the original term.
84958574 for (const SCEV *&Term : Terms) {
84968575 const SCEV *Q, *R;
84978576 SCEVDivision::divide(SE, Term, ElementSize, &Q, &R);
8498 Term = Q;
8577 if (!Q->isZero())
8578 Term = Q;
84998579 }
85008580
85018581 SmallVector NewTerms;
0 ; RUN: opt -delinearize -analyze < %s | FileCheck %s
1 ;
2 ; void foo(float *A, long *p) {
3 ; for (long i = 0; i < 100; i++)
4 ; for (long j = 0; j < 100; j++)
5 ; A[i * (*p) + j] += i + j;
6 ; }
7 ;
8 ; CHECK: ArrayDecl[UnknownSize][%pval] with elements of 4 bytes.
9 ; CHECK: ArrayRef[{0,+,1}<%bb2>][{0,+,1}<%bb4>]
10 ;
11 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
12
13 define void @foo(float* %A, i64* %p) {
14 bb:
15 br label %bb2
16
17 bb2: ; preds = %bb16, %bb
18 %i.0 = phi i64 [ 0, %bb ], [ %tmp17, %bb16 ] ; outer induction variable i
19 %exitcond1 = icmp ne i64 %i.0, 100
20 br i1 %exitcond1, label %bb3, label %bb18
21
22 bb3: ; preds = %bb2
23 br label %bb4
24
25 bb4: ; preds = %bb13, %bb3
26 %j.0 = phi i64 [ 0, %bb3 ], [ %tmp14, %bb13 ] ; inner induction variable j
27 %exitcond = icmp ne i64 %j.0, 100
28 br i1 %exitcond, label %bb5, label %bb15
29
30 bb5: ; preds = %bb4
31 %tmp = add nuw nsw i64 %i.0, %j.0 ; i + j
32 %tmp6 = sitofp i64 %tmp to float
33 %pval = load i64, i64* %p, align 8 ; *p
34 %tmp8 = mul nsw i64 %i.0, %pval ; i * (*p)
35 %tmp9 = add nsw i64 %tmp8, %j.0 ; i * (*p) + j
36 %tmp10 = getelementptr inbounds float, float* %A, i64 %tmp9 ; &A[i * (*p) + j]
37 %tmp11 = load float, float* %tmp10, align 4
38 %tmp12 = fadd float %tmp11, %tmp6
39 store float %tmp12, float* %tmp10, align 4 ; A[i * (*p) + j] += i + j
40 br label %bb13
41
42 bb13: ; preds = %bb5
43 %tmp14 = add nuw nsw i64 %j.0, 1
44 br label %bb4
45
46 bb15: ; preds = %bb4
47 br label %bb16
48
49 bb16: ; preds = %bb15
50 %tmp17 = add nuw nsw i64 %i.0, 1
51 br label %bb2
52
53 bb18: ; preds = %bb2
54 ret void
55 }