llvm.org GIT mirror llvm / 685c628
Enable LoopReroll to reroll loops with a pointer induction variable. Example: while (buf != end) { S += buf[0]; S += buf[1]; buf += 2; } Differential Revision: http://reviews.llvm.org/D13151 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258709 91177308-0d34-0410-b5e6-96231b3b80d8 Lawrence Hu 3 years ago
2 changed file(s) with 209 addition(s) and 55 deletion(s). Raw diff Collapse all Expand all
395395 bool instrDependsOn(Instruction *I,
396396 UsesTy::iterator Start,
397397 UsesTy::iterator End);
398 void replaceIV(Instruction *Inst, Instruction *IV, const SCEV *IterCount);
398399
399400 LoopReroll *Parent;
400401
459460 return false;
460461 }
461462
463 static const SCEVConstant *getIncrmentFactorSCEV(ScalarEvolution *SE,
464 const SCEV *SCEVExpr,
465 Instruction &IV) {
466 const SCEVMulExpr *MulSCEV = dyn_cast(SCEVExpr);
467
468 // If StepRecurrence of a SCEVExpr is a constant (c1 * c2, c2 = sizeof(ptr)),
469 // Return c1.
470 if (!MulSCEV && IV.getType()->isPointerTy())
471 if (const SCEVConstant *IncSCEV = dyn_cast(SCEVExpr)) {
472 const PointerType *PTy = cast(IV.getType());
473 Type *ElTy = PTy->getElementType();
474 const SCEV *SizeOfExpr =
475 SE->getSizeOfExpr(SE->getEffectiveSCEVType(IV.getType()), ElTy);
476 if (IncSCEV->getValue()->getValue().isNegative()) {
477 const SCEV *NewSCEV =
478 SE->getUDivExpr(SE->getNegativeSCEV(SCEVExpr), SizeOfExpr);
479 return dyn_cast(SE->getNegativeSCEV(NewSCEV));
480 } else {
481 return dyn_cast(SE->getUDivExpr(SCEVExpr, SizeOfExpr));
482 }
483 }
484
485 if (!MulSCEV)
486 return nullptr;
487
488 // If StepRecurrence of a SCEVExpr is a c * sizeof(x), where c is constant,
489 // Return c.
490 const SCEVConstant *CIncSCEV = nullptr;
491 for (const SCEV *Operand : MulSCEV->operands()) {
492 if (const SCEVConstant *Constant = dyn_cast(Operand)) {
493 CIncSCEV = Constant;
494 } else if (const SCEVUnknown *Unknown = dyn_cast(Operand)) {
495 Type *AllocTy;
496 if (!Unknown->isSizeOf(AllocTy))
497 break;
498 } else {
499 return nullptr;
500 }
501 }
502 return CIncSCEV;
503 }
504
462505 // Collect the list of loop induction variables with respect to which it might
463506 // be possible to reroll the loop.
464507 void LoopReroll::collectPossibleIVs(Loop *L,
468511 IE = Header->getFirstInsertionPt(); I != IE; ++I) {
469512 if (!isa(I))
470513 continue;
471 if (!I->getType()->isIntegerTy())
514 if (!I->getType()->isIntegerTy() && !I->getType()->isPointerTy())
472515 continue;
473516
474517 if (const SCEVAddRecExpr *PHISCEV =
477520 continue;
478521 if (!PHISCEV->isAffine())
479522 continue;
480 if (const SCEVConstant *IncSCEV =
481 dyn_cast(PHISCEV->getStepRecurrence(*SE))) {
482 const APInt &AInt = IncSCEV->getAPInt().abs();
523 const SCEVConstant *IncSCEV = nullptr;
524 if (I->getType()->isPointerTy())
525 IncSCEV =
526 getIncrmentFactorSCEV(SE, PHISCEV->getStepRecurrence(*SE), *I);
527 else
528 IncSCEV = dyn_cast(PHISCEV->getStepRecurrence(*SE));
529 if (IncSCEV) {
530 const APInt &AInt = IncSCEV->getValue()->getValue().abs();
483531 if (IncSCEV->getValue()->isZero() || AInt.uge(MaxInc))
484532 continue;
485533 IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue();
650698
651699 static bool isLoopIncrement(User *U, Instruction *IV) {
652700 BinaryOperator *BO = dyn_cast(U);
653 if (!BO || BO->getOpcode() != Instruction::Add)
701
702 if ((BO && BO->getOpcode() != Instruction::Add) ||
703 (!BO && !isa(U)))
654704 return false;
655705
656 for (auto *UU : BO->users()) {
706 for (auto *UU : U->users()) {
657707 PHINode *PN = dyn_cast(UU);
658708 if (PN && PN == IV)
659709 return true;
12711321
12721322 ++J;
12731323 }
1274 bool Negative = IVToIncMap[IV] < 0;
1275 const DataLayout &DL = Header->getModule()->getDataLayout();
12761324
12771325 // We need to create a new induction variable for each different BaseInst.
1278 for (auto &DRS : RootSets) {
1326 for (auto &DRS : RootSets)
12791327 // Insert the new induction variable.
1280 const SCEVAddRecExpr *RealIVSCEV =
1281 cast(SE->getSCEV(DRS.BaseInst));
1282 const SCEV *Start = RealIVSCEV->getStart();
1283 const SCEVAddRecExpr *H = cast(SE->getAddRecExpr(
1284 Start, SE->getConstant(RealIVSCEV->getType(), Negative ? -1 : 1), L,
1285 SCEV::FlagAnyWrap));
1286 { // Limit the lifetime of SCEVExpander.
1287 SCEVExpander Expander(*SE, DL, "reroll");
1288 Value *NewIV = Expander.expandCodeFor(H, IV->getType(), &Header->front());
1289
1290 for (auto &KV : Uses) {
1291 if (KV.second.find_first() == 0)
1292 KV.first->replaceUsesOfWith(DRS.BaseInst, NewIV);
1293 }
1294
1295 if (BranchInst *BI = dyn_cast(Header->getTerminator())) {
1296 // FIXME: Why do we need this check?
1297 if (Uses[BI].find_first() == IL_All) {
1298 const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
1299
1300 // Iteration count SCEV minus 1
1301 const SCEV *ICMinus1SCEV = SE->getMinusSCEV(
1302 ICSCEV, SE->getConstant(ICSCEV->getType(), Negative ? -1 : 1));
1303
1304 Value *ICMinus1; // Iteration count minus 1
1305 if (isa(ICMinus1SCEV)) {
1306 ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), BI);
1307 } else {
1308 BasicBlock *Preheader = L->getLoopPreheader();
1309 if (!Preheader)
1310 Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
1311
1312 ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(),
1313 Preheader->getTerminator());
1314 }
1315
1316 Value *Cond =
1317 new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1, "exitcond");
1318 BI->setCondition(Cond);
1319
1320 if (BI->getSuccessor(1) != Header)
1321 BI->swapSuccessors();
1322 }
1323 }
1324 }
1325 }
1328 replaceIV(DRS.BaseInst, IV, IterCount);
13261329
13271330 SimplifyInstructionsInBlock(Header, TLI);
13281331 DeleteDeadPHIs(Header, TLI);
1332 }
1333
1334 void LoopReroll::DAGRootTracker::replaceIV(Instruction *Inst,
1335 Instruction *InstIV,
1336 const SCEV *IterCount) {
1337 BasicBlock *Header = L->getHeader();
1338 int64_t Inc = IVToIncMap[InstIV];
1339 bool Negative = Inc < 0;
1340
1341 const SCEVAddRecExpr *RealIVSCEV = cast(SE->getSCEV(Inst));
1342 const SCEV *Start = RealIVSCEV->getStart();
1343
1344 const SCEV *SizeOfExpr = nullptr;
1345 const SCEV *IncrExpr =
1346 SE->getConstant(RealIVSCEV->getType(), Negative ? -1 : 1);
1347 if (auto *PTy = dyn_cast(Inst->getType())) {
1348 Type *ElTy = PTy->getElementType();
1349 SizeOfExpr =
1350 SE->getSizeOfExpr(SE->getEffectiveSCEVType(Inst->getType()), ElTy);
1351 IncrExpr = SE->getMulExpr(IncrExpr, SizeOfExpr);
1352 }
1353 const SCEV *NewIVSCEV =
1354 SE->getAddRecExpr(Start, IncrExpr, L, SCEV::FlagAnyWrap);
1355
1356 { // Limit the lifetime of SCEVExpander.
1357 const DataLayout &DL = Header->getModule()->getDataLayout();
1358 SCEVExpander Expander(*SE, DL, "reroll");
1359 Value *NewIV =
1360 Expander.expandCodeFor(NewIVSCEV, InstIV->getType(), &Header->front());
1361
1362 for (auto &KV : Uses)
1363 if (KV.second.find_first() == 0)
1364 KV.first->replaceUsesOfWith(Inst, NewIV);
1365
1366 if (BranchInst *BI = dyn_cast(Header->getTerminator())) {
1367 // FIXME: Why do we need this check?
1368 if (Uses[BI].find_first() == IL_All) {
1369 const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
1370
1371 // Iteration count SCEV minus or plus 1
1372 const SCEV *MinusPlus1SCEV =
1373 SE->getConstant(ICSCEV->getType(), Negative ? -1 : 1);
1374 if (Inst->getType()->isPointerTy()) {
1375 assert(SizeOfExpr && "SizeOfExpr is not initialized");
1376 MinusPlus1SCEV = SE->getMulExpr(MinusPlus1SCEV, SizeOfExpr);
1377 }
1378
1379 const SCEV *ICMinusPlus1SCEV = SE->getMinusSCEV(ICSCEV, MinusPlus1SCEV);
1380 // Iteration count minus 1
1381 Value *ICMinusPlus1 = nullptr;
1382 if (isa(ICMinusPlus1SCEV)) {
1383 ICMinusPlus1 =
1384 Expander.expandCodeFor(ICMinusPlus1SCEV, NewIV->getType(), BI);
1385 } else {
1386 BasicBlock *Preheader = L->getLoopPreheader();
1387 if (!Preheader)
1388 Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
1389 ICMinusPlus1 = Expander.expandCodeFor(
1390 ICMinusPlus1SCEV, NewIV->getType(), Preheader->getTerminator());
1391 }
1392
1393 Value *Cond =
1394 new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinusPlus1, "exitcond");
1395 BI->setCondition(Cond);
1396
1397 if (BI->getSuccessor(1) != Header)
1398 BI->swapSuccessors();
1399 }
1400 }
1401 }
13291402 }
13301403
13311404 // Validate the selected reductions. All iterations must have an isomorphic
0 ; RUN: opt -S -loop-reroll %s | FileCheck %s
1 target triple = "aarch64--linux-gnu"
2
; Sums the i32 values between %buf and %end with a loop that is unrolled by
; two: each iteration loads from the pointer phi at element offsets 0 and 1
; and then advances the pointer by 2 elements. The FileCheck lines in
; while.body expect a single load per iteration, indexed by an i64 counter
; that advances by 1.
3 define i32 @test(i32* readonly %buf, i32* readnone %end) #0 {
4 entry:
5 %cmp.9 = icmp eq i32* %buf, %end
6 br i1 %cmp.9, label %while.end, label %while.body.preheader
7
8 while.body.preheader:
9 br label %while.body
10
11 while.body:
12 ;CHECK-LABEL: while.body:
13 ;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ]
14 ;CHECK-NEXT: %S.011 = phi i32 [ %add, %while.body ], [ undef, %while.body.preheader ]
15 ;CHECK-NEXT: %scevgep = getelementptr i32, i32* %buf, i64 %indvar
16 ;CHECK-NEXT: %4 = load i32, i32* %scevgep, align 4
17 ;CHECK-NEXT: %add = add nsw i32 %4, %S.011
18 ;CHECK-NEXT: %indvar.next = add i64 %indvar, 1
19 ;CHECK-NEXT: %exitcond = icmp eq i32* %scevgep, %scevgep5
20 ;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body
21
22 %S.011 = phi i32 [ %add2, %while.body ], [ undef, %while.body.preheader ]
23 %buf.addr.010 = phi i32* [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ]
24 %0 = load i32, i32* %buf.addr.010, align 4
25 %add = add nsw i32 %0, %S.011
26 %arrayidx1 = getelementptr inbounds i32, i32* %buf.addr.010, i64 1
27 %1 = load i32, i32* %arrayidx1, align 4
28 %add2 = add nsw i32 %add, %1
29 %add.ptr = getelementptr inbounds i32, i32* %buf.addr.010, i64 2
30 %cmp = icmp eq i32* %add.ptr, %end
31 br i1 %cmp, label %while.end.loopexit, label %while.body
32
33 while.end.loopexit:
34 %add2.lcssa = phi i32 [ %add2, %while.body ]
35 br label %while.end
36
37 while.end:
38 %S.0.lcssa = phi i32 [ undef, %entry ], [ %add2.lcssa, %while.end.loopexit ]
39 ret i32 %S.0.lcssa
40 }
41
; Same shape as @test but walking backwards: each iteration loads from the
; pointer phi at element offsets 0 and -1 and then moves the pointer by -2
; elements. The FileCheck lines in while.body expect a single load per
; iteration whose index is the i64 counter multiplied by -1.
42 define i32 @test2(i32* readonly %buf, i32* readnone %end) #0 {
43 entry:
44 %cmp.9 = icmp eq i32* %buf, %end
45 br i1 %cmp.9, label %while.end, label %while.body.preheader
46
47 while.body.preheader:
48 br label %while.body
49
50 while.body:
51 ;CHECK-LABEL: while.body:
52 ;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ]
53 ;CHECK-NEXT: %S.011 = phi i32 [ %add, %while.body ], [ undef, %while.body.preheader ]
54 ;CHECK-NEXT: %4 = mul i64 %indvar, -1
55 ;CHECK-NEXT: %scevgep = getelementptr i32, i32* %buf, i64 %4
56 ;CHECK-NEXT: %5 = load i32, i32* %scevgep, align 4
57 ;CHECK-NEXT: %add = add nsw i32 %5, %S.011
58 ;CHECK-NEXT: %indvar.next = add i64 %indvar, 1
59 ;CHECK-NEXT: %exitcond = icmp eq i32* %scevgep, %scevgep5
60 ;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body
61
62 %S.011 = phi i32 [ %add2, %while.body ], [ undef, %while.body.preheader ]
63 %buf.addr.010 = phi i32* [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ]
64 %0 = load i32, i32* %buf.addr.010, align 4
65 %add = add nsw i32 %0, %S.011
66 %arrayidx1 = getelementptr inbounds i32, i32* %buf.addr.010, i64 -1
67 %1 = load i32, i32* %arrayidx1, align 4
68 %add2 = add nsw i32 %add, %1
69 %add.ptr = getelementptr inbounds i32, i32* %buf.addr.010, i64 -2
70 %cmp = icmp eq i32* %add.ptr, %end
71 br i1 %cmp, label %while.end.loopexit, label %while.body
72
73 while.end.loopexit:
74 %add2.lcssa = phi i32 [ %add2, %while.body ]
75 br label %while.end
76
77 while.end:
78 %S.0.lcssa = phi i32 [ undef, %entry ], [ %add2.lcssa, %while.end.loopexit ]
79 ret i32 %S.0.lcssa
80 }