llvm.org GIT mirror llvm / 7a3330e
[LV] Refactor ILV.vectorize{Loop}() by introducing LVP.executePlan(); NFC Introduce LoopVectorizationPlanner.executePlan(), replacing ILV.vectorize() and refactoring ILV.vectorizeLoop(). Method collectTriviallyDeadInstructions() is moved from ILV to LVP. These changes facilitate building VPlans and using them to generate code, following https://reviews.llvm.org/D28975 and its tentative breakdown. Method ILV.createEmptyLoop() is renamed ILV.createVectorizedLoopSkeleton() to improve clarity; its contents remain intact. Differential Revision: https://reviews.llvm.org/D32200 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@302790 91177308-0d34-0410-b5e6-96231b3b80d8 Ayal Zaks 3 years ago
1 changed file(s) with 101 addition(s) and 80 deletion(s). Raw diff Collapse all Expand all
390390 TripCount(nullptr), VectorTripCount(nullptr), Legal(LVL), Cost(CM),
391391 AddedSafetyChecks(false) {}
392392
393 // Perform the actual loop widening (vectorization).
394 void vectorize() {
395 // Create a new empty loop. Unlink the old loop and connect the new one.
396 createEmptyLoop();
397 // Widen each instruction in the old loop to a new one in the new loop.
398 vectorizeLoop();
399 }
393 /// Create a new empty loop. Unlink the old loop and connect the new one.
394 void createVectorizedLoopSkeleton();
395
396 /// Vectorize a single instruction within the innermost loop.
397 void vectorizeInstruction(Instruction &I);
398
399 /// Fix the vectorized code, taking care of header phi's, live-outs, and more.
400 void fixVectorizedLoop();
400401
401402 // Return true if any runtime check is added.
402403 bool areSafetyChecksAdded() { return AddedSafetyChecks; }
424425 EdgeMaskCacheTy;
425426 typedef DenseMap<BasicBlock *, VectorParts> BlockMaskCacheTy;
426427
427 /// Create an empty loop, based on the loop ranges of the old loop.
428 void createEmptyLoop();
429
430428 /// Set up the values of the IVs correctly when exiting the vector loop.
431429 void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
432430 Value *CountRoundDown, Value *EndValue,
435433 /// Create a new induction variable inside L.
436434 PHINode *createInductionVariable(Loop *L, Value *Start, Value *End,
437435 Value *Step, Instruction *DL);
438 /// Copy and widen the instructions from the old loop.
439 virtual void vectorizeLoop();
440436
441437 /// Handle all cross-iteration phis in the header.
442438 void fixCrossIterationPHIs();
462458 /// Predicate conditional instructions that require predication on their
463459 /// respective conditions.
464460 void predicateInstructions();
465
466 /// Collect the instructions from the original loop that would be trivially
467 /// dead in the vectorized loop if generated.
468 void collectTriviallyDeadInstructions(
469 SmallPtrSetImpl<Instruction *> &DeadInstructions);
470461
471462 /// Shrinks vector element sizes to the smallest bitwidth they can be legally
472463 /// represented as.
479470 /// A helper function that computes the predicate of the edge between SRC
480471 /// and DST.
481472 VectorParts createEdgeMask(BasicBlock *Src, BasicBlock *Dst);
482
483 /// A helper function to vectorize a single instruction within the innermost
484 /// loop.
485 void vectorizeInstruction(Instruction &I);
486473
487474 /// Vectorize a single PHINode in a block. This method handles the induction
488475 /// variable canonicalization. It supports both VF = 1 for unrolled loops and
21872174 /// passed Legality checks.
21882175 class LoopVectorizationPlanner {
21892176 public:
2190 LoopVectorizationPlanner(LoopVectorizationCostModel &CM) : CM(CM) {}
2177 LoopVectorizationPlanner(Loop *OrigLoop, LoopInfo *LI,
2178 LoopVectorizationLegality *Legal,
2179 LoopVectorizationCostModel &CM)
2180 : OrigLoop(OrigLoop), LI(LI), Legal(Legal), CM(CM) {}
21912181
21922182 ~LoopVectorizationPlanner() {}
21932183
21952185 LoopVectorizationCostModel::VectorizationFactor plan(bool OptForSize,
21962186 unsigned UserVF);
21972187
2188 /// Generate the IR code for the vectorized loop.
2189 void executePlan(InnerLoopVectorizer &ILV);
2190
2191 protected:
2192 /// Collect the instructions from the original loop that would be trivially
2193 /// dead in the vectorized loop if generated.
2194 void collectTriviallyDeadInstructions(
2195 SmallPtrSetImpl<Instruction *> &DeadInstructions);
2196
21982197 private:
2198 /// The loop that we evaluate.
2199 Loop *OrigLoop;
2200
2201 /// Loop Info analysis.
2202 LoopInfo *LI;
2203
2204 /// The legality analysis.
2205 LoopVectorizationLegality *Legal;
2206
21992207 /// The profitability analysis.
22002208 LoopVectorizationCostModel &CM;
22012209 };
33633371 LVer->prepareNoAliasMetadata();
33643372 }
33653373
3366 void InnerLoopVectorizer::createEmptyLoop() {
3374 void InnerLoopVectorizer::createVectorizedLoopSkeleton() {
33673375 /*
33683376 In this function we generate a new loop. The new loop will contain
33693377 the vectorized instructions while the old loop will continue to run the
38853893 }
38863894 }
38873895
3888 void InnerLoopVectorizer::vectorizeLoop() {
3889 //===------------------------------------------------===//
3890 //
3891 // Notice: any optimization or new instruction that go
3892 // into the code below should be also be implemented in
3893 // the cost-model.
3894 //
3895 //===------------------------------------------------===//
3896
3897 // Collect instructions from the original loop that will become trivially dead
3898 // in the vectorized loop. We don't need to vectorize these instructions. For
3899 // example, original induction update instructions can become dead because we
3900 // separately emit induction "steps" when generating code for the new loop.
3901 // Similarly, we create a new latch condition when setting up the structure
3902 // of the new loop, so the old one can become dead.
3903 SmallPtrSet<Instruction *, 4> DeadInstructions;
3904 collectTriviallyDeadInstructions(DeadInstructions);
3905
3906 // Scan the loop in a topological order to ensure that defs are vectorized
3907 // before users.
3908 LoopBlocksDFS DFS(OrigLoop);
3909 DFS.perform(LI);
3910
3911 // Vectorize all instructions in the original loop that will not become
3912 // trivially dead when vectorized.
3913 for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
3914 for (Instruction &I : *BB)
3915 if (!DeadInstructions.count(&I))
3916 vectorizeInstruction(I);
3917
3896 void InnerLoopVectorizer::fixVectorizedLoop() {
39183897 // Insert truncates and extends for any truncated instructions as hints to
39193898 // InstCombine.
39203899 if (VF > 1)
43234302 if (LCSSAPhi->getNumIncomingValues() == 1)
43244303 LCSSAPhi->addIncoming(UndefValue::get(LCSSAPhi->getType()),
43254304 LoopMiddleBlock);
4326 }
4327 }
4328
4329 void InnerLoopVectorizer::collectTriviallyDeadInstructions(
4330 SmallPtrSetImpl<Instruction *> &DeadInstructions) {
4331 BasicBlock *Latch = OrigLoop->getLoopLatch();
4332
4333 // We create new control-flow for the vectorized loop, so the original
4334 // condition will be dead after vectorization if it's only used by the
4335 // branch.
4336 auto *Cmp = dyn_cast<Instruction>(Latch->getTerminator()->getOperand(0));
4337 if (Cmp && Cmp->hasOneUse())
4338 DeadInstructions.insert(Cmp);
4339
4340 // We create new "steps" for induction variable updates to which the original
4341 // induction variables map. An original update instruction will be dead if
4342 // all its users except the induction variable are dead.
4343 for (auto &Induction : *Legal->getInductionVars()) {
4344 PHINode *Ind = Induction.first;
4345 auto *IndUpdate = cast<Instruction>(Ind->getIncomingValueForBlock(Latch));
4346 if (all_of(IndUpdate->users(), [&](User *U) -> bool {
4347 return U == Ind || DeadInstructions.count(cast<Instruction>(U));
4348 }))
4349 DeadInstructions.insert(IndUpdate);
43504305 }
43514306 }
43524307
75527507 return CM.selectVectorizationFactor(MaxVF);
75537508 }
75547509
7510 void LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV) {
7511 // Perform the actual loop transformation.
7512
7513 // 1. Create a new empty loop. Unlink the old loop and connect the new one.
7514 ILV.createVectorizedLoopSkeleton();
7515
7516 //===------------------------------------------------===//
7517 //
7518 // Notice: any optimization or new instruction that go
7519 // into the code below should also be implemented in
7520 // the cost-model.
7521 //
7522 //===------------------------------------------------===//
7523
7524 // 2. Copy and widen instructions from the old loop into the new loop.
7525
7526 // Collect instructions from the original loop that will become trivially dead
7527 // in the vectorized loop. We don't need to vectorize these instructions. For
7528 // example, original induction update instructions can become dead because we
7529 // separately emit induction "steps" when generating code for the new loop.
7530 // Similarly, we create a new latch condition when setting up the structure
7531 // of the new loop, so the old one can become dead.
7532 SmallPtrSet<Instruction *, 4> DeadInstructions;
7533 collectTriviallyDeadInstructions(DeadInstructions);
7534
7535 // Scan the loop in a topological order to ensure that defs are vectorized
7536 // before users.
7537 LoopBlocksDFS DFS(OrigLoop);
7538 DFS.perform(LI);
7539
7540 // Vectorize all instructions in the original loop that will not become
7541 // trivially dead when vectorized.
7542 for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
7543 for (Instruction &I : *BB)
7544 if (!DeadInstructions.count(&I))
7545 ILV.vectorizeInstruction(I);
7546
7547 // 3. Fix the vectorized code: take care of header phi's, live-outs,
7548 // predication, updating analyses.
7549 ILV.fixVectorizedLoop();
7550 }
7551
7552 void LoopVectorizationPlanner::collectTriviallyDeadInstructions(
7553 SmallPtrSetImpl<Instruction *> &DeadInstructions) {
7554 BasicBlock *Latch = OrigLoop->getLoopLatch();
7555
7556 // We create new control-flow for the vectorized loop, so the original
7557 // condition will be dead after vectorization if it's only used by the
7558 // branch.
7559 auto *Cmp = dyn_cast<Instruction>(Latch->getTerminator()->getOperand(0));
7560 if (Cmp && Cmp->hasOneUse())
7561 DeadInstructions.insert(Cmp);
7562
7563 // We create new "steps" for induction variable updates to which the original
7564 // induction variables map. An original update instruction will be dead if
7565 // all its users except the induction variable are dead.
7566 for (auto &Induction : *Legal->getInductionVars()) {
7567 PHINode *Ind = Induction.first;
7568 auto *IndUpdate = cast<Instruction>(Ind->getIncomingValueForBlock(Latch));
7569 if (all_of(IndUpdate->users(), [&](User *U) -> bool {
7570 return U == Ind || DeadInstructions.count(cast<Instruction>(U));
7571 }))
7572 DeadInstructions.insert(IndUpdate);
7573 }
7574 }
7575
75557576 void InnerLoopUnroller::vectorizeMemoryInstruction(Instruction *Instr) {
75567577 auto *SI = dyn_cast<StoreInst>(Instr);
75577578 bool IfPredicateInstr = (SI && Legal->blockNeedsPredication(SI->getParent()));
77347755 CM.collectValuesToIgnore();
77357756
77367757 // Use the planner for vectorization.
7737 LoopVectorizationPlanner LVP(CM);
7758 LoopVectorizationPlanner LVP(L, LI, &LVL, CM);
77387759
77397760 // Get user vectorization factor.
77407761 unsigned UserVF = Hints.getWidth();
78287849 // interleave it.
78297850 InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL,
78307851 &CM);
7831 Unroller.vectorize();
7852 LVP.executePlan(Unroller);
78327853
78337854 ORE->emit(OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
78347855 L->getHeader())
78387859 // If we decided that it is *legal* to vectorize the loop, then do it.
78397860 InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC,
78407861 &LVL, &CM);
7841 LB.vectorize();
7862 LVP.executePlan(LB);
78427863 ++LoopsVectorized;
78437864
78447865 // Add metadata to disable runtime unrolling a scalar loop when there are