llvm.org GIT mirror: llvm / bcb266e

API to update MemorySSA for cloned blocks and added CFG edges.

Summary:
The end goal is to update MemorySSA in all loop passes. LoopUnswitch clones
all blocks in a loop, SimpleLoopUnswitch clones some blocks, LoopRotate
clones some instructions, and some of these loop passes also make CFG
changes. This API is based on what proved necessary in LoopUnswitch,
SimpleLoopUnswitch, LoopRotate, LoopInstSimplify and LoopSimplifyCFG.
Dependent patches that use this API are being added for context.

Reviewers: george.burgess.iv, dberlin
Subscribers: sanjoy, jlebar, Prazek, llvm-commits

Differential Revision: https://reviews.llvm.org/D45299

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@341855 91177308-0d34-0410-b5e6-96231b3b80d8
Alina Sbirlea, 1 year, 9 days ago

5 changed files, 804 additions, 17 deletions.
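To make the intended call pattern concrete before the diff: a pass collects the CFG edge changes it performs, applies them to the DominatorTree first, and then hands the same list to MemorySSAUpdater (the new applyUpdates/applyInsertUpdates below; the unit tests at the bottom of this diff follow the same sequence). A minimal sketch; the helper and its surrounding context are hypothetical, not part of this commit:

  #include "llvm/Analysis/MemorySSAUpdater.h"
  #include "llvm/IR/Dominators.h"
  using namespace llvm;

  // Hypothetical helper: a transform redirected Pred's terminator from
  // OldSucc to NewSucc, and DT/MemorySSA must be brought back in sync.
  static void syncAnalyses(MemorySSA &MSSA, DominatorTree &DT, BasicBlock *Pred,
                           BasicBlock *OldSucc, BasicBlock *NewSucc) {
    MemorySSAUpdater MSSAU(&MSSA);
    SmallVector<CFGUpdate, 2> Updates;
    Updates.push_back({cfg::UpdateKind::Delete, Pred, OldSucc});
    Updates.push_back({cfg::UpdateKind::Insert, Pred, NewSucc});
    DT.applyUpdates(Updates);        // DT must reflect the new CFG first,
    MSSAU.applyUpdates(Updates, DT); // since the MemorySSA update queries it.
  }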
include/llvm/Analysis/MemorySSA.h

                                InsertionPlace);
   void insertIntoListsBefore(MemoryAccess *, const BasicBlock *,
                              AccessList::iterator);
-  MemoryUseOrDef *createDefinedAccess(Instruction *, MemoryAccess *);
+  MemoryUseOrDef *createDefinedAccess(Instruction *, MemoryAccess *,
+                                      const MemoryUseOrDef *Template = nullptr);
 
 private:
   class CachingWalker;
...
   void markUnreachableAsLiveOnEntry(BasicBlock *BB);
   bool dominatesUse(const MemoryAccess *, const MemoryAccess *) const;
   MemoryPhi *createMemoryPhi(BasicBlock *BB);
-  MemoryUseOrDef *createNewAccess(Instruction *);
+  MemoryUseOrDef *createNewAccess(Instruction *,
+                                  const MemoryUseOrDef *Template = nullptr);
   MemoryAccess *findDominatingDef(BasicBlock *, enum InsertionPlace);
   void placePHINodes(const SmallPtrSetImpl<BasicBlock *> &);
   MemoryAccess *renameBlock(BasicBlock *, MemoryAccess *, bool);
include/llvm/Analysis/MemorySSAUpdater.h

 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFGDiff.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/OperandTraits.h"
...
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
 #include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/ValueMap.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
...
 class LLVMContext;
 class raw_ostream;
 
+using ValueToValueMapTy = ValueMap<const Value *, WeakTrackingVH>;
+using PhiToDefMap = SmallDenseMap<MemoryPhi *, MemoryAccess *>;
+using CFGUpdate = cfg::Update<BasicBlock *>;
+using GraphDiffInvBBPair =
+    std::pair<const GraphDiff<BasicBlock *> *, Inverse<BasicBlock *>>;
+
 class MemorySSAUpdater {
 private:
   MemorySSA *MSSA;
...
 
 public:
   MemorySSAUpdater(MemorySSA *MSSA) : MSSA(MSSA) {}
+
   /// Insert a definition into the MemorySSA IR. RenameUses will rename any use
   /// below the new def block (and any inserted phis). RenameUses should be set
   /// to true if the definition may cause new aliases for loads below it. This
...
   /// Where a mayalias b, *does* require RenameUses be set to true.
   void insertDef(MemoryDef *Def, bool RenameUses = false);
   void insertUse(MemoryUse *Use);
+  /// Update the MemoryPhi in `To` following an edge deletion between `From`
+  /// and `To`. If `To` becomes unreachable, a call to removeBlocks should be
+  /// made.
+  void removeEdge(BasicBlock *From, BasicBlock *To);
+  /// Update the MemoryPhi in `To` to have a single incoming edge from `From`,
+  /// following a CFG change that replaced multiple edges (switch) with a
+  /// direct branch.
+  void removeDuplicatePhiEdgesBetween(BasicBlock *From, BasicBlock *To);
+  /// Update MemorySSA after a loop was cloned, given the blocks in RPO order,
+  /// the exit blocks and a 1:1 mapping of all blocks and instructions
+  /// cloned. This involves duplicating all defs and uses in the cloned blocks.
+  /// Updating phi nodes in exit block successors is done separately.
+  void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks,
+                           ArrayRef<BasicBlock *> ExitBlocks,
+                           const ValueToValueMapTy &VM,
+                           bool IgnoreIncomingWithNoClones = false);
+  // Block BB was fully or partially cloned into its predecessor P1. VM
+  // contains the 1:1 mapping of instructions cloned, and VM[BB] = P1.
+  void updateForClonedBlockIntoPred(BasicBlock *BB, BasicBlock *P1,
+                                    const ValueToValueMapTy &VM);
+  /// Update phi nodes in exit block successors following cloning. Exit blocks
+  /// that were not cloned don't have additional predecessors added.
+  void updateExitBlocksForClonedLoop(ArrayRef<BasicBlock *> ExitBlocks,
+                                     const ValueToValueMapTy &VMap,
+                                     DominatorTree &DT);
+  void updateExitBlocksForClonedLoop(
+      ArrayRef<BasicBlock *> ExitBlocks,
+      ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps, DominatorTree &DT);
+
+  /// Apply CFG updates, analogous to the DT edge updates.
+  void applyUpdates(ArrayRef<CFGUpdate> Updates, DominatorTree &DT);
+  /// Apply CFG insert updates, analogous to the DT edge updates.
+  void applyInsertUpdates(ArrayRef<CFGUpdate> Updates, DominatorTree &DT);
+
   void moveBefore(MemoryUseOrDef *What, MemoryUseOrDef *Where);
   void moveAfter(MemoryUseOrDef *What, MemoryUseOrDef *Where);
   void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB,
...
   template <class RangeType>
   MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi, RangeType &Operands);
   void fixupDefs(const SmallVectorImpl<WeakVH> &);
+  // Clone all uses and defs from BB to NewBB given a 1:1 map of all
+  // instructions and blocks cloned, and a map of MemoryPhi : Definition
+  // (MemoryAccess Phi or Def). VMap maps old instructions to cloned
+  // instructions and old blocks to cloned blocks. MPhiMap is created in the
+  // caller of this private method, and maps existing MemoryPhis to new
+  // definitions that new MemoryAccesses must point to. These definitions may
+  // not necessarily be MemoryPhis themselves; they may be MemoryDefs. As such,
+  // the map is between MemoryPhis and MemoryAccesses, where the MemoryAccesses
+  // may be MemoryPhis or MemoryDefs and not MemoryUses.
+  void cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
+                        const ValueToValueMapTy &VMap, PhiToDefMap &MPhiMap);
+  template <typename Iter>
+  void privateUpdateExitBlocksForClonedLoop(ArrayRef<BasicBlock *> ExitBlocks,
+                                            Iter ValuesBegin, Iter ValuesEnd,
+                                            DominatorTree &DT);
+  void applyInsertUpdates(ArrayRef<CFGUpdate>, DominatorTree &DT,
+                          const GraphDiff<BasicBlock *> *GD);
 };
 } // end namespace llvm
 
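As a usage note for the cloning entry points declared above, a hedged sketch of the sequence a loop-cloning pass would follow (the cloning itself is pass-specific and omitted; the function name and the assumption that VMap already maps every original block and instruction to its clone are illustrative, not part of this commit):

  // Sketch: L, LI and DT are the usual loop-pass analyses; VMap was filled
  // while cloning all blocks/instructions of L; ExitBlocks holds L's exits.
  void updateMSSAForClonedLoop(Loop &L, LoopInfo &LI, DominatorTree &DT,
                               MemorySSAUpdater &MSSAU,
                               const ValueToValueMapTy &VMap,
                               ArrayRef<BasicBlock *> ExitBlocks) {
    LoopBlocksRPO RPOT(&L);
    RPOT.perform(&LI);
    // Duplicate all MemoryDefs/Uses/Phis into the cloned blocks.
    MSSAU.updateForClonedLoop(RPOT, ExitBlocks, VMap);
    // Then patch MemoryPhis in the successors of exit blocks that gained
    // predecessors through cloning.
    MSSAU.updateExitBlocksForClonedLoop(ExitBlocks, VMap, DT);
  }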
lib/Analysis/MemorySSA.cpp

 }
 
 MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
-                                               MemoryAccess *Definition) {
+                                               MemoryAccess *Definition,
+                                               const MemoryUseOrDef *Template) {
   assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI");
-  MemoryUseOrDef *NewAccess = createNewAccess(I);
+  MemoryUseOrDef *NewAccess = createNewAccess(I, Template);
   assert(
       NewAccess != nullptr &&
       "Tried to create a memory access for a non-memory touching instruction");
...
 }
 
 /// Helper function to create new memory accesses
-MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
+MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I,
+                                           const MemoryUseOrDef *Template) {
   // The assume intrinsic has a control dependency which we model by claiming
   // that it writes arbitrarily. Ignore that fake memory dependency here.
   // FIXME: Replace this special casing with a more accurate modelling of
   // assume's control dependency.
   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
     if (II->getIntrinsicID() == Intrinsic::assume)
       return nullptr;
 
-  // Find out what effect this instruction has on memory.
-  ModRefInfo ModRef = AA->getModRefInfo(I, None);
-  // The isOrdered check is used to ensure that volatiles end up as defs
-  // (atomics end up as ModRef right now anyway). Until we separate the
-  // ordering chain from the memory chain, this enables people to see at least
-  // some relative ordering to volatiles. Note that getClobberingMemoryAccess
-  // will still give an answer that bypasses other volatile loads. TODO:
-  // Separate memory aliasing and ordering into two different chains so that we
-  // can precisely represent both "what memory will this read/write/is clobbered
-  // by" and "what instructions can I move this past".
-  bool Def = isModSet(ModRef) || isOrdered(I);
-  bool Use = isRefSet(ModRef);
+  bool Def, Use;
+  if (Template) {
+    Def = dyn_cast_or_null<MemoryDef>(Template) != nullptr;
+    Use = dyn_cast_or_null<MemoryUse>(Template) != nullptr;
+#if !defined(NDEBUG)
+    ModRefInfo ModRef = AA->getModRefInfo(I, None);
+    bool DefCheck, UseCheck;
+    DefCheck = isModSet(ModRef) || isOrdered(I);
+    UseCheck = isRefSet(ModRef);
+    assert(Def == DefCheck && (Def || Use == UseCheck) && "Invalid template");
+#endif
+  } else {
+    // Find out what effect this instruction has on memory.
+    ModRefInfo ModRef = AA->getModRefInfo(I, None);
+    // The isOrdered check is used to ensure that volatiles end up as defs
+    // (atomics end up as ModRef right now anyway). Until we separate the
+    // ordering chain from the memory chain, this enables people to see at
+    // least some relative ordering to volatiles. Note that
+    // getClobberingMemoryAccess will still give an answer that bypasses other
+    // volatile loads. TODO: Separate memory aliasing and ordering into two
+    // different chains so that we can precisely represent both "what memory
+    // will this read/write/is clobbered by" and "what instructions can I move
+    // this past".
+    Def = isModSet(ModRef) || isOrdered(I);
+    Use = isRefSet(ModRef);
+  }
 
   // It's possible for an instruction to not modify memory at all. During
   // construction, we ignore them.
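The Template argument is a fast path, not new semantics: for a cloned instruction the Def/Use classification is copied from the original's access instead of re-querying alias analysis, and the !NDEBUG block above re-derives it to assert the two agree. A sketch of the intended call, mirroring what cloneUsesAndDefs does later in this diff; this would run inside MemorySSAUpdater (a friend of MemorySSA), and I, NewI and DefiningAcc are illustrative names:

  // Clone the access for NewI, reusing the kind of I's access as a template
  // so no alias-analysis query is needed on the cloning path.
  MemoryUseOrDef *OrigAccess = MSSA->getMemoryAccess(I);
  MemoryUseOrDef *Clone =
      MSSA->createDefinedAccess(NewI, /*Definition=*/DefiningAcc,
                                /*Template=*/OrigAccess);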
lib/Analysis/MemorySSAUpdater.cpp

 //===----------------------------------------------------------------===//
 #include "llvm/Analysis/MemorySSAUpdater.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/IteratedDominanceFrontier.h"
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
...
         if (!Seen.insert(S).second)
           continue;
         Worklist.push_back(S);
+        }
+      }
+    }
+  }
+}
+
+void MemorySSAUpdater::removeEdge(BasicBlock *From, BasicBlock *To) {
+  if (MemoryPhi *MPhi = MSSA->getMemoryAccess(To)) {
+    MPhi->unorderedDeleteIncomingBlock(From);
+    if (MPhi->getNumIncomingValues() == 1)
+      removeMemoryAccess(MPhi);
+  }
+}
+
+void MemorySSAUpdater::removeDuplicatePhiEdgesBetween(BasicBlock *From,
+                                                      BasicBlock *To) {
+  if (MemoryPhi *MPhi = MSSA->getMemoryAccess(To)) {
+    bool Found = false;
+    MPhi->unorderedDeleteIncomingIf([&](const MemoryAccess *, BasicBlock *B) {
+      if (From != B)
+        return false;
+      if (Found)
+        return true;
+      Found = true;
+      return false;
+    });
+    if (MPhi->getNumIncomingValues() == 1)
+      removeMemoryAccess(MPhi);
+  }
+}
+
+void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
+                                        const ValueToValueMapTy &VMap,
+                                        PhiToDefMap &MPhiMap) {
+  auto GetNewDefiningAccess = [&](MemoryAccess *MA) -> MemoryAccess * {
+    MemoryAccess *InsnDefining = MA;
+    if (MemoryUseOrDef *DefMUD = dyn_cast<MemoryUseOrDef>(InsnDefining)) {
+      if (!MSSA->isLiveOnEntryDef(DefMUD)) {
+        Instruction *DefMUDI = DefMUD->getMemoryInst();
+        assert(DefMUDI && "Found MemoryUseOrDef with no Instruction.");
+        if (Instruction *NewDefMUDI =
+                cast_or_null<Instruction>(VMap.lookup(DefMUDI)))
+          InsnDefining = MSSA->getMemoryAccess(NewDefMUDI);
+      }
+    } else {
+      MemoryPhi *DefPhi = cast<MemoryPhi>(InsnDefining);
+      if (MemoryAccess *NewDefPhi = MPhiMap.lookup(DefPhi))
+        InsnDefining = NewDefPhi;
+    }
+    assert(InsnDefining && "Defining instruction cannot be nullptr.");
+    return InsnDefining;
+  };
+
+  const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB);
+  if (!Acc)
+    return;
+  for (const MemoryAccess &MA : *Acc) {
+    if (const MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(&MA)) {
+      Instruction *Insn = MUD->getMemoryInst();
+      // Entry does not exist if the clone of the block did not clone all
+      // instructions. This occurs in LoopRotate when cloning instructions
+      // from the old header to the old preheader. The cloned instruction may
+      // also be a simplified Value, not an Instruction (see LoopRotate).
+      if (Instruction *NewInsn =
+              dyn_cast_or_null<Instruction>(VMap.lookup(Insn))) {
+        MemoryAccess *NewUseOrDef = MSSA->createDefinedAccess(
+            NewInsn, GetNewDefiningAccess(MUD->getDefiningAccess()), MUD);
+        MSSA->insertIntoListsForBlock(NewUseOrDef, NewBB, MemorySSA::End);
+      }
+    }
+  }
+}
+
+void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks,
+                                           ArrayRef<BasicBlock *> ExitBlocks,
+                                           const ValueToValueMapTy &VMap,
+                                           bool IgnoreIncomingWithNoClones) {
+  PhiToDefMap MPhiMap;
+
+  auto FixPhiIncomingValues = [&](MemoryPhi *Phi, MemoryPhi *NewPhi) {
+    assert(Phi && NewPhi && "Invalid Phi nodes.");
+    BasicBlock *NewPhiBB = NewPhi->getBlock();
+    SmallPtrSet<BasicBlock *, 4> NewPhiBBPreds(pred_begin(NewPhiBB),
+                                               pred_end(NewPhiBB));
+    for (unsigned It = 0, E = Phi->getNumIncomingValues(); It < E; ++It) {
+      MemoryAccess *IncomingAccess = Phi->getIncomingValue(It);
+      BasicBlock *IncBB = Phi->getIncomingBlock(It);
+
+      if (BasicBlock *NewIncBB = cast_or_null<BasicBlock>(VMap.lookup(IncBB)))
+        IncBB = NewIncBB;
+      else if (IgnoreIncomingWithNoClones)
+        continue;
+
+      // Now we have IncBB, and will need to add incoming from it to NewPhi.
+
+      // If IncBB is not a predecessor of NewPhiBB, then do not add it.
+      // NewPhiBB was cloned without that edge.
+      if (!NewPhiBBPreds.count(IncBB))
+        continue;
+
+      // Determine incoming value and add it as incoming from IncBB.
+      if (MemoryUseOrDef *IncMUD = dyn_cast<MemoryUseOrDef>(IncomingAccess)) {
+        if (!MSSA->isLiveOnEntryDef(IncMUD)) {
+          Instruction *IncI = IncMUD->getMemoryInst();
+          assert(IncI && "Found MemoryUseOrDef with no Instruction.");
+          if (Instruction *NewIncI =
+                  cast_or_null<Instruction>(VMap.lookup(IncI))) {
+            IncMUD = MSSA->getMemoryAccess(NewIncI);
+            assert(IncMUD &&
+                   "MemoryUseOrDef cannot be null, all preds processed.");
+          }
+        }
+        NewPhi->addIncoming(IncMUD, IncBB);
+      } else {
+        MemoryPhi *IncPhi = cast<MemoryPhi>(IncomingAccess);
+        if (MemoryAccess *NewDefPhi = MPhiMap.lookup(IncPhi))
+          NewPhi->addIncoming(NewDefPhi, IncBB);
+        else
+          NewPhi->addIncoming(IncPhi, IncBB);
+      }
+    }
+  };
+
+  auto ProcessBlock = [&](BasicBlock *BB) {
+    BasicBlock *NewBlock = cast_or_null<BasicBlock>(VMap.lookup(BB));
+    if (!NewBlock)
+      return;
+
+    assert(!MSSA->getWritableBlockAccesses(NewBlock) &&
+           "Cloned block should have no accesses");
+
+    // Add MemoryPhi.
+    if (MemoryPhi *MPhi = MSSA->getMemoryAccess(BB)) {
+      MemoryPhi *NewPhi = MSSA->createMemoryPhi(NewBlock);
+      MPhiMap[MPhi] = NewPhi;
+    }
+    // Update Uses and Defs.
+    cloneUsesAndDefs(BB, NewBlock, VMap, MPhiMap);
+  };
+
+  for (auto BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks))
+    ProcessBlock(BB);
+
+  for (auto BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks))
+    if (MemoryPhi *MPhi = MSSA->getMemoryAccess(BB))
+      if (MemoryAccess *NewPhi = MPhiMap.lookup(MPhi))
+        FixPhiIncomingValues(MPhi, cast<MemoryPhi>(NewPhi));
+}
+
+void MemorySSAUpdater::updateForClonedBlockIntoPred(
+    BasicBlock *BB, BasicBlock *P1, const ValueToValueMapTy &VM) {
+  // All defs/phis from outside BB that are used in BB are valid uses in P1:
+  // since those defs/phis dominated BB, they also dominate P1.
+  // Defs from BB being used in BB will be replaced with the cloned defs from
+  // VM. The uses of BB's Phi (if it exists) in BB will be replaced by the
+  // incoming def into the Phi from P1.
+  PhiToDefMap MPhiMap;
+  if (MemoryPhi *MPhi = MSSA->getMemoryAccess(BB))
+    MPhiMap[MPhi] = MPhi->getIncomingValueForBlock(P1);
+  cloneUsesAndDefs(BB, P1, VM, MPhiMap);
+}
+
+template <typename Iter>
+void MemorySSAUpdater::privateUpdateExitBlocksForClonedLoop(
+    ArrayRef<BasicBlock *> ExitBlocks, Iter ValuesBegin, Iter ValuesEnd,
+    DominatorTree &DT) {
+  SmallVector<CFGUpdate, 4> Updates;
+  // Update/insert phis in all successors of exit blocks.
+  for (auto *Exit : ExitBlocks)
+    for (const ValueToValueMapTy *VMap : make_range(ValuesBegin, ValuesEnd))
+      if (BasicBlock *NewExit = cast_or_null<BasicBlock>(VMap->lookup(Exit))) {
+        BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);
+        Updates.push_back({DT.Insert, NewExit, ExitSucc});
+      }
+  applyInsertUpdates(Updates, DT);
+}
+
+void MemorySSAUpdater::updateExitBlocksForClonedLoop(
+    ArrayRef<BasicBlock *> ExitBlocks, const ValueToValueMapTy &VMap,
+    DominatorTree &DT) {
+  const ValueToValueMapTy *const Arr[] = {&VMap};
+  privateUpdateExitBlocksForClonedLoop(ExitBlocks, std::begin(Arr),
+                                       std::end(Arr), DT);
+}
+
+void MemorySSAUpdater::updateExitBlocksForClonedLoop(
+    ArrayRef<BasicBlock *> ExitBlocks,
+    ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps, DominatorTree &DT) {
+  auto GetPtr = [&](const std::unique_ptr<ValueToValueMapTy> &I) {
+    return I.get();
+  };
+  using MappedIteratorType =
+      mapped_iterator<const std::unique_ptr<ValueToValueMapTy> *,
+                      decltype(GetPtr)>;
+  auto MapBegin = MappedIteratorType(VMaps.begin(), GetPtr);
+  auto MapEnd = MappedIteratorType(VMaps.end(), GetPtr);
+  privateUpdateExitBlocksForClonedLoop(ExitBlocks, MapBegin, MapEnd, DT);
+}
+
+void MemorySSAUpdater::applyUpdates(ArrayRef<CFGUpdate> Updates,
+                                    DominatorTree &DT) {
+  SmallVector<CFGUpdate, 4> RevDeleteUpdates;
+  SmallVector<CFGUpdate, 4> InsertUpdates;
+  for (auto &Update : Updates) {
+    if (Update.getKind() == DT.Insert)
+      InsertUpdates.push_back({DT.Insert, Update.getFrom(), Update.getTo()});
+    else
+      RevDeleteUpdates.push_back({DT.Insert, Update.getFrom(), Update.getTo()});
+  }
+
+  if (!RevDeleteUpdates.empty()) {
+    // Update for inserted edges: use NewDT and snapshot CFG as if deletes had
+    // not occurred.
+    // FIXME: This creates a new DT, so it's more expensive to do mixed
+    // deletes/inserts than inserts alone. We can do an incremental update on
+    // the DT to revert deletes, then re-delete the edges. Teaching DT to do
+    // this is part of a pending cleanup.
+    DominatorTree NewDT(DT, RevDeleteUpdates);
+    GraphDiff<BasicBlock *> GD(RevDeleteUpdates);
+    applyInsertUpdates(InsertUpdates, NewDT, &GD);
+  } else {
+    GraphDiff<BasicBlock *> GD;
+    applyInsertUpdates(InsertUpdates, DT, &GD);
+  }
+
+  // Update for deleted edges.
+  for (auto &Update : RevDeleteUpdates)
+    removeEdge(Update.getFrom(), Update.getTo());
+}
+
+void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
+                                          DominatorTree &DT) {
+  GraphDiff<BasicBlock *> GD;
+  applyInsertUpdates(Updates, DT, &GD);
+}
+
+void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
+                                          DominatorTree &DT,
+                                          const GraphDiff<BasicBlock *> *GD) {
+  // Get recursive last Def, assuming well formed MSSA and updated DT.
+  auto GetLastDef = [&](BasicBlock *BB) -> MemoryAccess * {
+    while (true) {
+      MemorySSA::DefsList *Defs = MSSA->getWritableBlockDefs(BB);
+      // Return last Def or Phi in BB, if it exists.
+      if (Defs)
+        return &*(--Defs->end());
+
+      // Check number of predecessors, we only care if there's more than one.
+      unsigned Count = 0;
+      BasicBlock *Pred = nullptr;
+      for (auto &Pair : children<GraphDiffInvBBPair>({GD, BB})) {
+        Pred = Pair.second;
+        Count++;
+        if (Count == 2)
+          break;
+      }
+
+      // If BB has multiple predecessors, get last definition from IDom.
+      if (Count != 1) {
+        // [SimpleLoopUnswitch] If BB is a dead block, about to be deleted, its
+        // DT is invalidated. Return LoE as its last def. This will be added to
+        // the MemoryPhi node, and later deleted when the block is deleted.
+        if (!DT.getNode(BB))
+          return MSSA->getLiveOnEntryDef();
+        if (auto *IDom = DT.getNode(BB)->getIDom())
+          if (IDom->getBlock() != BB) {
+            BB = IDom->getBlock();
+            continue;
+          }
+        return MSSA->getLiveOnEntryDef();
+      } else {
+        // Single predecessor, BB cannot be dead. GetLastDef of Pred.
+        assert(Count == 1 && Pred && "Single predecessor expected.");
+        BB = Pred;
+      }
+    }
+    llvm_unreachable("Unable to get last definition.");
+  };
+
+  // Get nearest IDom given a set of blocks.
+  // TODO: this can be optimized by starting the search at the node with the
+  // lowest level (highest in the tree).
+  auto FindNearestCommonDominator =
+      [&](const SmallSetVector<BasicBlock *, 2> &BBSet) -> BasicBlock * {
+    BasicBlock *PrevIDom = *BBSet.begin();
+    for (auto *BB : BBSet)
+      PrevIDom = DT.findNearestCommonDominator(PrevIDom, BB);
+    return PrevIDom;
+  };
+
+  // Get all blocks that dominate PrevIDom, stop when reaching CurrIDom. Do not
+  // include CurrIDom.
+  auto GetNoLongerDomBlocks =
+      [&](BasicBlock *PrevIDom, BasicBlock *CurrIDom,
+          SmallVectorImpl<BasicBlock *> &BlocksPrevDom) {
+        if (PrevIDom == CurrIDom)
+          return;
+        BlocksPrevDom.push_back(PrevIDom);
+        BasicBlock *NextIDom = PrevIDom;
+        while (BasicBlock *UpIDom =
+                   DT.getNode(NextIDom)->getIDom()->getBlock()) {
+          if (UpIDom == CurrIDom)
+            break;
+          BlocksPrevDom.push_back(UpIDom);
+          NextIDom = UpIDom;
+        }
+      };
+
+  // Map a BB to its predecessors: added + previously existing. To get a
+  // deterministic order, store predecessors as SetVectors. The order in each
+  // will be defined by the order in Updates (fixed) and the order given by
+  // children<> (also fixed). Since we further iterate over these ordered sets,
+  // we lose the information of multiple edges possibly existing between two
+  // blocks, so we'll keep an EdgeCount map for that.
+  // An alternate implementation could keep an unordered set for the
+  // predecessors, traverse either Updates or children<> each time to get the
+  // deterministic order, and drop the usage of EdgeCount. This alternate
+  // approach would still require querying the maps for each predecessor, and
+  // the children<> call has additional computation inside for creating the
+  // snapshot-graph predecessors. As such, we favor using a little additional
+  // storage and less compute time. This decision can be revisited if we find
+  // the alternative more favorable.
+
+  struct PredInfo {
+    SmallSetVector<BasicBlock *, 2> Added;
+    SmallSetVector<BasicBlock *, 2> Prev;
+  };
+  SmallDenseMap<BasicBlock *, PredInfo> PredMap;
+
+  for (auto &Edge : Updates) {
+    BasicBlock *BB = Edge.getTo();
+    auto &AddedBlockSet = PredMap[BB].Added;
+    AddedBlockSet.insert(Edge.getFrom());
+  }
+
+  // Store all existing predecessors for each BB; at least one must exist.
+  SmallDenseMap<std::pair<BasicBlock *, BasicBlock *>, int> EdgeCountMap;
+  SmallPtrSet<BasicBlock *, 2> NewBlocks;
+  for (auto &BBPredPair : PredMap) {
+    auto *BB = BBPredPair.first;
+    const auto &AddedBlockSet = BBPredPair.second.Added;
+    auto &PrevBlockSet = BBPredPair.second.Prev;
+    for (auto &Pair : children<GraphDiffInvBBPair>({GD, BB})) {
+      BasicBlock *Pi = Pair.second;
+      if (!AddedBlockSet.count(Pi))
+        PrevBlockSet.insert(Pi);
+      EdgeCountMap[{Pi, BB}]++;
+    }
+
+    if (PrevBlockSet.empty()) {
+      assert(pred_size(BB) == AddedBlockSet.size() && "Duplicate edges added.");
+      LLVM_DEBUG(
+          dbgs()
+          << "Adding a predecessor to a block with no predecessors. "
+             "This must be an edge added to a new, likely cloned, block. "
+             "Its memory accesses must be already correct, assuming completed "
+             "via the updateExitBlocksForClonedLoop API. "
+             "Assert a single such edge is added so no phi addition or "
+             "additional processing is required.\n");
+      assert(AddedBlockSet.size() == 1 &&
+             "Can only handle adding one predecessor to a new block.");
+      // Need to remove new blocks from PredMap. Remove below to not invalidate
+      // the iterator here.
+      NewBlocks.insert(BB);
+    }
+  }
+  // Nothing to process for new/cloned blocks.
+  for (auto *BB : NewBlocks)
+    PredMap.erase(BB);
+
+  SmallVector<BasicBlock *, 8> BlocksToProcess;
+  SmallVector<BasicBlock *, 16> BlocksWithDefsToReplace;
+
+  // First create MemoryPhis in all blocks that don't have one. Create in the
+  // order found in Updates, not in PredMap, to get deterministic numbering.
+  for (auto &Edge : Updates) {
+    BasicBlock *BB = Edge.getTo();
+    if (PredMap.count(BB) && !MSSA->getMemoryAccess(BB))
+      MSSA->createMemoryPhi(BB);
+  }
+
+  // Now we'll fill in the MemoryPhis with the right incoming values.
+  for (auto &BBPredPair : PredMap) {
+    auto *BB = BBPredPair.first;
+    const auto &PrevBlockSet = BBPredPair.second.Prev;
+    const auto &AddedBlockSet = BBPredPair.second.Added;
+    assert(!PrevBlockSet.empty() &&
+           "At least one previous predecessor must exist.");
+
+    // TODO: if this becomes a bottleneck, we can save on GetLastDef calls by
+    // keeping this map before the loop. We can reuse already populated entries
+    // if an edge is added from the same predecessor to two different blocks,
+    // and this does happen in rotate. Note that the map needs to be updated
+    // when deleting non-necessary phis below, if the phi is in the map, by
+    // replacing the value with DefP1.
+    SmallDenseMap<BasicBlock *, MemoryAccess *> LastDefAddedPred;
+    for (auto *AddedPred : AddedBlockSet) {
+      auto *DefPn = GetLastDef(AddedPred);
+      assert(DefPn != nullptr && "Unable to find last definition.");
+      LastDefAddedPred[AddedPred] = DefPn;
+    }
+
+    MemoryPhi *NewPhi = MSSA->getMemoryAccess(BB);
+    // If Phi is not empty, add an incoming edge from each added pred. Must
+    // still compute blocks with defs to replace for this block below.
+    if (NewPhi->getNumOperands()) {
+      for (auto *Pred : AddedBlockSet) {
+        auto *LastDefForPred = LastDefAddedPred[Pred];
+        for (int I = 0, E = EdgeCountMap[{Pred, BB}]; I < E; ++I)
+          NewPhi->addIncoming(LastDefForPred, Pred);
+      }
+    } else {
+      // Pick any existing predecessor and get its definition. All other
+      // existing predecessors should have the same one, since no phi existed.
+      auto *P1 = *PrevBlockSet.begin();
+      MemoryAccess *DefP1 = GetLastDef(P1);
+
+      // Check DefP1 against all Defs in LastDefAddedPred. If all the same,
+      // nothing to add.
+      bool InsertPhi = false;
+      for (auto LastDefPredPair : LastDefAddedPred)
+        if (DefP1 != LastDefPredPair.second) {
+          InsertPhi = true;
+          break;
+        }
+      if (!InsertPhi) {
+        // Since NewPhi may be used in other newly added Phis, replace all uses
+        // of NewPhi with the definition coming from all predecessors (DefP1),
+        // before deleting it.
+        NewPhi->replaceAllUsesWith(DefP1);
+        removeMemoryAccess(NewPhi);
+        continue;
+      }
+
+      // Update Phi with new values for new predecessors and old value for all
+      // other predecessors. Since AddedBlockSet and PrevBlockSet are ordered
+      // sets, the order of entries in NewPhi is deterministic.
+      for (auto *Pred : AddedBlockSet) {
+        auto *LastDefForPred = LastDefAddedPred[Pred];
+        for (int I = 0, E = EdgeCountMap[{Pred, BB}]; I < E; ++I)
+          NewPhi->addIncoming(LastDefForPred, Pred);
+      }
+      for (auto *Pred : PrevBlockSet)
+        for (int I = 0, E = EdgeCountMap[{Pred, BB}]; I < E; ++I)
+          NewPhi->addIncoming(DefP1, Pred);
+
+      // Insert BB in the set of blocks that now have a definition. We'll use
+      // this to compute IDF and add Phis there next.
+      BlocksToProcess.push_back(BB);
+    }
+
+    // Get all blocks that used to dominate BB and no longer do after adding
+    // AddedBlockSet, where PrevBlockSet are the previously known predecessors.
+    assert(DT.getNode(BB)->getIDom() && "BB does not have a valid IDom.");
+    BasicBlock *PrevIDom = FindNearestCommonDominator(PrevBlockSet);
+    assert(PrevIDom && "Previous IDom should exist.");
+    BasicBlock *NewIDom = DT.getNode(BB)->getIDom()->getBlock();
+    assert(NewIDom && "BB should have a new valid IDom.");
+    assert(DT.dominates(NewIDom, PrevIDom) &&
+           "New IDom should dominate old IDom.");
+    GetNoLongerDomBlocks(PrevIDom, NewIDom, BlocksWithDefsToReplace);
+  }
+
+  // Compute IDF and add Phis in all IDF blocks that do not have one.
+  SmallVector<BasicBlock *, 32> IDFBlocks;
+  if (!BlocksToProcess.empty()) {
+    ForwardIDFCalculator IDFs(DT);
+    SmallPtrSet<BasicBlock *, 16> DefiningBlocks(BlocksToProcess.begin(),
+                                                 BlocksToProcess.end());
+    IDFs.setDefiningBlocks(DefiningBlocks);
+    IDFs.calculate(IDFBlocks);
+    for (auto *BBIDF : IDFBlocks) {
+      if (auto *IDFPhi = MSSA->getMemoryAccess(BBIDF)) {
+        // Update existing Phi.
+        // FIXME: some updates may be redundant, try to optimize and skip some.
+        for (unsigned I = 0, E = IDFPhi->getNumIncomingValues(); I < E; ++I)
+          IDFPhi->setIncomingValue(I, GetLastDef(IDFPhi->getIncomingBlock(I)));
+      } else {
+        IDFPhi = MSSA->createMemoryPhi(BBIDF);
+        for (auto &Pair : children<GraphDiffInvBBPair>({GD, BBIDF})) {
+          BasicBlock *Pi = Pair.second;
+          IDFPhi->addIncoming(GetLastDef(Pi), Pi);
+        }
+      }
+    }
+  }
+
+  // Now for all defs in BlocksWithDefsToReplace, if there are uses they no
+  // longer dominate, replace those with the closest dominating def.
+  // This will also update optimized accesses, as they're also uses.
+  for (auto *BlockWithDefsToReplace : BlocksWithDefsToReplace) {
+    if (auto DefsList = MSSA->getWritableBlockDefs(BlockWithDefsToReplace)) {
+      for (auto &DefToReplaceUses : *DefsList) {
+        BasicBlock *DominatingBlock = DefToReplaceUses.getBlock();
+        Value::use_iterator UI = DefToReplaceUses.use_begin(),
+                            E = DefToReplaceUses.use_end();
+        for (; UI != E;) {
+          Use &U = *UI;
+          ++UI;
+          MemoryAccess *Usr = dyn_cast<MemoryAccess>(U.getUser());
+          if (MemoryPhi *UsrPhi = dyn_cast<MemoryPhi>(Usr)) {
+            BasicBlock *DominatedBlock = UsrPhi->getIncomingBlock(U);
+            if (!DT.dominates(DominatingBlock, DominatedBlock))
+              U.set(GetLastDef(DominatedBlock));
+          } else {
+            BasicBlock *DominatedBlock = Usr->getBlock();
+            if (!DT.dominates(DominatingBlock, DominatedBlock)) {
+              if (auto *DomBlPhi = MSSA->getMemoryAccess(DominatedBlock))
+                U.set(DomBlPhi);
+              else {
+                auto *IDom = DT.getNode(DominatedBlock)->getIDom();
+                assert(IDom && "Block must have a valid IDom.");
+                U.set(GetLastDef(IDom->getBlock()));
+              }
+              cast<MemoryUseOrDef>(Usr)->resetOptimized();
+            }
+          }
+        }
+      }
+    }
+  }
+}
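To round out the file, a hedged sketch of when a pass would reach for the two small edge-removal entry points defined near the top of this file (the helper names and surrounding transforms are hypothetical):

  // Pred's switch had several cases targeting Succ and was folded into an
  // unconditional branch: keep exactly one MemoryPhi entry for Pred in Succ.
  static void afterSwitchFold(MemorySSAUpdater &MSSAU, BasicBlock *Pred,
                              BasicBlock *Succ) {
    MSSAU.removeDuplicatePhiEdgesBetween(Pred, Succ);
  }

  // The edge Pred -> Succ was deleted outright: drop Pred's incoming value
  // from Succ's MemoryPhi (the phi is removed if it becomes trivial).
  static void afterEdgeDeletion(MemorySSAUpdater &MSSAU, BasicBlock *Pred,
                                BasicBlock *Succ) {
    MSSAU.removeEdge(Pred, Succ);
  }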
unittests/Analysis/MemorySSATest.cpp

             (std::vector<MemoryAccess *>{StoreAAccess, StoreAAccess,
                                          StoreBAccess}));
 }
+
+// entry
+//  |
+// header
+//  / \
+// body |
+//  \ /
+// exit
+// header:
+//  ; 1 = MemoryDef(liveOnEntry)
+// body:
+//  ; 2 = MemoryDef(1)
+// exit:
+//  ; 3 = MemoryPhi({body, 2}, {header, 1})
+//  ; 4 = MemoryDef(3); optimized to 3, cannot optimize through phi.
+// Insert edge: entry -> exit, check mssa Update is correct.
+TEST_F(MemorySSATest, TestAddedEdgeToBlockWithPhiNotOpt) {
+  F = Function::Create(
+      FunctionType::get(B.getVoidTy(), {B.getInt8PtrTy()}, false),
+      GlobalValue::ExternalLinkage, "F", &M);
+  Argument *PointerArg = &*F->arg_begin();
+  BasicBlock *Entry(BasicBlock::Create(C, "entry", F));
+  BasicBlock *Header(BasicBlock::Create(C, "header", F));
+  BasicBlock *Body(BasicBlock::Create(C, "body", F));
+  BasicBlock *Exit(BasicBlock::Create(C, "exit", F));
+  B.SetInsertPoint(Entry);
+  BranchInst::Create(Header, Entry);
+  B.SetInsertPoint(Header);
+  B.CreateStore(B.getInt8(16), PointerArg);
+  B.CreateCondBr(B.getTrue(), Exit, Body);
+  B.SetInsertPoint(Body);
+  B.CreateStore(B.getInt8(16), PointerArg);
+  BranchInst::Create(Exit, Body);
+  B.SetInsertPoint(Exit);
+  StoreInst *S1 = B.CreateStore(B.getInt8(16), PointerArg);
+
+  setupAnalyses();
+  MemorySSA &MSSA = *Analyses->MSSA;
+  MemorySSAWalker *Walker = Analyses->Walker;
+  std::unique_ptr<MemorySSAUpdater> MSSAU =
+      make_unique<MemorySSAUpdater>(&MSSA);
+
+  MemoryPhi *Phi = MSSA.getMemoryAccess(Exit);
+  EXPECT_EQ(Phi, Walker->getClobberingMemoryAccess(S1));
+
+  // Alter CFG, add edge: entry -> exit.
+  Entry->getTerminator()->eraseFromParent();
+  B.SetInsertPoint(Entry);
+  B.CreateCondBr(B.getTrue(), Header, Exit);
+  SmallVector<CFGUpdate, 1> Updates;
+  Updates.push_back({cfg::UpdateKind::Insert, Entry, Exit});
+  Analyses->DT.applyUpdates(Updates);
+  MSSAU->applyInsertUpdates(Updates, Analyses->DT);
+  EXPECT_EQ(Phi, Walker->getClobberingMemoryAccess(S1));
+}
+
+// entry
+//  |
+// header
+//  / \
+// body |
+//  \ /
+// exit
+// header:
+//  ; 1 = MemoryDef(liveOnEntry)
+// body:
+//  ; 2 = MemoryDef(1)
+// exit:
+//  ; 3 = MemoryPhi({body, 2}, {header, 1})
+//  ; 4 = MemoryDef(3); optimize this to 1 now; the added edge should
+//  invalidate the optimized access.
+// Insert edge: entry -> exit, check mssa Update is correct.
+TEST_F(MemorySSATest, TestAddedEdgeToBlockWithPhiOpt) {
+  F = Function::Create(
+      FunctionType::get(B.getVoidTy(), {B.getInt8PtrTy()}, false),
+      GlobalValue::ExternalLinkage, "F", &M);
+  Argument *PointerArg = &*F->arg_begin();
+  Type *Int8 = Type::getInt8Ty(C);
+  BasicBlock *Entry(BasicBlock::Create(C, "entry", F));
+  BasicBlock *Header(BasicBlock::Create(C, "header", F));
+  BasicBlock *Body(BasicBlock::Create(C, "body", F));
+  BasicBlock *Exit(BasicBlock::Create(C, "exit", F));
+
+  B.SetInsertPoint(Entry);
+  Value *Alloca = B.CreateAlloca(Int8, ConstantInt::get(Int8, 1), "A");
+  BranchInst::Create(Header, Entry);
+
+  B.SetInsertPoint(Header);
+  StoreInst *S1 = B.CreateStore(B.getInt8(16), PointerArg);
+  B.CreateCondBr(B.getTrue(), Exit, Body);
+
+  B.SetInsertPoint(Body);
+  B.CreateStore(ConstantInt::get(Int8, 0), Alloca);
+  BranchInst::Create(Exit, Body);
+
+  B.SetInsertPoint(Exit);
+  StoreInst *S2 = B.CreateStore(B.getInt8(16), PointerArg);
+
+  setupAnalyses();
+  MemorySSA &MSSA = *Analyses->MSSA;
+  MemorySSAWalker *Walker = Analyses->Walker;
+  std::unique_ptr<MemorySSAUpdater> MSSAU =
+      make_unique<MemorySSAUpdater>(&MSSA);
+
+  MemoryDef *DefS1 = cast<MemoryDef>(MSSA.getMemoryAccess(S1));
+  EXPECT_EQ(DefS1, Walker->getClobberingMemoryAccess(S2));
+
+  // Alter CFG, add edge: entry -> exit.
+  Entry->getTerminator()->eraseFromParent();
+  B.SetInsertPoint(Entry);
+  B.CreateCondBr(B.getTrue(), Header, Exit);
+  SmallVector<CFGUpdate, 1> Updates;
+  Updates.push_back({cfg::UpdateKind::Insert, Entry, Exit});
+  Analyses->DT.applyUpdates(Updates);
+  MSSAU->applyInsertUpdates(Updates, Analyses->DT);
+
+  MemoryPhi *Phi = MSSA.getMemoryAccess(Exit);
+  EXPECT_EQ(Phi, Walker->getClobberingMemoryAccess(S2));
+}
+
+// entry
+//  / |
+// a   |
+// / \ |
+// b c f
+// \ / |
+//  d  |
+//  \ /
+//   e
+// f:
+//  ; 1 = MemoryDef(liveOnEntry)
+// e:
+//  ; 2 = MemoryPhi({d, liveOnEntry}, {f, 1})
+//
+// Insert edge: f -> c, check update is correct.
+// After update:
+// f:
+//  ; 1 = MemoryDef(liveOnEntry)
+// c:
+//  ; 3 = MemoryPhi({a, liveOnEntry}, {f, 1})
+// d:
+//  ; 4 = MemoryPhi({b, liveOnEntry}, {c, 3})
+// e:
+//  ; 2 = MemoryPhi({d, 4}, {f, 1})
+TEST_F(MemorySSATest, TestAddedEdgeToBlockWithNoPhiAddNewPhis) {
+  F = Function::Create(
+      FunctionType::get(B.getVoidTy(), {B.getInt8PtrTy()}, false),
+      GlobalValue::ExternalLinkage, "F", &M);
+  Argument *PointerArg = &*F->arg_begin();
+  BasicBlock *Entry(BasicBlock::Create(C, "entry", F));
+  BasicBlock *ABlock(BasicBlock::Create(C, "a", F));
+  BasicBlock *BBlock(BasicBlock::Create(C, "b", F));
+  BasicBlock *CBlock(BasicBlock::Create(C, "c", F));
+  BasicBlock *DBlock(BasicBlock::Create(C, "d", F));
+  BasicBlock *EBlock(BasicBlock::Create(C, "e", F));
+  BasicBlock *FBlock(BasicBlock::Create(C, "f", F));
+
+  B.SetInsertPoint(Entry);
+  B.CreateCondBr(B.getTrue(), ABlock, FBlock);
+  B.SetInsertPoint(ABlock);
+  B.CreateCondBr(B.getTrue(), BBlock, CBlock);
+  B.SetInsertPoint(BBlock);
+  BranchInst::Create(DBlock, BBlock);
+  B.SetInsertPoint(CBlock);
+  BranchInst::Create(DBlock, CBlock);
+  B.SetInsertPoint(DBlock);
+  BranchInst::Create(EBlock, DBlock);
+  B.SetInsertPoint(FBlock);
+  B.CreateStore(B.getInt8(16), PointerArg);
+  BranchInst::Create(EBlock, FBlock);
+
+  setupAnalyses();
+  MemorySSA &MSSA = *Analyses->MSSA;
+  std::unique_ptr<MemorySSAUpdater> MSSAU =
+      make_unique<MemorySSAUpdater>(&MSSA);
+
+  // Alter CFG, add edge: f -> c.
+  FBlock->getTerminator()->eraseFromParent();
+  B.SetInsertPoint(FBlock);
+  B.CreateCondBr(B.getTrue(), CBlock, EBlock);
+  SmallVector<CFGUpdate, 1> Updates;
+  Updates.push_back({cfg::UpdateKind::Insert, FBlock, CBlock});
+  Analyses->DT.applyUpdates(Updates);
+  MSSAU->applyInsertUpdates(Updates, Analyses->DT);
+
+  MemoryPhi *MPC = MSSA.getMemoryAccess(CBlock);
+  EXPECT_NE(MPC, nullptr);
+  MemoryPhi *MPD = MSSA.getMemoryAccess(DBlock);
+  EXPECT_NE(MPD, nullptr);
+  MemoryPhi *MPE = MSSA.getMemoryAccess(EBlock);
+  EXPECT_EQ(MPD, MPE->getIncomingValueForBlock(DBlock));
+}