llvm.org GIT mirror llvm / c8d11e8
Temporarily revert "[ScheduleDAGInstrs::buildSchedGraph()] Handling of memory dependecies rewritten." Some buildbot failures need to be debugged. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259213 91177308-0d34-0410-b5e6-96231b3b80d8 Jonas Paulsson 4 years ago
4 changed file(s) with 361 addition(s) and 467 deletion(s).
2626 class raw_ostream;
2727
2828 raw_ostream &operator<<(raw_ostream &OS, const MachineMemOperand &MMO);
29 class PseudoSourceValue;
30 raw_ostream &operator<<(raw_ostream &OS, const PseudoSourceValue* PSV);
3129
3230 /// Special value supplied for machine level alias analysis. It indicates that
3331 /// a memory access references the functions stack frame (e.g., a spill slot),
4644
4745 private:
4846 PSVKind Kind;
49 friend raw_ostream &llvm::operator<<(raw_ostream &OS,
50 const PseudoSourceValue* PSV);
5147
5248 friend class MachineMemOperand; // For printCustom().
5349
395395 /// specified node.
396396 bool addPred(const SDep &D, bool Required = true);
397397
398 /// addPredBarrier - This adds a barrier edge to SU by calling
399 /// addPred(), with latency 0 generally or latency 1 for a store
400 /// followed by a load.
401 bool addPredBarrier(SUnit *SU) {
402 SDep Dep(SU, SDep::Barrier);
403 unsigned TrueMemOrderLatency =
404 ((SU->getInstr()->mayStore() && this->getInstr()->mayLoad()) ? 1 : 0);
405 Dep.setLatency(TrueMemOrderLatency);
406 return addPred(Dep);
407 }
408
409398 /// removePred - This removes the specified edge as a pred of the current
410399 /// node if it exists. It also removes the current node as a successor of
411400 /// the specified node.
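The addPredBarrier() helper added above derives the edge latency from the two instructions involved: one cycle when a store is followed by a load (the TrueMemOrderLatency bypass estimate used elsewhere in this patch), zero otherwise. A toy, self-contained version of just that rule (hypothetical Inst type, not the SUnit API):

#include <iostream>

struct Inst { bool MayStore; bool MayLoad; };

// Latency of a memory-order edge from Pred to Succ, mirroring addPredBarrier().
static unsigned barrierLatency(const Inst &Pred, const Inst &Succ) {
  return (Pred.MayStore && Succ.MayLoad) ? 1 : 0;
}

int main() {
  std::cout << barrierLatency({true, false}, {false, true}) << '\n';  // store -> load: 1
  std::cout << barrierLatency({false, true}, {true, false}) << '\n';  // load -> store: 0
}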
1414 #ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
1515 #define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
1616
17 #include "llvm/ADT/MapVector.h"
1817 #include "llvm/ADT/SparseMultiSet.h"
1918 #include "llvm/ADT/SparseSet.h"
2019 #include "llvm/CodeGen/ScheduleDAG.h"
2120 #include "llvm/CodeGen/TargetSchedule.h"
2221 #include "llvm/Support/Compiler.h"
2322 #include "llvm/Target/TargetRegisterInfo.h"
24 #include <list>
2523
2624 namespace llvm {
2725 class MachineFrameInfo;
8583 typedef SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor, 4>
8684 VReg2SUnitOperIdxMultiMap;
8785
88 typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType;
89 typedef SmallVector<PointerIntPair<ValueType, 1, bool>, 4>
90 UnderlyingObjectsVector;
91
9286 /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of
9387 /// MachineInstrs.
9488 class ScheduleDAGInstrs : public ScheduleDAG {
154148 /// Tracks the last instructions in this region using each virtual register.
155149 VReg2SUnitOperIdxMultiMap CurrentVRegUses;
156150
157 AliasAnalysis *AAForDep;
158
159 /// Remember a generic side-effecting instruction as we proceed.
160 /// No other SU ever gets scheduled around it (except in the special
161 /// case of a huge region that gets reduced).
162 SUnit *BarrierChain;
163
164 public:
165
166 /// A list of SUnits, used in Value2SUsMap, during DAG construction.
167 /// Note: to gain speed it might be worth investigating an optimized
168 /// implementation of this data structure, such as a singly linked list
169 /// with a memory pool (SmallVector was tried but slow and SparseSet is not
170 /// applicable).
171 typedef std::list<SUnit *> SUList;
172 protected:
173 /// A map from ValueType to SUList, used during DAG construction,
174 /// as a means of remembering which SUs depend on which memory
175 /// locations.
176 class Value2SUsMap;
177
178 /// Remove in FIFO order some SUs from huge maps.
179 void reduceHugeMemNodeMaps(Value2SUsMap &stores,
180 Value2SUsMap &loads, unsigned N);
181
182 /// Add a chain edge between SUa and SUb, but only if both AliasAnalysis
183 /// and Target fail to deny the dependency.
184 void addChainDependency(SUnit *SUa, SUnit *SUb,
185 unsigned Latency = 0);
186
187 /// Add dependencies as needed from all SUs in list to SU.
188 void addChainDependencies(SUnit *SU, SUList &sus, unsigned Latency) {
189 for (auto *su : sus)
190 addChainDependency(SU, su, Latency);
191 }
192
193 /// Add dependencies as needed from all SUs in map, to SU.
194 void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap);
195
196 /// Add dependencies as needed to SU, from all SUs mapped to V.
197 void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap,
198 ValueType V);
199
200 /// Add barrier chain edges from all SUs in map, and then clear
201 /// the map. This is equivalent to insertBarrierChain(), but
202 /// optimized for the common case where the new BarrierChain (a
203 /// global memory object) has a higher NodeNum than all SUs in
204 /// map. It is assumed BarrierChain has been set before calling
205 /// this.
206 void addBarrierChain(Value2SUsMap &map);
207
208 /// Insert a barrier chain in a huge region, far below current
209 /// SU. Add barrier chain edges from all SUs in map with higher
210 /// NodeNums than this new BarrierChain, and remove them from
211 /// map. It is assumed BarrierChain has been set before calling
212 /// this.
213 void insertBarrierChain(Value2SUsMap &map);
214
215 /// For an unanalyzable memory access, this Value is used in maps.
216 UndefValue *UnknownValue;
151 /// PendingLoads - Remember where unknown loads are after the most recent
152 /// unknown store, as we iterate. As with Defs and Uses, this is here
153 /// to minimize construction/destruction.
154 std::vector<SUnit *> PendingLoads;
217155
218156 /// DbgValues - Remember instruction that precedes DBG_VALUE.
219157 /// These are generated by buildSchedGraph but persist so they can be
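The header changes above revolve around one idea: during DAG construction every memory Value (or PseudoSourceValue) is mapped to the list of SUnits that access it, with stores and loads kept in separate maps and unanalyzable accesses filed under a shared UnknownValue key, so new chain edges only have to be added against SUs that touched the same underlying object. A minimal, self-contained sketch of that scheme, using plain standard-library containers instead of MapVector/PointerUnion and a trivial stand-in for addChainDependency() (illustrative only, not LLVM code):

#include <iostream>
#include <list>
#include <map>
#include <vector>

// Stand-ins for SUnit and ValueType; edge direction and latencies are glossed over.
struct Node { unsigned Num; std::vector<unsigned> ChainDeps; };
using Value = const void *;
using NodeList = std::list<Node *>;

// Simplified stand-in for ScheduleDAGInstrs::addChainDependency().
static void addChainDep(Node *N, Node *Prev) { N->ChainDeps.push_back(Prev->Num); }

struct Value2NodesMap : std::map<Value, NodeList> {
  unsigned NumNodes = 0;                       // total nodes across all lists

  void insert(Node *N, Value V) { (*this)[V].push_back(N); ++NumNodes; }

  // Chain N against every node previously mapped to V.
  void addDepsTo(Node *N, Value V) {
    auto It = find(V);
    if (It == end())
      return;
    for (Node *Prev : It->second)
      addChainDep(N, Prev);
  }
};

int main() {
  Node A{0, {}}, B{1, {}}, C{2, {}};
  int X, Y;                                    // two distinct memory locations

  Value2NodesMap Stores;
  Stores.insert(&A, &X);                       // A stores to X
  Stores.insert(&B, &Y);                       // B stores to Y

  Stores.addDepsTo(&C, &X);                    // C accesses X: chained to A only, not B
  std::cout << "C chain deps: " << C.ChainDeps.size() << "\n";  // prints 1
}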
1313
1414 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
1515 #include "llvm/ADT/IntEqClasses.h"
16 #include "llvm/ADT/MapVector.h"
1617 #include "llvm/ADT/SmallPtrSet.h"
1718 #include "llvm/ADT/SmallSet.h"
1819 #include "llvm/Analysis/AliasAnalysis.h"
2627 #include "llvm/CodeGen/PseudoSourceValue.h"
2728 #include "llvm/CodeGen/RegisterPressure.h"
2829 #include "llvm/CodeGen/ScheduleDFS.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/Type.h"
3130 #include "llvm/IR/Operator.h"
3231 #include "llvm/Support/CommandLine.h"
3332 #include "llvm/Support/Debug.h"
5049 static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
5150 cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));
5251
53 // Note: the two options below might be used in tuning compile time vs
54 // output quality. Setting HugeRegion so large that it will never be
55 // reached means best-effort, but may be slow.
56
57 // When Stores and Loads maps (or NonAliasStores and NonAliasLoads)
58 // together hold this many SUs, a reduction of maps will be done.
59 static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden,
60 cl::init(1000), cl::desc("The limit to use while constructing the DAG "
61 "prior to scheduling, at which point a trade-off "
62 "is made to avoid excessive compile time."));
63
64 static cl::opt<unsigned> ReductionSize("dag-maps-reduction-size", cl::Hidden,
65 cl::desc("A huge scheduling region will have maps reduced by this many "
66 "nodes at a time. Defaults to HugeRegion / 2."));
67
68 static void dumpSUList(ScheduleDAGInstrs::SUList &L) {
69 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
70 dbgs() << "{ ";
71 for (auto *su : L) {
72 dbgs() << "SU(" << su->NodeNum << ")";
73 if (su != L.back())
74 dbgs() << ", ";
75 }
76 dbgs() << "}\n";
77 #endif
78 }
79
8052 ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
8153 const MachineLoopInfo *mli,
8254 bool RemoveKillFlags)
8355 : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
8456 RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false),
85 TrackLaneMasks(false), AAForDep(nullptr), BarrierChain(nullptr),
86 UnknownValue(UndefValue::get(
87 Type::getVoidTy(mf.getFunction()->getContext()))),
88 FirstDbgValue(nullptr) {
57 TrackLaneMasks(false), FirstDbgValue(nullptr) {
8958 DbgValues.clear();
9059
9160 const TargetSubtargetInfo &ST = mf.getSubtarget();
150119 }
151120 } while (!Working.empty());
152121 }
122
123 typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType;
124 typedef SmallVector<PointerIntPair<ValueType, 1, bool>, 4>
125 UnderlyingObjectsVector;
153126
154127 /// getUnderlyingObjectsForInstr - If this machine instr has memory reference
155128 /// information and it can be tracked to a normal reference to a known
570543 return true;
571544 }
572545
573 if ((*MI->memoperands_begin())->getValue() == nullptr)
546 const Value *V = (*MI->memoperands_begin())->getValue();
547 if (!V)
574548 return true;
575549
550 SmallVector<Value *, 4> Objs;
551 getUnderlyingObjects(V, Objs, DL);
552 for (Value *V : Objs) {
553 // Does this pointer refer to a distinct and identifiable object?
554 if (!isIdentifiedObject(V))
555 return true;
556 }
557
576558 return false;
577559 }
578560
579561 /// This returns true if the two MIs need a chain edge between them.
580 /// This is called on normal stores and loads.
562 /// If these are not even memory operations, we still may need
563 /// chain deps between them. The question really is - could
564 /// these two MIs be reordered during scheduling from memory dependency
565 /// point of view.
581566 static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
582567 const DataLayout &DL, MachineInstr *MIa,
583568 MachineInstr *MIb) {
584569 const MachineFunction *MF = MIa->getParent()->getParent();
585570 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
586571
587 assert ((MIa->mayStore() || MIb->mayStore()) &&
588 "Dependency checked between two loads");
589
590 // buildSchedGraph() will clear list of stores if not using AA,
591 // which means all stores have to be chained without AA.
592 if (!AA && MIa->mayStore() && MIb->mayStore())
593 return true;
594
572 // Cover a trivial case - no edge is needed to itself.
573 if (MIa == MIb)
574 return false;
575
595576 // Let the target decide if memory accesses cannot possibly overlap.
596 if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA))
597 return false;
577 if ((MIa->mayLoad() || MIa->mayStore()) &&
578 (MIb->mayLoad() || MIb->mayStore()))
579 if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA))
580 return false;
598581
599582 // FIXME: Need to handle multiple memory operands to support all targets.
600583 if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
602585
603586 if (isUnsafeMemoryObject(MIa, MFI, DL) || isUnsafeMemoryObject(MIb, MFI, DL))
604587 return true;
588
589 // If we are dealing with two "normal" loads, we do not need an edge
590 // between them - they could be reordered.
591 if (!MIa->mayStore() && !MIb->mayStore())
592 return false;
605593
606594 // To this point analysis is generic. From here on we do need AA.
607595 if (!AA)
645633 return (AAResult != NoAlias);
646634 }
647635
648 /// Check whether two objects need a chain edge and add it if needed.
649 void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb,
650 unsigned Latency) {
651 if (MIsNeedChainEdge(AAForDep, MFI, MF.getDataLayout(), SUa->getInstr(),
652 SUb->getInstr())) {
653 SDep Dep(SUa, SDep::MayAliasMem);
654 Dep.setLatency(Latency);
636 /// This recursive function iterates over chain deps of SUb looking for
637 /// "latest" node that needs a chain edge to SUa.
638 static unsigned iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
639 const DataLayout &DL, SUnit *SUa, SUnit *SUb,
640 SUnit *ExitSU, unsigned *Depth,
641 SmallPtrSetImpl<const SUnit*> &Visited) {
642 if (!SUa || !SUb || SUb == ExitSU)
643 return *Depth;
644
645 // Remember visited nodes.
646 if (!Visited.insert(SUb).second)
647 return *Depth;
648 // If there is _some_ dependency already in place, do not
649 // descend any further.
650 // TODO: Need to make sure that if that dependency got eliminated or ignored
651 // for any reason in the future, we would not violate DAG topology.
652 // Currently it does not happen, but makes an implicit assumption about
653 // future implementation.
654 //
655 // Independently, if we encounter a node that is some sort of global
656 // object (like a call) we already have full set of dependencies to it
657 // and we can stop descending.
658 if (SUa->isSucc(SUb) ||
659 isGlobalMemoryObject(AA, SUb->getInstr()))
660 return *Depth;
661
662 // If we do need an edge, or we have exceeded depth budget,
663 // add that edge to the predecessors chain of SUb,
664 // and stop descending.
665 if (*Depth > 200 ||
666 MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
667 SUb->addPred(SDep(SUa, SDep::MayAliasMem));
668 return *Depth;
669 }
670 // Track current depth.
671 (*Depth)++;
672 // Iterate over memory dependencies only.
673 for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end();
674 I != E; ++I)
675 if (I->isNormalMemoryOrBarrier())
676 iterateChainSucc(AA, MFI, DL, SUa, I->getSUnit(), ExitSU, Depth, Visited);
677 return *Depth;
678 }
679
680 /// This function assumes that "downward" from SU there exist
681 /// tail/leaf of already constructed DAG. It iterates downward and
682 /// checks whether SU can be aliasing any node dominated
683 /// by it.
684 static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
685 const DataLayout &DL, SUnit *SU, SUnit *ExitSU,
686 std::set<SUnit *> &CheckList,
687 unsigned LatencyToLoad) {
688 if (!SU)
689 return;
690
691 SmallPtrSet<const SUnit*, 16> Visited;
692 unsigned Depth = 0;
693
694 for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end();
695 I != IE; ++I) {
696 if (SU == *I)
697 continue;
698 if (MIsNeedChainEdge(AA, MFI, DL, SU->getInstr(), (*I)->getInstr())) {
699 SDep Dep(SU, SDep::MayAliasMem);
700 Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0);
701 (*I)->addPred(Dep);
702 }
703
704 // Iterate recursively over all previously added memory chain
705 // successors. Keep track of visited nodes.
706 for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
707 JE = (*I)->Succs.end(); J != JE; ++J)
708 if (J->isNormalMemoryOrBarrier())
709 iterateChainSucc(AA, MFI, DL, SU, J->getSUnit(), ExitSU, &Depth,
710 Visited);
711 }
712 }
713
714 /// Check whether two objects need a chain edge, if so, add it
715 /// otherwise remember the rejected SU.
716 static inline void addChainDependency(AliasAnalysis *AA,
717 const MachineFrameInfo *MFI,
718 const DataLayout &DL, SUnit *SUa,
719 SUnit *SUb, std::set<SUnit *> &RejectList,
720 unsigned TrueMemOrderLatency = 0,
721 bool isNormalMemory = false) {
722 // If this is a false dependency,
723 // do not add the edge, but remember the rejected node.
724 if (MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
725 SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
726 Dep.setLatency(TrueMemOrderLatency);
655727 SUb->addPred(Dep);
728 }
729 else {
730 // Duplicate entries should be ignored.
731 RejectList.insert(SUb);
732 DEBUG(dbgs() << "\tReject chain dep between SU("
733 << SUa->NodeNum << ") and SU("
734 << SUb->NodeNum << ")\n");
656735 }
657736 }
658737
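When MIsNeedChainEdge() says no edge is needed, the helper above parks SUb in RejectList instead, and adjustChainDeps() later re-examines those rejected nodes (RejectMemNodes in buildSchedGraph) together with their memory-chain successors. A toy, self-contained sketch of that reject-then-revisit pattern (hypothetical SU type and aliasing rule, not the LLVM API):

#include <iostream>
#include <set>
#include <vector>

struct SU { unsigned Num; std::vector<unsigned> Preds; };

// Placeholder for MIsNeedChainEdge(): pretend only an even/odd pair may alias.
static bool needsChainEdge(const SU &A, const SU &B) {
  return (A.Num % 2) != (B.Num % 2);
}

static void addChainDependency(SU &SUa, SU &SUb, std::set<SU *> &RejectList) {
  if (needsChainEdge(SUa, SUb))
    SUb.Preds.push_back(SUa.Num);   // add the chain edge
  else
    RejectList.insert(&SUb);        // remember SUb for a later adjust pass
}

int main() {
  SU A{2, {}}, B{4, {}}, C{5, {}};
  std::set<SU *> Reject;
  addChainDependency(A, B, Reject);  // rejected: 2 and 4 never alias here
  addChainDependency(A, C, Reject);  // edge added: 2 and 5 may alias
  std::cout << "rejected: " << Reject.size()
            << ", C preds: " << C.Preds.size() << '\n';  // rejected: 1, C preds: 1
}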
752831 }
753832 }
754833
755 class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
756
757 /// Current total number of SUs in map.
758 unsigned NumNodes;
759
760 /// 1 for loads, 0 for stores. (see comment in SUList)
761 unsigned TrueMemOrderLatency;
762 public:
763
764 Value2SUsMap(unsigned lat = 0) : NumNodes(0), TrueMemOrderLatency(lat) {}
765
766 /// To keep NumNodes up to date, insert() is used instead of
767 /// this operator w/ push_back().
768 ValueType &operator[](const SUList &Key) {
769 llvm_unreachable("Don't use. Use insert() instead."); };
770
771 /// Add SU to the SUList of V. If Map grows huge, reduce its size
772 /// by calling reduce().
773 void inline insert(SUnit *SU, ValueType V) {
774 MapVector::operator[](V).push_back(SU);
775 NumNodes++;
776 }
777
778 /// Clears the list of SUs mapped to V.
779 void inline clearList(ValueType V) {
780 iterator Itr = find(V);
781 if (Itr != end()) {
782 assert (NumNodes >= Itr->second.size());
783 NumNodes -= Itr->second.size();
784
785 Itr->second.clear();
786 }
787 }
788
789 /// Clears map from all contents.
790 void clear() {
791 MapVector::clear();
792 NumNodes = 0;
793 }
794
795 unsigned inline size() const { return NumNodes; }
796
797 /// Count the number of SUs in this map after a reduction.
798 void reComputeSize(void) {
799 NumNodes = 0;
800 for (auto &I : *this)
801 NumNodes += I.second.size();
802 }
803
804 unsigned inline getTrueMemOrderLatency() const {
805 return TrueMemOrderLatency;
806 }
807
808 void dump();
809 };
810
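A sketch of how buildSchedGraph() drives this class further down in this diff (SU is the unit being visited and V one of its underlying objects; the calls mirror the store-handling code below):

  Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);
  // For a store SU to V:
  addChainDependencies(SU, Stores, V);  // order against previously seen stores to V
  addChainDependencies(SU, Loads, V);   // and against previously seen loads of V
  if (!AAForDep)
    Stores.clearList(V);                // without AA, one store per object suffices
  Stores.insert(SU, V);                 // remember SU as the latest store to V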
811 void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
812 Value2SUsMap &Val2SUsMap) {
813 for (auto &I : Val2SUsMap)
814 addChainDependencies(SU, I.second,
815 Val2SUsMap.getTrueMemOrderLatency());
816 }
817
818 void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
819 Value2SUsMap &Val2SUsMap,
820 ValueType V) {
821 Value2SUsMap::iterator Itr = Val2SUsMap.find(V);
822 if (Itr != Val2SUsMap.end())
823 addChainDependencies(SU, Itr->second,
824 Val2SUsMap.getTrueMemOrderLatency());
825 }
826
827 void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
828 assert (BarrierChain != nullptr);
829
830 for (auto &I : map) {
831 SUList &sus = I.second;
832 for (auto *SU : sus)
833 SU->addPredBarrier(BarrierChain);
834 }
835 map.clear();
836 }
837
838 void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
839 assert (BarrierChain != nullptr);
840
841 // Go through all lists of SUs.
842 for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
843 Value2SUsMap::iterator CurrItr = I++;
844 SUList &sus = CurrItr->second;
845 SUList::iterator SUItr = sus.begin(), SUEE = sus.end();
846 for (; SUItr != SUEE; ++SUItr) {
847 // Stop on BarrierChain or any instruction above it.
848 if ((*SUItr)->NodeNum <= BarrierChain->NodeNum)
849 break;
850
851 (*SUItr)->addPredBarrier(BarrierChain);
852 }
853
854 // Also remove the BarrierChain from the list, if present.
855 if (*SUItr == BarrierChain)
856 SUItr++;
857
858 // Remove all SUs that are now successors of BarrierChain.
859 if (SUItr != sus.begin())
860 sus.erase(sus.begin(), SUItr);
861 }
862
863 // Remove all entries with empty su lists.
864 map.remove_if([&](std::pair<ValueType, SUList> &mapEntry) {
865 return (mapEntry.second.empty()); });
866
867 // Recompute the size of the map (NumNodes).
868 map.reComputeSize();
869 }
870
871834 /// If RegPressure is non-null, compute register pressure as a side effect. The
872835 /// DAG builder is an efficient place to do it because it already visits
873836 /// operands.
879842 const TargetSubtargetInfo &ST = MF.getSubtarget();
880843 bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
881844 : ST.useAA();
882 AAForDep = UseAA ? AA : nullptr;
883
884 BarrierChain = nullptr;
845 AliasAnalysis *AAForDep = UseAA ? AA : nullptr;
885846
886847 this->TrackLaneMasks = TrackLaneMasks;
887848 MISUnitMap.clear();
893854 if (PDiffs)
894855 PDiffs->init(SUnits.size());
895856
896 // We build scheduling units by walking a block's instruction list
897 // from bottom to top.
898
899 // Each MI's memory operand(s) is analyzed to a list of underlying
900 // objects. The SU is then inserted in the SUList(s) mapped from
901 // that Value(s). Each Value thus gets mapped to a list of SUs
902 // depending on it, defs and uses kept separately. Two SUs are
903 // non-aliasing to each other if they depend on different Values
904 // exclusively.
905 Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);
906
907 // Certain memory accesses are known to not alias any SU in Stores
908 // or Loads, and have therefore their own 'NonAlias'
909 // domain. E.g. spill / reload instructions never alias LLVM I/R
910 // Values. It is assumed that this type of memory accesses always
911 // have a proper memory operand modelling, and are therefore never
912 // unanalyzable. This means they are non aliasing against all nodes
913 // in Stores and Loads, including the unanalyzable ones.
914 Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);
915
916 // Always reduce a huge region with half of the elements, except
917 // when user sets this number explicitly.
918 if (ReductionSize.getNumOccurrences() == 0)
919 ReductionSize = (HugeRegion / 2);
857 // We build scheduling units by walking a block's instruction list from bottom
858 // to top.
859
860 // Remember where a generic side-effecting instruction is as we proceed.
861 SUnit *BarrierChain = nullptr, *AliasChain = nullptr;
862
863 // Memory references to specific known memory locations are tracked
864 // so that they can be given more precise dependencies. We track
865 // separately the known memory locations that may alias and those
866 // that are known not to alias
867 MapVector<ValueType, std::vector<SUnit *> > AliasMemDefs, NonAliasMemDefs;
868 MapVector<ValueType, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
869 std::set<SUnit*> RejectMemNodes;
920870
921871 // Remove any stale debug info; sometimes BuildSchedGraph is called again
922872 // without emitting the info from the previous call.
1011961 ExitSU.addPred(Dep);
1012962 }
1013963
1014 // Add memory dependencies (Note: isStoreToStackSlot and
1015 // isLoadFromStackSlot are not usable after stack slots are lowered to
1016 // actual addresses).
1017
1018 // This is a barrier event that acts as a pivotal node in the DAG.
964 // Add chain dependencies.
965 // Chain dependencies used to enforce memory order should have
966 // latency of 0 (except for true dependency of Store followed by
967 // aliased Load... we estimate that with a single cycle of latency
968 // assuming the hardware will bypass)
969 // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
970 // after stack slots are lowered to actual addresses.
971 // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
972 // produce more precise dependence information.
973 unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0;
1019974 if (isGlobalMemoryObject(AA, MI)) {
1020
1021 // Become the barrier chain.
975 // Be conservative with these and add dependencies on all memory
976 // references, even those that are known to not alias.
977 for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
978 NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
979 for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
980 I->second[i]->addPred(SDep(SU, SDep::Barrier));
981 }
982 }
983 for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
984 NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
985 for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
986 SDep Dep(SU, SDep::Barrier);
987 Dep.setLatency(TrueMemOrderLatency);
988 I->second[i]->addPred(Dep);
989 }
990 }
991 // Add SU to the barrier chain.
1022992 if (BarrierChain)
1023 BarrierChain->addPredBarrier(SU);
993 BarrierChain->addPred(SDep(SU, SDep::Barrier));
1024994 BarrierChain = SU;
1025
1026 DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
1027 << BarrierChain->NodeNum << ").\n";);
1028
1029 // Add dependencies against everything below it and clear maps.
1030 addBarrierChain(Stores);
1031 addBarrierChain(Loads);
1032 addBarrierChain(NonAliasStores);
1033 addBarrierChain(NonAliasLoads);
1034
1035 continue;
1036 }
1037
1038 // If it's not a store or a variant load, we're done.
1039 if (!MI->mayStore() && !(MI->mayLoad() && !MI->isInvariantLoad(AA)))
1040 continue;
1041
1042 // Always add dependency edge to BarrierChain if present.
1043 if (BarrierChain)
1044 BarrierChain->addPredBarrier(SU);
1045
1046 // Find the underlying objects for MI. The Objs vector is either
1047 // empty, or filled with the Values of memory locations which this
1048 // SU depends on. An empty vector means the memory location is
1049 // unknown, and may alias anything except NonAlias nodes.
1050 UnderlyingObjectsVector Objs;
1051 getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout());
1052
1053 if (MI->mayStore()) {
995 // This is a barrier event that acts as a pivotal node in the DAG,
996 // so it is safe to clear list of exposed nodes.
997 adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
998 TrueMemOrderLatency);
999 RejectMemNodes.clear();
1000 NonAliasMemDefs.clear();
1001 NonAliasMemUses.clear();
1002
1003 // fall-through
1004 new_alias_chain:
1005 // Chain all possibly aliasing memory references through SU.
1006 if (AliasChain) {
1007 unsigned ChainLatency = 0;
1008 if (AliasChain->getInstr()->mayLoad())
1009 ChainLatency = TrueMemOrderLatency;
1010 addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
1011 RejectMemNodes, ChainLatency);
1012 }
1013 AliasChain = SU;
1014 for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
1015 addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
1016 PendingLoads[k], RejectMemNodes,
1017 TrueMemOrderLatency);
1018 for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
1019 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) {
1020 for (unsigned i = 0, e = I->second.size(); i != e; ++i)
1021 addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
1022 I->second[i], RejectMemNodes);
1023 }
1024 for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
1025 AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
1026 for (unsigned i = 0, e = I->second.size(); i != e; ++i)
1027 addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
1028 I->second[i], RejectMemNodes, TrueMemOrderLatency);
1029 }
1030 // This call must come after calls to addChainDependency() since it
1031 // consumes the 'RejectMemNodes' list that addChainDependency() possibly
1032 // adds to.
1033 adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
1034 TrueMemOrderLatency);
1035 PendingLoads.clear();
1036 AliasMemDefs.clear();
1037 AliasMemUses.clear();
1038 } else if (MI->mayStore()) {
1039 // Add dependence on barrier chain, if needed.
1040 // There is no point to check aliasing on barrier event. Even if
1041 // SU and barrier _could_ be reordered, they should not. In addition,
1042 // we have lost all RejectMemNodes below barrier.
1043 if (BarrierChain)
1044 BarrierChain->addPred(SDep(SU, SDep::Barrier));
1045
1046 UnderlyingObjectsVector Objs;
1047 getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout());
1048
10541049 if (Objs.empty()) {
1055 // An unknown store depends on all stores and loads.
1056 addChainDependencies(SU, Stores);
1057 addChainDependencies(SU, NonAliasStores);
1058 addChainDependencies(SU, Loads);
1059 addChainDependencies(SU, NonAliasLoads);
1060
1061 // If we're not using AA, clear Stores map since all stores
1062 // will be chained.
1063 if (!AAForDep)
1064 Stores.clear();
1065
1066 // Map this store to 'UnknownValue'.
1067 Stores.insert(SU, UnknownValue);
1068 continue;
1069 }
1070
1071 // Add precise dependencies against all previously seen memory
1072 // accesses mapped to the same Value(s).
1073 for (auto &underlObj : Objs) {
1074 ValueType V = underlObj.getPointer();
1075 bool ThisMayAlias = underlObj.getInt();
1076
1077 Value2SUsMap &stores_ = (ThisMayAlias ? Stores : NonAliasStores);
1078
1079 // Add dependencies to previous stores and loads mapped to V.
1080 addChainDependencies(SU, stores_, V);
1081 addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V);
1082
1083 // If we're not using AA, then we only need one store per object.
1084 if (!AAForDep)
1085 stores_.clearList(V);
1086
1087 // Map this store to V.
1088 stores_.insert(SU, V);
1089 }
1090 // The store may have dependencies to unanalyzable loads and
1091 // stores.
1092 addChainDependencies(SU, Loads, UnknownValue);
1093 addChainDependencies(SU, Stores, UnknownValue);
1094 }
1095 else { // SU is a load.
1096 if (Objs.empty()) {
1097 // An unknown load depends on all stores.
1098 addChainDependencies(SU, Stores);
1099 addChainDependencies(SU, NonAliasStores);
1100
1101 Loads.insert(SU, UnknownValue);
1102 continue;
1103 }
1104
1105 for (auto &underlObj : Objs) {
1106 ValueType V = underlObj.getPointer();
1107 bool ThisMayAlias = underlObj.getInt();
1108
1109 // Add precise dependencies against all previously seen stores
1110 // mapping to the same Value(s).
1111 addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
1112
1113 // Map this load to V.
1114 (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V);
1115 }
1116 // The load may have dependencies to unanalyzable stores.
1117 addChainDependencies(SU, Stores, UnknownValue);
1118 }
1119
1120 // Reduce maps if they grow huge.
1121 if (Stores.size() + Loads.size() >= HugeRegion) {
1122 DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";);
1123 reduceHugeMemNodeMaps(Stores, Loads, ReductionSize);
1124 }
1125 if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) {
1126 DEBUG(dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";);
1127 reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, ReductionSize);
1128 }
1129 }
1130
1050 // Treat all other stores conservatively.
1051 goto new_alias_chain;
1052 }
1053
1054 bool MayAlias = false;
1055 for (UnderlyingObjectsVector::iterator K = Objs.begin(), KE = Objs.end();
1056 K != KE; ++K) {
1057 ValueType V = K->getPointer();
1058 bool ThisMayAlias = K->getInt();
1059 if (ThisMayAlias)
1060 MayAlias = true;
1061
1062 // A store to a specific PseudoSourceValue. Add precise dependencies.
1063 // Record the def in MemDefs, first adding a dep if there is
1064 // an existing def.
1065 MapVector<ValueType, std::vector<SUnit *> >::iterator I =
1066 ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
1067 MapVector<ValueType, std::vector<SUnit *> >::iterator IE =
1068 ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
1069 if (I != IE) {
1070 for (unsigned i = 0, e = I->second.size(); i != e; ++i)
1071 addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
1072 I->second[i], RejectMemNodes, 0, true);
1073
1074 // If we're not using AA, then we only need one store per object.
1075 if (!AAForDep)
1076 I->second.clear();
1077 I->second.push_back(SU);
1078 } else {
1079 if (ThisMayAlias) {
1080 if (!AAForDep)
1081 AliasMemDefs[V].clear();
1082 AliasMemDefs[V].push_back(SU);
1083 } else {
1084 if (!AAForDep)
1085 NonAliasMemDefs[V].clear();
1086 NonAliasMemDefs[V].push_back(SU);
1087 }
1088 }
1089 // Handle the uses in MemUses, if there are any.
1090 MapVector<ValueType, std::vector<SUnit *> >::iterator J =
1091 ((ThisMayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
1092 MapVector<ValueType, std::vector<SUnit *> >::iterator JE =
1093 ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
1094 if (J != JE) {
1095 for (unsigned i = 0, e = J->second.size(); i != e; ++i)
1096 addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
1097 J->second[i], RejectMemNodes,
1098 TrueMemOrderLatency, true);
1099 J->second.clear();
1100 }
1101 }
1102 if (MayAlias) {
1103 // Add dependencies from all the PendingLoads, i.e. loads
1104 // with no underlying object.
1105 for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
1106 addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
1107 PendingLoads[k], RejectMemNodes,
1108 TrueMemOrderLatency);
1109 // Add dependence on alias chain, if needed.
1110 if (AliasChain)
1111 addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
1112 RejectMemNodes);
1113 }
1114 // This call must come after calls to addChainDependency() since it
1115 // consumes the 'RejectMemNodes' list that addChainDependency() possibly
1116 // adds to.
1117 adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
1118 TrueMemOrderLatency);
1119 } else if (MI->mayLoad()) {
1120 bool MayAlias = true;
1121 if (MI->isInvariantLoad(AA)) {
1122 // Invariant load, no chain dependencies needed!
1123 } else {
1124 UnderlyingObjectsVector Objs;
1125 getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout());
1126
1127 if (Objs.empty()) {
1128 // A load with no underlying object. Depend on all
1129 // potentially aliasing stores.
1130 for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
1131 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
1132 for (unsigned i = 0, e = I->second.size(); i != e; ++i)
1133 addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
1134 I->second[i], RejectMemNodes);
1135
1136 PendingLoads.push_back(SU);
1137 MayAlias = true;
1138 } else {
1139 MayAlias = false;
1140 }
1141
1142 for (UnderlyingObjectsVector::iterator
1143 J = Objs.begin(), JE = Objs.end(); J != JE; ++J) {
1144 ValueType V = J->getPointer();
1145 bool ThisMayAlias = J->getInt();
1146
1147 if (ThisMayAlias)
1148 MayAlias = true;
1149
1150 // A load from a specific PseudoSourceValue. Add precise dependencies.
1151 MapVector<ValueType, std::vector<SUnit *> >::iterator I =
1152 ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
1153 MapVector<ValueType, std::vector<SUnit *> >::iterator IE =
1154 ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
1155 if (I != IE)
1156 for (unsigned i = 0, e = I->second.size(); i != e; ++i)
1157 addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
1158 I->second[i], RejectMemNodes, 0, true);
1159 if (ThisMayAlias)
1160 AliasMemUses[V].push_back(SU);
1161 else
1162 NonAliasMemUses[V].push_back(SU);
1163 }
1164 // Add dependencies on alias and barrier chains, if needed.
1165 if (MayAlias && AliasChain)
1166 addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
1167 RejectMemNodes);
1168 if (MayAlias)
1169 // This call must come after calls to addChainDependency() since it
1170 // consumes the 'RejectMemNodes' list that addChainDependency()
1171 // possibly adds to.
1172 adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU,
1173 RejectMemNodes, /*Latency=*/0);
1174 if (BarrierChain)
1175 BarrierChain->addPred(SDep(SU, SDep::Barrier));
1176 }
1177 }
1178 }
11311179 if (DbgMI)
11321180 FirstDbgValue = DbgMI;
11331181
11351183 Uses.clear();
11361184 CurrentVRegDefs.clear();
11371185 CurrentVRegUses.clear();
1138 }
1139
1140 raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) {
1141 PSV->printCustom(OS);
1142 return OS;
1143 }
1144
1145 void ScheduleDAGInstrs::Value2SUsMap::dump() {
1146 for (auto &Itr : *this) {
1147 if (Itr.first.is<const Value*>()) {
1148 const Value *V = Itr.first.get<const Value*>();
1149 if (isa<UndefValue>(V))
1150 dbgs() << "Unknown";
1151 else
1152 V->printAsOperand(dbgs());
1153 }
1154 else if (Itr.first.is<const PseudoSourceValue*>())
1155 dbgs() << Itr.first.get<const PseudoSourceValue*>();
1156 else
1157 llvm_unreachable("Unknown Value type.");
1158
1159 dbgs() << " : ";
1160 dumpSUList(Itr.second);
1161 }
1162 }
1163
1164 /// Reduce maps in FIFO order, by N SUs. This is better than turning
1165 /// every Nth memory SU into BarrierChain in buildSchedGraph(), since
1166 /// it avoids unnecessary edges between seen SUs above the new
1167 /// BarrierChain, and those below it.
1168 void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
1169 Value2SUsMap &loads, unsigned N) {
1170 DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n";
1171 stores.dump();
1172 dbgs() << "Loading SUnits:\n";
1173 loads.dump());
1174
1175 // Insert all SU's NodeNums into a vector and sort it.
1176 std::vector<unsigned> NodeNums;
1177 NodeNums.reserve(stores.size() + loads.size());
1178 for (auto &I : stores)
1179 for (auto *SU : I.second)
1180 NodeNums.push_back(SU->NodeNum);
1181 for (auto &I : loads)
1182 for (auto *SU : I.second)
1183 NodeNums.push_back(SU->NodeNum);
1184 std::sort(NodeNums.begin(), NodeNums.end());
1185
1186 // The N last elements in NodeNums will be removed, and the SU with
1187 // the lowest NodeNum of them will become the new BarrierChain to
1188 // let the not yet seen SUs have a dependency to the removed SUs.
1189 assert (N <= NodeNums.size());
1190 SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)];
1191 if (BarrierChain) {
1192 // The aliasing and non-aliasing maps reduce independently of each
1193 // other, but share a common BarrierChain. Check if the
1194 // newBarrierChain is above the former one. If it is not, it may
1195 // introduce a loop to use newBarrierChain, so keep the old one.
1196 if (newBarrierChain->NodeNum < BarrierChain->NodeNum) {
1197 BarrierChain->addPredBarrier(newBarrierChain);
1198 BarrierChain = newBarrierChain;
1199 DEBUG(dbgs() << "Inserting new barrier chain: SU("
1200 << BarrierChain->NodeNum << ").\n";);
1201 }
1202 else
1203 DEBUG(dbgs() << "Keeping old barrier chain: SU("
1204 << BarrierChain->NodeNum << ").\n";);
1205 }
1206 else
1207 BarrierChain = newBarrierChain;
1208
1209 insertBarrierChain(stores);
1210 insertBarrierChain(loads);
1211
1212 DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n";
1213 stores.dump();
1214 dbgs() << "Loading SUnits:\n";
1215 loads.dump());
1186 PendingLoads.clear();
12161187 }
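A concrete (made-up) illustration of the reduction: suppose the store and load maps together hold SUs with NodeNums {3, 5, 8, 12, 20} and N = 2. The two highest NodeNums are trimmed: SU(12), the lower of the two, becomes the new BarrierChain, SU(20) is given a barrier edge so it stays ordered with respect to SU(12), and both are erased from their lists. Every SU encountered afterwards in the bottom-up walk then only needs its single edge to the BarrierChain instead of edges to each removed node.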
12171188
12181189 /// \brief Initialize register live-range state for updating kills.