llvm.org GIT mirror llvm / 72374d2
[ScheduleDAGInstrs::buildSchedGraph()] Handling of memory dependencies rewritten.

buildSchedGraph() was in need of reworking, as the AA features had been added on top of earlier code. It was very difficult to understand, and buggy. Cases had been found where scheduling dependencies were actually missed (see r228686).

AliasChain, RejectMemNodes, adjustChainDeps() and iterateChainSucc() have been removed. Instead there are now just the four maps from Value to SUs, which have been renamed to Stores, Loads, NonAliasStores and NonAliasLoads. An unknown store used to become the AliasChain, but now becomes a store mapped to 'unknownValue' (in Stores). What used to be PendingLoads is instead the list of SUs mapped to 'unknownValue' in Loads.

RejectMemNodes and adjustChainDeps() used to be a safety net for everything. The SU maps were sometimes cleared and SUs were put in RejectMemNodes, where adjustChainDeps() would look. Instead of this, a more straightforward approach is used: the SU maps are maintained without clearing them and are simply allowed to grow over time. Instead of the cut-off in the adjustChainDeps() search, the maps are reduced when needed (see below).

Each SUnit either becomes the BarrierChain or is put into one of the maps. For each SUnit encountered, all the information about previous ones is still available until a new BarrierChain is set, at which point the maps are cleared.

For huge regions the algorithm becomes slow, so the maps are reduced at a threshold (current default is 1000 nodes) by a fraction (default 1/2). These values can be tuned with the command-line options -dag-maps-huge-region and -dag-maps-reduction-size if some test case shows that they need to be changed.

No considerable change in output quality or compile time has been observed. There may now be more DAG edges inserted than before (i.e. if A->B->C, then A->C is not needed). However, in a comparison run there were fewer total calls to AA and a somewhat improved compile time, so this does not seem to be a problem.

http://reviews.llvm.org/D8705

Reviewers: Hal Finkel, Andy Trick.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259201 91177308-0d34-0410-b5e6-96231b3b80d8

Jonas Paulsson, 4 years ago
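As a rough illustration of the per-Value dependence maps described above, here is a minimal, self-contained C++ sketch. It is not the LLVM implementation: MemNode, DepMaps, addStore/addLoad/addUnknownStore/addBarrier and the string keys used as underlying values are hypothetical stand-ins for SUnit, the Value2SUsMap maps (Stores/Loads) and UnknownValue.

// Minimal sketch of the per-Value dependence maps (illustrative names only).
#include <cstdio>
#include <list>
#include <map>
#include <string>
#include <vector>

struct MemNode {
  unsigned Num;                // analogous to SUnit::NodeNum
  std::vector<unsigned> Preds; // chain predecessors, by node number
};

class DepMaps {
  // Analogous to the Stores and Loads maps: underlying value -> list of nodes.
  std::map<std::string, std::list<MemNode *>> Stores, Loads;
  const std::string Unknown = "<unknown>"; // stands in for UnknownValue

  // Add a chain edge from N to every node in L.
  static void chain(MemNode &N, const std::list<MemNode *> &L) {
    for (MemNode *P : L)
      N.Preds.push_back(P->Num);
  }

public:
  void addStore(MemNode &N, const std::string &V) {
    chain(N, Stores[V]);       // order against earlier stores to V
    chain(N, Loads[V]);        // and against earlier loads of V
    chain(N, Stores[Unknown]); // and against unanalyzable stores
    chain(N, Loads[Unknown]);  // and against unanalyzable loads
    Stores[V].push_back(&N);
  }

  void addLoad(MemNode &N, const std::string &V) {
    chain(N, Stores[V]);       // loads only need edges to earlier stores
    chain(N, Stores[Unknown]);
    Loads[V].push_back(&N);
  }

  void addUnknownStore(MemNode &N) {
    for (auto &KV : Stores) chain(N, KV.second); // depends on all stores
    for (auto &KV : Loads) chain(N, KV.second);  // and all loads
    Stores[Unknown].push_back(&N);
  }

  // A barrier gets chain edges to everything seen so far, then the maps are
  // cleared; later nodes only need an edge to the barrier.
  void addBarrier(MemNode &N) {
    for (auto &KV : Stores) chain(N, KV.second);
    for (auto &KV : Loads) chain(N, KV.second);
    Stores.clear();
    Loads.clear();
  }
};

int main() {
  DepMaps M;
  MemNode S0{0, {}}, L1{1, {}}, U2{2, {}};
  M.addStore(S0, "a");
  M.addLoad(L1, "a");    // gets a chain edge to S0
  M.addUnknownStore(U2); // gets chain edges to everything seen so far
  const MemNode *Nodes[] = {&S0, &L1, &U2};
  for (const MemNode *N : Nodes) {
    std::printf("node %u preds:", N->Num);
    for (unsigned P : N->Preds)
      std::printf(" %u", P);
    std::printf("\n");
  }
  return 0;
}

The real code additionally keeps the NonAlias maps, caps map growth via reduceHugeMemNodeMaps(), and uses a latency of 1 on store-to-load chain edges; the sketch only shows the basic per-Value chaining and barrier handling.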
4 changed files with 467 additions and 361 deletions.
2626 class raw_ostream;
2727
2828 raw_ostream &operator<<(raw_ostream &OS, const MachineMemOperand &MMO);
29 class PseudoSourceValue;
30 raw_ostream &operator<<(raw_ostream &OS, const PseudoSourceValue* PSV);
2931
3032 /// Special value supplied for machine level alias analysis. It indicates that
3133 /// a memory access references the function's stack frame (e.g., a spill slot),
4446
4547 private:
4648 PSVKind Kind;
49 friend raw_ostream &llvm::operator<<(raw_ostream &OS,
50 const PseudoSourceValue* PSV);
4751
4852 friend class MachineMemOperand; // For printCustom().
4953
395395 /// specified node.
396396 bool addPred(const SDep &D, bool Required = true);
397397
398 /// addPredBarrier - This adds a barrier edge to SU by calling
399 /// addPred(), with latency 0 generally or latency 1 for a store
400 /// followed by a load.
401 bool addPredBarrier(SUnit *SU) {
402 SDep Dep(SU, SDep::Barrier);
403 unsigned TrueMemOrderLatency =
404 ((SU->getInstr()->mayStore() && this->getInstr()->mayLoad()) ? 1 : 0);
405 Dep.setLatency(TrueMemOrderLatency);
406 return addPred(Dep);
407 }
408
398409 /// removePred - This removes the specified edge as a pred of the current
399410 /// node if it exists. It also removes the current node as a successor of
400411 /// the specified node.
1414 #ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
1515 #define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
1616
17 #include "llvm/ADT/MapVector.h"
1718 #include "llvm/ADT/SparseMultiSet.h"
1819 #include "llvm/ADT/SparseSet.h"
1920 #include "llvm/CodeGen/ScheduleDAG.h"
2021 #include "llvm/CodeGen/TargetSchedule.h"
2122 #include "llvm/Support/Compiler.h"
2223 #include "llvm/Target/TargetRegisterInfo.h"
24 #include <list>
2325
2426 namespace llvm {
2527 class MachineFrameInfo;
8385 typedef SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor>
8486 VReg2SUnitOperIdxMultiMap;
8587
88 typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType;
89 typedef SmallVector<PointerIntPair<ValueType, 1, bool>, 4>
90 UnderlyingObjectsVector;
91
8692 /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of
8793 /// MachineInstrs.
8894 class ScheduleDAGInstrs : public ScheduleDAG {
148154 /// Tracks the last instructions in this region using each virtual register.
149155 VReg2SUnitOperIdxMultiMap CurrentVRegUses;
150156
151 /// PendingLoads - Remember where unknown loads are after the most recent
152 /// unknown store, as we iterate. As with Defs and Uses, this is here
153 /// to minimize construction/destruction.
154 std::vector<SUnit *> PendingLoads;
157 AliasAnalysis *AAForDep;
158
159 /// Remember a generic side-effecting instruction as we proceed.
160 /// No other SU ever gets scheduled around it (except in the special
161 /// case of a huge region that gets reduced).
162 SUnit *BarrierChain;
163
164 public:
165
166 /// A list of SUnits, used in Value2SUsMap, during DAG construction.
167 /// Note: to gain speed it might be worth investigating an optimized
168 /// implementation of this data structure, such as a singly linked list
169 /// with a memory pool (SmallVector was tried but slow and SparseSet is not
170 /// applicable).
171 typedef std::list<SUnit *> SUList;
172 protected:
173 /// A map from ValueType to SUList, used during DAG construction,
174 /// as a means of remembering which SUs depend on which memory
175 /// locations.
176 class Value2SUsMap;
177
178 /// Remove in FIFO order some SUs from huge maps.
179 void reduceHugeMemNodeMaps(Value2SUsMap &stores,
180 Value2SUsMap &loads, unsigned N);
181
182 /// Add a chain edge between SUa and SUb, but only if both AliasAnalysis
183 /// and Target fail to deny the dependency.
184 void addChainDependency(SUnit *SUa, SUnit *SUb,
185 unsigned Latency = 0);
186
187 /// Add dependencies as needed from all SUs in list to SU.
188 void addChainDependencies(SUnit *SU, SUList &sus, unsigned Latency) {
189 for (auto *su : sus)
190 addChainDependency(SU, su, Latency);
191 }
192
193 /// Add dependencies as needed from all SUs in map, to SU.
194 void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap);
195
196 /// Add dependencies as needed to SU, from all SUs mapped to V.
197 void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap,
198 ValueType V);
199
200 /// Add barrier chain edges from all SUs in map, and then clear
201 /// the map. This is equivalent to insertBarrierChain(), but
202 /// optimized for the common case where the new BarrierChain (a
203 /// global memory object) has a higher NodeNum than all SUs in
204 /// map. It is assumed BarrierChain has been set before calling
205 /// this.
206 void addBarrierChain(Value2SUsMap &map);
207
208 /// Insert a barrier chain in a huge region, far below current
209 /// SU. Add barrier chain edges from all SUs in map with higher
210 /// NodeNums than this new BarrierChain, and remove them from
211 /// map. It is assumed BarrierChain has been set before calling
212 /// this.
213 void insertBarrierChain(Value2SUsMap &map);
214
215 /// For an unanalyzable memory access, this Value is used in maps.
216 UndefValue *UnknownValue;
155217
156218 /// DbgValues - Remember instruction that precedes DBG_VALUE.
157219 /// These are generated by buildSchedGraph but persist so they can be
1313
1414 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
1515 #include "llvm/ADT/IntEqClasses.h"
16 #include "llvm/ADT/MapVector.h"
1716 #include "llvm/ADT/SmallPtrSet.h"
1817 #include "llvm/ADT/SmallSet.h"
1918 #include "llvm/Analysis/AliasAnalysis.h"
2726 #include "llvm/CodeGen/PseudoSourceValue.h"
2827 #include "llvm/CodeGen/RegisterPressure.h"
2928 #include "llvm/CodeGen/ScheduleDFS.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/Type.h"
3031 #include "llvm/IR/Operator.h"
3132 #include "llvm/Support/CommandLine.h"
3233 #include "llvm/Support/Debug.h"
4950 static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
5051 cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));
5152
53 // Note: the two options below might be used in tuning compile time vs
54 // output quality. Setting HugeRegion so large that it will never be
55 // reached means best-effort, but may be slow.
56
57 // When Stores and Loads maps (or NonAliasStores and NonAliasLoads)
58 // together hold this many SUs, a reduction of maps will be done.
59 static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden,
60 cl::init(1000), cl::desc("The limit to use while constructing the DAG "
61 "prior to scheduling, at which point a trade-off "
62 "is made to avoid excessive compile time."));
63
64 static cl::opt<unsigned> ReductionSize("dag-maps-reduction-size", cl::Hidden,
65 cl::desc("A huge scheduling region will have maps reduced by this many "
66 "nodes at a time. Defaults to HugeRegion / 2."));
67
68 static void dumpSUList(ScheduleDAGInstrs::SUList &L) {
69 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
70 dbgs() << "{ ";
71 for (auto *su : L) {
72 dbgs() << "SU(" << su->NodeNum << ")";
73 if (su != L.back())
74 dbgs() << ", ";
75 }
76 dbgs() << "}\n";
77 #endif
78 }
79
5280 ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
5381 const MachineLoopInfo *mli,
5482 bool RemoveKillFlags)
5583 : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
5684 RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false),
57 TrackLaneMasks(false), FirstDbgValue(nullptr) {
85 TrackLaneMasks(false), AAForDep(nullptr), BarrierChain(nullptr),
86 UnknownValue(UndefValue::get(
87 Type::getVoidTy(mf.getFunction()->getContext()))),
88 FirstDbgValue(nullptr) {
5889 DbgValues.clear();
5990
6091 const TargetSubtargetInfo &ST = mf.getSubtarget();
119150 }
120151 } while (!Working.empty());
121152 }
122
123 typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType;
124 typedef SmallVector<PointerIntPair<ValueType, 1, bool>, 4>
125 UnderlyingObjectsVector;
126153
127154 /// getUnderlyingObjectsForInstr - If this machine instr has memory reference
128155 /// information and it can be tracked to a normal reference to a known
543570 return true;
544571 }
545572
546 const Value *V = (*MI->memoperands_begin())->getValue();
547 if (!V)
573 if ((*MI->memoperands_begin())->getValue() == nullptr)
548574 return true;
549575
550 SmallVector<Value *, 4> Objs;
551 getUnderlyingObjects(V, Objs, DL);
552 for (Value *V : Objs) {
553 // Does this pointer refer to a distinct and identifiable object?
554 if (!isIdentifiedObject(V))
555 return true;
556 }
557
558576 return false;
559577 }
560578
561579 /// This returns true if the two MIs need a chain edge between them.
562 /// If these are not even memory operations, we still may need
563 /// chain deps between them. The question really is - could
564 /// these two MIs be reordered during scheduling from memory dependency
565 /// point of view.
580 /// This is called on normal stores and loads.
566581 static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
567582 const DataLayout &DL, MachineInstr *MIa,
568583 MachineInstr *MIb) {
569584 const MachineFunction *MF = MIa->getParent()->getParent();
570585 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
571586
572 // Cover a trivial case - no edge is need to itself.
573 if (MIa == MIb)
587 assert ((MIa->mayStore() || MIb->mayStore()) &&
588 "Dependency checked between two loads");
589
590 // buildSchedGraph() will clear list of stores if not using AA,
591 // which means all stores have to be chained without AA.
592 if (!AA && MIa->mayStore() && MIb->mayStore())
593 return true;
594
595 // Let the target decide if memory accesses cannot possibly overlap.
596 if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA))
574597 return false;
575
576 // Let the target decide if memory accesses cannot possibly overlap.
577 if ((MIa->mayLoad() || MIa->mayStore()) &&
578 (MIb->mayLoad() || MIb->mayStore()))
579 if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA))
580 return false;
581598
582599 // FIXME: Need to handle multiple memory operands to support all targets.
583600 if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
585602
586603 if (isUnsafeMemoryObject(MIa, MFI, DL) || isUnsafeMemoryObject(MIb, MFI, DL))
587604 return true;
588
589 // If we are dealing with two "normal" loads, we do not need an edge
590 // between them - they could be reordered.
591 if (!MIa->mayStore() && !MIb->mayStore())
592 return false;
593605
594606 // To this point analysis is generic. From here on we do need AA.
595607 if (!AA)
633645 return (AAResult != NoAlias);
634646 }
635647
636 /// This recursive function iterates over chain deps of SUb looking for
637 /// "latest" node that needs a chain edge to SUa.
638 static unsigned iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
639 const DataLayout &DL, SUnit *SUa, SUnit *SUb,
640 SUnit *ExitSU, unsigned *Depth,
641 SmallPtrSetImpl<const SUnit*> &Visited) {
642 if (!SUa || !SUb || SUb == ExitSU)
643 return *Depth;
644
645 // Remember visited nodes.
646 if (!Visited.insert(SUb).second)
647 return *Depth;
648 // If there is _some_ dependency already in place, do not
649 // descend any further.
650 // TODO: Need to make sure that if that dependency got eliminated or ignored
651 // for any reason in the future, we would not violate DAG topology.
652 // Currently it does not happen, but makes an implicit assumption about
653 // future implementation.
654 //
655 // Independently, if we encounter node that is some sort of global
656 // object (like a call) we already have full set of dependencies to it
657 // and we can stop descending.
658 if (SUa->isSucc(SUb) ||
659 isGlobalMemoryObject(AA, SUb->getInstr()))
660 return *Depth;
661
662 // If we do need an edge, or we have exceeded depth budget,
663 // add that edge to the predecessors chain of SUb,
664 // and stop descending.
665 if (*Depth > 200 ||
666 MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
667 SUb->addPred(SDep(SUa, SDep::MayAliasMem));
668 return *Depth;
669 }
670 // Track current depth.
671 (*Depth)++;
672 // Iterate over memory dependencies only.
673 for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end();
674 I != E; ++I)
675 if (I->isNormalMemoryOrBarrier())
676 iterateChainSucc(AA, MFI, DL, SUa, I->getSUnit(), ExitSU, Depth, Visited);
677 return *Depth;
678 }
679
680 /// This function assumes that "downward" from SU there exist
681 /// tail/leaf of already constructed DAG. It iterates downward and
682 /// checks whether SU can be aliasing any node dominated
683 /// by it.
684 static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
685 const DataLayout &DL, SUnit *SU, SUnit *ExitSU,
686 std::set<SUnit *> &CheckList,
687 unsigned LatencyToLoad) {
688 if (!SU)
689 return;
690
691 SmallPtrSet<const SUnit*, 16> Visited;
692 unsigned Depth = 0;
693
694 for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end();
695 I != IE; ++I) {
696 if (SU == *I)
697 continue;
698 if (MIsNeedChainEdge(AA, MFI, DL, SU->getInstr(), (*I)->getInstr())) {
699 SDep Dep(SU, SDep::MayAliasMem);
700 Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0);
701 (*I)->addPred(Dep);
702 }
703
704 // Iterate recursively over all previously added memory chain
705 // successors. Keep track of visited nodes.
706 for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
707 JE = (*I)->Succs.end(); J != JE; ++J)
708 if (J->isNormalMemoryOrBarrier())
709 iterateChainSucc(AA, MFI, DL, SU, J->getSUnit(), ExitSU, &Depth,
710 Visited);
711 }
712 }
713
714 /// Check whether two objects need a chain edge, if so, add it
715 /// otherwise remember the rejected SU.
716 static inline void addChainDependency(AliasAnalysis *AA,
717 const MachineFrameInfo *MFI,
718 const DataLayout &DL, SUnit *SUa,
719 SUnit *SUb, std::set<SUnit *> &RejectList,
720 unsigned TrueMemOrderLatency = 0,
721 bool isNormalMemory = false) {
722 // If this is a false dependency,
723 // do not add the edge, but remember the rejected node.
724 if (MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
725 SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
726 Dep.setLatency(TrueMemOrderLatency);
648 /// Check whether two objects need a chain edge and add it if needed.
649 void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb,
650 unsigned Latency) {
651 if (MIsNeedChainEdge(AAForDep, MFI, MF.getDataLayout(), SUa->getInstr(),
652 SUb->getInstr())) {
653 SDep Dep(SUa, SDep::MayAliasMem);
654 Dep.setLatency(Latency);
727655 SUb->addPred(Dep);
728 }
729 else {
730 // Duplicate entries should be ignored.
731 RejectList.insert(SUb);
732 DEBUG(dbgs() << "\tReject chain dep between SU("
733 << SUa->NodeNum << ") and SU("
734 << SUb->NodeNum << ")\n");
735656 }
736657 }
737658
831752 }
832753 }
833754
755 class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
756
757 /// Current total number of SUs in map.
758 unsigned NumNodes;
759
760 /// 1 for loads, 0 for stores. (see comment in SUList)
761 unsigned TrueMemOrderLatency;
762 public:
763
764 Value2SUsMap(unsigned lat = 0) : NumNodes(0), TrueMemOrderLatency(lat) {}
765
766 /// To keep NumNodes up to date, insert() is used instead of
767 /// this operator w/ push_back().
768 ValueType &operator[](const SUList &Key) {
769 llvm_unreachable("Don't use. Use insert() instead."); };
770
771 /// Add SU to the SUList of V. If Map grows huge, reduce its size
772 /// by calling reduce().
773 void inline insert(SUnit *SU, ValueType V) {
774 MapVector::operator[](V).push_back(SU);
775 NumNodes++;
776 }
777
778 /// Clears the list of SUs mapped to V.
779 void inline clearList(ValueType V) {
780 iterator Itr = find(V);
781 if (Itr != end()) {
782 assert (NumNodes >= Itr->second.size());
783 NumNodes -= Itr->second.size();
784
785 Itr->second.clear();
786 }
787 }
788
789 /// Clears map from all contents.
790 void clear() {
791 MapVector::clear();
792 NumNodes = 0;
793 }
794
795 unsigned inline size() const { return NumNodes; }
796
797 /// Count the number of SUs in this map after a reduction.
798 void reComputeSize(void) {
799 NumNodes = 0;
800 for (auto &I : *this)
801 NumNodes += I.second.size();
802 }
803
804 unsigned inline getTrueMemOrderLatency() const {
805 return TrueMemOrderLatency;
806 }
807
808 void dump();
809 };
810
811 void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
812 Value2SUsMap &Val2SUsMap) {
813 for (auto &I : Val2SUsMap)
814 addChainDependencies(SU, I.second,
815 Val2SUsMap.getTrueMemOrderLatency());
816 }
817
818 void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
819 Value2SUsMap &Val2SUsMap,
820 ValueType V) {
821 Value2SUsMap::iterator Itr = Val2SUsMap.find(V);
822 if (Itr != Val2SUsMap.end())
823 addChainDependencies(SU, Itr->second,
824 Val2SUsMap.getTrueMemOrderLatency());
825 }
826
827 void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
828 assert (BarrierChain != nullptr);
829
830 for (auto &I : map) {
831 SUList &sus = I.second;
832 for (auto *SU : sus)
833 SU->addPredBarrier(BarrierChain);
834 }
835 map.clear();
836 }
837
838 void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
839 assert (BarrierChain != nullptr);
840
841 // Go through all lists of SUs.
842 for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
843 Value2SUsMap::iterator CurrItr = I++;
844 SUList &sus = CurrItr->second;
845 SUList::iterator SUItr = sus.begin(), SUEE = sus.end();
846 for (; SUItr != SUEE; ++SUItr) {
847 // Stop on BarrierChain or any instruction above it.
848 if ((*SUItr)->NodeNum <= BarrierChain->NodeNum)
849 break;
850
851 (*SUItr)->addPredBarrier(BarrierChain);
852 }
853
854 // Remove also the BarrierChain from list if present.
855 if (*SUItr == BarrierChain)
856 SUItr++;
857
858 // Remove all SUs that are now successors of BarrierChain.
859 if (SUItr != sus.begin())
860 sus.erase(sus.begin(), SUItr);
861 }
862
863 // Remove all entries with empty su lists.
864 map.remove_if([&](std::pair<ValueType, SUList> &mapEntry) {
865 return (mapEntry.second.empty()); });
866
867 // Recompute the size of the map (NumNodes).
868 map.reComputeSize();
869 }
870
834871 /// If RegPressure is non-null, compute register pressure as a side effect. The
835872 /// DAG builder is an efficient place to do it because it already visits
836873 /// operands.
842879 const TargetSubtargetInfo &ST = MF.getSubtarget();
843880 bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
844881 : ST.useAA();
845 AliasAnalysis *AAForDep = UseAA ? AA : nullptr;
882 AAForDep = UseAA ? AA : nullptr;
883
884 BarrierChain = nullptr;
846885
847886 this->TrackLaneMasks = TrackLaneMasks;
848887 MISUnitMap.clear();
854893 if (PDiffs)
855894 PDiffs->init(SUnits.size());
856895
857 // We build scheduling units by walking a block's instruction list from bottom
858 // to top.
859
860 // Remember where a generic side-effecting instruction is as we proceed.
861 SUnit *BarrierChain = nullptr, *AliasChain = nullptr;
862
863 // Memory references to specific known memory locations are tracked
864 // so that they can be given more precise dependencies. We track
865 // separately the known memory locations that may alias and those
866 // that are known not to alias
867 MapVector<ValueType, std::vector<SUnit *> > AliasMemDefs, NonAliasMemDefs;
868 MapVector<ValueType, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
869 std::set<SUnit *> RejectMemNodes;
896 // We build scheduling units by walking a block's instruction list
897 // from bottom to top.
898
899 // Each MI's memory operand(s) is analyzed to a list of underlying
900 // objects. The SU is then inserted in the SUList(s) mapped from
901 // those Value(s). Each Value thus gets mapped to a list of SUs
902 // depending on it, defs and uses kept separately. Two SUs are
903 // non-aliasing to each other if they depend on different Values
904 // exclusively.
905 Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);
906
907 // Certain memory accesses are known to not alias any SU in Stores
908 // or Loads, and therefore have their own 'NonAlias'
909 // domain. E.g. spill / reload instructions never alias LLVM I/R
910 // Values. It is assumed that these memory accesses always
911 // have proper memory operand modelling, and are therefore never
912 // unanalyzable. This means they are non-aliasing against all nodes
913 // in Stores and Loads, including the unanalyzable ones.
914 Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);
915
916 // Always reduce a huge region with half of the elements, except
917 // when user sets this number explicitly.
918 if (ReductionSize.getNumOccurrences() == 0)
919 ReductionSize = (HugeRegion / 2);
870920
871921 // Remove any stale debug info; sometimes BuildSchedGraph is called again
872922 // without emitting the info from the previous call.
9611011 ExitSU.addPred(Dep);
9621012 }
9631013
964 // Add chain dependencies.
965 // Chain dependencies used to enforce memory order should have
966 // latency of 0 (except for true dependency of Store followed by
967 // aliased Load... we estimate that with a single cycle of latency
968 // assuming the hardware will bypass)
969 // Note that isStoreToStackSlot and isLoadFromStackSLot are not usable
970 // after stack slots are lowered to actual addresses.
971 // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
972 // produce more precise dependence information.
973 unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0;
1014 // Add memory dependencies (Note: isStoreToStackSlot and
1015 // isLoadFromStackSlot are not usable after stack slots are lowered to
1016 // actual addresses).
1017
1018 // This is a barrier event that acts as a pivotal node in the DAG.
9741019 if (isGlobalMemoryObject(AA, MI)) {
975 // Be conservative with these and add dependencies on all memory
976 // references, even those that are known to not alias.
977 for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
978 NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
979 for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
980 I->second[i]->addPred(SDep(SU, SDep::Barrier));
981 }
982 }
983 for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
984 NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
985 for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
986 SDep Dep(SU, SDep::Barrier);
987 Dep.setLatency(TrueMemOrderLatency);
988 I->second[i]->addPred(Dep);
989 }
990 }
991 // Add SU to the barrier chain.
1020
1021 // Become the barrier chain.
9921022 if (BarrierChain)
993 BarrierChain->addPred(SDep(SU, SDep::Barrier));
1023 BarrierChain->addPredBarrier(SU);
9941024 BarrierChain = SU;
995 // This is a barrier event that acts as a pivotal node in the DAG,
996 // so it is safe to clear list of exposed nodes.
997 adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
998 TrueMemOrderLatency);
999 RejectMemNodes.clear();
1000 NonAliasMemDefs.clear();
1001 NonAliasMemUses.clear();
1002
1003 // fall-through
1004 new_alias_chain:
1005 // Chain all possibly aliasing memory references through SU.
1006 if (AliasChain) {
1007 unsigned ChainLatency = 0;
1008 if (AliasChain->getInstr()->mayLoad())
1009 ChainLatency = TrueMemOrderLatency;
1010 addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
1011 RejectMemNodes, ChainLatency);
1012 }
1013 AliasChain = SU;
1014 for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
1015 addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
1016 PendingLoads[k], RejectMemNodes,
1017 TrueMemOrderLatency);
1018 for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
1019 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) {
1020 for (unsigned i = 0, e = I->second.size(); i != e; ++i)
1021 addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
1022 I->second[i], RejectMemNodes);
1023 }
1024 for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
1025 AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
1026 for (unsigned i = 0, e = I->second.size(); i != e; ++i)
1027 addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
1028 I->second[i], RejectMemNodes, TrueMemOrderLatency);
1029 }
1030 // This call must come after calls to addChainDependency() since it
1031 // consumes the 'RejectMemNodes' list that addChainDependency() possibly
1032 // adds to.
1033 adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
1034 TrueMemOrderLatency);
1035 PendingLoads.clear();
1036 AliasMemDefs.clear();
1037 AliasMemUses.clear();
1038 } else if (MI->mayStore()) {
1039 // Add dependence on barrier chain, if needed.
1040 // There is no point to check aliasing on barrier event. Even if
1041 // SU and barrier _could_ be reordered, they should not. In addition,
1042 // we have lost all RejectMemNodes below barrier.
1043 if (BarrierChain)
1044 BarrierChain->addPred(SDep(SU, SDep::Barrier));
1045
1046 UnderlyingObjectsVector Objs;
1047 getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout());
1048
1025
1026 DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
1027 << BarrierChain->NodeNum << ").\n";);
1028
1029 // Add dependencies against everything below it and clear maps.
1030 addBarrierChain(Stores);
1031 addBarrierChain(Loads);
1032 addBarrierChain(NonAliasStores);
1033 addBarrierChain(NonAliasLoads);
1034
1035 continue;
1036 }
1037
1038 // If it's not a store or a variant load, we're done.
1039 if (!MI->mayStore() && !(MI->mayLoad() && !MI->isInvariantLoad(AA)))
1040 continue;
1041
1042 // Always add a dependency edge to BarrierChain if present.
1043 if (BarrierChain)
1044 BarrierChain->addPredBarrier(SU);
1045
1046 // Find the underlying objects for MI. The Objs vector is either
1047 // empty, or filled with the Values of memory locations which this
1048 // SU depends on. An empty vector means the memory location is
1049 // unknown, and may alias anything except NonAlias nodes.
1050 UnderlyingObjectsVector Objs;
1051 getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout());
1052
1053 if (MI->mayStore()) {
10491054 if (Objs.empty()) {
1050 // Treat all other stores conservatively.
1051 goto new_alias_chain;
1052 }
1053
1054 bool MayAlias = false;
1055 for (UnderlyingObjectsVector::iterator K = Objs.begin(), KE = Objs.end();
1056 K != KE; ++K) {
1057 ValueType V = K->getPointer();
1058 bool ThisMayAlias = K->getInt();
1059 if (ThisMayAlias)
1060 MayAlias = true;
1061
1062 // A store to a specific PseudoSourceValue. Add precise dependencies.
1063 // Record the def in MemDefs, first adding a dep if there is
1064 // an existing def.
1065 MapVector<ValueType, std::vector<SUnit *> >::iterator I =
1066 ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
1067 MapVector<ValueType, std::vector<SUnit *> >::iterator IE =
1068 ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
1069 if (I != IE) {
1070 for (unsigned i = 0, e = I->second.size(); i != e; ++i)
1071 addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
1072 I->second[i], RejectMemNodes, 0, true);
1073
1074 // If we're not using AA, then we only need one store per object.
1075 if (!AAForDep)
1076 I->second.clear();
1077 I->second.push_back(SU);
1078 } else {
1079 if (ThisMayAlias) {
1080 if (!AAForDep)
1081 AliasMemDefs[V].clear();
1082 AliasMemDefs[V].push_back(SU);
1083 } else {
1084 if (!AAForDep)
1085 NonAliasMemDefs[V].clear();
1086 NonAliasMemDefs[V].push_back(SU);
1087 }
1088 }
1089 // Handle the uses in MemUses, if there are any.
1090 MapVector<ValueType, std::vector<SUnit *> >::iterator J =
1091 ((ThisMayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
1092 MapVector<ValueType, std::vector<SUnit *> >::iterator JE =
1093 ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
1094 if (J != JE) {
1095 for (unsigned i = 0, e = J->second.size(); i != e; ++i)
1096 addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
1097 J->second[i], RejectMemNodes,
1098 TrueMemOrderLatency, true);
1099 J->second.clear();
1100 }
1101 }
1102 if (MayAlias) {
1103 // Add dependencies from all the PendingLoads, i.e. loads
1104 // with no underlying object.
1105 for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
1106 addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
1107 PendingLoads[k], RejectMemNodes,
1108 TrueMemOrderLatency);
1109 // Add dependence on alias chain, if needed.
1110 if (AliasChain)
1111 addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
1112 RejectMemNodes);
1113 }
1114 // This call must come after calls to addChainDependency() since it
1115 // consumes the 'RejectMemNodes' list that addChainDependency() possibly
1116 // adds to.
1117 adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
1118 TrueMemOrderLatency);
1119 } else if (MI->mayLoad()) {
1120 bool MayAlias = true;
1121 if (MI->isInvariantLoad(AA)) {
1122 // Invariant load, no chain dependencies needed!
1123 } else {
1124 UnderlyingObjectsVector Objs;
1125 getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout());
1126
1127 if (Objs.empty()) {
1128 // A load with no underlying object. Depend on all
1129 // potentially aliasing stores.
1130 for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
1131 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
1132 for (unsigned i = 0, e = I->second.size(); i != e; ++i)
1133 addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
1134 I->second[i], RejectMemNodes);
1135
1136 PendingLoads.push_back(SU);
1137 MayAlias = true;
1138 } else {
1139 MayAlias = false;
1140 }
1141
1142 for (UnderlyingObjectsVector::iterator
1143 J = Objs.begin(), JE = Objs.end(); J != JE; ++J) {
1144 ValueType V = J->getPointer();
1145 bool ThisMayAlias = J->getInt();
1146
1147 if (ThisMayAlias)
1148 MayAlias = true;
1149
1150 // A load from a specific PseudoSourceValue. Add precise dependencies.
1151 MapVector<ValueType, std::vector<SUnit *> >::iterator I =
1152 ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
1153 MapVector<ValueType, std::vector<SUnit *> >::iterator IE =
1154 ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
1155 if (I != IE)
1156 for (unsigned i = 0, e = I->second.size(); i != e; ++i)
1157 addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
1158 I->second[i], RejectMemNodes, 0, true);
1159 if (ThisMayAlias)
1160 AliasMemUses[V].push_back(SU);
1161 else
1162 NonAliasMemUses[V].push_back(SU);
1163 }
1164 // Add dependencies on alias and barrier chains, if needed.
1165 if (MayAlias && AliasChain)
1166 addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
1167 RejectMemNodes);
1168 if (MayAlias)
1169 // This call must come after calls to addChainDependency() since it
1170 // consumes the 'RejectMemNodes' list that addChainDependency()
1171 // possibly adds to.
1172 adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU,
1173 RejectMemNodes, /*Latency=*/0);
1174 if (BarrierChain)
1175 BarrierChain->addPred(SDep(SU, SDep::Barrier));
1176 }
1177 }
1178 }
1055 // An unknown store depends on all stores and loads.
1056 addChainDependencies(SU, Stores);
1057 addChainDependencies(SU, NonAliasStores);
1058 addChainDependencies(SU, Loads);
1059 addChainDependencies(SU, NonAliasLoads);
1060
1061 // If we're not using AA, clear Stores map since all stores
1062 // will be chained.
1063 if (!AAForDep)
1064 Stores.clear();
1065
1066 // Map this store to 'UnknownValue'.
1067 Stores.insert(SU, UnknownValue);
1068 continue;
1069 }
1070
1071 // Add precise dependencies against all previously seen memory
1072 // accesses mapped to the same Value(s).
1073 for (auto &underlObj : Objs) {
1074 ValueType V = underlObj.getPointer();
1075 bool ThisMayAlias = underlObj.getInt();
1076
1077 Value2SUsMap &stores_ = (ThisMayAlias ? Stores : NonAliasStores);
1078
1079 // Add dependencies to previous stores and loads mapped to V.
1080 addChainDependencies(SU, stores_, V);
1081 addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V);
1082
1083 // If we're not using AA, then we only need one store per object.
1084 if (!AAForDep)
1085 stores_.clearList(V);
1086
1087 // Map this store to V.
1088 stores_.insert(SU, V);
1089 }
1090 // The store may have dependencies to unanalyzable loads and
1091 // stores.
1092 addChainDependencies(SU, Loads, UnknownValue);
1093 addChainDependencies(SU, Stores, UnknownValue);
1094 }
1095 else { // SU is a load.
1096 if (Objs.empty()) {
1097 // An unknown load depends on all stores.
1098 addChainDependencies(SU, Stores);
1099 addChainDependencies(SU, NonAliasStores);
1100
1101 Loads.insert(SU, UnknownValue);
1102 continue;
1103 }
1104
1105 for (auto &underlObj : Objs) {
1106 ValueType V = underlObj.getPointer();
1107 bool ThisMayAlias = underlObj.getInt();
1108
1109 // Add precise dependencies against all previously seen stores
1110 // mapping to the same Value(s).
1111 addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
1112
1113 // Map this load to V.
1114 (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V);
1115 }
1116 // The load may have dependencies to unanalyzable stores.
1117 addChainDependencies(SU, Stores, UnknownValue);
1118 }
1119
1120 // Reduce maps if they grow huge.
1121 if (Stores.size() + Loads.size() >= HugeRegion) {
1122 DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";);
1123 reduceHugeMemNodeMaps(Stores, Loads, ReductionSize);
1124 }
1125 if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) {
1126 DEBUG(dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";);
1127 reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, ReductionSize);
1128 }
1129 }
1130
11791131 if (DbgMI)
11801132 FirstDbgValue = DbgMI;
11811133
11831135 Uses.clear();
11841136 CurrentVRegDefs.clear();
11851137 CurrentVRegUses.clear();
1186 PendingLoads.clear();
1138 }
1139
1140 raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) {
1141 PSV->printCustom(OS);
1142 return OS;
1143 }
1144
1145 void ScheduleDAGInstrs::Value2SUsMap::dump() {
1146 for (auto &Itr : *this) {
1147 if (Itr.first.is<const Value*>()) {
1148 const Value *V = Itr.first.get<const Value*>();
1149 if (isa<UndefValue>(V))
1150 dbgs() << "Unknown";
1151 else
1152 V->printAsOperand(dbgs());
1153 }
1154 else if (Itr.first.is<const PseudoSourceValue*>())
1155 dbgs() << Itr.first.get<const PseudoSourceValue*>();
1156 else
1157 llvm_unreachable("Unknown Value type.");
1158
1159 dbgs() << " : ";
1160 dumpSUList(Itr.second);
1161 }
1162 }
1163
1164 /// Reduce maps in FIFO order, by N SUs. This is better than turning
1165 /// every Nth memory SU into BarrierChain in buildSchedGraph(), since
1166 /// it avoids unnecessary edges between seen SUs above the new
1167 /// BarrierChain, and those below it.
1168 void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
1169 Value2SUsMap &loads, unsigned N) {
1170 DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n";
1171 stores.dump();
1172 dbgs() << "Loading SUnits:\n";
1173 loads.dump());
1174
1175 // Insert all SU's NodeNums into a vector and sort it.
1176 std::vector<unsigned> NodeNums;
1177 NodeNums.reserve(stores.size() + loads.size());
1178 for (auto &I : stores)
1179 for (auto *SU : I.second)
1180 NodeNums.push_back(SU->NodeNum);
1181 for (auto &I : loads)
1182 for (auto *SU : I.second)
1183 NodeNums.push_back(SU->NodeNum);
1184 std::sort(NodeNums.begin(), NodeNums.end());
1185
1186 // The N last elements in NodeNums will be removed, and the SU with
1187 // the lowest NodeNum of them will become the new BarrierChain to
1188 // let the not yet seen SUs have a dependency to the removed SUs.
1189 assert (N <= NodeNums.size());
1190 SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)];
1191 if (BarrierChain) {
1192 // The aliasing and non-aliasing maps reduce independently of each
1193 // other, but share a common BarrierChain. Check if the
1194 // newBarrierChain is above the former one. If it is not, it may
1195 // introduce a loop to use newBarrierChain, so keep the old one.
1196 if (newBarrierChain->NodeNum < BarrierChain->NodeNum) {
1197 BarrierChain->addPredBarrier(newBarrierChain);
1198 BarrierChain = newBarrierChain;
1199 DEBUG(dbgs() << "Inserting new barrier chain: SU("
1200 << BarrierChain->NodeNum << ").\n";);
1201 }
1202 else
1203 DEBUG(dbgs() << "Keeping old barrier chain: SU("
1204 << BarrierChain->NodeNum << ").\n";);
1205 }
1206 else
1207 BarrierChain = newBarrierChain;
1208
1209 insertBarrierChain(stores);
1210 insertBarrierChain(loads);
1211
1212 DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n";
1213 stores.dump();
1214 dbgs() << "Loading SUnits:\n";
1215 loads.dump());
11871216 }
11881217
11891218 /// \brief Initialize register live-range state for updating kills.