llvm.org GIT mirror llvm / 8178ac8
[llvm-mca] Move llvm-mca library to llvm/lib/MCA. Summary: See PR38731. Reviewers: andreadb Subscribers: mgorny, javed.absar, tschuett, gbedwell, andreadb, RKSimon, llvm-commits Differential Revision: https://reviews.llvm.org/D55557 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@349332 91177308-0d34-0410-b5e6-96231b3b80d8 Clement Courbet 9 months ago
87 changed file(s) with 5869 addition(s) and 5876 deletion(s). Raw diff Collapse all Expand all
7070 N: Andrea Di Biagio
7171 E: andrea.dibiagio@sony.com
7272 E: andrea.dibiagio@gmail.com
73 D: llvm-mca
73 D: MCA, llvm-mca
7474
7575 N: Duncan P. N. Exon Smith
7676 E: dexonsmith@apple.com
0 //===---------------------------- Context.h ---------------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file defines a class for holding ownership of various simulated
11 /// hardware units. A Context also provides a utility routine for constructing
12 /// a default out-of-order pipeline with fetch, dispatch, execute, and retire
13 /// stages.
14 ///
15 //===----------------------------------------------------------------------===//
16
17 #ifndef LLVM_MCA_CONTEXT_H
18 #define LLVM_MCA_CONTEXT_H
19
20 #include "llvm/MC/MCRegisterInfo.h"
21 #include "llvm/MC/MCSubtargetInfo.h"
22 #include "llvm/MCA/HardwareUnits/HardwareUnit.h"
23 #include "llvm/MCA/InstrBuilder.h"
24 #include "llvm/MCA/Pipeline.h"
25 #include "llvm/MCA/SourceMgr.h"
26 #include <memory>
27
28 namespace llvm {
29 namespace mca {
30
31 /// This is a convenience struct to hold the parameters necessary for creating
32 /// the pre-built "default" out-of-order pipeline.
33 struct PipelineOptions {
34 PipelineOptions(unsigned DW, unsigned RFS, unsigned LQS, unsigned SQS,
35 bool NoAlias)
36 : DispatchWidth(DW), RegisterFileSize(RFS), LoadQueueSize(LQS),
37 StoreQueueSize(SQS), AssumeNoAlias(NoAlias) {}
38 unsigned DispatchWidth;
39 unsigned RegisterFileSize;
40 unsigned LoadQueueSize;
41 unsigned StoreQueueSize;
42 bool AssumeNoAlias;
43 };
44
45 class Context {
46 SmallVector, 4> Hardware;
47 const MCRegisterInfo &MRI;
48 const MCSubtargetInfo &STI;
49
50 public:
51 Context(const MCRegisterInfo &R, const MCSubtargetInfo &S) : MRI(R), STI(S) {}
52 Context(const Context &C) = delete;
53 Context &operator=(const Context &C) = delete;
54
55 void addHardwareUnit(std::unique_ptr H) {
56 Hardware.push_back(std::move(H));
57 }
58
59 /// Construct a basic pipeline for simulating an out-of-order pipeline.
60 /// This pipeline consists of Fetch, Dispatch, Execute, and Retire stages.
61 std::unique_ptr createDefaultPipeline(const PipelineOptions &Opts,
62 InstrBuilder &IB,
63 SourceMgr &SrcMgr);
64 };
65
66 } // namespace mca
67 } // namespace llvm
68 #endif // LLVM_MCA_CONTEXT_H
0 //===----------------------- HWEventListener.h ------------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file defines the main interface for hardware event listeners.
11 ///
12 //===----------------------------------------------------------------------===//
13
14 #ifndef LLVM_MCA_HWEVENTLISTENER_H
15 #define LLVM_MCA_HWEVENTLISTENER_H
16
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/MCA/Instruction.h"
19 #include "llvm/MCA/Support.h"
20
21 namespace llvm {
22 namespace mca {
23
24 // An HWInstructionEvent represents state changes of instructions that
25 // listeners might be interested in. Listeners can choose to ignore any event
26 // they are not interested in.
27 class HWInstructionEvent {
28 public:
29 // This is the list of event types that are shared by all targets, that
30 // generic subtarget-agnostic classes (e.g., Pipeline, HWInstructionEvent,
31 // ...) and generic Views can manipulate.
32 // Subtargets are free to define additional event types, that are goin to be
33 // handled by generic components as opaque values, but can still be
34 // emitted by subtarget-specific pipeline stages (e.g., ExecuteStage,
35 // DispatchStage, ...) and interpreted by subtarget-specific EventListener
36 // implementations.
37 enum GenericEventType {
38 Invalid = 0,
39 // Events generated by the Retire Control Unit.
40 Retired,
41 // Events generated by the Scheduler.
42 Ready,
43 Issued,
44 Executed,
45 // Events generated by the Dispatch logic.
46 Dispatched,
47
48 LastGenericEventType,
49 };
50
51 HWInstructionEvent(unsigned type, const InstRef &Inst)
52 : Type(type), IR(Inst) {}
53
54 // The event type. The exact meaning depends on the subtarget.
55 const unsigned Type;
56
57 // The instruction this event was generated for.
58 const InstRef &IR;
59 };
60
61 class HWInstructionIssuedEvent : public HWInstructionEvent {
62 public:
63 using ResourceRef = std::pair;
64 HWInstructionIssuedEvent(const InstRef &IR,
65 ArrayRef> UR)
66 : HWInstructionEvent(HWInstructionEvent::Issued, IR), UsedResources(UR) {}
67
68 ArrayRef> UsedResources;
69 };
70
71 class HWInstructionDispatchedEvent : public HWInstructionEvent {
72 public:
73 HWInstructionDispatchedEvent(const InstRef &IR, ArrayRef Regs,
74 unsigned UOps)
75 : HWInstructionEvent(HWInstructionEvent::Dispatched, IR),
76 UsedPhysRegs(Regs), MicroOpcodes(UOps) {}
77 // Number of physical register allocated for this instruction. There is one
78 // entry per register file.
79 ArrayRef UsedPhysRegs;
80 // Number of micro opcodes dispatched.
81 // This field is often set to the total number of micro-opcodes specified by
82 // the instruction descriptor of IR.
83 // The only exception is when IR declares a number of micro opcodes
84 // which exceeds the processor DispatchWidth, and - by construction - it
85 // requires multiple cycles to be fully dispatched. In that particular case,
86 // the dispatch logic would generate more than one dispatch event (one per
87 // cycle), and each event would declare how many micro opcodes are effectively
88 // been dispatched to the schedulers.
89 unsigned MicroOpcodes;
90 };
91
92 class HWInstructionRetiredEvent : public HWInstructionEvent {
93 public:
94 HWInstructionRetiredEvent(const InstRef &IR, ArrayRef Regs)
95 : HWInstructionEvent(HWInstructionEvent::Retired, IR),
96 FreedPhysRegs(Regs) {}
97 // Number of register writes that have been architecturally committed. There
98 // is one entry per register file.
99 ArrayRef FreedPhysRegs;
100 };
101
102 // A HWStallEvent represents a pipeline stall caused by the lack of hardware
103 // resources.
104 class HWStallEvent {
105 public:
106 enum GenericEventType {
107 Invalid = 0,
108 // Generic stall events generated by the DispatchStage.
109 RegisterFileStall,
110 RetireControlUnitStall,
111 // Generic stall events generated by the Scheduler.
112 DispatchGroupStall,
113 SchedulerQueueFull,
114 LoadQueueFull,
115 StoreQueueFull,
116 LastGenericEvent
117 };
118
119 HWStallEvent(unsigned type, const InstRef &Inst) : Type(type), IR(Inst) {}
120
121 // The exact meaning of the stall event type depends on the subtarget.
122 const unsigned Type;
123
124 // The instruction this event was generated for.
125 const InstRef &IR;
126 };
127
128 class HWEventListener {
129 public:
130 // Generic events generated by the pipeline.
131 virtual void onCycleBegin() {}
132 virtual void onCycleEnd() {}
133
134 virtual void onEvent(const HWInstructionEvent &Event) {}
135 virtual void onEvent(const HWStallEvent &Event) {}
136
137 using ResourceRef = std::pair;
138 virtual void onResourceAvailable(const ResourceRef &RRef) {}
139
140 // Events generated by the Scheduler when buffered resources are
141 // consumed/freed for an instruction.
142 virtual void onReservedBuffers(const InstRef &Inst,
143 ArrayRef Buffers) {}
144 virtual void onReleasedBuffers(const InstRef &Inst,
145 ArrayRef Buffers) {}
146
147 virtual ~HWEventListener() {}
148
149 private:
150 virtual void anchor();
151 };
152 } // namespace mca
153 } // namespace llvm
154
155 #endif // LLVM_MCA_HWEVENTLISTENER_H
0 //===-------------------------- HardwareUnit.h ------------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file defines a base class for describing a simulated hardware
11 /// unit. These units are used to construct a simulated backend.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #ifndef LLVM_MCA_HARDWAREUNIT_H
16 #define LLVM_MCA_HARDWAREUNIT_H
17
18 namespace llvm {
19 namespace mca {
20
21 class HardwareUnit {
22 HardwareUnit(const HardwareUnit &H) = delete;
23 HardwareUnit &operator=(const HardwareUnit &H) = delete;
24
25 public:
26 HardwareUnit() = default;
27 virtual ~HardwareUnit();
28 };
29
30 } // namespace mca
31 } // namespace llvm
32 #endif // LLVM_MCA_HARDWAREUNIT_H
0 //===------------------------- LSUnit.h --------------------------*- C++-*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// A Load/Store unit class that models load/store queues and that implements
11 /// a simple weak memory consistency model.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #ifndef LLVM_MCA_LSUNIT_H
16 #define LLVM_MCA_LSUNIT_H
17
18 #include "llvm/ADT/SmallSet.h"
19 #include "llvm/MC/MCSchedule.h"
20 #include "llvm/MCA/HardwareUnits/HardwareUnit.h"
21
22 namespace llvm {
23 namespace mca {
24
25 class InstRef;
26 class Scheduler;
27
28 /// A Load/Store Unit implementing a load and store queues.
29 ///
30 /// This class implements a load queue and a store queue to emulate the
31 /// out-of-order execution of memory operations.
32 /// Each load (or store) consumes an entry in the load (or store) queue.
33 ///
34 /// Rules are:
35 /// 1) A younger load is allowed to pass an older load only if there are no
36 /// stores nor barriers in between the two loads.
37 /// 2) An younger store is not allowed to pass an older store.
38 /// 3) A younger store is not allowed to pass an older load.
39 /// 4) A younger load is allowed to pass an older store only if the load does
40 /// not alias with the store.
41 ///
42 /// This class optimistically assumes that loads don't alias store operations.
43 /// Under this assumption, younger loads are always allowed to pass older
44 /// stores (this would only affects rule 4).
45 /// Essentially, this class doesn't perform any sort alias analysis to
46 /// identify aliasing loads and stores.
47 ///
48 /// To enforce aliasing between loads and stores, flag `AssumeNoAlias` must be
49 /// set to `false` by the constructor of LSUnit.
50 ///
51 /// Note that this class doesn't know about the existence of different memory
52 /// types for memory operations (example: write-through, write-combining, etc.).
53 /// Derived classes are responsible for implementing that extra knowledge, and
54 /// provide different sets of rules for loads and stores by overriding method
55 /// `isReady()`.
56 /// To emulate a write-combining memory type, rule 2. must be relaxed in a
57 /// derived class to enable the reordering of non-aliasing store operations.
58 ///
59 /// No assumptions are made by this class on the size of the store buffer. This
60 /// class doesn't know how to identify cases where store-to-load forwarding may
61 /// occur.
62 ///
63 /// LSUnit doesn't attempt to predict whether a load or store hits or misses
64 /// the L1 cache. To be more specific, LSUnit doesn't know anything about
65 /// cache hierarchy and memory types.
66 /// It only knows if an instruction "mayLoad" and/or "mayStore". For loads, the
67 /// scheduling model provides an "optimistic" load-to-use latency (which usually
68 /// matches the load-to-use latency for when there is a hit in the L1D).
69 /// Derived classes may expand this knowledge.
70 ///
71 /// Class MCInstrDesc in LLVM doesn't know about serializing operations, nor
72 /// memory-barrier like instructions.
73 /// LSUnit conservatively assumes that an instruction which `mayLoad` and has
74 /// `unmodeled side effects` behave like a "soft" load-barrier. That means, it
75 /// serializes loads without forcing a flush of the load queue.
76 /// Similarly, instructions that both `mayStore` and have `unmodeled side
77 /// effects` are treated like store barriers. A full memory
78 /// barrier is a 'mayLoad' and 'mayStore' instruction with unmodeled side
79 /// effects. This is obviously inaccurate, but this is the best that we can do
80 /// at the moment.
81 ///
82 /// Each load/store barrier consumes one entry in the load/store queue. A
83 /// load/store barrier enforces ordering of loads/stores:
84 /// - A younger load cannot pass a load barrier.
85 /// - A younger store cannot pass a store barrier.
86 ///
87 /// A younger load has to wait for the memory load barrier to execute.
88 /// A load/store barrier is "executed" when it becomes the oldest entry in
89 /// the load/store queue(s). That also means, all the older loads/stores have
90 /// already been executed.
91 class LSUnit : public HardwareUnit {
92 // Load queue size.
93 // LQ_Size == 0 means that there are infinite slots in the load queue.
94 unsigned LQ_Size;
95
96 // Store queue size.
97 // SQ_Size == 0 means that there are infinite slots in the store queue.
98 unsigned SQ_Size;
99
100 // If true, loads will never alias with stores. This is the default.
101 bool NoAlias;
102
103 // When a `MayLoad` instruction is dispatched to the schedulers for execution,
104 // the LSUnit reserves an entry in the `LoadQueue` for it.
105 //
106 // LoadQueue keeps track of all the loads that are in-flight. A load
107 // instruction is eventually removed from the LoadQueue when it reaches
108 // completion stage. That means, a load leaves the queue whe it is 'executed',
109 // and its value can be forwarded on the data path to outside units.
110 //
111 // This class doesn't know about the latency of a load instruction. So, it
112 // conservatively/pessimistically assumes that the latency of a load opcode
113 // matches the instruction latency.
114 //
115 // FIXME: In the absence of cache misses (i.e. L1I/L1D/iTLB/dTLB hits/misses),
116 // and load/store conflicts, the latency of a load is determined by the depth
117 // of the load pipeline. So, we could use field `LoadLatency` in the
118 // MCSchedModel to model that latency.
119 // Field `LoadLatency` often matches the so-called 'load-to-use' latency from
120 // L1D, and it usually already accounts for any extra latency due to data
121 // forwarding.
122 // When doing throughput analysis, `LoadLatency` is likely to
123 // be a better predictor of load latency than instruction latency. This is
124 // particularly true when simulating code with temporal/spatial locality of
125 // memory accesses.
126 // Using `LoadLatency` (instead of the instruction latency) is also expected
127 // to improve the load queue allocation for long latency instructions with
128 // folded memory operands (See PR39829).
129 //
130 // FIXME: On some processors, load/store operations are split into multiple
131 // uOps. For example, X86 AMD Jaguar natively supports 128-bit data types, but
132 // not 256-bit data types. So, a 256-bit load is effectively split into two
133 // 128-bit loads, and each split load consumes one 'LoadQueue' entry. For
134 // simplicity, this class optimistically assumes that a load instruction only
135 // consumes one entry in the LoadQueue. Similarly, store instructions only
136 // consume a single entry in the StoreQueue.
137 // In future, we should reassess the quality of this design, and consider
138 // alternative approaches that let instructions specify the number of
139 // load/store queue entries which they consume at dispatch stage (See
140 // PR39830).
141 SmallSet LoadQueue;
142 SmallSet StoreQueue;
143
144 void assignLQSlot(unsigned Index);
145 void assignSQSlot(unsigned Index);
146 bool isReadyNoAlias(unsigned Index) const;
147
148 // An instruction that both 'mayStore' and 'HasUnmodeledSideEffects' is
149 // conservatively treated as a store barrier. It forces older store to be
150 // executed before newer stores are issued.
151 SmallSet StoreBarriers;
152
153 // An instruction that both 'MayLoad' and 'HasUnmodeledSideEffects' is
154 // conservatively treated as a load barrier. It forces older loads to execute
155 // before newer loads are issued.
156 SmallSet LoadBarriers;
157
158 bool isSQEmpty() const { return StoreQueue.empty(); }
159 bool isLQEmpty() const { return LoadQueue.empty(); }
160 bool isSQFull() const { return SQ_Size != 0 && StoreQueue.size() == SQ_Size; }
161 bool isLQFull() const { return LQ_Size != 0 && LoadQueue.size() == LQ_Size; }
162
163 public:
164 LSUnit(const MCSchedModel &SM, unsigned LQ = 0, unsigned SQ = 0,
165 bool AssumeNoAlias = false);
166
167 #ifndef NDEBUG
168 void dump() const;
169 #endif
170
171 enum Status { LSU_AVAILABLE = 0, LSU_LQUEUE_FULL, LSU_SQUEUE_FULL };
172
173 // Returns LSU_AVAILABLE if there are enough load/store queue entries to serve
174 // IR. It also returns LSU_AVAILABLE if IR is not a memory operation.
175 Status isAvailable(const InstRef &IR) const;
176
177 // Allocates load/store queue resources for IR.
178 //
179 // This method assumes that a previous call to `isAvailable(IR)` returned
180 // LSU_AVAILABLE, and that IR is a memory operation.
181 void dispatch(const InstRef &IR);
182
183 // By default, rules are:
184 // 1. A store may not pass a previous store.
185 // 2. A load may not pass a previous store unless flag 'NoAlias' is set.
186 // 3. A load may pass a previous load.
187 // 4. A store may not pass a previous load (regardless of flag 'NoAlias').
188 // 5. A load has to wait until an older load barrier is fully executed.
189 // 6. A store has to wait until an older store barrier is fully executed.
190 virtual bool isReady(const InstRef &IR) const;
191
192 // Load and store instructions are tracked by their corresponding queues from
193 // dispatch until the "instruction executed" event.
194 // Only when a load instruction reaches the 'Executed' stage, its value
195 // becomes available to the users. At that point, the load no longer needs to
196 // be tracked by the load queue.
197 // FIXME: For simplicity, we optimistically assume a similar behavior for
198 // store instructions. In practice, store operations don't tend to leave the
199 // store queue until they reach the 'Retired' stage (See PR39830).
200 void onInstructionExecuted(const InstRef &IR);
201 };
202
203 } // namespace mca
204 } // namespace llvm
205
206 #endif // LLVM_MCA_LSUNIT_H
0 //===--------------------- RegisterFile.h -----------------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file defines a register mapping file class. This class is responsible
11 /// for managing hardware register files and the tracking of data dependencies
12 /// between registers.
13 ///
14 //===----------------------------------------------------------------------===//
15
16 #ifndef LLVM_MCA_REGISTER_FILE_H
17 #define LLVM_MCA_REGISTER_FILE_H
18
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/MC/MCRegisterInfo.h"
22 #include "llvm/MC/MCSchedule.h"
23 #include "llvm/MCA/HardwareUnits/HardwareUnit.h"
24 #include "llvm/Support/Error.h"
25
26 namespace llvm {
27 namespace mca {
28
29 class ReadState;
30 class WriteState;
31 class WriteRef;
32
33 /// Manages hardware register files, and tracks register definitions for
34 /// register renaming purposes.
35 class RegisterFile : public HardwareUnit {
36 const MCRegisterInfo &MRI;
37
38 // class RegisterMappingTracker is a physical register file (PRF) descriptor.
39 // There is one RegisterMappingTracker for every PRF definition in the
40 // scheduling model.
41 //
42 // An instance of RegisterMappingTracker tracks the number of physical
43 // registers available for renaming. It also tracks the number of register
44 // moves eliminated per cycle.
45 struct RegisterMappingTracker {
46 // The total number of physical registers that are available in this
47 // register file for register renaming purpouses. A value of zero for this
48 // field means: this register file has an unbounded number of physical
49 // registers.
50 const unsigned NumPhysRegs;
51 // Number of physical registers that are currently in use.
52 unsigned NumUsedPhysRegs;
53
54 // Maximum number of register moves that can be eliminated by this PRF every
55 // cycle. A value of zero means that there is no limit in the number of
56 // moves which can be eliminated every cycle.
57 const unsigned MaxMoveEliminatedPerCycle;
58
59 // Number of register moves eliminated during this cycle.
60 //
61 // This value is increased by one every time a register move is eliminated.
62 // Every new cycle, this value is reset to zero.
63 // A move can be eliminated only if MaxMoveEliminatedPerCycle is zero, or if
64 // NumMoveEliminated is less than MaxMoveEliminatedPerCycle.
65 unsigned NumMoveEliminated;
66
67 // If set, move elimination is restricted to zero-register moves only.
68 bool AllowZeroMoveEliminationOnly;
69
70 RegisterMappingTracker(unsigned NumPhysRegisters,
71 unsigned MaxMoveEliminated = 0U,
72 bool AllowZeroMoveElimOnly = false)
73 : NumPhysRegs(NumPhysRegisters), NumUsedPhysRegs(0),
74 MaxMoveEliminatedPerCycle(MaxMoveEliminated), NumMoveEliminated(0U),
75 AllowZeroMoveEliminationOnly(AllowZeroMoveElimOnly) {}
76 };
77
78 // A vector of register file descriptors. This set always contains at least
79 // one entry. Entry at index #0 is reserved. That entry describes a register
80 // file with an unbounded number of physical registers that "sees" all the
81 // hardware registers declared by the target (i.e. all the register
82 // definitions in the target specific `XYZRegisterInfo.td` - where `XYZ` is
83 // the target name).
84 //
85 // Users can limit the number of physical registers that are available in
86 // regsiter file #0 specifying command line flag `-register-file-size=`.
87 SmallVector RegisterFiles;
88
89 // This type is used to propagate information about the owner of a register,
90 // and the cost of allocating it in the PRF. Register cost is defined as the
91 // number of physical registers consumed by the PRF to allocate a user
92 // register.
93 //
94 // For example: on X86 BtVer2, a YMM register consumes 2 128-bit physical
95 // registers. So, the cost of allocating a YMM register in BtVer2 is 2.
96 using IndexPlusCostPairTy = std::pair;
97
98 // Struct RegisterRenamingInfo is used to map logical registers to register
99 // files.
100 //
101 // There is a RegisterRenamingInfo object for every logical register defined
102 // by the target. RegisteRenamingInfo objects are stored into vector
103 // `RegisterMappings`, and MCPhysReg IDs can be used to reference
104 // elements in that vector.
105 //
106 // Each RegisterRenamingInfo is owned by a PRF, and field `IndexPlusCost`
107 // specifies both the owning PRF, as well as the number of physical registers
108 // consumed at register renaming stage.
109 //
110 // Field `AllowMoveElimination` is set for registers that are used as
111 // destination by optimizable register moves.
112 //
113 // Field `AliasRegID` is set by writes from register moves that have been
114 // eliminated at register renaming stage. A move eliminated at register
115 // renaming stage is effectively bypassed, and its write aliases the source
116 // register definition.
117 struct RegisterRenamingInfo {
118 IndexPlusCostPairTy IndexPlusCost;
119 MCPhysReg RenameAs;
120 MCPhysReg AliasRegID;
121 bool AllowMoveElimination;
122 RegisterRenamingInfo()
123 : IndexPlusCost(std::make_pair(0U, 1U)), RenameAs(0U), AliasRegID(0U),
124 AllowMoveElimination(false) {}
125 };
126
127 // RegisterMapping objects are mainly used to track physical register
128 // definitions and resolve data dependencies.
129 //
130 // Every register declared by the Target is associated with an instance of
131 // RegisterMapping. RegisterMapping objects keep track of writes to a logical
132 // register. That information is used by class RegisterFile to resolve data
133 // dependencies, and correctly set latencies for register uses.
134 //
135 // This implementation does not allow overlapping register files. The only
136 // register file that is allowed to overlap with other register files is
137 // register file #0. If we exclude register #0, every register is "owned" by
138 // at most one register file.
139 using RegisterMapping = std::pair;
140
141 // There is one entry per each register defined by the target.
142 std::vector RegisterMappings;
143
144 // Used to track zero registers. There is one bit for each register defined by
145 // the target. Bits are set for registers that are known to be zero.
146 APInt ZeroRegisters;
147
148 // This method creates a new register file descriptor.
149 // The new register file owns all of the registers declared by register
150 // classes in the 'RegisterClasses' set.
151 //
152 // Processor models allow the definition of RegisterFile(s) via tablegen. For
153 // example, this is a tablegen definition for a x86 register file for
154 // XMM[0-15] and YMM[0-15], that allows up to 60 renames (each rename costs 1
155 // physical register).
156 //
157 // def FPRegisterFile : RegisterFile<60, [VR128RegClass, VR256RegClass]>
158 //
159 // Here FPRegisterFile contains all the registers defined by register class
160 // VR128RegClass and VR256RegClass. FPRegisterFile implements 60
161 // registers which can be used for register renaming purpose.
162 void addRegisterFile(const MCRegisterFileDesc &RF,
163 ArrayRef Entries);
164
165 // Consumes physical registers in each register file specified by the
166 // `IndexPlusCostPairTy`. This method is called from `addRegisterMapping()`.
167 void allocatePhysRegs(const RegisterRenamingInfo &Entry,
168 MutableArrayRef UsedPhysRegs);
169
170 // Releases previously allocated physical registers from the register file(s).
171 // This method is called from `invalidateRegisterMapping()`.
172 void freePhysRegs(const RegisterRenamingInfo &Entry,
173 MutableArrayRef FreedPhysRegs);
174
175 // Collects writes that are in a RAW dependency with RS.
176 // This method is called from `addRegisterRead()`.
177 void collectWrites(const ReadState &RS,
178 SmallVectorImpl &Writes) const;
179
180 // Create an instance of RegisterMappingTracker for every register file
181 // specified by the processor model.
182 // If no register file is specified, then this method creates a default
183 // register file with an unbounded number of physical registers.
184 void initialize(const MCSchedModel &SM, unsigned NumRegs);
185
186 public:
187 RegisterFile(const MCSchedModel &SM, const MCRegisterInfo &mri,
188 unsigned NumRegs = 0);
189
190 // This method updates the register mappings inserting a new register
191 // definition. This method is also responsible for updating the number of
192 // allocated physical registers in each register file modified by the write.
193 // No physical regiser is allocated if this write is from a zero-idiom.
194 void addRegisterWrite(WriteRef Write, MutableArrayRef UsedPhysRegs);
195
196 // Collect writes that are in a data dependency with RS, and update RS
197 // internal state.
198 void addRegisterRead(ReadState &RS, SmallVectorImpl &Writes) const;
199
200 // Removes write \param WS from the register mappings.
201 // Physical registers may be released to reflect this update.
202 // No registers are released if this write is from a zero-idiom.
203 void removeRegisterWrite(const WriteState &WS,
204 MutableArrayRef FreedPhysRegs);
205
206 // Returns true if a move from RS to WS can be eliminated.
207 // On success, it updates WriteState by setting flag `WS.isEliminated`.
208 // If RS is a read from a zero register, and WS is eliminated, then
209 // `WS.WritesZero` is also set, so that method addRegisterWrite() would not
210 // reserve a physical register for it.
211 bool tryEliminateMove(WriteState &WS, ReadState &RS);
212
213 // Checks if there are enough physical registers in the register files.
214 // Returns a "response mask" where each bit represents the response from a
215 // different register file. A mask of all zeroes means that all register
216 // files are available. Otherwise, the mask can be used to identify which
217 // register file was busy. This sematic allows us to classify dispatch
218 // stalls caused by the lack of register file resources.
219 //
220 // Current implementation can simulate up to 32 register files (including the
221 // special register file at index #0).
222 unsigned isAvailable(ArrayRef Regs) const;
223
224 // Returns the number of PRFs implemented by this processor.
225 unsigned getNumRegisterFiles() const { return RegisterFiles.size(); }
226
227 // Notify each PRF that a new cycle just started.
228 void cycleStart();
229
230 #ifndef NDEBUG
231 void dump() const;
232 #endif
233 };
234
235 } // namespace mca
236 } // namespace llvm
237
238 #endif // LLVM_MCA_REGISTER_FILE_H
0 //===--------------------- ResourceManager.h --------------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// The classes here represent processor resource units and their management
11 /// strategy. These classes are managed by the Scheduler.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #ifndef LLVM_MCA_RESOURCE_MANAGER_H
16 #define LLVM_MCA_RESOURCE_MANAGER_H
17
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/MC/MCSchedule.h"
22 #include "llvm/MCA/Instruction.h"
23 #include "llvm/MCA/Support.h"
24
25 namespace llvm {
26 namespace mca {
27
28 /// Used to notify the internal state of a processor resource.
29 ///
30 /// A processor resource is available if it is not reserved, and there are
31 /// available slots in the buffer. A processor resource is unavailable if it
32 /// is either reserved, or the associated buffer is full. A processor resource
33 /// with a buffer size of -1 is always available if it is not reserved.
34 ///
35 /// Values of type ResourceStateEvent are returned by method
36 /// ResourceState::isBufferAvailable(), which is used to query the internal
37 /// state of a resource.
38 ///
39 /// The naming convention for resource state events is:
40 /// * Event names start with prefix RS_
41 /// * Prefix RS_ is followed by a string describing the actual resource state.
42 enum ResourceStateEvent {
43 RS_BUFFER_AVAILABLE,
44 RS_BUFFER_UNAVAILABLE,
45 RS_RESERVED
46 };
47
/// Resource allocation strategy used by hardware scheduler resources.
///
/// Abstract interface: concrete strategies (see DefaultResourceStrategy below)
/// decide which unit of a multi-unit resource (or group) serves a request.
class ResourceStrategy {
  ResourceStrategy(const ResourceStrategy &) = delete;
  ResourceStrategy &operator=(const ResourceStrategy &) = delete;

public:
  ResourceStrategy() {}
  virtual ~ResourceStrategy();

  /// Selects a processor resource unit from a ReadyMask.
  virtual uint64_t select(uint64_t ReadyMask) = 0;

  /// Called by the ResourceManager when a processor resource group, or a
  /// processor resource with multiple units has become unavailable.
  ///
  /// The default strategy uses this information to bias its selection logic.
  virtual void used(uint64_t ResourceMask) {}
};
66
/// Default resource allocation strategy used by processor resource groups and
/// processor resources with multiple units.
class DefaultResourceStrategy final : public ResourceStrategy {
  /// A Mask of resource unit identifiers.
  ///
  /// There is one bit set for every available resource unit.
  /// It defaults to the value of field ResourceSizeMask in ResourceState.
  const unsigned ResourceUnitMask;

  /// A simple round-robin selector for processor resource units.
  /// Each bit of this mask identifies a sub resource within a group.
  ///
  /// As an example, let's assume that this is a default policy for a
  /// processor resource group composed of the following three units:
  ///   ResourceA -- 0b001
  ///   ResourceB -- 0b010
  ///   ResourceC -- 0b100
  ///
  /// Field NextInSequenceMask is used to select the next unit from the set of
  /// resource units. It defaults to the value of field `ResourceUnitMask` (in
  /// this example, it defaults to mask '0b111').
  ///
  /// The round-robin selector would firstly select 'ResourceC', then
  /// 'ResourceB', and eventually 'ResourceA'.  When a resource R is used, the
  /// corresponding bit in NextInSequenceMask is cleared.  For example, if
  /// 'ResourceC' is selected, then the new value of NextInSequenceMask becomes
  /// 0b011.
  ///
  /// When NextInSequenceMask becomes zero, it is automatically reset to the
  /// default value (i.e. ResourceUnitMask).
  uint64_t NextInSequenceMask;

  /// This field is used to track resource units that are used (i.e. selected)
  /// by other groups other than the one associated with this strategy object.
  ///
  /// In LLVM processor resource groups are allowed to partially (or fully)
  /// overlap. That means, a same unit may be visible to multiple groups.
  /// This field keeps track of uses that have originated from outside of
  /// this group. The idea is to bias the selection strategy, so that resources
  /// that haven't been used by other groups get prioritized.
  ///
  /// The end goal is to (try to) keep the resource distribution as much uniform
  /// as possible. By construction, this mask only tracks one-level of resource
  /// usage. Therefore, this strategy is expected to be less accurate when same
  /// units are used multiple times by other groups within a single round of
  /// select.
  ///
  /// Note: an LRU selector would have a better accuracy at the cost of being
  /// slightly more expensive (mostly in terms of runtime cost). Methods
  /// 'select' and 'used', are always in the hot execution path of llvm-mca.
  /// Therefore, a slow implementation of 'select' would have a negative impact
  /// on the overall performance of the tool.
  uint64_t RemovedFromNextInSequence;

public:
  DefaultResourceStrategy(uint64_t UnitMask)
      : ResourceStrategy(), ResourceUnitMask(UnitMask),
        NextInSequenceMask(UnitMask), RemovedFromNextInSequence(0) {}
  virtual ~DefaultResourceStrategy() = default;

  uint64_t select(uint64_t ReadyMask) override;
  void used(uint64_t Mask) override;
};
130
131 /// A processor resource descriptor.
132 ///
133 /// There is an instance of this class for every processor resource defined by
134 /// the machine scheduling model.
135 /// Objects of class ResourceState dynamically track the usage of processor
136 /// resource units.
137 class ResourceState {
138 /// An index to the MCProcResourceDesc entry in the processor model.
139 const unsigned ProcResourceDescIndex;
140 /// A resource mask. This is generated by the tool with the help of
141 /// function `mca::createProcResourceMasks' (see Support.h).
142 const uint64_t ResourceMask;
143
144 /// A ProcResource can have multiple units.
145 ///
146 /// For processor resource groups,
147 /// this field default to the value of field `ResourceMask`; the number of
148 /// bits set is equal to the cardinality of the group. For normal (i.e.
149 /// non-group) resources, the number of bits set in this mask is equivalent
150 /// to the number of units declared by the processor model (see field
151 /// 'NumUnits' in 'ProcResourceUnits').
152 uint64_t ResourceSizeMask;
153
154 /// A mask of ready units.
155 uint64_t ReadyMask;
156
157 /// Buffered resources will have this field set to a positive number different
158 /// than zero. A buffered resource behaves like a reservation station
159 /// implementing its own buffer for out-of-order execution.
160 ///
161 /// A BufferSize of 1 is used by scheduler resources that force in-order
162 /// execution.
163 ///
164 /// A BufferSize of 0 is used to model in-order issue/dispatch resources.
165 /// Since in-order issue/dispatch resources don't implement buffers, dispatch
166 /// events coincide with issue events.
167 /// Also, no other instruction ca be dispatched/issue while this resource is
168 /// in use. Only when all the "resource cycles" are consumed (after the issue
169 /// event), a new instruction ca be dispatched.
170 const int BufferSize;
171
172 /// Available slots in the buffer (zero, if this is not a buffered resource).
173 unsigned AvailableSlots;
174
175 /// This field is set if this resource is currently reserved.
176 ///
177 /// Resources can be reserved for a number of cycles.
178 /// Instructions can still be dispatched to reserved resources. However,
179 /// istructions dispatched to a reserved resource cannot be issued to the
180 /// underlying units (i.e. pipelines) until the resource is released.
181 bool Unavailable;
182
183 const bool IsAGroup;
184
185 /// Checks for the availability of unit 'SubResMask' in the group.
186 bool isSubResourceReady(uint64_t SubResMask) const {
187 return ReadyMask & SubResMask;
188 }
189
190 public:
191 ResourceState(const MCProcResourceDesc &Desc, unsigned Index, uint64_t Mask);
192
193 unsigned getProcResourceID() const { return ProcResourceDescIndex; }
194 uint64_t getResourceMask() const { return ResourceMask; }
195 uint64_t getReadyMask() const { return ReadyMask; }
196 int getBufferSize() const { return BufferSize; }
197
198 bool isBuffered() const { return BufferSize > 0; }
199 bool isInOrder() const { return BufferSize == 1; }
200
201 /// Returns true if this is an in-order dispatch/issue resource.
202 bool isADispatchHazard() const { return BufferSize == 0; }
203 bool isReserved() const { return Unavailable; }
204
205 void setReserved() { Unavailable = true; }
206 void clearReserved() { Unavailable = false; }
207
208 /// Returs true if this resource is not reserved, and if there are at least
209 /// `NumUnits` available units.
210 bool isReady(unsigned NumUnits = 1) const;
211
212 bool isAResourceGroup() const { return IsAGroup; }
213
214 bool containsResource(uint64_t ID) const { return ResourceMask & ID; }
215
216 void markSubResourceAsUsed(uint64_t ID) {
217 assert(isSubResourceReady(ID));
218 ReadyMask ^= ID;
219 }
220
221 void releaseSubResource(uint64_t ID) {
222 assert(!isSubResourceReady(ID));
223 ReadyMask ^= ID;
224 }
225
226 unsigned getNumUnits() const {
227 return isAResourceGroup() ? 1U : countPopulation(ResourceSizeMask);
228 }
229
230 /// Checks if there is an available slot in the resource buffer.
231 ///
232 /// Returns RS_BUFFER_AVAILABLE if this is not a buffered resource, or if
233 /// there is a slot available.
234 ///
235 /// Returns RS_RESERVED if this buffered resource is a dispatch hazard, and it
236 /// is reserved.
237 ///
238 /// Returns RS_BUFFER_UNAVAILABLE if there are no available slots.
239 ResourceStateEvent isBufferAvailable() const;
240
241 /// Reserve a slot in the buffer.
242 void reserveBuffer() {
243 if (AvailableSlots)
244 AvailableSlots--;
245 }
246
247 /// Release a slot in the buffer.
248 void releaseBuffer() {
249 if (BufferSize > 0)
250 AvailableSlots++;
251 assert(AvailableSlots <= static_cast(BufferSize));
252 }
253
254 #ifndef NDEBUG
255 void dump() const;
256 #endif
257 };
258
/// A resource unit identifier.
///
/// This is used to identify a specific processor resource unit using a pair
/// of indices where the 'first' index is a processor resource mask, and the
/// 'second' index is an index for a "sub-resource" (i.e. unit).
typedef std::pair<uint64_t, uint64_t> ResourceRef;

// First: a MCProcResourceDesc index identifying a buffered resource.
// Second: max number of buffer entries used in this resource.
typedef std::pair<unsigned, unsigned> BufferUsageEntry;
269
270 /// A resource manager for processor resource units and groups.
271 ///
272 /// This class owns all the ResourceState objects, and it is responsible for
273 /// acting on requests from a Scheduler by updating the internal state of
274 /// ResourceState objects.
275 /// This class doesn't know about instruction itineraries and functional units.
276 /// In future, it can be extended to support itineraries too through the same
277 /// public interface.
278 class ResourceManager {
279 // The resource manager owns all the ResourceState.
280 std::vector> Resources;
281 std::vector> Strategies;
282
283 // Keeps track of which resources are busy, and how many cycles are left
284 // before those become usable again.
285 SmallDenseMap BusyResources;
286
287 // A table to map processor resource IDs to processor resource masks.
288 SmallVector ProcResID2Mask;
289
290 // Returns the actual resource unit that will be used.
291 ResourceRef selectPipe(uint64_t ResourceID);
292
293 void use(const ResourceRef &RR);
294 void release(const ResourceRef &RR);
295
296 unsigned getNumUnits(uint64_t ResourceID) const;
297
298 // Overrides the selection strategy for the processor resource with the given
299 // mask.
300 void setCustomStrategyImpl(std::unique_ptr S,
301 uint64_t ResourceMask);
302
303 public:
304 ResourceManager(const MCSchedModel &SM);
305 virtual ~ResourceManager() = default;
306
307 // Overrides the selection strategy for the resource at index ResourceID in
308 // the MCProcResourceDesc table.
309 void setCustomStrategy(std::unique_ptr S,
310 unsigned ResourceID) {
311 assert(ResourceID < ProcResID2Mask.size() &&
312 "Invalid resource index in input!");
313 return setCustomStrategyImpl(std::move(S), ProcResID2Mask[ResourceID]);
314 }
315
316 // Returns RS_BUFFER_AVAILABLE if buffered resources are not reserved, and if
317 // there are enough available slots in the buffers.
318 ResourceStateEvent canBeDispatched(ArrayRef Buffers) const;
319
320 // Return the processor resource identifier associated to this Mask.
321 unsigned resolveResourceMask(uint64_t Mask) const;
322
323 // Consume a slot in every buffered resource from array 'Buffers'. Resource
324 // units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved.
325 void reserveBuffers(ArrayRef Buffers);
326
327 // Release buffer entries previously allocated by method reserveBuffers.
328 void releaseBuffers(ArrayRef Buffers);
329
330 // Reserve a processor resource. A reserved resource is not available for
331 // instruction issue until it is released.
332 void reserveResource(uint64_t ResourceID);
333
334 // Release a previously reserved processor resource.
335 void releaseResource(uint64_t ResourceID);
336
337 // Returns true if all resources are in-order, and there is at least one
338 // resource which is a dispatch hazard (BufferSize = 0).
339 bool mustIssueImmediately(const InstrDesc &Desc) const;
340
341 bool canBeIssued(const InstrDesc &Desc) const;
342
343 void issueInstruction(
344 const InstrDesc &Desc,
345 SmallVectorImpl> &Pipes);
346
347 void cycleEvent(SmallVectorImpl &ResourcesFreed);
348
349 #ifndef NDEBUG
350 void dump() const {
351 for (const std::unique_ptr &Resource : Resources)
352 Resource->dump();
353 }
354 #endif
355 };
356 } // namespace mca
357 } // namespace llvm
358
359 #endif // LLVM_MCA_RESOURCE_MANAGER_H
0 //===---------------------- RetireControlUnit.h -----------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file simulates the hardware responsible for retiring instructions.
11 ///
12 //===----------------------------------------------------------------------===//
13
14 #ifndef LLVM_MCA_RETIRE_CONTROL_UNIT_H
15 #define LLVM_MCA_RETIRE_CONTROL_UNIT_H
16
17 #include "llvm/MC/MCSchedule.h"
18 #include "llvm/MCA/HardwareUnits/HardwareUnit.h"
19 #include "llvm/MCA/Instruction.h"
20 #include
21
22 namespace llvm {
23 namespace mca {
24
25 /// This class tracks which instructions are in-flight (i.e., dispatched but not
26 /// retired) in the OoO backend.
27 //
28 /// This class checks on every cycle if/which instructions can be retired.
29 /// Instructions are retired in program order.
30 /// In the event of an instruction being retired, the pipeline that owns
31 /// this RetireControlUnit (RCU) gets notified.
32 ///
33 /// On instruction retired, register updates are all architecturally
34 /// committed, and any physicall registers previously allocated for the
35 /// retired instruction are freed.
36 struct RetireControlUnit : public HardwareUnit {
37 // A RUToken is created by the RCU for every instruction dispatched to the
38 // schedulers. These "tokens" are managed by the RCU in its token Queue.
39 //
40 // On every cycle ('cycleEvent'), the RCU iterates through the token queue
41 // looking for any token with its 'Executed' flag set. If a token has that
42 // flag set, then the instruction has reached the write-back stage and will
43 // be retired by the RCU.
44 //
45 // 'NumSlots' represents the number of entries consumed by the instruction in
46 // the reorder buffer. Those entries will become available again once the
47 // instruction is retired.
48 //
49 // Note that the size of the reorder buffer is defined by the scheduling
50 // model via field 'NumMicroOpBufferSize'.
51 struct RUToken {
52 InstRef IR;
53 unsigned NumSlots; // Slots reserved to this instruction.
54 bool Executed; // True if the instruction is past the WB stage.
55 };
56
57 private:
58 unsigned NextAvailableSlotIdx;
59 unsigned CurrentInstructionSlotIdx;
60 unsigned AvailableSlots;
61 unsigned MaxRetirePerCycle; // 0 means no limit.
62 std::vector Queue;
63
64 public:
65 RetireControlUnit(const MCSchedModel &SM);
66
67 bool isEmpty() const { return AvailableSlots == Queue.size(); }
68 bool isAvailable(unsigned Quantity = 1) const {
69 // Some instructions may declare a number of uOps which exceeds the size
70 // of the reorder buffer. To avoid problems, cap the amount of slots to
71 // the size of the reorder buffer.
72 Quantity = std::min(Quantity, static_cast(Queue.size()));
73
74 // Further normalize the number of micro opcodes for instructions that
75 // declare zero opcodes. This should match the behavior of method
76 // reserveSlot().
77 Quantity = std::max(Quantity, 1U);
78 return AvailableSlots >= Quantity;
79 }
80
81 unsigned getMaxRetirePerCycle() const { return MaxRetirePerCycle; }
82
83 // Reserves a number of slots, and returns a new token.
84 unsigned reserveSlot(const InstRef &IS, unsigned NumMicroOps);
85
86 // Return the current token from the RCU's circular token queue.
87 const RUToken &peekCurrentToken() const;
88
89 // Advance the pointer to the next token in the circular token queue.
90 void consumeCurrentToken();
91
92 // Update the RCU token to represent the executed state.
93 void onInstructionExecuted(unsigned TokenID);
94
95 #ifndef NDEBUG
96 void dump() const;
97 #endif
98 };
99
100 } // namespace mca
101 } // namespace llvm
102
103 #endif // LLVM_MCA_RETIRE_CONTROL_UNIT_H
0 //===--------------------- Scheduler.h ------------------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// A scheduler for Processor Resource Units and Processor Resource Groups.
11 ///
12 //===----------------------------------------------------------------------===//
13
14 #ifndef LLVM_MCA_SCHEDULER_H
15 #define LLVM_MCA_SCHEDULER_H
16
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/MC/MCSchedule.h"
19 #include "llvm/MCA/HardwareUnits/HardwareUnit.h"
20 #include "llvm/MCA/HardwareUnits/LSUnit.h"
21 #include "llvm/MCA/HardwareUnits/ResourceManager.h"
22 #include "llvm/MCA/Support.h"
23
24 namespace llvm {
25 namespace mca {
26
/// Abstract instruction-selection policy used by class Scheduler to pick the
/// next ready instruction to issue.
class SchedulerStrategy {
public:
  SchedulerStrategy() = default;
  virtual ~SchedulerStrategy();

  /// Returns true if Lhs should take priority over Rhs.
  ///
  /// This method is used by class Scheduler to select the "best" ready
  /// instruction to issue to the underlying pipelines.
  virtual bool compare(const InstRef &Lhs, const InstRef &Rhs) const = 0;
};
38
39 /// Default instruction selection strategy used by class Scheduler.
40 class DefaultSchedulerStrategy : public SchedulerStrategy {
41 /// This method ranks instructions based on their age, and the number of known
42 /// users. The lower the rank value, the better.
43 int computeRank(const InstRef &Lhs) const {
44 return Lhs.getSourceIndex() - Lhs.getInstruction()->getNumUsers();
45 }
46
47 public:
48 DefaultSchedulerStrategy() = default;
49 virtual ~DefaultSchedulerStrategy();
50
51 bool compare(const InstRef &Lhs, const InstRef &Rhs) const override {
52 int LhsRank = computeRank(Lhs);
53 int RhsRank = computeRank(Rhs);
54
55 /// Prioritize older instructions over younger instructions to minimize the
56 /// pressure on the reorder buffer.
57 if (LhsRank == RhsRank)
58 return Lhs.getSourceIndex() < Rhs.getSourceIndex();
59 return LhsRank < RhsRank;
60 }
61 };
62
63 /// Class Scheduler is responsible for issuing instructions to pipeline
64 /// resources.
65 ///
66 /// Internally, it delegates to a ResourceManager the management of processor
67 /// resources. This class is also responsible for tracking the progress of
68 /// instructions from the dispatch stage, until the write-back stage.
69 ///
70 /// An instruction dispatched to the Scheduler is initially placed into either
71 /// the 'WaitSet' or the 'ReadySet' depending on the availability of the input
72 /// operands.
73 ///
74 /// An instruction is moved from the WaitSet to the ReadySet when register
75 /// operands become available, and all memory dependencies are met.
76 /// Instructions that are moved from the WaitSet to the ReadySet transition
77 /// in state from 'IS_AVAILABLE' to 'IS_READY'.
78 ///
79 /// On every cycle, the Scheduler checks if it can promote instructions from the
80 /// WaitSet to the ReadySet.
81 ///
82 /// An Instruction is moved from the ReadySet the `IssuedSet` when it is issued
83 /// to a (one or more) pipeline(s). This event also causes an instruction state
84 /// transition (i.e. from state IS_READY, to state IS_EXECUTING). An Instruction
85 /// leaves the IssuedSet when it reaches the write-back stage.
86 class Scheduler : public HardwareUnit {
87 LSUnit &LSU;
88
89 // Instruction selection strategy for this Scheduler.
90 std::unique_ptr Strategy;
91
92 // Hardware resources that are managed by this scheduler.
93 std::unique_ptr Resources;
94
95 std::vector WaitSet;
96 std::vector ReadySet;
97 std::vector IssuedSet;
98
99 /// Verify the given selection strategy and set the Strategy member
100 /// accordingly. If no strategy is provided, the DefaultSchedulerStrategy is
101 /// used.
102 void initializeStrategy(std::unique_ptr S);
103
104 /// Issue an instruction without updating the ready queue.
105 void issueInstructionImpl(
106 InstRef &IR,
107 SmallVectorImpl> &Pipes);
108
109 // Identify instructions that have finished executing, and remove them from
110 // the IssuedSet. References to executed instructions are added to input
111 // vector 'Executed'.
112 void updateIssuedSet(SmallVectorImpl &Executed);
113
114 // Try to promote instructions from WaitSet to ReadySet.
115 // Add promoted instructions to the 'Ready' vector in input.
116 void promoteToReadySet(SmallVectorImpl &Ready);
117
118 public:
119 Scheduler(const MCSchedModel &Model, LSUnit &Lsu)
120 : Scheduler(Model, Lsu, nullptr) {}
121
122 Scheduler(const MCSchedModel &Model, LSUnit &Lsu,
123 std::unique_ptr SelectStrategy)
124 : Scheduler(make_unique(Model), Lsu,
125 std::move(SelectStrategy)) {}
126
127 Scheduler(std::unique_ptr RM, LSUnit &Lsu,
128 std::unique_ptr SelectStrategy)
129 : LSU(Lsu), Resources(std::move(RM)) {
130 initializeStrategy(std::move(SelectStrategy));
131 }
132
133 // Stalls generated by the scheduler.
134 enum Status {
135 SC_AVAILABLE,
136 SC_LOAD_QUEUE_FULL,
137 SC_STORE_QUEUE_FULL,
138 SC_BUFFERS_FULL,
139 SC_DISPATCH_GROUP_STALL,
140 };
141
142 /// Check if the instruction in 'IR' can be dispatched and returns an answer
143 /// in the form of a Status value.
144 ///
145 /// The DispatchStage is responsible for querying the Scheduler before
146 /// dispatching new instructions. This routine is used for performing such
147 /// a query. If the instruction 'IR' can be dispatched, then true is
148 /// returned, otherwise false is returned with Event set to the stall type.
149 /// Internally, it also checks if the load/store unit is available.
150 Status isAvailable(const InstRef &IR) const;
151
152 /// Reserves buffer and LSUnit queue resources that are necessary to issue
153 /// this instruction.
154 ///
155 /// Returns true if instruction IR is ready to be issued to the underlying
156 /// pipelines. Note that this operation cannot fail; it assumes that a
157 /// previous call to method `isAvailable(IR)` returned `SC_AVAILABLE`.
158 void dispatch(const InstRef &IR);
159
160 /// Returns true if IR is ready to be executed by the underlying pipelines.
161 /// This method assumes that IR has been previously dispatched.
162 bool isReady(const InstRef &IR) const;
163
164 /// Issue an instruction and populates a vector of used pipeline resources,
165 /// and a vector of instructions that transitioned to the ready state as a
166 /// result of this event.
167 void issueInstruction(
168 InstRef &IR,
169 SmallVectorImpl> &Used,
170 SmallVectorImpl &Ready);
171
172 /// Returns true if IR has to be issued immediately, or if IR is a zero
173 /// latency instruction.
174 bool mustIssueImmediately(const InstRef &IR) const;
175
176 /// This routine notifies the Scheduler that a new cycle just started.
177 ///
178 /// It notifies the underlying ResourceManager that a new cycle just started.
179 /// Vector `Freed` is populated with resourceRef related to resources that
180 /// have changed in state, and that are now available to new instructions.
181 /// Instructions executed are added to vector Executed, while vector Ready is
182 /// populated with instructions that have become ready in this new cycle.
183 void cycleEvent(SmallVectorImpl &Freed,
184 SmallVectorImpl &Ready,
185 SmallVectorImpl &Executed);
186
187 /// Convert a resource mask into a valid llvm processor resource identifier.
188 unsigned getResourceID(uint64_t Mask) const {
189 return Resources->resolveResourceMask(Mask);
190 }
191
192 /// Select the next instruction to issue from the ReadySet. Returns an invalid
193 /// instruction reference if there are no ready instructions, or if processor
194 /// resources are not available.
195 InstRef select();
196
197 #ifndef NDEBUG
198 // Update the ready queues.
199 void dump() const;
200
201 // This routine performs a sanity check. This routine should only be called
202 // when we know that 'IR' is not in the scheduler's instruction queues.
203 void sanityCheck(const InstRef &IR) const {
204 assert(find(WaitSet, IR) == WaitSet.end() && "Already in the wait set!");
205 assert(find(ReadySet, IR) == ReadySet.end() && "Already in the ready set!");
206 assert(find(IssuedSet, IR) == IssuedSet.end() && "Already executing!");
207 }
208 #endif // !NDEBUG
209 };
210 } // namespace mca
211 } // namespace llvm
212
213 #endif // LLVM_MCA_SCHEDULER_H
0 //===--------------------- InstrBuilder.h -----------------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// A builder class for instructions that are statically analyzed by llvm-mca.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef LLVM_MCA_INSTRBUILDER_H
15 #define LLVM_MCA_INSTRBUILDER_H
16
17 #include "llvm/MC/MCInstrAnalysis.h"
18 #include "llvm/MC/MCInstrInfo.h"
19 #include "llvm/MC/MCRegisterInfo.h"
20 #include "llvm/MC/MCSubtargetInfo.h"
21 #include "llvm/MCA/Instruction.h"
22 #include "llvm/MCA/Support.h"
23 #include "llvm/Support/Error.h"
24
25 namespace llvm {
26 namespace mca {
27
28 /// A builder class that knows how to construct Instruction objects.
29 ///
30 /// Every llvm-mca Instruction is described by an object of class InstrDesc.
31 /// An InstrDesc describes which registers are read/written by the instruction,
32 /// as well as the instruction latency and hardware resources consumed.
33 ///
34 /// This class is used by the tool to construct Instructions and instruction
35 /// descriptors (i.e. InstrDesc objects).
36 /// Information from the machine scheduling model is used to identify processor
37 /// resources that are consumed by an instruction.
38 class InstrBuilder {
39 const MCSubtargetInfo &STI;
40 const MCInstrInfo &MCII;
41 const MCRegisterInfo &MRI;
42 const MCInstrAnalysis &MCIA;
43 SmallVector ProcResourceMasks;
44
45 DenseMap> Descriptors;
46 DenseMap> VariantDescriptors;
47
48 bool FirstCallInst;
49 bool FirstReturnInst;
50
51 Expected createInstrDescImpl(const MCInst &MCI);
52 Expected getOrCreateInstrDesc(const MCInst &MCI);
53
54 InstrBuilder(const InstrBuilder &) = delete;
55 InstrBuilder &operator=(const InstrBuilder &) = delete;
56
57 void populateWrites(InstrDesc &ID, const MCInst &MCI, unsigned SchedClassID);
58 void populateReads(InstrDesc &ID, const MCInst &MCI, unsigned SchedClassID);
59 Error verifyInstrDesc(const InstrDesc &ID, const MCInst &MCI) const;
60
61 public:
62 InstrBuilder(const MCSubtargetInfo &STI, const MCInstrInfo &MCII,
63 const MCRegisterInfo &RI, const MCInstrAnalysis &IA);
64
65 void clear() {
66 VariantDescriptors.shrink_and_clear();
67 FirstCallInst = true;
68 FirstReturnInst = true;
69 }
70
71 Expected> createInstruction(const MCInst &MCI);
72 };
73 } // namespace mca
74 } // namespace llvm
75
76 #endif // LLVM_MCA_INSTRBUILDER_H
0 //===--------------------- Instruction.h ------------------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file defines abstractions used by the Pipeline to model register reads,
11 /// register writes and instructions.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #ifndef LLVM_MCA_INSTRUCTION_H
16 #define LLVM_MCA_INSTRUCTION_H
17
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/Support/MathExtras.h"
22
23 #ifndef NDEBUG
24 #include "llvm/Support/raw_ostream.h"
25 #endif
26
27 #include
28
29 namespace llvm {
30 namespace mca {
31
32 constexpr int UNKNOWN_CYCLES = -512;
33
/// A register write descriptor.
struct WriteDescriptor {
  // Operand index. The index is negative for implicit writes only.
  // For implicit writes, the actual operand index is computed performing
  // a bitwise not of the OpIndex.
  int OpIndex;
  // Write latency. Number of cycles before write-back stage.
  unsigned Latency;
  // This field is set to a value different than zero only if this
  // is an implicit definition.
  unsigned RegisterID;
  // Instruction itineraries would set this field to the SchedClass ID.
  // Otherwise, it defaults to the WriteResourceID from the MCWriteLatencyEntry
  // element associated to this write.
  // When computing read latencies, this value is matched against the
  // "ReadAdvance" information. The hardware backend may implement
  // dedicated forwarding paths to quickly propagate write results to dependent
  // instructions waiting in the reservation station (effectively bypassing the
  // write-back stage).
  unsigned SClassOrWriteResourceID;
  // True only if this is a write obtained from an optional definition.
  // Optional definitions are allowed to reference regID zero (i.e. "no
  // register").
  bool IsOptionalDef;

  // True for implicit writes (encoded with a negative operand index).
  bool isImplicitWrite() const { return OpIndex < 0; };
};
61
/// A register read descriptor.
struct ReadDescriptor {
  // A MCOperand index. This is used by the Dispatch logic to identify register
  // reads. Implicit reads have negative indices. The actual operand index of an
  // implicit read is the bitwise not of field OpIndex.
  int OpIndex;
  // The actual "UseIdx". This is used to query the ReadAdvance table. Explicit
  // uses always come first in the sequence of uses.
  unsigned UseIndex;
  // This field is only set if this is an implicit read.
  unsigned RegisterID;
  // Scheduling Class Index. It is used to query the scheduling model for the
  // MCSchedClassDesc object.
  unsigned SchedClassID;

  // True for implicit reads (encoded with a negative operand index).
  bool isImplicitRead() const { return OpIndex < 0; };
};
79
80 class ReadState;
81
82 /// Tracks uses of a register definition (e.g. register write).
83 ///
84 /// Each implicit/explicit register write is associated with an instance of
85 /// this class. A WriteState object tracks the dependent users of a
86 /// register write. It also tracks how many cycles are left before the write
87 /// back stage.
88 class WriteState {
89 const WriteDescriptor *WD;
90 // On instruction issue, this field is set equal to the write latency.
91 // Before instruction issue, this field defaults to -512, a special
92 // value that represents an "unknown" number of cycles.
93 int CyclesLeft;
94
95 // Actual register defined by this write. This field is only used
96 // to speedup queries on the register file.
97 // For implicit writes, this field always matches the value of
98 // field RegisterID from WD.
99 unsigned RegisterID;
100
101 // Physical register file that serves register RegisterID.
102 unsigned PRFID;
103
104 // True if this write implicitly clears the upper portion of RegisterID's
105 // super-registers.
106 bool ClearsSuperRegs;
107
108 // True if this write is from a dependency breaking zero-idiom instruction.
109 bool WritesZero;
110
111 // True if this write has been eliminated at register renaming stage.
112 // Example: a register move doesn't consume scheduler/pipleline resources if
113 // it is eliminated at register renaming stage. It still consumes
114 // decode bandwidth, and ROB entries.
115 bool IsEliminated;
116
117 // This field is set if this is a partial register write, and it has a false
118 // dependency on any previous write of the same register (or a portion of it).
119 // DependentWrite must be able to complete before this write completes, so
120 // that we don't break the WAW, and the two writes can be merged together.
121 const WriteState *DependentWrite;
122
123 // A partial write that is in a false dependency with this write.
124 WriteState *PartialWrite;
125
126 unsigned DependentWriteCyclesLeft;
127
128 // A list of dependent reads. Users is a set of dependent
129 // reads. A dependent read is added to the set only if CyclesLeft
130 // is "unknown". As soon as CyclesLeft is 'known', each user in the set
131 // gets notified with the actual CyclesLeft.
132
133 // The 'second' element of a pair is a "ReadAdvance" number of cycles.
134 SmallVector, 4> Users;
135
136 public:
137 WriteState(const WriteDescriptor &Desc, unsigned RegID,
138 bool clearsSuperRegs = false, bool writesZero = false)
139 : WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID),
140 PRFID(0), ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero),
141 IsEliminated(false), DependentWrite(nullptr), PartialWrite(nullptr),
142 DependentWriteCyclesLeft(0) {}
143
144 WriteState(const WriteState &Other) = default;
145 WriteState &operator=(const WriteState &Other) = default;
146
147 int getCyclesLeft() const { return CyclesLeft; }
148 unsigned getWriteResourceID() const { return WD->SClassOrWriteResourceID; }
149 unsigned getRegisterID() const { return RegisterID; }
150 unsigned getRegisterFileID() const { return PRFID; }
151 unsigned getLatency() const { return WD->Latency; }
152
153 void addUser(ReadState *Use, int ReadAdvance);
154 void addUser(WriteState *Use);
155
156 unsigned getDependentWriteCyclesLeft() const { return DependentWriteCyclesLeft; }
157
158 unsigned getNumUsers() const {
159 unsigned NumUsers = Users.size();
160 if (PartialWrite)
161 ++NumUsers;
162 return NumUsers;
163 }
164
165 bool clearsSuperRegisters() const { return ClearsSuperRegs; }
166 bool isWriteZero() const { return WritesZero; }
167 bool isEliminated() const { return IsEliminated; }
168 bool isExecuted() const {
169 return CyclesLeft != UNKNOWN_CYCLES && CyclesLeft <= 0;
170 }
171
172 const WriteState *getDependentWrite() const { return DependentWrite; }
173 void setDependentWrite(WriteState *Other) { DependentWrite = Other; }
174 void writeStartEvent(unsigned Cycles) {
175 DependentWriteCyclesLeft = Cycles;
176 DependentWrite = nullptr;
177 }
178
179 void setWriteZero() { WritesZero = true; }
180 void setEliminated() {
181 assert(Users.empty() && "Write is in an inconsistent state.");
182 CyclesLeft = 0;
183 IsEliminated = true;
184 }
185
186 void setPRF(unsigned PRF) { PRFID = PRF; }
187
188 // On every cycle, update CyclesLeft and notify dependent users.
189 void cycleEvent();
190 void onInstructionIssued();
191
192 #ifndef NDEBUG
193 void dump() const;
194 #endif
195 };
196
/// Tracks register operand latency in cycles.
///
/// A read may be dependent on more than one write. This occurs when some
/// writes only partially update the register associated to this read.
class ReadState {
  const ReadDescriptor *RD;
  // Physical register identifier associated with this read.
  unsigned RegisterID;
  // Physical register file that serves register RegisterID.
  unsigned PRFID;
  // Number of writes that contribute to the definition of RegisterID.
  // In the absence of partial register updates, the number of DependentWrites
  // cannot be more than one.
  unsigned DependentWrites;
  // Number of cycles left before RegisterID can be read. This value depends on
  // the latency of all the dependent writes. It defaults to UNKNOWN_CYCLES.
  // It gets set to the value of field TotalCycles only when the 'CyclesLeft' of
  // every dependent write is known.
  int CyclesLeft;
  // This field is updated on every writeStartEvent(). When the number of
  // dependent writes (i.e. field DependentWrites) is zero, this value is
  // propagated to field CyclesLeft.
  unsigned TotalCycles;
  // This field is set to true only if there are no dependent writes, and
  // there are no `CyclesLeft' to wait.
  bool IsReady;
  // True if this is a read from a known zero register.
  bool IsZero;
  // True if this register read is from a dependency-breaking instruction.
  bool IndependentFromDef;

public:
  ReadState(const ReadDescriptor &Desc, unsigned RegID)
      : RD(&Desc), RegisterID(RegID), PRFID(0), DependentWrites(0),
      CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), IsReady(true),
      IsZero(false), IndependentFromDef(false) {}

  const ReadDescriptor &getDescriptor() const { return *RD; }
  unsigned getSchedClass() const { return RD->SchedClassID; }
  unsigned getRegisterID() const { return RegisterID; }
  unsigned getRegisterFileID() const { return PRFID; }

  // Returns true when every dependent write has completed.
  bool isReady() const { return IsReady; }
  bool isImplicitRead() const { return RD->isImplicitRead(); }

  bool isIndependentFromDef() const { return IndependentFromDef; }
  void setIndependentFromDef() { IndependentFromDef = true; }

  // Declared here; defined out-of-line.
  void cycleEvent();
  // Notifies this read that one of its dependent writes started executing.
  void writeStartEvent(unsigned Cycles);
  // Sets the number of writes this read depends on. A read with zero
  // dependent writes is immediately marked as ready.
  void setDependentWrites(unsigned Writes) {
    DependentWrites = Writes;
    IsReady = !Writes;
  }

  bool isReadZero() const { return IsZero; }
  void setReadZero() { IsZero = true; }
  void setPRF(unsigned ID) { PRFID = ID; }
};
256
/// A sequence of cycles.
///
/// This class can be used as a building block to construct ranges of cycles.
class CycleSegment {
  unsigned Begin; // Inclusive.
  unsigned End;   // Exclusive.
  bool Reserved;  // Resources associated to this segment must be reserved.

public:
  CycleSegment(unsigned StartCycle, unsigned EndCycle, bool IsReserved = false)
      : Begin(StartCycle), End(EndCycle), Reserved(IsReserved) {}

  // Returns true if Cycle falls within the half-open interval [Begin, End).
  bool contains(unsigned Cycle) const { return Cycle >= Begin && Cycle < End; }
  bool startsAfter(const CycleSegment &CS) const { return End <= CS.Begin; }
  bool endsBefore(const CycleSegment &CS) const { return Begin >= CS.End; }
  bool overlaps(const CycleSegment &CS) const {
    return !startsAfter(CS) && !endsBefore(CS);
  }
  bool isExecuting() const { return Begin == 0 && End != 0; }
  bool isExecuted() const { return End == 0; }
  bool operator<(const CycleSegment &Other) const {
    return Begin < Other.Begin;
  }
  // Shifts the segment one cycle towards zero; both endpoints saturate at 0.
  CycleSegment &operator--() {
    if (Begin)
      Begin--;
    if (End)
      End--;
    return *this;
  }

  bool isValid() const { return Begin <= End; }
  unsigned size() const { return End - Begin; }
  // Shrinks the segment from the end by the given number of cycles.
  void subtract(unsigned Cycles) {
    assert(End >= Cycles);
    End -= Cycles;
  }

  unsigned begin() const { return Begin; }
  unsigned end() const { return End; }
  void setEnd(unsigned NewEnd) { End = NewEnd; }
  bool isReserved() const { return Reserved; }
  void setReserved() { Reserved = true; }
};
301
/// Helper used by class InstrDesc to describe how hardware resources
/// are used.
///
/// This class describes how many resource units of a specific resource kind
/// (and how many cycles) are "used" by an instruction.
struct ResourceUsage {
  CycleSegment CS;   // Range of cycles during which the resource is consumed.
  unsigned NumUnits; // Number of resource units consumed.
  ResourceUsage(CycleSegment Cycles, unsigned Units = 1)
      : CS(Cycles), NumUnits(Units) {}
  // Number of cycles in the usage interval.
  unsigned size() const { return CS.size(); }
  bool isReserved() const { return CS.isReserved(); }
  void setReserved() { CS.setReserved(); }
};
316
317 /// An instruction descriptor
318 struct InstrDesc {
319 SmallVector Writes; // Implicit writes are at the end.
320 SmallVector Reads; // Implicit reads are at the end.
321
322 // For every resource used by an instruction of this kind, this vector
323 // reports the number of "consumed cycles".
324 SmallVector, 4> Resources;
325
326 // A list of buffered resources consumed by this instruction.
327 SmallVector Buffers;
328
329 unsigned MaxLatency;
330 // Number of MicroOps for this instruction.
331 unsigned NumMicroOps;
332
333 bool MayLoad;
334 bool MayStore;
335 bool HasSideEffects;
336
337 // A zero latency instruction doesn't consume any scheduler resources.
338 bool isZeroLatency() const { return !MaxLatency && Resources.empty(); }
339
340 InstrDesc() = default;
341 InstrDesc(const InstrDesc &Other) = delete;
342 InstrDesc &operator=(const InstrDesc &Other) = delete;
343 };
344
345 /// Base class for instructions consumed by the simulation pipeline.
346 ///
347 /// This class tracks data dependencies as well as generic properties
348 /// of the instruction.
349 class InstructionBase {
350 const InstrDesc &Desc;
351
352 // This field is set for instructions that are candidates for move
353 // elimination. For more information about move elimination, see the
354 // definition of RegisterMappingTracker in RegisterFile.h
355 bool IsOptimizableMove;
356
357 // Output dependencies.
358 // One entry per each implicit and explicit register definition.
359 SmallVector Defs;
360
361 // Input dependencies.
362 // One entry per each implicit and explicit register use.
363 SmallVector Uses;
364
365 public:
366 InstructionBase(const InstrDesc &D) : Desc(D), IsOptimizableMove(false) {}
367
368 SmallVectorImpl &getDefs() { return Defs; }
369 const ArrayRef getDefs() const { return Defs; }
370 SmallVectorImpl &getUses() { return Uses; }
371 const ArrayRef getUses() const { return Uses; }
372 const InstrDesc &getDesc() const { return Desc; }
373
374 unsigned getLatency() const { return Desc.MaxLatency; }
375
376 bool hasDependentUsers() const {
377 return any_of(Defs,
378 [](const WriteState &Def) { return Def.getNumUsers() > 0; });
379 }
380
381 unsigned getNumUsers() const {
382 unsigned NumUsers = 0;
383 for (const WriteState &Def : Defs)
384 NumUsers += Def.getNumUsers();
385 return NumUsers;
386 }
387
388 // Returns true if this instruction is a candidate for move elimination.
389 bool isOptimizableMove() const { return IsOptimizableMove; }
390 void setOptimizableMove() { IsOptimizableMove = true; }
391 };
392
/// An instruction propagated through the simulated instruction pipeline.
///
/// This class is used to monitor changes to the internal state of instructions
/// that are sent to the various components of the simulated hardware pipeline.
class Instruction : public InstructionBase {
  enum InstrStage {
    IS_INVALID,   // Instruction in an invalid state.
    IS_AVAILABLE, // Instruction dispatched but operands are not ready.
    IS_READY,     // Instruction dispatched and operands ready.
    IS_EXECUTING, // Instruction issued.
    IS_EXECUTED,  // Instruction executed. Values are written back.
    IS_RETIRED    // Instruction retired.
  };

  // The current instruction stage.
  enum InstrStage Stage;

  // This value defaults to the instruction latency. This instruction is
  // considered executed when field CyclesLeft goes to zero.
  int CyclesLeft;

  // Retire Unit token ID for this instruction.
  unsigned RCUTokenID;

public:
  Instruction(const InstrDesc &D)
      : InstructionBase(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES),
      RCUTokenID(0) {}

  unsigned getRCUTokenID() const { return RCUTokenID; }
  int getCyclesLeft() const { return CyclesLeft; }

  // Transition to the dispatch stage, and assign a RCUToken to this
  // instruction. The RCUToken is used to track the completion of every
  // register write performed by this instruction.
  void dispatch(unsigned RCUTokenID);

  // Instruction issued. Transition to the IS_EXECUTING state, and update
  // all the definitions.
  void execute();

  // Force a transition from the IS_AVAILABLE state to the IS_READY state if
  // input operands are all ready. State transitions normally occur at the
  // beginning of a new cycle (see method cycleEvent()). However, the scheduler
  // may decide to promote instructions from the wait queue to the ready queue
  // as the result of another issue event. This method is called every time the
  // instruction might have changed in state.
  void update();

  // Stage predicates; each returns true when the instruction is in the
  // corresponding pipeline state.
  bool isDispatched() const { return Stage == IS_AVAILABLE; }
  bool isReady() const { return Stage == IS_READY; }
  bool isExecuting() const { return Stage == IS_EXECUTING; }
  bool isExecuted() const { return Stage == IS_EXECUTED; }
  bool isRetired() const { return Stage == IS_RETIRED; }

  // True if this instruction was removed at register renaming stage: it is
  // ready, has at least one definition, and all of its writes are eliminated.
  bool isEliminated() const {
    return isReady() && getDefs().size() &&
    all_of(getDefs(),
    [](const WriteState &W) { return W.isEliminated(); });
  }

  // Forces a transition from state IS_AVAILABLE to state IS_EXECUTED.
  void forceExecuted();

  // Marks the instruction as retired; only legal from state IS_EXECUTED.
  void retire() {
    assert(isExecuted() && "Instruction is in an invalid state!");
    Stage = IS_RETIRED;
  }

  // Per-cycle bookkeeping; declared here, defined out-of-line.
  void cycleEvent();
};
464
465 /// An InstRef contains both a SourceMgr index and Instruction pair. The index
466 /// is used as a unique identifier for the instruction. MCA will make use of
467 /// this index as a key throughout MCA.
468 class InstRef {
469 std::pair Data;
470
471 public:
472 InstRef() : Data(std::make_pair(0, nullptr)) {}
473 InstRef(unsigned Index, Instruction *I) : Data(std::make_pair(Index, I)) {}
474
475 bool operator==(const InstRef &Other) const { return Data == Other.Data; }
476
477 unsigned getSourceIndex() const { return Data.first; }
478 Instruction *getInstruction() { return Data.second; }
479 const Instruction *getInstruction() const { return Data.second; }
480
481 /// Returns true if this references a valid instruction.
482 operator bool() const { return Data.second != nullptr; }
483
484 /// Invalidate this reference.
485 void invalidate() { Data.second = nullptr; }
486
487 #ifndef NDEBUG
488 void print(raw_ostream &OS) const { OS << getSourceIndex(); }
489 #endif
490 };
491
492 #ifndef NDEBUG
493 inline raw_ostream &operator<<(raw_ostream &OS, const InstRef &IR) {
494 IR.print(OS);
495 return OS;
496 }
497 #endif
498
499 /// A reference to a register write.
500 ///
501 /// This class is mainly used by the register file to describe register
502 /// mappings. It correlates a register write to the source index of the
503 /// defining instruction.
504 class WriteRef {
505 std::pair Data;
506 static const unsigned INVALID_IID;
507
508 public:
509 WriteRef() : Data(INVALID_IID, nullptr) {}
510 WriteRef(unsigned SourceIndex, WriteState *WS) : Data(SourceIndex, WS) {}
511
512 unsigned getSourceIndex() const { return Data.first; }
513 const WriteState *getWriteState() const { return Data.second; }
514 WriteState *getWriteState() { return Data.second; }
515 void invalidate() { Data.second = nullptr; }
516 bool isWriteZero() const {
517 assert(isValid() && "Invalid null WriteState found!");
518 return getWriteState()->isWriteZero();
519 }
520
521 /// Returns true if this register write has been executed, and the new
522 /// register value is therefore available to users.
523 bool isAvailable() const {
524 if (getSourceIndex() == INVALID_IID)
525 return false;
526 const WriteState *WS = getWriteState();
527 return !WS || WS->isExecuted();
528 }
529
530 bool isValid() const { return Data.first != INVALID_IID && Data.second; }
531 bool operator==(const WriteRef &Other) const { return Data == Other.Data; }
532
533 #ifndef NDEBUG
534 void dump() const;
535 #endif
536 };
537
538 } // namespace mca
539 } // namespace llvm
540
541 #endif // LLVM_MCA_INSTRUCTION_H
0 //===--------------------- Pipeline.h ---------------------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file implements an ordered container of stages that simulate the
11 /// pipeline of a hardware backend.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #ifndef LLVM_MCA_PIPELINE_H
16 #define LLVM_MCA_PIPELINE_H
17
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/MCA/HardwareUnits/Scheduler.h"
20 #include "llvm/MCA/Stages/Stage.h"
21 #include "llvm/Support/Error.h"
22
23 namespace llvm {
24 namespace mca {
25
26 class HWEventListener;
27
28 /// A pipeline for a specific subtarget.
29 ///
30 /// It emulates an out-of-order execution of instructions. Instructions are
31 /// fetched from a MCInst sequence managed by an initial 'Fetch' stage.
32 /// Instructions are firstly fetched, then dispatched to the schedulers, and
33 /// then executed.
34 ///
35 /// This class tracks the lifetime of an instruction from the moment where
36 /// it gets dispatched to the schedulers, to the moment where it finishes
37 /// executing and register writes are architecturally committed.
38 /// In particular, it monitors changes in the state of every instruction
39 /// in flight.
40 ///
41 /// Instructions are executed in a loop of iterations. The number of iterations
42 /// is defined by the SourceMgr object, which is managed by the initial stage
43 /// of the instruction pipeline.
44 ///
45 /// The Pipeline entry point is method 'run()' which executes cycles in a loop
46 /// until there are new instructions to dispatch, and not every instruction
47 /// has been retired.
48 ///
49 /// Internally, the Pipeline collects statistical information in the form of
50 /// histograms. For example, it tracks how the dispatch group size changes
51 /// over time.
52 class Pipeline {
53 Pipeline(const Pipeline &P) = delete;
54 Pipeline &operator=(const Pipeline &P) = delete;
55
56 /// An ordered list of stages that define this instruction pipeline.
57 SmallVector, 8> Stages;
58 std::set Listeners;
59 unsigned Cycles;
60
61 Error runCycle();
62 bool hasWorkToProcess();
63 void notifyCycleBegin();
64 void notifyCycleEnd();
65
66 public:
67 Pipeline() : Cycles(0) {}
68 void appendStage(std::unique_ptr S);
69
70 /// Returns the total number of simulated cycles.
71 Expected run();
72
73 void addEventListener(HWEventListener *Listener);
74 };
75 } // namespace mca
76 } // namespace llvm
77
78 #endif // LLVM_MCA_PIPELINE_H
0 //===--------------------- SourceMgr.h --------------------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements class SourceMgr. Class SourceMgr abstracts the input
/// code sequence (a sequence of MCInst), and assigns unique identifiers to
11 /// every instruction in the sequence.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #ifndef LLVM_MCA_SOURCEMGR_H
16 #define LLVM_MCA_SOURCEMGR_H
17
18 #include "llvm/ADT/ArrayRef.h"
19
20 namespace llvm {
21 namespace mca {
22
23 class Instruction;
24
25 typedef std::pair SourceRef;
26
27 class SourceMgr {
28 using UniqueInst = std::unique_ptr;
29 ArrayRef Sequence;
30 unsigned Current;
31 const unsigned Iterations;
32 static const unsigned DefaultIterations = 100;
33
34 public:
35 SourceMgr(ArrayRef S, unsigned Iter)
36 : Sequence(S), Current(0), Iterations(Iter ? Iter : DefaultIterations) {}
37
38 unsigned getNumIterations() const { return Iterations; }
39 unsigned size() const { return Sequence.size(); }
40 bool hasNext() const { return Current < (Iterations * Sequence.size()); }
41 void updateNext() { ++Current; }
42
43 SourceRef peekNext() const {
44 assert(hasNext() && "Already at end of sequence!");
45 return SourceRef(Current, *Sequence[Current % Sequence.size()]);
46 }
47
48 using const_iterator = ArrayRef::const_iterator;
49 const_iterator begin() const { return Sequence.begin(); }
50 const_iterator end() const { return Sequence.end(); }
51 };
52
53 } // namespace mca
54 } // namespace llvm
55
56 #endif // LLVM_MCA_SOURCEMGR_H
0 //===----------------------- DispatchStage.h --------------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file models the dispatch component of an instruction pipeline.
11 ///
12 /// The DispatchStage is responsible for updating instruction dependencies
13 /// and communicating to the simulated instruction scheduler that an instruction
14 /// is ready to be scheduled for execution.
15 ///
16 //===----------------------------------------------------------------------===//
17
18 #ifndef LLVM_MCA_DISPATCH_STAGE_H
19 #define LLVM_MCA_DISPATCH_STAGE_H
20
21 #include "llvm/MC/MCRegisterInfo.h"
22 #include "llvm/MC/MCSubtargetInfo.h"
23 #include "llvm/MCA/HWEventListener.h"
24 #include "llvm/MCA/HardwareUnits/RegisterFile.h"
25 #include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
26 #include "llvm/MCA/Instruction.h"
27 #include "llvm/MCA/Stages/Stage.h"
28
29 namespace llvm {
30 namespace mca {
31
32 // Implements the hardware dispatch logic.
33 //
34 // This class is responsible for the dispatch stage, in which instructions are
35 // dispatched in groups to the Scheduler. An instruction can be dispatched if
36 // the following conditions are met:
37 // 1) There are enough entries in the reorder buffer (see class
38 // RetireControlUnit) to write the opcodes associated with the instruction.
39 // 2) There are enough physical registers to rename output register operands.
40 // 3) There are enough entries available in the used buffered resource(s).
41 //
42 // The number of micro opcodes that can be dispatched in one cycle is limited by
43 // the value of field 'DispatchWidth'. A "dynamic dispatch stall" occurs when
44 // processor resources are not available. Dispatch stall events are counted
45 // during the entire execution of the code, and displayed by the performance
46 // report when flag '-dispatch-stats' is specified.
47 //
// If the number of micro opcodes exceeds DispatchWidth, then the instruction
49 // is dispatched in multiple cycles.
50 class DispatchStage final : public Stage {
51 unsigned DispatchWidth;
52 unsigned AvailableEntries;
53 unsigned CarryOver;
54 InstRef CarriedOver;
55 const MCSubtargetInfo &STI;
56 RetireControlUnit &RCU;
57 RegisterFile &PRF;
58
59 bool checkRCU(const InstRef &IR) const;
60 bool checkPRF(const InstRef &IR) const;
61 bool canDispatch(const InstRef &IR) const;
62 Error dispatch(InstRef IR);
63
64 void updateRAWDependencies(ReadState &RS, const MCSubtargetInfo &STI);
65
66 void notifyInstructionDispatched(const InstRef &IR,
67 ArrayRef UsedPhysRegs,
68 unsigned uOps) const;
69
70 public:
71 DispatchStage(const MCSubtargetInfo &Subtarget, const MCRegisterInfo &MRI,
72 unsigned MaxDispatchWidth, RetireControlUnit &R,
73 RegisterFile &F)
74 : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth),
75 CarryOver(0U), CarriedOver(), STI(Subtarget), RCU(R), PRF(F) {}
76
77 bool isAvailable(const InstRef &IR) const override;
78
79 // The dispatch logic internally doesn't buffer instructions. So there is
80 // never work to do at the beginning of every cycle.
81 bool hasWorkToComplete() const override { return false; }
82 Error cycleStart() override;
83 Error execute(InstRef &IR) override;
84
85 #ifndef NDEBUG
86 void dump() const;
87 #endif
88 };
89 } // namespace mca
90 } // namespace llvm
91
92 #endif // LLVM_MCA_DISPATCH_STAGE_H
0 //===---------------------- EntryStage.h ------------------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file defines the Entry stage of an instruction pipeline. Its sole
11 /// purpose in life is to pick instructions in sequence and move them to the
12 /// next pipeline stage.
13 ///
14 //===----------------------------------------------------------------------===//
15
16 #ifndef LLVM_MCA_ENTRY_STAGE_H
17 #define LLVM_MCA_ENTRY_STAGE_H
18
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/MCA/SourceMgr.h"
21 #include "llvm/MCA/Stages/Stage.h"
22
23 namespace llvm {
24 namespace mca {
25
26 class EntryStage final : public Stage {
27 InstRef CurrentInstruction;
28 SmallVector, 16> Instructions;
29 SourceMgr &SM;
30 unsigned NumRetired;
31
32 // Updates the program counter, and sets 'CurrentInstruction'.
33 void getNextInstruction();
34
35 EntryStage(const EntryStage &Other) = delete;
36 EntryStage &operator=(const EntryStage &Other) = delete;
37
38 public:
39 EntryStage(SourceMgr &SM) : CurrentInstruction(), SM(SM), NumRetired(0) { }
40
41 bool isAvailable(const InstRef &IR) const override;
42 bool hasWorkToComplete() const override;
43 Error execute(InstRef &IR) override;
44 Error cycleStart() override;
45 Error cycleEnd() override;
46 };
47
48 } // namespace mca
49 } // namespace llvm
50
#endif // LLVM_MCA_ENTRY_STAGE_H
0 //===---------------------- ExecuteStage.h ----------------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file defines the execution stage of a default instruction pipeline.
11 ///
12 /// The ExecuteStage is responsible for managing the hardware scheduler
13 /// and issuing notifications that an instruction has been executed.
14 ///
15 //===----------------------------------------------------------------------===//
16
17 #ifndef LLVM_MCA_EXECUTE_STAGE_H
18 #define LLVM_MCA_EXECUTE_STAGE_H
19
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/MCA/HardwareUnits/Scheduler.h"
22 #include "llvm/MCA/Instruction.h"
23 #include "llvm/MCA/Stages/Stage.h"
24
25 namespace llvm {
26 namespace mca {
27
28 class ExecuteStage final : public Stage {
29 Scheduler &HWS;
30
31 Error issueInstruction(InstRef &IR);
32
33 // Called at the beginning of each cycle to issue already dispatched
34 // instructions to the underlying pipelines.
35 Error issueReadyInstructions();
36
37 // Used to notify instructions eliminated at register renaming stage.
38 Error handleInstructionEliminated(InstRef &IR);
39
40 ExecuteStage(const ExecuteStage &Other) = delete;
41 ExecuteStage &operator=(const ExecuteStage &Other) = delete;
42
43 public:
44 ExecuteStage(Scheduler &S) : Stage(), HWS(S) {}
45
46 // This stage works under the assumption that the Pipeline will eventually
47 // execute a retire stage. We don't need to check if pipelines and/or
48 // schedulers have instructions to process, because those instructions are
49 // also tracked by the retire control unit. That means,
50 // RetireControlUnit::hasWorkToComplete() is responsible for checking if there
51 // are still instructions in-flight in the out-of-order backend.
52 bool hasWorkToComplete() const override { return false; }
53 bool isAvailable(const InstRef &IR) const override;
54
55 // Notifies the scheduler that a new cycle just started.
56 //
57 // This method notifies the scheduler that a new cycle started.
58 // This method is also responsible for notifying listeners about instructions
59 // state changes, and processor resources freed by the scheduler.
60 // Instructions that transitioned to the 'Executed' state are automatically
61 // moved to the next stage (i.e. RetireStage).
62 Error cycleStart() override;
63 Error execute(InstRef &IR) override;
64
65 void notifyInstructionIssued(
66 const InstRef &IR,
67 ArrayRef> Used) const;
68 void notifyInstructionExecuted(const InstRef &IR) const;
69 void notifyInstructionReady(const InstRef &IR) const;
70 void notifyResourceAvailable(const ResourceRef &RR) const;
71
72 // Notify listeners that buffered resources have been consumed or freed.
73 void notifyReservedOrReleasedBuffers(const InstRef &IR, bool Reserved) const;
74 };
75
76 } // namespace mca
77 } // namespace llvm
78
79 #endif // LLVM_MCA_EXECUTE_STAGE_H
0 //===--------------------- InstructionTables.h ------------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file implements a custom stage to generate instruction tables.
11 /// See the description of command-line flag -instruction-tables in
/// docs/CommandGuide/llvm-mca.rst
13 ///
14 //===----------------------------------------------------------------------===//
15
16 #ifndef LLVM_MCA_INSTRUCTIONTABLES_H
17 #define LLVM_MCA_INSTRUCTIONTABLES_H
18
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/MC/MCSchedule.h"
21 #include "llvm/MCA/HardwareUnits/Scheduler.h"
22 #include "llvm/MCA/Stages/Stage.h"
23 #include "llvm/MCA/Support.h"
24
25 namespace llvm {
26 namespace mca {
27
28 class InstructionTables final : public Stage {
29 const MCSchedModel &SM;
30 SmallVector, 4> UsedResources;
31 SmallVector Masks;
32
33 public:
34 InstructionTables(const MCSchedModel &Model) : Stage(), SM(Model) {
35 computeProcResourceMasks(Model, Masks);
36 }
37
38 bool hasWorkToComplete() const override { return false; }
39 Error execute(InstRef &IR) override;
40 };
41 } // namespace mca
42 } // namespace llvm
43
44 #endif // LLVM_MCA_INSTRUCTIONTABLES_H
0 //===---------------------- RetireStage.h -----------------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file defines the retire stage of a default instruction pipeline.
11 /// The RetireStage represents the process logic that interacts with the
12 /// simulated RetireControlUnit hardware.
13 //
14 //===----------------------------------------------------------------------===//
15
16 #ifndef LLVM_MCA_RETIRE_STAGE_H
17 #define LLVM_MCA_RETIRE_STAGE_H
18
19 #include "llvm/MCA/HardwareUnits/RegisterFile.h"
20 #include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
21 #include "llvm/MCA/Stages/Stage.h"
22
23 namespace llvm {
24 namespace mca {
25
class RetireStage final : public Stage {
  // Owner will go away when we move listeners/eventing to the stages.
  RetireControlUnit &RCU;
  RegisterFile &PRF;

  RetireStage(const RetireStage &Other) = delete;
  RetireStage &operator=(const RetireStage &Other) = delete;

public:
  RetireStage(RetireControlUnit &R, RegisterFile &F)
      : Stage(), RCU(R), PRF(F) {}

  // This stage has pending work while the retire control unit still tracks
  // in-flight instructions.
  bool hasWorkToComplete() const override { return !RCU.isEmpty(); }
  Error cycleStart() override;
  Error execute(InstRef &IR) override;
  // Notifies listeners that the given instruction was retired.
  void notifyInstructionRetired(const InstRef &IR) const;
};
43
44 } // namespace mca
45 } // namespace llvm
46
47 #endif // LLVM_MCA_RETIRE_STAGE_H
0 //===---------------------- Stage.h -----------------------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file defines a stage.
11 /// A chain of stages compose an instruction pipeline.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #ifndef LLVM_MCA_STAGE_H
16 #define LLVM_MCA_STAGE_H
17
#include "llvm/MCA/HWEventListener.h"
#include "llvm/Support/Error.h"
#include <set>
21
22 namespace llvm {
23 namespace mca {
24
25 class InstRef;
26
27 class Stage {
28 Stage *NextInSequence;
29 std::set Listeners;
30
31 Stage(const Stage &Other) = delete;
32 Stage &operator=(const Stage &Other) = delete;
33
34 protected:
35 const std::set &getListeners() const { return Listeners; }
36
37 public:
38 Stage() : NextInSequence(nullptr) {}
39 virtual ~Stage();
40
41 /// Returns true if it can execute IR during this cycle.
42 virtual bool isAvailable(const InstRef &IR) const { return true; }
43
44 /// Returns true if some instructions are still executing this stage.
45 virtual bool hasWorkToComplete() const = 0;
46
47 /// Called once at the start of each cycle. This can be used as a setup
48 /// phase to prepare for the executions during the cycle.
49 virtual Error cycleStart() { return ErrorSuccess(); }
50
51 /// Called once at the end of each cycle.
52 virtual Error cycleEnd() { return ErrorSuccess(); }
53
54 /// The primary action that this stage performs on instruction IR.
55 virtual Error execute(InstRef &IR) = 0;
56
57 void setNextInSequence(Stage *NextStage) {
58 assert(!NextInSequence && "This stage already has a NextInSequence!");
59 NextInSequence = NextStage;
60 }
61
62 bool checkNextStage(const InstRef &IR) const {
63 return NextInSequence && NextInSequence->isAvailable(IR);
64 }
65
66 /// Called when an instruction is ready to move the next pipeline stage.
67 ///
68 /// Stages are responsible for moving instructions to their immediate
69 /// successor stages.
70 Error moveToTheNextStage(InstRef &IR) {
71 assert(checkNextStage(IR) && "Next stage is not ready!");
72 return NextInSequence->execute(IR);
73 }
74
75 /// Add a listener to receive callbacks during the execution of this stage.
76 void addListener(HWEventListener *Listener);
77
78 /// Notify listeners of a particular hardware event.
79 template void notifyEvent(const EventT &Event) const {
80 for (HWEventListener *Listener : Listeners)
81 Listener->onEvent(Event);
82 }
83 };
84
85 } // namespace mca
86 } // namespace llvm
87 #endif // LLVM_MCA_STAGE_H
0 //===--------------------- Support.h ----------------------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// Helper functions used by various pipeline components.
11 ///
12 //===----------------------------------------------------------------------===//
13
14 #ifndef LLVM_MCA_SUPPORT_H
15 #define LLVM_MCA_SUPPORT_H
16
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/MC/MCSchedule.h"
20 #include "llvm/Support/Error.h"
21
22 namespace llvm {
23 namespace mca {
24
25 template
26 class InstructionError : public ErrorInfo> {
27 public:
28 static char ID;
29 std::string Message;
30 const T &Inst;
31
32 InstructionError(std::string M, const T &MCI)
33 : Message(std::move(M)), Inst(MCI) {}
34
35 void log(raw_ostream &OS) const override { OS << Message; }
36
37 std::error_code convertToErrorCode() const override {
38 return inconvertibleErrorCode();
39 }
40 };
41
42 template char InstructionError::ID;
43
44 /// This class represents the number of cycles per resource (fractions of
45 /// cycles). That quantity is managed here as a ratio, and accessed via the
46 /// double cast-operator below. The two quantities, number of cycles and
47 /// number of resources, are kept separate. This is used by the
48 /// ResourcePressureView to calculate the average resource cycles
49 /// per instruction/iteration.
50 class ResourceCycles {
51 unsigned Numerator, Denominator;
52
53 public:
54 ResourceCycles() : Numerator(0), Denominator(1) {}
55 ResourceCycles(unsigned Cycles, unsigned ResourceUnits = 1)
56 : Numerator(Cycles), Denominator(ResourceUnits) {}
57
58 operator double() const {
59 assert(Denominator && "Invalid denominator (must be non-zero).");
60 return (Denominator == 1) ? Numerator : (double)Numerator / Denominator;
61 }
62
63 // Add the components of RHS to this instance. Instead of calculating
64 // the final value here, we keep track of the numerator and denominator
65 // separately, to reduce floating point error.
66 ResourceCycles &operator+=(const ResourceCycles &RHS) {
67 if (Denominator == RHS.Denominator)
68 Numerator += RHS.Numerator;
69 else {
70 // Create a common denominator for LHS and RHS by calculating the least
71 // common multiple from the GCD.
72 unsigned GCD = GreatestCommonDivisor64(Denominator, RHS.Denominator);
73 unsigned LCM = (Denominator * RHS.Denominator) / GCD;
74 unsigned LHSNumerator = Numerator * (LCM / Denominator);
75 unsigned RHSNumerator = RHS.Numerator * (LCM / RHS.Denominator);
76 Numerator = LHSNumerator + RHSNumerator;
77 Denominator = LCM;
78 }
79 return *this;
80 }
81 };
82
83 /// Populates vector Masks with processor resource masks.
84 ///
85 /// The number of bits set in a mask depends on the processor resource type.
86 /// Each processor resource mask has at least one bit set. For groups, the
87 /// number of bits set in the mask is equal to the cardinality of the group plus
88 /// one. Excluding the most significant bit, the remaining bits in the mask
89 /// identify processor resources that are part of the group.
90 ///
91 /// Example:
92 ///
93 /// ResourceA -- Mask: 0b001
94 /// ResourceB -- Mask: 0b010
95 /// ResourceAB -- Mask: 0b100 U (ResourceA::Mask | ResourceB::Mask) == 0b111
96 ///
97 /// ResourceAB is a processor resource group containing ResourceA and ResourceB.
98 /// Each resource mask uniquely identifies a resource; both ResourceA and
99 /// ResourceB only have one bit set.
100 /// ResourceAB is a group; excluding the most significant bit in the mask, the
101 /// remaining bits identify the composition of the group.
102 ///
103 /// Resource masks are used by the ResourceManager to solve set membership
104 /// problems with simple bit manipulation operations.
105 void computeProcResourceMasks(const MCSchedModel &SM,
106 SmallVectorImpl &Masks);
107
108 /// Compute the reciprocal block throughput from a set of processor resource
109 /// cycles. The reciprocal block throughput is computed as the MAX between:
110 /// - NumMicroOps / DispatchWidth
111 /// - ProcResourceCycles / #ProcResourceUnits (for every consumed resource).
112 double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth,
113 unsigned NumMicroOps,
114 ArrayRef ProcResourceUsage);
115 } // namespace mca
116 } // namespace llvm
117
118 #endif // LLVM_MCA_SUPPORT_H
1111 add_subdirectory(Analysis)
1212 add_subdirectory(LTO)
1313 add_subdirectory(MC)
14 add_subdirectory(MCA)
1415 add_subdirectory(Object)
1516 add_subdirectory(ObjectYAML)
1617 add_subdirectory(Option)
3030 IRReader
3131 LTO
3232 MC
33 MCA
3334 Object
3435 BinaryFormat
3536 ObjectYAML
# Build rules for LLVMMCA, the machine-code performance analysis library
# used by the llvm-mca tool.
add_llvm_library(LLVMMCA
  Context.cpp
  HWEventListener.cpp
  HardwareUnits/HardwareUnit.cpp
  HardwareUnits/LSUnit.cpp
  HardwareUnits/RegisterFile.cpp
  HardwareUnits/ResourceManager.cpp
  HardwareUnits/RetireControlUnit.cpp
  HardwareUnits/Scheduler.cpp
  InstrBuilder.cpp
  Instruction.cpp
  Pipeline.cpp
  Stages/DispatchStage.cpp
  Stages/EntryStage.cpp
  Stages/ExecuteStage.cpp
  Stages/InstructionTables.cpp
  Stages/RetireStage.cpp
  Stages/Stage.cpp
  Support.cpp

  ADDITIONAL_HEADER_DIRS
  ${LLVM_MAIN_INCLUDE_DIR}/llvm/MCA
  )
0 //===---------------------------- Context.cpp -------------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file defines a class for holding ownership of various simulated
11 /// hardware units. A Context also provides a utility routine for constructing
12 /// a default out-of-order pipeline with fetch, dispatch, execute, and retire
13 /// stages.
14 ///
15 //===----------------------------------------------------------------------===//
16
17 #include "llvm/MCA/Context.h"
18 #include "llvm/MCA/HardwareUnits/RegisterFile.h"
19 #include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
20 #include "llvm/MCA/HardwareUnits/Scheduler.h"
21 #include "llvm/MCA/Stages/DispatchStage.h"
22 #include "llvm/MCA/Stages/EntryStage.h"
23 #include "llvm/MCA/Stages/ExecuteStage.h"
24 #include "llvm/MCA/Stages/RetireStage.h"
25
26 namespace llvm {
27 namespace mca {
28
29 std::unique_ptr
30 Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB,
31 SourceMgr &SrcMgr) {
32 const MCSchedModel &SM = STI.getSchedModel();
33
34 // Create the hardware units defining the backend.
35 auto RCU = llvm::make_unique(SM);
36 auto PRF = llvm::make_unique(SM, MRI, Opts.RegisterFileSize);
37 auto LSU = llvm::make_unique(SM, Opts.LoadQueueSize,
38 Opts.StoreQueueSize, Opts.AssumeNoAlias);
39 auto HWS = llvm::make_unique(SM, *LSU);
40
41 // Create the pipeline stages.
42 auto Fetch = llvm::make_unique(SrcMgr);
43 auto Dispatch = llvm::make_unique(STI, MRI, Opts.DispatchWidth,
44 *RCU, *PRF);
45 auto Execute = llvm::make_unique(*HWS);
46 auto Retire = llvm::make_unique(*RCU, *PRF);
47
48 // Pass the ownership of all the hardware units to this Context.
49 addHardwareUnit(std::move(RCU));
50 addHardwareUnit(std::move(PRF));
51 addHardwareUnit(std::move(LSU));
52 addHardwareUnit(std::move(HWS));
53
54 // Build the pipeline.
55 auto StagePipeline = llvm::make_unique();
56 StagePipeline->appendStage(std::move(Fetch));
57 StagePipeline->appendStage(std::move(Dispatch));
58 StagePipeline->appendStage(std::move(Execute));
59 StagePipeline->appendStage(std::move(Retire));
60 return StagePipeline;
61 }
62
63 } // namespace mca
64 } // namespace llvm
0 //===----------------------- HWEventListener.cpp ----------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file defines a vtable anchor for class HWEventListener.
11 ///
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/MCA/HWEventListener.h"
15
16 namespace llvm {
17 namespace mca {
18
// Out-of-line virtual method definition: anchors the HWEventListener vtable
// to this translation unit.
void HWEventListener::anchor() {}
21 } // namespace mca
22 } // namespace llvm
0 //===------------------------- HardwareUnit.cpp -----------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file defines the anchor for the base class that describes
11 /// simulated hardware units.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #include "llvm/MCA/HardwareUnits/HardwareUnit.h"
16
17 namespace llvm {
18 namespace mca {
19
// Out-of-line destructor definition: pins the HardwareUnit vtable to this
// translation unit.
HardwareUnit::~HardwareUnit() = default;
22
23 } // namespace mca
24 } // namespace llvm
0 //===----------------------- LSUnit.cpp --------------------------*- C++-*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// A Load-Store Unit for the llvm-mca tool.
11 ///
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/MCA/HardwareUnits/LSUnit.h"
15 #include "llvm/MCA/Instruction.h"
16 #include "llvm/Support/Debug.h"
17 #include "llvm/Support/raw_ostream.h"
18
19 #define DEBUG_TYPE "llvm-mca"
20
21 namespace llvm {
22 namespace mca {
23
24 LSUnit::LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ,
25 bool AssumeNoAlias)
26 : LQ_Size(LQ), SQ_Size(SQ), NoAlias(AssumeNoAlias) {
27 if (SM.hasExtraProcessorInfo()) {
28 const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo();
29 if (!LQ_Size && EPI.LoadQueueID) {
30 const MCProcResourceDesc &LdQDesc = *SM.getProcResource(EPI.LoadQueueID);
31 LQ_Size = LdQDesc.BufferSize;
32 }
33
34 if (!SQ_Size && EPI.StoreQueueID) {
35 const MCProcResourceDesc &StQDesc = *SM.getProcResource(EPI.StoreQueueID);
36 SQ_Size = StQDesc.BufferSize;
37 }
38 }
39 }
40
41 #ifndef NDEBUG
42 void LSUnit::dump() const {
43 dbgs() << "[LSUnit] LQ_Size = " << LQ_Size << '\n';
44 dbgs() << "[LSUnit] SQ_Size = " << SQ_Size << '\n';
45 dbgs() << "[LSUnit] NextLQSlotIdx = " << LoadQueue.size() << '\n';
46 dbgs() << "[LSUnit] NextSQSlotIdx = " << StoreQueue.size() << '\n';
47 }
48 #endif
49
50 void LSUnit::assignLQSlot(unsigned Index) {
51 assert(!isLQFull());
52 assert(LoadQueue.count(Index) == 0);
53
54 LLVM_DEBUG(dbgs() << "[LSUnit] - AssignLQSlot
55 << ",slot=" << LoadQueue.size() << ">\n");
56 LoadQueue.insert(Index);
57 }
58
59 void LSUnit::assignSQSlot(unsigned Index) {
60 assert(!isSQFull());
61 assert(StoreQueue.count(Index) == 0);
62
63 LLVM_DEBUG(dbgs() << "[LSUnit] - AssignSQSlot
64 << ",slot=" << StoreQueue.size() << ">\n");
65 StoreQueue.insert(Index);
66 }
67
68 void LSUnit::dispatch(const InstRef &IR) {
69 const InstrDesc &Desc = IR.getInstruction()->getDesc();
70 unsigned IsMemBarrier = Desc.HasSideEffects;
71 assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!");
72
73 const unsigned Index = IR.getSourceIndex();
74 if (Desc.MayLoad) {
75 if (IsMemBarrier)
76 LoadBarriers.insert(Index);
77 assignLQSlot(Index);
78 }
79
80 if (Desc.MayStore) {
81 if (IsMemBarrier)
82 StoreBarriers.insert(Index);
83 assignSQSlot(Index);
84 }
85 }
86
87 LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const {
88 const InstrDesc &Desc = IR.getInstruction()->getDesc();
89 if (Desc.MayLoad && isLQFull())
90 return LSUnit::LSU_LQUEUE_FULL;
91 if (Desc.MayStore && isSQFull())
92 return LSUnit::LSU_SQUEUE_FULL;
93 return LSUnit::LSU_AVAILABLE;
94 }
95
// Returns true if memory operation IR may start executing this cycle, i.e.
// all ordering constraints against older loads/stores and intervening
// memory barriers are satisfied. IR must already hold its queue slot(s).
bool LSUnit::isReady(const InstRef &IR) const {
  const InstrDesc &Desc = IR.getInstruction()->getDesc();
  const unsigned Index = IR.getSourceIndex();
  bool IsALoad = Desc.MayLoad;
  bool IsAStore = Desc.MayStore;
  assert((IsALoad || IsAStore) && "Not a memory operation!");
  assert((!IsALoad || LoadQueue.count(Index) == 1) && "Load not in queue!");
  assert((!IsAStore || StoreQueue.count(Index) == 1) && "Store not in queue!");

  if (IsALoad && !LoadBarriers.empty()) {
    unsigned LoadBarrierIndex = *LoadBarriers.begin();
    // A younger load cannot pass an older load barrier.
    if (Index > LoadBarrierIndex)
      return false;
    // A load barrier cannot pass an older load.
    if (Index == LoadBarrierIndex && Index != *LoadQueue.begin())
      return false;
  }

  if (IsAStore && !StoreBarriers.empty()) {
    unsigned StoreBarrierIndex = *StoreBarriers.begin();
    // A younger store cannot pass an older store barrier.
    if (Index > StoreBarrierIndex)
      return false;
    // A store barrier cannot pass an older store.
    if (Index == StoreBarrierIndex && Index != *StoreQueue.begin())
      return false;
  }

  // A load may not pass a previous store unless flag 'NoAlias' is set.
  // A load may pass a previous load.
  if (NoAlias && IsALoad)
    return true;

  if (StoreQueue.size()) {
    // A load may not pass a previous store.
    // A store may not pass a previous store.
    if (Index > *StoreQueue.begin())
      return false;
  }

  // Okay, we are older than the oldest store in the queue.
  // If there are no pending loads, then we can say for sure that this
  // instruction is ready.
  if (isLQEmpty())
    return true;

  // Check if there are no older loads.
  if (Index <= *LoadQueue.begin())
    return true;

  // There is at least one younger load.
  //
  // A store may not pass a previous load.
  // A load may pass a previous load.
  return !IsAStore;
}
153
154 void LSUnit::onInstructionExecuted(const InstRef &IR) {
155 const InstrDesc &Desc = IR.getInstruction()->getDesc();
156 const unsigned Index = IR.getSourceIndex();
157 bool IsALoad = Desc.MayLoad;
158 bool IsAStore = Desc.MayStore;
159
160 if (IsALoad) {
161 if (LoadQueue.erase(Index)) {
162 LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index
163 << " has been removed from the load queue.\n");
164 }
165 if (!LoadBarriers.empty() && Index == *LoadBarriers.begin()) {
166 LLVM_DEBUG(
167 dbgs() << "[LSUnit]: Instruction idx=" << Index
168 << " has been removed from the set of load barriers.\n");
169 LoadBarriers.erase(Index);
170 }
171 }
172
173 if (IsAStore) {
174 if (StoreQueue.erase(Index)) {
175 LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index
176 << " has been removed from the store queue.\n");
177 }
178
179 if (!StoreBarriers.empty() && Index == *StoreBarriers.begin()) {
180 LLVM_DEBUG(
181 dbgs() << "[LSUnit]: Instruction idx=" << Index
182 << " has been removed from the set of store barriers.\n");
183 StoreBarriers.erase(Index);
184 }
185 }
186 }
187
188 } // namespace mca
189 } // namespace llvm
<
0 //===--------------------- RegisterFile.cpp ---------------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file defines a register mapping file class. This class is responsible
11 /// for managing hardware register files and the tracking of data dependencies
12 /// between registers.
13 ///
14 //===----------------------------------------------------------------------===//
15
16 #include "llvm/MCA/HardwareUnits/RegisterFile.h"
17 #include "llvm/MCA/Instruction.h"
18 #include "llvm/Support/Debug.h"
19
20 #define DEBUG_TYPE "llvm-mca"
21
22 namespace llvm {
23 namespace mca {
24
// Creates register mappings (one per machine register declared by the
// target) plus the zero-register bitvector, then delegates to initialize()
// to build the register file trackers.
RegisterFile::RegisterFile(const MCSchedModel &SM, const MCRegisterInfo &mri,
                           unsigned NumRegs)
    : MRI(mri),
      RegisterMappings(mri.getNumRegs(), {WriteRef(), RegisterRenamingInfo()}),
      ZeroRegisters(mri.getNumRegs(), false) {
  initialize(SM, NumRegs);
}
32
33 void RegisterFile::initialize(const MCSchedModel &SM, unsigned NumRegs) {
34 // Create a default register file that "sees" all the machine registers
35 // declared by the target. The number of physical registers in the default
36 // register file is set equal to `NumRegs`. A value of zero for `NumRegs`
37 // means: this register file has an unbounded number of physical registers.
38 RegisterFiles.emplace_back(NumRegs);
39 if (!SM.hasExtraProcessorInfo())
40 return;
41
42 // For each user defined register file, allocate a RegisterMappingTracker
43 // object. The size of every register file, as well as the mapping between
44 // register files and register classes is specified via tablegen.
45 const MCExtraProcessorInfo &Info = SM.getExtraProcessorInfo();
46
47 // Skip invalid register file at index 0.
48 for (unsigned I = 1, E = Info.NumRegisterFiles; I < E; ++I) {
49 const MCRegisterFileDesc &RF = Info.RegisterFiles[I];
50 assert(RF.NumPhysRegs && "Invalid PRF with zero physical registers!");
51
52 // The cost of a register definition is equivalent to the number of
53 // physical registers that are allocated at register renaming stage.
54 unsigned Length = RF.NumRegisterCostEntries;
55 const MCRegisterCostEntry *FirstElt =
56 &Info.RegisterCostTable[RF.RegisterCostEntryIdx];
57 addRegisterFile(RF, ArrayRef(FirstElt, Length));
58 }
59 }
60
61 void RegisterFile::cycleStart() {
62 for (RegisterMappingTracker &RMT : RegisterFiles)
63 RMT.NumMoveEliminated = 0;
64 }
65
66 void RegisterFile::addRegisterFile(const MCRegisterFileDesc &RF,
67 ArrayRef Entries) {
68 // A default register file is always allocated at index #0. That register file
69 // is mainly used to count the total number of mappings created by all
70 // register files at runtime. Users can limit the number of available physical
71 // registers in register file #0 through the command line flag
72 // `-register-file-size`.
73 unsigned RegisterFileIndex = RegisterFiles.size();
74 RegisterFiles.emplace_back(RF.NumPhysRegs, RF.MaxMovesEliminatedPerCycle,
75 RF.AllowZeroMoveEliminationOnly);
76
77 // Special case where there is no register class identifier in the set.
78 // An empty set of register classes means: this register file contains all
79 // the physical registers specified by the target.
80 // We optimistically assume that a register can be renamed at the cost of a
81 // single physical register. The constructor of RegisterFile ensures that
82 // a RegisterMapping exists for each logical register defined by the Target.
83 if (Entries.empty())
84 return;
85
86 // Now update the cost of individual registers.
87 for (const MCRegisterCostEntry &RCE : Entries) {
88 const MCRegisterClass &RC = MRI.getRegClass(RCE.RegisterClassID);
89 for (const MCPhysReg Reg : RC) {
90 RegisterRenamingInfo &Entry = RegisterMappings[Reg].second;
91 IndexPlusCostPairTy &IPC = Entry.IndexPlusCost;
92 if (IPC.first && IPC.first != RegisterFileIndex) {
93 // The only register file that is allowed to overlap is the default
94 // register file at index #0. The analysis is inaccurate if register
95 // files overlap.
96 errs() << "warning: register " << MRI.getName(Reg)
97 << " defined in multiple register files.";
98 }
99 IPC = std::make_pair(RegisterFileIndex, RCE.Cost);
100 Entry.RenameAs = Reg;
101 Entry.AllowMoveElimination = RCE.AllowMoveElimination;
102
103 // Assume the same cost for each sub-register.
104 for (MCSubRegIterator I(Reg, &MRI); I.isValid(); ++I) {
105 RegisterRenamingInfo &OtherEntry = RegisterMappings[*I].second;
106 if (!OtherEntry.IndexPlusCost.first &&
107 (!OtherEntry.RenameAs ||
108 MRI.isSuperRegister(*I, OtherEntry.RenameAs))) {
109 OtherEntry.IndexPlusCost = IPC;
110 OtherEntry.RenameAs = Reg;
111 }
112 }
113 }
114 }
115 }
116
117 void RegisterFile::allocatePhysRegs(const RegisterRenamingInfo &Entry,
118 MutableArrayRef UsedPhysRegs) {
119 unsigned RegisterFileIndex = Entry.IndexPlusCost.first;
120 unsigned Cost = Entry.IndexPlusCost.second;
121 if (RegisterFileIndex) {
122 RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex];
123 RMT.NumUsedPhysRegs += Cost;
124 UsedPhysRegs[RegisterFileIndex] += Cost;
125 }
126
127 // Now update the default register mapping tracker.
128 RegisterFiles[0].NumUsedPhysRegs += Cost;
129 UsedPhysRegs[0] += Cost;
130 }
131
132 void RegisterFile::freePhysRegs(const RegisterRenamingInfo &Entry,
133 MutableArrayRef FreedPhysRegs) {
134 unsigned RegisterFileIndex = Entry.IndexPlusCost.first;
135 unsigned Cost = Entry.IndexPlusCost.second;
136 if (RegisterFileIndex) {
137 RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex];
138 RMT.NumUsedPhysRegs -= Cost;
139 FreedPhysRegs[RegisterFileIndex] += Cost;
140 }
141
142 // Now update the default register mapping tracker.
143 RegisterFiles[0].NumUsedPhysRegs -= Cost;
144 FreedPhysRegs[0] += Cost;
145 }
146
147 void RegisterFile::addRegisterWrite(WriteRef Write,
148 MutableArrayRef UsedPhysRegs) {
149 WriteState &WS = *Write.getWriteState();
150 unsigned RegID = WS.getRegisterID();
151 assert(RegID && "Adding an invalid register definition?");
152
153 LLVM_DEBUG({
154 dbgs() << "RegisterFile: addRegisterWrite [ " << Write.getSourceIndex()
155 << ", " << MRI.getName(RegID) << "]\n";
156 });
157
158 // If RenameAs is equal to RegID, then RegID is subject to register renaming
159 // and false dependencies on RegID are all eliminated.
160
161 // If RenameAs references the invalid register, then we optimistically assume
162 // that it can be renamed. In the absence of tablegen descriptors for register
163 // files, RenameAs is always set to the invalid register ID. In all other
164 // cases, RenameAs must be either equal to RegID, or it must reference a
165 // super-register of RegID.
166
167 // If RenameAs is a super-register of RegID, then a write to RegID has always
168 // a false dependency on RenameAs. The only exception is for when the write
169 // implicitly clears the upper portion of the underlying register.
170 // If a write clears its super-registers, then it is renamed as `RenameAs`.
171 bool IsWriteZero = WS.isWriteZero();
172 bool IsEliminated = WS.isEliminated();
173 bool ShouldAllocatePhysRegs = !IsWriteZero && !IsEliminated;
174 const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
175 WS.setPRF(RRI.IndexPlusCost.first);
176
177 if (RRI.RenameAs && RRI.RenameAs != RegID) {
178 RegID = RRI.RenameAs;
179 WriteRef &OtherWrite = RegisterMappings[RegID].first;
180
181 if (!WS.clearsSuperRegisters()) {
182 // The processor keeps the definition of `RegID` together with register
183 // `RenameAs`. Since this partial write is not renamed, no physical
184 // register is allocated.
185 ShouldAllocatePhysRegs = false;
186
187 WriteState *OtherWS = OtherWrite.getWriteState();
188 if (OtherWS && (OtherWrite.getSourceIndex() != Write.getSourceIndex())) {
189 // This partial write has a false dependency on RenameAs.
190 assert(!IsEliminated && "Unexpected partial update!");
191 OtherWS->addUser(&WS);
192 }
193 }
194 }
195
196 // Update zero registers.
197 unsigned ZeroRegisterID =
198 WS.clearsSuperRegisters() ? RegID : WS.getRegisterID();
199 if (IsWriteZero) {
200 ZeroRegisters.setBit(ZeroRegisterID);
201 for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I)
202 ZeroRegisters.setBit(*I);
203 } else {
204 ZeroRegisters.clearBit(ZeroRegisterID);
205 for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I)
206 ZeroRegisters.clearBit(*I);
207 }
208
209 // If this is move has been eliminated, then the call to tryEliminateMove
210 // should have already updated all the register mappings.
211 if (!IsEliminated) {
212 // Update the mapping for register RegID including its sub-registers.
213 RegisterMappings[RegID].first = Write;
214 RegisterMappings[RegID].second.AliasRegID = 0U;
215 for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) {
216 RegisterMappings[*I].first = Write;
217 RegisterMappings[*I].second.AliasRegID = 0U;
218 }
219
220 // No physical registers are allocated for instructions that are optimized
221 // in hardware. For example, zero-latency data-dependency breaking
222 // instructions don't consume physical registers.
223 if (ShouldAllocatePhysRegs)
224 allocatePhysRegs(RegisterMappings[RegID].second, UsedPhysRegs);
225 }
226
227 if (!WS.clearsSuperRegisters())
228 return;
229
230 for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) {
231 if (!IsEliminated) {
232 RegisterMappings[*I].first = Write;
233 RegisterMappings[*I].second.AliasRegID = 0U;
234 }
235
236 if (IsWriteZero)
237 ZeroRegisters.setBit(*I);
238 else
239 ZeroRegisters.clearBit(*I);
240 }
241 }
242
243 void RegisterFile::removeRegisterWrite(
244 const WriteState &WS, MutableArrayRef FreedPhysRegs) {
245 // Early exit if this write was eliminated. A write eliminated at register
246 // renaming stage generates an alias, and it is not added to the PRF.
247 if (WS.isEliminated())
248 return;
249
250 unsigned RegID = WS.getRegisterID();
251
252 assert(RegID != 0 && "Invalidating an already invalid register?");
253 assert(WS.getCyclesLeft() != UNKNOWN_CYCLES &&
254 "Invalidating a write of unknown cycles!");
255 assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!");
256
257 bool ShouldFreePhysRegs = !WS.isWriteZero();
258 unsigned RenameAs = RegisterMappings[RegID].second.RenameAs;
259 if (RenameAs && RenameAs != RegID) {
260 RegID = RenameAs;
261
262 if (!WS.clearsSuperRegisters()) {
263 // Keep the definition of `RegID` together with register `RenameAs`.
264 ShouldFreePhysRegs = false;
265 }
266 }
267
268 if (ShouldFreePhysRegs)
269 freePhysRegs(RegisterMappings[RegID].second, FreedPhysRegs);
270
271 WriteRef &WR = RegisterMappings[RegID].first;
272 if (WR.getWriteState() == &WS)
273 WR.invalidate();
274
275 for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) {
276 WriteRef &OtherWR = RegisterMappings[*I].first;
277 if (OtherWR.getWriteState() == &WS)
278 OtherWR.invalidate();
279 }
280
281 if (!WS.clearsSuperRegisters())
282 return;
283
284 for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) {
285 WriteRef &OtherWR = RegisterMappings[*I].first;
286 if (OtherWR.getWriteState() == &WS)
287 OtherWR.invalidate();
288 }
289 }
290
// Attempts to eliminate the register move described by write WS / read RS at
// register renaming stage. On success, marks WS as eliminated (and both
// sides as zero-idioms when the source is a known zero register), records
// an alias from the destination to the source, and consumes one unit of the
// tracker's per-cycle move-elimination budget. Returns false if the move is
// not eliminable under the current model constraints.
bool RegisterFile::tryEliminateMove(WriteState &WS, ReadState &RS) {
  const RegisterMapping &RMFrom = RegisterMappings[RS.getRegisterID()];
  const RegisterMapping &RMTo = RegisterMappings[WS.getRegisterID()];

  // From and To must be owned by the same PRF.
  const RegisterRenamingInfo &RRIFrom = RMFrom.second;
  const RegisterRenamingInfo &RRITo = RMTo.second;
  unsigned RegisterFileIndex = RRIFrom.IndexPlusCost.first;
  if (RegisterFileIndex != RRITo.IndexPlusCost.first)
    return false;

  // We only allow move elimination for writes that update a full physical
  // register. On X86, move elimination is possible with 32-bit general purpose
  // registers because writes to those registers are not partial writes. If a
  // register move is a partial write, then we conservatively assume that move
  // elimination fails, since it would either trigger a partial update, or the
  // issue of a merge opcode.
  //
  // Note that this constraint may be lifted in future. For example, we could
  // make this model more flexible, and let users customize the set of registers
  // (i.e. register classes) that allow move elimination.
  //
  // For now, we assume that there is a strong correlation between registers
  // that allow move elimination, and how those same registers are renamed in
  // hardware.
  if (RRITo.RenameAs && RRITo.RenameAs != WS.getRegisterID()) {
    // Early exit if the PRF doesn't support move elimination for this register.
    if (!RegisterMappings[RRITo.RenameAs].second.AllowMoveElimination)
      return false;
    if (!WS.clearsSuperRegisters())
      return false;
  }

  // Give up if this cycle's move-elimination budget is already spent.
  RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex];
  if (RMT.MaxMoveEliminatedPerCycle &&
      RMT.NumMoveEliminated == RMT.MaxMoveEliminatedPerCycle)
    return false;

  bool IsZeroMove = ZeroRegisters[RS.getRegisterID()];
  if (RMT.AllowZeroMoveEliminationOnly && !IsZeroMove)
    return false;

  MCPhysReg FromReg = RS.getRegisterID();
  MCPhysReg ToReg = WS.getRegisterID();

  // Construct an alias.
  MCPhysReg AliasReg = FromReg;
  if (RRIFrom.RenameAs)
    AliasReg = RRIFrom.RenameAs;

  // Chase an existing alias of the source so chains of eliminated moves all
  // resolve to the same underlying register.
  const RegisterRenamingInfo &RMAlias = RegisterMappings[AliasReg].second;
  if (RMAlias.AliasRegID)
    AliasReg = RMAlias.AliasRegID;

  if (AliasReg != ToReg) {
    RegisterMappings[ToReg].second.AliasRegID = AliasReg;
    for (MCSubRegIterator I(ToReg, &MRI); I.isValid(); ++I)
      RegisterMappings[*I].second.AliasRegID = AliasReg;
  }

  RMT.NumMoveEliminated++;
  if (IsZeroMove) {
    WS.setWriteZero();
    RS.setReadZero();
  }
  WS.setEliminated();

  return true;
}
360
361 void RegisterFile::collectWrites(const ReadState &RS,
362 SmallVectorImpl &Writes) const {
363 unsigned RegID = RS.getRegisterID();
364 assert(RegID && RegID < RegisterMappings.size());
365 LLVM_DEBUG(dbgs() << "RegisterFile: collecting writes for register "
366 << MRI.getName(RegID) << '\n');
367
368 // Check if this is an alias.
369 const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
370 if (RRI.AliasRegID)
371 RegID = RRI.AliasRegID;
372
373 const WriteRef &WR = RegisterMappings[RegID].first;
374 if (WR.isValid())
375 Writes.push_back(WR);
376
377 // Handle potential partial register updates.
378 for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) {
379 const WriteRef &WR = RegisterMappings[*I].first;
380 if (WR.isValid())
381 Writes.push_back(WR);
382 }
383
384 // Remove duplicate entries and resize the input vector.
385 if (Writes.size() > 1) {
386 sort(Writes, [](const WriteRef &Lhs, const WriteRef &Rhs) {
387 return Lhs.getWriteState() < Rhs.getWriteState();
388 });
389 auto It = std::unique(Writes.begin(), Writes.end());
390 Writes.resize(std::distance(Writes.begin(), It));
391 }
392
393 LLVM_DEBUG({
394 for (const WriteRef &WR : Writes) {
395 const WriteState &WS = *WR.getWriteState();
396 dbgs() << "[PRF] Found a dependent use of Register "
397 << MRI.getName(WS.getRegisterID()) << " (defined by instruction #"
398 << WR.getSourceIndex() << ")\n";
399 }
400 });
401 }
402
403 void RegisterFile::addRegisterRead(ReadState &RS,
404 SmallVectorImpl &Defs) const {
405 unsigned RegID = RS.getRegisterID();
406 const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
407 RS.setPRF(RRI.IndexPlusCost.first);
408 if (RS.isIndependentFromDef())
409 return;
410
411 if (ZeroRegisters[RS.getRegisterID()])
412 RS.setReadZero();