llvm.org GIT mirror llvm / 97666c4
[llvm-mca] Add support for move elimination in class RegisterFile. This patch teaches class RegisterFile how to analyze register writes from instructions that are move elimination candidates. In particular, it teaches it how to check if a move can be effectively eliminated by the underlying PRF, and (if necessary) how to perform move elimination. The long term goal is to allow processor models to describe instructions that are valid move elimination candidates. The idea is to let register file definitions in tablegen declare if/when moves can be eliminated. This patch is a non functional change. The logic that performs move elimination is currently disabled. A future patch will add support for move elimination in the processor models, and enable this new code path. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@343691 91177308-0d34-0410-b5e6-96231b3b80d8 Andrea Di Biagio 1 year, 9 months ago
8 changed file(s) with 260 addition(s) and 10 deletion(s). Raw diff Collapse all Expand all
0 # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
1 # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=3 -timeline -register-file-stats < %s | FileCheck %s
2
3 # The register move from XMM0 to XMM1 can be eliminated at register renaming
4 # stage. So, it should not consume pipeline resources.
5
6 vxorps %xmm0, %xmm0, %xmm0
7 vmovaps %xmm0, %xmm1
8 vaddps %xmm1, %xmm1, %xmm2
9
10 # CHECK: Iterations: 3
11 # CHECK-NEXT: Instructions: 9
12 # CHECK-NEXT: Total Cycles: 10
13 # CHECK-NEXT: Total uOps: 9
14
15 # CHECK: Dispatch Width: 2
16 # CHECK-NEXT: uOps Per Cycle: 0.90
17 # CHECK-NEXT: IPC: 0.90
18 # CHECK-NEXT: Block RThroughput: 1.5
19
20 # CHECK: Instruction Info:
21 # CHECK-NEXT: [1]: #uOps
22 # CHECK-NEXT: [2]: Latency
23 # CHECK-NEXT: [3]: RThroughput
24 # CHECK-NEXT: [4]: MayLoad
25 # CHECK-NEXT: [5]: MayStore
26 # CHECK-NEXT: [6]: HasSideEffects (U)
27
28 # CHECK: [1] [2] [3] [4] [5] [6] Instructions:
29 # CHECK-NEXT: 1 0 0.50 vxorps %xmm0, %xmm0, %xmm0
30 # CHECK-NEXT: 1 1 0.50 vmovaps %xmm0, %xmm1
31 # CHECK-NEXT: 1 3 1.00 vaddps %xmm1, %xmm1, %xmm2
32
33 # CHECK: Register File statistics:
34 # CHECK-NEXT: Total number of mappings created: 6
35 # CHECK-NEXT: Max number of mappings used: 5
36
37 # CHECK: * Register File #1 -- JFpuPRF:
38 # CHECK-NEXT: Number of physical registers: 72
39 # CHECK-NEXT: Total number of mappings created: 6
40 # CHECK-NEXT: Max number of mappings used: 5
41
42 # CHECK: * Register File #2 -- JIntegerPRF:
43 # CHECK-NEXT: Number of physical registers: 64
44 # CHECK-NEXT: Total number of mappings created: 0
45 # CHECK-NEXT: Max number of mappings used: 0
46
47 # CHECK: Resources:
48 # CHECK-NEXT: [0] - JALU0
49 # CHECK-NEXT: [1] - JALU1
50 # CHECK-NEXT: [2] - JDiv
51 # CHECK-NEXT: [3] - JFPA
52 # CHECK-NEXT: [4] - JFPM
53 # CHECK-NEXT: [5] - JFPU0
54 # CHECK-NEXT: [6] - JFPU1
55 # CHECK-NEXT: [7] - JLAGU
56 # CHECK-NEXT: [8] - JMul
57 # CHECK-NEXT: [9] - JSAGU
58 # CHECK-NEXT: [10] - JSTC
59 # CHECK-NEXT: [11] - JVALU0
60 # CHECK-NEXT: [12] - JVALU1
61 # CHECK-NEXT: [13] - JVIMUL
62
63 # CHECK: Resource pressure per iteration:
64 # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
65 # CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - -
66
67 # CHECK: Resource pressure by instruction:
68 # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
69 # CHECK-NEXT: - - - - - - - - - - - - - - vxorps %xmm0, %xmm0, %xmm0
70 # CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vmovaps %xmm0, %xmm1
71 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vaddps %xmm1, %xmm1, %xmm2
72
73 # CHECK: Timeline view:
74 # CHECK-NEXT: Index 0123456789
75
76 # CHECK: [0,0] DR . . vxorps %xmm0, %xmm0, %xmm0
77 # CHECK-NEXT: [0,1] DeER . . vmovaps %xmm0, %xmm1
78 # CHECK-NEXT: [0,2] .DeeeER . vaddps %xmm1, %xmm1, %xmm2
79 # CHECK-NEXT: [1,0] .D----R . vxorps %xmm0, %xmm0, %xmm0
80 # CHECK-NEXT: [1,1] . DeE--R . vmovaps %xmm0, %xmm1
81 # CHECK-NEXT: [1,2] . D=eeeER. vaddps %xmm1, %xmm1, %xmm2
82 # CHECK-NEXT: [2,0] . D----R. vxorps %xmm0, %xmm0, %xmm0
83 # CHECK-NEXT: [2,1] . DeE---R vmovaps %xmm0, %xmm1
84 # CHECK-NEXT: [2,2] . DeeeER vaddps %xmm1, %xmm1, %xmm2
85
86 # CHECK: Average Wait times (based on the timeline view):
87 # CHECK-NEXT: [0]: Executions
88 # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
89 # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
90 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
91
92 # CHECK: [0] [1] [2] [3]
93 # CHECK-NEXT: 0. 3 0.0 0.0 2.7 vxorps %xmm0, %xmm0, %xmm0
94 # CHECK-NEXT: 1. 3 1.0 1.0 1.7 vmovaps %xmm0, %xmm1
95 # CHECK-NEXT: 2. 3 1.3 0.0 0.0 vaddps %xmm1, %xmm1, %xmm2
1717 #define LLVM_TOOLS_LLVM_MCA_REGISTER_FILE_H
1818
1919 #include "HardwareUnits/HardwareUnit.h"
20 #include "llvm/ADT/APInt.h"
2021 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/APInt.h"
2222 #include "llvm/MC/MCRegisterInfo.h"
2323 #include "llvm/MC/MCSchedule.h"
2424 #include "llvm/Support/Error.h"
2525
2626 namespace mca {
2727
28 class ReadState;
2829 class WriteState;
2930 class WriteRef;
3031
3334 class RegisterFile : public HardwareUnit {
3435 const llvm::MCRegisterInfo &MRI;
3536
36 // Each register file is associated with an instance of
37 // RegisterMappingTracker.
38 // A RegisterMappingTracker keeps track of the number of physical registers
39 // which have been dynamically allocated by the simulator.
37 // class RegisterMappingTracker is a physical register file (PRF) descriptor.
38 // There is one RegisterMappingTracker for every PRF definition in the
39 // scheduling model.
40 //
41 // An instance of RegisterMappingTracker tracks the number of physical
42 // registers available for renaming. It also tracks the number of register
43 // moves eliminated per cycle.
4044 struct RegisterMappingTracker {
4145 // The total number of physical registers that are available in this
4246 // register file for register renaming purposes. A value of zero for this
4650 // Number of physical registers that are currently in use.
4751 unsigned NumUsedPhysRegs;
4852
49 RegisterMappingTracker(unsigned NumPhysRegisters)
50 : NumPhysRegs(NumPhysRegisters), NumUsedPhysRegs(0) {}
53 // Maximum number of register moves that can be eliminated by this PRF every
54 // cycle. A value of zero means that there is no limit in the number of
55 // moves which can be eliminated every cycle.
56 const unsigned MaxMoveEliminatedPerCycle;
57
58 // Number of register moves eliminated during this cycle.
59 //
60 // This value is increased by one every time a register move is eliminated.
61 // Every new cycle, this value is reset to zero.
62 // A move can be eliminated only if MaxMoveEliminatedPerCycle is zero, or if
63 // NumMoveEliminated is less than MaxMoveEliminatedPerCycle.
64 unsigned NumMoveEliminated;
65
66 RegisterMappingTracker(unsigned NumPhysRegisters,
67 unsigned MaxMoveEliminated = 0U)
68 : NumPhysRegs(NumPhysRegisters), NumUsedPhysRegs(0),
69 MaxMoveEliminatedPerCycle(MaxMoveEliminated), NumMoveEliminated(0U) {}
5170 };
5271
5372 // A vector of register file descriptors. This set always contains at least
81100 // Each RegisterRenamingInfo is owned by a PRF, and field `IndexPlusCost`
82101 // specifies both the owning PRF, as well as the number of physical registers
83102 // consumed at register renaming stage.
103 //
104 // Field `AllowMoveElimination` is set for registers that are used as
105 // destination by optimizable register moves.
106 // Field `AllowZeroMoveEliminationOnly` further restricts move elimination
107 // only to zero-register moves.
84108 struct RegisterRenamingInfo {
85109 IndexPlusCostPairTy IndexPlusCost;
86110 llvm::MCPhysReg RenameAs;
111 bool AllowMoveElimination;
112 bool AllowZeroMoveEliminationOnly;
87113 RegisterRenamingInfo()
88 : IndexPlusCost(std::make_pair(0U, 1U)), RenameAs(0U) {}
114 : IndexPlusCost(std::make_pair(0U, 1U)), RenameAs(0U),
115 AllowMoveElimination(false), AllowZeroMoveEliminationOnly(false) {}
89116 };
90117
91118 // RegisterMapping objects are mainly used to track physical register
160187 void removeRegisterWrite(const WriteState &WS,
161188 llvm::MutableArrayRef FreedPhysRegs);
162189
190 // Returns true if a move from RS to WS can be eliminated.
191 // On success, it updates WriteState by setting flag `WS.isEliminated`.
192 // If RS is a read from a zero register, and WS is eliminated, then
193 // `WS.WritesZero` is also set, so that method addRegisterWrite() would not
194 // reserve a physical register for it.
195 bool tryEliminateMove(WriteState &WS, const ReadState &RS);
196
163197 // Checks if there are enough physical registers in the register files.
164198 // Returns a "response mask" where each bit represents the response from a
165199 // different register file. A mask of all zeroes means that all register
174208 unsigned RegID) const;
175209 unsigned getNumRegisterFiles() const { return RegisterFiles.size(); }
176210
211 // Notify each PRF that a new cycle just started.
212 void cycleStart();
213
177214 #ifndef NDEBUG
178215 void dump() const;
179216 #endif
104104 // True if this write is from a dependency breaking zero-idiom instruction.
105105 bool WritesZero;
106106
107 // True if this write has been eliminated at register renaming stage.
109 // Example: a register move doesn't consume scheduler/pipeline resources if
109 // it is eliminated at register renaming stage. It still consumes
110 // decode bandwidth, and ROB entries.
111 bool IsEliminated;
112
107113 // This field is set if this is a partial register write, and it has a false
108114 // dependency on any previous write of the same register (or a portion of it).
109115 // DependentWrite must be able to complete before this write completes, so
126132 bool clearsSuperRegs = false, bool writesZero = false)
127133 : WD(Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID),
128134 ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero),
129 DependentWrite(nullptr), NumWriteUsers(0U) {}
135 IsEliminated(false), DependentWrite(nullptr), NumWriteUsers(0U) {}
130136 WriteState(const WriteState &Other) = delete;
131137 WriteState &operator=(const WriteState &Other) = delete;
132138
140146 unsigned getNumUsers() const { return Users.size() + NumWriteUsers; }
141147 bool clearsSuperRegisters() const { return ClearsSuperRegs; }
142148 bool isWriteZero() const { return WritesZero; }
149 bool isEliminated() const { return IsEliminated; }
143150 bool isExecuted() const {
144151 return CyclesLeft != UNKNOWN_CYCLES && CyclesLeft <= 0;
145152 }
148155 void setDependentWrite(WriteState *Other) {
149156 DependentWrite = Other;
150157 ++Other->NumWriteUsers;
158 }
159 void setWriteZero() { WritesZero = true; }
// Marks this write as eliminated. Asserts that no entry has been added to
// `Users` yet, sets CyclesLeft to zero and raises the IsEliminated flag.
// NOTE(review): with CyclesLeft == 0, isExecuted() should report true as long
// as UNKNOWN_CYCLES is not 0 -- confirm against the definition of
// UNKNOWN_CYCLES.
160 void setEliminated() {
161 assert(Users.empty() && "Write is in an inconsistent state.");
162 CyclesLeft = 0;
163 IsEliminated = true;
151164 }
152165
153166 // On every cycle, update CyclesLeft and notify dependent users.
325338 // Retire Unit token ID for this instruction.
326339 unsigned RCUTokenID;
327340
341 // This field is set for instructions that are candidates for move
342 // elimination. For more information about move elimination, see the
343 // definition of RegisterMappingTracker in RegisterFile.h
344 //
345 // TODO: Teach subtargets how to describe optimizable register moves.
346 bool IsOptimizableMove;
347
328348 using UniqueDef = std::unique_ptr;
329349 using UniqueUse = std::unique_ptr;
330350 using VecDefs = std::vector;
340360
341361 public:
342362 Instruction(const InstrDesc &D)
343 : Desc(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES), RCUTokenID(0) {}
363 : Desc(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES), RCUTokenID(0),
364 IsOptimizableMove(false) {}
344365 Instruction(const Instruction &Other) = delete;
345366 Instruction &operator=(const Instruction &Other) = delete;
346367
387408 bool isExecuted() const { return Stage == IS_EXECUTED; }
388409 bool isRetired() const { return Stage == IS_RETIRED; }
389410
411 // Returns true if this instruction is a candidate for move elimination.
412 bool isOptimizableMove() const { return IsOptimizableMove; }
413 void setOptimizableMove() { IsOptimizableMove = true; }
// Returns true only when all of the following hold: the instruction is in the
// ready state, it has at least one register definition, and every one of its
// definitions reports isEliminated(). An instruction without definitions is
// never reported as eliminated.
414 bool isEliminated() const {
415 return isReady() && Defs.size() &&
416 llvm::all_of(Defs,
417 [](const UniqueDef &D) { return D->isEliminated(); });
418 }
419
420 // Forces a transition from state IS_READY to state IS_EXECUTED.
421 void forceExecuted();
422
390423 void retire() {
391424 assert(isExecuted() && "Instruction is in an invalid state!");
392425 Stage = IS_RETIRED;
3232 // Called at the beginning of each cycle to issue already dispatched
3333 // instructions to the underlying pipelines.
3434 llvm::Error issueReadyInstructions();
35
36 // Used to notify instructions eliminated at register renaming stage.
37 llvm::Error handleInstructionEliminated(InstRef &IR);
3538
3639 ExecuteStage(const ExecuteStage &Other) = delete;
3740 ExecuteStage &operator=(const ExecuteStage &Other) = delete;
5959 }
6060 }
6161
// Start-of-cycle hook. Resets the per-cycle counter of eliminated moves
// (NumMoveEliminated) to zero for every tracker stored in RegisterFiles.
62 void RegisterFile::cycleStart() {
63 for (RegisterMappingTracker &RMT : RegisterFiles)
64 RMT.NumMoveEliminated = 0;
65 }
66
6267 void RegisterFile::addRegisterFile(ArrayRef Entries,
6368 unsigned NumPhysRegs) {
6469 // A default register file is always allocated at index #0. That register file
263268 }
264269 }
265270
// Tries to eliminate the register move whose source is read RS and whose
// destination is write WS.
//
// Returns false, without modifying WS or any counter, when:
//  - the renaming info of the destination register does not allow move
//    elimination;
//  - source and destination registers are owned by different register files;
//  - the owning register file has a non-zero MaxMoveEliminatedPerCycle and
//    that many moves were already eliminated (a limit of zero never blocks);
//  - the destination only allows zero-register moves and the source register
//    is not flagged in ZeroRegisters.
//
// Otherwise it increments NumMoveEliminated for the owning register file,
// marks WS as eliminated (and also as a write of zero when the source is a
// zero register), and returns true.
271 bool RegisterFile::tryEliminateMove(WriteState &WS, const ReadState &RS) {
272 const RegisterMapping &RMFrom = RegisterMappings[RS.getRegisterID()];
273 const RegisterMapping &RMTo = RegisterMappings[WS.getRegisterID()];
274
275 // Early exit if the PRF doesn't support move elimination for this register.
276 if (!RMTo.second.AllowMoveElimination)
277 return false;
278
279 // From and To must be owned by the same PRF.
280 const RegisterRenamingInfo &RRIFrom = RMFrom.second;
281 const RegisterRenamingInfo &RRITo = RMTo.second;
282 unsigned RegisterFileIndex = RRIFrom.IndexPlusCost.first;
283 if (RegisterFileIndex != RRITo.IndexPlusCost.first)
284 return false;
285
// Enforce the per-cycle budget. A MaxMoveEliminatedPerCycle of zero
// short-circuits the check, i.e. it means "no limit".
286 RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex];
287 if (RMT.MaxMoveEliminatedPerCycle &&
288 RMT.NumMoveEliminated == RMT.MaxMoveEliminatedPerCycle)
289 return false;
290
// The move counts as a "zero move" when the source register is currently
// flagged in ZeroRegisters.
291 bool IsZeroMove = ZeroRegisters[RS.getRegisterID()];
292 if (RRITo.AllowZeroMoveEliminationOnly && !IsZeroMove)
293 return false;
294
// All checks passed: account for the eliminated move and update WS.
295 RMT.NumMoveEliminated++;
296 if (IsZeroMove)
297 WS.setWriteZero();
298 WS.setEliminated();
299 return true;
300 }
301
266302 void RegisterFile::collectWrites(SmallVectorImpl &Writes,
267303 unsigned RegID) const {
268304 assert(RegID && RegID < RegisterMappings.size());
129129 Stage = IS_EXECUTED;
130130 }
131131
// Moves this instruction directly to stage IS_EXECUTED and sets CyclesLeft to
// zero. Asserts that the instruction is currently in stage IS_READY.
132 void Instruction::forceExecuted() {
133 assert(Stage == IS_READY && "Invalid internal state!");
134 CyclesLeft = 0;
135 Stage = IS_EXECUTED;
136 }
137
132138 void Instruction::update() {
133139 assert(isDispatched() && "Unexpected instruction stage found!");
134140
9999 AvailableEntries -= NumMicroOps;
100100 }
101101
102 // Check if this is an optimizable reg-reg move.
// The single definition is the register write (the move's output) and the
// single use is the register read (the move's input); they are handed to
// tryEliminateMove() as the WriteState and the ReadState respectively. The
// assertion messages below name the operand that each check actually covers.
103 if (IS.isOptimizableMove()) {
104 assert(IS.getDefs().size() == 1 && "Expected a single output!");
105 assert(IS.getUses().size() == 1 && "Expected a single input!");
106 PRF.tryEliminateMove(*IS.getDefs()[0], *IS.getUses()[0]);
107 }
108
102109 // A dependency-breaking instruction doesn't have to wait on the register
103110 // input operands, and it is often optimized at register renaming stage.
104111 // Update RAW dependencies if this instruction is not a dependency-breaking
129136 }
130137
131138 Error DispatchStage::cycleStart() {
139 PRF.cycleStart();
140
132141 if (!CarryOver) {
133142 AvailableEntries = DispatchWidth;
134143 return ErrorSuccess();
106106 return issueReadyInstructions();
107107 }
108108
109
// Consistency check compiled only when NDEBUG is not defined. Asserts that the
// instruction referenced by IR reports isEliminated(), is in the ready state,
// and that its descriptor has neither MayLoad nor MayStore set.
110 #ifndef NDEBUG
111 static void verifyInstructionEliminated(const InstRef &IR) {
112 const Instruction &Inst = *IR.getInstruction();
113 assert(Inst.isEliminated() && "Instruction was not eliminated!");
114 assert(Inst.isReady() && "Instruction in an inconsistent state!");
115
116 // Ensure that instructions eliminated at register renaming stage are in a
117 // consistent state.
118 const InstrDesc &Desc = Inst.getDesc();
119 assert(!Desc.MayLoad && !Desc.MayStore && "Cannot eliminate a memory op!");
120 }
121 #endif
122
123
// Handles an instruction that was eliminated at register renaming stage.
// When NDEBUG is not defined, the instruction is first checked with
// verifyInstructionEliminated(). The stage then emits the "ready" and "issued"
// notifications (the latter with `{}` as its second argument), forces the
// instruction into the executed state, emits the "executed" notification, and
// returns the result of moveToTheNextStage(IR).
124 Error ExecuteStage::handleInstructionEliminated(InstRef &IR) {
125 #ifndef NDEBUG
126 verifyInstructionEliminated(IR);
127 #endif
128 notifyInstructionReady(IR);
129 notifyInstructionIssued(IR, {});
130 IR.getInstruction()->forceExecuted();
131 notifyInstructionExecuted(IR);
132 return moveToTheNextStage(IR);
133 }
134
109135 // Schedule the instruction for execution on the hardware.
110136 Error ExecuteStage::execute(InstRef &IR) {
111137 assert(isAvailable(IR) && "Scheduler is not available!");
114140 // Ensure that the HWS has not stored this instruction in its queues.
115141 HWS.sanityCheck(IR);
116142 #endif
143
144 if (IR.getInstruction()->isEliminated())
145 return handleInstructionEliminated(IR);
146
117147 // Reserve a slot in each buffered resource. Also, mark units with
118148 // BufferSize=0 as reserved. Resources with a buffer size of zero will only
119149 // be released after MCIS is issued, and all the ResourceCycles for those