llvm.org GIT mirror llvm / b0b7088
MachineCombiner Pass for selecting faster instruction sequence - target independent framework

When the DAGCombiner selects instruction sequences it could increase the critical path or resource length. For example, on arm64 there are multiply-accumulate instructions (madd, msub). If e.g. the equivalent multiply-add sequence is not on the critical path it makes sense to select it instead of the combined, single accumulate instruction (madd/msub). The reason is that the conversion from add+mul to the madd could lengthen the critical path by the latency of the multiply. But the DAGCombiner would always combine and select the madd/msub instruction.

This patch uses machine trace metrics to estimate the critical path length and resource length of an original instruction sequence vs. a combined instruction sequence, and picks the faster code based on those estimates.

This patch only commits the target-independent framework that evaluates and selects code sequences. The machine instruction combiner is turned off for all targets and is expected to evolve over time by gradually handling DAGCombiner patterns in target-specific code.

This framework lays the groundwork for fixing rdar://16319955

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@214666 91177308-0d34-0410-b5e6-96231b3b80d8

Gerolf Hoflehner 5 years ago
11 changed file(s) with 588 addition(s) and 18 deletion(s).
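To make the tradeoff described in the commit message concrete, here is a small illustration. It is not part of the commit; madd/mul/add are real AArch64 instructions, but the latencies and register assignments are assumed for the sake of the example.

    // Hypothetical C++ source; assume 'c' arrives late, i.e. it is on the
    // critical path, while 'a' and 'b' are available early.
    long muladd(long a, long b, long c) {
      long m = a * b; // mul: assume 4-cycle latency
      return m + c;   // add: assume 1-cycle latency
    }
    // Combined lowering (what the DAGCombiner always selects):
    //   madd x0, x0, x1, x2   ; result ready ~4 cycles after c arrives
    // Split lowering (what the machine combiner can preserve):
    //   mul  x8, x0, x1       ; executes while still waiting for c
    //   add  x0, x8, x2       ; result ready ~1 cycle after c arrives
    // When c is late the split form hides the multiply latency; otherwise the
    // single madd is at least as fast and smaller. This pass decides per trace.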
0 //===-- llvm/CodeGen/MachineCombinerPattern.h - Combiner patterns -*- C++ -*-===//
1 //
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the instruction patterns supported by the machine combiner
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef LLVM_CODEGEN_MACHINECOMBINERPATTERN_H
15 #define LLVM_CODEGEN_MACHINECOMBINERPATTERN_H
16
17 namespace llvm {
18
19 /// Enumeration of the instruction patterns supported by the machine combiner
20 ///
21 ///
22 namespace MachineCombinerPattern {
23 // Forward declaration
24 enum MC_PATTERN : int;
25 } // end namespace MachineCombinerPattern
26 } // end namespace llvm
27
28 #endif
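The header above only forward-declares MC_PATTERN; each target supplies the actual enumerators in its own code. A hypothetical target-side definition might look like the following (the enumerator names are illustrative, not part of this commit):

    namespace llvm {
    namespace MachineCombinerPattern {
    // Hypothetical patterns for a multiply-accumulate capable target.
    enum MC_PATTERN : int {
      MC_NONE = 0,   // no pattern applies
      MC_MULADD = 1, // mul + add -> madd
      MC_MULSUB = 2  // mul + sub -> msub
    };
    } // end namespace MachineCombinerPattern
    } // end namespace llvm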
263263 /// classes are included. For the caller to account for extra machine
264264 /// instructions, it must first resolve each instruction's scheduling class.
265265 unsigned getResourceLength(
266 ArrayRef<const MachineBasicBlock *> Extrablocks = None,
267 ArrayRef<const MCSchedClassDesc *> ExtraInstrs = None) const;
266 ArrayRef<const MachineBasicBlock *> Extrablocks = None,
267 ArrayRef<const MCSchedClassDesc *> ExtraInstrs = None,
268 ArrayRef<const MCSchedClassDesc *> RemoveInstrs = None) const;
268269
269270 /// Return the length of the (data dependency) critical path through the
270271 /// trace.
285286 /// Return the Depth of a PHI instruction in a trace center block successor.
286287 /// The PHI does not have to be part of the trace.
287288 unsigned getPHIDepth(const MachineInstr *PHI) const;
289
290 /// A dependence is useful if the basic block of the defining instruction
291 /// is part of the trace of the user instruction. It is assumed that DefMI
292 /// dominates UseMI (see also isUsefulDominator).
293 bool isDepInTrace(const MachineInstr *DefMI,
294 const MachineInstr *UseMI) const;
288295 };
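The new RemoveInstrs parameter lets a client price a candidate replacement without touching the function: the scheduling classes of the would-be-inserted instructions go in ExtraInstrs and those of the would-be-deleted instructions in RemoveInstrs. A minimal usage sketch, assuming BlockTrace, NewSC, and OldSC (SmallVectors of const MCSchedClassDesc *) are already in scope:

    unsigned Before = BlockTrace.getResourceLength();
    unsigned After  = BlockTrace.getResourceLength(None, makeArrayRef(NewSC),
                                                   makeArrayRef(OldSC));
    bool FitsResources = (After <= Before); // reject combines that add pressure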
289296
290297 /// A trace ensemble is a collection of traces selected using the same
488488 /// inserting cmov instructions.
489489 extern char &EarlyIfConverterID;
490490
491 /// This pass performs instruction combining using trace metrics to estimate
492 /// critical-path and resource depth.
493 extern char &MachineCombinerID;
494
491495 /// StackSlotColoring - This pass performs stack coloring and merging.
492496 /// It merges disjoint allocas to reduce the stack size.
493497 extern char &StackColoringID;
166166 /// if converter after moving it to TargetSchedModel).
167167 unsigned computeInstrLatency(const MachineInstr *MI,
168168 bool UseDefaultDefLatency = true) const;
169 unsigned computeInstrLatency(unsigned Opcode) const;
169170
170171 /// \brief Output dependency latency of a pair of defs of the same register.
171172 ///
277277 void initializeBBVectorizePass(PassRegistry&);
278278 void initializeMachineFunctionPrinterPassPass(PassRegistry&);
279279 void initializeStackMapLivenessPass(PassRegistry&);
280 void initializeMachineCombinerPass(PassRegistry &);
280281 void initializeLoadCombinePass(PassRegistry&);
281282 }
282283
1414 #define LLVM_TARGET_TARGETINSTRINFO_H
1515
1616 #include "llvm/ADT/SmallSet.h"
17 #include "llvm/ADT/DenseMap.h"
1718 #include "llvm/CodeGen/DFAPacketizer.h"
1819 #include "llvm/CodeGen/MachineFunction.h"
20 #include "llvm/CodeGen/MachineCombinerPattern.h"
1921 #include "llvm/MC/MCInstrInfo.h"
22 #include "llvm/Target/TargetRegisterInfo.h"
2023
2124 namespace llvm {
2225
571574 const SmallVectorImpl<unsigned> &Ops,
572575 MachineInstr* LoadMI) const;
573576
577 /// hasPattern - return true when there is potentially a faster code sequence
578 /// for an instruction chain ending in \p Root. All potential patterns are
579 /// returned in the \p Pattern vector. Patterns should be sorted in priority
580 /// order since the pattern evaluator stops checking as soon as it finds a
581 /// faster sequence.
582 /// \param Root - Instruction that could be combined with one of its operands
583 /// \param Pattern - Vector of possible combination patterns
584
585 virtual bool hasPattern(
586 MachineInstr &Root,
587 SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Pattern) const {
588 return false;
589 }
590
591 /// genAlternativeCodeSequence - when hasPattern() finds a pattern this
592 /// function generates the instructions that could replace the original code
593 /// sequence. The client has to decide whether the actual replacement is
594 /// beneficial or not.
595 /// \param Root - Instruction that could be combined with one of its operands
596 /// \param P - Combination pattern for Root
597 /// \param InsInstrs - Vector of new instructions that implement P
598 /// \param DelInstrs - Old instructions, including Root, that could be replaced
599 /// by InsInstrs
600 /// \param InstrIdxForVirtReg - map of virtual register to instruction in
601 /// InsInstrs that defines it
602 virtual void genAlternativeCodeSequence(
603 MachineInstr &Root, MachineCombinerPattern::MC_PATTERN P,
604 SmallVectorImpl<MachineInstr *> &InsInstrs,
605 SmallVectorImpl<MachineInstr *> &DelInstrs,
606 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
607 return;
608 }
609
610 /// useMachineCombiner - return true when a target supports MachineCombiner
611 virtual bool useMachineCombiner(void) const { return false; }
612
574613 protected:
575614 /// foldMemoryOperandImpl - Target-dependent implementation for
576615 /// foldMemoryOperand. Target-independent code in foldMemoryOperand will
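The three hooks added to TargetInstrInfo above are all a target needs to override to participate. Here is a minimal sketch of the hasPattern side, reusing the hypothetical MC_MULADD enumerator from earlier; MyTargetInstrInfo, MyTarget::ADD, and isMulWithSingleUse are placeholders, not real LLVM names:

    bool MyTargetInstrInfo::hasPattern(
        MachineInstr &Root,
        SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Pattern) const {
      // Hypothetical check: an ADD whose operand is produced by a MUL with a
      // single use, i.e. a chain the DAGCombiner would have fused into a madd.
      if (Root.getOpcode() == MyTarget::ADD &&
          isMulWithSingleUse(Root.getOperand(1))) {
        Pattern.push_back(MachineCombinerPattern::MC_MULADD);
        return true; // patterns are pushed in priority order
      }
      return false;
    }

    bool MyTargetInstrInfo::useMachineCombiner(void) const { return true; }

A matching genAlternativeCodeSequence override would build the madd instruction into InsInstrs and put the mul and add into DelInstrs; the pass itself decides whether the swap pays off.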
4848 MachineBranchProbabilityInfo.cpp
4949 MachineCSE.cpp
5050 MachineCodeEmitter.cpp
51 MachineCombiner.cpp
5152 MachineCopyPropagation.cpp
5253 MachineDominators.cpp
5354 MachineDominanceFrontier.cpp
4040 initializeMachineBlockPlacementPass(Registry);
4141 initializeMachineBlockPlacementStatsPass(Registry);
4242 initializeMachineCopyPropagationPass(Registry);
43 initializeMachineCombinerPass(Registry);
4344 initializeMachineCSEPass(Registry);
4445 initializeMachineDominatorTreePass(Registry);
4546 initializeMachinePostDominatorTreePass(Registry);
0 //===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The machine combiner pass uses machine trace metrics to ensure the combined
10 // instructions do not lengthen the critical path or the resource depth.
11 //===----------------------------------------------------------------------===//
12 #define DEBUG_TYPE "machine-combiner"
13
14 #include "llvm/ADT/Statistic.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/CodeGen/MachineDominators.h"
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/CodeGen/MachineFunctionPass.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "llvm/CodeGen/MachineLoopInfo.h"
21 #include "llvm/CodeGen/MachineRegisterInfo.h"
22 #include "llvm/CodeGen/MachineTraceMetrics.h"
23 #include "llvm/CodeGen/Passes.h"
24 #include "llvm/CodeGen/TargetSchedule.h"
25 #include "llvm/Support/CommandLine.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include "llvm/Target/TargetInstrInfo.h"
29 #include "llvm/Target/TargetRegisterInfo.h"
30 #include "llvm/Target/TargetSubtargetInfo.h"
31
32 using namespace llvm;
33
34 STATISTIC(NumInstCombined, "Number of machine instructions combined");
35
36 namespace {
37 class MachineCombiner : public MachineFunctionPass {
38 const TargetInstrInfo *TII;
39 const TargetRegisterInfo *TRI;
40 const MCSchedModel *SchedModel;
41 MachineRegisterInfo *MRI;
42 MachineTraceMetrics *Traces;
43 MachineTraceMetrics::Ensemble *MinInstr;
44
45 TargetSchedModel TSchedModel;
46
47 /// OptSize - True if optimizing for code size.
48 bool OptSize;
49
50 public:
51 static char ID;
52 MachineCombiner() : MachineFunctionPass(ID) {
53 initializeMachineCombinerPass(*PassRegistry::getPassRegistry());
54 }
55 void getAnalysisUsage(AnalysisUsage &AU) const override;
56 bool runOnMachineFunction(MachineFunction &MF) override;
57 const char *getPassName() const override { return "Machine InstCombiner"; }
58
59 private:
60 bool doSubstitute(unsigned NewSize, unsigned OldSize);
61 bool combineInstructions(MachineBasicBlock *);
62 MachineInstr *getOperandDef(const MachineOperand &MO);
63 unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
64 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
65 MachineTraceMetrics::Trace BlockTrace);
66 unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
67 MachineTraceMetrics::Trace BlockTrace);
68 bool
69 preservesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
70 MachineTraceMetrics::Trace BlockTrace,
71 SmallVectorImpl<MachineInstr *> &InsInstrs,
72 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg);
73 bool preservesResourceLen(MachineBasicBlock *MBB,
74 MachineTraceMetrics::Trace BlockTrace,
75 SmallVectorImpl<MachineInstr *> &InsInstrs,
76 SmallVectorImpl<MachineInstr *> &DelInstrs);
77 void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs,
78 SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC);
79 };
80 }
81
82 char MachineCombiner::ID = 0;
83 char &llvm::MachineCombinerID = MachineCombiner::ID;
84
85 INITIALIZE_PASS_BEGIN(MachineCombiner, "machine-combiner",
86 "Machine InstCombiner", false, false)
87 INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
88 INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner",
89 false, false)
90
91 void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
92 AU.setPreservesCFG();
93 AU.addPreserved<MachineDominatorTree>();
94 AU.addPreserved<MachineLoopInfo>();
95 AU.addRequired<MachineTraceMetrics>();
96 AU.addPreserved<MachineTraceMetrics>();
97 MachineFunctionPass::getAnalysisUsage(AU);
98 }
99
100 MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) {
101 MachineInstr *DefInstr = nullptr;
102 // We need a virtual register definition.
103 if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
104 DefInstr = MRI->getUniqueVRegDef(MO.getReg());
105 // PHI's have no depth etc.
106 if (DefInstr && DefInstr->isPHI())
107 DefInstr = nullptr;
108 return DefInstr;
109 }
110
111 /// getDepth - Computes depth of instructions in vector \p InsInstrs.
112 ///
113 /// \param InsInstrs is a vector of machine instructions
114 /// \param InstrIdxForVirtReg is a dense map of virtual register to index
115 /// of defining machine instruction in \p InsInstrs
116 /// \param BlockTrace is a trace of machine instructions
117 ///
118 /// \returns Depth of the last instruction in \p InsInstrs ("NewRoot")
119 unsigned
120 MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
121 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
122 MachineTraceMetrics::Trace BlockTrace) {
123
124 SmallVector<unsigned, 16> InstrDepth;
125 assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
126
127 // For each instruction in the new sequence compute the depth based on its
128 // operands. Use the trace information when possible. For new operands which
129 // are tracked in the InstrIdxForVirtReg map the depth is looked up in InstrDepth.
130 for (auto *InstrPtr : InsInstrs) { // for each Use
131 unsigned IDepth = 0;
132 DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(); dbgs() << "\n";);
133 for (unsigned i = 0, e = InstrPtr->getNumOperands(); i != e; ++i) {
134 const MachineOperand &MO = InstrPtr->getOperand(i);
135 // Check for virtual register operand.
136 if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
137 continue;
138 if (!MO.isUse())
139 continue;
140 unsigned DepthOp = 0;
141 unsigned LatencyOp = 0;
142 DenseMap<unsigned, unsigned>::iterator II =
143 InstrIdxForVirtReg.find(MO.getReg());
144 if (II != InstrIdxForVirtReg.end()) {
145 // Operand is new virtual register not in trace
146 assert(II->second >= 0 && II->second < InstrDepth.size() &&
147 "Bad Index");
148 MachineInstr *DefInstr = InsInstrs[II->second];
149 assert(DefInstr &&
150 "There must be a definition for a new virtual register");
151 DepthOp = InstrDepth[II->second];
152 LatencyOp = TSchedModel.computeOperandLatency(
153 DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
154 InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
155 } else {
156 MachineInstr *DefInstr = getOperandDef(MO);
157 if (DefInstr) {
158 DepthOp = BlockTrace.getInstrCycles(DefInstr).Depth;
159 LatencyOp = TSchedModel.computeOperandLatency(
160 DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
161 InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
162 }
163 }
164 IDepth = std::max(IDepth, DepthOp + LatencyOp);
165 }
166 InstrDepth.push_back(IDepth);
167 }
168 unsigned NewRootIdx = InsInstrs.size() - 1;
169 return InstrDepth[NewRootIdx];
170 }
171
172 /// getLatency - Computes instruction latency as the maximum latency over all
173 /// of its defined operands
174 ///
175 /// \param Root is a machine instruction that could be replaced by NewRoot.
176 /// It is used to compute more accurate latency information for NewRoot in
177 /// case there is a dependent instruction in the same trace (\p BlockTrace)
178 /// \param NewRoot is the instruction for which the latency is computed
179 /// \param BlockTrace is a trace of machine instructions
180 ///
181 /// \returns Latency of \p NewRoot
182 unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
183 MachineTraceMetrics::Trace BlockTrace) {
184
185 assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
186
187 // Check each definition in NewRoot and compute the latency
188 unsigned NewRootLatency = 0;
189
190 for (unsigned i = 0, e = NewRoot->getNumOperands(); i != e; ++i) {
191 const MachineOperand &MO = NewRoot->getOperand(i);
192 // Check for virtual register operand.
193 if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
194 continue;
195 if (!MO.isDef())
196 continue;
197 // Get the first instruction that uses MO
198 MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg());
199 RI++;
200 MachineInstr *UseMO = RI->getParent();
201 unsigned LatencyOp = 0;
202 if (UseMO && BlockTrace.isDepInTrace(Root, UseMO)) {
203 LatencyOp = TSchedModel.computeOperandLatency(
204 NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO,
205 UseMO->findRegisterUseOperandIdx(MO.getReg()));
206 } else {
207 LatencyOp = TSchedModel.computeInstrLatency(NewRoot->getOpcode());
208 }
209 NewRootLatency = std::max(NewRootLatency, LatencyOp);
210 }
211 return NewRootLatency;
212 }
213
214 /// preservesCriticalPathLen - True when the new instruction sequence does not
215 /// lengthen the critical path. The DAGCombine code sequence ends in MI
216 /// (Machine Instruction) Root. The new code sequence ends in MI NewRoot. A
217 /// necessary condition for the new sequence to replace the old sequence is that
218 /// it cannot lengthen the critical path. This is decided by the formula
219 /// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack).
220 /// The slack is the number of cycles Root can be delayed before the critical
221 /// path becomes longer.
222 bool MachineCombiner::preservesCriticalPathLen(
223 MachineBasicBlock *MBB, MachineInstr *Root,
224 MachineTraceMetrics::Trace BlockTrace,
225 SmallVectorImpl<MachineInstr *> &InsInstrs,
226 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
227
228 assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
229 // NewRoot is the last instruction in the \p InsInstrs vector
230 // Get depth and latency of NewRoot
231 unsigned NewRootIdx = InsInstrs.size() - 1;
232 MachineInstr *NewRoot = InsInstrs[NewRootIdx];
233 unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
234 unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace);
235
236 // Get depth, latency and slack of Root
237 unsigned RootDepth = BlockTrace.getInstrCycles(Root).Depth;
238 unsigned RootLatency = TSchedModel.computeInstrLatency(Root);
239 unsigned RootSlack = BlockTrace.getInstrSlack(Root);
240
241 DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n";
242 dbgs() << " NewRootDepth: " << NewRootDepth
243 << " NewRootLatency: " << NewRootLatency << "\n";
244 dbgs() << " RootDepth: " << RootDepth << " RootLatency: " << RootLatency
245 << " RootSlack: " << RootSlack << "\n";
246 dbgs() << " NewRootDepth + NewRootLatency "
247 << NewRootDepth + NewRootLatency << "\n";
248 dbgs() << " RootDepth + RootLatency + RootSlack "
249 << RootDepth + RootLatency + RootSlack << "\n";);
250
251 // True when the new sequence does not lengthen the critical path.
252 return ((NewRootDepth + NewRootLatency) <=
253 (RootDepth + RootLatency + RootSlack));
254 }
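A worked instance of the formula above, with assumed cycle counts (not measured on any target):

    NewRootDepth = 6, NewRootLatency = 3           -> 6 + 3 = 9
    RootDepth = 4, RootLatency = 2, RootSlack = 3  -> 4 + 2 + 3 = 9
    9 <= 9 holds, so the new sequence may replace the old one; with
    RootSlack = 1 the right-hand side drops to 7 and the combine is rejected.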
255
256 /// Helper routine to convert machine instructions into their scheduling classes (SC).
257 void MachineCombiner::instr2instrSC(
258 SmallVectorImpl<MachineInstr *> &Instrs,
259 SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC) {
260 for (auto *InstrPtr : Instrs) {
261 unsigned Opc = InstrPtr->getOpcode();
262 unsigned Idx = TII->get(Opc).getSchedClass();
263 const MCSchedClassDesc *SC = SchedModel->getSchedClassDesc(Idx);
264 InstrsSC.push_back(SC);
265 }
266 }
267 /// preservesResourceLen - True when the new instructions do not increase the
268 /// resource length
269 bool MachineCombiner::preservesResourceLen(
270 MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace,
271 SmallVectorImpl<MachineInstr *> &InsInstrs,
272 SmallVectorImpl<MachineInstr *> &DelInstrs) {
273
274 // Compute current resource length
275
276 ArrayRef<const MachineBasicBlock *> MBBarr(MBB);
277 unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(MBBarr);
278
279 // Deal with SC rather than Instructions.
280 SmallVector<const MCSchedClassDesc *, 16> InsInstrsSC;
281 SmallVector<const MCSchedClassDesc *, 16> DelInstrsSC;
282
283 instr2instrSC(InsInstrs, InsInstrsSC);
284 instr2instrSC(DelInstrs, DelInstrsSC);
285
286 ArrayRef<const MCSchedClassDesc *> MSCInsArr = makeArrayRef(InsInstrsSC);
287 ArrayRef<const MCSchedClassDesc *> MSCDelArr = makeArrayRef(DelInstrsSC);
288
289 // Compute new resource length
290 unsigned ResLenAfterCombine =
291 BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr);
292
293 DEBUG(dbgs() << "RESOURCE DATA: \n";
294 dbgs() << " resource len before: " << ResLenBeforeCombine
295 << " after: " << ResLenAfterCombine << "\n";);
296
297 return ResLenAfterCombine <= ResLenBeforeCombine;
298 }
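A worked instance with assumed, pre-scaled cycle counts: if the limiting resource contributes 10 cycles before the combine, the inserted instructions ({madd}) add 1 cycle on it, and the removed ones ({mul, add}) account for 2, then:

    ResLenBeforeCombine = 10
    ResLenAfterCombine  = 10 + 1 - 2 = 9   // 9 <= 10, so the combine is allowed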
299
300 /// \returns true when the new instruction sequence should be generated
301 /// independent of whether it lengthens the critical path or not
302 bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
303 if (OptSize && (NewSize < OldSize))
304 return true;
305 if (!TSchedModel.hasInstrSchedModel())
306 return true;
307 return false;
308 }
309
310 /// combineInstructions - substitute a slow code sequence with a faster one by
311 /// evaluating instruction combining patterns.
312 /// The prototype of such a pattern is MUL + ADD -> MADD. Performs instruction
313 /// combining based on machine trace metrics. Only combine a sequence of
314 /// instructions when this neither lengthens the critical path nor increases
315 /// resource pressure. When optimizing for code size always combine when the new
316 /// sequence is shorter.
317 bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
318 bool Changed = false;
319 DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
320
321 auto BlockIter = MBB->begin();
322
323 while (BlockIter != MBB->end()) {
324 auto &MI = *BlockIter++;
325
326 DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";);
327 SmallVector<MachineCombinerPattern::MC_PATTERN, 16> Pattern;
328 // The motivating example is:
329 //
330 // MUL Other MUL_op1 MUL_op2 Other
331 // \ / \ | /
332 // ADD/SUB => MADD/MSUB
333 // (=Root) (=NewRoot)
334
335 // The DAGCombine code always replaced MUL + ADD/SUB by MADD. While this is
336 // usually beneficial for code size it unfortunately can hurt performance
337 // when the ADD is on the critical path, but the MUL is not. With the
338 // substitution the MUL becomes part of the critical path (in form of the
339 // MADD) and can lengthen it on architectures where the MADD latency is
340 // longer than the ADD latency.
341 //
342 // For each instruction we check if it can be the root of a combiner
343 // pattern. Then for each pattern the new code sequence in form of MI is
344 // generated and evaluated. When the efficiency criteria (don't lengthen
345 // critical path, don't use more resources) are met the new sequence gets
346 // hooked up into the basic block before the old sequence is removed.
347 //
348 // The algorithm does not try to evaluate all patterns and pick the best.
349 // This is only an artificial restriction though. In practice there is
350 // usually just one pattern and hasPattern() can order patterns based on an
351 // internal cost heuristic.
352
353 if (TII->hasPattern(MI, Pattern)) {
354 for (auto P : Pattern) {
355 SmallVector<MachineInstr *, 16> InsInstrs;
356 SmallVector<MachineInstr *, 16> DelInstrs;
357 DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
358 if (!MinInstr)
359 MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
360 MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
361 Traces->verifyAnalysis();
362 TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
363 InstrIdxForVirtReg);
364 // Found pattern, but did not generate alternative sequence.
365 // This can happen e.g. when an immediate could not be materialized
366 // in a single instruction.
367 if (!InsInstrs.size())
368 continue;
369 // Substitute when we optimize for code size and the new sequence has
370 // fewer instructions OR
371 // the new sequence neither lengthens the critical path nor increases
372 // resource pressure.
373 if (doSubstitute(InsInstrs.size(), DelInstrs.size()) ||
374 (preservesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
375 InstrIdxForVirtReg) &&
376 preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) {
377 for (auto *InstrPtr : InsInstrs)
378 MBB->insert((MachineBasicBlock::iterator) & MI,
379 (MachineInstr *)InstrPtr);
380 for (auto *InstrPtr : DelInstrs)
381 InstrPtr->eraseFromParent();
382
383 Changed = true;
384 ++NumInstCombined;
385
386 Traces->invalidate(MBB);
387 Traces->verifyAnalysis();
388 // Eagerly stop after the first pattern fired
389 break;
390 } else {
391 // Cleanup instructions of the alternative code sequence. There is no
392 // use for them.
393 for (auto *InstrPtr : InsInstrs) {
394 MachineFunction *MF = MBB->getParent();
395 MF->DeleteMachineInstr((MachineInstr *)InstrPtr);
396 }
397 }
398 InstrIdxForVirtReg.clear();
399 }
400 }
401 }
402
403 return Changed;
404 }
405
406 bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
407 TII = MF.getTarget().getInstrInfo();
408 TRI = MF.getTarget().getRegisterInfo();
409 const TargetSubtargetInfo &STI =
410 MF.getTarget().getSubtarget<TargetSubtargetInfo>();
411 SchedModel = STI.getSchedModel();
412 TSchedModel.init(*SchedModel, &STI, TII);
413 MRI = &MF.getRegInfo();
414 Traces = &getAnalysis<MachineTraceMetrics>();
415 MinInstr = 0;
416
417 OptSize = MF.getFunction()->getAttributes().hasAttribute(
418 AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
419
420 DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
421 if (!TII->useMachineCombiner()) {
422 DEBUG(dbgs() << " Skipping pass: Target does not support machine combiner\n");
423 return false;
424 }
425
426 bool Changed = false;
427
428 // Try to combine instructions.
429 for (auto &MBB : MF)
430 Changed |= combineInstructions(&MBB);
431
432 return Changed;
433 }
11681168 return DepCycle;
11691169 }
11701170
1171 /// When Bottom is set, include instructions in the current block in the estimate.
11711172 unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
11721173 // Find the limiting processor resource.
11731174 // Numbers have been pre-scaled to be comparable.
11841185 // Convert to cycle count.
11851186 PRMax = TE.MTM.getCycles(PRMax);
11861187
1188 // All instructions before the current block,
11871189 unsigned Instrs = TBI.InstrDepth;
1190 // plus the instructions in the current block when Bottom is set.
11881191 if (Bottom)
11891192 Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
11901193 if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
11931196 return std::max(Instrs, PRMax);
11941197 }
11951198
1196
1197 unsigned MachineTraceMetrics::Trace::
1198 getResourceLength(ArrayRef<const MachineBasicBlock *> Extrablocks,
1199 ArrayRef<const MCSchedClassDesc *> ExtraInstrs) const {
1199 unsigned MachineTraceMetrics::Trace::getResourceLength(
1200 ArrayRef<const MachineBasicBlock *> Extrablocks,
1201 ArrayRef<const MCSchedClassDesc *> ExtraInstrs,
1202 ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
12001203 // Add up resources above and below the center block.
12011204 ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
12021205 ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
12031206 unsigned PRMax = 0;
1207
1208 // Compute the cycles a set of instructions consumes on a given resource.
1209 auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
1210 unsigned ResourceIdx)
1211 -> unsigned {
1212 unsigned Cycles = 0;
1213 for (unsigned I = 0; I != Instrs.size(); ++I) {
1214 const MCSchedClassDesc *SC = Instrs[I];
1215 if (!SC->isValid())
1216 continue;
1217 for (TargetSchedModel::ProcResIter
1218 PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
1219 PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
1220 PI != PE; ++PI) {
1221 if (PI->ProcResourceIdx != ResourceIdx)
1222 continue;
1223 Cycles +=
1224 (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
1225 }
1226 }
1227 return Cycles;
1228 };
1229
12041230 for (unsigned K = 0; K != PRDepths.size(); ++K) {
12051231 unsigned PRCycles = PRDepths[K] + PRHeights[K];
12061232 for (unsigned I = 0; I != Extrablocks.size(); ++I)
12071233 PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K];
1208 for (unsigned I = 0; I != ExtraInstrs.size(); ++I) {
1209 const MCSchedClassDesc* SC = ExtraInstrs[I];
1210 if (!SC->isValid())
1211 continue;
1212 for (TargetSchedModel::ProcResIter
1213 PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
1214 PE = TE.MTM.SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
1215 if (PI->ProcResourceIdx != K)
1216 continue;
1217 PRCycles += (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(K));
1218 }
1219 }
1234 PRCycles += extraCycles(ExtraInstrs, K);
1235 PRCycles -= extraCycles(RemoveInstrs, K);
12201236 PRMax = std::max(PRMax, PRCycles);
12211237 }
12221238 // Convert to cycle count.
12231239 PRMax = TE.MTM.getCycles(PRMax);
12241240
1241 // Instrs: #instructions in current trace outside current block.
12251242 unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
1243 // Add instruction count from the extra blocks.
12261244 for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i)
12271245 Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount;
1246 Instrs += ExtraInstrs.size();
1247 Instrs -= RemoveInstrs.size();
12281248 if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
12291249 Instrs /= IW;
12301250 // Assume issue width 1 without a schedule model.
12311251 return std::max(Instrs, PRMax);
1252 }
1253
1254 bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr *DefMI,
1255 const MachineInstr *UseMI) const {
1256 if (DefMI->getParent() == UseMI->getParent())
1257 return true;
1258
1259 const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI->getParent()->getNumber()];
1260 const TraceBlockInfo &TBI = TE.BlockInfo[UseMI->getParent()->getNumber()];
1261
1262 return DepTBI.isUsefulDominator(TBI);
12321263 }
12331264
12341265 void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
224224 return DefMI->isTransient() ? 0 : TII->defaultDefLatency(&SchedModel, DefMI);
225225 }
226226
227 unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const {
228 assert(hasInstrSchedModel() && "Only call this function with a SchedModel");
229
230 unsigned SCIdx = TII->get(Opcode).getSchedClass();
231 const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SCIdx);
232 unsigned Latency = 0;
233
234 if (SCDesc->isValid() && !SCDesc->isVariant()) {
235 for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries;
236 DefIdx != DefEnd; ++DefIdx) {
237 // Lookup the definition's write latency in SubtargetInfo.
238 const MCWriteLatencyEntry *WLEntry =
239 STI->getWriteLatencyEntry(SCDesc, DefIdx);
240 Latency = std::max(Latency, capLatency(WLEntry->Cycles));
241 }
242 return Latency;
243 }
244
245 assert(Latency && "No MI sched latency");
246 return 0;
247 }
248
227249 unsigned
228250 TargetSchedModel::computeInstrLatency(const MachineInstr *MI,
229251 bool UseDefaultDefLatency) const {