llvm.org GIT mirror llvm / f7e0423
Move Post RA Scheduling flag bit into SchedMachineModel. Refactoring; no functional changes intended. Removed PostRAScheduler bits from subtargets (X86, ARM). Added PostRAScheduler bit to MCSchedModel class. This bit is set by a CPU's scheduling model (if it exists). Removed enablePostRAScheduler() function from TargetSubtargetInfo and subclasses. Fixed the existing enablePostMachineScheduler() method to use the MCSchedModel (was just returning false!). Added methods to TargetSubtargetInfo to allow overrides for AntiDepBreakMode, CriticalPathRCs, and OptLevel for PostRAScheduling. Added enablePostRAScheduler() function to PostRAScheduler class which queries the subtarget for the above values. Preserved existing scheduler behavior for ARM, MIPS, PPC, and X86: a. ARM overrides the CPU's postRA settings by enabling postRA for any non-Thumb or Thumb2 subtarget. b. MIPS overrides the CPU's postRA settings by enabling postRA for everything. c. PPC overrides the CPU's postRA settings by enabling postRA for everything. d. X86 is the only target that actually has postRA specified via sched model info. Differential Revision: http://reviews.llvm.org/D4217 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213101 91177308-0d34-0410-b5e6-96231b3b80d8 Sanjay Patel 5 years ago
17 changed file(s) with 100 addition(s) and 110 deletion(s). Raw diff Collapse all Expand all
185185 // takes to recover from a branch misprediction.
186186 unsigned MispredictPenalty;
187187 static const unsigned DefaultMispredictPenalty = 10;
188
189 bool PostRAScheduler; // default value is false
188190
189191 bool CompleteModel;
190192
209211 LoadLatency(DefaultLoadLatency),
210212 HighLatency(DefaultHighLatency),
211213 MispredictPenalty(DefaultMispredictPenalty),
212 CompleteModel(true), ProcID(0), ProcResourceTable(nullptr),
214 PostRAScheduler(false), CompleteModel(true),
215 ProcID(0), ProcResourceTable(nullptr),
213216 SchedClassTable(nullptr), NumProcResourceKinds(0),
214217 NumSchedClasses(0), InstrItineraries(nullptr) {
215218 (void)NumProcResourceKinds;
218221
219222 // Table-gen driven ctor.
220223 MCSchedModel(unsigned iw, int mbs, int lmbs, unsigned ll, unsigned hl,
221 unsigned mp, bool cm, unsigned pi, const MCProcResourceDesc *pr,
222 const MCSchedClassDesc *sc, unsigned npr, unsigned nsc,
223 const InstrItinerary *ii):
224 unsigned mp, bool postRASched, bool cm, unsigned pi,
225 const MCProcResourceDesc *pr, const MCSchedClassDesc *sc,
226 unsigned npr, unsigned nsc, const InstrItinerary *ii):
224227 IssueWidth(iw), MicroOpBufferSize(mbs), LoopMicroOpBufferSize(lmbs),
225228 LoadLatency(ll), HighLatency(hl),
226 MispredictPenalty(mp), CompleteModel(cm), ProcID(pi),
229 MispredictPenalty(mp), PostRAScheduler(postRASched),
230 CompleteModel(cm), ProcID(pi),
227231 ProcResourceTable(pr), SchedClassTable(sc), NumProcResourceKinds(npr),
228232 NumSchedClasses(nsc), InstrItineraries(ii) {}
229233
8686
8787 // Per-cycle resources tables.
8888 ProcessorItineraries Itineraries = NoItineraries;
89
90 bit PostRAScheduler = 0; // Enable Post RegAlloc Scheduler pass.
8991
9092 // Subtargets that define a model for only a subset of instructions
9193 // that have a scheduling class (itinerary class or SchedRW list)
8989 // dependency.
9090 virtual void adjustSchedDependency(SUnit *def, SUnit *use,
9191 SDep& dep) const { }
92
93 // enablePostRAScheduler - If the target can benefit from post-regalloc
94 // scheduling and the specified optimization level meets the requirement
95 // return true to enable post-register-allocation scheduling. In
96 // CriticalPathRCs return any register classes that should only be broken
97 // if on the critical path.
98 virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
99 AntiDepBreakMode& Mode,
100 RegClassVector& CriticalPathRCs) const;
101
92
93 // For use with PostRAScheduling: get the anti-dependence breaking that should
94 // be performed before post-RA scheduling.
95 virtual AntiDepBreakMode getAntiDepBreakMode() const {
96 return ANTIDEP_NONE;
97 }
98
99 // For use with PostRAScheduling: in CriticalPathRCs, return any register
100 // classes that should only be considered for anti-dependence breaking if they
101 // are on the critical path.
102 virtual void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const {
103 return CriticalPathRCs.clear();
104 }
105
106 // For use with PostRAScheduling: get the minimum optimization level needed
107 // to enable post-RA scheduling.
108 virtual CodeGenOpt::Level getOptLevelToEnablePostRAScheduler() const {
109 return CodeGenOpt::Default;
110 }
111
102112 /// \brief True if the subtarget should run the local reassignment
103113 /// heuristic of the register allocator.
104114 /// This heuristic may be compile time intensive, \p OptLevel provides
9797 }
9898
9999 bool runOnMachineFunction(MachineFunction &Fn) override;
100
101 bool enablePostRAScheduler(
102 const TargetSubtargetInfo &ST, CodeGenOpt::Level OptLevel,
103 TargetSubtargetInfo::AntiDepBreakMode &Mode,
104 TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const;
100105 };
101106 char PostRAScheduler::ID = 0;
102107
243248 }
244249 }
245250 #endif
251
252 bool PostRAScheduler::enablePostRAScheduler(
253 const TargetSubtargetInfo &ST,
254 CodeGenOpt::Level OptLevel,
255 TargetSubtargetInfo::AntiDepBreakMode &Mode,
256 TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const {
257 Mode = ST.getAntiDepBreakMode();
258 ST.getCriticalPathRCs(CriticalPathRCs);
259 return ST.enablePostMachineScheduler() &&
260 OptLevel >= ST.getOptLevelToEnablePostRAScheduler();
261 }
246262
247263 bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
248264 if (skipOptnoneFunction(*Fn.getFunction()))
266282 } else {
267283 // Check that post-RA scheduling is enabled for this target.
268284 // This may upgrade the AntiDepMode.
269 const TargetSubtargetInfo &ST = Fn.getTarget().getSubtarget();
270 if (!ST.enablePostRAScheduler(PassConfig->getOptLevel(), AntiDepMode,
271 CriticalPathRCs))
285 const TargetSubtargetInfo &ST =
286 Fn.getTarget().getSubtarget();
287 if (!enablePostRAScheduler(ST, PassConfig->getOptLevel(),
288 AntiDepMode, CriticalPathRCs))
272289 return false;
273290 }
274291
190190 InThumbMode = false;
191191 HasThumb2 = false;
192192 NoARM = false;
193 PostRAScheduler = false;
194193 IsR9Reserved = ReserveR9;
195194 UseMovt = false;
196195 SupportsTailCall = false;
306305 IsR9Reserved = ReserveR9;
307306 SupportsTailCall = !isThumb1Only();
308307 }
309
310 if (!isThumb() || hasThumb2())
311 PostRAScheduler = true;
312308
313309 switch (Align) {
314310 case DefaultAlign:
424420 !getTargetTriple().isOSVersionLT(7, 0);
425421 }
426422
427 // Enable the PostMachineScheduler if the target selects it instead of
428 // PostRAScheduler. Currently only available on the command line via
429 // -misched-postra.
423 // This overrides the PostRAScheduler bit in the SchedModel for any CPU.
430424 bool ARMSubtarget::enablePostMachineScheduler() const {
431 return PostRAScheduler;
425 return (!isThumb() || hasThumb2());
432426 }
433427
434428 bool ARMSubtarget::enableAtomicExpandLoadLinked() const {
435429 return hasAnyDataBarrier() && !isThumb1Only();
436 }
437
438 bool ARMSubtarget::enablePostRAScheduler(
439 CodeGenOpt::Level OptLevel,
440 TargetSubtargetInfo::AntiDepBreakMode& Mode,
441 RegClassVector& CriticalPathRCs) const {
442 Mode = TargetSubtargetInfo::ANTIDEP_NONE;
443 return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
444430 }
445431
446432 bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
104104 /// NoARM - True if subtarget does not support ARM mode execution.
105105 bool NoARM;
106106
107 /// PostRAScheduler - True if using post-register-allocation scheduler.
108 bool PostRAScheduler;
109
110107 /// IsR9Reserved - True if R9 is not available as a general purpose register.
111108 bool IsR9Reserved;
112109
428425 bool hasSinCos() const;
429426
430427 /// True for some subtargets at > -O0.
431 bool enablePostMachineScheduler() const;
432
433 /// enablePostRAScheduler - True at 'More' optimization.
434 bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
435 TargetSubtargetInfo::AntiDepBreakMode& Mode,
436 RegClassVector& CriticalPathRCs) const override;
428 bool enablePostMachineScheduler() const override;
437429
438430 // enableAtomicExpandLoadLinked - True if we need to expand our atomics.
439431 bool enableAtomicExpandLoadLinked() const override;
176176 UseSmallSection = !IsLinux && (RM == Reloc::Static);
177177 }
178178
179 bool
180 MipsSubtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel,
181 TargetSubtargetInfo::AntiDepBreakMode &Mode,
182 RegClassVector &CriticalPathRCs) const {
183 Mode = TargetSubtargetInfo::ANTIDEP_NONE;
179 /// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
180 bool MipsSubtarget::enablePostMachineScheduler() const { return true; }
181
182 void MipsSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const {
184183 CriticalPathRCs.clear();
185 CriticalPathRCs.push_back(isGP64bit() ? &Mips::GPR64RegClass
186 : &Mips::GPR32RegClass);
187 return OptLevel >= CodeGenOpt::Aggressive;
184 CriticalPathRCs.push_back(isGP64bit() ?
185 &Mips::GPR64RegClass : &Mips::GPR32RegClass);
186 }
187
188 CodeGenOpt::Level MipsSubtarget::getOptLevelToEnablePostRAScheduler() const {
189 return CodeGenOpt::Aggressive;
188190 }
189191
190192 MipsSubtarget &
159159 std::unique_ptr TLInfoSE;
160160
161161 public:
162 bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
163 AntiDepBreakMode& Mode,
164 RegClassVector& CriticalPathRCs) const override;
162 /// This overrides the PostRAScheduler bit in the SchedModel for each CPU.
163 bool enablePostMachineScheduler() const override;
164 void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override;
165 CodeGenOpt::Level getOptLevelToEnablePostRAScheduler() const override;
165166
166167 /// Only O32 and EABI supported right now.
167168 bool isABI_EABI() const { return MipsABI == EABI; }
221221 GV->hasCommonLinkage() || isDecl;
222222 }
223223
224 bool PPCSubtarget::enablePostRAScheduler(
225 CodeGenOpt::Level OptLevel,
226 TargetSubtargetInfo::AntiDepBreakMode& Mode,
227 RegClassVector& CriticalPathRCs) const {
228 Mode = TargetSubtargetInfo::ANTIDEP_ALL;
229
230 CriticalPathRCs.clear();
231
232 if (isPPC64())
233 CriticalPathRCs.push_back(&PPC::G8RCRegClass);
234 else
235 CriticalPathRCs.push_back(&PPC::GPRCRegClass);
236
237 return OptLevel >= CodeGenOpt::Default;
238 }
239
240224 // Embedded cores need aggressive scheduling (and some others also benefit).
241225 static bool needsAggressiveScheduling(unsigned Directive) {
242226 switch (Directive) {
258242 return needsAggressiveScheduling(DarwinDirective);
259243 }
260244
245 // This overrides the PostRAScheduler bit in the SchedModel for each CPU.
246 bool PPCSubtarget::enablePostMachineScheduler() const { return true; }
247
248 PPCGenSubtargetInfo::AntiDepBreakMode PPCSubtarget::getAntiDepBreakMode() const {
249 return TargetSubtargetInfo::ANTIDEP_ALL;
250 }
251
252 void PPCSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const {
253 CriticalPathRCs.clear();
254 CriticalPathRCs.push_back(isPPC64() ?
255 &PPC::G8RCRegClass : &PPC::GPRCRegClass);
256 }
257
261258 void PPCSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
262259 MachineInstr *begin,
263260 MachineInstr *end,
224224 bool isDarwinABI() const { return isDarwin(); }
225225 bool isSVR4ABI() const { return !isDarwin(); }
226226
227 /// enablePostRAScheduler - True at 'More' optimization.
228 bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
229 TargetSubtargetInfo::AntiDepBreakMode& Mode,
230 RegClassVector& CriticalPathRCs) const override;
231
232227 bool enableEarlyIfConversion() const override { return hasISEL(); }
233228
234229 // Scheduling customization.
235230 bool enableMachineScheduler() const override;
231 // This overrides the PostRAScheduler bit in the SchedModel for each CPU.
232 bool enablePostMachineScheduler() const override;
233 AntiDepBreakMode getAntiDepBreakMode() const override;
234 void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override;
235
236236 void overrideSchedPolicy(MachineSchedPolicy &Policy,
237237 MachineInstr *begin,
238238 MachineInstr *end,
5252 }
5353
5454 bool TargetSubtargetInfo::enablePostMachineScheduler() const {
55 return false;
56 }
57
58 bool TargetSubtargetInfo::enablePostRAScheduler(
59 CodeGenOpt::Level OptLevel,
60 AntiDepBreakMode& Mode,
61 RegClassVector& CriticalPathRCs) const {
62 Mode = ANTIDEP_NONE;
63 CriticalPathRCs.clear();
64 return false;
55 return getSchedModel()->PostRAScheduler;
6556 }
6657
6758 bool TargetSubtargetInfo::useAA() const {
632632 let MicroOpBufferSize = 32;
633633 let LoadLatency = 4;
634634 let HighLatency = 10;
635 let PostRAScheduler = 0;
635636 }
636637
637638 include "X86ScheduleAtom.td"
537537 // On the Atom, the throughput for taken branches is 2 cycles. For small
538538 // simple loops, expand by a small factor to hide the backedge cost.
539539 let LoopMicroOpBufferSize = 10;
540 let PostRAScheduler = 1;
540541
541542 let Itineraries = AtomItineraries;
542543 }
1818 let MicroOpBufferSize = 32; // Based on the reorder buffer.
1919 let LoadLatency = 3;
2020 let MispredictPenalty = 10;
21 let PostRAScheduler = 1;
2122
2223 // For small loops, expand by a small factor to hide the backedge cost.
2324 let LoopMicroOpBufferSize = 10;
217217
218218 // Make sure the right MCSchedModel is used.
219219 InitCPUSchedModel(CPUName);
220
221 if (X86ProcFamily == IntelAtom || X86ProcFamily == IntelSLM)
222 PostRAScheduler = true;
223220
224221 InstrItins = getInstrItineraryForCPU(CPUName);
225222
285282 HasCmpxchg16b = false;
286283 UseLeaForSP = false;
287284 HasSlowDivide = false;
288 PostRAScheduler = false;
289285 PadShortFunctions = false;
290286 CallRegIndirect = false;
291287 LEAUsesAG = false;
358354 is64Bit() ? -8 : -4),
359355 JITInfo(hasSSE1()) {}
360356
361 bool
362 X86Subtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel,
363 TargetSubtargetInfo::AntiDepBreakMode &Mode,
364 RegClassVector &CriticalPathRCs) const {
365 Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
366 CriticalPathRCs.clear();
367 return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
368 }
369
370 bool
371 X86Subtarget::enableEarlyIfConversion() const {
357 bool X86Subtarget::enableEarlyIfConversion() const {
372358 return hasCMov() && X86EarlyIfConv;
373359 }
360
168168 /// HasSlowDivide - True if smaller divides are significantly faster than
169169 /// full divides and should be used when possible.
170170 bool HasSlowDivide;
171
172 /// PostRAScheduler - True if using post-register-allocation scheduler.
173 bool PostRAScheduler;
174171
175172 /// PadShortFunctions - True if the short functions should be padded to prevent
176173 /// a stall when returning too early.
452449 /// Enable the MachineScheduler pass for all X86 subtargets.
453450 bool enableMachineScheduler() const override { return true; }
454451
455 /// enablePostRAScheduler - run for Atom optimization.
456 bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
457 TargetSubtargetInfo::AntiDepBreakMode& Mode,
458 RegClassVector& CriticalPathRCs) const override;
459
460 bool postRAScheduler() const { return PostRAScheduler; }
461
462452 bool enableEarlyIfConversion() const override;
463453
464454 /// getInstrItineraryData - Return the instruction itineraries based on the
465455 /// subtarget selection.
466456 const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
457
458 AntiDepBreakMode getAntiDepBreakMode() const override {
459 return TargetSubtargetInfo::ANTIDEP_CRITICAL;
460 }
467461 };
468462
469463 } // End llvm namespace
12001200 EmitProcessorProp(OS, PI->ModelDef, "MispredictPenalty", ',');
12011201
12021202 OS << " " << (bool)(PI->ModelDef ?
1203 PI->ModelDef->getValueAsBit("PostRAScheduler") : 0)
1204 << ", // " << "PostRAScheduler\n";
1205
1206 OS << " " << (bool)(PI->ModelDef ?
12031207 PI->ModelDef->getValueAsBit("CompleteModel") : 0)
12041208 << ", // " << "CompleteModel\n";
12051209