llvm.org GIT mirror llvm / c8bfd1d
Convert -enable-sched-cycles and -enable-sched-hazard to -disable flags. They are still not enabled in this revision. Added TargetInstrInfo::isZeroCost() to fix a fundamental problem with the scheduler's model of operand latency in the selection DAG. Generalized unit tests to work with sched-cycles. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@123969 91177308-0d34-0410-b5e6-96231b3b80d8 Andrew Trick 9 years ago
10 changed file(s) with 88 addition(s) and 63 deletion(s). Raw diff Collapse all Expand all
4040 };
4141
4242 unsigned getMaxLookAhead() const { return MaxLookAhead; }
43
44 bool isEnabled() const { return MaxLookAhead != 0; }
4345
4446 /// atIssueLimit - Return true if no more instructions may be issued in this
4547 /// cycle.
566566 virtual unsigned getInlineAsmLength(const char *Str,
567567 const MCAsmInfo &MAI) const;
568568
569 /// CreateTargetPreRAHazardRecognizer - Allocate and return a hazard
570 /// recognizer to use for this target when scheduling the machine instructions
571 /// before register allocation.
569 /// CreateTargetHazardRecognizer - Allocate and return a hazard recognizer to
570 /// use for this target when scheduling the machine instructions before
571 /// register allocation.
572572 virtual ScheduleHazardRecognizer*
573573 CreateTargetHazardRecognizer(const TargetMachine *TM,
574574 const ScheduleDAG *DAG) const = 0;
608608 /// instruction will be decoded to on the target cpu.
609609 virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData,
610610 const MachineInstr *MI) const;
611
612 /// isZeroCost - Return true for pseudo instructions that don't consume any
613 /// machine resources in their current form. These are common cases that the
614 /// scheduler should consider free, rather than conservatively handling them
615 /// as instructions with no itinerary.
616 bool isZeroCost(unsigned Opcode) const {
617 return Opcode <= TargetOpcode::COPY;
618 }
611619
612620 /// getOperandLatency - Compute and return the use operand latency of a given
613621 /// pair of def and use.
685693 const MachineBasicBlock *MBB,
686694 const MachineFunction &MF) const;
687695
696 bool usePreRAHazardRecognizer() const;
697
688698 virtual ScheduleHazardRecognizer *
689699 CreateTargetHazardRecognizer(const TargetMachine*, const ScheduleDAG*) const;
690700
1818 #include "llvm/Support/Debug.h"
1919 #include "llvm/Support/ErrorHandling.h"
2020 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/Target/TargetInstrInfo.h"
2122 #include "llvm/Target/TargetInstrItineraries.h"
2223
2324 using namespace llvm;
169170 if (!ItinData || ItinData->isEmpty())
170171 return;
171172
172 ++IssueCount;
173
174 unsigned cycle = 0;
175
176173 // Use the itinerary for the underlying instruction to reserve FU's
177174 // in the scoreboard at the appropriate future cycles.
178175 const TargetInstrDesc *TID = DAG->getInstrDesc(SU);
179176 assert(TID && "The scheduler must filter non-machineinstrs");
177 if (DAG->TII->isZeroCost(TID->Opcode))
178 return;
179
180 ++IssueCount;
181
182 unsigned cycle = 0;
183
180184 unsigned idx = TID->getSchedClass();
181185 for (const InstrStage *IS = ItinData->beginStage(idx),
182186 *E = ItinData->endStage(idx); IS != E; ++IS) {
6565 "which tries to balance ILP and register pressure",
6666 createILPListDAGScheduler);
6767
68 static cl::opt<bool> EnableSchedCycles(
69 "enable-sched-cycles",
70 cl::desc("Enable cycle-level precision during preRA scheduling"),
71 cl::init(false), cl::Hidden);
68 static cl::opt<bool> DisableSchedCycles(
69 "disable-sched-cycles", cl::Hidden, cl::init(true),
70 cl::desc("Disable cycle-level precision during preRA scheduling"));
7271
7372 namespace {
7473 //===----------------------------------------------------------------------===//
123122 Topo(SUnits) {
124123
125124 const TargetMachine &tm = mf.getTarget();
126 if (EnableSchedCycles && OptLevel != CodeGenOpt::None)
125 if (DisableSchedCycles || !NeedLatency)
126 HazardRec = new ScheduleHazardRecognizer();
127 else
127128 HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
128 else
129 HazardRec = new ScheduleHazardRecognizer();
130129 }
131130
132131 ~ScheduleDAGRRList() {
167166
168167 private:
169168 bool isReady(SUnit *SU) {
170 return !EnableSchedCycles || !AvailableQueue->hasReadyFilter() ||
169 return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
171170 AvailableQueue->isReady(SU);
172171 }
173172
236235 << " '" << BB->getName() << "' **********\n");
237236
238237 CurCycle = 0;
239 MinAvailableCycle = EnableSchedCycles ? UINT_MAX : 0;
238 MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
240239 NumLiveRegs = 0;
241240 LiveRegDefs.resize(TRI->getNumRegs(), NULL);
242241 LiveRegGens.resize(TRI->getNumRegs(), NULL);
349348 /// Check to see if any of the pending instructions are ready to issue. If
350349 /// so, add them to the available queue.
351350 void ScheduleDAGRRList::ReleasePending() {
352 if (!EnableSchedCycles) {
351 if (DisableSchedCycles) {
353352 assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
354353 return;
355354 }
384383 return;
385384
386385 AvailableQueue->setCurCycle(NextCycle);
387 if (HazardRec->getMaxLookAhead() == 0) {
386 if (!HazardRec->isEnabled()) {
388387 // Bypass lots of virtual calls in case of long latency.
389388 CurCycle = NextCycle;
390389 }
404403 /// Move the scheduler state forward until the specified node's dependents are
405404 /// ready and can be scheduled with no resource conflicts.
406405 void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
407 if (!EnableSchedCycles)
406 if (DisableSchedCycles)
408407 return;
409408
410409 unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
439438 /// Record this SUnit in the HazardRecognizer.
440439 /// Does not update CurCycle.
441440 void ScheduleDAGRRList::EmitNode(SUnit *SU) {
442 if (!EnableSchedCycles || HazardRec->getMaxLookAhead() == 0)
441 if (!HazardRec->isEnabled())
443442 return;
444443
445444 // Check for phys reg copy.
524523 // (1) No available instructions
525524 // (2) All pipelines full, so available instructions must have hazards.
526525 //
527 // If SchedCycles is disabled, count each inst as one cycle.
528 if (!EnableSchedCycles ||
529 AvailableQueue->empty() || HazardRec->atIssueLimit())
526 // If HazardRec is disabled, count each inst as one cycle.
527 if (!HazardRec->isEnabled() || HazardRec->atIssueLimit()
528 || AvailableQueue->empty())
530529 AdvanceToCycle(CurCycle + 1);
531530 }
532531
584583 SU->setHeightDirty();
585584 SU->isScheduled = false;
586585 SU->isAvailable = true;
587 if (EnableSchedCycles && AvailableQueue->hasReadyFilter()) {
586 if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
588587 // Don't make available until backtracking is complete.
589588 SU->isPending = true;
590589 PendingQueue.push_back(SU);
20092008 } else if (RStall)
20102009 return -1;
20112010
2012 // If either node is scheduling for latency, sort them by depth
2011 // If either node is scheduling for latency, sort them by height/depth
20132012 // and latency.
20142013 if (!checkPref || (left->SchedulingPref == Sched::Latency ||
20152014 right->SchedulingPref == Sched::Latency)) {
2016 int LDepth = (int)left->getDepth();
2017 int RDepth = (int)right->getDepth();
2018
2019 if (EnableSchedCycles) {
2020 if (LDepth != RDepth)
2015 if (DisableSchedCycles) {
2016 if (LHeight != RHeight)
2017 return LHeight > RHeight ? 1 : -1;
2018 }
2019 else {
2020 // If neither instruction stalls (!LStall && !RStall) then
2021 // its height is already covered so only its depth matters. We also reach
2022 // this if both stall but have the same height.
2023 unsigned LDepth = left->getDepth();
2024 unsigned RDepth = right->getDepth();
2025 if (LDepth != RDepth) {
20212026 DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
20222027 << ") depth " << LDepth << " vs SU (" << right->NodeNum
2023 << ") depth " << RDepth << ")\n");
2028 << ") depth " << RDepth << "\n");
20242029 return LDepth < RDepth ? 1 : -1;
2025 }
2026 else {
2027 if (LHeight != RHeight)
2028 return LHeight > RHeight ? 1 : -1;
2030 }
20292031 }
20302032 if (left->Latency != right->Latency)
20312033 return left->Latency > right->Latency ? 1 : -1;
20672069 if (LScratch != RScratch)
20682070 return LScratch > RScratch;
20692071
2070 if (EnableSchedCycles) {
2072 if (!DisableSchedCycles) {
20712073 int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
20722074 if (result != 0)
20732075 return result > 0;
2323 #include "llvm/CodeGen/MachineRegisterInfo.h"
2424 #include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
2525 #include "llvm/CodeGen/PseudoSourceValue.h"
26 #include "llvm/Support/CommandLine.h"
2627 #include "llvm/Support/Debug.h"
2728 #include "llvm/Support/ErrorHandling.h"
2829 #include "llvm/Support/raw_ostream.h"
2930 using namespace llvm;
31
32 static cl::opt<bool> DisableHazardRecognizer(
33 "disable-sched-hazard", cl::Hidden, cl::init(false),
34 cl::desc("Disable hazard detection during preRA scheduling"));
3035
3136 /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
3237 /// after it, replacing it with an unconditional branch to NewDest.
420425 return false;
421426 }
422427
423 // Default implementation of CreateTargetPreRAHazardRecognizer.
428 // Provide a global flag for disabling the PreRA hazard recognizer that targets
429 // may choose to honor.
430 bool TargetInstrInfoImpl::usePreRAHazardRecognizer() const {
431 return !DisableHazardRecognizer;
432 }
433
434 // Default implementation of CreateTargetHazardRecognizer.
424435 ScheduleHazardRecognizer *TargetInstrInfoImpl::
425436 CreateTargetHazardRecognizer(const TargetMachine *TM,
426437 const ScheduleDAG *DAG) const {
4040 EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
4141 cl::desc("Enable ARM 2-addr to 3-addr conv"));
4242
43 // Other targets already have a hazard recognizer enabled by default, so this
44 // flag currently only affects ARM. It will be generalized when it becomes a
45 // disabled flag.
46 static cl::opt<bool> EnableHazardRecognizer(
47 "enable-sched-hazard", cl::Hidden,
48 cl::desc("Enable hazard detection during preRA scheduling"),
49 cl::init(false));
50
5143 /// ARM_MLxEntry - Record information about MLA / MLS instructions.
5244 struct ARM_MLxEntry {
5345 unsigned MLxOpc; // MLA / MLS opcode
9688 ScheduleHazardRecognizer *ARMBaseInstrInfo::
9789 CreateTargetHazardRecognizer(const TargetMachine *TM,
9890 const ScheduleDAG *DAG) const {
99 if (EnableHazardRecognizer) {
91 if (usePreRAHazardRecognizer()) {
10092 const InstrItineraryData *II = TM->getInstrItineraryData();
10193 return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
10294 }
21722164 return 1;
21732165
21742166 const TargetInstrDesc &DefTID = get(DefNode->getMachineOpcode());
2167
2168 if (isZeroCost(DefTID.Opcode))
2169 return 0;
2170
21752171 if (!ItinData || ItinData->isEmpty())
21762172 return DefTID.mayLoad() ? 3 : 1;
21772173
1212 ret float %retval
1313 }
1414 ; VFP2: test1:
15 ; VFP2: vneg.f32 s1, s0
15 ; VFP2: vneg.f32 s{{.*}}, s{{.*}}
1616
1717 ; NFP1: test1:
18 ; NFP1: vneg.f32 d1, d0
18 ; NFP1: vneg.f32 d{{.*}}, d{{.*}}
1919
2020 ; NFP0: test1:
21 ; NFP0: vneg.f32 s1, s0
21 ; NFP0: vneg.f32 s{{.*}}, s{{.*}}
2222
2323 ; CORTEXA8: test1:
24 ; CORTEXA8: vneg.f32 d1, d0
24 ; CORTEXA8: vneg.f32 d{{.*}}, d{{.*}}
2525
2626 ; CORTEXA9: test1:
27 ; CORTEXA9: vneg.f32 s1, s0
27 ; CORTEXA9: vneg.f32 s{{.*}}, s{{.*}}
2828
2929 define float @test2(float* %a) {
3030 entry:
3636 ret float %retval
3737 }
3838 ; VFP2: test2:
39 ; VFP2: vneg.f32 s1, s0
39 ; VFP2: vneg.f32 s{{.*}}, s{{.*}}
4040
4141 ; NFP1: test2:
42 ; NFP1: vneg.f32 d1, d0
42 ; NFP1: vneg.f32 d{{.*}}, d{{.*}}
4343
4444 ; NFP0: test2:
45 ; NFP0: vneg.f32 s1, s0
45 ; NFP0: vneg.f32 s{{.*}}, s{{.*}}
4646
4747 ; CORTEXA8: test2:
48 ; CORTEXA8: vneg.f32 d1, d0
48 ; CORTEXA8: vneg.f32 d{{.*}}, d{{.*}}
4949
5050 ; CORTEXA9: test2:
51 ; CORTEXA9: vneg.f32 s1, s0
51 ; CORTEXA9: vneg.f32 s{{.*}}, s{{.*}}
5252
1010 ; NEON: vnmla.f32
1111
1212 ; A8: t1:
13 ; A8: vnmul.f32 s0, s1, s0
13 ; A8: vnmul.f32 s0, s{{[01]}}, s{{[01]}}
1414 ; A8: vsub.f32 d0, d0, d1
1515 %0 = fmul float %a, %b
1616 %1 = fsub float -0.0, %0
2727 ; NEON: vnmla.f32
2828
2929 ; A8: t2:
30 ; A8: vnmul.f32 s0, s1, s0
30 ; A8: vnmul.f32 s0, s{{[01]}}, s{{[01]}}
3131 ; A8: vsub.f32 d0, d0, d1
3232 %0 = fmul float %a, %b
3333 %1 = fmul float -1.0, %0
4444 ; NEON: vnmla.f64
4545
4646 ; A8: t3:
47 ; A8: vnmul.f64 d16, d16, d17
47 ; A8: vnmul.f64 d16, d1{{[67]}}, d1{{[67]}}
4848 ; A8: vsub.f64 d16, d16, d17
4949 %0 = fmul double %a, %b
5050 %1 = fsub double -0.0, %0
6161 ; NEON: vnmla.f64
6262
6363 ; A8: t4:
64 ; A8: vnmul.f64 d16, d16, d17
64 ; A8: vnmul.f64 d16, d1{{[67]}}, d1{{[67]}}
6565 ; A8: vsub.f64 d16, d16, d17
6666 %0 = fmul double %a, %b
6767 %1 = fmul double -1.0, %0
22 define float @t1(float %x) nounwind readnone optsize {
33 entry:
44 ; CHECK: t1:
5 ; CHECK: vmov.f32 s1, #4.000000e+00
5 ; CHECK: vmov.f32 s{{.*}}, #4.000000e+00
66 %0 = fadd float %x, 4.000000e+00
77 ret float %0
88 }
2626 define float @t4(float %x) nounwind readnone optsize {
2727 entry:
2828 ; CHECK: t4:
29 ; CHECK: vmov.f32 s1, #-2.400000e+01
29 ; CHECK: vmov.f32 s{{.*}}, #-2.400000e+01
3030 %0 = fmul float %x, -2.400000e+01
3131 ret float %0
3232 }
None ; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=GENERIC
0 ; RUN: llc < %s -march=arm -pre-RA-sched=source | FileCheck %s -check-prefix=GENERIC
11 ; RUN: llc < %s -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=DARWIN_V6
22 ; RUN: llc < %s -mtriple=armv6-apple-darwin -arm-strict-align | FileCheck %s -check-prefix=GENERIC
33 ; RUN: llc < %s -mtriple=armv6-linux | FileCheck %s -check-prefix=GENERIC