llvm.org GIT mirror llvm / 4765229
Improve machine schedulers for in-order processors This patch enables schedulers to specify instructions that cannot be issued with any other instructions. It also fixes BeginGroup/EndGroup. Reviewed by: Andrew Trick Differential Revision: https://reviews.llvm.org/D30744 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298885 91177308-0d34-0410-b5e6-96231b3b80d8 Javed Absar 3 years ago
7 changed file(s) with 154 addition(s) and 1 deletion(s). Raw diff Collapse all Expand all
9090 /// \brief Maximum number of micro-ops that may be scheduled per cycle.
9191 unsigned getIssueWidth() const { return SchedModel.IssueWidth; }
9292
93 /// \brief Return true if new group must begin.
94 bool mustBeginGroup(const MachineInstr *MI,
95 const MCSchedClassDesc *SC = nullptr) const;
96 /// \brief Return true if current group must end.
97 bool mustEndGroup(const MachineInstr *MI,
98 const MCSchedClassDesc *SC = nullptr) const;
99
93100 /// \brief Return the number of issue slots required for this MI.
94101 unsigned getNumMicroOps(const MachineInstr *MI,
95102 const MCSchedClassDesc *SC = nullptr) const;
175182 bool UseDefaultDefLatency = true) const;
176183 unsigned computeInstrLatency(unsigned Opcode) const;
177184
185
178186 /// \brief Output dependency latency of a pair of defs of the same register.
179187 ///
180188 /// This is typically one cycle.
254254 // Allow a processor to mark some scheduling classes as unsupported
255255 // for stronger verification.
256256 bit Unsupported = 0;
257 // Allow a processor to mark some scheduling classes as single-issue.
258 // SingleIssue is an alias for Begin/End Group.
259 bit SingleIssue = 0;
257260 SchedMachineModel SchedModel = ?;
258261 }
259262
11721172 dbgs() << " Pressure Diff : ";
11731173 getPressureDiff(&SU).dump(*TRI);
11741174 }
1175 dbgs() << " Single Issue : ";
1176 if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1177 SchedModel.mustEndGroup(SU.getInstr()))
1178 dbgs() << "true;";
1179 else
1180 dbgs() << "false;";
11751181 dbgs() << '\n';
11761182 }
11771183 if (ExitSU.getInstr() != nullptr)
19091915 && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
19101916 return true;
19111917 }
1918
19121919 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
19131920 if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
19141921 DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
19151922 << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
19161923 return true;
19171924 }
1925
1926 if (CurrMOps > 0 &&
1927 ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
1928 (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
1929 DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
1930 << (isTop()? "begin" : "end") << " group\n");
1931 return true;
1932 }
1933
19181934 if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
19191935 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
19201936 for (TargetSchedModel::ProcResIter
22102226 // one cycle. Since we commonly reach the max MOps here, opportunistically
22112227 // bump the cycle to avoid uselessly checking everything in the readyQ.
22122228 CurrMOps += IncMOps;
2229
2230 // Bump the cycle count for issue group constraints.
2231 // This must be done after NextCycle has been adjusted for all other stalls.
2232 // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2233 // CurrCycle to X.
2234 if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
2235 (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2236 DEBUG(dbgs() << " Bump cycle to "
2237 << (isTop() ? "end" : "begin") << " group\n");
2238 bumpCycle(++NextCycle);
2239 }
2240
22132241 while (CurrMOps >= SchedModel->getIssueWidth()) {
22142242 DEBUG(dbgs() << " *** Max MOps " << CurrMOps
22152243 << " at cycle " << CurrCycle << '\n');
8383 }
8484 }
8585
86 /// Returns true only if instruction is specified as single issue.
87 bool TargetSchedModel::mustBeginGroup(const MachineInstr *MI,
88 const MCSchedClassDesc *SC) const {
89 if (hasInstrSchedModel()) {
90 if (!SC)
91 SC = resolveSchedClass(MI);
92 if (SC->isValid())
93 return SC->BeginGroup;
94 }
95 return false;
96 }
97
98 bool TargetSchedModel::mustEndGroup(const MachineInstr *MI,
99 const MCSchedClassDesc *SC) const {
100 if (hasInstrSchedModel()) {
101 if (!SC)
102 SC = resolveSchedClass(MI);
103 if (SC->isValid())
104 return SC->EndGroup;
105 }
106 return false;
107 }
108
86109 unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI,
87110 const MCSchedClassDesc *SC) const {
88111 if (hasInstrItineraries()) {
7373
7474 // Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2)
7575 def : WriteRes {
76 let Latency = 8; let ResourceCycles = [8]; // not pipelined
76 let Latency = 8; let ResourceCycles = [8]; // non-pipelined
7777 }
7878
7979 // Branches - LR written in Late EX2
716716 let Latency = 6;
717717 let NumMicroOps = 3;
718718 let ResourceCycles = [2];
719 let SingleIssue = 1;
719720 }
720721 def R52WriteVLD3Mem : SchedWriteRes<[R52UnitLd]> {
721722 let Latency = 7;
722723 let NumMicroOps = 5;
723724 let ResourceCycles = [3];
725 let SingleIssue = 1;
724726 }
725727 def R52WriteVLD4Mem : SchedWriteRes<[R52UnitLd]> {
726728 let Latency = 8;
727729 let NumMicroOps = 7;
728730 let ResourceCycles = [4];
731 let SingleIssue = 1;
729732 }
730733 def R52WriteVST1Mem : SchedWriteRes<[R52UnitLd]> {
731734 let Latency = 5;
0 # RUN: llc -o /dev/null %s -mtriple=arm-eabi -mcpu=cortex-r52 -run-pass machine-scheduler -enable-misched -debug-only=misched -misched-topdown 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=TOPDOWN
1 # RUN: llc -o /dev/null %s -mtriple=arm-eabi -mcpu=cortex-r52 -run-pass machine-scheduler -enable-misched -debug-only=misched -misched-bottomup 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=BOTTOMUP
2 # REQUIRES: asserts
3 --- |
4 ; ModuleID = 'foo.ll'
5 source_filename = "foo.ll"
6 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
7 target triple = "arm---eabi"
8
9 %struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
10 ; Function Attrs: nounwind
11 define <8 x i8> @foo(i8* %A) {
12 %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8.p0i8(i8* %A, i32 8)
13 %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
14 %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 1
15 %tmp4 = add <8 x i8> %tmp2, %tmp3
16 ret <8 x i8> %tmp4
17 }
18 declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8.p0i8(i8*, i32)
19
20 # CHECK: ********** MI Scheduling **********
21 # CHECK: ScheduleDAGMILive::schedule starting
22 # CHECK: SU(1): %vreg1 = VLD4d8Pseudo %vreg0, 8, pred:14, pred:%noreg; mem:LD32[%A](align=8) QQPR:%vreg1 GPR:%vreg0
23 # CHECK: Latency : 8
24 # CHECK: Single Issue : true;
25 # CHECK: SU(2): %vreg4 = VADDv8i8 %vreg1:dsub_0, %vreg1:dsub_1, pred:14, pred:%noreg; DPR:%vreg4 QQPR:%vreg1
26 # CHECK: Latency : 5
27 # CHECK: Single Issue : false;
28 # CHECK: SU(3): %vreg5, %vreg6 = VMOVRRD %vreg4, pred:14, pred:%noreg; GPR:%vreg5,%vreg6 DPR:%vreg4
29 # CHECK: Latency : 4
30 # CHECK: Single Issue : false;
31
32 # TOPDOWN: Scheduling SU(1) %vreg1 = VLD4d8Pseudo
33 # TOPDOWN: Bump cycle to end group
34 # TOPDOWN: Scheduling SU(2) %vreg4 = VADDv8i8
35
36 # BOTTOMUP: Scheduling SU(2) %vreg4 = VADDv8i8
37 # BOTTOMUP: Scheduling SU(1) %vreg1 = VLD4d8Pseudo
38 # BOTTOMUP: Bump cycle to begin group
39
40 ...
41 ---
42 name: foo
43 alignment: 2
44 exposesReturnsTwice: false
45 legalized: false
46 regBankSelected: false
47 selected: false
48 tracksRegLiveness: true
49 registers:
50 - { id: 0, class: gpr }
51 - { id: 1, class: qqpr }
52 - { id: 2, class: dpr }
53 - { id: 3, class: dpr }
54 - { id: 4, class: dpr }
55 - { id: 5, class: gpr }
56 - { id: 6, class: gpr }
57 liveins:
58 - { reg: '%r0', virtual-reg: '%0' }
59 frameInfo:
60 isFrameAddressTaken: false
61 isReturnAddressTaken: false
62 hasStackMap: false
63 hasPatchPoint: false
64 stackSize: 0
65 offsetAdjustment: 0
66 maxAlignment: 0
67 adjustsStack: false
68 hasCalls: false
69 maxCallFrameSize: 0
70 hasOpaqueSPAdjustment: false
71 hasVAStart: false
72 hasMustTailInVarArgFunc: false
73 body: |
74 bb.0 (%ir-block.0):
75 liveins: %r0
76
77 %0 = COPY %r0
78 %1 = VLD4d8Pseudo %0, 8, 14, _ :: (load 32 from %ir.A, align 8)
79 %4 = VADDv8i8 %1.dsub_0, %1.dsub_1, 14, _
80 %5, %6 = VMOVRRD %4, 14, _
81 %r0 = COPY %5
82 %r1 = COPY %6
83 BX_RET 14, _, implicit %r0, implicit killed %r1
84
85 ...
916916 SCDesc.NumMicroOps += WriteRes->getValueAsInt("NumMicroOps");
917917 SCDesc.BeginGroup |= WriteRes->getValueAsBit("BeginGroup");
918918 SCDesc.EndGroup |= WriteRes->getValueAsBit("EndGroup");
919 SCDesc.BeginGroup |= WriteRes->getValueAsBit("SingleIssue");
920 SCDesc.EndGroup |= WriteRes->getValueAsBit("SingleIssue");
919921
920922 // Create an entry for each ProcResource listed in WriteRes.
921923 RecVec PRVec = WriteRes->getValueAsListOfDefs("ProcResources");