llvm.org GIT mirror llvm / 40b689d
[MachineScheduler] checkResourceLimit boundary condition update When we call checkResourceLimit in bumpCycle or bumpNode and the resource count has just reached the limit (the two sides of the equation are equal), we should return true to mark that we are resource limited for the next schedule; otherwise we might continue to schedule in favor of latency for one more schedule and create a schedule that actually overbooks the resource. When we call checkResourceLimit to estimate the resource limit before scheduling, we don't need to return true even if the two sides are equal, as that case shouldn't limit the schedule. Differential Revision: https://reviews.llvm.org/D62345 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362805 91177308-0d34-0410-b5e6-96231b3b80d8 Jinsong Ji 4 months ago
6 changed file(s) with 33 addition(s) and 27 deletion(s). Raw diff Collapse all Expand all
18361836
18371837 /// Given a Count of resource usage and a Latency value, return true if a
18381838 /// SchedBoundary becomes resource limited.
1839 /// If we are checking after scheduling a node, we should return true when
1840 /// we just reach the resource limit.
18391841 static bool checkResourceLimit(unsigned LFactor, unsigned Count,
1840 unsigned Latency) {
1841 return (int)(Count - (Latency * LFactor)) > (int)LFactor;
1842 unsigned Latency, bool AfterSchedNode) {
1843 int ResCntFactor = (int)(Count - (Latency * LFactor));
1844 if (AfterSchedNode)
1845 return ResCntFactor >= (int)LFactor;
1846 else
1847 return ResCntFactor > (int)LFactor;
18421848 }
18431849
18441850 void SchedBoundary::reset() {
21332139 CheckPending = true;
21342140 IsResourceLimited =
21352141 checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2136 getScheduledLatency());
2142 getScheduledLatency(), true);
21372143
21382144 LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
21392145 << '\n');
23012307 // resource limited. If a stall occurred, bumpCycle does this.
23022308 IsResourceLimited =
23032309 checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2304 getScheduledLatency());
2310 getScheduledLatency(), true);
23052311
23062312 // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
23072313 // resets CurrMOps. Loop to handle instructions with more MOps than issue in
25202526 RemLatency = computeRemLatency(CurrZone);
25212527 RemLatencyComputed = true;
25222528 OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
2523 OtherCount, RemLatency);
2529 OtherCount, RemLatency, false);
25242530 }
25252531
25262532 // Schedule aggressively for latency in PostRA mode. We don't check for
20232023 ; P9LE: # %bb.0: # %entry
20242024 ; P9LE-NEXT: lfd f0, 24(r3)
20252025 ; P9LE-NEXT: lfd f2, 8(r3)
2026 ; P9LE-NEXT: xxmrghd vs0, vs2, vs0
20262027 ; P9LE-NEXT: lfd f1, 16(r3)
2027 ; P9LE-NEXT: xxmrghd vs0, vs2, vs0
20282028 ; P9LE-NEXT: lfd f3, 0(r3)
20292029 ; P9LE-NEXT: xvcvdpsxws v2, vs0
20302030 ; P9LE-NEXT: xxmrghd vs0, vs3, vs1
36073607 ; P9LE: # %bb.0: # %entry
36083608 ; P9LE-NEXT: lfd f0, 24(r3)
36093609 ; P9LE-NEXT: lfd f2, 8(r3)
3610 ; P9LE-NEXT: xxmrghd vs0, vs2, vs0
36103611 ; P9LE-NEXT: lfd f1, 16(r3)
3611 ; P9LE-NEXT: xxmrghd vs0, vs2, vs0
36123612 ; P9LE-NEXT: lfd f3, 0(r3)
36133613 ; P9LE-NEXT: xvcvdpuxws v2, vs0
36143614 ; P9LE-NEXT: xxmrghd vs0, vs3, vs1
5656 ; CHECK-PWR9-NEXT: .cfi_offset v31, -304
5757 ; CHECK-PWR9-NEXT: std r14, 240(r1) # 8-byte Folded Spill
5858 ; CHECK-PWR9-NEXT: std r15, 248(r1) # 8-byte Folded Spill
59 ; CHECK-PWR9-NEXT: std r16, 256(r1) # 8-byte Folded Spill
6059 ; CHECK-PWR9-NEXT: stxv v20, 48(r1) # 16-byte Folded Spill
6160 ; CHECK-PWR9-NEXT: stxv v21, 64(r1) # 16-byte Folded Spill
61 ; CHECK-PWR9-NEXT: std r16, 256(r1) # 8-byte Folded Spill
6262 ; CHECK-PWR9-NEXT: stxv v22, 80(r1) # 16-byte Folded Spill
6363 ; CHECK-PWR9-NEXT: std r17, 264(r1) # 8-byte Folded Spill
6464 ; CHECK-PWR9-NEXT: stxv v23, 96(r1) # 16-byte Folded Spill
111111 ; CHECK-PWR9-NEXT: ld r4, 32(r1) # 8-byte Folded Reload
112112 ; CHECK-PWR9-NEXT: lxv v31, 224(r1) # 16-byte Folded Reload
113113 ; CHECK-PWR9-NEXT: lxv v30, 208(r1) # 16-byte Folded Reload
114 ; CHECK-PWR9-NEXT: add r3, r4, r3
114115 ; CHECK-PWR9-NEXT: lxv v29, 192(r1) # 16-byte Folded Reload
115 ; CHECK-PWR9-NEXT: add r3, r4, r3
116116 ; CHECK-PWR9-NEXT: lxv v28, 176(r1) # 16-byte Folded Reload
117117 ; CHECK-PWR9-NEXT: lxv v27, 160(r1) # 16-byte Folded Reload
118118 ; CHECK-PWR9-NEXT: lxv v26, 144(r1) # 16-byte Folded Reload
127127 ; CHECK-PWR9-NEXT: lfd f29, 504(r1) # 8-byte Folded Reload
128128 ; CHECK-PWR9-NEXT: lfd f28, 496(r1) # 8-byte Folded Reload
129129 ; CHECK-PWR9-NEXT: lfd f27, 488(r1) # 8-byte Folded Reload
130 ; CHECK-PWR9-NEXT: lfd f26, 480(r1) # 8-byte Folded Reload
131130 ; CHECK-PWR9-NEXT: ld r31, 376(r1) # 8-byte Folded Reload
132131 ; CHECK-PWR9-NEXT: ld r30, 368(r1) # 8-byte Folded Reload
133132 ; CHECK-PWR9-NEXT: ld r29, 360(r1) # 8-byte Folded Reload
134133 ; CHECK-PWR9-NEXT: ld r28, 352(r1) # 8-byte Folded Reload
134 ; CHECK-PWR9-NEXT: lfd f26, 480(r1) # 8-byte Folded Reload
135135 ; CHECK-PWR9-NEXT: ld r27, 344(r1) # 8-byte Folded Reload
136136 ; CHECK-PWR9-NEXT: ld r26, 336(r1) # 8-byte Folded Reload
137137 ; CHECK-PWR9-NEXT: ld r25, 328(r1) # 8-byte Folded Reload
8484 ; CHECK-LABEL: maxVecParam:
8585 ; CHECK: # %bb.0: # %entry
8686 ; CHECK-NEXT: xsaddqp v2, v2, v3
87 ; CHECK-NEXT: lxv v[[REG0:[0-9]+]], 224(r1)
8788 ; CHECK-NEXT: xsaddqp v2, v2, v4
8889 ; CHECK-NEXT: xsaddqp v2, v2, v5
8990 ; CHECK-NEXT: xsaddqp v2, v2, v6
9495 ; CHECK-NEXT: xsaddqp v2, v2, v11
9596 ; CHECK-NEXT: xsaddqp v2, v2, v12
9697 ; CHECK-NEXT: xsaddqp v2, v2, v13
97 ; CHECK-NEXT: lxv v[[REG0:[0-9]+]], 224(r1)
9898 ; CHECK-NEXT: xssubqp v2, v2, v[[REG0]]
9999 ; CHECK-NEXT: blr
100100 fp128 %p6, fp128 %p7, fp128 %p8, fp128 %p9, fp128 %p10,
166166 ; CHECK-P9-NEXT: lxv vs1, 96(r4)
167167 ; CHECK-P9-NEXT: lxv vs2, 80(r4)
168168 ; CHECK-P9-NEXT: lxv vs3, 64(r4)
169 ; CHECK-P9-NEXT: xvcvdpuxds vs3, vs3
169170 ; CHECK-P9-NEXT: lxv vs4, 48(r4)
170 ; CHECK-P9-NEXT: xvcvdpuxds vs4, vs4
171171 ; CHECK-P9-NEXT: lxv vs5, 32(r4)
172172 ; CHECK-P9-NEXT: lxv vs6, 16(r4)
173173 ; CHECK-P9-NEXT: lxv vs7, 0(r4)
174174 ; CHECK-P9-NEXT: xvcvdpuxds vs7, vs7
175175 ; CHECK-P9-NEXT: xvcvdpuxds vs6, vs6
176176 ; CHECK-P9-NEXT: xvcvdpuxds vs5, vs5
177 ; CHECK-P9-NEXT: xvcvdpuxds vs3, vs3
177 ; CHECK-P9-NEXT: xvcvdpuxds vs4, vs4
178178 ; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2
179179 ; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1
180180 ; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0
194194 ; CHECK-BE-NEXT: lxv vs1, 96(r4)
195195 ; CHECK-BE-NEXT: lxv vs2, 80(r4)
196196 ; CHECK-BE-NEXT: lxv vs3, 64(r4)
197 ; CHECK-BE-NEXT: xvcvdpuxds vs3, vs3
197198 ; CHECK-BE-NEXT: lxv vs4, 48(r4)
198 ; CHECK-BE-NEXT: xvcvdpuxds vs4, vs4
199199 ; CHECK-BE-NEXT: lxv vs5, 32(r4)
200200 ; CHECK-BE-NEXT: lxv vs6, 16(r4)
201201 ; CHECK-BE-NEXT: lxv vs7, 0(r4)
202202 ; CHECK-BE-NEXT: xvcvdpuxds vs7, vs7
203203 ; CHECK-BE-NEXT: xvcvdpuxds vs6, vs6
204204 ; CHECK-BE-NEXT: xvcvdpuxds vs5, vs5
205 ; CHECK-BE-NEXT: xvcvdpuxds vs3, vs3
205 ; CHECK-BE-NEXT: xvcvdpuxds vs4, vs4
206206 ; CHECK-BE-NEXT: xvcvdpuxds vs2, vs2
207207 ; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1
208208 ; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0
380380 ; CHECK-P9-NEXT: lxv vs1, 96(r4)
381381 ; CHECK-P9-NEXT: lxv vs2, 80(r4)
382382 ; CHECK-P9-NEXT: lxv vs3, 64(r4)
383 ; CHECK-P9-NEXT: xvcvdpsxds vs3, vs3
383384 ; CHECK-P9-NEXT: lxv vs4, 48(r4)
384 ; CHECK-P9-NEXT: xvcvdpsxds vs4, vs4
385385 ; CHECK-P9-NEXT: lxv vs5, 32(r4)
386386 ; CHECK-P9-NEXT: lxv vs6, 16(r4)
387387 ; CHECK-P9-NEXT: lxv vs7, 0(r4)
388388 ; CHECK-P9-NEXT: xvcvdpsxds vs7, vs7
389389 ; CHECK-P9-NEXT: xvcvdpsxds vs6, vs6
390390 ; CHECK-P9-NEXT: xvcvdpsxds vs5, vs5
391 ; CHECK-P9-NEXT: xvcvdpsxds vs3, vs3
391 ; CHECK-P9-NEXT: xvcvdpsxds vs4, vs4
392392 ; CHECK-P9-NEXT: xvcvdpsxds vs2, vs2
393393 ; CHECK-P9-NEXT: xvcvdpsxds vs1, vs1
394394 ; CHECK-P9-NEXT: xvcvdpsxds vs0, vs0
408408 ; CHECK-BE-NEXT: lxv vs1, 96(r4)
409409 ; CHECK-BE-NEXT: lxv vs2, 80(r4)
410410 ; CHECK-BE-NEXT: lxv vs3, 64(r4)
411 ; CHECK-BE-NEXT: xvcvdpsxds vs3, vs3
411412 ; CHECK-BE-NEXT: lxv vs4, 48(r4)
412 ; CHECK-BE-NEXT: xvcvdpsxds vs4, vs4
413413 ; CHECK-BE-NEXT: lxv vs5, 32(r4)
414414 ; CHECK-BE-NEXT: lxv vs6, 16(r4)
415415 ; CHECK-BE-NEXT: lxv vs7, 0(r4)
416416 ; CHECK-BE-NEXT: xvcvdpsxds vs7, vs7
417417 ; CHECK-BE-NEXT: xvcvdpsxds vs6, vs6
418418 ; CHECK-BE-NEXT: xvcvdpsxds vs5, vs5
419 ; CHECK-BE-NEXT: xvcvdpsxds vs3, vs3
419 ; CHECK-BE-NEXT: xvcvdpsxds vs4, vs4
420420 ; CHECK-BE-NEXT: xvcvdpsxds vs2, vs2
421421 ; CHECK-BE-NEXT: xvcvdpsxds vs1, vs1
422422 ; CHECK-BE-NEXT: xvcvdpsxds vs0, vs0
166166 ; CHECK-P9-NEXT: lxv v3, 96(r4)
167167 ; CHECK-P9-NEXT: lxv v4, 80(r4)
168168 ; CHECK-P9-NEXT: lxv v5, 64(r4)
169 ; CHECK-P9-NEXT: xvcvuxddp vs4, v5
169170 ; CHECK-P9-NEXT: lxv v0, 48(r4)
170 ; CHECK-P9-NEXT: xvcvuxddp vs3, v0
171171 ; CHECK-P9-NEXT: lxv v1, 32(r4)
172172 ; CHECK-P9-NEXT: lxv v6, 16(r4)
173173 ; CHECK-P9-NEXT: lxv v7, 0(r4)
174174 ; CHECK-P9-NEXT: xvcvuxddp vs0, v7
175175 ; CHECK-P9-NEXT: xvcvuxddp vs1, v6
176176 ; CHECK-P9-NEXT: xvcvuxddp vs2, v1
177 ; CHECK-P9-NEXT: xvcvuxddp vs4, v5
177 ; CHECK-P9-NEXT: xvcvuxddp vs3, v0
178178 ; CHECK-P9-NEXT: xvcvuxddp vs5, v4
179179 ; CHECK-P9-NEXT: xvcvuxddp vs6, v3
180180 ; CHECK-P9-NEXT: xvcvuxddp vs7, v2
194194 ; CHECK-BE-NEXT: lxv v3, 96(r4)
195195 ; CHECK-BE-NEXT: lxv v4, 80(r4)
196196 ; CHECK-BE-NEXT: lxv v5, 64(r4)
197 ; CHECK-BE-NEXT: xvcvuxddp vs4, v5
197198 ; CHECK-BE-NEXT: lxv v0, 48(r4)
198 ; CHECK-BE-NEXT: xvcvuxddp vs3, v0
199199 ; CHECK-BE-NEXT: lxv v1, 32(r4)
200200 ; CHECK-BE-NEXT: lxv v6, 16(r4)
201201 ; CHECK-BE-NEXT: lxv v7, 0(r4)
202202 ; CHECK-BE-NEXT: xvcvuxddp vs0, v7
203203 ; CHECK-BE-NEXT: xvcvuxddp vs1, v6
204204 ; CHECK-BE-NEXT: xvcvuxddp vs2, v1
205 ; CHECK-BE-NEXT: xvcvuxddp vs4, v5
205 ; CHECK-BE-NEXT: xvcvuxddp vs3, v0
206206 ; CHECK-BE-NEXT: xvcvuxddp vs5, v4
207207 ; CHECK-BE-NEXT: xvcvuxddp vs6, v3
208208 ; CHECK-BE-NEXT: xvcvuxddp vs7, v2
380380 ; CHECK-P9-NEXT: lxv v3, 96(r4)
381381 ; CHECK-P9-NEXT: lxv v4, 80(r4)
382382 ; CHECK-P9-NEXT: lxv v5, 64(r4)
383 ; CHECK-P9-NEXT: xvcvsxddp vs4, v5
383384 ; CHECK-P9-NEXT: lxv v0, 48(r4)
384 ; CHECK-P9-NEXT: xvcvsxddp vs3, v0
385385 ; CHECK-P9-NEXT: lxv v1, 32(r4)
386386 ; CHECK-P9-NEXT: lxv v6, 16(r4)
387387 ; CHECK-P9-NEXT: lxv v7, 0(r4)
388388 ; CHECK-P9-NEXT: xvcvsxddp vs0, v7
389389 ; CHECK-P9-NEXT: xvcvsxddp vs1, v6
390390 ; CHECK-P9-NEXT: xvcvsxddp vs2, v1
391 ; CHECK-P9-NEXT: xvcvsxddp vs4, v5
391 ; CHECK-P9-NEXT: xvcvsxddp vs3, v0
392392 ; CHECK-P9-NEXT: xvcvsxddp vs5, v4
393393 ; CHECK-P9-NEXT: xvcvsxddp vs6, v3
394394 ; CHECK-P9-NEXT: xvcvsxddp vs7, v2
408408 ; CHECK-BE-NEXT: lxv v3, 96(r4)
409409 ; CHECK-BE-NEXT: lxv v4, 80(r4)
410410 ; CHECK-BE-NEXT: lxv v5, 64(r4)
411 ; CHECK-BE-NEXT: xvcvsxddp vs4, v5
411412 ; CHECK-BE-NEXT: lxv v0, 48(r4)
412 ; CHECK-BE-NEXT: xvcvsxddp vs3, v0
413413 ; CHECK-BE-NEXT: lxv v1, 32(r4)
414414 ; CHECK-BE-NEXT: lxv v6, 16(r4)
415415 ; CHECK-BE-NEXT: lxv v7, 0(r4)
416416 ; CHECK-BE-NEXT: xvcvsxddp vs0, v7
417417 ; CHECK-BE-NEXT: xvcvsxddp vs1, v6
418418 ; CHECK-BE-NEXT: xvcvsxddp vs2, v1
419 ; CHECK-BE-NEXT: xvcvsxddp vs4, v5
419 ; CHECK-BE-NEXT: xvcvsxddp vs3, v0
420420 ; CHECK-BE-NEXT: xvcvsxddp vs5, v4
421421 ; CHECK-BE-NEXT: xvcvsxddp vs6, v3
422422 ; CHECK-BE-NEXT: xvcvsxddp vs7, v2