llvm.org GIT mirror llvm / 9217916
Complete the MachineScheduler fix made way back in r210390.

"Fix the MachineScheduler's logic for updating ready times for in-order. Now the scheduler updates a node's ready time as soon as it is scheduled, before releasing dependent nodes."

This fix was only made in one variant of the ScheduleDAGMI driver. Francois de Ferriere reported the issue in the other bit of code where it was also needed.

I never got around to coming up with a test case, but it's an obvious fix that shouldn't be delayed any longer. I'll try to refactor this code a little better.

I did verify performance on a wide variety of targets and saw no negative impact with this fix.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@233366 91177308-0d34-0410-b5e6-96231b3b80d8

Andrew Trick, 4 years ago
8 changed file(s) with 28 addition(s) and 28 deletion(s). Raw diff Collapse all Expand all
10351035
10361036 scheduleMI(SU, IsTopNode);
10371037
1038 updateQueues(SU, IsTopNode);
1039
10401038 if (DFSResult) {
10411039 unsigned SubtreeID = DFSResult->getSubtreeID(SU);
10421040 if (!ScheduledTrees.test(SubtreeID)) {
10481046
10491047 // Notify the scheduling strategy after updating the DAG.
10501048 SchedImpl->schedNode(SU, IsTopNode);
1049
1050 updateQueues(SU, IsTopNode);
10511051 }
10521052 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
10531053
66 ; CHECK-LABEL: bar:
77 ; CHECK: add.2d v[[REG:[0-9]+]], v0, v1
88 ; CHECK: add d[[REG3:[0-9]+]], d[[REG]], d1
9 ; CHECK: sub d[[REG2:[0-9]+]], d[[REG]], d1
910 ; Without advanced copy optimization, we end up with cross register
1011 ; banks copies that cannot be coalesced.
1112 ; CHECK-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]]
1213 ; With advanced copy optimization, we end up with just one copy
1314 ; to insert the computed high part into the V register.
1415 ; CHECK-OPT-NOT: fmov
15 ; CHECK: sub d[[REG2:[0-9]+]], d[[REG]], d1
1616 ; CHECK: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
1717 ; CHECK-NOOPT: fmov d0, [[COPY_REG3]]
1818 ; CHECK-OPT-NOT: fmov
2222 ; GENERIC-LABEL: bar:
2323 ; GENERIC: add v[[REG:[0-9]+]].2d, v0.2d, v1.2d
2424 ; GENERIC: add d[[REG3:[0-9]+]], d[[REG]], d1
25 ; GENERIC: sub d[[REG2:[0-9]+]], d[[REG]], d1
2526 ; GENERIC-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]]
2627 ; GENERIC-OPT-NOT: fmov
27 ; GENERIC: sub d[[REG2:[0-9]+]], d[[REG]], d1
2828 ; GENERIC: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
2929 ; GENERIC-NOOPT: fmov d0, [[COPY_REG3]]
3030 ; GENERIC-OPT-NOT: fmov
1414
1515 define <8 x i8> @fptosi_v4f64_to_v4i8(<8 x double>* %ptr) {
1616 ; CHECK: fptosi_v4f64_to_v4i8
17 ; CHECK-DAG: fcvtzs v[[CONV0:[0-9]+]].2d, v0.2d
18 ; CHECK-DAG: fcvtzs v[[CONV1:[0-9]+]].2d, v1.2d
19 ; CHECK-DAG: fcvtzs v[[CONV2:[0-9]+]].2d, v2.2d
1720 ; CHECK-DAG: fcvtzs v[[CONV3:[0-9]+]].2d, v3.2d
18 ; CHECK-DAG: fcvtzs v[[CONV2:[0-9]+]].2d, v2.2d
19 ; CHECK-DAG: fcvtzs v[[CONV1:[0-9]+]].2d, v1.2d
20 ; CHECK-DAG: fcvtzs v[[CONV0:[0-9]+]].2d, v0.2d
21 ; CHECK-DAG: xtn v[[NA0:[0-9]+]].2s, v[[CONV0]].2d
22 ; CHECK-DAG: xtn v[[NA1:[0-9]+]].2s, v[[CONV1]].2d
23 ; CHECK-DAG: xtn v[[NA2:[0-9]+]].2s, v[[CONV2]].2d
2124 ; CHECK-DAG: xtn v[[NA3:[0-9]+]].2s, v[[CONV3]].2d
22 ; CHECK-DAG: xtn v[[NA2:[0-9]+]].2s, v[[CONV2]].2d
23 ; CHECK-DAG: xtn v[[NA1:[0-9]+]].2s, v[[CONV1]].2d
24 ; CHECK-DAG: xtn v[[NA0:[0-9]+]].2s, v[[CONV0]].2d
25 ; CHECK-DAG: uzp1 v[[TMP1:[0-9]+]].4h, v[[CONV2]].4h, v[[CONV3]].4h
26 ; CHECK-DAG: uzp1 v[[TMP2:[0-9]+]].4h, v[[CONV0]].4h, v[[CONV1]].4h
25 ; CHECK-DAG: uzp1 v[[TMP1:[0-9]+]].4h, v[[CONV1]].4h, v[[CONV0]].4h
26 ; CHECK-DAG: uzp1 v[[TMP2:[0-9]+]].4h, v[[CONV3]].4h, v[[CONV2]].4h
2727 ; CHECK: uzp1 v0.8b, v[[TMP2]].8b, v[[TMP1]].8b
2828 %tmp1 = load <8 x double>, <8 x double>* %ptr
2929 %tmp2 = fptosi <8 x double> %tmp1 to <8 x i8>
7474
7575 ; CHECK: ldr [[VAL0:x[0-9]+]],
7676 ; CHECK: ldr [[VAL1:x[0-9]+]],
77 ; CHECK: str [[VAL0]],
7778 ; CHECK: str [[VAL1]],
78 ; CHECK: str [[VAL0]],
7979
8080 ; CHECK-NOT: add sp, sp,
8181 ; CHECK: b callee_stack16
1313 ; CHECK: vldr
1414 ; CHECK: vldr
1515 ; CHECK: vldr
16 ; CHECK: vldr
16 ; CHECK-NEXT: vldr
17 ; CHECK-NEXT: vmul
1718 ; CHECK-NEXT: vadd
1819 ; CHECK-NEXT: vadd
1920 ; CHECK-NEXT: vldr
20 ; CHECK-NEXT: vldr
21 ; CHECK-NEXT: vldr
22 ; CHECK-NEXT: vadd
23 ; CHECK-NEXT: vmul
2421 ; CHECK-NEXT: vldr
2522 ; CHECK-NEXT: vadd
2623 ; CHECK-NEXT: vadd
2724 ; CHECK-NEXT: vmul
2825 ; CHECK-NEXT: vldr
2926 ; CHECK-NEXT: vadd
27 ; CHECK-NEXT: vadd
28 ; CHECK-NEXT: vldr
29 ; CHECK-NEXT: vmul
30 ; CHECK-NEXT: vldr
31 ; CHECK-NEXT: vadd
32 ; CHECK-NEXT: vldr
3033 ; CHECK-NEXT: vadd
3134 ; CHECK-NEXT: vldr
3235 ; CHECK-NEXT: vmul
4245 ; CHECK-NEXT: vmul
4346 ; CHECK-NEXT: vadd
4447 ; CHECK-NEXT: vldr
45 ; CHECK-NEXT: vadd
46 ; CHECK-NEXT: vldr
47 ; CHECK-NEXT: vmul
4848 ; CHECK-NEXT: vadd
4949 ; CHECK-NEXT: vldr
5050 ; CHECK-NEXT: vmul
9797 ; re-ordering the instructions.
9898 ; CHECK-DAG: xsmaddadp [[F1]], 2, 3
9999
100 ; CHECK-DAG: xsmaddmdp 2, 3, 4
100 ; CHECK-DAG: xsmaddmdp 3, 2, 4
101101 ; CHECK-DAG: stxsdx [[F1]], 0, 8
102 ; CHECK-DAG: stxsdx 2, 8, [[C1]]
102 ; CHECK-DAG: stxsdx 3, 8, [[C1]]
103103 ; CHECK-DAG: stxsdx 1, 8, [[C2]]
104104 ; CHECK-DAG: stxsdx 4, 8, [[C3]]
105105 ; CHECK: blr
268268 ; re-ordering the instructions.
269269 ; CHECK-DAG: xvmaddadp [[V1]], 35, 36
270270
271 ; CHECK-DAG: xvmaddmdp 35, 36, 37
271 ; CHECK-DAG: xvmaddmdp 36, 35, 37
272272 ; CHECK-DAG: xvmaddadp 34, 35, 38
273273 ; CHECK-DAG: stxvd2x 32, 0, 3
274 ; CHECK-DAG: stxvd2x 35, 3, [[C1]]
274 ; CHECK-DAG: stxvd2x 36, 3, [[C1]]
275275 ; CHECK-DAG: stxvd2x 34, 3, [[C2]]
276276 ; CHECK-DAG: stxvd2x 37, 3, [[C3]]
277277 ; CHECK: blr
2626 ; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
2727 ; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
2828 ; SI: s_lshr_b64
29 ; SI: cmp_gt_i32
3029 ; SI: s_not_b64
3130 ; SI: s_and_b64
31 ; SI: cmp_gt_i32
3232 ; SI: cndmask_b32
3333 ; SI: cndmask_b32
3434 ; SI: cmp_lt_i32
3131 ; EG-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
3232 ; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], 16, v{{[0-9]+}}
3333 ; SI: ds_read_b32 {{v[0-9]+}}, [[SIPTR]]
34 ; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]]
35 ; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]] offset:16
34 ; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] offset:16
35 ; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]]
3636
3737 define void @local_memory_two_objects(i32 addrspace(1)* %out) {
3838 entry: