llvm.org GIT mirror llvm / 736a6ea
Change the scheduler from adding nodes in allnodes order to adding them in a deterministic order (bottom up from the root) based on the structure of the graph itself. This updates tests for some random changes, interesting bits: CodeGen/Blackfin/promote-logic.ll no longer crashes. I have no idea why, but that's good, right? CodeGen/X86/2009-07-16-LoadFoldingBug.ll also fails, but now compiles to have one fewer constant pool entry, making the expected load that was being folded disappear. Since it is an unreduced mass of gnast, I just removed it. This fixes PR6370 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@97023 91177308-0d34-0410-b5e6-96231b3b80d8 Chris Lattner 10 years ago
5 changed file(s) with 18 addition(s) and 109 deletion(s). Raw diff Collapse all Expand all
217217 // Check to see if the scheduler cares about latencies.
218218 bool UnitLatencies = ForceUnitLatencies();
219219
220 for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
221 E = DAG->allnodes_end(); NI != E; ++NI) {
220 // Add all nodes in depth first order.
221 SmallVector Worklist;
222 SmallPtrSet Visited;
223 Worklist.push_back(DAG->getRoot().getNode());
224 Visited.insert(DAG->getRoot().getNode());
225
226 while (!Worklist.empty()) {
227 SDNode *NI = Worklist.pop_back_val();
228
229 // Add all operands to the worklist unless they've already been added.
230 for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
231 if (Visited.insert(NI->getOperand(i).getNode()))
232 Worklist.push_back(NI->getOperand(i).getNode());
233
222234 if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
223235 continue;
224236
None ; RUN: llc < %s -march=bfin > %t
1 ; XFAIL: *
0 ; RUN: llc < %s -march=bfin
21
32 ; DAGCombiner::SimplifyBinOpWithSameOpcodeHands can produce an illegal i16 OR
43 ; operation after LegalizeOps.
99
1010 define i8 @add(i8 %a, i8 %b) nounwind {
1111 ; CHECK: add:
12 ; CHECK: add.b r14, r15
12 ; CHECK: add.b r12, r15
1313 %1 = add i8 %a, %b
1414 ret i8 %1
1515 }
0 ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | \
1 ; RUN: grep {stw r4, 32751}
1 ; RUN: grep {stw r3, 32751}
22 ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \
3 ; RUN: grep {stw r4, 32751}
3 ; RUN: grep {stw r3, 32751}
44 ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \
55 ; RUN: grep {std r3, 9024}
66
+0
-102
test/CodeGen/X86/2009-07-16-LoadFoldingBug.ll less more
None ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
1
2 ; CHECK: _foo:
3 ; CHECK: pavgw LCPI1_4(%rip)
4
5 ; rdar://7057804
6
7 define void @foo(i16* %out8x8, i16* %in8x8, i32 %lastrow) optsize ssp {
8 entry:
9 %0 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> , <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2]
10 %1 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %0, <8 x i16> ) nounwind readnone ; <<8 x i16>> [#uses=1]
11 %2 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1]
12 %3 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %2, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1]
13 %tmp.i.i10 = add <8 x i16> %0, %3 ; <<8 x i16>> [#uses=1]
14 %4 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> zeroinitializer, <8 x i16> %1) nounwind readnone ; <<8 x i16>> [#uses=1]
15 %5 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i10, <8 x i16> %4) nounwind readnone ; <<8 x i16>> [#uses=3]
16 %6 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %5, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1]
17 %7 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> , <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2]
18 %8 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %7, <8 x i16> ) nounwind readnone ; <<8 x i16>> [#uses=1]
19 %9 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1]
20 %10 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %9, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1]
21 %tmp.i.i8 = add <8 x i16> %7, %10 ; <<8 x i16>> [#uses=1]
22 %11 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %8) nounwind readnone ; <<8 x i16>> [#uses=1]
23 %12 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i8, <8 x i16> %11) nounwind readnone ; <<8 x i16>> [#uses=1]
24 %13 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> undef, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1]
25 %14 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %5, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1]
26 %15 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %5, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1]
27 %16 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %6, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1]
28 %17 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %12, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1]
29 %18 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %13, <8 x i16> %15) nounwind readnone ; <<8 x i16>> [#uses=1]
30 %19 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %14) nounwind readnone ; <<8 x i16>> [#uses=2]
31 %20 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=4]
32 %21 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %17) nounwind readnone ; <<8 x i16>> [#uses=1]
33 %22 = bitcast <8 x i16> %21 to <2 x i64> ; <<2 x i64>> [#uses=1]
34 %23 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> , <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2]
35 %24 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %23, <8 x i16> ) nounwind readnone ; <<8 x i16>> [#uses=1]
36 %25 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1]
37 %26 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %25, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1]
38 %tmp.i.i6 = add <8 x i16> %23, %26 ; <<8 x i16>> [#uses=1]
39 %27 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %24) nounwind readnone ; <<8 x i16>> [#uses=1]
40 %28 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i6, <8 x i16> %27) nounwind readnone ; <<8 x i16>> [#uses=1]
41 %29 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> , <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2]
42 %30 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %29, <8 x i16> ) nounwind readnone ; <<8 x i16>> [#uses=1]
43 %31 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1]
44 %32 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %31, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1]
45 %tmp.i.i4 = add <8 x i16> %29, %32 ; <<8 x i16>> [#uses=1]
46 %33 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %30) nounwind readnone ; <<8 x i16>> [#uses=1]
47 %34 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i4, <8 x i16> %33) nounwind readnone ; <<8 x i16>> [#uses=1]
48 %35 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> , <8 x i16> %20) nounwind readnone ; <<8 x i16>> [#uses=2]
49 %tmp.i2.i1 = mul <8 x i16> %20, ; <<8 x i16>> [#uses=1]
50 %36 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %35, <8 x i16> ) nounwind readnone ; <<8 x i16>> [#uses=1]
51 %37 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %tmp.i2.i1, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1]
52 %38 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %37, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1]
53 %tmp.i.i2 = add <8 x i16> %35, %38 ; <<8 x i16>> [#uses=1]
54 %39 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %19, <8 x i16> %36) nounwind readnone ; <<8 x i16>> [#uses=1]
55 %40 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i2, <8 x i16> %39) nounwind readnone ; <<8 x i16>> [#uses=1]
56 %41 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> , <8 x i16> %20) nounwind readnone ; <<8 x i16>> [#uses=2]
57 %tmp.i2.i = mul <8 x i16> %20, ; <<8 x i16>> [#uses=1]
58 %42 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %41, <8 x i16> ) nounwind readnone ; <<8 x i16>> [#uses=1]
59 %43 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %tmp.i2.i, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1]
60 %44 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %43, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1]
61 %tmp.i.i = add <8 x i16> %41, %44 ; <<8 x i16>> [#uses=1]
62 %45 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %19, <8 x i16> %42) nounwind readnone ; <<8 x i16>> [#uses=1]
63 %46 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i, <8 x i16> %45) nounwind readnone ; <<8 x i16>> [#uses=1]
64 %47 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %18, <8 x i16> %16) nounwind readnone ; <<8 x i16>> [#uses=1]
65 %48 = bitcast <8 x i16> %47 to <2 x i64> ; <<2 x i64>> [#uses=1]
66 %49 = bitcast <8 x i16> %28 to <2 x i64> ; <<2 x i64>> [#uses=1]
67 %50 = getelementptr i16* %out8x8, i64 8 ; [#uses=1]
68 %51 = bitcast i16* %50 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
69 store <2 x i64> %49, <2 x i64>* %51, align 16
70 %52 = bitcast <8 x i16> %40 to <2 x i64> ; <<2 x i64>> [#uses=1]
71 %53 = getelementptr i16* %out8x8, i64 16 ; [#uses=1]
72 %54 = bitcast i16* %53 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
73 store <2 x i64> %52, <2 x i64>* %54, align 16
74 %55 = getelementptr i16* %out8x8, i64 24 ; [#uses=1]
75 %56 = bitcast i16* %55 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
76 store <2 x i64> %48, <2 x i64>* %56, align 16
77 %57 = bitcast <8 x i16> %46 to <2 x i64> ; <<2 x i64>> [#uses=1]
78 %58 = getelementptr i16* %out8x8, i64 40 ; [#uses=1]
79 %59 = bitcast i16* %58 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
80 store <2 x i64> %57, <2 x i64>* %59, align 16
81 %60 = bitcast <8 x i16> %34 to <2 x i64> ; <<2 x i64>> [#uses=1]
82 %61 = getelementptr i16* %out8x8, i64 48 ; [#uses=1]
83 %62 = bitcast i16* %61 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
84 store <2 x i64> %60, <2 x i64>* %62, align 16
85 %63 = getelementptr i16* %out8x8, i64 56 ; [#uses=1]
86 %64 = bitcast i16* %63 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
87 store <2 x i64> %22, <2 x i64>* %64, align 16
88 ret void
89 }
90
91 declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
92
93 declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
94
95 declare <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16>, <8 x i16>) nounwind readnone
96
97 declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
98
99 declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
100
101 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone