llvm.org GIT mirror llvm / 7c2cdb1
Turn on list-ilp scheduling by default on x86 and x86-64, and fix up testcases accordingly. Some are currently xfailed and will be filed as bugs to be fixed or understood. Performance results: roughly neutral on SPEC; some micro benchmarks in the llvm suite are up between 100 and 150%, with only a pair of regressions that are due to be investigated. john-the-ripper saw: 10% improvement in traditional DES; 8% improvement in BSDI DES; 59% improvement in FreeBSD MD5; 67% improvement in OpenBSD Blowfish; 14% improvement in LM DES. Small compile time impact. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@127208 91177308-0d34-0410-b5e6-96231b3b80d8 Eric Christopher 8 years ago
19 changed file(s) with 25 addition(s) and 19 deletion(s). Raw diff Collapse all Expand all
220220
221221 // X86 is weird, it always uses i8 for shift amounts and setcc results.
222222 setBooleanContents(ZeroOrOneBooleanContent);
223 setSchedulingPreference(Sched::RegPressure);
223 setSchedulingPreference(Sched::ILP);
224224 setStackPointerRegisterToSaveRestore(X86StackPtr);
225225
226226 if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
0 ; XFAIL: *
1 ; RUN: llc < %s -march=x86 -mcpu=yonah -stats |& \
12 ; RUN: not grep {Number of register spills}
23 ; END.
33 ; CHECK: andl $65534, %
44 ; CHECK-NEXT: movl %
55 ; CHECK-NEXT: movzwl
6 ; CHECK-NEXT: movl $17
76
87 @g_5 = external global i16 ; [#uses=2]
98 @g_107 = external global i16 ; [#uses=1]
None ; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=1 -mattr=sse2 | grep movaps | count 75
1 ; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=0 -mattr=sse2 | grep movaps | count 75
0 ; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=1 -mattr=sse2 | grep movups | count 33
1 ; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=0 -mattr=sse2 | grep movups | count 33
22 ; PR2539
33 ; PR8969 - make 32-bit linux have a 16-byte aligned stack
4 ; Verify that movups is still generated with an aligned stack for the globals
5 ; that must be accessed unaligned
46
57 external global <4 x float>, align 1 ; <<4 x float>*>:0 [#uses=2]
68 external global <4 x float>, align 1 ; <<4 x float>*>:1 [#uses=1]
None ; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -disable-fp-elim -stats |& grep asm-printer | grep 55
0 ; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -pre-RA-sched=list-burr -disable-fp-elim -stats |& grep asm-printer | grep 55
11 ; PR2568
22
33 @g_3 = external global i16 ; [#uses=1]
0 ; Check that eh_return & unwind_init were properly lowered
11 ; RUN: llc < %s | grep %ebp | count 7
2 ; RUN: llc < %s | grep %ecx | count 5
2 ; RUN: llc < %s | grep %edx | count 5
33
44 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
55 target triple = "i386-pc-linux"
None ; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep asm-printer | grep 82
0 ; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep asm-printer | grep 79
11 ; rdar://6802189
22
33 ; Test if linearscan is unfavoring registers for allocation to allow more reuse
None ; RUN: llc < %s | FileCheck %s
0 ; RUN: llc -pre-RA-sched=list-burr < %s | FileCheck %s
11 ; PR6941
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
33 target triple = "x86_64-apple-darwin10.0.0"
1818 }
1919
2020 ; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip), %rax
21 ; CHECK: movb 30(%rsp), %dl
22 ; CHECK: movb (%rsp), %sil
23 ; CHECK: movb %sil, (%rsp)
24 ; CHECK: movb %dl, 30(%rsp)
21 ; CHECK: movb 38(%rsp), %bl
22 ; CHECK: movb 8(%rsp), %dl
23 ; CHECK: movb %dl, 8(%rsp)
24 ; CHECK: movb %bl, 38(%rsp)
2525 ; CHECK: callq ___stack_chk_fail
None ; RUN: llc < %s -march=x86-64 -post-RA-scheduler -break-anti-dependencies=none > %t
0 ; Without list-burr scheduling we may not see the difference in codegen here.
1 ; RUN: llc < %s -march=x86-64 -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none > %t
12 ; RUN: grep {%xmm0} %t | count 14
23 ; RUN: not grep {%xmm1} %t
34 ; RUN: llc < %s -march=x86-64 -post-RA-scheduler -break-anti-dependencies=critical > %t
None ; RUN: llc < %s | grep {movl %esp, %ecx}
0 ; RUN: llc < %s | grep {movl %esp, %ebp}
11 ; PR4572
22
33 ; Don't coalesce with %esp if it would end up putting %esp in
3737 define %0 @t3(i32 %lb, i8 zeroext %has_lb, i8 zeroext %lb_inclusive, i32 %ub, i8 zeroext %has_ub, i8 zeroext %ub_inclusive) nounwind {
3838 entry:
3939 ; DARWIN: t3:
40 ; DARWIN: shll $16
4041 ; DARWIN: shlq $32, %rcx
4142 ; DARWIN-NOT: leaq
4243 ; DARWIN: orq %rcx, %rax
43 ; DARWIN-NOT: mov
44 ; DARWIN: shll $16
4544 %tmp21 = zext i32 %lb to i64
4645 %tmp23 = zext i32 %ub to i64
4746 %tmp24 = shl i64 %tmp23, 32
None ; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah | not grep pcmpeqd
1 ; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah | grep orps | grep CPI0_2 | count 2
0 ; RUN: llc < %s -mtriple=i386-apple-darwin | grep pcmpeqd | count 1
21 ; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1
32
43 ; This testcase shouldn't need to spill the -1 value,
0 ; XFAIL: *
1 ; RUN: llc -march=x86-64 < %s | FileCheck %s
12
23 ; CHECK: decq
0 ; XFAIL: *
1 ; RUN: llc < %s -march=x86-64 -O3 -asm-verbose=false | FileCheck %s
12 target datalayout = "e-p:64:64:64"
23 target triple = "x86_64-unknown-unknown"
0 ; XFAIL: *
1 ; RUN: llc < %s -mcpu=i486 | grep fstpl | count 5
12 ; RUN: llc < %s -mcpu=i486 | grep fstps | count 2
23 ; PR1505
0 ; XFAIL: *
1 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu > %t
12 ; RUN: not grep xor %t
23 ; RUN: not grep movap %t
0 ; RUN: llc -march=x86 -mattr=+sse < %s | FileCheck %s
1 ; CHECK: divps
12 ; CHECK: divss
2 ; CHECK: divps
33 ; CHECK: divps
44
55 %vec = type <9 x float>
0 ; XFAIL: *
1 ; RUN: llc < %s -march=x86-64 | FileCheck %s
12 ;
23