llvm.org GIT mirror llvm / 6a7770b
Enable MI Sched for x86. This changes the SelectionDAG scheduling preference to source order. Soon, the SelectionDAG scheduler can be bypassed saving a nice chunk of compile time. Performance differences that result from this change are often a consequence of register coalescing. The register coalescer is far from perfect. Bugs can be filed for deficiencies. On x86 SandyBridge/Haswell, the source order schedule is often preserved, particularly for small blocks. Register pressure is generally improved over the SD scheduler's ILP mode. However, we are still able to handle large blocks that require latency hiding, unlike the SD scheduler's BURR mode. MI scheduler also attempts to discover the critical path in single-block loops and adjust heuristics accordingly. The MI scheduler relies on the new machine model. This is currently unimplemented for AVX, so we may not be generating the best code yet. Unit tests are updated so they don't depend on SD scheduling heuristics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192750 91177308-0d34-0410-b5e6-96231b3b80d8 Andrew Trick 6 years ago
67 changed file(s) with 364 addition(s) and 303 deletion(s). Raw diff Collapse all Expand all
382382 /// memset with zero passed as the second argument. Otherwise it
383383 /// returns null.
384384 const char *getBZeroEntry() const;
385
385
386386 /// This function returns true if the target has sincos() routine in its
387387 /// compiler runtime or math libraries.
388388 bool hasSinCos() const;
389
390 /// Enable the MachineScheduler pass for all X86 subtargets.
391 bool enableMachineScheduler() const LLVM_OVERRIDE { return true; }
389392
390393 /// enablePostRAScheduler - run for Atom optimization.
391394 bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
0 ; REQUIRES: asserts
11 ; RUN: llc < %s -march=x86 -relocation-model=static -stats 2>&1 | \
2 ; RUN: grep asm-printer | grep 14
2 ; RUN: grep asm-printer | grep 16
33 ;
4 ; It's possible to schedule this in 14 instructions by avoiding
5 ; callee-save registers, but the scheduler isn't currently that
6 ; conservative with registers.
47 @size20 = external global i32 ; [#uses=1]
58 @in5 = external global i8* ; [#uses=1]
69
2023 }
2124
2225 declare i32 @memcmp(i8*, i8*, i32)
23
1212
1313 ; CHECK: mulss
1414 ; CHECK: mulss
15 ; CHECK: addss
15 ; CHECK: mulss
1616 ; CHECK: mulss
1717 ; CHECK: addss
18 ; CHECK: mulss
18 ; CHECK: addss
1919 ; CHECK: addss
2020 ; CHECK: ret
2121 }
1616 ; CHECK: %bb4
1717 ; CHECK: xorl
1818 ; CHECK: callq
19 ; CHECK: movq
2019 ; CHECK: xorl
2120 ; CHECK: xorl
21 ; CHECK: movq
2222
2323 %0 = call i32 (...)* @xxGetOffsetForCode(i32 undef) nounwind ; [#uses=0]
2424 %ins = or i64 %p, 2097152 ; [#uses=1]
None ; RUN: llc -mcpu=generic -mtriple=i386-apple-darwin -tailcallopt < %s | FileCheck %s
0 ; RUN: llc -mcpu=generic -mtriple=i386-apple-darwin -tailcallopt -enable-misched=false < %s | FileCheck %s
11 ; Check that lowered arguments do not overwrite the return address before it is moved.
22 ; Bug 6225
33 ;
44 ; If a call is a fastcc tail call and tail call optimization is enabled, the
5 ; caller frame is replaced by the callee frame. This can require that arguments are
5 ; caller frame is replaced by the callee frame. This can require that arguments are
66 ; placed on the former return address stack slot. Special care needs to be taken
77 ; that the return address is moved / or stored in a register before
88 ; lowering of arguments potentially overwrites the value.
5050 tail call fastcc void @l298(i32 %r10, i32 %r9, i32 %r4) noreturn nounwind
5151 ret void
5252 }
53
54
1818 }
1919
2020 ; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip)
21 ; CHECK: movb 38(%rsp), [[R0:%.+]]
22 ; CHECK: movb 8(%rsp), [[R1:%.+]]
23 ; CHECK: movb [[R1]], 8(%rsp)
24 ; CHECK: movb [[R0]], 38(%rsp)
21 ; CHECK: movb (%rsp), [[R1:%.+]]
22 ; CHECK: movb 30(%rsp), [[R0:%.+]]
23 ; CHECK: movb [[R1]], (%rsp)
24 ; CHECK: movb [[R0]], 30(%rsp)
2525 ; CHECK: callq ___stack_chk_fail
1515 ; CHECK: main
1616 define i32 @main() nounwind uwtable {
1717 entry:
18 ; CHECK: pmovsxbq i(%rip), %
1819 ; CHECK: pmovsxbq j(%rip), %
19 ; CHECK: pmovsxbq i(%rip), %
2020 %0 = load <2 x i8>* @i, align 8
2121 %1 = load <2 x i8>* @j, align 8
2222 %div = sdiv <2 x i8> %1, %0
2424 ret i32 0
2525 ; CHECK: ret
2626 }
27
44 ; It's hard to test for the ISEL condition because CodeGen optimizes
55 ; away the bugpointed code. Just ensure the basics are still there.
66 ;CHECK-LABEL: func:
7 ;CHECK: vxorps
8 ;CHECK: vinsertf128
7 ;CHECK: vpxor
8 ;CHECK: vinserti128
99 ;CHECK: vpshufd
1010 ;CHECK: vpshufd
1111 ;CHECK: vmulps
3333
3434 ; 64BIT-LABEL: t2:
3535 ; 64BIT-NOT: movw %si, %ax
36 ; 64BIT: leal -1(%rsi), %eax
36 ; 64BIT: decl %eax
37 ; 64BIT: movzwl %ax
3738 %0 = icmp eq i16 %k, %c ; [#uses=1]
3839 %1 = add i16 %k, -1 ; [#uses=3]
3940 br i1 %0, label %bb, label %bb1
5758
5859 ; 64BIT-LABEL: t3:
5960 ; 64BIT-NOT: movw %si, %ax
60 ; 64BIT: leal 2(%rsi), %eax
61 ; 64BIT: addl $2, %eax
6162 %0 = add i16 %k, 2 ; [#uses=3]
6263 %1 = icmp eq i16 %k, %c ; [#uses=1]
6364 br i1 %1, label %bb, label %bb1
8081
8182 ; 64BIT-LABEL: t4:
8283 ; 64BIT-NOT: movw %si, %ax
83 ; 64BIT: leal (%rsi,%rdi), %eax
84 ; 64BIT: addl %edi, %eax
8485 %0 = add i16 %k, %c ; [#uses=3]
8586 %1 = icmp eq i16 %k, %c ; [#uses=1]
8687 br i1 %1, label %bb, label %bb1
33 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
44 target triple = "x86_64-apple-macosx10.8.0"
55
6 ;YESCOLOR: subq $136, %rsp
7 ;NOCOLOR: subq $264, %rsp
6 ;YESCOLOR: subq $144, %rsp
7 ;NOCOLOR: subq $272, %rsp
88
99 define i32 @myCall_w2(i32 %in) {
1010 entry:
None ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC
1 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC
2
3 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC
4 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC
5
6 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC
7 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
8 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC
9
10 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC
11 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
12 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC
0 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-32-STATIC
1 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-32-PIC
2
3 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-64-STATIC
4 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-64-PIC
5
6 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-32-STATIC
7 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
8 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-32-PIC
9
10 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-64-STATIC
11 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
12 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-64-PIC
1313
1414 @src = external global [131072 x i32]
1515 @dst = external global [131072 x i32]
88 %b = add i32 %a, 128
99 ret i32 %b
1010 ; X32: subl $-128, %eax
11 ; X64: subl $-128,
11 ; X64: subl $-128,
1212 }
1313 define i64 @test2(i64 inreg %a) nounwind {
1414 %b = add i64 %a, 2147483648
1919 define i64 @test3(i64 inreg %a) nounwind {
2020 %b = add i64 %a, 128
2121 ret i64 %b
22
22
2323 ; X32: addl $128, %eax
2424 ; X64: subq $-128,
2525 }
3737
3838 overflow:
3939 ret i1 false
40
40
4141 ; X32-LABEL: test4:
4242 ; X32: addl
4343 ; X32-NEXT: jo
8181 ret i64 %tmp5
8282
8383 ; X32-LABEL: test6:
84 ; X32: movl 12(%esp), %edx
84 ; X32: movl 4(%esp), %eax
85 ; X32-NEXT: movl 12(%esp), %edx
8586 ; X32-NEXT: addl 8(%esp), %edx
86 ; X32-NEXT: movl 4(%esp), %eax
8787 ; X32-NEXT: ret
88
88
8989 ; X64-LABEL: test6:
9090 ; X64: shlq $32, %r[[A1]]
9191 ; X64: leaq (%r[[A1]],%r[[A0]]), %rax
None ; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux | FileCheck %s
0 ; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux -enable-misched=false | FileCheck %s
11
22 declare void @bar(<2 x i64>* %n)
33
239239 ; CHECK-NEXT: vpmuludq %xmm
240240 ; CHECK-NEXT: vpsllq $32, %xmm
241241 ; CHECK-NEXT: vpaddq %xmm
242 ; CHECK-NEXT: vpmuludq %xmm
243 ; CHECK-NEXT: vpsrlq $32, %xmm
244 ; CHECK-NEXT: vpmuludq %xmm
245 ; CHECK-NEXT: vpsllq $32, %xmm
246 ; CHECK-NEXT: vpsrlq $32, %xmm
247 ; CHECK-NEXT: vpmuludq %xmm
248 ; CHECK-NEXT: vpsllq $32, %xmm
249 ; CHECK-NEXT: vpaddq %xmm
242 ; CHECK-NEXT: vpsrlq $32, %xmm
243 ; CHECK-NEXT: vpmuludq %xmm
244 ; CHECK-NEXT: vpsllq $32, %xmm
245 ; CHECK-NEXT: vpaddq %xmm
246 ; CHECK-NEXT: vpmuludq %xmm
247 ; CHECK-NEXT: vpsrlq $32, %xmm
248 ; CHECK-NEXT: vpmuludq %xmm
249 ; CHECK-NEXT: vpsllq $32, %xmm
250250 ; CHECK-NEXT: vpaddq %xmm
251251 ; CHECK-NEXT: vpsrlq $32, %xmm
252252 ; CHECK-NEXT: vpmuludq %xmm
268268 %x2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %x1) nounwind
269269 ret <4 x float> %x2
270270 }
271
3131 define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
3232 %y = alloca <16 x float>, align 16
3333 %x = fadd <16 x float> %a, %b
34 %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
34 %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
3535 %2 = load <16 x float>* %y, align 16
3636 %3 = fadd <16 x float> %2, %1
3737 ret <16 x float> %3
4242 ; preserved ymm6-ymm15
4343 ; WIN64: testf16_regs
4444 ; WIN64: call
45 ; WIN64: vaddps {{%ymm[6-7]}}, %ymm0, %ymm0
46 ; WIN64: vaddps {{%ymm[6-7]}}, %ymm1, %ymm1
45 ; WIN64: vaddps {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
46 ; WIN64: vaddps {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
4747 ; WIN64: ret
4848
4949 ; preserved ymm8-ymm15
5050 ; X64: testf16_regs
5151 ; X64: call
52 ; X64: vaddps {{%ymm[8-9]}}, %ymm0, %ymm0
53 ; X64: vaddps {{%ymm[8-9]}}, %ymm1, %ymm1
52 ; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
53 ; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
5454 ; X64: ret
5555
5656 define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
5757 %y = alloca <16 x float>, align 16
5858 %x = fadd <16 x float> %a, %b
59 %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
59 %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
6060 %2 = load <16 x float>* %y, align 16
6161 %3 = fadd <16 x float> %1, %b
6262 %4 = fadd <16 x float> %2, %3
165165 %8 = shufflevector <8 x float> %3, <8 x float> %7, <8 x i32>
166166 ret <8 x float> %8
167167 }
168
8080 define i32 @test9(<4 x i32> %a) nounwind {
8181 ; CHECK: test9
8282 ; CHECK: vpextrd
83 %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32>
83 %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32>
8484 %r = extractelement <8 x i32> %b, i32 2
8585 ; CHECK: ret
8686 ret i32 %r
250250 ; CHECK: swap8doubles
251251 ; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
252252 ; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
253 ; CHECK: vinsertf128 $1, {{[0-9]*}}(%rdi), %ymm{{[0-9]+}}
254 ; CHECK: vinsertf128 $1, {{[0-9]*}}(%rdi), %ymm{{[0-9]+}}
253255 ; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
254256 ; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
255257 ; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi)
166166 }
167167
168168 ; CHECK-LABEL: uitof64
169 ; CHECK: vcvtudq2pd
169170 ; CHECK: vextracti64x4
170 ; CHECK: vcvtudq2pd
171171 ; CHECK: vcvtudq2pd
172172 ; CHECK: ret
173173 define <16 x double> @uitof64(<16 x i32> %a) nounwind {
2626 %md = xor <16 x i1> %ma, %mb
2727 %me = or <16 x i1> %mc, %md
2828 %ret = bitcast <16 x i1> %me to i16
29 ; CHECK: kandw
2930 ; CHECK: kxorw
30 ; CHECK: kandw
3131 ; CHECK: korw
3232 ret i16 %ret
3333 }
5454 %mask1 = bitcast <8 x i1> %mask to i8
5555 ret i8 %mask1
5656 }
57
0 ; Without list-burr scheduling we may not see the difference in codegen here.
11 ; Use a subtarget that has post-RA scheduling enabled because the anti-dependency
22 ; breaker requires liveness information to be kept.
3 ; RUN: llc < %s -march=x86-64 -mcpu=atom -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none > %t
3 ; RUN: llc < %s -march=x86-64 -mcpu=atom -enable-misched=false -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none > %t
44 ; RUN: grep "%xmm0" %t | count 14
55 ; RUN: not grep "%xmm1" %t
66 ; RUN: llc < %s -march=x86-64 -mcpu=atom -post-RA-scheduler -break-anti-dependencies=critical > %t
3737 define void @test2b(i32 %x, i32 %n) nounwind {
3838 entry:
3939 ; CHECK: test2b
40 ; CHECK: btl %eax, %ecx
40 ; CHECK: btl %e{{..}}, %e{{..}}
4141 ; CHECK: jb
4242 %tmp29 = lshr i32 %x, %n ; [#uses=1]
4343 %tmp3 = and i32 1, %tmp29
5555 define void @atest2(i32 %x, i32 %n) nounwind {
5656 entry:
5757 ; CHECK: atest2
58 ; CHECK: btl %eax, %ecx
58 ; CHECK: btl %e{{..}}, %e{{..}}
5959 ; CHECK: jb
6060 %tmp29 = ashr i32 %x, %n ; [#uses=1]
6161 %tmp3 = and i32 %tmp29, 1 ; [#uses=1]
7373 define void @atest2b(i32 %x, i32 %n) nounwind {
7474 entry:
7575 ; CHECK: atest2b
76 ; CHECK: btl %eax, %ecx
76 ; CHECK: btl %e{{..}}, %e{{..}}
7777 %tmp29 = ashr i32 %x, %n ; [#uses=1]
7878 %tmp3 = and i32 1, %tmp29
7979 %tmp4 = icmp eq i32 %tmp3, 0 ; [#uses=1]
9090 define void @test3(i32 %x, i32 %n) nounwind {
9191 entry:
9292 ; CHECK: test3
93 ; CHECK: btl %eax, %ecx
93 ; CHECK: btl %e{{..}}, %e{{..}}
9494 ; CHECK: jb
9595 %tmp29 = shl i32 1, %n ; [#uses=1]
9696 %tmp3 = and i32 %tmp29, %x ; [#uses=1]
108108 define void @test3b(i32 %x, i32 %n) nounwind {
109109 entry:
110110 ; CHECK: test3b
111 ; CHECK: btl %eax, %ecx
111 ; CHECK: btl %e{{..}}, %e{{..}}
112112 ; CHECK: jb
113113 %tmp29 = shl i32 1, %n ; [#uses=1]
114114 %tmp3 = and i32 %x, %tmp29
126126 define void @testne2(i32 %x, i32 %n) nounwind {
127127 entry:
128128 ; CHECK: testne2
129 ; CHECK: btl %eax, %ecx
129 ; CHECK: btl %e{{..}}, %e{{..}}
130130 ; CHECK: jae
131131 %tmp29 = lshr i32 %x, %n ; [#uses=1]
132132 %tmp3 = and i32 %tmp29, 1 ; [#uses=1]
144144 define void @testne2b(i32 %x, i32 %n) nounwind {
145145 entry:
146146 ; CHECK: testne2b
147 ; CHECK: btl %eax, %ecx
147 ; CHECK: btl %e{{..}}, %e{{..}}
148148 ; CHECK: jae
149149 %tmp29 = lshr i32 %x, %n ; [#uses=1]
150150 %tmp3 = and i32 1, %tmp29
162162 define void @atestne2(i32 %x, i32 %n) nounwind {
163163 entry:
164164 ; CHECK: atestne2
165 ; CHECK: btl %eax, %ecx
165 ; CHECK: btl %e{{..}}, %e{{..}}
166166 ; CHECK: jae
167167 %tmp29 = ashr i32 %x, %n ; [#uses=1]
168168 %tmp3 = and i32 %tmp29, 1 ; [#uses=1]
180180 define void @atestne2b(i32 %x, i32 %n) nounwind {
181181 entry:
182182 ; CHECK: atestne2b
183 ; CHECK: btl %eax, %ecx
183 ; CHECK: btl %e{{..}}, %e{{..}}
184184 ; CHECK: jae
185185 %tmp29 = ashr i32 %x, %n ; [#uses=1]
186186 %tmp3 = and i32 1, %tmp29
198198 define void @testne3(i32 %x, i32 %n) nounwind {
199199 entry:
200200 ; CHECK: testne3
201 ; CHECK: btl %eax, %ecx
201 ; CHECK: btl %e{{..}}, %e{{..}}
202202 ; CHECK: jae
203203 %tmp29 = shl i32 1, %n ; [#uses=1]
204204 %tmp3 = and i32 %tmp29, %x ; [#uses=1]
216216 define void @testne3b(i32 %x, i32 %n) nounwind {
217217 entry:
218218 ; CHECK: testne3b
219 ; CHECK: btl %eax, %ecx
219 ; CHECK: btl %e{{..}}, %e{{..}}
220220 ; CHECK: jae
221221 %tmp29 = shl i32 1, %n ; [#uses=1]
222222 %tmp3 = and i32 %x, %tmp29
234234 define void @query2(i32 %x, i32 %n) nounwind {
235235 entry:
236236 ; CHECK: query2
237 ; CHECK: btl %eax, %ecx
237 ; CHECK: btl %e{{..}}, %e{{..}}
238238 ; CHECK: jae
239239 %tmp29 = lshr i32 %x, %n ; [#uses=1]
240240 %tmp3 = and i32 %tmp29, 1 ; [#uses=1]
252252 define void @query2b(i32 %x, i32 %n) nounwind {
253253 entry:
254254 ; CHECK: query2b
255 ; CHECK: btl %eax, %ecx
255 ; CHECK: btl %e{{..}}, %e{{..}}
256256 ; CHECK: jae
257257 %tmp29 = lshr i32 %x, %n ; [#uses=1]
258258 %tmp3 = and i32 1, %tmp29
270270 define void @aquery2(i32 %x, i32 %n) nounwind {
271271 entry:
272272 ; CHECK: aquery2
273 ; CHECK: btl %eax, %ecx
273 ; CHECK: btl %e{{..}}, %e{{..}}
274274 ; CHECK: jae
275275 %tmp29 = ashr i32 %x, %n ; [#uses=1]
276276 %tmp3 = and i32 %tmp29, 1 ; [#uses=1]
288288 define void @aquery2b(i32 %x, i32 %n) nounwind {
289289 entry:
290290 ; CHECK: aquery2b
291 ; CHECK: btl %eax, %ecx
291 ; CHECK: btl %e{{..}}, %e{{..}}
292292 ; CHECK: jae
293293 %tmp29 = ashr i32 %x, %n ; [#uses=1]
294294 %tmp3 = and i32 1, %tmp29
306306 define void @query3(i32 %x, i32 %n) nounwind {
307307 entry:
308308 ; CHECK: query3
309 ; CHECK: btl %eax, %ecx
309 ; CHECK: btl %e{{..}}, %e{{..}}
310310 ; CHECK: jae
311311 %tmp29 = shl i32 1, %n ; [#uses=1]
312312 %tmp3 = and i32 %tmp29, %x ; [#uses=1]
324324 define void @query3b(i32 %x, i32 %n) nounwind {
325325 entry:
326326 ; CHECK: query3b
327 ; CHECK: btl %eax, %ecx
327 ; CHECK: btl %e{{..}}, %e{{..}}
328328 ; CHECK: jae
329329 %tmp29 = shl i32 1, %n ; [#uses=1]
330330 %tmp3 = and i32 %x, %tmp29
342342 define void @query3x(i32 %x, i32 %n) nounwind {
343343 entry:
344344 ; CHECK: query3x
345 ; CHECK: btl %eax, %ecx
345 ; CHECK: btl %e{{..}}, %e{{..}}
346346 ; CHECK: jae
347347 %tmp29 = shl i32 1, %n ; [#uses=1]
348348 %tmp3 = and i32 %tmp29, %x ; [#uses=1]
360360 define void @query3bx(i32 %x, i32 %n) nounwind {
361361 entry:
362362 ; CHECK: query3bx
363 ; CHECK: btl %eax, %ecx
363 ; CHECK: btl %e{{..}}, %e{{..}}
364364 ; CHECK: jae
365365 %tmp29 = shl i32 1, %n ; [#uses=1]
366366 %tmp3 = and i32 %x, %tmp29
378378 define void @queryne2(i32 %x, i32 %n) nounwind {
379379 entry:
380380 ; CHECK: queryne2
381 ; CHECK: btl %eax, %ecx
381 ; CHECK: btl %e{{..}}, %e{{..}}
382382 ; CHECK: jb
383383 %tmp29 = lshr i32 %x, %n ; [#uses=1]
384384 %tmp3 = and i32 %tmp29, 1 ; [#uses=1]
396396 define void @queryne2b(i32 %x, i32 %n) nounwind {
397397 entry:
398398 ; CHECK: queryne2b
399 ; CHECK: btl %eax, %ecx
399 ; CHECK: btl %e{{..}}, %e{{..}}
400400 ; CHECK: jb
401401 %tmp29 = lshr i32 %x, %n ; [#uses=1]
402402 %tmp3 = and i32 1, %tmp29
414414 define void @aqueryne2(i32 %x, i32 %n) nounwind {
415415 entry:
416416 ; CHECK: aqueryne2
417 ; CHECK: btl %eax, %ecx
417 ; CHECK: btl %e{{..}}, %e{{..}}
418418 ; CHECK: jb
419419 %tmp29 = ashr i32 %x, %n ; [#uses=1]
420420 %tmp3 = and i32 %tmp29, 1 ; [#uses=1]
432432 define void @aqueryne2b(i32 %x, i32 %n) nounwind {
433433 entry:
434434 ; CHECK: aqueryne2b
435 ; CHECK: btl %eax, %ecx
435 ; CHECK: btl %e{{..}}, %e{{..}}
436436 ; CHECK: jb
437437 %tmp29 = ashr i32 %x, %n ; [#uses=1]
438438 %tmp3 = and i32 1, %tmp29
450450 define void @queryne3(i32 %x, i32 %n) nounwind {
451451 entry:
452452 ; CHECK: queryne3
453 ; CHECK: btl %eax, %ecx
453 ; CHECK: btl %e{{..}}, %e{{..}}
454454 ; CHECK: jb
455455 %tmp29 = shl i32 1, %n ; [#uses=1]
456456 %tmp3 = and i32 %tmp29, %x ; [#uses=1]
468468 define void @queryne3b(i32 %x, i32 %n) nounwind {
469469 entry:
470470 ; CHECK: queryne3b
471 ; CHECK: btl %eax, %ecx
471 ; CHECK: btl %e{{..}}, %e{{..}}
472472 ; CHECK: jb
473473 %tmp29 = shl i32 1, %n ; [#uses=1]
474474 %tmp3 = and i32 %x, %tmp29
486486 define void @queryne3x(i32 %x, i32 %n) nounwind {
487487 entry:
488488 ; CHECK: queryne3x
489 ; CHECK: btl %eax, %ecx
489 ; CHECK: btl %e{{..}}, %e{{..}}
490490 ; CHECK: jb
491491 %tmp29 = shl i32 1, %n ; [#uses=1]
492492 %tmp3 = and i32 %tmp29, %x ; [#uses=1]
504504 define void @queryne3bx(i32 %x, i32 %n) nounwind {
505505 entry:
506506 ; CHECK: queryne3bx
507 ; CHECK: btl %eax, %ecx
507 ; CHECK: btl %e{{..}}, %e{{..}}
508508 ; CHECK: jb
509509 %tmp29 = shl i32 1, %n ; [#uses=1]
510510 %tmp3 = and i32 %x, %tmp29
66 define i32 @main() nounwind {
77 entry:
88 ; CHECK-LABEL: main:
9 ; CHECK: movl $1, (%esp)
109 ; CHECK: leal 16(%esp), %edi
1110 ; CHECK: leal 160(%esp), %esi
1211 ; CHECK: rep;movsl
12 ; CHECK: movl $1, (%esp)
1313 %s = alloca %struct.S ; <%struct.S*> [#uses=2]
1414 %tmp15 = getelementptr %struct.S* %s, i32 0, i32 0 ; <<2 x i64>*> [#uses=1]
1515 store <2 x i64> < i64 8589934595, i64 1 >, <2 x i64>* %tmp15, align 16
16 call void @t( i32 1, %struct.S* byval %s ) nounwind
16 call void @t( i32 1, %struct.S* byval %s ) nounwind
1717 ret i32 0
1818 }
1919
22 ;CHECK-LABEL: cftx020:
33 ;CHECK: vmovsd (%rdi), %xmm{{.*}}
44 ;CHECK: vmovsd 16(%rdi), %xmm{{.*}}
5 ;CHECK: vmovsd 24(%rdi), %xmm{{.*}}
56 ;CHECK: vmovhpd 8(%rdi), %xmm{{.*}}
6 ;CHECK: vmovsd 24(%rdi), %xmm{{.*}}
77 ;CHECK: vmovupd %xmm{{.*}}, (%rdi)
88 ;CHECK: vmovupd %xmm{{.*}}, 16(%rdi)
99 ;CHECK: ret
3434 store <2 x double> %14, <2 x double>* %15, align 8
3535 ret void
3636 }
37
33 define i32 @test1(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone {
44 entry:
55 ; CHECK-LABEL: test1:
6 ; CHECK: movl $12, %eax
7 ; CHECK-NEXT: btl
6 ; CHECK: btl
7 ; CHECK-NEXT: movl $12, %eax
88 ; CHECK-NEXT: cmovael (%rcx), %eax
99 ; CHECK-NEXT: ret
1010
1818 define i32 @test2(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone {
1919 entry:
2020 ; CHECK-LABEL: test2:
21 ; CHECK: movl $12, %eax
22 ; CHECK-NEXT: btl
21 ; CHECK: btl
22 ; CHECK-NEXT: movl $12, %eax
2323 ; CHECK-NEXT: cmovbl (%rcx), %eax
2424 ; CHECK-NEXT: ret
2525
9191 ; CHECK: testb
9292 ; CHECK-NOT: xor
9393 ; CHECK: setne
94 ; CHECK-NEXT: testb
94 ; CHECK: testb
9595
9696 func_4.exit.i: ; preds = %bb.i.i.i, %entry
9797 %.not.i = xor i1 %2, true ; [#uses=1]
3737 define %0 @t3(i32 %lb, i8 zeroext %has_lb, i8 zeroext %lb_inclusive, i32 %ub, i8 zeroext %has_ub, i8 zeroext %ub_inclusive) nounwind {
3838 entry:
3939 ; DARWIN-LABEL: t3:
40 ; DARWIN: shll $16
4140 ; DARWIN: shlq $32, %rcx
41 ; DARWIN-NEXT: orq %rcx, %rax
42 ; DARWIN-NEXT: shll $8
4243 ; DARWIN-NOT: leaq
43 ; DARWIN: orq %rcx, %rax
4444 %tmp21 = zext i32 %lb to i64
4545 %tmp23 = zext i32 %ub to i64
4646 %tmp24 = shl i64 %tmp23, 32
3939 ; CHECK: movl L_LotsStuff$non_lazy_ptr, %ecx
4040
4141 ; ATOM: _t:
42 ; ATOM: movl L_LotsStuff$non_lazy_ptr, %ecx
43 ; ATOM: movl $0, %eax
42 ; ATOM: movl L_LotsStuff$non_lazy_ptr, %e{{..}}
43 ; ATOM: movl $0, %e{{..}}
4444
4545 }
0 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 -post-RA-scheduler=false | FileCheck %s
1 ; CHECK: movsd %xmm0, 8(%esp)
2 ; CHECK: xorl %ecx, %ecx
1 ; CHECK: movsd %xmm{{[0-9]}}, 8(%esp)
2 ; CHECK: xorl %eax, %eax
33
44 @d = external global double ; [#uses=1]
55 @c = external global double ; [#uses=1]
3737
3838 store i16 %A, i16* %Q
3939 ret i32 %D
40
40
4141 ; CHECK-LABEL: test2:
4242 ; CHECK: movl 4(%esp), %eax
43 ; CHECK-NEXT: movzwl (%eax), %ecx
43 ; CHECK-NEXT: movzwl (%eax), %e{{..}}
4444
4545 }
4646
5353 %mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer ; <<4 x float>> [#uses=2]
5454 %mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer ; <<4 x float>> [#uses=1]
5555 %cmpunord.i11 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i8 3) nounwind ; <<4 x float>> [#uses=1]
56 %tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1]
57 %bitcast.i3 = bitcast <4 x float> %mul310 to <4 x i32> ; <<4 x i32>> [#uses=1]
58 %andps.i5 = and <4 x i32> %bitcast.i3, zeroinitializer ; <<4 x i32>> [#uses=1]
59
60 call void null(<4 x float> %mul313, <4 x float> %cmpunord.i11, <4 x float> %tmp83, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind
61
62 %tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1]
63
5664 %bitcast6.i13 = bitcast <4 x float> %cmpunord.i11 to <4 x i32> ; <<4 x i32>> [#uses=2]
5765 %andps.i14 = add <4 x i32> , %bitcast6.i13 ; <<4 x i32>> [#uses=1]
5866 %not.i16 = xor <4 x i32> %bitcast6.i13, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
5967 %andnps.i17 = add <4 x i32> , %not.i16 ; <<4 x i32>> [#uses=1]
6068 %orps.i18 = or <4 x i32> %andnps.i17, %andps.i14 ; <<4 x i32>> [#uses=1]
6169 %bitcast17.i19 = bitcast <4 x i32> %orps.i18 to <4 x float> ; <<4 x float>> [#uses=1]
62 %tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1]
63 %bitcast.i3 = bitcast <4 x float> %mul310 to <4 x i32> ; <<4 x i32>> [#uses=1]
64 %andps.i5 = and <4 x i32> %bitcast.i3, zeroinitializer ; <<4 x i32>> [#uses=1]
70
6571 %bitcast11.i6 = bitcast <4 x float> %tmp83 to <4 x i32> ; <<4 x i32>> [#uses=1]
6672 %not.i7 = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
6773 %andnps.i8 = and <4 x i32> %bitcast11.i6, %not.i7 ; <<4 x i32>> [#uses=1]
68 call void null(<4 x float> %mul313, <4 x float> %cmpunord.i11, <4 x float> %tmp83, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind
6974 %orps.i9 = or <4 x i32> %andnps.i8, %andps.i5 ; <<4 x i32>> [#uses=1]
7075 %bitcast17.i10 = bitcast <4 x i32> %orps.i9 to <4 x float> ; <<4 x float>> [#uses=1]
71 %tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1]
76
7277 %bitcast6.i = bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=2]
7378 %andps.i = and <4 x i32> zeroinitializer, %bitcast6.i ; <<4 x i32>> [#uses=1]
7479 %bitcast11.i = bitcast <4 x float> %tmp84 to <4 x i32> ; <<4 x i32>> [#uses=1]
33 define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind {
44 ; ATOM: foo
55 ; ATOM: addl
6 ; ATOM: leal
6 ; ATOM: addl
77 ; ATOM: leal
88
99 ; CHECK: foo
None ; RUN: llc -mtriple=x86_64-linux -mcpu=nehalem < %s | FileCheck %s
1 ; RUN: llc -mtriple=x86_64-win32 -mcpu=nehalem < %s | FileCheck %s
0 ; RUN: llc -mtriple=x86_64-linux -mcpu=nehalem < %s | FileCheck %s --check-prefix=LIN
1 ; RUN: llc -mtriple=x86_64-win32 -mcpu=nehalem < %s | FileCheck %s --check-prefix=WIN
22 ; rdar://7398554
33
44 ; When doing vector gather-scatter index calculation with 32-bit indices,
55 ; bounce the vector off of cache rather than shuffling each individual
66 ; element out of the index vector.
77
8 ; CHECK: andps ([[H:%rdx|%r8]]), %xmm0
9 ; CHECK: movaps %xmm0, {{(-24)?}}(%rsp)
10 ; CHECK: movslq {{(-24)?}}(%rsp), %rax
11 ; CHECK: movsd ([[P:%rdi|%rcx]],%rax,8), %xmm0
12 ; CHECK: movslq {{-20|4}}(%rsp), %rax
13 ; CHECK: movhpd ([[P]],%rax,8), %xmm0
14 ; CHECK: movslq {{-16|8}}(%rsp), %rax
15 ; CHECK: movsd ([[P]],%rax,8), %xmm1
16 ; CHECK: movslq {{-12|12}}(%rsp), %rax
17 ; CHECK: movhpd ([[P]],%rax,8), %xmm1
8 ; CHECK: foo:
9 ; LIN: movaps (%rsi), %xmm0
10 ; LIN: andps (%rdx), %xmm0
11 ; LIN: movaps %xmm0, -24(%rsp)
12 ; LIN: movslq -24(%rsp), %[[REG1:r.+]]
13 ; LIN: movslq -20(%rsp), %[[REG2:r.+]]
14 ; LIN: movslq -16(%rsp), %[[REG3:r.+]]
15 ; LIN: movslq -12(%rsp), %[[REG4:r.+]]
16 ; LIN: movsd (%rdi,%[[REG1]],8), %xmm0
17 ; LIN: movhpd (%rdi,%[[REG2]],8), %xmm0
18 ; LIN: movsd (%rdi,%[[REG3]],8), %xmm1
19 ; LIN: movhpd (%rdi,%[[REG4]],8), %xmm1
20
21 ; WIN: movaps (%rdx), %xmm0
22 ; WIN: andps (%r8), %xmm0
23 ; WIN: movaps %xmm0, (%rsp)
24 ; WIN: movslq (%rsp), %[[REG1:r.+]]
25 ; WIN: movslq 4(%rsp), %[[REG2:r.+]]
26 ; WIN: movslq 8(%rsp), %[[REG3:r.+]]
27 ; WIN: movslq 12(%rsp), %[[REG4:r.+]]
28 ; WIN: movsd (%rcx,%[[REG1]],8), %xmm0
29 ; WIN: movhpd (%rcx,%[[REG2]],8), %xmm0
30 ; WIN: movsd (%rcx,%[[REG3]],8), %xmm1
31 ; WIN: movhpd (%rcx,%[[REG4]],8), %xmm1
1832
1933 define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
2034 %a = load <4 x i32>* %i
2727
2828 define cc 10 void @foo() nounwind {
2929 entry:
30 ; CHECK: movl base, %ebx
30 ; CHECK: movl r1, %esi
31 ; CHECK-NEXT: movl hp, %edi
3132 ; CHECK-NEXT: movl sp, %ebp
32 ; CHECK-NEXT: movl hp, %edi
33 ; CHECK-NEXT: movl r1, %esi
33 ; CHECK-NEXT: movl base, %ebx
3434 %0 = load i32* @r1
3535 %1 = load i32* @hp
3636 %2 = load i32* @sp
4141 }
4242
4343 declare cc 10 void @bar(i32, i32, i32, i32)
44
4040
4141 define cc 10 void @foo() nounwind {
4242 entry:
43 ; CHECK: movq base(%rip), %r13
43 ; CHECK: movsd d2(%rip), %xmm6
44 ; CHECK-NEXT: movsd d1(%rip), %xmm5
45 ; CHECK-NEXT: movss f4(%rip), %xmm4
46 ; CHECK-NEXT: movss f3(%rip), %xmm3
47 ; CHECK-NEXT: movss f2(%rip), %xmm2
48 ; CHECK-NEXT: movss f1(%rip), %xmm1
49 ; CHECK-NEXT: movq splim(%rip), %r15
50 ; CHECK-NEXT: movq r6(%rip), %r9
51 ; CHECK-NEXT: movq r5(%rip), %r8
52 ; CHECK-NEXT: movq r4(%rip), %rdi
53 ; CHECK-NEXT: movq r3(%rip), %rsi
54 ; CHECK-NEXT: movq r2(%rip), %r14
55 ; CHECK-NEXT: movq r1(%rip), %rbx
56 ; CHECK-NEXT: movq hp(%rip), %r12
4457 ; CHECK-NEXT: movq sp(%rip), %rbp
45 ; CHECK-NEXT: movq hp(%rip), %r12
46 ; CHECK-NEXT: movq r1(%rip), %rbx
47 ; CHECK-NEXT: movq r2(%rip), %r14
48 ; CHECK-NEXT: movq r3(%rip), %rsi
49 ; CHECK-NEXT: movq r4(%rip), %rdi
50 ; CHECK-NEXT: movq r5(%rip), %r8
51 ; CHECK-NEXT: movq r6(%rip), %r9
52 ; CHECK-NEXT: movq splim(%rip), %r15
53 ; CHECK-NEXT: movss f1(%rip), %xmm1
54 ; CHECK-NEXT: movss f2(%rip), %xmm2
55 ; CHECK-NEXT: movss f3(%rip), %xmm3
56 ; CHECK-NEXT: movss f4(%rip), %xmm4
57 ; CHECK-NEXT: movsd d1(%rip), %xmm5
58 ; CHECK-NEXT: movsd d2(%rip), %xmm6
58 ; CHECK-NEXT: movq base(%rip), %r13
5959 %0 = load double* @d2
6060 %1 = load double* @d1
6161 %2 = load float* @f4
8282
8383 declare cc 10 void @bar(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64,
8484 float, float, float, float, double, double)
85
4848 store i32 %arg1, i32* %arg1_var
4949 store i32 %arg2, i32* %arg2_var
5050
51 ; CHECK: movl 4(%esp), %edx
51 ; CHECK: movl 16(%esp), %esi
52 ; CHECK-NEXT: movl 12(%esp), %ebp
5253 ; CHECK-NEXT: movl 8(%esp), %eax
53 ; CHECK-NEXT: movl 12(%esp), %ebp
54 ; CHECK-NEXT: movl 16(%esp), %esi
54 ; CHECK-NEXT: movl 4(%esp), %edx
5555 %0 = load i32* %hp_var
5656 %1 = load i32* %p_var
5757 %2 = load i32* %arg0_var
44 define void @zap(i64 %a, i64 %b) nounwind {
55 entry:
66 ; CHECK: movq %rsi, %rax
7 ; CHECK-NEXT: movl $8, %ecx
8 ; CHECK-NEXT: movl $9, %r8d
79 ; CHECK-NEXT: movq %rdi, %rsi
810 ; CHECK-NEXT: movq %rax, %rdx
9 ; CHECK-NEXT: movl $8, %ecx
10 ; CHECK-NEXT: movl $9, %r8d
1111 ; CHECK-NEXT: callq addfour
1212 %0 = call cc 11 {i64, i64, i64} @addfour(i64 undef, i64 undef, i64 %a, i64 %b, i64 8, i64 9)
1313 %res = extractvalue {i64, i64, i64} %0, 2
5656 store i64 %arg2, i64* %arg2_var
5757 store i64 %arg3, i64* %arg3_var
5858
59 ; CHECK: movq 8(%rsp), %rcx
59 ; CHECK: movq 40(%rsp), %r15
60 ; CHECK-NEXT: movq 32(%rsp), %rbp
61 ; CHECK-NEXT: movq 24(%rsp), %rsi
6062 ; CHECK-NEXT: movq 16(%rsp), %rdx
61 ; CHECK-NEXT: movq 24(%rsp), %rsi
62 ; CHECK-NEXT: movq 32(%rsp), %rbp
63 ; CHECK-NEXT: movq 40(%rsp), %r15
63 ; CHECK-NEXT: movq 8(%rsp), %rcx
6464 %0 = load i64* %hp_var
6565 %1 = load i64* %p_var
6666 %2 = load i64* %arg0_var
None ; RUN: llc < %s -march=x86-64 | grep lea | count 12
0 ; RUN: llc < %s -march=x86-64 | grep lea | count 13
11
22 ; This testcase was written to demonstrate an instruction-selection problem,
33 ; however it also happens to expose a limitation in the DAGCombiner's
4343 store i32 %tmp10.6, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 7)
4444 ret void
4545 }
46
2727 bb2:
2828 ret i32 %x_offs
2929 ; CHECK-LABEL: test2:
30 ; CHECK: movl %e[[A0]], %eax
31 ; CHECK: addl $-5, %eax
30 ; CHECK: leal -5(%r[[A0:..]]), %eax
3231 ; CHECK: andl $-4, %eax
3332 ; CHECK: negl %eax
3433 ; CHECK: leal -4(%r[[A0]],%rax), %eax
1616 ; High slice starts at 4 (base + 4-bytes) and is 4-bytes aligned.
1717 ;
1818 ; STRESS-LABEL: t1:
19 ; Load out[out_start + 8].real, this is base + 8 * 8 + 0.
20 ; STRESS: vmovss 64([[BASE:[^(]+]]), [[OUT_Real:%xmm[0-9]+]]
21 ; Add low slice: out[out_start].real, this is base + 0.
22 ; STRESS-NEXT: vaddss ([[BASE]]), [[OUT_Real]], [[RES_Real:%xmm[0-9]+]]
1923 ; Load out[out_start + 8].imm, this is base + 8 * 8 + 4.
20 ; STRESS: vmovss 68([[BASE:[^)]+]]), [[OUT_Imm:%xmm[0-9]+]]
24 ; STRESS-NEXT: vmovss 68([[BASE]]), [[OUT_Imm:%xmm[0-9]+]]
2125 ; Add high slice: out[out_start].imm, this is base + 4.
2226 ; STRESS-NEXT: vaddss 4([[BASE]]), [[OUT_Imm]], [[RES_Imm:%xmm[0-9]+]]
23 ; Load out[out_start + 8].real, this is base + 8 * 8 + 0.
24 ; STRESS-NEXT: vmovss 64([[BASE]]), [[OUT_Real:%xmm[0-9]+]]
25 ; Add low slice: out[out_start].real, this is base + 0.
26 ; STRESS-NEXT: vaddss ([[BASE]]), [[OUT_Real]], [[RES_Real:%xmm[0-9]+]]
2727 ; Swap Imm and Real.
2828 ; STRESS-NEXT: vinsertps $16, [[RES_Imm]], [[RES_Real]], [[RES_Vec:%xmm[0-9]+]]
2929 ; Put the results back into out[out_start].
3131 ;
3232 ; Same for REGULAR, we eliminate register bank copy with each slices.
3333 ; REGULAR-LABEL: t1:
34 ; Load out[out_start + 8].real, this is base + 8 * 8 + 0.
35 ; REGULAR: vmovss 64([[BASE:[^)]+]]), [[OUT_Real:%xmm[0-9]+]]
36 ; Add low slice: out[out_start].real, this is base + 0.
37 ; REGULAR-NEXT: vaddss ([[BASE]]), [[OUT_Real]], [[RES_Real:%xmm[0-9]+]]
3438 ; Load out[out_start + 8].imm, this is base + 8 * 8 + 4.
35 ; REGULAR: vmovss 68([[BASE:[^)]+]]), [[OUT_Imm:%xmm[0-9]+]]
39 ; REGULAR-NEXT: vmovss 68([[BASE]]), [[OUT_Imm:%xmm[0-9]+]]
3640 ; Add high slice: out[out_start].imm, this is base + 4.
3741 ; REGULAR-NEXT: vaddss 4([[BASE]]), [[OUT_Imm]], [[RES_Imm:%xmm[0-9]+]]
38 ; Load out[out_start + 8].real, this is base + 8 * 8 + 0.
39 ; REGULAR-NEXT: vmovss 64([[BASE]]), [[OUT_Real:%xmm[0-9]+]]
40 ; Add low slice: out[out_start].real, this is base + 0.
41 ; REGULAR-NEXT: vaddss ([[BASE]]), [[OUT_Real]], [[RES_Real:%xmm[0-9]+]]
4242 ; Swap Imm and Real.
4343 ; REGULAR-NEXT: vinsertps $16, [[RES_Imm]], [[RES_Real]], [[RES_Vec:%xmm[0-9]+]]
4444 ; Put the results back into out[out_start].
136136 %res = add i32 %slice32_lowhigh, %tmpres
137137 ret i32 %res
138138 }
139
11 ; RUN: llc -mtriple=x86_64-darwin -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s
22
33 ; CHECK-LABEL: t:
4 ; CHECK: decq
5 ; CHECK-NEXT: movl (%r9,%rax,4), %eax
4 ; CHECK: movl (%r9,%rax,4), %e{{..}}
5 ; CHECK-NEXT: decq
66 ; CHECK-NEXT: jne
77
88 ; ATOM-LABEL: t:
9 ; ATOM: movl (%r9,%r{{.+}},4), %eax
9 ; ATOM: movl (%r9,%r{{.+}},4), %e{{..}}
1010 ; ATOM-NEXT: decq
1111 ; ATOM-NEXT: jne
1212
None ; RUN: llc < %s -mcpu=generic -march=x86-64 > %t
1 ; RUN: not grep and %t
2 ; RUN: not grep movz %t
3 ; RUN: not grep sar %t
4 ; RUN: not grep shl %t
5 ; RUN: grep add %t | count 5
6 ; RUN: grep inc %t | count 2
7 ; RUN: grep lea %t | count 3
0 ; RUN: llc < %s -mcpu=generic -march=x86-64 | FileCheck %s
81
92 ; Optimize away zext-inreg and sext-inreg on the loop induction
103 ; variable using trip-count information.
114
5 ; CHECK-LABEL: count_up
6 ; CHECK-NOT: {{and|movz|sar|shl}}
7 ; CHECK: inc
8 ; CHECK-NOT: {{and|movz|sar|shl}}
9 ; CHECK: jne
1210 define void @count_up(double* %d, i64 %n) nounwind {
1311 entry:
1412 br label %loop
3735 ret void
3836 }
3937
38 ; CHECK-LABEL: count_down
39 ; CHECK-NOT: {{and|movz|sar|shl}}
40 ; CHECK: addq
41 ; CHECK-NOT: {{and|movz|sar|shl}}
42 ; CHECK: jne
4043 define void @count_down(double* %d, i64 %n) nounwind {
4144 entry:
4245 br label %loop
6568 ret void
6669 }
6770
71 ; CHECK-LABEL: count_up_signed
72 ; CHECK-NOT: {{and|movz|sar|shl}}
73 ; CHECK: inc
74 ; CHECK-NOT: {{and|movz|sar|shl}}
75 ; CHECK: jne
6876 define void @count_up_signed(double* %d, i64 %n) nounwind {
6977 entry:
7078 br label %loop
95103 ret void
96104 }
97105
106 ; CHECK-LABEL: count_down_signed
107 ; CHECK-NOT: {{and|movz|sar|shl}}
108 ; CHECK: addq
109 ; CHECK-NOT: {{and|movz|sar|shl}}
110 ; CHECK: jne
98111 define void @count_down_signed(double* %d, i64 %n) nounwind {
99112 entry:
100113 br label %loop
125138 ret void
126139 }
127140
141 ; CHECK-LABEL: another_count_up
142 ; CHECK-NOT: {{and|movz|sar|shl}}
143 ; CHECK: addq
144 ; CHECK-NOT: {{and|movz|sar|shl}}
145 ; CHECK: jne
128146 define void @another_count_up(double* %d, i64 %n) nounwind {
129147 entry:
130148 br label %loop
153171 ret void
154172 }
155173
174 ; CHECK-LABEL: another_count_down
175 ; CHECK-NOT: {{and|movz|sar|shl}}
176 ; CHECK: decq
177 ; CHECK-NOT: {{and|movz|sar|shl}}
178 ; CHECK: jne
156179 define void @another_count_down(double* %d, i64 %n) nounwind {
157180 entry:
158181 br label %loop
181204 ret void
182205 }
183206
207 ; CHECK-LABEL: another_count_up_signed
208 ; CHECK-NOT: {{and|movz|sar|shl}}
209 ; CHECK: addq
210 ; CHECK-NOT: {{and|movz|sar|shl}}
211 ; CHECK: jne
184212 define void @another_count_up_signed(double* %d, i64 %n) nounwind {
185213 entry:
186214 br label %loop
211239 ret void
212240 }
213241
242 ; CHECK-LABEL: another_count_down_signed
243 ; CHECK-NOT: {{and|movz|sar|shl}}
244 ; CHECK: decq
245 ; CHECK-NOT: {{and|movz|sar|shl}}
246 ; CHECK: jne
214247 define void @another_count_down_signed(double* %d, i64 %n) nounwind {
215248 entry:
216249 br label %loop
5555 define void @t2(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp {
5656 entry:
5757 ; SSE2-Darwin-LABEL: t2:
58 ; SSE2-Darwin: movaps (%eax), %xmm0
58 ; SSE2-Darwin: movaps (%ecx), %xmm0
5959 ; SSE2-Darwin: movaps %xmm0, (%eax)
6060
6161 ; SSE2-Mingw32-LABEL: t2:
62 ; SSE2-Mingw32: movaps (%eax), %xmm0
62 ; SSE2-Mingw32: movaps (%ecx), %xmm0
6363 ; SSE2-Mingw32: movaps %xmm0, (%eax)
6464
6565 ; SSE1-LABEL: t2:
66 ; SSE1: movaps (%eax), %xmm0
66 ; SSE1: movaps (%ecx), %xmm0
6767 ; SSE1: movaps %xmm0, (%eax)
6868
6969 ; NOSSE-LABEL: t2:
9090 define void @t3(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp {
9191 entry:
9292 ; SSE2-Darwin-LABEL: t3:
93 ; SSE2-Darwin: movsd (%eax), %xmm0
94 ; SSE2-Darwin: movsd 8(%eax), %xmm1
93 ; SSE2-Darwin: movsd (%ecx), %xmm0
94 ; SSE2-Darwin: movsd 8(%ecx), %xmm1
9595 ; SSE2-Darwin: movsd %xmm1, 8(%eax)
9696 ; SSE2-Darwin: movsd %xmm0, (%eax)
9797
9898 ; SSE2-Mingw32-LABEL: t3:
99 ; SSE2-Mingw32: movsd (%eax), %xmm0
100 ; SSE2-Mingw32: movsd 8(%eax), %xmm1
99 ; SSE2-Mingw32: movsd (%ecx), %xmm0
100 ; SSE2-Mingw32: movsd 8(%ecx), %xmm1
101101 ; SSE2-Mingw32: movsd %xmm1, 8(%eax)
102102 ; SSE2-Mingw32: movsd %xmm0, (%eax)
103103
0 ; RUN: llc < %s -march=x86 -mattr=sse4.1 -mcpu=nehalem -stack-alignment=16 > %t
11 ; RUN: grep pmul %t | count 12
2 ; RUN: grep mov %t | count 11
2 ; RUN: grep mov %t | count 14
33
44 define <4 x i32> @a(<4 x i32> %i) nounwind {
55 %A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 >
1818 ret i32 %retval.0
1919 }
2020
21 ; We were miscompiling this and using %ax instead of %cx in the movw.
22 ; CHECK: movswl %cx, %ecx
23 ; CHECK: movw %cx, (%rsi)
24 ; CHECK: movslq %ecx, %rcx
21 ; We were miscompiling this and using %ax instead of %cx in the movw
22 ; in the following sequence:
23 ; movswl %cx, %ecx
24 ; movw %cx, (%rsi)
25 ; movslq %ecx, %rcx
26 ;
27 ; We can't produce the above sequence without special SD-level
28 ; heuristics. Now we produce this:
29 ; CHECK: movw %ax, (%rsi)
30 ; CHECK: cwtl
31 ; CHECK: cltq
5656 %tmp22 = tail call %"struct.std::basic_ostream >"* @_ZNSolsEd( %"struct.std::basic_ostream >"* %tmp16, double %tmp1920 ) ; <%"struct.std::basic_ostream >"*> [#uses=1]
5757 %tmp30 = tail call %"struct.std::basic_ostream >"* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_( %"struct.std::basic_ostream >"* %tmp22 ) ; <%"struct.std::basic_ostream >"*> [#uses=0]
5858 ; reload:
59 ; CHECK: fld
60 ; CHECK: fstps
6159 ; CHECK: ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc
6260 %tmp34 = tail call %"struct.std::basic_ostream >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc( %"struct.std::basic_ostream >"* @_ZSt4cout, i8* getelementptr ([13 x i8]* @.str1, i32 0, i32 0) ) ; <%"struct.std::basic_ostream >"*> [#uses=1]
6361 %tmp3940 = fpext float %tmp1314 to double ; [#uses=1]
62 ; CHECK: fld
6463 ; CHECK: fstpl
6564 ; CHECK: ZNSolsEd
6665 %tmp42 = tail call %"struct.std::basic_ostream >"* @_ZNSolsEd( %"struct.std::basic_ostream >"* %tmp34, double %tmp3940 ) ; <%"struct.std::basic_ostream >"*> [#uses=1]
None ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s
0 ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=corei7-avx -enable-misched=false | FileCheck %s
11
22 ; CHECK-LABEL: main:
33 ; CHECK: pushl %esi
None ; RUN: llc < %s -mtriple=x86_64-apple-macosx -debug-only=pre-RA-sched \
1 ; RUN: 2>&1 | FileCheck %s
0 ; RUN-disabled: llc < %s -mtriple=x86_64-apple-macosx -pre-RA-sched=ilp -debug-only=pre-RA-sched \
1 ; RUN-disabled: 2>&1 | FileCheck %s
2 ; RUN: true
23 ; REQUIRES: asserts
34 ;
45 ; rdar:13279013: pre-RA-sched should not check all interferences and
1010 ret i32 %isvalid
1111 ; CHECK-LABEL: _rdrand16_step:
1212 ; CHECK: rdrandw %ax
13 ; CHECK: movw %ax, (%r[[A0:di|cx]])
1413 ; CHECK: movzwl %ax, %ecx
1514 ; CHECK: movl $1, %eax
1615 ; CHECK: cmovael %ecx, %eax
16 ; CHECK: movw %cx, (%r[[A0:di|cx]])
1717 ; CHECK: ret
1818 }
1919
2525 ret i32 %isvalid
2626 ; CHECK-LABEL: _rdrand32_step:
2727 ; CHECK: rdrandl %e[[T0:[a-z]+]]
28 ; CHECK: movl %e[[T0]], (%r[[A0]])
2928 ; CHECK: movl $1, %eax
3029 ; CHECK: cmovael %e[[T0]], %eax
30 ; CHECK: movl %e[[T0]], (%r[[A0]])
3131 ; CHECK: ret
3232 }
3333
3939 ret i32 %isvalid
4040 ; CHECK-LABEL: _rdrand64_step:
4141 ; CHECK: rdrandq %r[[T1:[a-z]+]]
42 ; CHECK: movq %r[[T1]], (%r[[A0]])
4342 ; CHECK: movl $1, %eax
4443 ; CHECK: cmovael %e[[T1]], %eax
44 ; CHECK: movq %r[[T1]], (%r[[A0]])
4545 ; CHECK: ret
4646 }
4747
1111 ret i32 %isvalid
1212 ; CHECK-LABEL: _rdseed16_step:
1313 ; CHECK: rdseedw %ax
14 ; CHECK: movw %ax, (%r[[A0:di|cx]])
1514 ; CHECK: movzwl %ax, %ecx
1615 ; CHECK: movl $1, %eax
1716 ; CHECK: cmovael %ecx, %eax
17 ; CHECK: movw %cx, (%r[[A0:di|cx]])
1818 ; CHECK: ret
1919 }
2020
2626 ret i32 %isvalid
2727 ; CHECK-LABEL: _rdseed32_step:
2828 ; CHECK: rdseedl %e[[T0:[a-z]+]]
29 ; CHECK: movl %e[[T0]], (%r[[A0]])
3029 ; CHECK: movl $1, %eax
3130 ; CHECK: cmovael %e[[T0]], %eax
31 ; CHECK: movl %e[[T0]], (%r[[A0]])
3232 ; CHECK: ret
3333 }
3434
4040 ret i32 %isvalid
4141 ; CHECK-LABEL: _rdseed64_step:
4242 ; CHECK: rdseedq %r[[T1:[a-z]+]]
43 ; CHECK: movq %r[[T1]], (%r[[A0]])
4443 ; CHECK: movl $1, %eax
4544 ; CHECK: cmovael %e[[T1]], %eax
45 ; CHECK: movq %r[[T1]], (%r[[A0]])
4646 ; CHECK: ret
4747 }
3030 ; X32-NEXT: ret
3131
3232 ; X32: movl %esp, %eax
33 ; X32-NEXT: subl %ecx, %eax
33 ; X32: subl %ecx, %eax
3434 ; X32-NEXT: cmpl %eax, %gs:48
3535
3636 ; X32: movl %eax, %esp
5151 ; X64-NEXT: ret
5252
5353 ; X64: movq %rsp, %[[RDI:rdi|rax]]
54 ; X64-NEXT: subq %{{.*}}, %[[RDI]]
54 ; X64: subq %{{.*}}, %[[RDI]]
5555 ; X64-NEXT: cmpq %[[RDI]], %fs:112
5656
5757 ; X64: movq %[[RDI]], %rsp
3333 bb91: ; preds = %bb84
3434 ret i32 0
3535 ; CHECK-LABEL: test2:
36 ; CHECK: movnew
37 ; CHECK: movswl
36 ; CHECK: cmovnew
37 ; CHECK: cwtl
3838
3939 ; ATOM-LABEL: test2:
40 ; ATOM: movnew
41 ; ATOM: movswl
40 ; ATOM: cmovnew
41 ; ATOM: cwtl
4242 }
4343
4444 declare i1 @return_false()
255255 %call = tail call noalias i8* @_Znam(i64 %D) nounwind noredzone
256256 ret i8* %call
257257 ; CHECK-LABEL: test12:
258 ; CHECK: mulq
258259 ; CHECK: movq $-1, %[[R:r..]]
259 ; CHECK: mulq
260260 ; CHECK: cmovnoq %rax, %[[R]]
261261 ; CHECK: jmp __Znam
262262
2929 %x = load i32* %p
3030 %shl = shl i32 %x, %shamt
3131 ; BMI2: shl32p
32 ; BMI2: shlxl %{{.+}}, ({{.+}}), %{{.+}}
32 ; Source order scheduling prevents folding, rdar:14208996.
33 ; BMI2: shlxl %{{.+}}, %{{.+}}, %{{.+}}
3334 ; BMI2: ret
3435 ; BMI264: shl32p
35 ; BMI264: shlxl %{{.+}}, ({{.+}}), %{{.+}}
36 ; BMI264: shlxl %{{.+}}, %{{.+}}, %{{.+}}
3637 ; BMI264: ret
3738 ret i32 %shl
3839 }
7374 %x = load i64* %p
7475 %shl = shl i64 %x, %shamt
7576 ; BMI264: shl64p
76 ; BMI264: shlxq %{{.+}}, ({{.+}}), %{{.+}}
77 ; BMI264: shlxq %{{.+}}, %{{.+}}, %{{.+}}
7778 ; BMI264: ret
7879 ret i64 %shl
7980 }
105106 %x = load i32* %p
106107 %shl = lshr i32 %x, %shamt
107108 ; BMI2: lshr32p
108 ; BMI2: shrxl %{{.+}}, ({{.+}}), %{{.+}}
109 ; Source order scheduling prevents folding, rdar:14208996.
110 ; BMI2: shrxl %{{.+}}, %{{.+}}, %{{.+}}
109111 ; BMI2: ret
110112 ; BMI264: lshr32p
111 ; BMI264: shrxl %{{.+}}, ({{.+}}), %{{.+}}
113 ; BMI264: shrxl %{{.+}}, %{{.+}}, %{{.+}}
112114 ; BMI264: ret
113115 ret i32 %shl
114116 }
127129 %x = load i64* %p
128130 %shl = lshr i64 %x, %shamt
129131 ; BMI264: lshr64p
130 ; BMI264: shrxq %{{.+}}, ({{.+}}), %{{.+}}
132 ; BMI264: shrxq %{{.+}}, %{{.+}}, %{{.+}}
131133 ; BMI264: ret
132134 ret i64 %shl
133135 }
149151 %x = load i32* %p
150152 %shl = ashr i32 %x, %shamt
151153 ; BMI2: ashr32p
152 ; BMI2: sarxl %{{.+}}, ({{.+}}), %{{.+}}
154 ; Source order scheduling prevents folding, rdar:14208996.
155 ; BMI2: sarxl %{{.+}}, %{{.+}}, %{{.+}}
153156 ; BMI2: ret
154157 ; BMI264: ashr32p
155 ; BMI264: sarxl %{{.+}}, ({{.+}}), %{{.+}}
158 ; BMI264: sarxl %{{.+}}, %{{.+}}, %{{.+}}
156159 ; BMI264: ret
157160 ret i32 %shl
158161 }
171174 %x = load i64* %p
172175 %shl = ashr i64 %x, %shamt
173176 ; BMI264: ashr64p
174 ; BMI264: sarxq %{{.+}}, ({{.+}}), %{{.+}}
177 ; BMI264: sarxq %{{.+}}, %{{.+}}, %{{.+}}
175178 ; BMI264: ret
176179 ret i64 %shl
177180 }
2525
2626 ; CHECK-LABEL: split:
2727 ; CHECK-NEXT: testb $1, %dil
28 ; CHECK-NEXT: jne
29 ; CHECK-NEXT: movaps
30 ; CHECK-NEXT: ret
28 ; CHECK-NEXT: je
3129 ; CHECK: divsd
32 ; CHECK-NEXT: ret
30 ; CHECK: movaps
31 ; CHECK: ret
3332 define double @split(double %x, double %y, i1 %c) nounwind {
3433 %a = fdiv double %x, 3.2
3534 %z = select i1 %c, double %a, double %y
6463 ; Sink instructions with dead EFLAGS defs.
6564
6665 ; FIXME: Unfail the zzz test if we can correctly mark pregs with the kill flag.
67 ;
66 ;
6867 ; See . This test isn't valid after we made machine
6968 ; sinking more conservative about sinking instructions that define a preg into a
7069 ; block when we don't know if the preg is killed within the current block.
66 %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 >
77 store <2 x double> %tmp9, <2 x double>* %r, align 16
88 ret void
9
9
1010 ; CHECK-LABEL: test1:
1111 ; CHECK: movl 8(%esp), %eax
1212 ; CHECK-NEXT: movapd (%eax), %xmm0
2222 %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 >
2323 store <2 x double> %tmp9, <2 x double>* %r, align 16
2424 ret void
25
25
2626 ; CHECK-LABEL: test2:
27 ; CHECK: movl 8(%esp), %eax
28 ; CHECK-NEXT: movapd (%eax), %xmm0
27 ; CHECK: movl 4(%esp), %eax
28 ; CHECK: movl 8(%esp), %ecx
29 ; CHECK-NEXT: movapd (%ecx), %xmm0
2930 ; CHECK-NEXT: movhpd 12(%esp), %xmm0
30 ; CHECK-NEXT: movl 4(%esp), %eax
3131 ; CHECK-NEXT: movapd %xmm0, (%eax)
3232 ; CHECK-NEXT: ret
3333 }
4747 store <4 x float> %tmp13, <4 x float>* %res
4848 ret void
4949 ; CHECK: @test3
50 ; CHECK: unpcklps
50 ; CHECK: unpcklps
5151 }
5252
5353 define void @test4(<4 x float> %X, <4 x float>* %res) nounwind {
8484 %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
8585 store <4 x float> %tmp2, <4 x float>* %res
8686 ret void
87
87
8888 ; CHECK-LABEL: test6:
89 ; CHECK: movaps (%eax), %xmm0
89 ; CHECK: movaps (%ecx), %xmm0
9090 ; CHECK: movaps %xmm0, (%eax)
9191 }
9292
9595 shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer ; <<4 x float>>:2 [#uses=1]
9696 store <4 x float> %2, <4 x float>* null
9797 ret void
98
98
9999 ; CHECK-LABEL: test7:
100100 ; CHECK: xorps %xmm0, %xmm0
101101 ; CHECK: movaps %xmm0, 0
165165 store <4 x float> %tmp11, <4 x float>* %res
166166 ret void
167167 ; CHECK: test13
168 ; CHECK: shufps $69, (%eax), %xmm0
168 ; CHECK: shufps $69, (%ecx), %xmm0
169169 ; CHECK: pshufd $-40, %xmm0, %xmm0
170170 }
171171
177177 %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1]
178178 ret <4 x float> %tmp27
179179 ; CHECK-LABEL: test14:
180 ; CHECK: subps [[X1:%xmm[0-9]+]], [[X2:%xmm[0-9]+]]
181 ; CHECK: addps [[X1]], [[X0:%xmm[0-9]+]]
180 ; CHECK: addps [[X1:%xmm[0-9]+]], [[X0:%xmm[0-9]+]]
181 ; CHECK: subps [[X1]], [[X2:%xmm[0-9]+]]
182182 ; CHECK: movlhps [[X2]], [[X0]]
183183 }
184184
220220 %double2float.i = fptrunc <4 x double> %0 to <4 x float>
221221 ret <4 x float> %double2float.i
222222 }
223
1111 %D = or i32 %C, %B
1212 store i32 %D, i32* %a0, align 4
1313 ret void
14
14
1515 ; X64-LABEL: test1:
1616 ; X64: movb %sil, (%rdi)
1717
3333 ; X64: movb %sil, 1(%rdi)
3434
3535 ; X32-LABEL: test2:
36 ; X32: movb 8(%esp), %al
37 ; X32: movb %al, 1(%{{.*}})
36 ; X32: movb 8(%esp), %[[REG:[abcd]l]]
37 ; X32: movb %[[REG]], 1(%{{.*}})
3838 }
3939
4040 define void @test3(i32* nocapture %a0, i16 zeroext %a1) nounwind ssp {
6666 ; X64: movw %si, 2(%rdi)
6767
6868 ; X32-LABEL: test4:
69 ; X32: movl 8(%esp), %eax
70 ; X32: movw %ax, 2(%{{.*}})
69 ; X32: movl 8(%esp), %e[[REG:[abcd]x]]
70 ; X32: movw %[[REG]], 2(%{{.*}})
7171 }
7272
7373 define void @test5(i64* nocapture %a0, i16 zeroext %a1) nounwind ssp {
8383 ; X64: movw %si, 2(%rdi)
8484
8585 ; X32-LABEL: test5:
86 ; X32: movzwl 8(%esp), %eax
87 ; X32: movw %ax, 2(%{{.*}})
86 ; X32: movzwl 8(%esp), %e[[REG:[abcd]x]]
87 ; X32: movw %[[REG]], 2(%{{.*}})
8888 }
8989
9090 define void @test6(i64* nocapture %a0, i8 zeroext %a1) nounwind ssp {
101101
102102
103103 ; X32-LABEL: test6:
104 ; X32: movb 8(%esp), %al
105 ; X32: movb %al, 5(%{{.*}})
104 ; X32: movb 8(%esp), %[[REG:[abcd]l]]
105 ; X32: movb %[[REG]], 5(%{{.*}})
106106 }
107107
108108 define i32 @test7(i64* nocapture %a0, i8 zeroext %a1, i32* %P2) nounwind {
120120
121121
122122 ; X32-LABEL: test7:
123 ; X32: movb 8(%esp), %cl
124 ; X32: movb %cl, 5(%{{.*}})
123 ; X32: movb 8(%esp), %[[REG:[abcd]l]]
124 ; X32: movb %[[REG]], 5(%{{.*}})
125125 }
126126
127127 ; PR7833
None ; RUN: llc < %s -mtriple=x86_64-linux-gnu -tailcallopt -code-model=large | FileCheck %s
0 ; RUN: llc < %s -mtriple=x86_64-linux-gnu -tailcallopt -code-model=large -enable-misched=false | FileCheck %s
11
22 declare fastcc i32 @callee(i32 %arg)
33 define fastcc i32 @directcall(i32 %arg) {
11 ; rdar://5752025
22
33 ; We want:
4 ; CHECK: movl $42, %ecx
5 ; CHECK-NEXT: movl 4(%esp), %eax
6 ; CHECK-NEXT: andl $15, %eax
7 ; CHECK-NEXT: cmovnel %ecx, %eax
4 ; CHECK: movl 4(%esp), %ecx
5 ; CHECK-NEXT: andl $15, %ecx
6 ; CHECK-NEXT: movl $42, %eax
7 ; CHECK-NEXT: cmovel %ecx, %eax
88 ; CHECK-NEXT: ret
99 ;
1010 ; We don't want:
3838 %retval = select i1 %tmp4, i32 %tmp2, i32 42 ; [#uses=1]
3939 ret i32 %retval
4040 }
41
2121 ret i1 false
2222 }
2323 ; CHECK-LABEL: test2:
24 ; CHECK: btl %eax
24 ; CHECK: btl
2525
2626 define i32 @test3(i8* %ptr) nounwind {
2727 %val = load i8* %ptr
0 ; RUN: llc -mcpu=generic -march=x86 -mattr=+sse < %s | FileCheck %s
1 ; CHECK: divss
21 ; CHECK: divps
32 ; CHECK: divps
3 ; CHECK: divss
44
55 %vec = type <9 x float>
66 define %vec @vecdiv( %vec %p1, %vec %p2)
88 %result = fdiv %vec %p1, %p2
99 ret %vec %result
1010 }
11
11 ; RUN: llc -march=x86 -mcpu=atom -mattr=+sse < %s | FileCheck -check-prefix=ATOM %s
22
33 %vec = type <6 x float>
4 ; CHECK: divps
45 ; CHECK: divss
56 ; CHECK: divss
6 ; CHECK: divps
77
88 ; Scheduler causes a different instruction order to be produced on Intel Atom
99 ; ATOM: divps
66 define <8 x float> @my2filter4_1d(<4 x float> %a, <8 x float> %T0, <8 x float> %T1) nounwind readnone {
77 entry:
88 ; CHECK: subps
9 ; CHECK: subps
10 ; CHECK: mulps
911 ; CHECK: mulps
1012 ; CHECK: addps
11 ; CHECK: subps
12 ; CHECK: mulps
1313 ; CHECK: addps
1414 %tmp7 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3 > ; <<8 x float>> [#uses=1]
1515 %sub = fsub <8 x float> %T1, %T0 ; <<8 x float>> [#uses=1]
5353 define <2 x double> @t3() nounwind readonly {
5454 bb:
5555 ; CHECK-LABEL: t3:
56 ; CHECK: punpcklqdq %xmm1, %xmm0
5756 ; CHECK: movq (%rax), %xmm1
57 ; CHECK: punpcklqdq %xmm2, %xmm0
5858 ; CHECK: movsd %xmm1, %xmm0
5959 %tmp0 = load i128* null, align 1
6060 %tmp1 = load <2 x i32>* undef, align 8
7171 define <2 x i64> @t4() nounwind readonly {
7272 bb:
7373 ; CHECK-LABEL: t4:
74 ; CHECK: punpcklqdq %xmm0, %xmm1
7574 ; CHECK: movq (%rax), %xmm0
76 ; CHECK: movsd %xmm1, %xmm0
75 ; CHECK: punpcklqdq %{{xmm.}}, %[[XMM:xmm[0-9]]]
76 ; CHECK: movsd %[[XMM]], %xmm0
7777 %tmp0 = load i128* null, align 1
7878 %tmp1 = load <2 x i32>* undef, align 8
7979 %tmp2 = bitcast i128 %tmp0 to <16 x i8>
0 ; RUN: llc -march=x86 -mcpu=generic -mattr=+sse4.2 < %s | FileCheck %s
11 ; RUN: llc -march=x86 -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s
22
3 ; CHECK: movl
34 ; CHECK: paddd
4 ; CHECK: movl
55 ; CHECK: movlpd
66
77 ; Scheduler causes a different instruction order to be produced
None ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-mingw32 | FileCheck %s -check-prefix=M64
1 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64
2 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
0 ; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-mingw32 | FileCheck %s -check-prefix=M64
1 ; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64
2 ; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
33 ; PR8777
44 ; PR8778
55
5151 %r = call i64 @bar(i64 %n, i64 %x, i64 %n, i8* %buf0, i8* %buf1) nounwind
5252
5353 ; M64: subq $48, %rsp
54 ; M64: movq %rax, 32(%rsp)
5455 ; M64: leaq -4096(%rbp), %r9
55 ; M64: movq %rax, 32(%rsp)
5656 ; M64: callq bar
5757
5858 ; W64: subq $48, %rsp
59 ; W64: movq %rax, 32(%rsp)
5960 ; W64: leaq -4096(%rbp), %r9
60 ; W64: movq %rax, 32(%rsp)
6161 ; W64: callq bar
6262
6363 ; EFI: subq $48, %rsp
64 ; EFI: movq [[R64]], 32(%rsp)
6465 ; EFI: leaq -[[B0OFS]](%rbp), %r9
65 ; EFI: movq [[R64]], 32(%rsp)
6666 ; EFI: callq _bar
6767
6868 ret i64 %r
33 ; This test checks that the operands of packed sub instructions are
44 ; never interchanged by the "Two-Address instruction pass".
55
6 declare { i64, double } @getFirstParam()
7 declare { i64, double } @getSecondParam()
6 declare { i64, double } @getFirstParam()
7 declare { i64, double } @getSecondParam()
88
99 define i64 @test_psubb() {
1010 entry:
2727
2828 ; CHECK-LABEL: test_psubb:
2929 ; CHECK: callq getFirstParam
30 ; CHECK: callq getSecondParam
31 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
32 ; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
30 ; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
31 ; CHECK: callq getSecondParam
32 ; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
33 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
3334 ; CHECK: psubb [[PARAM2]], [[PARAM1]]
3435 ; CHECK: ret
3536
5455
5556 ; CHECK-LABEL: test_psubw:
5657 ; CHECK: callq getFirstParam
57 ; CHECK: callq getSecondParam
58 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
59 ; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
58 ; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
59 ; CHECK: callq getSecondParam
60 ; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
61 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
6062 ; CHECK: psubw [[PARAM2]], [[PARAM1]]
6163 ; CHECK: ret
6264
8284
8385 ; CHECK-LABEL: test_psubd:
8486 ; CHECK: callq getFirstParam
85 ; CHECK: callq getSecondParam
86 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
87 ; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
87 ; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
88 ; CHECK: callq getSecondParam
89 ; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
90 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
8891 ; CHECK: psubd [[PARAM2]], [[PARAM1]]
8992 ; CHECK: ret
9093
109112
110113 ; CHECK-LABEL: test_psubsb:
111114 ; CHECK: callq getFirstParam
112 ; CHECK: callq getSecondParam
113 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
114 ; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
115 ; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
116 ; CHECK: callq getSecondParam
117 ; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
118 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
115119 ; CHECK: psubsb [[PARAM2]], [[PARAM1]]
116120 ; CHECK: ret
117121
136140
137141 ; CHECK-LABEL: test_psubswv:
138142 ; CHECK: callq getFirstParam
139 ; CHECK: callq getSecondParam
140 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
141 ; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
143 ; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
144 ; CHECK: callq getSecondParam
145 ; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
146 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
142147 ; CHECK: psubsw [[PARAM2]], [[PARAM1]]
143148 ; CHECK: ret
144149
163168
164169 ; CHECK-LABEL: test_psubusbv:
165170 ; CHECK: callq getFirstParam
166 ; CHECK: callq getSecondParam
167 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
168 ; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
171 ; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
172 ; CHECK: callq getSecondParam
173 ; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
174 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
169175 ; CHECK: psubusb [[PARAM2]], [[PARAM1]]
170176 ; CHECK: ret
171177
190196
191197 ; CHECK-LABEL: test_psubuswv:
192198 ; CHECK: callq getFirstParam
193 ; CHECK: callq getSecondParam
194 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
195 ; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
199 ; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
200 ; CHECK: callq getSecondParam
201 ; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
202 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
196203 ; CHECK: psubusw [[PARAM2]], [[PARAM1]]
197204 ; CHECK: ret
198205
55 define <4 x i32> @shl4(<4 x i32> %A) nounwind {
66 entry:
77 ; CHECK: shl4
8 ; CHECK: pslld
89 ; CHECK: padd
9 ; CHECK: pslld
1010 ; CHECK: ret
1111 %B = shl <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
1212 %C = shl <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
6666 define <8 x i16> @shl8(<8 x i16> %A) nounwind {
6767 entry:
6868 ; CHECK: shl8
69 ; CHECK: psllw
6970 ; CHECK: padd
70 ; CHECK: psllw
7171 ; CHECK: ret
7272 %B = shl <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
7373 %C = shl <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
None ; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s
0 ; RUN: llc < %s -mcpu=generic -march=x86 -enable-misched=false | FileCheck %s
11
22 ;; Simple case
33 define i32 @test1(i8 %x) nounwind readnone {
99 ; CHECK: movzbl
1010 ; CHECK-NEXT: andl {{.*}}224
1111
12 ;; Multiple uses of %x but easily extensible.
12 ;; Multiple uses of %x but easily extensible.
1313 define i32 @test2(i8 %x) nounwind readnone {
1414 %A = and i8 %x, -32
1515 %B = zext i8 %A to i32
2020 }
2121 ; CHECK: test2
2222 ; CHECK: movzbl
23 ; CHECK: andl $224
2324 ; CHECK: orl $63
24 ; CHECK: andl $224
2525
2626 declare void @use(i32, i8)
2727
3333 %tmp12 = add i64 %tmp11, 5089792279245435153
3434
3535 ; CHECK: addl $2138875574, %e[[REGISTER_zext:[a-z0-9]+]]
36 ; CHECK-NEXT: cmpl $-8608074, %e[[REGISTER_zext]]
37 ; CHECK: movslq %e[[REGISTER_zext]], [[REGISTER_tmp:%r[a-z0-9]+]]
38 ; CHECK: movq [[REGISTER_tmp]], [[REGISTER_sext:%r[a-z0-9]+]]
36 ; CHECK: movslq %e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]]
37 ; CHECK: cmpl $-8608074, %e[[REGISTER_zext]]
3938 ; CHECK-NOT: [[REGISTER_zext]]
39 ; CHECK-DAG: testl %e[[REGISTER_zext]]
4040 ; CHECK: subq %r[[REGISTER_zext]], [[REGISTER_sext]]
4141
4242 %tmp13 = sub i64 %tmp12, 2138875574
1515 call void @llvm.dbg.value(metadata !12, i64 0, metadata !13), !dbg !14
1616 %tmp2 = load i32 addrspace(1)* %ip, align 4, !dbg !15
1717 %tmp3 = add i32 0, %tmp2, !dbg !15
18 ; CHECK: ##DEBUG_VALUE: idx <- EAX{{$}}
18 ; CHECK: ##DEBUG_VALUE: idx <- E{{..$}}
1919 call void @llvm.dbg.value(metadata !{i32 %tmp3}, i64 0, metadata !13), !dbg
2020 !15
2121 %arrayidx = getelementptr i32 addrspace(1)* %ip, i32 %1, !dbg !16