llvm.org GIT mirror llvm / b2b5dc6
Revert "Temporarily enable MI-Sched on X86." This reverts commit 98a9b72e8c56dc13a2617de84503a3d78352789c. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184823 91177308-0d34-0410-b5e6-96231b3b80d8 Andrew Trick 6 years ago
64 changed file(s) with 281 addition(s) and 300 deletion(s). Raw diff Collapse all Expand all
360360 /// memset with zero passed as the second argument. Otherwise it
361361 /// returns null.
362362 const char *getBZeroEntry() const;
363
363
364364 /// This function returns true if the target has sincos() routine in its
365365 /// compiler runtime or math libraries.
366366 bool hasSinCos() const;
367
368 /// Enable the MachineScheduler pass for all X86 subtargets.
369 bool enableMachineScheduler() const LLVM_OVERRIDE { return true; }
370367
371368 /// enablePostRAScheduler - run for Atom optimization.
372369 bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
0 ; REQUIRES: asserts
11 ; RUN: llc < %s -march=x86 -relocation-model=static -stats 2>&1 | \
2 ; RUN: grep asm-printer | grep 16
2 ; RUN: grep asm-printer | grep 14
33 ;
4 ; It's possible to schedule this in 14 instructions by avoiding
5 ; callee-save registers, but the scheduler isn't currently that
6 ; conservative with registers.
74 @size20 = external global i32 ; [#uses=1]
85 @in5 = external global i8* ; [#uses=1]
96
2320 }
2421
2522 declare i32 @memcmp(i8*, i8*, i32)
23
1212
1313 ; CHECK: mulss
1414 ; CHECK: mulss
15 ; CHECK: mulss
15 ; CHECK: addss
1616 ; CHECK: mulss
1717 ; CHECK: addss
18 ; CHECK: addss
18 ; CHECK: mulss
1919 ; CHECK: addss
2020 ; CHECK: ret
2121 }
1616 ; CHECK: %bb4
1717 ; CHECK: xorl
1818 ; CHECK: callq
19 ; CHECK: movq
1920 ; CHECK: xorl
2021 ; CHECK: xorl
21 ; CHECK: movq
2222
2323 %0 = call i32 (...)* @xxGetOffsetForCode(i32 undef) nounwind ; [#uses=0]
2424 %ins = or i64 %p, 2097152 ; [#uses=1]
22 ; Bug 6225
33 ;
44 ; If a call is a fastcc tail call and tail call optimization is enabled, the
5 ; caller frame is replaced by the callee frame. This can require that arguments are
5 ; caller frame is replaced by the callee frame. This can require that arguments are
66 ; placed on the former return address stack slot. Special care needs to be taken
77 ; that the return address is moved or stored in a register before
88 ; lowering of arguments potentially overwrites the value.
99 ;
10 ; Move return address (60(%esp)) to a temporary register (%ebp)
11 ; CHECK: movl 60(%esp), [[REGISTER:%[a-z]+]]
10 ; Move return address (76(%esp)) to a temporary register (%ebp)
11 ; CHECK: movl 76(%esp), [[REGISTER:%[a-z]+]]
1212 ; Overwrite return address
13 ; CHECK: movl [[EBX:%[a-z]+]], 60(%esp)
13 ; CHECK: movl [[EBX:%[a-z]+]], 76(%esp)
1414 ; Move return address from temporary register (%ebp) to new stack location (60(%esp))
15 ; CHECK: movl [[REGISTER]], 44(%esp)
15 ; CHECK: movl [[REGISTER]], 60(%esp)
1616
1717 %tupl_p = type [9 x i32]*
1818
5050 tail call fastcc void @l298(i32 %r10, i32 %r9, i32 %r4) noreturn nounwind
5151 ret void
5252 }
53
54
1818 }
1919
2020 ; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip)
21 ; CHECK: movb (%rsp), [[R1:%.+]]
22 ; CHECK: movb 30(%rsp), [[R0:%.+]]
23 ; CHECK: movb [[R1]], (%rsp)
24 ; CHECK: movb [[R0]], 30(%rsp)
21 ; CHECK: movb 38(%rsp), [[R0:%.+]]
22 ; CHECK: movb 8(%rsp), [[R1:%.+]]
23 ; CHECK: movb [[R1]], 8(%rsp)
24 ; CHECK: movb [[R0]], 38(%rsp)
2525 ; CHECK: callq ___stack_chk_fail
1515 ; CHECK: main
1616 define i32 @main() nounwind uwtable {
1717 entry:
18 ; CHECK: pmovsxbq j(%rip), %
1819 ; CHECK: pmovsxbq i(%rip), %
19 ; CHECK: pmovsxbq j(%rip), %
2020 %0 = load <2 x i8>* @i, align 8
2121 %1 = load <2 x i8>* @j, align 8
2222 %div = sdiv <2 x i8> %1, %0
2424 ret i32 0
2525 ; CHECK: ret
2626 }
27
44 ; It's hard to test for the ISEL condition because CodeGen optimizes
55 ; away the bugpointed code. Just ensure the basics are still there.
66 ;CHECK: func:
7 ;CHECK: vpxor
8 ;CHECK: vinserti128
7 ;CHECK: vxorps
8 ;CHECK: vinsertf128
99 ;CHECK: vpshufd
1010 ;CHECK: vpshufd
1111 ;CHECK: vmulps
3333
3434 ; 64BIT: t2:
3535 ; 64BIT-NOT: movw %si, %ax
36 ; 64BIT: decl %eax
37 ; 64BIT: movzwl %ax
36 ; 64BIT: leal -1(%rsi), %eax
3837 %0 = icmp eq i16 %k, %c ; [#uses=1]
3938 %1 = add i16 %k, -1 ; [#uses=3]
4039 br i1 %0, label %bb, label %bb1
5857
5958 ; 64BIT: t3:
6059 ; 64BIT-NOT: movw %si, %ax
61 ; 64BIT: addl $2, %eax
60 ; 64BIT: leal 2(%rsi), %eax
6261 %0 = add i16 %k, 2 ; [#uses=3]
6362 %1 = icmp eq i16 %k, %c ; [#uses=1]
6463 br i1 %1, label %bb, label %bb1
8180
8281 ; 64BIT: t4:
8382 ; 64BIT-NOT: movw %si, %ax
84 ; 64BIT: addl %edi, %eax
83 ; 64BIT: leal (%rsi,%rdi), %eax
8584 %0 = add i16 %k, %c ; [#uses=3]
8685 %1 = icmp eq i16 %k, %c ; [#uses=1]
8786 br i1 %1, label %bb, label %bb1
33 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
44 target triple = "x86_64-apple-macosx10.8.0"
55
6 ;YESCOLOR: subq $144, %rsp
7 ;NOCOLOR: subq $272, %rsp
6 ;YESCOLOR: subq $136, %rsp
7 ;NOCOLOR: subq $264, %rsp
88
99 define i32 @myCall_w2(i32 %in) {
1010 entry:
428428 declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
429429
430430 declare i32 @foo(i32, i8*)
431
None ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-32-STATIC
1 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-32-PIC
2
3 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-64-STATIC
4 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-64-PIC
5
6 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-32-STATIC
7 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
8 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-32-PIC
9
10 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-64-STATIC
11 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
12 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-64-PIC
0 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC
1 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC
2
3 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC
4 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC
5
6 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC
7 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
8 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC
9
10 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC
11 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
12 ; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC
1313
1414 @src = external global [131072 x i32]
1515 @dst = external global [131072 x i32]
88 %b = add i32 %a, 128
99 ret i32 %b
1010 ; X32: subl $-128, %eax
11 ; X64: subl $-128,
11 ; X64: subl $-128,
1212 }
1313 define i64 @test2(i64 inreg %a) nounwind {
1414 %b = add i64 %a, 2147483648
1919 define i64 @test3(i64 inreg %a) nounwind {
2020 %b = add i64 %a, 128
2121 ret i64 %b
22
22
2323 ; X32: addl $128, %eax
2424 ; X64: subq $-128,
2525 }
3737
3838 overflow:
3939 ret i1 false
40
40
4141 ; X32: test4:
4242 ; X32: addl
4343 ; X32-NEXT: jo
8181 ret i64 %tmp5
8282
8383 ; X32: test6:
84 ; X32: movl 4(%esp), %eax
85 ; X32-NEXT: movl 12(%esp), %edx
84 ; X32: movl 12(%esp), %edx
8685 ; X32-NEXT: addl 8(%esp), %edx
86 ; X32-NEXT: movl 4(%esp), %eax
8787 ; X32-NEXT: ret
88
88
8989 ; X64: test6:
9090 ; X64: shlq $32, %r[[A1]]
9191 ; X64: leaq (%r[[A1]],%r[[A0]]), %rax
None ; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux -enable-misched=false | FileCheck %s
0 ; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux | FileCheck %s
11
22 declare void @bar(<2 x i64>* %n)
33
239239 ; CHECK-NEXT: vpmuludq %xmm
240240 ; CHECK-NEXT: vpsllq $32, %xmm
241241 ; CHECK-NEXT: vpaddq %xmm
242 ; CHECK-NEXT: vpsrlq $32, %xmm
243 ; CHECK-NEXT: vpmuludq %xmm
244 ; CHECK-NEXT: vpsllq $32, %xmm
245 ; CHECK-NEXT: vpaddq %xmm
246 ; CHECK-NEXT: vpmuludq %xmm
247 ; CHECK-NEXT: vpsrlq $32, %xmm
248 ; CHECK-NEXT: vpmuludq %xmm
249 ; CHECK-NEXT: vpsllq $32, %xmm
242 ; CHECK-NEXT: vpmuludq %xmm
243 ; CHECK-NEXT: vpsrlq $32, %xmm
244 ; CHECK-NEXT: vpmuludq %xmm
245 ; CHECK-NEXT: vpsllq $32, %xmm
246 ; CHECK-NEXT: vpsrlq $32, %xmm
247 ; CHECK-NEXT: vpmuludq %xmm
248 ; CHECK-NEXT: vpsllq $32, %xmm
249 ; CHECK-NEXT: vpaddq %xmm
250250 ; CHECK-NEXT: vpaddq %xmm
251251 ; CHECK-NEXT: vpsrlq $32, %xmm
252252 ; CHECK-NEXT: vpmuludq %xmm
268268 %x2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %x1) nounwind
269269 ret <4 x float> %x2
270270 }
271
3131 define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
3232 %y = alloca <16 x float>, align 16
3333 %x = fadd <16 x float> %a, %b
34 %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
34 %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
3535 %2 = load <16 x float>* %y, align 16
3636 %3 = fadd <16 x float> %2, %1
3737 ret <16 x float> %3
4242 ; preserved ymm6-ymm15
4343 ; WIN64: testf16_regs
4444 ; WIN64: call
45 ; WIN64: vaddps {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
46 ; WIN64: vaddps {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
45 ; WIN64: vaddps {{%ymm[6-7]}}, %ymm0, %ymm0
46 ; WIN64: vaddps {{%ymm[6-7]}}, %ymm1, %ymm1
4747 ; WIN64: ret
4848
4949 ; preserved ymm8-ymm15
5050 ; X64: testf16_regs
5151 ; X64: call
52 ; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
53 ; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
52 ; X64: vaddps {{%ymm[8-9]}}, %ymm0, %ymm0
53 ; X64: vaddps {{%ymm[8-9]}}, %ymm1, %ymm1
5454 ; X64: ret
5555
5656 define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
5757 %y = alloca <16 x float>, align 16
5858 %x = fadd <16 x float> %a, %b
59 %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
59 %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
6060 %2 = load <16 x float>* %y, align 16
6161 %3 = fadd <16 x float> %1, %b
6262 %4 = fadd <16 x float> %2, %3
165165 %8 = shufflevector <8 x float> %3, <8 x float> %7, <8 x i32>
166166 ret <8 x float> %8
167167 }
168
8080 define i32 @test9(<4 x i32> %a) nounwind {
8181 ; CHECK: test9
8282 ; CHECK: vpextrd
83 %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32>
83 %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32>
8484 %r = extractelement <8 x i32> %b, i32 2
8585 ; CHECK: ret
8686 ret i32 %r
250250 ; CHECK: swap8doubles
251251 ; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
252252 ; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
253 ; CHECK: vinsertf128 $1, {{[0-9]*}}(%rdi), %ymm{{[0-9]+}}
254 ; CHECK: vinsertf128 $1, {{[0-9]*}}(%rdi), %ymm{{[0-9]+}}
253 ; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
254 ; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
255255 ; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
256256 ; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
257257 ; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi)
0 ; Without list-burr scheduling we may not see the difference in codegen here.
11 ; Use a subtarget that has post-RA scheduling enabled because the anti-dependency
22 ; breaker requires liveness information to be kept.
3 ; RUN: llc < %s -march=x86-64 -mcpu=atom -enable-misched=false -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none > %t
3 ; RUN: llc < %s -march=x86-64 -mcpu=atom -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none > %t
44 ; RUN: grep "%xmm0" %t | count 14
55 ; RUN: not grep "%xmm1" %t
66 ; RUN: llc < %s -march=x86-64 -mcpu=atom -post-RA-scheduler -break-anti-dependencies=critical > %t
3737 define void @test2b(i32 %x, i32 %n) nounwind {
3838 entry:
3939 ; CHECK: test2b
40 ; CHECK: btl %e{{..}}, %e{{..}}
40 ; CHECK: btl %eax, %ecx
4141 ; CHECK: jb
4242 %tmp29 = lshr i32 %x, %n ; [#uses=1]
4343 %tmp3 = and i32 1, %tmp29
5555 define void @atest2(i32 %x, i32 %n) nounwind {
5656 entry:
5757 ; CHECK: atest2
58 ; CHECK: btl %e{{..}}, %e{{..}}
58 ; CHECK: btl %eax, %ecx
5959 ; CHECK: jb
6060 %tmp29 = ashr i32 %x, %n ; [#uses=1]
6161 %tmp3 = and i32 %tmp29, 1 ; [#uses=1]
7373 define void @atest2b(i32 %x, i32 %n) nounwind {
7474 entry:
7575 ; CHECK: atest2b
76 ; CHECK: btl %e{{..}}, %e{{..}}
76 ; CHECK: btl %eax, %ecx
7777 %tmp29 = ashr i32 %x, %n ; [#uses=1]
7878 %tmp3 = and i32 1, %tmp29
7979 %tmp4 = icmp eq i32 %tmp3, 0 ; [#uses=1]
9090 define void @test3(i32 %x, i32 %n) nounwind {
9191 entry:
9292 ; CHECK: test3
93 ; CHECK: btl %e{{..}}, %e{{..}}
93 ; CHECK: btl %eax, %ecx
9494 ; CHECK: jb
9595 %tmp29 = shl i32 1, %n ; [#uses=1]
9696 %tmp3 = and i32 %tmp29, %x ; [#uses=1]
108108 define void @test3b(i32 %x, i32 %n) nounwind {
109109 entry:
110110 ; CHECK: test3b
111 ; CHECK: btl %e{{..}}, %e{{..}}
111 ; CHECK: btl %eax, %ecx
112112 ; CHECK: jb
113113 %tmp29 = shl i32 1, %n ; [#uses=1]
114114 %tmp3 = and i32 %x, %tmp29
126126 define void @testne2(i32 %x, i32 %n) nounwind {
127127 entry:
128128 ; CHECK: testne2
129 ; CHECK: btl %e{{..}}, %e{{..}}
129 ; CHECK: btl %eax, %ecx
130130 ; CHECK: jae
131131 %tmp29 = lshr i32 %x, %n ; [#uses=1]
132132 %tmp3 = and i32 %tmp29, 1 ; [#uses=1]
144144 define void @testne2b(i32 %x, i32 %n) nounwind {
145145 entry:
146146 ; CHECK: testne2b
147 ; CHECK: btl %e{{..}}, %e{{..}}
147 ; CHECK: btl %eax, %ecx
148148 ; CHECK: jae
149149 %tmp29 = lshr i32 %x, %n ; [#uses=1]
150150 %tmp3 = and i32 1, %tmp29
162162 define void @atestne2(i32 %x, i32 %n) nounwind {
163163 entry:
164164 ; CHECK: atestne2
165 ; CHECK: btl %e{{..}}, %e{{..}}
165 ; CHECK: btl %eax, %ecx
166166 ; CHECK: jae
167167 %tmp29 = ashr i32 %x, %n ; [#uses=1]
168168 %tmp3 = and i32 %tmp29, 1 ; [#uses=1]
180180 define void @atestne2b(i32 %x, i32 %n) nounwind {
181181 entry:
182182 ; CHECK: atestne2b
183 ; CHECK: btl %e{{..}}, %e{{..}}
183 ; CHECK: btl %eax, %ecx
184184 ; CHECK: jae
185185 %tmp29 = ashr i32 %x, %n ; [#uses=1]
186186 %tmp3 = and i32 1, %tmp29
198198 define void @testne3(i32 %x, i32 %n) nounwind {
199199 entry:
200200 ; CHECK: testne3
201 ; CHECK: btl %e{{..}}, %e{{..}}
201 ; CHECK: btl %eax, %ecx
202202 ; CHECK: jae
203203 %tmp29 = shl i32 1, %n ; [#uses=1]
204204 %tmp3 = and i32 %tmp29, %x ; [#uses=1]
216216 define void @testne3b(i32 %x, i32 %n) nounwind {
217217 entry:
218218 ; CHECK: testne3b
219 ; CHECK: btl %e{{..}}, %e{{..}}
219 ; CHECK: btl %eax, %ecx
220220 ; CHECK: jae
221221 %tmp29 = shl i32 1, %n ; [#uses=1]
222222 %tmp3 = and i32 %x, %tmp29
234234 define void @query2(i32 %x, i32 %n) nounwind {
235235 entry:
236236 ; CHECK: query2
237 ; CHECK: btl %e{{..}}, %e{{..}}
237 ; CHECK: btl %eax, %ecx
238238 ; CHECK: jae
239239 %tmp29 = lshr i32 %x, %n ; [#uses=1]
240240 %tmp3 = and i32 %tmp29, 1 ; [#uses=1]
252252 define void @query2b(i32 %x, i32 %n) nounwind {
253253 entry:
254254 ; CHECK: query2b
255 ; CHECK: btl %e{{..}}, %e{{..}}
255 ; CHECK: btl %eax, %ecx
256256 ; CHECK: jae
257257 %tmp29 = lshr i32 %x, %n ; [#uses=1]
258258 %tmp3 = and i32 1, %tmp29
270270 define void @aquery2(i32 %x, i32 %n) nounwind {
271271 entry:
272272 ; CHECK: aquery2
273 ; CHECK: btl %e{{..}}, %e{{..}}
273 ; CHECK: btl %eax, %ecx
274274 ; CHECK: jae
275275 %tmp29 = ashr i32 %x, %n ; [#uses=1]
276276 %tmp3 = and i32 %tmp29, 1 ; [#uses=1]
288288 define void @aquery2b(i32 %x, i32 %n) nounwind {
289289 entry:
290290 ; CHECK: aquery2b
291 ; CHECK: btl %e{{..}}, %e{{..}}
291 ; CHECK: btl %eax, %ecx
292292 ; CHECK: jae
293293 %tmp29 = ashr i32 %x, %n ; [#uses=1]
294294 %tmp3 = and i32 1, %tmp29
306306 define void @query3(i32 %x, i32 %n) nounwind {
307307 entry:
308308 ; CHECK: query3
309 ; CHECK: btl %e{{..}}, %e{{..}}
309 ; CHECK: btl %eax, %ecx
310310 ; CHECK: jae
311311 %tmp29 = shl i32 1, %n ; [#uses=1]
312312 %tmp3 = and i32 %tmp29, %x ; [#uses=1]
324324 define void @query3b(i32 %x, i32 %n) nounwind {
325325 entry:
326326 ; CHECK: query3b
327 ; CHECK: btl %e{{..}}, %e{{..}}
327 ; CHECK: btl %eax, %ecx
328328 ; CHECK: jae
329329 %tmp29 = shl i32 1, %n ; [#uses=1]
330330 %tmp3 = and i32 %x, %tmp29
342342 define void @query3x(i32 %x, i32 %n) nounwind {
343343 entry:
344344 ; CHECK: query3x
345 ; CHECK: btl %e{{..}}, %e{{..}}
345 ; CHECK: btl %eax, %ecx
346346 ; CHECK: jae
347347 %tmp29 = shl i32 1, %n ; [#uses=1]
348348 %tmp3 = and i32 %tmp29, %x ; [#uses=1]
360360 define void @query3bx(i32 %x, i32 %n) nounwind {
361361 entry:
362362 ; CHECK: query3bx
363 ; CHECK: btl %e{{..}}, %e{{..}}
363 ; CHECK: btl %eax, %ecx
364364 ; CHECK: jae
365365 %tmp29 = shl i32 1, %n ; [#uses=1]
366366 %tmp3 = and i32 %x, %tmp29
378378 define void @queryne2(i32 %x, i32 %n) nounwind {
379379 entry:
380380 ; CHECK: queryne2
381 ; CHECK: btl %e{{..}}, %e{{..}}
381 ; CHECK: btl %eax, %ecx
382382 ; CHECK: jb
383383 %tmp29 = lshr i32 %x, %n ; [#uses=1]
384384 %tmp3 = and i32 %tmp29, 1 ; [#uses=1]
396396 define void @queryne2b(i32 %x, i32 %n) nounwind {
397397 entry:
398398 ; CHECK: queryne2b
399 ; CHECK: btl %e{{..}}, %e{{..}}
399 ; CHECK: btl %eax, %ecx
400400 ; CHECK: jb
401401 %tmp29 = lshr i32 %x, %n ; [#uses=1]
402402 %tmp3 = and i32 1, %tmp29
414414 define void @aqueryne2(i32 %x, i32 %n) nounwind {
415415 entry:
416416 ; CHECK: aqueryne2
417 ; CHECK: btl %e{{..}}, %e{{..}}
417 ; CHECK: btl %eax, %ecx
418418 ; CHECK: jb
419419 %tmp29 = ashr i32 %x, %n ; [#uses=1]
420420 %tmp3 = and i32 %tmp29, 1 ; [#uses=1]
432432 define void @aqueryne2b(i32 %x, i32 %n) nounwind {
433433 entry:
434434 ; CHECK: aqueryne2b
435 ; CHECK: btl %e{{..}}, %e{{..}}
435 ; CHECK: btl %eax, %ecx
436436 ; CHECK: jb
437437 %tmp29 = ashr i32 %x, %n ; [#uses=1]
438438 %tmp3 = and i32 1, %tmp29
450450 define void @queryne3(i32 %x, i32 %n) nounwind {
451451 entry:
452452 ; CHECK: queryne3
453 ; CHECK: btl %e{{..}}, %e{{..}}
453 ; CHECK: btl %eax, %ecx
454454 ; CHECK: jb
455455 %tmp29 = shl i32 1, %n ; [#uses=1]
456456 %tmp3 = and i32 %tmp29, %x ; [#uses=1]
468468 define void @queryne3b(i32 %x, i32 %n) nounwind {
469469 entry:
470470 ; CHECK: queryne3b
471 ; CHECK: btl %e{{..}}, %e{{..}}
471 ; CHECK: btl %eax, %ecx
472472 ; CHECK: jb
473473 %tmp29 = shl i32 1, %n ; [#uses=1]
474474 %tmp3 = and i32 %x, %tmp29
486486 define void @queryne3x(i32 %x, i32 %n) nounwind {
487487 entry:
488488 ; CHECK: queryne3x
489 ; CHECK: btl %e{{..}}, %e{{..}}
489 ; CHECK: btl %eax, %ecx
490490 ; CHECK: jb
491491 %tmp29 = shl i32 1, %n ; [#uses=1]
492492 %tmp3 = and i32 %tmp29, %x ; [#uses=1]
504504 define void @queryne3bx(i32 %x, i32 %n) nounwind {
505505 entry:
506506 ; CHECK: queryne3bx
507 ; CHECK: btl %e{{..}}, %e{{..}}
507 ; CHECK: btl %eax, %ecx
508508 ; CHECK: jb
509509 %tmp29 = shl i32 1, %n ; [#uses=1]
510510 %tmp3 = and i32 %x, %tmp29
66 define i32 @main() nounwind {
77 entry:
88 ; CHECK: main:
9 ; CHECK: movl $1, (%esp)
910 ; CHECK: leal 16(%esp), %edi
1011 ; CHECK: leal 160(%esp), %esi
1112 ; CHECK: rep;movsl
12 ; CHECK: movl $1, (%esp)
1313 %s = alloca %struct.S ; <%struct.S*> [#uses=2]
1414 %tmp15 = getelementptr %struct.S* %s, i32 0, i32 0 ; <<2 x i64>*> [#uses=1]
1515 store <2 x i64> < i64 8589934595, i64 1 >, <2 x i64>* %tmp15, align 16
16 call void @t( i32 1, %struct.S* byval %s ) nounwind
16 call void @t( i32 1, %struct.S* byval %s ) nounwind
1717 ret i32 0
1818 }
1919
22 ;CHECK: cftx020
33 ;CHECK: vmovsd (%rdi), %xmm{{.*}}
44 ;CHECK: vmovsd 16(%rdi), %xmm{{.*}}
5 ;CHECK: vmovhpd 8(%rdi), %xmm{{.*}}
56 ;CHECK: vmovsd 24(%rdi), %xmm{{.*}}
6 ;CHECK: vmovhpd 8(%rdi), %xmm{{.*}}
77 ;CHECK: vmovupd %xmm{{.*}}, (%rdi)
88 ;CHECK: vmovupd %xmm{{.*}}, 16(%rdi)
99 ;CHECK: ret
3434 store <2 x double> %14, <2 x double>* %15, align 8
3535 ret void
3636 }
37
33 define i32 @test1(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone {
44 entry:
55 ; CHECK: test1:
6 ; CHECK: btl
7 ; CHECK-NEXT: movl $12, %eax
6 ; CHECK: movl $12, %eax
7 ; CHECK-NEXT: btl
88 ; CHECK-NEXT: cmovael (%rcx), %eax
99 ; CHECK-NEXT: ret
1010
1818 define i32 @test2(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone {
1919 entry:
2020 ; CHECK: test2:
21 ; CHECK: btl
22 ; CHECK-NEXT: movl $12, %eax
21 ; CHECK: movl $12, %eax
22 ; CHECK-NEXT: btl
2323 ; CHECK-NEXT: cmovbl (%rcx), %eax
2424 ; CHECK-NEXT: ret
2525
9191 ; CHECK: testb
9292 ; CHECK-NOT: xor
9393 ; CHECK: setne
94 ; CHECK: testb
94 ; CHECK-NEXT: testb
9595
9696 func_4.exit.i: ; preds = %bb.i.i.i, %entry
9797 %.not.i = xor i1 %2, true ; [#uses=1]
3737 define %0 @t3(i32 %lb, i8 zeroext %has_lb, i8 zeroext %lb_inclusive, i32 %ub, i8 zeroext %has_ub, i8 zeroext %ub_inclusive) nounwind {
3838 entry:
3939 ; DARWIN: t3:
40 ; DARWIN: shll $16
4041 ; DARWIN: shlq $32, %rcx
4142 ; DARWIN-NOT: leaq
4243 ; DARWIN: orq %rcx, %rax
43 ; DARWIN-NOT: leaq
44 ; DARWIN: shll $16
4544 %tmp21 = zext i32 %lb to i64
4645 %tmp23 = zext i32 %ub to i64
4746 %tmp24 = shl i64 %tmp23, 32
1515 call void @llvm.dbg.value(metadata !12, i64 0, metadata !13), !dbg !14
1616 %tmp2 = load i32 addrspace(1)* %ip, align 4, !dbg !15
1717 %tmp3 = add i32 0, %tmp2, !dbg !15
18 ; CHECK: ##DEBUG_VALUE: idx <- E{{..$}}
18 ; CHECK: ##DEBUG_VALUE: idx <- EAX{{$}}
1919 call void @llvm.dbg.value(metadata !{i32 %tmp3}, i64 0, metadata !13), !dbg
2020 !15
2121 %arrayidx = getelementptr i32 addrspace(1)* %ip, i32 %1, !dbg !16
3939 ; CHECK: movl L_LotsStuff$non_lazy_ptr, %ecx
4040
4141 ; ATOM: _t:
42 ; ATOM: movl L_LotsStuff$non_lazy_ptr, %e{{..}}
43 ; ATOM: movl $0, %e{{..}}
42 ; ATOM: movl L_LotsStuff$non_lazy_ptr, %ecx
43 ; ATOM: movl $0, %eax
4444
4545 }
0 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 -post-RA-scheduler=false | FileCheck %s
1 ; CHECK: movsd %xmm{{[0-9]}}, 8(%esp)
2 ; CHECK: xorl %eax, %eax
1 ; CHECK: movsd %xmm0, 8(%esp)
2 ; CHECK: xorl %ecx, %ecx
33
44 @d = external global double ; [#uses=1]
55 @c = external global double ; [#uses=1]
3737
3838 store i16 %A, i16* %Q
3939 ret i32 %D
40
40
4141 ; CHECK: test2:
4242 ; CHECK: movl 4(%esp), %eax
43 ; CHECK-NEXT: movzwl (%eax), %e{{..}}
43 ; CHECK-NEXT: movzwl (%eax), %ecx
4444
4545 }
4646
5353 %mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer ; <<4 x float>> [#uses=2]
5454 %mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer ; <<4 x float>> [#uses=1]
5555 %cmpunord.i11 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i8 3) nounwind ; <<4 x float>> [#uses=1]
56 %tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1]
57 %bitcast.i3 = bitcast <4 x float> %mul310 to <4 x i32> ; <<4 x i32>> [#uses=1]
58 %andps.i5 = and <4 x i32> %bitcast.i3, zeroinitializer ; <<4 x i32>> [#uses=1]
59
60 call void null(<4 x float> %mul313, <4 x float> %cmpunord.i11, <4 x float> %tmp83, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind
61
62 %tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1]
63
6456 %bitcast6.i13 = bitcast <4 x float> %cmpunord.i11 to <4 x i32> ; <<4 x i32>> [#uses=2]
6557 %andps.i14 = add <4 x i32> , %bitcast6.i13 ; <<4 x i32>> [#uses=1]
6658 %not.i16 = xor <4 x i32> %bitcast6.i13, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
6759 %andnps.i17 = add <4 x i32> , %not.i16 ; <<4 x i32>> [#uses=1]
6860 %orps.i18 = or <4 x i32> %andnps.i17, %andps.i14 ; <<4 x i32>> [#uses=1]
6961 %bitcast17.i19 = bitcast <4 x i32> %orps.i18 to <4 x float> ; <<4 x float>> [#uses=1]
70
62 %tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1]
63 %bitcast.i3 = bitcast <4 x float> %mul310 to <4 x i32> ; <<4 x i32>> [#uses=1]
64 %andps.i5 = and <4 x i32> %bitcast.i3, zeroinitializer ; <<4 x i32>> [#uses=1]
7165 %bitcast11.i6 = bitcast <4 x float> %tmp83 to <4 x i32> ; <<4 x i32>> [#uses=1]
7266 %not.i7 = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
7367 %andnps.i8 = and <4 x i32> %bitcast11.i6, %not.i7 ; <<4 x i32>> [#uses=1]
68 call void null(<4 x float> %mul313, <4 x float> %cmpunord.i11, <4 x float> %tmp83, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind
7469 %orps.i9 = or <4 x i32> %andnps.i8, %andps.i5 ; <<4 x i32>> [#uses=1]
7570 %bitcast17.i10 = bitcast <4 x i32> %orps.i9 to <4 x float> ; <<4 x float>> [#uses=1]
76
71 %tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1]
7772 %bitcast6.i = bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=2]
7873 %andps.i = and <4 x i32> zeroinitializer, %bitcast6.i ; <<4 x i32>> [#uses=1]
7974 %bitcast11.i = bitcast <4 x float> %tmp84 to <4 x i32> ; <<4 x i32>> [#uses=1]
33 define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind {
44 ; ATOM: foo
55 ; ATOM: addl
6 ; ATOM: addl
6 ; ATOM: leal
77 ; ATOM: leal
88
99 ; CHECK: foo
None ; RUN: llc -mtriple=x86_64-linux -mcpu=nehalem < %s | FileCheck %s --check-prefix=LIN
1 ; RUN: llc -mtriple=x86_64-win32 -mcpu=nehalem < %s | FileCheck %s --check-prefix=WIN
0 ; RUN: llc -mtriple=x86_64-linux -mcpu=nehalem < %s | FileCheck %s
1 ; RUN: llc -mtriple=x86_64-win32 -mcpu=nehalem < %s | FileCheck %s
22 ; rdar://7398554
33
44 ; When doing vector gather-scatter index calculation with 32-bit indices,
55 ; bounce the vector off of cache rather than shuffling each individual
66 ; element out of the index vector.
77
8 ; CHECK: foo:
9 ; LIN: movaps (%rsi), %xmm0
10 ; LIN: andps (%rdx), %xmm0
11 ; LIN: movaps %xmm0, -24(%rsp)
12 ; LIN: movslq -24(%rsp), %rsi
13 ; LIN: movslq -20(%rsp), %rcx
14 ; LIN: movslq -16(%rsp), %rdx
15 ; LIN: movslq -12(%rsp), %rax
16 ; LIN: movsd (%rdi,%rsi,8), %xmm0
17 ; LIN: movhpd (%rdi,%rcx,8), %xmm0
18 ; LIN: movsd (%rdi,%rdx,8), %xmm1
19 ; LIN: movhpd (%rdi,%rax,8), %xmm1
20
21 ; WIN: movaps (%rdx), %xmm0
22 ; WIN: andps (%r8), %xmm0
23 ; WIN: movaps %xmm0, (%rsp)
24 ; WIN: movslq (%rsp), %rax
25 ; WIN: movslq 4(%rsp), %rdx
26 ; WIN: movslq 8(%rsp), %r9
27 ; WIN: movslq 12(%rsp), %r8
28 ; WIN: movsd (%rcx,%rax,8), %xmm0
29 ; WIN: movhpd (%rcx,%rdx,8), %xmm0
30 ; WIN: movsd (%rcx,%r9,8), %xmm1
31 ; WIN: movhpd (%rcx,%r8,8), %xmm1
8 ; CHECK: andps ([[H:%rdx|%r8]]), %xmm0
9 ; CHECK: movaps %xmm0, {{(-24)?}}(%rsp)
10 ; CHECK: movslq {{(-24)?}}(%rsp), %rax
11 ; CHECK: movsd ([[P:%rdi|%rcx]],%rax,8), %xmm0
12 ; CHECK: movslq {{-20|4}}(%rsp), %rax
13 ; CHECK: movhpd ([[P]],%rax,8), %xmm0
14 ; CHECK: movslq {{-16|8}}(%rsp), %rax
15 ; CHECK: movsd ([[P]],%rax,8), %xmm1
16 ; CHECK: movslq {{-12|12}}(%rsp), %rax
17 ; CHECK: movhpd ([[P]],%rax,8), %xmm1
3218
3319 define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
3420 %a = load <4 x i32>* %i
2727
2828 define cc 10 void @foo() nounwind {
2929 entry:
30 ; CHECK: movl r1, %esi
30 ; CHECK: movl base, %ebx
31 ; CHECK-NEXT: movl sp, %ebp
3132 ; CHECK-NEXT: movl hp, %edi
32 ; CHECK-NEXT: movl sp, %ebp
33 ; CHECK-NEXT: movl base, %ebx
33 ; CHECK-NEXT: movl r1, %esi
3434 %0 = load i32* @r1
3535 %1 = load i32* @hp
3636 %2 = load i32* @sp
4141 }
4242
4343 declare cc 10 void @bar(i32, i32, i32, i32)
44
4040
4141 define cc 10 void @foo() nounwind {
4242 entry:
43 ; CHECK: movsd d2(%rip), %xmm6
43 ; CHECK: movq base(%rip), %r13
44 ; CHECK-NEXT: movq sp(%rip), %rbp
45 ; CHECK-NEXT: movq hp(%rip), %r12
46 ; CHECK-NEXT: movq r1(%rip), %rbx
47 ; CHECK-NEXT: movq r2(%rip), %r14
48 ; CHECK-NEXT: movq r3(%rip), %rsi
49 ; CHECK-NEXT: movq r4(%rip), %rdi
50 ; CHECK-NEXT: movq r5(%rip), %r8
51 ; CHECK-NEXT: movq r6(%rip), %r9
52 ; CHECK-NEXT: movq splim(%rip), %r15
53 ; CHECK-NEXT: movss f1(%rip), %xmm1
54 ; CHECK-NEXT: movss f2(%rip), %xmm2
55 ; CHECK-NEXT: movss f3(%rip), %xmm3
56 ; CHECK-NEXT: movss f4(%rip), %xmm4
4457 ; CHECK-NEXT: movsd d1(%rip), %xmm5
45 ; CHECK-NEXT: movss f4(%rip), %xmm4
46 ; CHECK-NEXT: movss f3(%rip), %xmm3
47 ; CHECK-NEXT: movss f2(%rip), %xmm2
48 ; CHECK-NEXT: movss f1(%rip), %xmm1
49 ; CHECK-NEXT: movq splim(%rip), %r15
50 ; CHECK-NEXT: movq r6(%rip), %r9
51 ; CHECK-NEXT: movq r5(%rip), %r8
52 ; CHECK-NEXT: movq r4(%rip), %rdi
53 ; CHECK-NEXT: movq r3(%rip), %rsi
54 ; CHECK-NEXT: movq r2(%rip), %r14
55 ; CHECK-NEXT: movq r1(%rip), %rbx
56 ; CHECK-NEXT: movq hp(%rip), %r12
57 ; CHECK-NEXT: movq sp(%rip), %rbp
58 ; CHECK-NEXT: movq base(%rip), %r13
58 ; CHECK-NEXT: movsd d2(%rip), %xmm6
5959 %0 = load double* @d2
6060 %1 = load double* @d1
6161 %2 = load float* @f4
8282
8383 declare cc 10 void @bar(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64,
8484 float, float, float, float, double, double)
85
4848 store i32 %arg1, i32* %arg1_var
4949 store i32 %arg2, i32* %arg2_var
5050
51 ; CHECK: movl 16(%esp), %esi
51 ; CHECK: movl 4(%esp), %edx
52 ; CHECK-NEXT: movl 8(%esp), %eax
5253 ; CHECK-NEXT: movl 12(%esp), %ebp
53 ; CHECK-NEXT: movl 8(%esp), %eax
54 ; CHECK-NEXT: movl 4(%esp), %edx
54 ; CHECK-NEXT: movl 16(%esp), %esi
5555 %0 = load i32* %hp_var
5656 %1 = load i32* %p_var
5757 %2 = load i32* %arg0_var
44 define void @zap(i64 %a, i64 %b) nounwind {
55 entry:
66 ; CHECK: movq %rsi, %rax
7 ; CHECK-NEXT: movq %rdi, %rsi
8 ; CHECK-NEXT: movq %rax, %rdx
79 ; CHECK-NEXT: movl $8, %ecx
810 ; CHECK-NEXT: movl $9, %r8d
9 ; CHECK-NEXT: movq %rdi, %rsi
10 ; CHECK-NEXT: movq %rax, %rdx
1111 ; CHECK-NEXT: callq addfour
1212 %0 = call cc 11 {i64, i64, i64} @addfour(i64 undef, i64 undef, i64 %a, i64 %b, i64 8, i64 9)
1313 %res = extractvalue {i64, i64, i64} %0, 2
5656 store i64 %arg2, i64* %arg2_var
5757 store i64 %arg3, i64* %arg3_var
5858
59 ; CHECK: movq 40(%rsp), %r15
59 ; CHECK: movq 8(%rsp), %rcx
60 ; CHECK-NEXT: movq 16(%rsp), %rdx
61 ; CHECK-NEXT: movq 24(%rsp), %rsi
6062 ; CHECK-NEXT: movq 32(%rsp), %rbp
61 ; CHECK-NEXT: movq 24(%rsp), %rsi
62 ; CHECK-NEXT: movq 16(%rsp), %rdx
63 ; CHECK-NEXT: movq 8(%rsp), %rcx
63 ; CHECK-NEXT: movq 40(%rsp), %r15
6464 %0 = load i64* %hp_var
6565 %1 = load i64* %p_var
6666 %2 = load i64* %arg0_var
None ; RUN: llc < %s -march=x86-64 | grep lea | count 13
0 ; RUN: llc < %s -march=x86-64 | grep lea | count 12
11
22 ; This testcase was written to demonstrate an instruction-selection problem,
33 ; however it also happens to expose a limitation in the DAGCombiner's
4343 store i32 %tmp10.6, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 7)
4444 ret void
4545 }
46
2727 bb2:
2828 ret i32 %x_offs
2929 ; CHECK: test2:
30 ; CHECK: leal -5(%r[[A0:..]]), %eax
30 ; CHECK: movl %e[[A0]], %eax
31 ; CHECK: addl $-5, %eax
3132 ; CHECK: andl $-4, %eax
3233 ; CHECK: negl %eax
3334 ; CHECK: leal -4(%r[[A0]],%rax), %eax
11 ; RUN: llc -mtriple=x86_64-darwin -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s
22
33 ; CHECK: t:
4 ; CHECK: movl (%r9,%rax,4), %e{{..}}
5 ; CHECK-NEXT: decq
4 ; CHECK: decq
5 ; CHECK-NEXT: movl (%r9,%rax,4), %eax
66 ; CHECK-NEXT: jne
77
88 ; ATOM: t:
9 ; ATOM: movl (%r9,%rax,4), %e{{..}}
9 ; ATOM: movl (%r9,%rax,4), %eax
1010 ; ATOM-NEXT: decq
1111 ; ATOM-NEXT: jne
1212
189189 %bi.0.lcssa = phi i32 [ 0, %entry ], [ %i.addr.0.bi.0, %for.body ]
190190 ret i32 %bi.0.lcssa
191191 }
192
22 ; RUN: not grep movz %t
33 ; RUN: not grep sar %t
44 ; RUN: not grep shl %t
5 ; RUN: grep add %t | count 6
5 ; RUN: grep add %t | count 5
66 ; RUN: grep inc %t | count 2
7 ; RUN: grep lea %t | count 0
7 ; RUN: grep lea %t | count 3
88
99 ; Optimize away zext-inreg and sext-inreg on the loop induction
1010 ; variable using trip-count information.
5555 define void @t2(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp {
5656 entry:
5757 ; SSE2-Darwin: t2:
58 ; SSE2-Darwin: movaps (%ecx), %xmm0
58 ; SSE2-Darwin: movaps (%eax), %xmm0
5959 ; SSE2-Darwin: movaps %xmm0, (%eax)
6060
6161 ; SSE2-Mingw32: t2:
62 ; SSE2-Mingw32: movaps (%ecx), %xmm0
62 ; SSE2-Mingw32: movaps (%eax), %xmm0
6363 ; SSE2-Mingw32: movaps %xmm0, (%eax)
6464
6565 ; SSE1: t2:
66 ; SSE1: movaps (%ecx), %xmm0
66 ; SSE1: movaps (%eax), %xmm0
6767 ; SSE1: movaps %xmm0, (%eax)
6868
6969 ; NOSSE: t2:
9090 define void @t3(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp {
9191 entry:
9292 ; SSE2-Darwin: t3:
93 ; SSE2-Darwin: movsd (%ecx), %xmm0
94 ; SSE2-Darwin: movsd 8(%ecx), %xmm1
93 ; SSE2-Darwin: movsd (%eax), %xmm0
94 ; SSE2-Darwin: movsd 8(%eax), %xmm1
9595 ; SSE2-Darwin: movsd %xmm1, 8(%eax)
9696 ; SSE2-Darwin: movsd %xmm0, (%eax)
9797
9898 ; SSE2-Mingw32: t3:
99 ; SSE2-Mingw32: movsd (%ecx), %xmm0
100 ; SSE2-Mingw32: movsd 8(%ecx), %xmm1
99 ; SSE2-Mingw32: movsd (%eax), %xmm0
100 ; SSE2-Mingw32: movsd 8(%eax), %xmm1
101101 ; SSE2-Mingw32: movsd %xmm1, 8(%eax)
102102 ; SSE2-Mingw32: movsd %xmm0, (%eax)
103103
0 ; RUN: llc < %s -march=x86 -mattr=sse41 -mcpu=nehalem -stack-alignment=16 > %t
11 ; RUN: grep pmul %t | count 12
2 ; RUN: grep mov %t | count 14
2 ; RUN: grep mov %t | count 11
33
44 define <4 x i32> @a(<4 x i32> %i) nounwind {
55 %A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 >
1919 }
2020
2121 ; We were miscompiling this and using %ax instead of %cx in the movw.
22 ; CHECK: movw %ax, (%rsi)
23 ; CHECK: movswl %ax, %eax
24 ; CHECK: movslq %eax, %rax
22 ; CHECK: movswl %cx, %ecx
23 ; CHECK: movw %cx, (%rsi)
24 ; CHECK: movslq %ecx, %rcx
5656 %tmp22 = tail call %"struct.std::basic_ostream >"* @_ZNSolsEd( %"struct.std::basic_ostream >"* %tmp16, double %tmp1920 ) ; <%"struct.std::basic_ostream >"*> [#uses=1]
5757 %tmp30 = tail call %"struct.std::basic_ostream >"* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_( %"struct.std::basic_ostream >"* %tmp22 ) ; <%"struct.std::basic_ostream >"*> [#uses=0]
5858 ; reload:
59 ; CHECK: fld
60 ; CHECK: fstps
5961 ; CHECK: ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc
6062 %tmp34 = tail call %"struct.std::basic_ostream >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc( %"struct.std::basic_ostream >"* @_ZSt4cout, i8* getelementptr ([13 x i8]* @.str1, i32 0, i32 0) ) ; <%"struct.std::basic_ostream >"*> [#uses=1]
6163 %tmp3940 = fpext float %tmp1314 to double ; [#uses=1]
62 ; CHECK: fld
6364 ; CHECK: fstpl
6465 ; CHECK: ZNSolsEd
6566 %tmp42 = tail call %"struct.std::basic_ostream >"* @_ZNSolsEd( %"struct.std::basic_ostream >"* %tmp34, double %tmp3940 ) ; <%"struct.std::basic_ostream >"*> [#uses=1]
None ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=corei7-avx -enable-misched=false | FileCheck %s
0 ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s
11
22 ; CHECK: main:
33 ; CHECK: pushl %esi
None ; RUN-disabled: llc < %s -mtriple=x86_64-apple-macosx -pre-RA-sched=ilp -debug-only=pre-RA-sched \
1 ; RUN-disabled: 2>&1 | FileCheck %s
2 ; RUN: true
0 ; RUN: llc < %s -mtriple=x86_64-apple-macosx -debug-only=pre-RA-sched \
1 ; RUN: 2>&1 | FileCheck %s
32 ; REQUIRES: asserts
43 ;
54 ; rdar:13279013: pre-RA-sched should not check all interferences and
1010 ret i32 %isvalid
1111 ; CHECK: _rdrand16_step:
1212 ; CHECK: rdrandw %ax
13 ; CHECK: movw %ax, (%r[[A0:di|cx]])
1314 ; CHECK: movzwl %ax, %ecx
1415 ; CHECK: movl $1, %eax
1516 ; CHECK: cmovael %ecx, %eax
16 ; CHECK: movw %cx, (%r[[A0:di|cx]])
1717 ; CHECK: ret
1818 }
1919
2525 ret i32 %isvalid
2626 ; CHECK: _rdrand32_step:
2727 ; CHECK: rdrandl %e[[T0:[a-z]+]]
28 ; CHECK: movl %e[[T0]], (%r[[A0]])
2829 ; CHECK: movl $1, %eax
2930 ; CHECK: cmovael %e[[T0]], %eax
30 ; CHECK: movl %e[[T0]], (%r[[A0]])
3131 ; CHECK: ret
3232 }
3333
3939 ret i32 %isvalid
4040 ; CHECK: _rdrand64_step:
4141 ; CHECK: rdrandq %r[[T1:[a-z]+]]
42 ; CHECK: movq %r[[T1]], (%r[[A0]])
4243 ; CHECK: movl $1, %eax
4344 ; CHECK: cmovael %e[[T1]], %eax
44 ; CHECK: movq %r[[T1]], (%r[[A0]])
4545 ; CHECK: ret
4646 }
4747
1111 ret i32 %isvalid
1212 ; CHECK: _rdseed16_step:
1313 ; CHECK: rdseedw %ax
14 ; CHECK: movw %ax, (%r[[A0:di|cx]])
1415 ; CHECK: movzwl %ax, %ecx
1516 ; CHECK: movl $1, %eax
1617 ; CHECK: cmovael %ecx, %eax
17 ; CHECK: movw %cx, (%r[[A0:di|cx]])
1818 ; CHECK: ret
1919 }
2020
2626 ret i32 %isvalid
2727 ; CHECK: _rdseed32_step:
2828 ; CHECK: rdseedl %e[[T0:[a-z]+]]
29 ; CHECK: movl %e[[T0]], (%r[[A0]])
2930 ; CHECK: movl $1, %eax
3031 ; CHECK: cmovael %e[[T0]], %eax
31 ; CHECK: movl %e[[T0]], (%r[[A0]])
3232 ; CHECK: ret
3333 }
3434
4040 ret i32 %isvalid
4141 ; CHECK: _rdseed64_step:
4242 ; CHECK: rdseedq %r[[T1:[a-z]+]]
43 ; CHECK: movq %r[[T1]], (%r[[A0]])
4344 ; CHECK: movl $1, %eax
4445 ; CHECK: cmovael %e[[T1]], %eax
45 ; CHECK: movq %r[[T1]], (%r[[A0]])
4646 ; CHECK: ret
4747 }
3030 ; X32-NEXT: ret
3131
3232 ; X32: movl %esp, %eax
33 ; X32: subl %ecx, %eax
33 ; X32-NEXT: subl %ecx, %eax
3434 ; X32-NEXT: cmpl %eax, %gs:48
3535
3636 ; X32: movl %eax, %esp
5151 ; X64-NEXT: ret
5252
5353 ; X64: movq %rsp, %[[RDI:rdi|rax]]
54 ; X64: subq %{{.*}}, %[[RDI]]
54 ; X64-NEXT: subq %{{.*}}, %[[RDI]]
5555 ; X64-NEXT: cmpq %[[RDI]], %fs:112
5656
5757 ; X64: movq %[[RDI]], %rsp
255255 %call = tail call noalias i8* @_Znam(i64 %D) nounwind noredzone
256256 ret i8* %call
257257 ; CHECK: test12:
258 ; CHECK: movq $-1, %rdi
258259 ; CHECK: mulq
259 ; CHECK: movq $-1, %rdi
260260 ; CHECK: cmovnoq %rax, %rdi
261261 ; CHECK: jmp __Znam
262262
2929 %x = load i32* %p
3030 %shl = shl i32 %x, %shamt
3131 ; BMI2: shl32p
32 ; Source order scheduling prevents folding, rdar:14208996.
33 ; BMI2: shlxl %{{.+}}, %{{.+}}, %{{.+}}
32 ; BMI2: shlxl %{{.+}}, ({{.+}}), %{{.+}}
3433 ; BMI2: ret
3534 ; BMI264: shl32p
36 ; BMI264: shlxl %{{.+}}, %{{.+}}, %{{.+}}
35 ; BMI264: shlxl %{{.+}}, ({{.+}}), %{{.+}}
3736 ; BMI264: ret
3837 ret i32 %shl
3938 }
7473 %x = load i64* %p
7574 %shl = shl i64 %x, %shamt
7675 ; BMI264: shl64p
77 ; BMI264: shlxq %{{.+}}, %{{.+}}, %{{.+}}
76 ; BMI264: shlxq %{{.+}}, ({{.+}}), %{{.+}}
7877 ; BMI264: ret
7978 ret i64 %shl
8079 }
106105 %x = load i32* %p
107106 %shl = lshr i32 %x, %shamt
108107 ; BMI2: lshr32p
109 ; Source order scheduling prevents folding, rdar:14208996.
110 ; BMI2: shrxl %{{.+}}, %{{.+}}, %{{.+}}
108 ; BMI2: shrxl %{{.+}}, ({{.+}}), %{{.+}}
111109 ; BMI2: ret
112110 ; BMI264: lshr32
113 ; BMI264: shrxl %{{.+}}, %{{.+}}, %{{.+}}
111 ; BMI264: shrxl %{{.+}}, ({{.+}}), %{{.+}}
114112 ; BMI264: ret
115113 ret i32 %shl
116114 }
129127 %x = load i64* %p
130128 %shl = lshr i64 %x, %shamt
131129 ; BMI264: lshr64p
132 ; BMI264: shrxq %{{.+}}, %{{.+}}, %{{.+}}
130 ; BMI264: shrxq %{{.+}}, ({{.+}}), %{{.+}}
133131 ; BMI264: ret
134132 ret i64 %shl
135133 }
151149 %x = load i32* %p
152150 %shl = ashr i32 %x, %shamt
153151 ; BMI2: ashr32p
154 ; Source order scheduling prevents folding, rdar:14208996.
155 ; BMI2: sarxl %{{.+}}, %{{.+}}, %{{.+}}
152 ; BMI2: sarxl %{{.+}}, ({{.+}}), %{{.+}}
156153 ; BMI2: ret
157154 ; BMI264: ashr32
158 ; BMI264: sarxl %{{.+}}, %{{.+}}, %{{.+}}
155 ; BMI264: sarxl %{{.+}}, ({{.+}}), %{{.+}}
159156 ; BMI264: ret
160157 ret i32 %shl
161158 }
174171 %x = load i64* %p
175172 %shl = ashr i64 %x, %shamt
176173 ; BMI264: ashr64p
177 ; BMI264: sarxq %{{.+}}, %{{.+}}, %{{.+}}
174 ; BMI264: sarxq %{{.+}}, ({{.+}}), %{{.+}}
178175 ; BMI264: ret
179176 ret i64 %shl
180177 }
2525
2626 ; CHECK: split:
2727 ; CHECK-NEXT: testb $1, %dil
28 ; CHECK-NEXT: je
28 ; CHECK-NEXT: jne
29 ; CHECK-NEXT: movaps
30 ; CHECK-NEXT: ret
2931 ; CHECK: divsd
30 ; CHECK: movaps
31 ; CHECK: ret
32 ; CHECK-NEXT: ret
3233 define double @split(double %x, double %y, i1 %c) nounwind {
3334 %a = fdiv double %x, 3.2
3435 %z = select i1 %c, double %a, double %y
6364 ; Sink instructions with dead EFLAGS defs.
6465
6566 ; FIXME: Unfail the zzz test if we can correctly mark pregs with the kill flag.
66 ;
67 ;
6768 ; See <rdar://problem/8030636>. This test isn't valid after we made machine
6869 ; sinking more conservative about sinking instructions that define a preg into a
6970 ; block when we don't know if the preg is killed within the current block.
66 %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 >
77 store <2 x double> %tmp9, <2 x double>* %r, align 16
88 ret void
9
9
1010 ; CHECK: test1:
1111 ; CHECK: movl 8(%esp), %eax
1212 ; CHECK-NEXT: movapd (%eax), %xmm0
2222 %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 >
2323 store <2 x double> %tmp9, <2 x double>* %r, align 16
2424 ret void
25
25
2626 ; CHECK: test2:
27 ; CHECK: movl 4(%esp), %eax
28 ; CHECK: movl 8(%esp), %ecx
29 ; CHECK-NEXT: movapd (%ecx), %xmm0
27 ; CHECK: movl 8(%esp), %eax
28 ; CHECK-NEXT: movapd (%eax), %xmm0
3029 ; CHECK-NEXT: movhpd 12(%esp), %xmm0
30 ; CHECK-NEXT: movl 4(%esp), %eax
3131 ; CHECK-NEXT: movapd %xmm0, (%eax)
3232 ; CHECK-NEXT: ret
3333 }
4747 store <4 x float> %tmp13, <4 x float>* %res
4848 ret void
4949 ; CHECK: @test3
50 ; CHECK: unpcklps
50 ; CHECK: unpcklps
5151 }
5252
5353 define void @test4(<4 x float> %X, <4 x float>* %res) nounwind {
8484 %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
8585 store <4 x float> %tmp2, <4 x float>* %res
8686 ret void
87
87
8888 ; CHECK: test6:
89 ; CHECK: movaps (%ecx), %xmm0
89 ; CHECK: movaps (%eax), %xmm0
9090 ; CHECK: movaps %xmm0, (%eax)
9191 }
9292
9595 shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer ; <<4 x float>>:2 [#uses=1]
9696 store <4 x float> %2, <4 x float>* null
9797 ret void
98
98
9999 ; CHECK: test7:
100100 ; CHECK: xorps %xmm0, %xmm0
101101 ; CHECK: movaps %xmm0, 0
165165 store <4 x float> %tmp11, <4 x float>* %res
166166 ret void
167167 ; CHECK: test13
168 ; CHECK: shufps $69, (%ecx), %xmm0
168 ; CHECK: shufps $69, (%eax), %xmm0
169169 ; CHECK: pshufd $-40, %xmm0, %xmm0
170170 }
171171
177177 %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1]
178178 ret <4 x float> %tmp27
179179 ; CHECK: test14:
180 ; CHECK: addps [[X1:%xmm[0-9]+]], [[X0:%xmm[0-9]+]]
181 ; CHECK: subps [[X1]], [[X2:%xmm[0-9]+]]
180 ; CHECK: subps [[X1:%xmm[0-9]+]], [[X2:%xmm[0-9]+]]
181 ; CHECK: addps [[X1]], [[X0:%xmm[0-9]+]]
182182 ; CHECK: movlhps [[X2]], [[X0]]
183183 }
184184
220220 %double2float.i = fptrunc <4 x double> %0 to <4 x float>
221221 ret <4 x float> %double2float.i
222222 }
223
1111 %D = or i32 %C, %B
1212 store i32 %D, i32* %a0, align 4
1313 ret void
14
14
1515 ; X64: test1:
1616 ; X64: movb %sil, (%rdi)
1717
3333 ; X64: movb %sil, 1(%rdi)
3434
3535 ; X32: test2:
36 ; X32: movb 8(%esp), %[[REG:[abcd]l]]
37 ; X32: movb %[[REG]], 1(%{{.*}})
36 ; X32: movb 8(%esp), %al
37 ; X32: movb %al, 1(%{{.*}})
3838 }
3939
4040 define void @test3(i32* nocapture %a0, i16 zeroext %a1) nounwind ssp {
6666 ; X64: movw %si, 2(%rdi)
6767
6868 ; X32: test4:
69 ; X32: movl 8(%esp), %e[[REG:[abcd]x]]
70 ; X32: movw %[[REG]], 2(%{{.*}})
69 ; X32: movl 8(%esp), %eax
70 ; X32: movw %ax, 2(%{{.*}})
7171 }
7272
7373 define void @test5(i64* nocapture %a0, i16 zeroext %a1) nounwind ssp {
8383 ; X64: movw %si, 2(%rdi)
8484
8585 ; X32: test5:
86 ; X32: movzwl 8(%esp), %e[[REG:[abcd]x]]
87 ; X32: movw %[[REG]], 2(%{{.*}})
86 ; X32: movzwl 8(%esp), %eax
87 ; X32: movw %ax, 2(%{{.*}})
8888 }
8989
9090 define void @test6(i64* nocapture %a0, i8 zeroext %a1) nounwind ssp {
101101
102102
103103 ; X32: test6:
104 ; X32: movb 8(%esp), %[[REG:[abcd]l]]
105 ; X32: movb %[[REG]], 5(%{{.*}})
104 ; X32: movb 8(%esp), %al
105 ; X32: movb %al, 5(%{{.*}})
106106 }
107107
108108 define i32 @test7(i64* nocapture %a0, i8 zeroext %a1, i32* %P2) nounwind {
120120
121121
122122 ; X32: test7:
123 ; X32: movb 8(%esp), %[[REG:[abcd]l]]
124 ; X32: movb %[[REG]], 5(%{{.*}})
123 ; X32: movb 8(%esp), %cl
124 ; X32: movb %cl, 5(%{{.*}})
125125 }
126126
127127 ; PR7833
None ; RUN: llc < %s -mtriple=x86_64-linux-gnu -tailcallopt -code-model=large -enable-misched=false | FileCheck %s
0 ; RUN: llc < %s -mtriple=x86_64-linux-gnu -tailcallopt -code-model=large | FileCheck %s
11
22 declare fastcc i32 @callee(i32 %arg)
33 define fastcc i32 @directcall(i32 %arg) {
11 ; rdar://5752025
22
33 ; We want:
4 ; CHECK: movl 4(%esp), %ecx
5 ; CHECK-NEXT: andl $15, %ecx
6 ; CHECK-NEXT: movl $42, %eax
7 ; CHECK-NEXT: cmovel %ecx, %eax
4 ; CHECK: movl $42, %ecx
5 ; CHECK-NEXT: movl 4(%esp), %eax
6 ; CHECK-NEXT: andl $15, %eax
7 ; CHECK-NEXT: cmovnel %ecx, %eax
88 ; CHECK-NEXT: ret
99 ;
1010 ; We don't want:
3838 %retval = select i1 %tmp4, i32 %tmp2, i32 42 ; [#uses=1]
3939 ret i32 %retval
4040 }
41
2121 ret i1 false
2222 }
2323 ; CHECK: test2:
24 ; CHECK: btl
24 ; CHECK: btl %eax
2525
2626 define i32 @test3(i8* %ptr) nounwind {
2727 %val = load i8* %ptr
0 ; RUN: llc -mcpu=generic -march=x86 -mattr=+sse < %s | FileCheck %s
1 ; CHECK: divss
12 ; CHECK: divps
23 ; CHECK: divps
3 ; CHECK: divss
44
55 %vec = type <9 x float>
66 define %vec @vecdiv( %vec %p1, %vec %p2)
88 %result = fdiv %vec %p1, %p2
99 ret %vec %result
1010 }
11
11 ; RUN: llc -march=x86 -mcpu=atom -mattr=+sse < %s | FileCheck -check-prefix=ATOM %s
22
33 %vec = type <6 x float>
4 ; CHECK: divps
54 ; CHECK: divss
65 ; CHECK: divss
6 ; CHECK: divps
77
88 ; Scheduler causes a different instruction order to be produced on Intel Atom
99 ; ATOM: divps
66 define <8 x float> @my2filter4_1d(<4 x float> %a, <8 x float> %T0, <8 x float> %T1) nounwind readnone {
77 entry:
88 ; CHECK: subps
9 ; CHECK: mulps
10 ; CHECK: addps
911 ; CHECK: subps
1012 ; CHECK: mulps
11 ; CHECK: mulps
12 ; CHECK: addps
1313 ; CHECK: addps
1414 %tmp7 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3 > ; <<8 x float>> [#uses=1]
1515 %sub = fsub <8 x float> %T1, %T0 ; <<8 x float>> [#uses=1]
5353 define <2 x double> @t3() nounwind readonly {
5454 bb:
5555 ; CHECK: t3:
56 ; CHECK: punpcklqdq %xmm1, %xmm0
5657 ; CHECK: movq (%rax), %xmm1
57 ; CHECK: punpcklqdq %xmm2, %xmm0
5858 ; CHECK: movsd %xmm1, %xmm0
5959 %tmp0 = load i128* null, align 1
6060 %tmp1 = load <2 x i32>* undef, align 8
7171 define <2 x i64> @t4() nounwind readonly {
7272 bb:
7373 ; CHECK: t4:
74 ; CHECK: punpcklqdq %xmm0, %xmm1
7475 ; CHECK: movq (%rax), %xmm0
75 ; CHECK: punpcklqdq %xmm2, %xmm1
7676 ; CHECK: movsd %xmm1, %xmm0
7777 %tmp0 = load i128* null, align 1
7878 %tmp1 = load <2 x i32>* undef, align 8
0 ; RUN: llc -march=x86 -mcpu=generic -mattr=+sse42 < %s | FileCheck %s
11 ; RUN: llc -march=x86 -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s
22
3 ; CHECK: paddd
34 ; CHECK: movl
4 ; CHECK: paddd
55 ; CHECK: movlpd
66
77 ; Scheduler causes a different instruction order to be produced
3939 ; W64: subq %rax, %rsp
4040 ; W64: movq %rsp, %rax
4141
42 ; EFI: movq %rsp, [[R64:%r.*]]
4342 ; EFI: leaq 15(%{{.*}}), [[R1:%r.*]]
4443 ; EFI: andq $-16, [[R1]]
44 ; EFI: movq %rsp, [[R64:%r.*]]
4545 ; EFI: subq [[R1]], [[R64]]
4646 ; EFI: movq [[R64]], %rsp
4747
4848 %r = call i64 @bar(i64 %n, i64 %x, i64 %n, i8* %buf0, i8* %buf1) nounwind
4949
5050 ; M64: subq $48, %rsp
51 ; M64: leaq -4096(%rbp), %r9
5152 ; M64: movq %rax, 32(%rsp)
52 ; M64: leaq -4096(%rbp), %r9
5353 ; M64: callq bar
5454
5555 ; W64: subq $48, %rsp
56 ; W64: leaq -4096(%rbp), %r9
5657 ; W64: movq %rax, 32(%rsp)
57 ; W64: leaq -4096(%rbp), %r9
5858 ; W64: callq bar
5959
6060 ; EFI: subq $48, %rsp
61 ; EFI: leaq -[[B0OFS]](%rbp), %r9
6162 ; EFI: movq [[R64]], 32(%rsp)
62 ; EFI: leaq -[[B0OFS]](%rbp), %r9
6363 ; EFI: callq _bar
6464
6565 ret i64 %r
33 ; This test checks that the operands of packed sub instructions are
44 ; never interchanged by the "Two-Address instruction pass".
55
6 declare { i64, double } @getFirstParam()
7 declare { i64, double } @getSecondParam()
6 declare { i64, double } @getFirstParam()
7 declare { i64, double } @getSecondParam()
88
99 define i64 @test_psubb() {
1010 entry:
2727
2828 ; CHECK: test_psubb:
2929 ; CHECK: callq getFirstParam
30 ; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
31 ; CHECK: callq getSecondParam
32 ; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
33 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
30 ; CHECK: callq getSecondParam
31 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
32 ; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
3433 ; CHECK: psubb [[PARAM2]], [[PARAM1]]
3534 ; CHECK: ret
3635
5554
5655 ; CHECK: test_psubw:
5756 ; CHECK: callq getFirstParam
58 ; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
59 ; CHECK: callq getSecondParam
60 ; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
61 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
57 ; CHECK: callq getSecondParam
58 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
59 ; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
6260 ; CHECK: psubw [[PARAM2]], [[PARAM1]]
6361 ; CHECK: ret
6462
8482
8583 ; CHECK: test_psubd:
8684 ; CHECK: callq getFirstParam
87 ; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
88 ; CHECK: callq getSecondParam
89 ; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
90 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
85 ; CHECK: callq getSecondParam
86 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
87 ; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
9188 ; CHECK: psubd [[PARAM2]], [[PARAM1]]
9289 ; CHECK: ret
9390
112109
113110 ; CHECK: test_psubsb:
114111 ; CHECK: callq getFirstParam
115 ; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
116 ; CHECK: callq getSecondParam
117 ; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
118 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
112 ; CHECK: callq getSecondParam
113 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
114 ; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
119115 ; CHECK: psubsb [[PARAM2]], [[PARAM1]]
120116 ; CHECK: ret
121117
140136
141137 ; CHECK: test_psubswv:
142138 ; CHECK: callq getFirstParam
143 ; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
144 ; CHECK: callq getSecondParam
145 ; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
146 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
139 ; CHECK: callq getSecondParam
140 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
141 ; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
147142 ; CHECK: psubsw [[PARAM2]], [[PARAM1]]
148143 ; CHECK: ret
149144
168163
169164 ; CHECK: test_psubusbv:
170165 ; CHECK: callq getFirstParam
171 ; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
172 ; CHECK: callq getSecondParam
173 ; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
174 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
166 ; CHECK: callq getSecondParam
167 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
168 ; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
175169 ; CHECK: psubusb [[PARAM2]], [[PARAM1]]
176170 ; CHECK: ret
177171
196190
197191 ; CHECK: test_psubuswv:
198192 ; CHECK: callq getFirstParam
199 ; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
200 ; CHECK: callq getSecondParam
201 ; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
202 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
193 ; CHECK: callq getSecondParam
194 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
195 ; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
203196 ; CHECK: psubusw [[PARAM2]], [[PARAM1]]
204197 ; CHECK: ret
205198
55 define <4 x i32> @shl4(<4 x i32> %A) nounwind {
66 entry:
77 ; CHECK: shl4
8 ; CHECK: padd
89 ; CHECK: pslld
9 ; CHECK: padd
1010 ; CHECK: ret
1111 %B = shl <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
1212 %C = shl <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
6666 define <8 x i16> @shl8(<8 x i16> %A) nounwind {
6767 entry:
6868 ; CHECK: shl8
69 ; CHECK: padd
6970 ; CHECK: psllw
70 ; CHECK: padd
7171 ; CHECK: ret
7272 %B = shl <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
7373 %C = shl <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
None ; RUN: llc < %s -mcpu=generic -march=x86 -enable-misched=false | FileCheck %s
0 ; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s
11
22 ;; Simple case
33 define i32 @test1(i8 %x) nounwind readnone {
99 ; CHECK: movzbl
1010 ; CHECK-NEXT: andl {{.*}}224
1111
12 ;; Multiple uses of %x but easily extensible.
12 ;; Multiple uses of %x but easily extensible.
1313 define i32 @test2(i8 %x) nounwind readnone {
1414 %A = and i8 %x, -32
1515 %B = zext i8 %A to i32
2020 }
2121 ; CHECK: test2
2222 ; CHECK: movzbl
23 ; CHECK: orl $63
2324 ; CHECK: andl $224
24 ; CHECK: orl $63
2525
2626 declare void @use(i32, i8)
2727
3333 %tmp12 = add i64 %tmp11, 5089792279245435153
3434
3535 ; CHECK: addl $2138875574, %e[[REGISTER_zext:[a-z0-9]+]]
36 ; CHECK: cmpl $-8608074, %e[[REGISTER_zext]]
37 ; CHECK: movslq %e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]]
36 ; CHECK-NEXT: cmpl $-8608074, %e[[REGISTER_zext]]
37 ; CHECK-NEXT: movslq %e[[REGISTER_zext]], [[REGISTER_tmp:%r[a-z0-9]+]]
38 ; CHECK: movq [[REGISTER_tmp]], [[REGISTER_sext:%r[a-z0-9]+]]
3839 ; CHECK-NOT: [[REGISTER_zext]]
3940 ; CHECK: subq %r[[REGISTER_zext]], [[REGISTER_sext]]
4041