llvm.org GIT mirror llvm / bfcd61b
Enable -sse-domain-fix by default. Now with tests! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@99954 91177308-0d34-0410-b5e6-96231b3b80d8 Jakob Stoklund Olesen 10 years ago
10 changed file(s) with 60 addition(s) and 51 deletion(s). Raw diff Collapse all Expand all
2121 #include "llvm/Target/TargetOptions.h"
2222 #include "llvm/Target/TargetRegistry.h"
2323 using namespace llvm;
24
25 static cl::opt<bool>
26 SSEDomainFix("sse-domain-fix",
27 cl::desc("Enable fixing of SSE execution domain"),
28 cl::init(false), cl::Hidden);
2924
3025 static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
3126 Triple TheTriple(TT);
176171
177172 bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
178173 CodeGenOpt::Level OptLevel) {
179 if (SSEDomainFix && OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) {
174 if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) {
180175 PM.add(createSSEDomainFixPass());
181176 return true;
182177 }
None ; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movss | count 2
1 ; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movaps | count 4
0 ; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 -o %t
1 ; RUN: grep movss %t | count 2
2 ; RUN: grep movaps %t | count 2
3 ; RUN: grep movdqa %t | count 2
24
35 define i1 @t([2 x float]* %y, [2 x float]* %w, i32, [2 x float]* %x.pn59, i32 %smax190, i32 %j.1180, <4 x float> %wu.2179, <4 x float> %wr.2178, <4 x float>* %tmp89.out, <4 x float>* %tmp107.out, i32* %indvar.next218.out) nounwind {
46 newFuncRoot:
None ; RUN: llc < %s -march=x86 -mcpu=penryn -disable-mmx -o %t
1 ; RUN: grep unpcklpd %t | count 1
2 ; RUN: grep movapd %t | count 1
3 ; RUN: grep movaps %t | count 1
0 ; RUN: llc < %s -march=x86 -mcpu=penryn -disable-mmx | FileCheck %s
41
52 ; Shows a dag combine bug that will generate an illegal build vector
63 ; with v2i64 build_vector i32, i32.
74
5 ; CHECK: _test:
6 ; CHECK: unpcklpd
7 ; CHECK: movapd
88 define void @test(<2 x double>* %dst, <4 x double> %src) nounwind {
99 entry:
1010 %tmp7.i = shufflevector <4 x double> %src, <4 x double> undef, <2 x i32> < i32 0, i32 2 >
1212 ret void
1313 }
1414
15 ; CHECK: _test2:
16 ; CHECK: movdqa
1517 define void @test2(<4 x i16>* %src, <4 x i32>* %dest) nounwind {
1618 entry:
1719 %tmp1 = load <4 x i16>* %src
44 ; bounce the vector off of cache rather than shuffling each individual
55 ; element out of the index vector.
66
7 ; CHECK: pand (%rdx), %xmm0
7 ; CHECK: andps (%rdx), %xmm0
88 ; CHECK: movaps %xmm0, -24(%rsp)
99 ; CHECK: movslq -24(%rsp), %rax
1010 ; CHECK: movsd (%rdi,%rax,8), %xmm0
None ; RUN: llc < %s -march=x86-64 > %t
1 ; RUN: grep unpck %t | count 2
2 ; RUN: grep shuf %t | count 2
3 ; RUN: grep ps %t | count 4
4 ; RUN: grep pd %t | count 4
5 ; RUN: grep movup %t | count 4
0 ; RUN: llc < %s -march=x86-64 | FileCheck %s
61
2 ; CHECK: _a:
3 ; CHECK: movdqu
4 ; CHECK: pshufd
75 define <4 x float> @a(<4 x float>* %y) nounwind {
86 %x = load <4 x float>* %y, align 4
97 %a = extractelement <4 x float> %x, i32 0
1614 %s = insertelement <4 x float> %r, float %a, i32 3
1715 ret <4 x float> %s
1816 }
17
18 ; CHECK: _b:
19 ; CHECK: movups
20 ; CHECK: unpckhps
1921 define <4 x float> @b(<4 x float>* %y, <4 x float> %z) nounwind {
2022 %x = load <4 x float>* %y, align 4
2123 %a = extractelement <4 x float> %x, i32 2
2830 %s = insertelement <4 x float> %r, float %b, i32 3
2931 ret <4 x float> %s
3032 }
33
34 ; CHECK: _c:
35 ; CHECK: movupd
36 ; CHECK: shufpd
3137 define <2 x double> @c(<2 x double>* %y) nounwind {
3238 %x = load <2 x double>* %y, align 8
3339 %a = extractelement <2 x double> %x, i32 0
3642 %r = insertelement <2 x double> %p, double %a, i32 1
3743 ret <2 x double> %r
3844 }
45
46 ; CHECK: _d:
47 ; CHECK: movupd
48 ; CHECK: unpckhpd
3949 define <2 x double> @d(<2 x double>* %y, <2 x double> %z) nounwind {
4050 %x = load <2 x double>* %y, align 8
4151 %a = extractelement <2 x double> %x, i32 1
None ; RUN: llc < %s -march=x86-64 | grep movups | count 1
0 ; RUN: llc < %s -march=x86-64 | grep movdqu | count 1
11
22 define <2 x i64> @bar(<2 x i64>* %p, <2 x i64> %x) nounwind {
33 %t = load <2 x i64>* %p, align 8
1919 ; X64: pshuflw $0, %xmm0, %xmm0
2020 ; X64: xorl %eax, %eax
2121 ; X64: pinsrw $0, %eax, %xmm0
22 ; X64: movaps %xmm0, (%rdi)
22 ; X64: movdqa %xmm0, (%rdi)
2323 ; X64: ret
2424 }
2525
3131
3232 ; X64: t1:
3333 ; X64: movl (%rsi), %eax
34 ; X64: movaps (%rdi), %xmm0
34 ; X64: movdqa (%rdi), %xmm0
3535 ; X64: pinsrw $0, %eax, %xmm0
3636 ; X64: ret
3737 }
6565 ; X64: pshufhw $100, %xmm0, %xmm2
6666 ; X64: pinsrw $1, %eax, %xmm2
6767 ; X64: pextrw $1, %xmm0, %eax
68 ; X64: movaps %xmm2, %xmm0
68 ; X64: movdqa %xmm2, %xmm0
6969 ; X64: pinsrw $4, %eax, %xmm0
7070 ; X64: ret
7171 }
121121 ; X64: t8:
122122 ; X64: pshuflw $-58, (%rsi), %xmm0
123123 ; X64: pshufhw $-58, %xmm0, %xmm0
124 ; X64: movaps %xmm0, (%rdi)
124 ; X64: movdqa %xmm0, (%rdi)
125125 ; X64: ret
126126 }
127127
1414 ; CHECK: test2:
1515 ; CHECK: pcmp
1616 ; CHECK: pcmp
17 ; CHECK: xorps
17 ; CHECK: pxor
1818 ; CHECK: ret
1919 %C = icmp sge <4 x i32> %A, %B
2020 %D = sext <4 x i1> %C to <4 x i32>
2424 define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind {
2525 ; CHECK: test3:
2626 ; CHECK: pcmpgtd
27 ; CHECK: movaps
27 ; CHECK: movdqa
2828 ; CHECK: ret
2929 %C = icmp slt <4 x i32> %A, %B
3030 %D = sext <4 x i1> %C to <4 x i32>
3333
3434 define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
3535 ; CHECK: test4:
36 ; CHECK: movaps
36 ; CHECK: movdqa
3737 ; CHECK: pcmpgtd
3838 ; CHECK: ret
3939 %C = icmp ugt <4 x i32> %A, %B
11 ; CHECK: pextrd
22 ; CHECK: pextrd
33 ; CHECK: movd
4 ; CHECK: movaps
4 ; CHECK: movdqa
55
66
77 ; bitcast v14i16 to v7i32
44
55 %i32vec3 = type <3 x i32>
66 define void @add3i32(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
7 ; CHECK: movaps
7 ; CHECK: movdqa
88 ; CHECK: paddd
99 ; CHECK: pextrd
1010 ; CHECK: movq
3232
3333 %i32vec7 = type <7 x i32>
3434 define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) {
35 ; CHECK: movaps
36 ; CHECK: movaps
35 ; CHECK: movdqa
36 ; CHECK: movdqa
3737 ; CHECK: paddd
3838 ; CHECK: paddd
3939 ; CHECK: pextrd
4040 ; CHECK: movq
41 ; CHECK: movaps
41 ; CHECK: movdqa
4242 %a = load %i32vec7* %ap, align 16
4343 %b = load %i32vec7* %bp, align 16
4444 %x = add %i32vec7 %a, %b
4848
4949 %i32vec12 = type <12 x i32>
5050 define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) {
51 ; CHECK: movaps
52 ; CHECK: movaps
53 ; CHECK: movaps
51 ; CHECK: movdqa
52 ; CHECK: movdqa
53 ; CHECK: movdqa
5454 ; CHECK: paddd
5555 ; CHECK: paddd
5656 ; CHECK: paddd
57 ; CHECK: movaps
58 ; CHECK: movaps
59 ; CHECK: movaps
57 ; CHECK: movdqa
58 ; CHECK: movdqa
59 ; CHECK: movdqa
6060 %a = load %i32vec12* %ap, align 16
6161 %b = load %i32vec12* %bp, align 16
6262 %x = add %i32vec12 %a, %b
6767
6868 %i16vec3 = type <3 x i16>
6969 define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind {
70 ; CHECK: movaps
70 ; CHECK: movdqa
7171 ; CHECK: paddw
7272 ; CHECK: movd
7373 ; CHECK: pextrw
8080
8181 %i16vec4 = type <4 x i16>
8282 define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp) nounwind {
83 ; CHECK: movaps
83 ; CHECK: movdqa
8484 ; CHECK: paddw
8585 ; CHECK: movq
8686 %a = load %i16vec4* %ap, align 16
9292
9393 %i16vec12 = type <12 x i16>
9494 define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12* %bp) nounwind {
95 ; CHECK: movaps
96 ; CHECK: movaps
95 ; CHECK: movdqa
96 ; CHECK: movdqa
9797 ; CHECK: paddw
9898 ; CHECK: paddw
9999 ; CHECK: movq
100 ; CHECK: movaps
100 ; CHECK: movdqa
101101 %a = load %i16vec12* %ap, align 16
102102 %b = load %i16vec12* %bp, align 16
103103 %x = add %i16vec12 %a, %b
107107
108108 %i16vec18 = type <18 x i16>
109109 define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* %bp) nounwind {
110 ; CHECK: movaps
111 ; CHECK: movaps
112 ; CHECK: movaps
110 ; CHECK: movdqa
111 ; CHECK: movdqa
112 ; CHECK: movdqa
113113 ; CHECK: paddw
114114 ; CHECK: paddw
115115 ; CHECK: paddw
116116 ; CHECK: movd
117 ; CHECK: movaps
118 ; CHECK: movaps
117 ; CHECK: movdqa
118 ; CHECK: movdqa
119119 %a = load %i16vec18* %ap, align 16
120120 %b = load %i16vec18* %bp, align 16
121121 %x = add %i16vec18 %a, %b
126126
127127 %i8vec3 = type <3 x i8>
128128 define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind {
129 ; CHECK: movaps
129 ; CHECK: movdqa
130130 ; CHECK: paddb
131131 ; CHECK: pextrb
132132 ; CHECK: movb
139139
140140 %i8vec31 = type <31 x i8>
141141 define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp) nounwind {
142 ; CHECK: movaps
143 ; CHECK: movaps
142 ; CHECK: movdqa
143 ; CHECK: movdqa
144144 ; CHECK: paddb
145145 ; CHECK: paddb
146146 ; CHECK: movq