llvm.org GIT mirror: llvm / 44e6fff
[X86][SSE] Regenerated vector float tests - fabs / floor(etc.) / fneg / float2double
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@265186 91177308-0d34-0410-b5e6-96231b3b80d8
Simon Pilgrim, 3 years ago
4 changed file(s) with 535 addition(s) and 206 deletion(s).
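The new assertions in the diffs below were produced by utils/update_llc_test_checks.py, which runs llc over each test and emits the complete expected assembly for every RUN-line prefix, replacing the loose hand-written CHECK patterns. Regeneration is invoked on a test file directly, roughly like this (the test path here is illustrative, not taken from the commit):

    python utils/update_llc_test_checks.py test/CodeGen/X86/some_test.ll

In the diffs below, '-' marks removed lines, '+' added lines, a leading space unchanged context, and '...' unchanged lines collapsed by the diff viewer.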
==== file 1/4: fabs tests ====
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X64
 
-define <2 x double> @fabs_v2f64(<2 x double> %p)
-{
-; CHECK-LABEL: fabs_v2f64
-; CHECK: vandpd
+define <2 x double> @fabs_v2f64(<2 x double> %p) {
+; X32-LABEL: fabs_v2f64:
+; X32: # BB#0:
+; X32-NEXT: vandpd .LCPI0_0, %xmm0, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: fabs_v2f64:
+; X64: # BB#0:
+; X64-NEXT: vandpd {{.*}}(%rip), %xmm0, %xmm0
+; X64-NEXT: retq
 %t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
 ret <2 x double> %t
 }
 declare <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
 
-define <4 x float> @fabs_v4f32(<4 x float> %p)
-{
-; CHECK-LABEL: fabs_v4f32
-; CHECK: vandps
+define <4 x float> @fabs_v4f32(<4 x float> %p) {
+; X32-LABEL: fabs_v4f32:
+; X32: # BB#0:
+; X32-NEXT: vandps .LCPI1_0, %xmm0, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: fabs_v4f32:
+; X64: # BB#0:
+; X64-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
+; X64-NEXT: retq
 %t = call <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
 ret <4 x float> %t
 }
 declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
 
-define <4 x double> @fabs_v4f64(<4 x double> %p)
-{
-; CHECK-LABEL: fabs_v4f64
-; CHECK: vandpd
+define <4 x double> @fabs_v4f64(<4 x double> %p) {
+; X32-LABEL: fabs_v4f64:
+; X32: # BB#0:
+; X32-NEXT: vandpd .LCPI2_0, %ymm0, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: fabs_v4f64:
+; X64: # BB#0:
+; X64-NEXT: vandpd {{.*}}(%rip), %ymm0, %ymm0
+; X64-NEXT: retq
 %t = call <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
 ret <4 x double> %t
 }
 declare <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
 
-define <8 x float> @fabs_v8f32(<8 x float> %p)
-{
-; CHECK-LABEL: fabs_v8f32
-; CHECK: vandps
+define <8 x float> @fabs_v8f32(<8 x float> %p) {
+; X32-LABEL: fabs_v8f32:
+; X32: # BB#0:
+; X32-NEXT: vandps .LCPI3_0, %ymm0, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: fabs_v8f32:
+; X64: # BB#0:
+; X64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; X64-NEXT: retq
 %t = call <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
 ret <8 x float> %t
 }
...
 ; that has the sign bits turned off.
 ;
 ; So instead of something like this:
-; movabsq (constant pool load of mask for sign bits)
+; movabsq (constant pool load of mask for sign bits)
 ; vmovq (move from integer register to vector/fp register)
 ; vandps (mask off sign bits)
 ; vmovq (move vector/fp register back to integer return register)
...
 ; mov (put constant value in return register)
 
 define i64 @fabs_v2f32_1() {
-; CHECK-LABEL: fabs_v2f32_1:
-; CHECK: movabsq $9223372032559808512, %rax # imm = 0x7FFFFFFF00000000
-; CHECK-NEXT: retq
+; X32-LABEL: fabs_v2f32_1:
+; X32: # BB#0:
+; X32-NEXT: xorl %eax, %eax
+; X32-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
+; X32-NEXT: retl
+;
+; X64-LABEL: fabs_v2f32_1:
+; X64: # BB#0:
+; X64-NEXT: movabsq $9223372032559808512, %rax # imm = 0x7FFFFFFF00000000
+; X64-NEXT: retq
 %bitcast = bitcast i64 18446744069414584320 to <2 x float> ; 0xFFFF_FFFF_0000_0000
 %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
 %ret = bitcast <2 x float> %fabs to i64
...
 }
 
 define i64 @fabs_v2f32_2() {
-; CHECK-LABEL: fabs_v2f32_2:
-; CHECK: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; CHECK-NEXT: retq
+; X32-LABEL: fabs_v2f32_2:
+; X32: # BB#0:
+; X32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X32-NEXT: xorl %edx, %edx
+; X32-NEXT: retl
+;
+; X64-LABEL: fabs_v2f32_2:
+; X64: # BB#0:
+; X64-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: retq
 %bitcast = bitcast i64 4294967295 to <2 x float> ; 0x0000_0000_FFFF_FFFF
 %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
 %ret = bitcast <2 x float> %fabs to i64
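The fabs_v2f32 cases above fold to plain immediates: fabs on a vector constant just clears bit 31 of each 32-bit lane at compile time, so no vandps survives to run time. A minimal C sketch of that arithmetic (the helper name is mine, not part of the test):

#include <stdint.h>
#include <stdio.h>

/* fabs on two packed floats stored in one i64: clear bit 31 of each
   32-bit lane, which is exactly what the vandps sign mask does. */
static uint64_t fabs_v2f32_bits(uint64_t bits) {
    return bits & 0x7FFFFFFF7FFFFFFFULL;
}

int main(void) {
    /* fabs_v2f32_1: 0xFFFFFFFF00000000 -> 0x7FFFFFFF00000000, i.e. the
       movabsq immediate 9223372032559808512 in the X64 checks. */
    printf("%#llx\n", (unsigned long long)fabs_v2f32_bits(0xFFFFFFFF00000000ULL));
    /* fabs_v2f32_2: 0x00000000FFFFFFFF -> 0x7FFFFFFF, i.e. the movl
       immediate 2147483647. */
    printf("%#llx\n", (unsigned long long)fabs_v2f32_bits(0x00000000FFFFFFFFULL));
    return 0;
}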
==== file 2/4: floor/ceil/trunc/rint/nearbyint tests ====
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7-avx | FileCheck %s
-
-
-define <2 x double> @floor_v2f64(<2 x double> %p)
-{
-; CHECK: floor_v2f64
-; CHECK: vroundpd
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=AVX
+
+define <2 x double> @floor_v2f64(<2 x double> %p) {
+; SSE41-LABEL: floor_v2f64:
+; SSE41: ## BB#0:
+; SSE41-NEXT: roundpd $9, %xmm0, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: floor_v2f64:
+; AVX: ## BB#0:
+; AVX-NEXT: vroundpd $9, %xmm0, %xmm0
+; AVX-NEXT: retq
 %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
 ret <2 x double> %t
 }
 declare <2 x double> @llvm.floor.v2f64(<2 x double> %p)
 
-define <4 x float> @floor_v4f32(<4 x float> %p)
-{
-; CHECK: floor_v4f32
-; CHECK: vroundps
+define <4 x float> @floor_v4f32(<4 x float> %p) {
+; SSE41-LABEL: floor_v4f32:
+; SSE41: ## BB#0:
+; SSE41-NEXT: roundps $9, %xmm0, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: floor_v4f32:
+; AVX: ## BB#0:
+; AVX-NEXT: vroundps $9, %xmm0, %xmm0
+; AVX-NEXT: retq
 %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
 ret <4 x float> %t
 }
 declare <4 x float> @llvm.floor.v4f32(<4 x float> %p)
 
-define <4 x double> @floor_v4f64(<4 x double> %p)
-{
-; CHECK: floor_v4f64
-; CHECK: vroundpd
+define <4 x double> @floor_v4f64(<4 x double> %p) {
+; SSE41-LABEL: floor_v4f64:
+; SSE41: ## BB#0:
+; SSE41-NEXT: roundpd $9, %xmm0, %xmm0
+; SSE41-NEXT: roundpd $9, %xmm1, %xmm1
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: floor_v4f64:
+; AVX: ## BB#0:
+; AVX-NEXT: vroundpd $9, %ymm0, %ymm0
+; AVX-NEXT: retq
 %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
 ret <4 x double> %t
 }
 declare <4 x double> @llvm.floor.v4f64(<4 x double> %p)
 
-define <8 x float> @floor_v8f32(<8 x float> %p)
-{
-; CHECK: floor_v8f32
-; CHECK: vroundps
+define <8 x float> @floor_v8f32(<8 x float> %p) {
+; SSE41-LABEL: floor_v8f32:
+; SSE41: ## BB#0:
+; SSE41-NEXT: roundps $9, %xmm0, %xmm0
+; SSE41-NEXT: roundps $9, %xmm1, %xmm1
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: floor_v8f32:
+; AVX: ## BB#0:
+; AVX-NEXT: vroundps $9, %ymm0, %ymm0
+; AVX-NEXT: retq
 %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
 ret <8 x float> %t
 }
 declare <8 x float> @llvm.floor.v8f32(<8 x float> %p)
 
-define <2 x double> @ceil_v2f64(<2 x double> %p)
-{
-; CHECK: ceil_v2f64
-; CHECK: vroundpd
+define <2 x double> @ceil_v2f64(<2 x double> %p) {
+; SSE41-LABEL: ceil_v2f64:
+; SSE41: ## BB#0:
+; SSE41-NEXT: roundpd $10, %xmm0, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: ceil_v2f64:
+; AVX: ## BB#0:
+; AVX-NEXT: vroundpd $10, %xmm0, %xmm0
+; AVX-NEXT: retq
 %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
 ret <2 x double> %t
 }
 declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
 
-define <4 x float> @ceil_v4f32(<4 x float> %p)
-{
-; CHECK: ceil_v4f32
-; CHECK: vroundps
+define <4 x float> @ceil_v4f32(<4 x float> %p) {
+; SSE41-LABEL: ceil_v4f32:
+; SSE41: ## BB#0:
+; SSE41-NEXT: roundps $10, %xmm0, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: ceil_v4f32:
+; AVX: ## BB#0:
+; AVX-NEXT: vroundps $10, %xmm0, %xmm0
+; AVX-NEXT: retq
 %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
 ret <4 x float> %t
 }
 declare <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
 
-define <4 x double> @ceil_v4f64(<4 x double> %p)
-{
-; CHECK: ceil_v4f64
-; CHECK: vroundpd
+define <4 x double> @ceil_v4f64(<4 x double> %p) {
+; SSE41-LABEL: ceil_v4f64:
+; SSE41: ## BB#0:
+; SSE41-NEXT: roundpd $10, %xmm0, %xmm0
+; SSE41-NEXT: roundpd $10, %xmm1, %xmm1
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: ceil_v4f64:
+; AVX: ## BB#0:
+; AVX-NEXT: vroundpd $10, %ymm0, %ymm0
+; AVX-NEXT: retq
 %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
 ret <4 x double> %t
 }
 declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
 
-define <8 x float> @ceil_v8f32(<8 x float> %p)
-{
-; CHECK: ceil_v8f32
-; CHECK: vroundps
+define <8 x float> @ceil_v8f32(<8 x float> %p) {
+; SSE41-LABEL: ceil_v8f32:
+; SSE41: ## BB#0:
+; SSE41-NEXT: roundps $10, %xmm0, %xmm0
+; SSE41-NEXT: roundps $10, %xmm1, %xmm1
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: ceil_v8f32:
+; AVX: ## BB#0:
+; AVX-NEXT: vroundps $10, %ymm0, %ymm0
+; AVX-NEXT: retq
 %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
 ret <8 x float> %t
 }
 declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
 
-define <2 x double> @trunc_v2f64(<2 x double> %p)
-{
-; CHECK: trunc_v2f64
-; CHECK: vroundpd
+define <2 x double> @trunc_v2f64(<2 x double> %p) {
+; SSE41-LABEL: trunc_v2f64:
+; SSE41: ## BB#0:
+; SSE41-NEXT: roundpd $11, %xmm0, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: trunc_v2f64:
+; AVX: ## BB#0:
+; AVX-NEXT: vroundpd $11, %xmm0, %xmm0
+; AVX-NEXT: retq
 %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
 ret <2 x double> %t
 }
 declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
 
-define <4 x float> @trunc_v4f32(<4 x float> %p)
-{
-; CHECK: trunc_v4f32
-; CHECK: vroundps
+define <4 x float> @trunc_v4f32(<4 x float> %p) {
+; SSE41-LABEL: trunc_v4f32:
+; SSE41: ## BB#0:
+; SSE41-NEXT: roundps $11, %xmm0, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: trunc_v4f32:
+; AVX: ## BB#0:
+; AVX-NEXT: vroundps $11, %xmm0, %xmm0
+; AVX-NEXT: retq
 %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
 ret <4 x float> %t
 }
 declare <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
 
-define <4 x double> @trunc_v4f64(<4 x double> %p)
-{
-; CHECK: trunc_v4f64
-; CHECK: vroundpd
+define <4 x double> @trunc_v4f64(<4 x double> %p) {
+; SSE41-LABEL: trunc_v4f64:
+; SSE41: ## BB#0:
+; SSE41-NEXT: roundpd $11, %xmm0, %xmm0
+; SSE41-NEXT: roundpd $11, %xmm1, %xmm1
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: trunc_v4f64:
+; AVX: ## BB#0:
+; AVX-NEXT: vroundpd $11, %ymm0, %ymm0
+; AVX-NEXT: retq
 %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
 ret <4 x double> %t
 }
 declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
 
-define <8 x float> @trunc_v8f32(<8 x float> %p)
-{
-; CHECK: trunc_v8f32
-; CHECK: vroundps
+define <8 x float> @trunc_v8f32(<8 x float> %p) {
+; SSE41-LABEL: trunc_v8f32:
+; SSE41: ## BB#0:
+; SSE41-NEXT: roundps $11, %xmm0, %xmm0
+; SSE41-NEXT: roundps $11, %xmm1, %xmm1
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: trunc_v8f32:
+; AVX: ## BB#0:
+; AVX-NEXT: vroundps $11, %ymm0, %ymm0
+; AVX-NEXT: retq
 %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
 ret <8 x float> %t
 }
 declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
 
-define <2 x double> @rint_v2f64(<2 x double> %p)
-{
-; CHECK: rint_v2f64
-; CHECK: vroundpd
+define <2 x double> @rint_v2f64(<2 x double> %p) {
+; SSE41-LABEL: rint_v2f64:
+; SSE41: ## BB#0:
+; SSE41-NEXT: roundpd $4, %xmm0, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: rint_v2f64:
+; AVX: ## BB#0:
+; AVX-NEXT: vroundpd $4, %xmm0, %xmm0
+; AVX-NEXT: retq
 %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
 ret <2 x double> %t
 }
 declare <2 x double> @llvm.rint.v2f64(<2 x double> %p)
 
-define <4 x float> @rint_v4f32(<4 x float> %p)
-{
-; CHECK: rint_v4f32
-; CHECK: vroundps
+define <4 x float> @rint_v4f32(<4 x float> %p) {
+; SSE41-LABEL: rint_v4f32:
+; SSE41: ## BB#0:
+; SSE41-NEXT: roundps $4, %xmm0, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: rint_v4f32:
+; AVX: ## BB#0:
+; AVX-NEXT: vroundps $4, %xmm0, %xmm0
+; AVX-NEXT: retq
 %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
 ret <4 x float> %t
 }
 declare <4 x float> @llvm.rint.v4f32(<4 x float> %p)
 
-define <4 x double> @rint_v4f64(<4 x double> %p)
-{
-; CHECK: rint_v4f64
-; CHECK: vroundpd
+define <4 x double> @rint_v4f64(<4 x double> %p) {
+; SSE41-LABEL: rint_v4f64:
+; SSE41: ## BB#0:
+; SSE41-NEXT: roundpd $4, %xmm0, %xmm0
+; SSE41-NEXT: roundpd $4, %xmm1, %xmm1
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: rint_v4f64:
+; AVX: ## BB#0:
+; AVX-NEXT: vroundpd $4, %ymm0, %ymm0
+; AVX-NEXT: retq
 %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
 ret <4 x double> %t
 }
 declare <4 x double> @llvm.rint.v4f64(<4 x double> %p)
 
-define <8 x float> @rint_v8f32(<8 x float> %p)
-{
-; CHECK: rint_v8f32
-; CHECK: vroundps
+define <8 x float> @rint_v8f32(<8 x float> %p) {
+; SSE41-LABEL: rint_v8f32:
+; SSE41: ## BB#0:
+; SSE41-NEXT: roundps $4, %xmm0, %xmm0
+; SSE41-NEXT: roundps $4, %xmm1, %xmm1
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: rint_v8f32:
+; AVX: ## BB#0:
+; AVX-NEXT: vroundps $4, %ymm0, %ymm0
+; AVX-NEXT: retq
 %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
 ret <8 x float> %t
 }
 declare <8 x float> @llvm.rint.v8f32(<8 x float> %p)
 
-define <2 x double> @nearbyint_v2f64(<2 x double> %p)
-{
-; CHECK: nearbyint_v2f64
-; CHECK: vroundpd
+define <2 x double> @nearbyint_v2f64(<2 x double> %p) {
+; SSE41-LABEL: nearbyint_v2f64:
+; SSE41: ## BB#0:
+; SSE41-NEXT: roundpd $12, %xmm0, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: nearbyint_v2f64:
+; AVX: ## BB#0:
+; AVX-NEXT: vroundpd $12, %xmm0, %xmm0
+; AVX-NEXT: retq
 %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
 ret <2 x double> %t
 }
 declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
 
-define <4 x float> @nearbyint_v4f32(<4 x float> %p)
-{
-; CHECK: nearbyint_v4f32
-; CHECK: vroundps
+define <4 x float> @nearbyint_v4f32(<4 x float> %p) {
+; SSE41-LABEL: nearbyint_v4f32:
+; SSE41: ## BB#0:
+; SSE41-NEXT: roundps $12, %xmm0, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: nearbyint_v4f32:
+; AVX: ## BB#0:
+; AVX-NEXT: vroundps $12, %xmm0, %xmm0
+; AVX-NEXT: retq
 %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
 ret <4 x float> %t
 }
 declare <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
 
-define <4 x double> @nearbyint_v4f64(<4 x double> %p)
-{
-; CHECK: nearbyint_v4f64
-; CHECK: vroundpd
+define <4 x double> @nearbyint_v4f64(<4 x double> %p) {
+; SSE41-LABEL: nearbyint_v4f64:
+; SSE41: ## BB#0:
+; SSE41-NEXT: roundpd $12, %xmm0, %xmm0
+; SSE41-NEXT: roundpd $12, %xmm1, %xmm1
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: nearbyint_v4f64:
+; AVX: ## BB#0:
+; AVX-NEXT: vroundpd $12, %ymm0, %ymm0
+; AVX-NEXT: retq
 %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
 ret <4 x double> %t
 }
 declare <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
 
-define <8 x float> @nearbyint_v8f32(<8 x float> %p)
-{
-; CHECK: nearbyint_v8f32
-; CHECK: vroundps
+define <8 x float> @nearbyint_v8f32(<8 x float> %p) {
+; SSE41-LABEL: nearbyint_v8f32:
+; SSE41: ## BB#0:
+; SSE41-NEXT: roundps $12, %xmm0, %xmm0
+; SSE41-NEXT: roundps $12, %xmm1, %xmm1
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: nearbyint_v8f32:
+; AVX: ## BB#0:
+; AVX-NEXT: vroundps $12, %ymm0, %ymm0
+; AVX-NEXT: retq
 %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
 ret <8 x float> %t
 }
...
 ;
 
 define <2 x double> @const_floor_v2f64() {
-; CHECK: const_floor_v2f64
-; CHECK: movaps {{.*#+}} xmm0 = [-2.000000e+00,2.000000e+00]
+; SSE41-LABEL: const_floor_v2f64:
+; SSE41: ## BB#0:
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-2.000000e+00,2.000000e+00]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: const_floor_v2f64:
+; AVX: ## BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-2.000000e+00,2.000000e+00]
+; AVX-NEXT: retq
 %t = call <2 x double> @llvm.floor.v2f64(<2 x double> )
 ret <2 x double> %t
 }
 
 define <4 x float> @const_floor_v4f32() {
-; CHECK: const_floor_v4f32
-; CHECK: movaps {{.*#+}} xmm0 = [-4.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
+; SSE41-LABEL: const_floor_v4f32:
+; SSE41: ## BB#0:
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-4.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: const_floor_v4f32:
+; AVX: ## BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-4.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
+; AVX-NEXT: retq
 %t = call <4 x float> @llvm.floor.v4f32(<4 x float> )
 ret <4 x float> %t
 }
 
 define <2 x double> @const_ceil_v2f64() {
-; CHECK: const_ceil_v2f64
-; CHECK: movaps {{.*#+}} xmm0 = [-1.000000e+00,3.000000e+00]
+; SSE41-LABEL: const_ceil_v2f64:
+; SSE41: ## BB#0:
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-1.000000e+00,3.000000e+00]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: const_ceil_v2f64:
+; AVX: ## BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-1.000000e+00,3.000000e+00]
+; AVX-NEXT: retq
 %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> )
 ret <2 x double> %t
 }
 
 define <4 x float> @const_ceil_v4f32() {
-; CHECK: const_ceil_v4f32
-; CHECK: movaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,3.000000e+00]
+; SSE41-LABEL: const_ceil_v4f32:
+; SSE41: ## BB#0:
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,3.000000e+00]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: const_ceil_v4f32:
+; AVX: ## BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,3.000000e+00]
+; AVX-NEXT: retq
 %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> )
 ret <4 x float> %t
 }
 
 define <2 x double> @const_trunc_v2f64() {
-; CHECK: const_trunc_v2f64
-; CHECK: movaps {{.*#+}} xmm0 = [-1.000000e+00,2.000000e+00]
+; SSE41-LABEL: const_trunc_v2f64:
+; SSE41: ## BB#0:
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-1.000000e+00,2.000000e+00]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: const_trunc_v2f64:
+; AVX: ## BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-1.000000e+00,2.000000e+00]
+; AVX-NEXT: retq
 %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> )
 ret <2 x double> %t
 }
 
 define <4 x float> @const_trunc_v4f32() {
-; CHECK: const_trunc_v4f32
-; CHECK: movaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
+; SSE41-LABEL: const_trunc_v4f32:
+; SSE41: ## BB#0:
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: const_trunc_v4f32:
+; AVX: ## BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
+; AVX-NEXT: retq
 %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> )
 ret <4 x float> %t
 }
==== file 3/4: fneg tests ====
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse | FileCheck %s --check-prefix=X32-SSE --check-prefix=X32-SSE1
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32-SSE --check-prefix=X32-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse2 | FileCheck %s --check-prefix=X64-SSE --check-prefix=X64-SSE1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64-SSE --check-prefix=X64-SSE2
 
 ; FNEG is defined as subtraction from -0.0.
 
 ; This test verifies that we use an xor with a constant to flip the sign bits; no subtraction needed.
-define <4 x float> @t1(<4 x float> %Q) {
-; CHECK-LABEL: t1:
-; CHECK: xorps {{.*}}LCPI0_0{{.*}}, %xmm0
-; CHECK-NEXT: retq
-%tmp = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %Q
-ret <4 x float> %tmp
+define <4 x float> @t1(<4 x float> %Q) nounwind {
+; X32-SSE-LABEL: t1:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: xorps .LCPI0_0, %xmm0
+; X32-SSE-NEXT: retl
+;
+; X64-SSE-LABEL: t1:
+; X64-SSE: # BB#0:
+; X64-SSE-NEXT: xorps {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: retq
+%tmp = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %Q
+ret <4 x float> %tmp
 }
 
 ; This test verifies that we generate an FP subtraction because "0.0 - x" is not an fneg.
-define <4 x float> @t2(<4 x float> %Q) {
-; CHECK-LABEL: t2:
-; CHECK: xorps %[[X:xmm[0-9]+]], %[[X]]
-; CHECK-NEXT: subps %xmm0, %[[X]]
-; CHECK-NEXT: movaps %[[X]], %xmm0
-; CHECK-NEXT: retq
-%tmp = fsub <4 x float> zeroinitializer, %Q
-ret <4 x float> %tmp
+define <4 x float> @t2(<4 x float> %Q) nounwind {
+; X32-SSE-LABEL: t2:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: xorps %xmm1, %xmm1
+; X32-SSE-NEXT: subps %xmm0, %xmm1
+; X32-SSE-NEXT: movaps %xmm1, %xmm0
+; X32-SSE-NEXT: retl
+;
+; X64-SSE-LABEL: t2:
+; X64-SSE: # BB#0:
+; X64-SSE-NEXT: xorps %xmm1, %xmm1
+; X64-SSE-NEXT: subps %xmm0, %xmm1
+; X64-SSE-NEXT: movaps %xmm1, %xmm0
+; X64-SSE-NEXT: retq
+%tmp = fsub <4 x float> zeroinitializer, %Q
+ret <4 x float> %tmp
 }
 
 ; If we're bitcasting an integer to an FP vector, we should avoid the FPU/vector unit entirely.
...
 ; We should generate:
 ; movabsq (put sign bit mask in integer register))
 ; xorq (flip sign bits)
-; movd (move to xmm return register)
+; movd (move to xmm return register)
 
-define <2 x float> @fneg_bitcast(i64 %i) {
-; CHECK-LABEL: fneg_bitcast:
-; CHECK: movabsq $-9223372034707292160, %rax # imm = 0x8000000080000000
-; CHECK-NEXT: xorq %rdi, %rax
-; CHECK-NEXT: movd %rax, %xmm0
-; CHECK-NEXT: retq
+define <2 x float> @fneg_bitcast(i64 %i) nounwind {
+; X32-SSE1-LABEL: fneg_bitcast:
+; X32-SSE1: # BB#0:
+; X32-SSE1-NEXT: pushl %ebp
+; X32-SSE1-NEXT: movl %esp, %ebp
+; X32-SSE1-NEXT: andl $-16, %esp
+; X32-SSE1-NEXT: subl $32, %esp
+; X32-SSE1-NEXT: movl $-2147483648, %eax # imm = 0xFFFFFFFF80000000
+; X32-SSE1-NEXT: movl 12(%ebp), %ecx
+; X32-SSE1-NEXT: xorl %eax, %ecx
+; X32-SSE1-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X32-SSE1-NEXT: xorl 8(%ebp), %eax
+; X32-SSE1-NEXT: movl %eax, (%esp)
+; X32-SSE1-NEXT: movaps (%esp), %xmm0
+; X32-SSE1-NEXT: movl %ebp, %esp
+; X32-SSE1-NEXT: popl %ebp
+; X32-SSE1-NEXT: retl
+;
+; X32-SSE2-LABEL: fneg_bitcast:
+; X32-SSE2: # BB#0:
+; X32-SSE2-NEXT: movl $-2147483648, %eax # imm = 0xFFFFFFFF80000000
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE2-NEXT: xorl %eax, %ecx
+; X32-SSE2-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movd %eax, %xmm1
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT: retl
+;
+; X64-SSE1-LABEL: fneg_bitcast:
+; X64-SSE1: # BB#0:
+; X64-SSE1-NEXT: movabsq $-9223372034707292160, %rax # imm = 0x8000000080000000
+; X64-SSE1-NEXT: xorq %rdi, %rax
+; X64-SSE1-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; X64-SSE1-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
+; X64-SSE1-NEXT: retq
+;
+; X64-SSE2-LABEL: fneg_bitcast:
+; X64-SSE2: # BB#0:
+; X64-SSE2-NEXT: movabsq $-9223372034707292160, %rax # imm = 0x8000000080000000
+; X64-SSE2-NEXT: xorq %rdi, %rax
+; X64-SSE2-NEXT: movd %rax, %xmm0
+; X64-SSE2-NEXT: retq
 %bitcast = bitcast i64 %i to <2 x float>
 %fneg = fsub <2 x float> < float -0.000000e+00, float -0.000000e+00 >, %bitcast
 ret <2 x float> %fneg
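Two facts drive the checks above: fsub from a vector of -0.0 is a pure sign-bit flip, hence the xorps against a constant-pool mask, while fsub from +0.0 must stay a real subtraction, because for a +0.0 input it yields +0.0 where fneg would yield -0.0. A hedged C equivalent of both lowerings (function names are mine):

#include <xmmintrin.h>  /* SSE: _mm_xor_ps, _mm_sub_ps */

/* fneg as a sign-bit flip: what the xorps against .LCPI0_0 does. */
static __m128 fneg_ps(__m128 q) {
    return _mm_xor_ps(q, _mm_set1_ps(-0.0f));
}

/* "0.0 - x" is not an fneg: for x == +0.0 the result is +0.0, not -0.0,
   so the compiler must keep the xorps/subps/movaps sequence. */
static __m128 zero_minus(__m128 q) {
    return _mm_sub_ps(_mm_setzero_ps(), q);
}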
==== file 4/4: float2double (fpext) tests ====
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
-; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32-SSE
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=X32-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64-AVX
 
 ; PR11674
 define void @fpext_frommem(<2 x float>* %in, <2 x double>* %out) {
-; SSE-LABEL: fpext_frommem:
-; SSE: # BB#0: # %entry
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; SSE-NEXT: cvtps2pd (%ecx), %xmm0
-; SSE-NEXT: movups %xmm0, (%eax)
-; SSE-NEXT: retl
+; X32-SSE-LABEL: fpext_frommem:
+; X32-SSE: # BB#0: # %entry
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE-NEXT: cvtps2pd (%ecx), %xmm0
+; X32-SSE-NEXT: movups %xmm0, (%eax)
+; X32-SSE-NEXT: retl
 ;
-; AVX-LABEL: fpext_frommem:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; AVX-NEXT: vcvtps2pd (%ecx), %xmm0
-; AVX-NEXT: vmovups %xmm0, (%eax)
-; AVX-NEXT: retl
+; X32-AVX-LABEL: fpext_frommem:
+; X32-AVX: # BB#0: # %entry
+; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-AVX-NEXT: vcvtps2pd (%ecx), %xmm0
+; X32-AVX-NEXT: vmovups %xmm0, (%eax)
+; X32-AVX-NEXT: retl
+;
+; X64-SSE-LABEL: fpext_frommem:
+; X64-SSE: # BB#0: # %entry
+; X64-SSE-NEXT: cvtps2pd (%rdi), %xmm0
+; X64-SSE-NEXT: movups %xmm0, (%rsi)
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: fpext_frommem:
+; X64-AVX: # BB#0: # %entry
+; X64-AVX-NEXT: vcvtps2pd (%rdi), %xmm0
+; X64-AVX-NEXT: vmovups %xmm0, (%rsi)
+; X64-AVX-NEXT: retq
 entry:
 %0 = load <2 x float>, <2 x float>* %in, align 8
 %1 = fpext <2 x float> %0 to <2 x double>
...
 }
 
 define void @fpext_frommem4(<4 x float>* %in, <4 x double>* %out) {
-; SSE-LABEL: fpext_frommem4:
-; SSE: # BB#0: # %entry
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; SSE-NEXT: cvtps2pd (%ecx), %xmm0
-; SSE-NEXT: cvtps2pd 8(%ecx), %xmm1
-; SSE-NEXT: movups %xmm1, 16(%eax)
-; SSE-NEXT: movups %xmm0, (%eax)
-; SSE-NEXT: retl
+; X32-SSE-LABEL: fpext_frommem4:
+; X32-SSE: # BB#0: # %entry
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE-NEXT: cvtps2pd (%ecx), %xmm0
+; X32-SSE-NEXT: cvtps2pd 8(%ecx), %xmm1
+; X32-SSE-NEXT: movups %xmm1, 16(%eax)
+; X32-SSE-NEXT: movups %xmm0, (%eax)
+; X32-SSE-NEXT: retl
 ;
-; AVX-LABEL: fpext_frommem4:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; AVX-NEXT: vcvtps2pd (%ecx), %ymm0
-; AVX-NEXT: vmovups %ymm0, (%eax)
-; AVX-NEXT: vzeroupper
-; AVX-NEXT: retl
+; X32-AVX-LABEL: fpext_frommem4:
+; X32-AVX: # BB#0: # %entry
+; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-AVX-NEXT: vcvtps2pd (%ecx), %ymm0
+; X32-AVX-NEXT: vmovups %ymm0, (%eax)
+; X32-AVX-NEXT: vzeroupper
+; X32-AVX-NEXT: retl
+;
+; X64-SSE-LABEL: fpext_frommem4:
+; X64-SSE: # BB#0: # %entry
+; X64-SSE-NEXT: cvtps2pd (%rdi), %xmm0
+; X64-SSE-NEXT: cvtps2pd 8(%rdi), %xmm1
+; X64-SSE-NEXT: movups %xmm1, 16(%rsi)
+; X64-SSE-NEXT: movups %xmm0, (%rsi)
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: fpext_frommem4:
+; X64-AVX: # BB#0: # %entry
+; X64-AVX-NEXT: vcvtps2pd (%rdi), %ymm0
+; X64-AVX-NEXT: vmovups %ymm0, (%rsi)
+; X64-AVX-NEXT: vzeroupper
+; X64-AVX-NEXT: retq
 entry:
 %0 = load <4 x float>, <4 x float>* %in
 %1 = fpext <4 x float> %0 to <4 x double>
...
 }
 
 define void @fpext_frommem8(<8 x float>* %in, <8 x double>* %out) {
-; SSE-LABEL: fpext_frommem8:
-; SSE: # BB#0: # %entry
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; SSE-NEXT: cvtps2pd (%ecx), %xmm0
-; SSE-NEXT: cvtps2pd 8(%ecx), %xmm1
-; SSE-NEXT: cvtps2pd 16(%ecx), %xmm2
-; SSE-NEXT: cvtps2pd 24(%ecx), %xmm3
-; SSE-NEXT: movups %xmm3, 48(%eax)
-; SSE-NEXT: movups %xmm2, 32(%eax)
-; SSE-NEXT: movups %xmm1, 16(%eax)
-; SSE-NEXT: movups %xmm0, (%eax)
-; SSE-NEXT: retl
+; X32-SSE-LABEL: fpext_frommem8:
+; X32-SSE: # BB#0: # %entry
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE-NEXT: cvtps2pd (%ecx), %xmm0
+; X32-SSE-NEXT: cvtps2pd 8(%ecx), %xmm1
+; X32-SSE-NEXT: cvtps2pd 16(%ecx), %xmm2
+; X32-SSE-NEXT: cvtps2pd 24(%ecx), %xmm3
+; X32-SSE-NEXT: movups %xmm3, 48(%eax)
+; X32-SSE-NEXT: movups %xmm2, 32(%eax)
+; X32-SSE-NEXT: movups %xmm1, 16(%eax)
+; X32-SSE-NEXT: movups %xmm0, (%eax)
+; X32-SSE-NEXT: retl
 ;
-; AVX-LABEL: fpext_frommem8:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; AVX-NEXT: vcvtps2pd (%ecx), %ymm0
-; AVX-NEXT: vcvtps2pd 16(%ecx), %ymm1
-; AVX-NEXT: vmovups %ymm1, 32(%eax)
-; AVX-NEXT: vmovups %ymm0, (%eax)
-; AVX-NEXT: vzeroupper
-; AVX-NEXT: retl
+; X32-AVX-LABEL: fpext_frommem8:
+; X32-AVX: # BB#0: # %entry
+; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-AVX-NEXT: vcvtps2pd (%ecx), %ymm0
+; X32-AVX-NEXT: vcvtps2pd 16(%ecx), %ymm1
+; X32-AVX-NEXT: vmovups %ymm1, 32(%eax)
+; X32-AVX-NEXT: vmovups %ymm0, (%eax)
+; X32-AVX-NEXT: vzeroupper
+; X32-AVX-NEXT: retl
+;
+; X64-SSE-LABEL: fpext_frommem8:
+; X64-SSE: # BB#0: # %entry
+; X64-SSE-NEXT: cvtps2pd (%rdi), %xmm0
+; X64-SSE-NEXT: cvtps2pd 8(%rdi), %xmm1
+; X64-SSE-NEXT: cvtps2pd 16(%rdi), %xmm2
+; X64-SSE-NEXT: cvtps2pd 24(%rdi), %xmm3
+; X64-SSE-NEXT: movups %xmm3, 48(%rsi)
+; X64-SSE-NEXT: movups %xmm2, 32(%rsi)
+; X64-SSE-NEXT: movups %xmm1, 16(%rsi)
+; X64-SSE-NEXT: movups %xmm0, (%rsi)
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: fpext_frommem8:
+; X64-AVX: # BB#0: # %entry
+; X64-AVX-NEXT: vcvtps2pd (%rdi), %ymm0
+; X64-AVX-NEXT: vcvtps2pd 16(%rdi), %ymm1
+; X64-AVX-NEXT: vmovups %ymm1, 32(%rsi)
+; X64-AVX-NEXT: vmovups %ymm0, (%rsi)
+; X64-AVX-NEXT: vzeroupper
+; X64-AVX-NEXT: retq
 entry:
 %0 = load <8 x float>, <8 x float>* %in
 %1 = fpext <8 x float> %0 to <8 x double>
...
 }
 
 define <2 x double> @fpext_fromconst() {
-; SSE-LABEL: fpext_fromconst:
-; SSE: # BB#0: # %entry
-; SSE-NEXT: movaps {{.*#+}} xmm0 = [1.000000e+00,-2.000000e+00]
-; SSE-NEXT: retl
+; X32-SSE-LABEL: fpext_fromconst:
+; X32-SSE: # BB#0: # %entry
+; X32-SSE-NEXT: movaps {{.*#+}} xmm0 = [1.000000e+00,-2.000000e+00]
+; X32-SSE-NEXT: retl
 ;
-; AVX-LABEL: fpext_fromconst:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1.000000e+00,-2.000000e+00]
-; AVX-NEXT: retl
+; X32-AVX-LABEL: fpext_fromconst:
+; X32-AVX: # BB#0: # %entry
+; X32-AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1.000000e+00,-2.000000e+00]
+; X32-AVX-NEXT: retl
+;
+; X64-SSE-LABEL: fpext_fromconst:
+; X64-SSE: # BB#0: # %entry
+; X64-SSE-NEXT: movaps {{.*#+}} xmm0 = [1.000000e+00,-2.000000e+00]
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: fpext_fromconst:
+; X64-AVX: # BB#0: # %entry
+; X64-AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1.000000e+00,-2.000000e+00]
+; X64-AVX-NEXT: retq
 entry:
 %0 = insertelement <2 x float> undef, float 1.0, i32 0
 %1 = insertelement <2 x float> %0, float -2.0, i32 1
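The cvtps2pd pattern above (PR11674) widens two floats per xmm instruction, so the 4- and 8-element tests need one convert per 64-bit half of the source under SSE, while AVX's ymm form converts four floats at once and halves the convert/store count. A rough C intrinsics equivalent (function names are mine; building the AVX path assumes an -mavx style target flag):

#include <immintrin.h>

/* SSE2: one cvtps2pd widens the low two floats of the source. */
static __m128d fpext_low2(__m128 f) {
    return _mm_cvtps_pd(f);
}

/* AVX: one vcvtps2pd widens all four floats into a ymm register,
   matching the single vcvtps2pd per 16 input bytes in the AVX checks. */
static __m256d fpext_all4(__m128 f) {
    return _mm256_cvtps_pd(f);
}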