llvm.org GIT mirror llvm / 679df75
[X86][SSE] Regenerate float to/from i8/i16 vector tests git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@326488 91177308-0d34-0410-b5e6-96231b3b80d8 Simon Pilgrim 2 years ago
1 changed file(s) with 240 addition(s) and 22 deletion(s). Raw diff Collapse all Expand all
11 ; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s
22 ; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE
33
4 define <8 x float> @foo1_8(<8 x i8> %src) {
5 ; CHECK-LABEL: foo1_8:
4 define <8 x float> @cvt_v8i8_v8f32(<8 x i8> %src) {
5 ; CHECK-LABEL: cvt_v8i8_v8f32:
66 ; CHECK: ## %bb.0:
77 ; CHECK-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4,4,5,5,6,6,7,7]
88 ; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1414 ; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
1515 ; CHECK-NEXT: retl
1616 ;
17 ; CHECK-WIDE-LABEL: foo1_8:
17 ; CHECK-WIDE-LABEL: cvt_v8i8_v8f32:
1818 ; CHECK-WIDE: ## %bb.0:
1919 ; CHECK-WIDE-NEXT: vpmovsxbd %xmm0, %xmm1
2020 ; CHECK-WIDE-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
2626 ret <8 x float> %res
2727 }
2828
29 define <4 x float> @foo1_4(<4 x i8> %src) {
30 ; CHECK-LABEL: foo1_4:
; Signed <8 x i16> -> <8 x float> conversion (sitofp).
; The default and widening-legalization runs expect the same sequence: two
; vpmovsxwd extensions (low half, then the vpshufd-swapped high half) merged
; with vinsertf128 and converted by a single ymm vcvtdq2ps.
29 define <8 x float> @cvt_v8i16_v8f32(<8 x i16> %src) {
30 ; CHECK-LABEL: cvt_v8i16_v8f32:
31 ; CHECK: ## %bb.0:
32 ; CHECK-NEXT: vpmovsxwd %xmm0, %xmm1
33 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
34 ; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0
35 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
36 ; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
37 ; CHECK-NEXT: retl
38 ;
39 ; CHECK-WIDE-LABEL: cvt_v8i16_v8f32:
40 ; CHECK-WIDE: ## %bb.0:
41 ; CHECK-WIDE-NEXT: vpmovsxwd %xmm0, %xmm1
42 ; CHECK-WIDE-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
43 ; CHECK-WIDE-NEXT: vpmovsxwd %xmm0, %xmm0
44 ; CHECK-WIDE-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
45 ; CHECK-WIDE-NEXT: vcvtdq2ps %ymm0, %ymm0
46 ; CHECK-WIDE-NEXT: retl
47 %res = sitofp <8 x i16> %src to <8 x float>
48 ret <8 x float> %res
49 }
50
51 define <4 x float> @cvt_v4i8_v4f32(<4 x i8> %src) {
52 ; CHECK-LABEL: cvt_v4i8_v4f32:
3153 ; CHECK: ## %bb.0:
3254 ; CHECK-NEXT: vpslld $24, %xmm0, %xmm0
3355 ; CHECK-NEXT: vpsrad $24, %xmm0, %xmm0
3456 ; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0
3557 ; CHECK-NEXT: retl
3658 ;
37 ; CHECK-WIDE-LABEL: foo1_4:
59 ; CHECK-WIDE-LABEL: cvt_v4i8_v4f32:
3860 ; CHECK-WIDE: ## %bb.0:
3961 ; CHECK-WIDE-NEXT: vpmovsxbd %xmm0, %xmm0
4062 ; CHECK-WIDE-NEXT: vcvtdq2ps %xmm0, %xmm0
4365 ret <4 x float> %res
4466 }
4567
46 define <8 x float> @foo2_8(<8 x i8> %src) {
47 ; CHECK-LABEL: foo2_8:
48 ; CHECK: ## %bb.0:
49 ; CHECK-NEXT: vpand LCPI2_0, %xmm0, %xmm0
; Signed <4 x i16> -> <4 x float> conversion (sitofp).
; The default run expects a vpslld/vpsrad $16 pair before vcvtdq2ps; the
; widening-legalization run expects a single vpmovsxwd instead.
68 define <4 x float> @cvt_v4i16_v4f32(<4 x i16> %src) {
69 ; CHECK-LABEL: cvt_v4i16_v4f32:
70 ; CHECK: ## %bb.0:
71 ; CHECK-NEXT: vpslld $16, %xmm0, %xmm0
72 ; CHECK-NEXT: vpsrad $16, %xmm0, %xmm0
73 ; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0
74 ; CHECK-NEXT: retl
75 ;
76 ; CHECK-WIDE-LABEL: cvt_v4i16_v4f32:
77 ; CHECK-WIDE: ## %bb.0:
78 ; CHECK-WIDE-NEXT: vpmovsxwd %xmm0, %xmm0
79 ; CHECK-WIDE-NEXT: vcvtdq2ps %xmm0, %xmm0
80 ; CHECK-WIDE-NEXT: retl
81 %res = sitofp <4 x i16> %src to <4 x float>
82 ret <4 x float> %res
83 }
84
85 define <8 x float> @cvt_v8u8_v8f32(<8 x i8> %src) {
86 ; CHECK-LABEL: cvt_v8u8_v8f32:
87 ; CHECK: ## %bb.0:
88 ; CHECK-NEXT: vpand LCPI4_0, %xmm0, %xmm0
5089 ; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
5190 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
5291 ; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
5493 ; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
5594 ; CHECK-NEXT: retl
5695 ;
57 ; CHECK-WIDE-LABEL: foo2_8:
96 ; CHECK-WIDE-LABEL: cvt_v8u8_v8f32:
5897 ; CHECK-WIDE: ## %bb.0:
5998 ; CHECK-WIDE-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6099 ; CHECK-WIDE-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
66105 ret <8 x float> %res
67106 }
68107
69 define <4 x float> @foo2_4(<4 x i8> %src) {
70 ; CHECK-LABEL: foo2_4:
71 ; CHECK: ## %bb.0:
72 ; CHECK-NEXT: vandps LCPI3_0, %xmm0, %xmm0
; Unsigned <8 x i16> -> <8 x float> conversion (uitofp).
; The default and widening-legalization runs expect the same sequence: two
; vpmovzxwd extensions (low half, then the vpshufd-swapped high half) merged
; with vinsertf128 and converted by a single ymm vcvtdq2ps.
108 define <8 x float> @cvt_v8u16_v8f32(<8 x i16> %src) {
109 ; CHECK-LABEL: cvt_v8u16_v8f32:
110 ; CHECK: ## %bb.0:
111 ; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
112 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
113 ; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
114 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
115 ; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
116 ; CHECK-NEXT: retl
117 ;
118 ; CHECK-WIDE-LABEL: cvt_v8u16_v8f32:
119 ; CHECK-WIDE: ## %bb.0:
120 ; CHECK-WIDE-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
121 ; CHECK-WIDE-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
122 ; CHECK-WIDE-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
123 ; CHECK-WIDE-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
124 ; CHECK-WIDE-NEXT: vcvtdq2ps %ymm0, %ymm0
125 ; CHECK-WIDE-NEXT: retl
126 %res = uitofp <8 x i16> %src to <8 x float>
127 ret <8 x float> %res
128 }
129
130 define <4 x float> @cvt_v4u8_v4f32(<4 x i8> %src) {
131 ; CHECK-LABEL: cvt_v4u8_v4f32:
132 ; CHECK: ## %bb.0:
133 ; CHECK-NEXT: vandps LCPI6_0, %xmm0, %xmm0
73134 ; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0
74135 ; CHECK-NEXT: retl
75136 ;
76 ; CHECK-WIDE-LABEL: foo2_4:
137 ; CHECK-WIDE-LABEL: cvt_v4u8_v4f32:
77138 ; CHECK-WIDE: ## %bb.0:
78139 ; CHECK-WIDE-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
79140 ; CHECK-WIDE-NEXT: vcvtdq2ps %xmm0, %xmm0
82143 ret <4 x float> %res
83144 }
84145
85 define <8 x i8> @foo3_8(<8 x float> %src) {
86 ; CHECK-LABEL: foo3_8:
; Unsigned <4 x i16> -> <4 x float> conversion (uitofp).
; The default run expects the odd 16-bit words to be replaced with zeros
; (vpxor + vpblendw) before vcvtdq2ps; the widening-legalization run expects
; a single vpmovzxwd instead.
146 define <4 x float> @cvt_v4u16_v4f32(<4 x i16> %src) {
147 ; CHECK-LABEL: cvt_v4u16_v4f32:
148 ; CHECK: ## %bb.0:
149 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
150 ; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
151 ; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0
152 ; CHECK-NEXT: retl
153 ;
154 ; CHECK-WIDE-LABEL: cvt_v4u16_v4f32:
155 ; CHECK-WIDE: ## %bb.0:
156 ; CHECK-WIDE-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
157 ; CHECK-WIDE-NEXT: vcvtdq2ps %xmm0, %xmm0
158 ; CHECK-WIDE-NEXT: retl
159 %res = uitofp <4 x i16> %src to <4 x float>
160 ret <4 x float> %res
161 }
162
163 define <8 x i8> @cvt_v8f32_v8i8(<8 x float> %src) {
164 ; CHECK-LABEL: cvt_v8f32_v8i8:
87165 ; CHECK: ## %bb.0:
88166 ; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
89167 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
91169 ; CHECK-NEXT: vzeroupper
92170 ; CHECK-NEXT: retl
93171 ;
94 ; CHECK-WIDE-LABEL: foo3_8:
172 ; CHECK-WIDE-LABEL: cvt_v8f32_v8i8:
95173 ; CHECK-WIDE: ## %bb.0:
96174 ; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
97175 ; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
122200 ret <8 x i8> %res
123201 }
124202
125 define <4 x i8> @foo3_4(<4 x float> %src) {
126 ; CHECK-LABEL: foo3_4:
; <8 x float> -> signed <8 x i16> conversion (fptosi).
; The default and widening-legalization runs expect the same sequence: a ymm
; vcvttps2dq, then the two 128-bit halves packed together with vpackssdw,
; followed by vzeroupper before returning.
203 define <8 x i16> @cvt_v8f32_v8i16(<8 x float> %src) {
204 ; CHECK-LABEL: cvt_v8f32_v8i16:
205 ; CHECK: ## %bb.0:
206 ; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
207 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
208 ; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
209 ; CHECK-NEXT: vzeroupper
210 ; CHECK-NEXT: retl
211 ;
212 ; CHECK-WIDE-LABEL: cvt_v8f32_v8i16:
213 ; CHECK-WIDE: ## %bb.0:
214 ; CHECK-WIDE-NEXT: vcvttps2dq %ymm0, %ymm0
215 ; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm1
216 ; CHECK-WIDE-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
217 ; CHECK-WIDE-NEXT: vzeroupper
218 ; CHECK-WIDE-NEXT: retl
219 %res = fptosi <8 x float> %src to <8 x i16>
220 ret <8 x i16> %res
221 }
222
223 define <4 x i8> @cvt_v4f32_v4i8(<4 x float> %src) {
224 ; CHECK-LABEL: cvt_v4f32_v4i8:
127225 ; CHECK: ## %bb.0:
128226 ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
129227 ; CHECK-NEXT: retl
130228 ;
131 ; CHECK-WIDE-LABEL: foo3_4:
229 ; CHECK-WIDE-LABEL: cvt_v4f32_v4i8:
132230 ; CHECK-WIDE: ## %bb.0:
133231 ; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
134232 ; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
146244 ret <4 x i8> %res
147245 }
148246
; <4 x float> -> signed <4 x i16> conversion (fptosi).
; The default run expects only an xmm vcvttps2dq; the widening-legalization
; run expects the conversion to be done at ymm width and then narrowed with
; vextractf128 + vpackssdw (plus vzeroupper).
247 define <4 x i16> @cvt_v4f32_v4i16(<4 x float> %src) {
248 ; CHECK-LABEL: cvt_v4f32_v4i16:
249 ; CHECK: ## %bb.0:
250 ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
251 ; CHECK-NEXT: retl
252 ;
253 ; CHECK-WIDE-LABEL: cvt_v4f32_v4i16:
254 ; CHECK-WIDE: ## %bb.0:
255 ; CHECK-WIDE-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
256 ; CHECK-WIDE-NEXT: vcvttps2dq %ymm0, %ymm0
257 ; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm1
258 ; CHECK-WIDE-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
259 ; CHECK-WIDE-NEXT: vzeroupper
260 ; CHECK-WIDE-NEXT: retl
261 %res = fptosi <4 x float> %src to <4 x i16>
262 ret <4 x i16> %res
263 }
264
; <8 x float> -> unsigned <8 x i8> conversion (fptoui).
; The default run expects a vector sequence (ymm vcvttps2dq, vextractf128,
; vpackusdw). The widening-legalization run expects the conversion to be
; scalarized: eight vcvttss2si results, the first moved in with vmovd and the
; remaining seven inserted with vpinsrb $1..$7.
265 define <8 x i8> @cvt_v8f32_v8u8(<8 x float> %src) {
266 ; CHECK-LABEL: cvt_v8f32_v8u8:
267 ; CHECK: ## %bb.0:
268 ; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
269 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
270 ; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
271 ; CHECK-NEXT: vzeroupper
272 ; CHECK-NEXT: retl
273 ;
274 ; CHECK-WIDE-LABEL: cvt_v8f32_v8u8:
275 ; CHECK-WIDE: ## %bb.0:
276 ; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
277 ; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
278 ; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
279 ; CHECK-WIDE-NEXT: vmovd %ecx, %xmm1
280 ; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
281 ; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
282 ; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
283 ; CHECK-WIDE-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
284 ; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
285 ; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
286 ; CHECK-WIDE-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
287 ; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm0
288 ; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
289 ; CHECK-WIDE-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
290 ; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
291 ; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
292 ; CHECK-WIDE-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
293 ; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
294 ; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
295 ; CHECK-WIDE-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
296 ; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
297 ; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
298 ; CHECK-WIDE-NEXT: vpinsrb $7, %eax, %xmm1, %xmm0
299 ; CHECK-WIDE-NEXT: vzeroupper
300 ; CHECK-WIDE-NEXT: retl
301 %res = fptoui <8 x float> %src to <8 x i8>
302 ret <8 x i8> %res
303 }
304
; <8 x float> -> unsigned <8 x i16> conversion (fptoui).
; The default and widening-legalization runs expect the same sequence: a ymm
; vcvttps2dq, then the two 128-bit halves packed together with vpackusdw,
; followed by vzeroupper before returning.
305 define <8 x i16> @cvt_v8f32_v8u16(<8 x float> %src) {
306 ; CHECK-LABEL: cvt_v8f32_v8u16:
307 ; CHECK: ## %bb.0:
308 ; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
309 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
310 ; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
311 ; CHECK-NEXT: vzeroupper
312 ; CHECK-NEXT: retl
313 ;
314 ; CHECK-WIDE-LABEL: cvt_v8f32_v8u16:
315 ; CHECK-WIDE: ## %bb.0:
316 ; CHECK-WIDE-NEXT: vcvttps2dq %ymm0, %ymm0
317 ; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm1
318 ; CHECK-WIDE-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
319 ; CHECK-WIDE-NEXT: vzeroupper
320 ; CHECK-WIDE-NEXT: retl
321 %res = fptoui <8 x float> %src to <8 x i16>
322 ret <8 x i16> %res
323 }
324
; <4 x float> -> unsigned <4 x i8> conversion (fptoui).
; The default run expects only an xmm vcvttps2dq. The widening-legalization
; run expects the conversion to be scalarized: four vcvttss2si results, the
; first moved in with vmovd and the other three inserted with vpinsrb $1..$3.
325 define <4 x i8> @cvt_v4f32_v4u8(<4 x float> %src) {
326 ; CHECK-LABEL: cvt_v4f32_v4u8:
327 ; CHECK: ## %bb.0:
328 ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
329 ; CHECK-NEXT: retl
330 ;
331 ; CHECK-WIDE-LABEL: cvt_v4f32_v4u8:
332 ; CHECK-WIDE: ## %bb.0:
333 ; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
334 ; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
335 ; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
336 ; CHECK-WIDE-NEXT: vmovd %ecx, %xmm1
337 ; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
338 ; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
339 ; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
340 ; CHECK-WIDE-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
341 ; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
342 ; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
343 ; CHECK-WIDE-NEXT: vpinsrb $3, %eax, %xmm1, %xmm0
344 ; CHECK-WIDE-NEXT: retl
345 %res = fptoui <4 x float> %src to <4 x i8>
346 ret <4 x i8> %res
347 }
348
; <4 x float> -> unsigned <4 x i16> conversion (fptoui).
; The default run expects only an xmm vcvttps2dq; the widening-legalization
; run expects the conversion to be done at ymm width and then narrowed with
; vextractf128 + vpackusdw (plus vzeroupper).
349 define <4 x i16> @cvt_v4f32_v4u16(<4 x float> %src) {
350 ; CHECK-LABEL: cvt_v4f32_v4u16:
351 ; CHECK: ## %bb.0:
352 ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
353 ; CHECK-NEXT: retl
354 ;
355 ; CHECK-WIDE-LABEL: cvt_v4f32_v4u16:
356 ; CHECK-WIDE: ## %bb.0:
357 ; CHECK-WIDE-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
358 ; CHECK-WIDE-NEXT: vcvttps2dq %ymm0, %ymm0
359 ; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm1
360 ; CHECK-WIDE-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
361 ; CHECK-WIDE-NEXT: vzeroupper
362 ; CHECK-WIDE-NEXT: retl
363 %res = fptoui <4 x float> %src to <4 x i16>
364 ret <4 x i16> %res
365 }
366