llvm.org GIT mirror llvm / 79fc52e
[X86][AVX512] Added VPERMPD/VPERMQ intrinsics fast-isel generic IR tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@274503 91177308-0d34-0410-b5e6-96231b3b80d8 (Simon Pilgrim, 4 years ago)
2 changed file(s) with 252 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
261261 %res0 = shufflevector <16 x float> %a1, <16 x float> undef, <16 x i32>
262262 %res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
263263 ret <16 x float> %res1
264 }
265
; Unmasked 512-bit i64 permute written as generic IR: a single-input
; shufflevector whose lane mask is [0,0,0,0,4,4,4,4]. Both check prefixes
; expect it to be selected as one vpermq on zmm0.
define <8 x i64> @test_mm512_permutex_epi64(<8 x i64> %a0) {
; X32-LABEL: test_mm512_permutex_epi64:
; X32:       # BB#0:
; X32-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[0,0,0,0,4,4,4,4]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_permutex_epi64:
; X64:       # BB#0:
; X64-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[0,0,0,0,4,4,4,4]
; X64-NEXT:    retq
  ; The shuffle mask mirrors the lane list in the vpermq check lines above.
  %res = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i64> %res
}
279
; Merge-masked 512-bit i64 permute written as generic IR.
;   %a0 - passthrough vector, kept in lanes where the mask bit is clear
;   %a1 - 8-bit lane mask, bitcast to <8 x i1>
;   %a2 - vector that is permuted with lane mask [0,0,0,0,4,4,4,4]
; The check lines expect the shuffle + select pair to become a single
; vpermq writing zmm0 under {%k1}.
define <8 x i64> @test_mm512_mask_permutex_epi64(<8 x i64> %a0, i8 %a1, <8 x i64> %a2) {
; X32-LABEL: test_mm512_mask_permutex_epi64:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpermq {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_permutex_epi64:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpermq {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
; X64-NEXT:    retq
  %arg1 = bitcast i8 %a1 to <8 x i1>
  ; The shuffle mask mirrors the lane list in the vpermq check lines above.
  %res0 = shufflevector <8 x i64> %a2, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  %res1 = select <8 x i1> %arg1, <8 x i64> %res0, <8 x i64> %a0
  ret <8 x i64> %res1
}
298
; Zero-masked 512-bit i64 permute written as generic IR.
;   %a0 - 8-bit lane mask, bitcast to <8 x i1>
;   %a1 - vector that is permuted with lane mask [0,0,0,0,4,4,4,4]
; Lanes whose mask bit is clear take zeroinitializer. The check lines
; expect a single vpermq with {%k1} {z}.
define <8 x i64> @test_mm512_maskz_permutex_epi64(i8 %a0, <8 x i64> %a1) {
; X32-LABEL: test_mm512_maskz_permutex_epi64:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_permutex_epi64:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
; X64-NEXT:    retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  ; The shuffle mask mirrors the lane list in the vpermq check lines above.
  %res0 = shufflevector <8 x i64> %a1, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  %res1 = select <8 x i1> %arg0, <8 x i64> %res0, <8 x i64> zeroinitializer
  ret <8 x i64> %res1
}
317
; Unmasked 512-bit double permute written as generic IR: a single-input
; shufflevector whose lane mask is [0,0,0,0,4,4,4,4]. Both check prefixes
; expect it to be selected as one vpermpd on zmm0.
define <8 x double> @test_mm512_permutex_pd(<8 x double> %a0) {
; X32-LABEL: test_mm512_permutex_pd:
; X32:       # BB#0:
; X32-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[0,0,0,0,4,4,4,4]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_permutex_pd:
; X64:       # BB#0:
; X64-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[0,0,0,0,4,4,4,4]
; X64-NEXT:    retq
  ; The shuffle mask mirrors the lane list in the vpermpd check lines above.
  %res = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x double> %res
}
331
; Merge-masked 512-bit double permute written as generic IR.
;   %a0 - passthrough vector, kept in lanes where the mask bit is clear
;   %a1 - 8-bit lane mask, bitcast to <8 x i1>
;   %a2 - vector that is permuted with lane mask [0,0,0,0,4,4,4,4]
; The check lines expect the shuffle + select pair to become a single
; vpermpd writing zmm0 under {%k1}.
define <8 x double> @test_mm512_mask_permutex_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2) {
; X32-LABEL: test_mm512_mask_permutex_pd:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_permutex_pd:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
; X64-NEXT:    retq
  %arg1 = bitcast i8 %a1 to <8 x i1>
  ; The shuffle mask mirrors the lane list in the vpermpd check lines above.
  %res0 = shufflevector <8 x double> %a2, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
  ret <8 x double> %res1
}
350
; Zero-masked 512-bit double permute written as generic IR.
;   %a0 - 8-bit lane mask, bitcast to <8 x i1>
;   %a1 - vector that is permuted with lane mask [0,0,0,0,4,4,4,4]
; Lanes whose mask bit is clear take zeroinitializer. The check lines
; expect a single vpermpd with {%k1} {z}.
define <8 x double> @test_mm512_maskz_permutex_pd(i8 %a0, <8 x double> %a1) {
; X32-LABEL: test_mm512_maskz_permutex_pd:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_permutex_pd:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
; X64-NEXT:    retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  ; The shuffle mask mirrors the lane list in the vpermpd check lines above.
  %res0 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
  ret <8 x double> %res1
}
265369
266370 define <8 x i64> @test_mm512_shuffle_epi32(<8 x i64> %a0) {
403403 ret <8 x float> %res1
404404 }
405405
; Unmasked 256-bit i64 permute written as generic IR: a single-input
; shufflevector whose lane mask is [3,0,0,0]. Both check prefixes expect
; it to be selected as one vpermq on ymm0.
define <4 x i64> @test_mm256_permutex_epi64(<4 x i64> %a0) {
; X32-LABEL: test_mm256_permutex_epi64:
; X32:       # BB#0:
; X32-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,0,0,0]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_permutex_epi64:
; X64:       # BB#0:
; X64-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,0,0,0]
; X64-NEXT:    retq
  ; The shuffle mask mirrors the lane list in the vpermq check lines above.
  %res = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
  ret <4 x i64> %res
}
419
; Merge-masked 256-bit i64 permute written as generic IR.
;   %a0 - passthrough vector, kept in lanes where the mask bit is clear
;   %a1 - i8 mask; only the low 4 bits are used (trunc to i4, then
;         bitcast to <4 x i1>)
;   %a2 - vector that is permuted with lane mask [1,0,0,0]
; The check lines record the expected code as-is: the mask is and'ed with
; 15 and round-tripped through a stack slot before kmovw, then a single
; vpermq writes ymm0 under {%k1}.
define <4 x i64> @test_mm256_mask_permutex_epi64(<4 x i64> %a0, i8 %a1, <4 x i64> %a2) {
; X32-LABEL: test_mm256_mask_permutex_epi64:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp8:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $15, %al
; X32-NEXT:    movb %al, (%esp)
; X32-NEXT:    movzbl (%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpermq {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0]
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_permutex_epi64:
; X64:       # BB#0:
; X64-NEXT:    andb $15, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vpermq {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0]
; X64-NEXT:    retq
  %trn1 = trunc i8 %a1 to i4
  %arg1 = bitcast i4 %trn1 to <4 x i1>
  ; The shuffle mask mirrors the lane list in the vpermq check lines above.
  %res0 = shufflevector <4 x i64> %a2, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  %res1 = select <4 x i1> %arg1, <4 x i64> %res0, <4 x i64> %a0
  ret <4 x i64> %res1
}
449
; Zero-masked 256-bit i64 permute written as generic IR.
;   %a0 - i8 mask; only the low 4 bits are used (trunc to i4, then
;         bitcast to <4 x i1>)
;   %a1 - vector that is permuted with lane mask [1,0,0,0]
; Lanes whose mask bit is clear take zeroinitializer. The check lines
; record the expected code as-is: the mask is and'ed with 15 and
; round-tripped through a stack slot before kmovw, then a single vpermq
; is emitted with {%k1} {z}.
define <4 x i64> @test_mm256_maskz_permutex_epi64(i8 %a0, <4 x i64> %a1) {
; X32-LABEL: test_mm256_maskz_permutex_epi64:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp9:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $15, %al
; X32-NEXT:    movb %al, (%esp)
; X32-NEXT:    movzbl (%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0]
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_permutex_epi64:
; X64:       # BB#0:
; X64-NEXT:    andb $15, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0]
; X64-NEXT:    retq
  %trn1 = trunc i8 %a0 to i4
  %arg0 = bitcast i4 %trn1 to <4 x i1>
  ; The shuffle mask mirrors the lane list in the vpermq check lines above.
  %res0 = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  %res1 = select <4 x i1> %arg0, <4 x i64> %res0, <4 x i64> zeroinitializer
  ret <4 x i64> %res1
}
479
; Unmasked 256-bit double permute written as generic IR: a single-input
; shufflevector whose lane mask is [3,0,0,0]. Both check prefixes expect
; it to be selected as one vpermpd on ymm0.
define <4 x double> @test_mm256_permutex_pd(<4 x double> %a0) {
; X32-LABEL: test_mm256_permutex_pd:
; X32:       # BB#0:
; X32-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,0,0,0]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_permutex_pd:
; X64:       # BB#0:
; X64-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,0,0,0]
; X64-NEXT:    retq
  ; The shuffle mask mirrors the lane list in the vpermpd check lines above.
  %res = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
  ret <4 x double> %res
}
493
; Merge-masked 256-bit double permute written as generic IR.
;   %a0 - passthrough vector, kept in lanes where the mask bit is clear
;   %a1 - i8 mask; only the low 4 bits are used (trunc to i4, then
;         bitcast to <4 x i1>)
;   %a2 - vector that is permuted with lane mask [1,0,0,0]
; The check lines record the expected code as-is: the mask is and'ed with
; 15 and round-tripped through a stack slot before kmovw, then a single
; vpermpd writes ymm0 under {%k1}.
define <4 x double> @test_mm256_mask_permutex_pd(<4 x double> %a0, i8 %a1, <4 x double> %a2) {
; X32-LABEL: test_mm256_mask_permutex_pd:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp10:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $15, %al
; X32-NEXT:    movb %al, (%esp)
; X32-NEXT:    movzbl (%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0]
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_permutex_pd:
; X64:       # BB#0:
; X64-NEXT:    andb $15, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0]
; X64-NEXT:    retq
  %trn1 = trunc i8 %a1 to i4
  %arg1 = bitcast i4 %trn1 to <4 x i1>
  ; The shuffle mask mirrors the lane list in the vpermpd check lines above.
  %res0 = shufflevector <4 x double> %a2, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  %res1 = select <4 x i1> %arg1, <4 x double> %res0, <4 x double> %a0
  ret <4 x double> %res1
}
523
; Zero-masked 256-bit double permute written as generic IR.
;   %a0 - i8 mask; only the low 4 bits are used (trunc to i4, then
;         bitcast to <4 x i1>)
;   %a1 - vector that is permuted with lane mask [1,0,0,0]
; Lanes whose mask bit is clear take zeroinitializer. The check lines
; record the expected code as-is: the mask is and'ed with 15 and
; round-tripped through a stack slot before kmovw, then a single vpermpd
; is emitted with {%k1} {z}.
define <4 x double> @test_mm256_maskz_permutex_pd(i8 %a0, <4 x double> %a1) {
; X32-LABEL: test_mm256_maskz_permutex_pd:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp11:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $15, %al
; X32-NEXT:    movb %al, (%esp)
; X32-NEXT:    movzbl (%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0]
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_permutex_pd:
; X64:       # BB#0:
; X64-NEXT:    andb $15, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0]
; X64-NEXT:    retq
  %trn1 = trunc i8 %a0 to i4
  %arg0 = bitcast i4 %trn1 to <4 x i1>
  ; The shuffle mask mirrors the lane list in the vpermpd check lines above.
  %res0 = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  %res1 = select <4 x i1> %arg0, <4 x double> %res0, <4 x double> zeroinitializer
  ret <4 x double> %res1
}
553
406554 !0 = !{i32 1}