llvm.org GIT mirror llvm / db85662
[X86][AVX2] Allow VPERMPD/VPERMQ shuffles to call combineShuffle (reapplied)

This improves the situation discussed in D19228, where we were forcing VPERMPD/VPERMQ in cases where VPERM2F128/VPERM2I128 would have been better.

This was incorrectly reverted in rL275421 during triage of PR28552.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275497 91177308-0d34-0410-b5e6-96231b3b80d8

Simon Pilgrim, 4 years ago
6 changed file(s) with 19 addition(s) and 18 deletion(s). Raw diff Collapse all Expand all
3101031010 case X86ISD::MOVSS:
3101131011 case X86ISD::MOVSD:
3101231012 case X86ISD::VPPERM:
31013 case X86ISD::VPERMI:
3101331014 case X86ISD::VPERMV:
3101431015 case X86ISD::VPERMV3:
3101531016 case X86ISD::VPERMIL2:
4141 ;
4242 ; AVX2-LABEL: shuffle_v8f32_01230123:
4343 ; AVX2: ## BB#0: ## %entry
44 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
44 ; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
4545 ; AVX2-NEXT: retq
4646 entry:
4747 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32>
5757 ;
5858 ; AVX2-LABEL: shuffle_v8f32_01230123_mem:
5959 ; AVX2: ## BB#0: ## %entry
60 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,1,0,1]
60 ; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = mem[0,1,0,1]
6161 ; AVX2-NEXT: retq
6262 entry:
6363 %a = load <8 x float>, <8 x float>* %pa
492492 ;
493493 ; X64-LABEL: test_mm256_broadcastsi128_si256:
494494 ; X64: # BB#0:
495 ; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
495 ; X64-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
496496 ; X64-NEXT: retq
497497 %res = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32>
498498 ret <4 x i64> %res
454454 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
455455 ; AVX2: # BB#0:
456456 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
457 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
457 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
458458 ; AVX2-NEXT: retq
459459 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32>
460460 ret <16 x i16> %shuffle
470470 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
471471 ; AVX2: # BB#0:
472472 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
473 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
473 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
474474 ; AVX2-NEXT: retq
475475 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32>
476476 ret <16 x i16> %shuffle
486486 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
487487 ; AVX2: # BB#0:
488488 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
489 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
489 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
490490 ; AVX2-NEXT: retq
491491 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32>
492492 ret <16 x i16> %shuffle
502502 ; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
503503 ; AVX2: # BB#0:
504504 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
505 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
505 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
506506 ; AVX2-NEXT: retq
507507 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32>
508508 ret <16 x i16> %shuffle
518518 ; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
519519 ; AVX2: # BB#0:
520520 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
521 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
521 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
522522 ; AVX2-NEXT: retq
523523 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32>
524524 ret <16 x i16> %shuffle
534534 ; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
535535 ; AVX2: # BB#0:
536536 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
537 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
537 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
538538 ; AVX2-NEXT: retq
539539 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32>
540540 ret <16 x i16> %shuffle
550550 ; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
551551 ; AVX2: # BB#0:
552552 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
553 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
553 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
554554 ; AVX2-NEXT: retq
555555 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32>
556556 ret <16 x i16> %shuffle
817817 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
818818 ; AVX2: # BB#0:
819819 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
820 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
820 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
821821 ; AVX2-NEXT: retq
822822 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
823823 ret <32 x i8> %shuffle
833833 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
834834 ; AVX2: # BB#0:
835835 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
836 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
836 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
837837 ; AVX2-NEXT: retq
838838 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
839839 ret <32 x i8> %shuffle
849849 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
850850 ; AVX2: # BB#0:
851851 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
852 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
852 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
853853 ; AVX2-NEXT: retq
854854 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
855855 ret <32 x i8> %shuffle
865865 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
866866 ; AVX2: # BB#0:
867867 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
868 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
868 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
869869 ; AVX2-NEXT: retq
870870 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
871871 ret <32 x i8> %shuffle
881881 ; AVX2-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
882882 ; AVX2: # BB#0:
883883 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
884 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
884 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
885885 ; AVX2-NEXT: retq
886886 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
887887 ret <32 x i8> %shuffle
901901 ; AVX2-NEXT: movl $15, %eax
902902 ; AVX2-NEXT: vmovd %eax, %xmm1
903903 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm0
904 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
904 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
905905 ; AVX2-NEXT: retq
906906 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
907907 ret <32 x i8> %shuffle
687687 ; AVX2-LABEL: shuffle_v8f32_32103210:
688688 ; AVX2: # BB#0:
689689 ; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
690 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
690 ; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
691691 ; AVX2-NEXT: retq
692692 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32>
693693 ret <8 x float> %shuffle
17691769 ; AVX2-LABEL: shuffle_v8i32_32103210:
17701770 ; AVX2: # BB#0:
17711771 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
1772 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1772 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
17731773 ; AVX2-NEXT: retq
17741774 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32>
17751775 ret <8 x i32> %shuffle