llvm.org GIT mirror llvm / e775c34
[X86] Teach EVEX->VEX pass to turn SHUFI32X4/SHUFF32X4/SHUFI64X2/SHUFF64X2 into VPERM2F128/VPERM2I128. This recovers some of the tests that were changed by r317403. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317410 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 1 year, 11 months ago
9 changed file(s) with 77 addition(s) and 104 deletion(s). Raw diff Collapse all Expand all
170170 case X86::VALIGNDZ128rri:
171171 case X86::VALIGNDZ128rmi:
172172 case X86::VALIGNQZ128rri:
173 case X86::VALIGNQZ128rmi:
173 case X86::VALIGNQZ128rmi: {
174174 assert((NewOpc == X86::VPALIGNRrri || NewOpc == X86::VPALIGNRrmi) &&
175175 "Unexpected new opcode!");
176176 unsigned Scale = (Opc == X86::VALIGNQZ128rri ||
177177                   Opc == X86::VALIGNQZ128rmi) ? 8 : 4;
178178 MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands()-1);
179179 Imm.setImm(Imm.getImm() * Scale);
180180 break;
181 }
182 case X86::VSHUFF32X4Z256rmi:
183 case X86::VSHUFF32X4Z256rri:
184 case X86::VSHUFF64X2Z256rmi:
185 case X86::VSHUFF64X2Z256rri:
186 case X86::VSHUFI32X4Z256rmi:
187 case X86::VSHUFI32X4Z256rri:
188 case X86::VSHUFI64X2Z256rmi:
189 case X86::VSHUFI64X2Z256rri: {
190 assert((NewOpc == X86::VPERM2F128rr || NewOpc == X86::VPERM2I128rr ||
191 NewOpc == X86::VPERM2F128rm || NewOpc == X86::VPERM2I128rm) &&
192 "Unexpected new opcode!");
193 MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands()-1);
194 int64_t ImmVal = Imm.getImm();
195 // Set bit 5, move bit 1 to bit 4, copy bit 0.
196 Imm.setImm(0x20 | ((ImmVal & 2) << 3) | (ImmVal & 1));
197 break;
198 }
181199 }
182200 }
183201
34463446 ;
34473447 ; SKX-LABEL: test_perm2f128:
34483448 ; SKX: # BB#0:
3449 ; SKX-NEXT: vshuff64x2 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
3450 ; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
3449 ; SKX-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
3450 ; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
34513451 ; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
34523452 ; SKX-NEXT: retq # sched: [7:1.00]
34533453 ;
25302530 ;
25312531 ; SKX-LABEL: test_perm2i128:
25322532 ; SKX: # BB#0:
2533 ; SKX-NEXT: vshufi64x2 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
2534 ; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
2533 ; SKX-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
2534 ; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
25352535 ; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
25362536 ; SKX-NEXT: retq # sched: [7:1.00]
25372537 ;
27282728 ; CHECK-NEXT: ## ymm3 {%k1} {z} = ymm0[0,1,2,3],ymm1[4,5,6,7]
27292729 ; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x23,0xd1,0x16]
27302730 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1,2,3],ymm1[4,5,6,7]
2731 ; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x23,0xc1,0x16]
2732 ; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
2731 ; CHECK-NEXT: vperm2f128 $48, %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x06,0xc1,0x30]
2732 ; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3]
27332733 ; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
27342734 ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc0]
27352735 ; CHECK-NEXT: retq ## encoding: [0xc3]
27512751 ; CHECK-NEXT: ## ymm3 {%k1} {z} = ymm0[0,1],ymm1[2,3]
27522752 ; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x23,0xd1,0x16]
27532753 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1],ymm1[2,3]
2754 ; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x23,0xc1,0x16]
2754 ; CHECK-NEXT: vperm2f128 $48, %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x06,0xc1,0x30]
27552755 ; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3]
27562756 ; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
27572757 ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0]
27722772 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
27732773 ; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x43,0xd1,0x16]
27742774 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1,2,3],ymm1[4,5,6,7]
2775 ; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x43,0xc1,0x16]
2776 ; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
2775 ; CHECK-NEXT: vperm2i128 $48, %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x46,0xc1,0x30]
2776 ; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3]
27772777 ; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
27782778 ; CHECK-NEXT: retq ## encoding: [0xc3]
27792779 %res = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
27902790 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
27912791 ; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x43,0xd1,0x16]
27922792 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1],ymm1[2,3]
2793 ; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x43,0xc1,0x16]
2793 ; CHECK-NEXT: vperm2i128 $48, %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x46,0xc1,0x30]
27942794 ; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3]
27952795 ; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
27962796 ; CHECK-NEXT: retq ## encoding: [0xc3]
39743974 ;
39753975 ; AVX512VL-LABEL: concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc:
39763976 ; AVX512VL: # BB#0:
3977 ; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
3977 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
39783978 ; AVX512VL-NEXT: retq
39793979 %ahi = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32>
39803980 %bhi = shufflevector <16 x i16> %b, <16 x i16> undef, <8 x i32>
16811681 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
16821682 ; AVX1-NEXT: retq
16831683 ;
1684 ; AVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
1685 ; AVX2: # BB#0:
1686 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
1687 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
1688 ; AVX2-NEXT: retq
1689 ;
1690 ; AVX512VL-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
1691 ; AVX512VL: # BB#0:
1692 ; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
1693 ; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
1694 ; AVX512VL-NEXT: retq
1684 ; AVX2OR512VL-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
1685 ; AVX2OR512VL: # BB#0:
1686 ; AVX2OR512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
1687 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
1688 ; AVX2OR512VL-NEXT: retq
16951689 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
16961690 ret <32 x i8> %shuffle
16971691 }
17071701 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
17081702 ; AVX1-NEXT: retq
17091703 ;
1710 ; AVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
1711 ; AVX2: # BB#0:
1712 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1713 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
1714 ; AVX2-NEXT: retq
1715 ;
1716 ; AVX512VL-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
1717 ; AVX512VL: # BB#0:
1718 ; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1719 ; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
1720 ; AVX512VL-NEXT: retq
1704 ; AVX2OR512VL-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
1705 ; AVX2OR512VL: # BB#0:
1706 ; AVX2OR512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1707 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
1708 ; AVX2OR512VL-NEXT: retq
17211709 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
17221710 ret <32 x i8> %shuffle
17231711 }
417417 }
418418
419419 define <4 x double> @shuffle_v4f64_3254(<4 x double> %a, <4 x double> %b) {
420 ; AVX1-LABEL: shuffle_v4f64_3254:
421 ; AVX1: # BB#0:
422 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
423 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
424 ; AVX1-NEXT: retq
425 ;
426 ; AVX2-LABEL: shuffle_v4f64_3254:
427 ; AVX2: # BB#0:
428 ; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
429 ; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
430 ; AVX2-NEXT: retq
431 ;
432 ; AVX512VL-LABEL: shuffle_v4f64_3254:
433 ; AVX512VL: # BB#0:
434 ; AVX512VL-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
435 ; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
436 ; AVX512VL-NEXT: retq
420 ; ALL-LABEL: shuffle_v4f64_3254:
421 ; ALL: # BB#0:
422 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
423 ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
424 ; ALL-NEXT: retq
437425 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32>
438426 ret <4 x double> %shuffle
439427 }
440428
441429 define <4 x double> @shuffle_v4f64_3276(<4 x double> %a, <4 x double> %b) {
442 ; AVX1-LABEL: shuffle_v4f64_3276:
443 ; AVX1: # BB#0:
444 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
445 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
446 ; AVX1-NEXT: retq
447 ;
448 ; AVX2-LABEL: shuffle_v4f64_3276:
449 ; AVX2: # BB#0:
450 ; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
451 ; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
452 ; AVX2-NEXT: retq
453 ;
454 ; AVX512VL-LABEL: shuffle_v4f64_3276:
455 ; AVX512VL: # BB#0:
456 ; AVX512VL-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
457 ; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
458 ; AVX512VL-NEXT: retq
430 ; ALL-LABEL: shuffle_v4f64_3276:
431 ; ALL: # BB#0:
432 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
433 ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
434 ; ALL-NEXT: retq
459435 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32>
460436 ret <4 x double> %shuffle
461437 }
10761052 ;
10771053 ; AVX512VL-LABEL: shuffle_v4i64_3254:
10781054 ; AVX512VL: # BB#0:
1079 ; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
1055 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
10801056 ; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
10811057 ; AVX512VL-NEXT: retq
10821058 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32>
10981074 ;
10991075 ; AVX512VL-LABEL: shuffle_v4i64_3276:
11001076 ; AVX512VL: # BB#0:
1101 ; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1077 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
11021078 ; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
11031079 ; AVX512VL-NEXT: retq
11041080 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32>
767767 }
768768
769769 define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) {
770 ; AVX1OR2-LABEL: shuffle_v8f32_7654fedc:
771 ; AVX1OR2: # BB#0:
772 ; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
773 ; AVX1OR2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
774 ; AVX1OR2-NEXT: retq
775 ;
776 ; AVX512VL-LABEL: shuffle_v8f32_7654fedc:
777 ; AVX512VL: # BB#0:
778 ; AVX512VL-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
779 ; AVX512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
780 ; AVX512VL-NEXT: retq
770 ; ALL-LABEL: shuffle_v8f32_7654fedc:
771 ; ALL: # BB#0:
772 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
773 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
774 ; ALL-NEXT: retq
781775 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32>
782776 ret <8 x float> %shuffle
783777 }
784778
785779 define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) {
786 ; AVX1OR2-LABEL: shuffle_v8f32_fedc7654:
787 ; AVX1OR2: # BB#0:
788 ; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
789 ; AVX1OR2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
790 ; AVX1OR2-NEXT: retq
791 ;
792 ; AVX512VL-LABEL: shuffle_v8f32_fedc7654:
793 ; AVX512VL: # BB#0:
794 ; AVX512VL-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
795 ; AVX512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
796 ; AVX512VL-NEXT: retq
780 ; ALL-LABEL: shuffle_v8f32_fedc7654:
781 ; ALL: # BB#0:
782 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
783 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
784 ; ALL-NEXT: retq
797785 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32>
798786 ret <8 x float> %shuffle
799787 }
18081796 ;
18091797 ; AVX512VL-LABEL: shuffle_v8i32_7654fedc:
18101798 ; AVX512VL: # BB#0:
1811 ; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1799 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
18121800 ; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
18131801 ; AVX512VL-NEXT: retq
18141802 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32>
18241812 ;
18251813 ; AVX512VL-LABEL: shuffle_v8i32_fedc7654:
18261814 ; AVX512VL: # BB#0:
1827 ; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
1815 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
18281816 ; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
18291817 ; AVX512VL-NEXT: retq
18301818 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32>
22072195 ;
22082196 ; AVX512VL-LABEL: concat_v8i32_4567CDEF_bc:
22092197 ; AVX512VL: # BB#0:
2210 ; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
2198 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
22112199 ; AVX512VL-NEXT: retq
22122200 %a0hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32>
22132201 %a1hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32>
22192207 }
22202208
22212209 define <8 x float> @concat_v8f32_4567CDEF_bc(<8 x float> %f0, <8 x float> %f1) {
2222 ; AVX1OR2-LABEL: concat_v8f32_4567CDEF_bc:
2223 ; AVX1OR2: # BB#0:
2224 ; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
2225 ; AVX1OR2-NEXT: retq
2226 ;
2227 ; AVX512VL-LABEL: concat_v8f32_4567CDEF_bc:
2228 ; AVX512VL: # BB#0:
2229 ; AVX512VL-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
2230 ; AVX512VL-NEXT: retq
2210 ; ALL-LABEL: concat_v8f32_4567CDEF_bc:
2211 ; ALL: # BB#0:
2212 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
2213 ; ALL-NEXT: retq
22312214 %a0 = bitcast <8 x float> %f0 to <4 x i64>
22322215 %a1 = bitcast <8 x float> %f1 to <8 x i32>
22332216 %a0hi = shufflevector <4 x i64> %a0, <4 x i64> undef, <2 x i32>
154154 {"VALIGNQZ128rri", "VPALIGNRrri", true},
155155 {"VALIGNDZ128rmi", "VPALIGNRrmi", true},
156156 {"VALIGNQZ128rmi", "VPALIGNRrmi", true},
157 {"VSHUFF32X4Z256rmi", "VPERM2F128rm", false},
158 {"VSHUFF32X4Z256rri", "VPERM2F128rr", false},
159 {"VSHUFF64X2Z256rmi", "VPERM2F128rm", false},
160 {"VSHUFF64X2Z256rri", "VPERM2F128rr", false},
161 {"VSHUFI32X4Z256rmi", "VPERM2I128rm", false},
162 {"VSHUFI32X4Z256rri", "VPERM2I128rr", false},
163 {"VSHUFI64X2Z256rmi", "VPERM2I128rm", false},
164 {"VSHUFI64X2Z256rri", "VPERM2I128rr", false},
157165 };
158166
159167 // Print the manually added entries