llvm.org GIT mirror llvm / ca3a0c9
[AVX-512] Change another pattern that was using BLENDM to use masked moves. A future patch will convert it back to BLENDM if it's beneficial to register allocation. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291419 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 3 years ago
3 changed file(s) with 47 addition(s) and 38 deletion(s). Raw diff Collapse all Expand all
15871587 defm VPBLENDMW : blendmask_bw <0x66, "vpblendmw", avx512vl_i16_info>, VEX_W;
15881588
15891589
1590 let Predicates = [HasAVX512, NoVLX] in {
1591 def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
1592 (v8f32 VR256X:$src2))),
1593 (EXTRACT_SUBREG
1594 (v16f32 (VBLENDMPSZrrk (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
1595 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
1596 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>;
1597
1598 def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
1599 (v8i32 VR256X:$src2))),
1600 (EXTRACT_SUBREG
1601 (v16i32 (VPBLENDMDZrrk (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
1602 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
1603 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>;
1604 }
16051590 //===----------------------------------------------------------------------===//
16061591 // Compare Instructions
16071592 //===----------------------------------------------------------------------===//
29772962 (v16i32 VR512:$src))),
29782963 (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
29792964
2965 // Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
2966 // available. Use a 512-bit operation and extract.
2967 let Predicates = [HasAVX512, NoVLX] in {
2968 def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
2969 (v8f32 VR256X:$src0))),
2970 (EXTRACT_SUBREG
2971 (v16f32
2972 (VMOVAPSZrrk
2973 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
2974 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
2975 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
2976 sub_ymm)>;
2977
2978 def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
2979 (v8i32 VR256X:$src0))),
2980 (EXTRACT_SUBREG
2981 (v16i32
2982 (VMOVDQA32Zrrk
2983 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
2984 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
2985 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
2986 sub_ymm)>;
2987 }
2988
29802989 let Predicates = [HasVLX, NoBWI] in {
29812990 // 128-bit load/store without BWI.
29822991 def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
344344 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
345345 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
346346 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
347 ; KNL-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
348 ; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
349 ; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
347 ; KNL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
348 ; KNL-NEXT: vpxor %ymm0, %ymm0, %ymm0
349 ; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
350350 ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0
351351 ; KNL-NEXT: retq
352352 ;
368368 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
369369 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
370370 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
371 ; KNL-NEXT: vpmovsxbd (%rdi), %ymm0
372 ; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
373 ; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
371 ; KNL-NEXT: vpmovsxbd (%rdi), %ymm1
372 ; KNL-NEXT: vpxor %ymm0, %ymm0, %ymm0
373 ; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
374374 ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0
375375 ; KNL-NEXT: retq
376376 ;
703703 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
704704 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
705705 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
706 ; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
707 ; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
708 ; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
706 ; KNL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
707 ; KNL-NEXT: vpxor %ymm0, %ymm0, %ymm0
708 ; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
709709 ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0
710710 ; KNL-NEXT: retq
711711 ;
727727 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
728728 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
729729 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
730 ; KNL-NEXT: vpmovsxwd (%rdi), %ymm0
731 ; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
732 ; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
730 ; KNL-NEXT: vpmovsxwd (%rdi), %ymm1
731 ; KNL-NEXT: vpxor %ymm0, %ymm0, %ymm0
732 ; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
733733 ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0
734734 ; KNL-NEXT: retq
735735 ;
761761 ; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
762762 ; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
763763 ; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
764 ; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
765 ; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
766 ; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
764 ; KNL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
765 ; KNL-NEXT: vpxor %ymm0, %ymm0, %ymm0
766 ; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
767767 ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0
768768 ; KNL-NEXT: retq
769769 ;
121121 ; KNL-NEXT: ## kill: %YMM1 %YMM1 %ZMM1
122122 ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0
123123 ; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
124 ; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
125 ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0
124 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
125 ; KNL-NEXT: vmovdqa %ymm1, %ymm0
126126 ; KNL-NEXT: retq
127127 ;
128128 ; SKX-LABEL: test9:
142142 ; KNL-NEXT: ## kill: %YMM1 %YMM1 %ZMM1
143143 ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0
144144 ; KNL-NEXT: vcmpeqps %zmm1, %zmm0, %k1
145 ; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
146 ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0
145 ; KNL-NEXT: vmovaps %zmm0, %zmm1 {%k1}
146 ; KNL-NEXT: vmovaps %ymm1, %ymm0
147147 ; KNL-NEXT: retq
148148 ;
149149 ; SKX-LABEL: test10:
10191019 ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0
10201020 ; KNL-NEXT: vmovups (%rdi), %ymm2
10211021 ; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k1
1022 ; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
1023 ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0
1022 ; KNL-NEXT: vmovaps %zmm0, %zmm1 {%k1}
1023 ; KNL-NEXT: vmovaps %ymm1, %ymm0
10241024 ; KNL-NEXT: retq
10251025 ;
10261026 ; SKX-LABEL: test35:
11391139 ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0
11401140 ; KNL-NEXT: vbroadcastss (%rdi), %ymm2
11411141 ; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k1
1142 ; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
1143 ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0
1142 ; KNL-NEXT: vmovaps %zmm0, %zmm1 {%k1}
1143 ; KNL-NEXT: vmovaps %ymm1, %ymm0
11441144 ; KNL-NEXT: retq
11451145 ;
11461146 ; SKX-LABEL: test41: