llvm (llvm.org GIT mirror), commit 91a1df2

[X86][AVX512] Add support for 512-bit PSHUFB lowering

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@274444 91177308-0d34-0410-b5e6-96231b3b80d8
Author: Simon Pilgrim

2 changed files with 10 additions and 17 deletions.
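The change below lets the generic byte-shuffle lowering emit a single AVX512BW VPSHUFB on a full zmm register for v64i8 shuffle masks, instead of splitting the shuffle into two 256-bit halves. As a rough illustration of the instruction being targeted (mine, not part of the commit; the helper name swap_bytes_in_words is invented), the C++ intrinsic _mm512_shuffle_epi8 exposes the same operation. The example swaps the two bytes of every 16-bit element, which is exactly the shuffle the v32i16 bitreverse test below exercises:

#include <immintrin.h>

// Swap the two bytes of each 16-bit element across a 512-bit vector.
// VPSHUFB indexes bytes modulo 16 within each 128-bit lane, so the 8-byte
// pattern [1,0,3,2,5,4,7,6] (0x0607040502030001 little-endian) repeated
// across the register covers all 64 bytes. Requires -mavx512bw.
static inline __m512i swap_bytes_in_words(__m512i v) {
  const __m512i idx = _mm512_set1_epi64(0x0607040502030001LL);
  return _mm512_shuffle_epi8(v, idx); // one vpshufb zmm
}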
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7255,11 +7255,12 @@ static SDValue lowerVectorShuffleWithPSHUFB(
   const int NumEltBytes = VT.getScalarSizeInBits() / 8;
 
   assert((Subtarget.hasSSSE3() && VT.is128BitVector()) ||
-         (Subtarget.hasAVX2() && VT.is256BitVector()));
+         (Subtarget.hasAVX2() && VT.is256BitVector()) ||
+         (Subtarget.hasBWI() && VT.is512BitVector()));
 
   SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
 
-  SmallVector<SDValue, 32> PSHUFBMask(NumBytes);
+  SmallVector<SDValue, 64> PSHUFBMask(NumBytes);
   // Sign bit set in i8 mask means zero element.
   SDValue ZeroMask = DAG.getConstant(0x80, DL, MVT::i8);
 
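The assert now admits 512-bit vectors when the subtarget has BWI, and the inline capacity of PSHUFBMask grows from 32 to 64 so a v64i8 mask fits without a heap allocation. For reference, here is a scalar sketch of the VPSHUFB semantics the mask encodes (a model of the instruction, not LLVM's code; pshufb_model is an invented name): each output byte selects from within its own 16-byte lane, and a set sign bit, the 0x80 ZeroMask above, yields zero.

#include <cstddef>
#include <cstdint>

// Scalar model of VPSHUFB for 16/32/64-byte vectors: mask[i] picks a byte
// from the 128-bit lane containing position i; 0x80 zeroes the element.
static void pshufb_model(uint8_t *dst, const uint8_t *src,
                         const uint8_t *mask, size_t num_bytes) {
  for (size_t i = 0; i != num_bytes; ++i) {
    if (mask[i] & 0x80) {            // sign bit set in i8 mask: zero element
      dst[i] = 0;
    } else {
      size_t lane = i & ~(size_t)15; // start of this 16-byte lane
      dst[i] = src[lane + (mask[i] & 0x0F)];
    }
  }
}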
@@ -11907,6 +11908,10 @@ static SDValue lowerV64I8VectorShuffle(
   if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
           DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
     return Rotate;
+
+  if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(DL, MVT::v64i8, Mask, V1,
+                                                    V2, Subtarget, DAG))
+    return PSHUFB;
 
   // FIXME: Implement direct support for this type!
   return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);
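In the v64i8 path, the new call tries a whole-register VPSHUFB before falling back to splitAndLowerVectorShuffle. Because VPSHUFB cannot move bytes across 128-bit lanes, the helper can only succeed when, among its other conditions (such as drawing from a single source vector), every non-zeroable mask element reads from the 16-byte lane it writes to. A hedged sketch of that lane-locality test (my paraphrase, not the actual code of lowerVectorShuffleWithPSHUFB; isLaneLocalByteMask is an invented name):

#include <cstddef>

// Returns true if a byte-shuffle mask is lane-local: each element is
// undef/zeroable (negative here) or sourced from its own 16-byte lane.
static bool isLaneLocalByteMask(const int *mask, size_t num_bytes) {
  for (size_t i = 0; i != num_bytes; ++i) {
    int m = mask[i];
    if (m >= 0 && (size_t)m / 16 != i / 16)
      return false; // crosses a 128-bit lane: one VPSHUFB cannot do this
  }
  return true;
}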
--- a/test/CodeGen/X86/vector-bitreverse.ll
+++ b/test/CodeGen/X86/vector-bitreverse.ll
@@ -2743,11 +2743,7 @@
 ;
 ; AVX512BW-LABEL: test_bitreverse_v32i16:
 ; AVX512BW:       # BB#0:
-; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
-; AVX512BW-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
-; AVX512BW-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
-; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,17,16,19,18,21,20,23,22,25,24,27,26,29,28,31,30,33,32,35,34,37,36,39,38,41,40,43,42,45,44,47,46,49,48,51,50,53,52,55,54,57,56,59,58,61,60,63,62]
 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
 ; AVX512BW-NEXT:    vmovdqu8 {{.*#+}} zmm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240]
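The test updates show the payoff: swapping the bytes within each i16 element previously took an extract of the high ymm, two 256-bit vpshufb's sharing a mask, and a reinsert; it is now one 512-bit vpshufb. The mask comment lists result bytes 0..63, but because VPSHUFB indexes modulo 16 the underlying constant is the same 16-byte pattern four times. The v16i32 and v8i64 hunks below are the identical transformation with 4- and 8-byte elements, so one illustrative mask generator covers all three (mine, not from the test suite; byteReverseMask is an invented name):

#include <cstddef>
#include <cstdint>

// Build the PSHUFB byte mask that reverses the bytes inside each element
// of elt_bytes (2 for v32i16, 4 for v16i32, 8 for v8i64). The indices
// repeat every 16 bytes, so the mask is lane-local by construction.
static void byteReverseMask(uint8_t *mask, size_t num_bytes,
                            size_t elt_bytes) {
  for (size_t i = 0; i != num_bytes; ++i) {
    size_t elt = i / elt_bytes;                  // element number
    size_t rev = elt_bytes - 1 - i % elt_bytes;  // reversed byte within it
    mask[i] = (uint8_t)(elt * elt_bytes + rev);
  }
}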
@@ -3174,11 +3170,7 @@
 ;
 ; AVX512BW-LABEL: test_bitreverse_v16i32:
 ; AVX512BW:       # BB#0:
-; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm2 = [3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
-; AVX512BW-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
-; AVX512BW-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
-; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,19,18,17,16,23,22,21,20,27,26,25,24,31,30,29,28,35,34,33,32,39,38,37,36,43,42,41,40,47,46,45,44,51,50,49,48,55,54,53,52,59,58,57,56,63,62,61,60]
 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
 ; AVX512BW-NEXT:    vmovdqu8 {{.*#+}} zmm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240]
@@ -3709,11 +3701,7 @@
 ;
 ; AVX512BW-LABEL: test_bitreverse_v8i64:
 ; AVX512BW:       # BB#0:
-; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm2 = [7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
-; AVX512BW-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
-; AVX512BW-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
-; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,16,31,30,29,28,27,26,25,24,39,38,37,36,35,34,33,32,47,46,45,44,43,42,41,40,55,54,53,52,51,50,49,48,63,62,61,60,59,58,57,56]
 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
 ; AVX512BW-NEXT:    vmovdqu8 {{.*#+}} zmm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240]