llvm.org GIT mirror llvm / 63ae300
[X86] Fix DecodeVPERMVMask to handle cases where the constant pool entry has a different type than the shuffle itself. This is especially important for 32-bit targets with 64-bit shuffle elements.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@284453 91177308-0d34-0410-b5e6-96231b3b80d8
Craig Topper, 4 years ago
4 changed file(s) with 28 addition(s) and 40 deletion(s). [Raw diff] [Collapse all] [Expand all]
50905090 break;
50915091 }
50925092 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5093 DecodeVPERMVMask(C, VT, Mask);
5093 DecodeVPERMVMask(C, MaskEltSize, Mask);
50945094 break;
50955095 }
50965096 return false;
274274 }
275275 }
276276
277 void DecodeVPERMVMask(const Constant *C, MVT VT,
277 void DecodeVPERMVMask(const Constant *C, unsigned ElSize,
278278 SmallVectorImpl &ShuffleMask) {
279279 Type *MaskTy = C->getType();
280 if (MaskTy->isVectorTy()) {
281 unsigned NumElements = MaskTy->getVectorNumElements();
282 if (NumElements == VT.getVectorNumElements()) {
283 unsigned EltMaskSize = Log2_64(NumElements);
284 for (unsigned i = 0; i < NumElements; ++i) {
285 Constant *COp = C->getAggregateElement(i);
286 if (!COp || (!isa(COp) && !isa(COp))) {
287 ShuffleMask.clear();
288 return;
289 }
290 if (isa(COp))
291 ShuffleMask.push_back(SM_SentinelUndef);
292 else {
293 APInt Element = cast(COp)->getValue();
294 Element = Element.getLoBits(EltMaskSize);
295 ShuffleMask.push_back(Element.getZExtValue());
296 }
297 }
298 }
299 return;
300 }
301 // Scalar value; just broadcast it
302 if (!isa(C))
303 return;
304 uint64_t Element = cast(C)->getZExtValue();
305 int NumElements = VT.getVectorNumElements();
306 Element &= (1 << NumElements) - 1;
307 for (int i = 0; i < NumElements; ++i)
308 ShuffleMask.push_back(Element);
280 unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
281 (void)MaskTySize;
282 assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
283 "Unexpected vector size.");
284 assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) &&
285 "Unexpected vector element size.");
286
287 // The shuffle mask requires elements the same size as the target.
288 SmallBitVector UndefElts;
289 SmallVector RawMask;
290 if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
291 return;
292
293 unsigned NumElts = RawMask.size();
294
295 for (unsigned i = 0; i != NumElts; ++i) {
296 if (UndefElts[i]) {
297 ShuffleMask.push_back(SM_SentinelUndef);
298 continue;
299 }
300 int Index = RawMask[i] & (NumElts - 1);
301 ShuffleMask.push_back(Index);
302 }
309303 }
310304
311305 void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize,
3939 void DecodeVPPERMMask(const Constant *C, SmallVectorImpl &ShuffleMask);
4040
4141 /// Decode a VPERM W/D/Q/PS/PD mask from an IR-level vector constant.
42 void DecodeVPERMVMask(const Constant *C, MVT VT,
42 void DecodeVPERMVMask(const Constant *C, unsigned ElSize,
4343 SmallVectorImpl &ShuffleMask);
4444
4545 /// Decode a VPERMT2 W/D/Q/PS/PD mask from an IR-level vector constant.
2727 define <8 x double> @combine_permvar_8f64_identity(<8 x double> %x0, <8 x double> %x1) {
2828 ; X32-LABEL: combine_permvar_8f64_identity:
2929 ; X32: # BB#0:
30 ; X32-NEXT: vmovapd {{.*#+}} zmm1 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
31 ; X32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
32 ; X32-NEXT: vmovapd {{.*#+}} zmm1 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
33 ; X32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
3430 ; X32-NEXT: retl
3531 ;
3632 ; X64-LABEL: combine_permvar_8f64_identity:
702698 define <8 x double> @combine_permvar_8f64_as_permpd(<8 x double> %x0, <8 x double> %x1) {
703699 ; X32-LABEL: combine_permvar_8f64_as_permpd:
704700 ; X32: # BB#0:
705 ; X32-NEXT: vmovapd {{.*#+}} zmm1 = <3,0,2,0,1,0,u,u,u,u,6,0,5,0,4,0>
706 ; X32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
701 ; X32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
707702 ; X32-NEXT: retl
708703 ;
709704 ; X64-LABEL: combine_permvar_8f64_as_permpd:
718713 ; X32: # BB#0:
719714 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
720715 ; X32-NEXT: kmovd %eax, %k1
721 ; X32-NEXT: vmovapd {{.*#+}} zmm2 = <3,0,2,0,1,0,u,u,u,u,6,0,5,0,4,0>
722 ; X32-NEXT: vpermpd %zmm0, %zmm2, %zmm1 {%k1}
716 ; X32-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,2,1,0,7,6,5,4]
723717 ; X32-NEXT: vmovapd %zmm1, %zmm0
724718 ; X32-NEXT: retl
725719 ;