llvm.org GIT mirror llvm / a97a64c
[AVX-512] Fix DecodeVPERMV3Mask to handle cases where the constant pool entry has a different type than the shuffle itself. Summary: This is especially important for 32-bit targets with 64-bit shuffle elements. This is similar to how PSHUFB and VPERMIL handle the same problem. Reviewers: RKSimon Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D25666 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@284451 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 4 years ago
4 changed file(s) with 31 addition(s) and 37 deletion(s). Raw diff Collapse all Expand all
51015101 Ops.push_back(N->getOperand(0));
51025102 Ops.push_back(N->getOperand(2));
51035103 SDValue MaskNode = N->getOperand(1);
5104 unsigned MaskEltSize = VT.getScalarSizeInBits();
51045105 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5105 DecodeVPERMV3Mask(C, VT, Mask);
5106 DecodeVPERMV3Mask(C, MaskEltSize, Mask);
51065107 break;
51075108 }
51085109 return false;
51135114 Ops.push_back(N->getOperand(1));
51145115 Ops.push_back(N->getOperand(2));
51155116 SDValue MaskNode = N->getOperand(0);
5117 unsigned MaskEltSize = VT.getScalarSizeInBits();
51165118 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5117 DecodeVPERMV3Mask(C, VT, Mask);
5119 DecodeVPERMV3Mask(C, MaskEltSize, Mask);
51185120 break;
51195121 }
51205122 return false;
308308 ShuffleMask.push_back(Element);
309309 }
310310
311 void DecodeVPERMV3Mask(const Constant *C, MVT VT,
311 void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize,
312312 SmallVectorImpl<int> &ShuffleMask) {
313313 Type *MaskTy = C->getType();
314 unsigned NumElements = MaskTy->getVectorNumElements();
315 if (NumElements == VT.getVectorNumElements()) {
316 unsigned EltMaskSize = Log2_64(NumElements * 2);
317 for (unsigned i = 0; i < NumElements; ++i) {
318 Constant *COp = C->getAggregateElement(i);
319 if (!COp) {
320 ShuffleMask.clear();
321 return;
322 }
323 if (isa<UndefValue>(COp))
324 ShuffleMask.push_back(SM_SentinelUndef);
325 else {
326 APInt Element = cast<ConstantInt>(COp)->getValue();
327 Element = Element.getLoBits(EltMaskSize);
328 ShuffleMask.push_back(Element.getZExtValue());
329 }
330 }
314 unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
315 (void)MaskTySize;
316 assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
317 "Unexpected vector size.");
318 assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) &&
319 "Unexpected vector element size.");
320
321 // The shuffle mask requires elements the same size as the target.
322 SmallBitVector UndefElts;
323 SmallVector<uint64_t, 8> RawMask;
324 if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
325 return;
326
327 unsigned NumElts = RawMask.size();
328
329 for (unsigned i = 0; i != NumElts; ++i) {
330 if (UndefElts[i]) {
331 ShuffleMask.push_back(SM_SentinelUndef);
332 continue;
333 }
334 int Index = RawMask[i] & (NumElts*2 - 1);
335 ShuffleMask.push_back(Index);
331336 }
332337 }
333338 } // llvm namespace
4343 SmallVectorImpl<int> &ShuffleMask);
4444
4545 /// Decode a VPERMT2 W/D/Q/PS/PD mask from an IR-level vector constant.
46 void DecodeVPERMV3Mask(const Constant *C, MVT VT,
46 void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize,
4747 SmallVectorImpl<int> &ShuffleMask);
4848
4949 } // llvm namespace
111111 define <8 x double> @combine_vpermt2var_8f64_identity(<8 x double> %x0, <8 x double> %x1) {
112112 ; X32-LABEL: combine_vpermt2var_8f64_identity:
113113 ; X32: # BB#0:
114 ; X32-NEXT: vmovapd {{.*#+}} zmm2 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
115 ; X32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
116 ; X32-NEXT: vmovapd {{.*#+}} zmm1 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
117 ; X32-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0
118114 ; X32-NEXT: retl
119115 ;
120116 ; X64-LABEL: combine_vpermt2var_8f64_identity:
151147 define <8 x double> @combine_vpermt2var_8f64_movddup(<8 x double> %x0, <8 x double> %x1) {
152148 ; X32-LABEL: combine_vpermt2var_8f64_movddup:
153149 ; X32: # BB#0:
154 ; X32-NEXT: vmovapd {{.*#+}} zmm2 = <0,0,0,0,2,0,2,0,4,0,4,0,u,u,u,u>
155 ; X32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
150 ; X32-NEXT: vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
156151 ; X32-NEXT: retl
157152 ;
158153 ; X64-LABEL: combine_vpermt2var_8f64_movddup:
166161 ; X32-LABEL: combine_vpermt2var_8f64_movddup_load:
167162 ; X32: # BB#0:
168163 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
169 ; X32-NEXT: vmovapd (%eax), %zmm1
170 ; X32-NEXT: vmovapd {{.*#+}} zmm2 = [0,0,0,0,2,0,2,0,4,0,4,0,6,0,6,0]
171 ; X32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
172 ; X32-NEXT: vmovapd %zmm1, %zmm0
164 ; X32-NEXT: vmovddup {{.*#+}} zmm0 = mem[0,0,2,2,4,4,6,6]
173165 ; X32-NEXT: retl
174166 ;
175167 ; X64-LABEL: combine_vpermt2var_8f64_movddup_load:
185177 ; X32: # BB#0:
186178 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
187179 ; X32-NEXT: kmovd %eax, %k1
188 ; X32-NEXT: vmovapd {{.*#+}} zmm2 = [0,0,0,0,2,0,2,0,4,0,4,0,6,0,6,0]
189 ; X32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0 {%k1} {z}
180 ; X32-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
190181 ; X32-NEXT: retl
191182 ;
192183 ; X64-LABEL: combine_vpermt2var_8f64_movddup_mask:
867858 define <8 x double> @combine_vpermi2var_8f64_identity(<8 x double> %x0, <8 x double> %x1) {
868859 ; X32-LABEL: combine_vpermi2var_8f64_identity:
869860 ; X32: # BB#0:
870 ; X32-NEXT: vmovapd {{.*#+}} zmm2 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
871 ; X32-NEXT: vpermi2pd %zmm1, %zmm0, %zmm2
872 ; X32-NEXT: vmovapd {{.*#+}} zmm0 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
873 ; X32-NEXT: vpermi2pd %zmm2, %zmm2, %zmm0
874861 ; X32-NEXT: retl
875862 ;
876863 ; X64-LABEL: combine_vpermi2var_8f64_identity: