llvm.org GIT mirror llvm / 4850be4
[x86] Teach the vector comment parsing and printing to correctly handle undef in the shuffle mask. This shows up when we're printing comments during lowering and we still have an IR-level constant hanging around that models undef. A nice consequence of this is *much* prettier test cases where the undef lanes actually show up as undef rather than as a particular set of values. This also allows us to print shuffle comments in cases that use undef such as the recently added variable VPERMILPS lowering. Now those test cases have nice shuffle comments attached with their details. The shuffle lowering for PSHUFB has been augmented to use undef, and the shuffle combining has been augmented to comprehend it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218301 91177308-0d34-0410-b5e6-96231b3b80d8 Chandler Carruth 6 years ago
7 changed file(s) with 127 addition(s) and 70 deletion(s). Raw diff Collapse all Expand all
627627 const char *SrcName = isSrc1 ? Src1Name : Src2Name;
628628 OS << (SrcName ? SrcName : "mem") << '[';
629629 bool IsFirst = true;
630 while (i != e &&
631 (int)ShuffleMask[i] >= 0 &&
630 while (i != e && (int)ShuffleMask[i] != SM_SentinelZero &&
632631 (ShuffleMask[i] < (int)ShuffleMask.size()) == isSrc1) {
633632 if (!IsFirst)
634633 OS << ',';
635634 else
636635 IsFirst = false;
637 OS << ShuffleMask[i] % ShuffleMask.size();
636 if (ShuffleMask[i] == SM_SentinelUndef)
637 OS << "u";
638 else
639 OS << ShuffleMask[i] % ShuffleMask.size();
638640 ++i;
639641 }
640642 OS << ']';
223223 }
224224 }
225225
226 void DecodePSHUFBMask(const ConstantDataSequential *C,
227 SmallVectorImpl<int> &ShuffleMask) {
226 void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
228227 Type *MaskTy = C->getType();
229228 assert(MaskTy->isVectorTy() && "Expected a vector constant mask!");
230229 assert(MaskTy->getVectorElementType()->isIntegerTy(8) &&
233232 // FIXME: Add support for AVX-512.
234233 assert((NumElements == 16 || NumElements == 32) &&
235234 "Only 128-bit and 256-bit vectors supported!");
236 assert((unsigned)NumElements == C->getNumElements() &&
237 "Constant mask has a different number of elements!");
238
239235 ShuffleMask.reserve(NumElements);
240 for (int i = 0; i < NumElements; ++i) {
241 // For AVX vectors with 32 bytes the base of the shuffle is the half of the
242 // vector we're inside.
243 int Base = i < 16 ? 0 : 16;
244 uint64_t Element = C->getElementAsInteger(i);
245 // If the high bit (7) of the byte is set, the element is zeroed.
246 if (Element & (1 << 7))
247 ShuffleMask.push_back(SM_SentinelZero);
248 else {
249 // Only the least significant 4 bits of the byte are used.
250 int Index = Base + (Element & 0xf);
251 ShuffleMask.push_back(Index);
236
237 if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
238 assert((unsigned)NumElements == CDS->getNumElements() &&
239 "Constant mask has a different number of elements!");
240
241 for (int i = 0; i < NumElements; ++i) {
242 // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
243 // lane of the vector we're inside.
244 int Base = i < 16 ? 0 : 16;
245 uint64_t Element = CDS->getElementAsInteger(i);
246 // If the high bit (7) of the byte is set, the element is zeroed.
247 if (Element & (1 << 7))
248 ShuffleMask.push_back(SM_SentinelZero);
249 else {
250 // Only the least significant 4 bits of the byte are used.
251 int Index = Base + (Element & 0xf);
252 ShuffleMask.push_back(Index);
253 }
254 }
255 } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
256 assert((unsigned)NumElements == CV->getNumOperands() &&
257 "Constant mask has a different number of elements!");
258
259 for (int i = 0; i < NumElements; ++i) {
260 // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
261 // lane of the vector we're inside.
262 int Base = i < 16 ? 0 : 16;
263 Constant *COp = CV->getOperand(i);
264 if (isa<UndefValue>(COp)) {
265 ShuffleMask.push_back(SM_SentinelUndef);
266 continue;
267 }
268 uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
269 // If the high bit (7) of the byte is set, the element is zeroed.
270 if (Element & (1 << 7))
271 ShuffleMask.push_back(SM_SentinelZero);
272 else {
273 // Only the least significant 4 bits of the byte are used.
274 int Index = Base + (Element & 0xf);
275 ShuffleMask.push_back(Index);
276 }
252277 }
253278 }
254279 }
257282 SmallVectorImpl<int> &ShuffleMask) {
258283 for (int i = 0, e = RawMask.size(); i < e; ++i) {
259284 uint64_t M = RawMask[i];
285 if (M == (uint64_t)SM_SentinelUndef) {
286 ShuffleMask.push_back(M);
287 continue;
288 }
260289 // For AVX vectors with 32 bytes the base of the shuffle is the half of
261290 // the vector we're inside.
262291 int Base = i < 16 ? 0 : 16;
286315 }
287316 }
288317
289 void DecodeVPERMILPMask(const ConstantDataSequential *C,
290 SmallVectorImpl<int> &ShuffleMask) {
318 void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
291319 Type *MaskTy = C->getType();
292320 assert(MaskTy->isVectorTy() && "Expected a vector constant mask!");
293321 assert(MaskTy->getVectorElementType()->isIntegerTy() &&
296324 int NumElements = MaskTy->getVectorNumElements();
297325 assert((NumElements == 2 || NumElements == 4 || NumElements == 8) &&
298326 "Unexpected number of vector elements.");
299 assert((unsigned)NumElements == C->getNumElements() &&
300 "Constant mask has a different number of elements!");
301
302327 ShuffleMask.reserve(NumElements);
303 for (int i = 0; i < NumElements; ++i) {
304 int Base = (i * ElementBits / 128) * (128 / ElementBits);
305 uint64_t Element = C->getElementAsInteger(i);
306 // Only the least significant 2 bits of the integer are used.
307 int Index = Base + (Element & 0x3);
308 ShuffleMask.push_back(Index);
328 if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
329 assert((unsigned)NumElements == CDS->getNumElements() &&
330 "Constant mask has a different number of elements!");
331
332 for (int i = 0; i < NumElements; ++i) {
333 int Base = (i * ElementBits / 128) * (128 / ElementBits);
334 uint64_t Element = CDS->getElementAsInteger(i);
335 // Only the least significant 2 bits of the integer are used.
336 int Index = Base + (Element & 0x3);
337 ShuffleMask.push_back(Index);
338 }
339 } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
340 assert((unsigned)NumElements == C->getNumOperands() &&
341 "Constant mask has a different number of elements!");
342
343 for (int i = 0; i < NumElements; ++i) {
344 int Base = (i * ElementBits / 128) * (128 / ElementBits);
345 Constant *COp = CV->getOperand(i);
346 if (isa<UndefValue>(COp)) {
347 ShuffleMask.push_back(SM_SentinelUndef);
348 continue;
349 }
350 uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
351 // Only the least significant 2 bits of the integer are used.
352 int Index = Base + (Element & 0x3);
353 ShuffleMask.push_back(Index);
354 }
309355 }
310356 }
311357
2222 //===----------------------------------------------------------------------===//
2323
2424 namespace llvm {
25 class ConstantDataSequential;
25 class Constant;
2626 class MVT;
2727
28 enum {
29 SM_SentinelZero = -1
30 };
28 enum { SM_SentinelZero = -1, SM_SentinelUndef = -2 };
3129
3230 void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
3331
6563 void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
6664
6765 /// \brief Decode a PSHUFB mask from an IR-level vector constant.
68 void DecodePSHUFBMask(const ConstantDataSequential *C,
69 SmallVectorImpl<int> &ShuffleMask);
66 void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
7067
7168 /// \brief Decode a PSHUFB mask from a raw array of constants such as from
7269 /// BUILD_VECTOR.
8481 void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
8582
8683 /// \brief Decode a VPERMILP variable mask from an IR-level vector constant.
87 void DecodeVPERMILPMask(const ConstantDataSequential *C,
88 SmallVectorImpl<int> &ShuffleMask);
84 void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
8985
9086 } // llvm namespace
9187
53465346
53475347 SmallVector RawMask;
53485348 for (int i = 0, e = MaskNode->getNumOperands(); i < e; ++i) {
5349 auto *CN = dyn_cast<ConstantSDNode>(MaskNode->getOperand(i));
5349 SDValue Op = MaskNode->getOperand(i);
5350 if (Op->getOpcode() == ISD::UNDEF) {
5351 RawMask.push_back((uint64_t)SM_SentinelUndef);
5352 continue;
5353 }
5354 auto *CN = dyn_cast<ConstantSDNode>(Op.getNode());
53505355 if (!CN)
53515356 return false;
53525357 APInt MaskElement = CN->getAPIntValue();
53765381 if (!MaskCP || MaskCP->isMachineConstantPoolEntry())
53775382 return false;
53785383
5379 if (auto *C = dyn_cast<ConstantDataSequential>(MaskCP->getConstVal())) {
5384 if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
53805385 // FIXME: Support AVX-512 here.
5381 if (!C->getType()->isVectorTy() ||
5382 (C->getNumElements() != 16 && C->getNumElements() != 32))
5386 Type *Ty = C->getType();
5387 if (!Ty->isVectorTy() || (Ty->getVectorNumElements() != 16 &&
5388 Ty->getVectorNumElements() != 32))
53835389 return false;
53845390
5385 assert(C->getType()->isVectorTy() && "Expected a vector constant.");
53865391 DecodePSHUFBMask(C, Mask);
53875392 break;
53885393 }
89938998 SDValue V2Mask[16];
89948999 for (int i = 0; i < 16; ++i)
89959000 if (Mask[i] == -1) {
8996 V1Mask[i] = V2Mask[i] = DAG.getConstant(0x80, MVT::i8);
9001 V1Mask[i] = V2Mask[i] = DAG.getUNDEF(MVT::i8);
89979002 } else {
89989003 V1Mask[i] = DAG.getConstant(Mask[i] < 16 ? Mask[i] : 0x80, MVT::i8);
89999004 V2Mask[i] =
2016620171 assert(Mask.size() <= 16 && "Can't shuffle elements smaller than bytes!");
2016720172 int Ratio = 16 / Mask.size();
2016820173 for (unsigned i = 0; i < 16; ++i) {
20174 if (Mask[i / Ratio] == SM_SentinelUndef) {
20175 PSHUFBMask.push_back(DAG.getUNDEF(MVT::i8));
20176 continue;
20177 }
2016920178 int M = Mask[i / Ratio] != SM_SentinelZero
2017020179 ? Ratio * Mask[i / Ratio] + i % Ratio
2017120180 : 255;
2027620285 // for this order is that we are recursing up the operation chain.
2027720286 for (int i = 0, e = std::max(OpMask.size(), RootMask.size()); i < e; ++i) {
2027820287 int RootIdx = i / RootRatio;
20279 if (RootMask[RootIdx] == SM_SentinelZero) {
20280 // This is a zero-ed lane, we're done.
20281 Mask.push_back(SM_SentinelZero);
20288 if (RootMask[RootIdx] < 0) {
20289 // This is a zero or undef lane, we're done.
20290 Mask.push_back(RootMask[RootIdx]);
2028220291 continue;
2028320292 }
2028420293
2028520294 int RootMaskedIdx = RootMask[RootIdx] * RootRatio + i % RootRatio;
2028620295 int OpIdx = RootMaskedIdx / OpRatio;
20287 if (OpMask[OpIdx] == SM_SentinelZero) {
20288 // The incoming lanes are zero, it doesn't matter which ones we are using.
20289 Mask.push_back(SM_SentinelZero);
20296 if (OpMask[OpIdx] < 0) {
20297 // The incoming lanes are zero or undef, it doesn't matter which ones we
20298 // are using.
20299 Mask.push_back(OpMask[OpIdx]);
2029020300 continue;
2029120301 }
2029220302
10591059 Type *MaskTy = MaskConstantEntry.getType();
10601060 (void)MaskTy;
10611061 if (!MaskConstantEntry.isMachineConstantPoolEntry())
1062 if (auto *C = dyn_cast<ConstantDataSequential>(
1063 MaskConstantEntry.Val.ConstVal)) {
1062 if (auto *C = dyn_cast<Constant>(MaskConstantEntry.Val.ConstVal)) {
10641063 assert(MaskTy == C->getType() &&
10651064 "Expected a constant of the same type!");
10661065
10761075 DecodeVPERMILPMask(C, Mask);
10771076 }
10781077
1079 assert(Mask.size() == MaskTy->getVectorNumElements() &&
1080 "Shuffle mask has a different size than its type!");
1078 assert(
1079 (Mask.empty() || Mask.size() == MaskTy->getVectorNumElements()) &&
1080 "Shuffle mask has a different size than its type!");
10811081 }
10821082 }
10831083
11031103 InSrc = true;
11041104 CS << SrcName << "[";
11051105 }
1106 CS << M;
1106 if (M == SM_SentinelUndef)
1107 CS << "u";
1108 else
1109 CS << M;
11071110 }
11081111 }
11091112 if (InSrc)
300300 ;
301301 ; SSSE3-LABEL: @trunc_v4i32_shuffle
302302 ; SSSE3: # BB#0:
303 ; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
303 ; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
304304 ; SSSE3-NEXT: retq
305305 ;
306306 ; SSE41-LABEL: @trunc_v4i32_shuffle
307307 ; SSE41: # BB#0:
308 ; SSE41-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
308 ; SSE41-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
309309 ; SSE41-NEXT: retq
310310 %shuffle = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32>
311311 ret <16 x i8> %shuffle
497497 define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) {
498498 ; ALL-LABEL: @shuffle_v8f32_002u6u44
499499 ; ALL: # BB#0:
500 ; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0
500 ; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,2,u,6,u,4,4]
501501 ; ALL-NEXT: retq
502502 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32>
503503 ret <8 x float> %shuffle
506506 define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) {
507507 ; ALL-LABEL: @shuffle_v8f32_00uu66uu
508508 ; ALL: # BB#0:
509 ; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0
509 ; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,u,u,6,6,u,u]
510510 ; ALL-NEXT: retq
511511 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32>
512512 ret <8 x float> %shuffle
515515 define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) {
516516 ; ALL-LABEL: @shuffle_v8f32_103245uu
517517 ; ALL: # BB#0:
518 ; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0
518 ; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[1,0,3,2,4,5,u,u]
519519 ; ALL-NEXT: retq
520520 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32>
521521 ret <8 x float> %shuffle
524524 define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) {
525525 ; ALL-LABEL: @shuffle_v8f32_1133uu67
526526 ; ALL: # BB#0:
527 ; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0
527 ; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[1,1,3,3,u,u,6,7]
528528 ; ALL-NEXT: retq
529529 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32>
530530 ret <8 x float> %shuffle
533533 define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) {
534534 ; ALL-LABEL: @shuffle_v8f32_0uu354uu
535535 ; ALL: # BB#0:
536 ; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0
536 ; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,u,u,3,5,4,u,u]
537537 ; ALL-NEXT: retq
538538 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32>
539539 ret <8 x float> %shuffle
542542 define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) {
543543 ; ALL-LABEL: @shuffle_v8f32_uuu3uu66
544544 ; ALL: # BB#0:
545 ; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0
545 ; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[u,u,u,3,u,u,6,6]
546546 ; ALL-NEXT: retq
547547 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32>
548548 ret <8 x float> %shuffle
10431043 define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) {
10441044 ; ALL-LABEL: @shuffle_v8i32_002u6u44
10451045 ; ALL: # BB#0:
1046 ; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0
1046 ; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,2,u,6,u,4,4]
10471047 ; ALL-NEXT: retq
10481048 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32>
10491049 ret <8 x i32> %shuffle
10521052 define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) {
10531053 ; ALL-LABEL: @shuffle_v8i32_00uu66uu
10541054 ; ALL: # BB#0:
1055 ; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0
1055 ; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,u,u,6,6,u,u]
10561056 ; ALL-NEXT: retq
10571057 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32>
10581058 ret <8 x i32> %shuffle
10611061 define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) {
10621062 ; ALL-LABEL: @shuffle_v8i32_103245uu
10631063 ; ALL: # BB#0:
1064 ; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0
1064 ; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[1,0,3,2,4,5,u,u]
10651065 ; ALL-NEXT: retq
10661066 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32>
10671067 ret <8 x i32> %shuffle
10701070 define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) {
10711071 ; ALL-LABEL: @shuffle_v8i32_1133uu67
10721072 ; ALL: # BB#0:
1073 ; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0
1073 ; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[1,1,3,3,u,u,6,7]
10741074 ; ALL-NEXT: retq
10751075 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32>
10761076 ret <8 x i32> %shuffle
10791079 define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) {
10801080 ; ALL-LABEL: @shuffle_v8i32_0uu354uu
10811081 ; ALL: # BB#0:
1082 ; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0
1082 ; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,u,u,3,5,4,u,u]
10831083 ; ALL-NEXT: retq
10841084 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32>
10851085 ret <8 x i32> %shuffle
10881088 define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) {
10891089 ; ALL-LABEL: @shuffle_v8i32_uuu3uu66
10901090 ; ALL: # BB#0:
1091 ; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0
1091 ; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[u,u,u,3,u,u,6,6]
10921092 ; ALL-NEXT: retq
10931093 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32>
10941094 ret <8 x i32> %shuffle