llvm.org GIT mirror: llvm, commit 0ba66e5
[X86][XOP] Added VPPERM constant mask decoding and target shuffle combining support

Added additional test that peeks through bitcast to v16i8 mask

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@266533 91177308-0d34-0410-b5e6-96231b3b80d8

Simon Pilgrim, 4 years ago
4 changed files with 80 additions and 5 deletions.
lib/Target/X86/Utils/X86ShuffleDecode.cpp:

   }
 }
 
+void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask,
+                      SmallVectorImpl<int> &ShuffleMask) {
+  assert(RawMask.size() == 16 && "Illegal VPPERM shuffle mask size");
+
+  // VPPERM Operation
+  // Bits[4:0] - Byte Index (0 - 31)
+  // Bits[7:5] - Permute Operation
+  //
+  // Permute Operation:
+  // 0 - Source byte (no logical operation).
+  // 1 - Invert source byte.
+  // 2 - Bit reverse of source byte.
+  // 3 - Bit reverse of inverted source byte.
+  // 4 - 00h (zero - fill).
+  // 5 - FFh (ones - fill).
+  // 6 - Most significant bit of source byte replicated in all bit positions.
+  // 7 - Invert most significant bit of source byte and replicate in all bit positions.
+  for (int i = 0, e = RawMask.size(); i < e; ++i) {
+    uint64_t M = RawMask[i];
+    if (M == (uint64_t)SM_SentinelUndef) {
+      ShuffleMask.push_back(M);
+      continue;
+    }
+
+    uint64_t PermuteOp = (M >> 5) & 0x7;
+    if (PermuteOp == 4) {
+      ShuffleMask.push_back(SM_SentinelZero);
+      continue;
+    }
+    if (PermuteOp != 0) {
+      ShuffleMask.clear();
+      return;
+    }
+
+    uint64_t Index = M & 0x1F;
+    ShuffleMask.push_back((int)Index);
+  }
+}
+
 /// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD.
 /// No VT provided since it only works on 256-bit, 4 element vectors.
 void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
   for (unsigned i = 0; i != 4; ++i) {
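The decode keeps only control bytes whose permute operation is 0 (plain source byte) or 4 (zero-fill); any other operation aborts the decode, since it cannot be expressed as an ordinary shuffle mask. To illustrate the bit-slicing in isolation, here is a minimal standalone C++ sketch; the struct and the decodeVPPERMByte helper are made-up names for illustration, not LLVM code:

    #include <cstdint>
    #include <cstdio>

    // One VPPERM control byte: Bits[4:0] index one of the 32 source bytes
    // (the two 16-byte operands concatenated), Bits[7:5] select the permute op.
    struct VPPERMControl {
      unsigned Index;     // Bits[4:0]: source byte index (0 - 31).
      unsigned PermuteOp; // Bits[7:5]: 0 = source byte, 4 = zero-fill, ...
    };

    // Hypothetical helper mirroring the bit-slicing in DecodeVPPERMMask.
    VPPERMControl decodeVPPERMByte(uint8_t M) {
      return {M & 0x1Fu, (M >> 5) & 0x7u};
    }

    int main() {
      // 0x13 -> op 0, index 19: byte 3 of the second source operand.
      // 0x93 -> op 4 (zero-fill); the index bits are then irrelevant.
      for (unsigned M : {0x13u, 0x93u}) {
        VPPERMControl C = decodeVPPERMByte(uint8_t(M));
        std::printf("0x%02X -> op %u, index %u\n", M, C.PermuteOp, C.Index);
      }
    }

Feeding it 0x13 reports op 0, index 19; 0x93 reports op 4, which DecodeVPPERMMask records as SM_SentinelZero rather than a byte index.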
lib/Target/X86/Utils/X86ShuffleDecode.h:

 /// Decodes a PSWAPD 3DNow! instruction.
 void DecodePSWAPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
 
 /// Decodes the shuffle masks for shufp*.
 /// VT indicates the type of the vector allowing it to handle different
 /// datatypes and vector widths.
 void DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
...
 /// No VT provided since it only works on 256-bit, 4 element vectors.
 void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
 
+/// Decode a VPPERM mask from a raw array of constants such as from
+/// BUILD_VECTOR.
+/// This can only decode basic masks (permutes + zeros), not any of the other
+/// operations that VPPERM can perform.
+void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask,
+                      SmallVectorImpl<int> &ShuffleMask);
+
 /// Decode a zero extension instruction as a shuffle mask.
 void DecodeZeroExtendMask(MVT SrcScalarVT, MVT DstVT,
                           SmallVectorImpl<int> &ShuffleMask);
lib/Target/X86/X86ISelLowering.cpp:

   case X86ISD::VPERMILPV:
   case X86ISD::VPERM2X128:
   case X86ISD::VPERMI:
+  case X86ISD::VPPERM:
   case X86ISD::VPERMV:
   case X86ISD::VPERMV3:
   case X86ISD::VZEXT_MOVL:
...
   case X86ISD::MOVLPS:
     // Not yet implemented
     return false;
+  case X86ISD::VPPERM: {
+    IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
+    SDValue MaskNode = N->getOperand(2);
+    SmallVector<uint64_t, 32> RawMask;
+    if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask)) {
+      DecodeVPPERMMask(RawMask, Mask);
+      break;
+    }
+    if (auto *C = getTargetShuffleMaskConstant(MaskNode)) {
+      DecodeVPPERMMask(C, Mask);
+      break;
+    }
+    return false;
+  }
   case X86ISD::VPERMV: {
     IsUnary = true;
     // Unlike most shuffle nodes, VPERMV's mask operand is operand 0.
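Note the IsFakeUnary flag: when both operands are the same node, a two-input VPPERM is effectively a unary shuffle, because indices 16-31 (second input) alias indices 0-15 (first input). A small sketch of that canonicalization, assuming plain std::vector masks and a hypothetical helper name; LLVM does this inside its shuffle-combining logic rather than with a function like this:

    #include <vector>

    // If both shuffle inputs are the same value, every index into the second
    // input names the same byte as index - 16 into the first, so the mask can
    // be rewritten as unary. Negative sentinel values are left untouched.
    void canonicalizeFakeUnaryMask(std::vector<int> &Mask) {
      for (int &M : Mask)
        if (M >= 16)
          M -= 16;
    }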
...
   case X86ISD::MOVDDUP:
   case X86ISD::MOVSS:
   case X86ISD::MOVSD:
+  case X86ISD::VPPERM:
   case X86ISD::VPERMV3:
   case X86ISD::VPERMILPI:
   case X86ISD::VPERMILPV:
test/CodeGen/X86/vector-shuffle-combining-xop.ll:

 define <16 x i8> @combine_vpperm_identity(<16 x i8> %a0, <16 x i8> %a1) {
 ; CHECK-LABEL: combine_vpperm_identity:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vpperm {{.*#+}} xmm0 = xmm1[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; CHECK-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16>)
   %res1 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %res0, <16 x i8> undef, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
   ret <16 x i8> %res1
 }
 
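In this identity test, each vpperm reverses all sixteen bytes, and the combiner folds the chain by composing the two masks, which yields the identity and hence a plain register move. A sketch of the composition rule, assuming both masks are already unary (indices 0-15); composeMasks is a hypothetical name, not a function in this patch:

    #include <array>

    // Chained shuffles compose: the outer vpperm reads from the inner result,
    // so Combined[i] = InnerMask[OuterMask[i]].
    std::array<int, 16> composeMasks(const std::array<int, 16> &InnerMask,
                                     const std::array<int, 16> &OuterMask) {
      std::array<int, 16> Combined;
      for (int i = 0; i != 16; ++i)
        Combined[i] = InnerMask[OuterMask[i]];
      return Combined;
    }

With both masks equal to the reversal 15,14,...,0, Combined[i] = 15 - (15 - i) = i, the identity mask.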
 
+define <16 x i8> @combine_vpperm_identity_bitcast(<16 x i8> %a0, <16 x i8> %a1) {
+; CHECK-LABEL: combine_vpperm_identity_bitcast:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %mask = bitcast <2 x i64> <i64 1084818905618843912, i64 506097522914230528> to <16 x i8>
+  %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %mask)
+  %res1 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %res0, <16 x i8> undef, <16 x i8> %mask)
+  %res2 = bitcast <16 x i8> %res1 to <2 x i64>
+  %res3 = add <2 x i64> %res2, <i64 1084818905618843912, i64 506097522914230528>
+  %res4 = bitcast <2 x i64> %res3 to <16 x i8>
+  ret <16 x i8> %res4
+}
+
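The bitcast test feeds the mask in as a <2 x i64> constant, so the combine must peek through the bitcast and re-slice the two 64-bit elements into sixteen mask bytes, least-significant byte first on a little-endian target. A standalone sketch of that re-slicing (hypothetical helper, not the code in this patch):

    #include <cstdint>
    #include <vector>

    // Rebuild a v16i8 shuffle mask from a bitcast v2i64 constant: each 64-bit
    // element supplies eight mask bytes, least-significant byte first.
    std::vector<uint8_t> splitMaskElements(const std::vector<uint64_t> &Elts) {
      std::vector<uint8_t> Bytes;
      for (uint64_t E : Elts)
        for (unsigned i = 0; i != 8; ++i)
          Bytes.push_back(uint8_t((E >> (8 * i)) & 0xFF));
      return Bytes;
    }

Applied to the constants above (0x0F0E0D0C0B0A0908 and 0x0706050403020100), this yields the byte sequence 8..15 followed by 0..7, a qword swap; applying that permutation twice is the identity, which is why only the vpaddq survives in the checked output.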
 define <16 x i8> @combine_vpperm_as_unary_unpckhwd(<16 x i8> %a0, <16 x i8> %a1) {
 ; CHECK-LABEL: combine_vpperm_as_unary_unpckhwd:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; CHECK-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; CHECK-NEXT:    retq
   %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a0, <16 x i8> <i8 8, i8 24, i8 9, i8 25, i8 10, i8 26, i8 11, i8 27, i8 12, i8 28, i8 13, i8 29, i8 14, i8 30, i8 15, i8 31>)
   ret <16 x i8> %res0
 }