llvm.org GIT mirror llvm / c710179
[X86][XOP] Support for VPPERM 2-input shuffle mask decoding. This patch adds support for decoding the XOP VPPERM instruction when it represents a basic shuffle. The mask decoding required the existing MCInstrLowering code to be updated to support binary shuffles - the implementation now matches what is done in X86InstrComments.cpp. Differential Revision: http://reviews.llvm.org/D18441 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@265874 91177308-0d34-0410-b5e6-96231b3b80d8 Simon Pilgrim 4 years ago
4 changed file(s) with 133 addition(s) and 33 deletion(s). Raw diff Collapse all Expand all
10171017 }
10181018
10191019 static std::string getShuffleComment(const MachineOperand &DstOp,
1020 const MachineOperand &SrcOp,
1020 const MachineOperand &SrcOp1,
1021 const MachineOperand &SrcOp2,
10211022 ArrayRef Mask) {
10221023 std::string Comment;
10231024
10311032 };
10321033
10331034 StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem";
1034 StringRef SrcName = SrcOp.isReg() ? GetRegisterName(SrcOp.getReg()) : "mem";
1035 StringRef Src1Name =
1036 SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem";
1037 StringRef Src2Name =
1038 SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : "mem";
1039
1040 // One source operand, fix the mask to print all elements in one span.
1041 SmallVector ShuffleMask(Mask.begin(), Mask.end());
1042 if (Src1Name == Src2Name)
1043 for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
1044 if (ShuffleMask[i] >= e)
1045 ShuffleMask[i] -= e;
10351046
10361047 raw_string_ostream CS(Comment);
10371048 CS << DstName << " = ";
1038 bool NeedComma = false;
1039 bool InSrc = false;
1040 for (int M : Mask) {
1041 // Wrap up any prior entry...
1042 if (M == SM_SentinelZero && InSrc) {
1043 InSrc = false;
1044 CS << "]";
1045 }
1046 if (NeedComma)
1049 for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
1050 if (i != 0)
10471051 CS << ",";
1048 else
1049 NeedComma = true;
1050
1051 // Print this shuffle...
1052 if (M == SM_SentinelZero) {
1052 if (ShuffleMask[i] == SM_SentinelZero) {
10531053 CS << "zero";
1054 } else {
1055 if (!InSrc) {
1056 InSrc = true;
1057 CS << SrcName << "[";
1058 }
1059 if (M == SM_SentinelUndef)
1054 continue;
1055 }
1056
1057 // Otherwise, it must come from src1 or src2. Print the span of elements
1058 // that comes from this src.
1059 bool isSrc1 = ShuffleMask[i] < (int)e;
1060 CS << (isSrc1 ? Src1Name : Src2Name) << '[';
1061
1062 bool IsFirst = true;
1063 while (i != e && ShuffleMask[i] != SM_SentinelZero &&
1064 (ShuffleMask[i] < (int)e) == isSrc1) {
1065 if (!IsFirst)
1066 CS << ',';
1067 else
1068 IsFirst = false;
1069 if (ShuffleMask[i] == SM_SentinelUndef)
10601070 CS << "u";
10611071 else
1062 CS << M;
1063 }
1064 }
1065 if (InSrc)
1066 CS << "]";
1072 CS << ShuffleMask[i] % (int)e;
1073 ++i;
1074 }
1075 CS << ']';
1076 --i; // For loop increments element #.
1077 }
10671078 CS.flush();
10681079
10691080 return Comment;
13121323 SmallVector Mask;
13131324 DecodePSHUFBMask(C, Mask);
13141325 if (!Mask.empty())
1315 OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, Mask));
1326 OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, SrcOp, Mask));
13161327 }
13171328 break;
13181329 }
13391350 SmallVector Mask;
13401351 DecodeVPERMILPMask(C, ElSize, Mask);
13411352 if (!Mask.empty())
1342 OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, Mask));
1353 OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, SrcOp, Mask));
1354 }
1355 break;
1356 }
1357 case X86::VPPERMrrm: {
1358 if (!OutStreamer->isVerboseAsm())
1359 break;
1360 assert(MI->getNumOperands() > 6 &&
1361 "We should always have at least 6 operands!");
1362 const MachineOperand &DstOp = MI->getOperand(0);
1363 const MachineOperand &SrcOp1 = MI->getOperand(1);
1364 const MachineOperand &SrcOp2 = MI->getOperand(2);
1365 const MachineOperand &MaskOp = MI->getOperand(6);
1366
1367 if (auto *C = getConstantFromPool(*MI, MaskOp)) {
1368 SmallVector Mask;
1369 DecodeVPPERMMask(C, Mask);
1370 if (!Mask.empty())
1371 OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp1, SrcOp2, Mask));
13431372 }
13441373 break;
13451374 }
150150 }
151151
152152 // TODO: Handle funny-looking vectors too.
153 }
154
155 void DecodeVPPERMMask(const Constant *C, SmallVectorImpl &ShuffleMask) {
156 Type *MaskTy = C->getType();
157 assert(MaskTy->getPrimitiveSizeInBits() == 128);
158
159 // Only support vector types.
160 if (!MaskTy->isVectorTy())
161 return;
162
163 // Make sure it's an integer type.
164 Type *VecEltTy = MaskTy->getVectorElementType();
165 if (!VecEltTy->isIntegerTy())
166 return;
167
168 // The shuffle mask requires a byte vector - decode cases with
169 // wider elements as well.
170 unsigned BitWidth = cast(VecEltTy)->getBitWidth();
171 if ((BitWidth % 8) != 0)
172 return;
173
174 int NumElts = MaskTy->getVectorNumElements();
175 int Scale = BitWidth / 8;
176 int NumBytes = NumElts * Scale;
177 ShuffleMask.reserve(NumBytes);
178
179 for (int i = 0; i != NumElts; ++i) {
180 Constant *COp = C->getAggregateElement(i);
181 if (!COp) {
182 ShuffleMask.clear();
183 return;
184 } else if (isa(COp)) {
185 ShuffleMask.append(Scale, SM_SentinelUndef);
186 continue;
187 }
188
189 // VPPERM Operation
190 // Bits[4:0] - Byte Index (0 - 31)
191 // Bits[7:5] - Permute Operation
192 //
193 // Permute Operation:
194 // 0 - Source byte (no logical operation).
195 // 1 - Invert source byte.
196 // 2 - Bit reverse of source byte.
197 // 3 - Bit reverse of inverted source byte.
198 // 4 - 00h (zero - fill).
199 // 5 - FFh (ones - fill).
200 // 6 - Most significant bit of source byte replicated in all bit positions.
201 // 7 - Invert most significant bit of source byte and replicate in all bit positions.
202 APInt MaskElt = cast(COp)->getValue();
203 for (int j = 0; j != Scale; ++j) {
204 APInt Index = MaskElt.getLoBits(5);
205 APInt PermuteOp = MaskElt.lshr(5).getLoBits(3);
206 MaskElt = MaskElt.lshr(8);
207
208 if (PermuteOp == 4) {
209 ShuffleMask.push_back(SM_SentinelZero);
210 continue;
211 }
212 if (PermuteOp != 0) {
213 ShuffleMask.clear();
214 return;
215 }
216 ShuffleMask.push_back((int)Index.getZExtValue());
217 }
218 }
219
220 assert(NumBytes == (int)ShuffleMask.size() && "Unexpected shuffle mask size");
153221 }
154222
155223 void DecodeVPERMVMask(const Constant *C, MVT VT,
3131 void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
3232 SmallVectorImpl &ShuffleMask);
3333
34 /// Decode a VPPERM variable mask from an IR-level vector constant.
35 void DecodeVPPERMMask(const Constant *C, SmallVectorImpl &ShuffleMask);
36
3437 /// Decode a VPERM W/D/Q/PS/PD mask from an IR-level vector constant.
3538 void DecodeVPERMVMask(const Constant *C, MVT VT,
3639 SmallVectorImpl &ShuffleMask);
1212 define <16 x i8> @combine_vpperm_identity(<16 x i8> %a0, <16 x i8> %a1) {
1313 ; CHECK-LABEL: combine_vpperm_identity:
1414 ; CHECK: # BB#0:
15 ; CHECK-NEXT: vpperm {{.*}}(%rip), %xmm1, %xmm0, %xmm0
16 ; CHECK-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0
15 ; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm1[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
16 ; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
1717 ; CHECK-NEXT: retq
1818 %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> )
1919 %res1 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %res0, <16 x i8> undef, <16 x i8> )
2323 define <16 x i8> @combine_vpperm_as_unary_unpckhwd(<16 x i8> %a0, <16 x i8> %a1) {
2424 ; CHECK-LABEL: combine_vpperm_as_unary_unpckhwd:
2525 ; CHECK: # BB#0:
26 ; CHECK-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0
26 ; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
2727 ; CHECK-NEXT: retq
2828 %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a0, <16 x i8> )
2929 ret <16 x i8> %res0
3232 define <16 x i8> @combine_vpperm_as_unpckhwd(<16 x i8> %a0, <16 x i8> %a1) {
3333 ; CHECK-LABEL: combine_vpperm_as_unpckhwd:
3434 ; CHECK: # BB#0:
35 ; CHECK-NEXT: vpperm {{.*}}(%rip), %xmm1, %xmm0, %xmm0
35 ; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
3636 ; CHECK-NEXT: retq
3737 %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> )
3838 ret <16 x i8> %res0