llvm.org GIT mirror llvm / 53cae13
The VPERM2F128 is an AVX instruction which permutes between two 256-bit vectors. It operates on 128-bit elements instead of regular scalar types. Recognize shuffles that are suitable for VPERM2F128 and teach the x86 legalizer how to handle them. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137519 91177308-0d34-0410-b5e6-96231b3b80d8 Bruno Cardoso Lopes 9 years ago
8 changed file(s) with 194 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
223223 DecodeVPERMILPDMask(4, MI->getOperand(2).getImm(),
224224 ShuffleMask);
225225 Src1Name = getRegName(MI->getOperand(0).getReg());
226 break;
227 case X86::VPERM2F128rr:
228 DecodeVPERM2F128Mask(MI->getOperand(3).getImm(), ShuffleMask);
229 Src1Name = getRegName(MI->getOperand(1).getReg());
230 Src2Name = getRegName(MI->getOperand(2).getReg());
226231 break;
227232 }
228233
219219 }
220220 }
221221
/// DecodeVPERM2F128Mask - Append to ShuffleMask the element indices selected
/// by the immediate Imm for a vector of type VT. The vector is split into
/// halves of VT.getVectorNumElements()/2 elements. Bits [1:0] of Imm choose
/// the half where the first run of indices starts, bits [5:4] choose it for
/// the second run. No other bit of Imm is examined by this decoder.
222 void DecodeVPERM2F128Mask(EVT VT, unsigned Imm,
223 SmallVectorImpl &ShuffleMask) {
224 unsigned HalfSize = VT.getVectorNumElements()/2;
225 unsigned FstHalfBegin = (Imm & 0x3) * HalfSize; // first index of the half picked by Imm[1:0]
226 unsigned SndHalfBegin = ((Imm >> 4) & 0x3) * HalfSize; // first index of the half picked by Imm[5:4]
227
228 for (int i = FstHalfBegin, e = FstHalfBegin+HalfSize; i != e; ++i) // HalfSize consecutive indices
229 ShuffleMask.push_back(i);
230 for (int i = SndHalfBegin, e = SndHalfBegin+HalfSize; i != e; ++i) // appended after the first run
231 ShuffleMask.push_back(i);
232 }
233
/// DecodeVPERM2F128Mask - Type-less overload: decode Imm into ShuffleMask by
/// forwarding to the EVT overload with MVT::v4i64 as the vector type.
234 void DecodeVPERM2F128Mask(unsigned Imm,
235 SmallVectorImpl &ShuffleMask) {
236 // VPERM2F128 is used by any 256-bit EVT, but X86InstComments only
237 // has information about the instruction and not the types. So for
238 // the purpose of instruction comments, assume the 256-bit vector is v4i64.
239 return DecodeVPERM2F128Mask(MVT::v4i64, Imm, ShuffleMask);
240 }
241
222242 } // llvm namespace
9696 void DecodeVPERMILPDMask(unsigned NElts, unsigned Imm,
9797 SmallVectorImpl &ShuffleMask);
9898
99 void DecodeVPERM2F128Mask(unsigned Imm,
100 SmallVectorImpl &ShuffleMask);
101 void DecodeVPERM2F128Mask(EVT VT, unsigned Imm,
102 SmallVectorImpl &ShuffleMask);
103
99104 } // llvm namespace
100105
101106 #endif
27522752 case X86ISD::VPERMILPSY:
27532753 case X86ISD::VPERMILPD:
27542754 case X86ISD::VPERMILPDY:
2755 case X86ISD::VPERM2F128:
27552756 return true;
27562757 }
27572758 return false;
27942795 case X86ISD::PALIGN:
27952796 case X86ISD::SHUFPD:
27962797 case X86ISD::SHUFPS:
2798 case X86ISD::VPERM2F128:
27972799 return DAG.getNode(Opc, dl, VT, V1, V2,
27982800 DAG.getConstant(TargetMask, MVT::i8));
27992801 }
30323034 return false;
30333035 }
30343036
3037 /// isSequentialOrUndefInRange - Return true if every element in Mask, beginning
3038 /// at position Pos and ending just before Pos+Size, satisfies isUndefOrEqual
3039 /// against the matching value of the sequence Low, Low+1, ..., Low+Size-1.
3040 static bool isSequentialOrUndefInRange(const SmallVectorImpl &Mask,
3041 int Pos, int Size, int Low) {
3042 for (int i = Pos, e = Pos+Size; i != e; ++i, ++Low) // Low advances in step with i
3043 if (!isUndefOrEqual(Mask[i], Low))
3044 return false;
3045 return true;
3046 }
3047
30353048 /// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
30363049 /// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference
30373050 /// the second operand.
34413454 SmallVector M;
34423455 N->getMask(M);
34433456 return ::isMOVLMask(M, N->getValueType(0));
3457 }
3458
3459 /// isVPERM2F128Mask - Match 256-bit shuffles where the elements are considered
3460 /// as permutations between 128-bit chunks or halves. As an example, take the
3461 /// shuffle below:
3462 /// vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>
3463 /// The first half comes from the second half of V1 and the second half from
3464 /// the second half of V2.
3465 static bool isVPERM2F128Mask(const SmallVectorImpl &Mask, EVT VT,
3466 const X86Subtarget *Subtarget) {
3467 if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256) // only 256-bit types with AVX qualify
3468 return false;
3469
3470 // The shuffle result is divided into half A and half B. In total the two
3471 // sources have 4 halves, namely: C, D, E, F. The final values of A and
3472 // B must come from C, D, E or F.
3473 int HalfSize = VT.getVectorNumElements()/2;
3474 bool MatchA = false, MatchB = false;
3475
3476 // Check if A comes from one of C, D, E, F: mask positions [0, HalfSize) must
// be sequential (or undef) starting at Half*HalfSize for some Half in 0..3.
3477 for (int Half = 0; Half < 4; ++Half) {
3478 if (isSequentialOrUndefInRange(Mask, 0, HalfSize, Half*HalfSize)) {
3479 MatchA = true;
3480 break;
3481 }
3482 }
3483
3484 // Check if B comes from one of C, D, E, F: same test applied to mask
// positions [HalfSize, 2*HalfSize).
3485 for (int Half = 0; Half < 4; ++Half) {
3486 if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, Half*HalfSize)) {
3487 MatchB = true;
3488 break;
3489 }
3490 }
3491
3492 return MatchA && MatchB; // both result halves must map onto a whole source half
3493 }
3494
3495 /// getShuffleVPERM2F128Immediate - Return the appropriate immediate to shuffle
3496 /// the specified VECTOR_MASK mask with VPERM2F128 instructions. The returned
/// value holds the half index chosen for the first half of the mask in its low
/// bits and the half index chosen for the second half shifted left by 4.
3497 static unsigned getShuffleVPERM2F128Immediate(SDNode *N) {
3498 ShuffleVectorSDNode *SVOp = cast(N);
3499 EVT VT = SVOp->getValueType(0);
3500
3501 int HalfSize = VT.getVectorNumElements()/2;
3502
3503 int FstHalf = 0, SndHalf = 0;
// Use the first strictly positive mask entry of each half to derive the
// half index (entry / HalfSize). Entries <= 0 are skipped and leave the
// initial value 0 in place; an entry of 0 would yield half 0 anyway.
// NOTE(review): presumably undef entries are negative - confirm.
3504 for (int i = 0; i < HalfSize; ++i) {
3505 if (SVOp->getMaskElt(i) > 0) {
3506 FstHalf = SVOp->getMaskElt(i)/HalfSize;
3507 break;
3508 }
3509 }
3510 for (int i = HalfSize; i < HalfSize*2; ++i) {
3511 if (SVOp->getMaskElt(i) > 0) {
3512 SndHalf = SVOp->getMaskElt(i)/HalfSize;
3513 break;
3514 }
3515 }
3516
3517 return (FstHalf | (SndHalf << 4)); // FstHalf in the low bits, SndHalf starting at bit 4
34443518 }
34453519
34463520 /// isVPERMILPDMask - Return true if the specified VECTOR_SHUFFLE operand
43164390 DecodeVPERMILPDMask(4, cast(ImmN)->getZExtValue(),
43174391 ShuffleMask);
43184392 break;
4393 case X86ISD::VPERM2F128:
4394 ImmN = N->getOperand(N->getNumOperands()-1);
4395 DecodeVPERM2F128Mask(VT, cast(ImmN)->getZExtValue(),
4396 ShuffleMask);
4397 break;
43194398 default:
43204399 assert("not implemented for target shuffle node");
43214400 return SDValue();
63336412 if (isVPERMILPDMask(M, VT, Subtarget))
63346413 return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1,
63356414 getShuffleVPERMILPDImmediate(SVOp), DAG);
6415
6416 // Handle VPERM2F128 permutations
6417 if (isVPERM2F128Mask(M, VT, Subtarget))
6418 return getTargetShuffleNode(X86ISD::VPERM2F128, dl, VT, V1, V2,
6419 getShuffleVPERM2F128Immediate(SVOp), DAG);
63366420
63376421 //===--------------------------------------------------------------------===//
63386422 // Since no target specific shuffle was selected for this generic one,
1005110135 case X86ISD::VPERMILPSY: return "X86ISD::VPERMILPSY";
1005210136 case X86ISD::VPERMILPD: return "X86ISD::VPERMILPD";
1005310137 case X86ISD::VPERMILPDY: return "X86ISD::VPERMILPDY";
10138 case X86ISD::VPERM2F128: return "X86ISD::VPERM2F128";
1005410139 case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
1005510140 case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
1005610141 case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
1313313218 case X86ISD::VPERMILPSY:
1313413219 case X86ISD::VPERMILPD:
1313513220 case X86ISD::VPERMILPDY:
13221 case X86ISD::VPERM2F128:
1313613222 case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI);
1313713223 }
1313813224
274274 VPERMILPSY,
275275 VPERMILPD,
276276 VPERMILPDY,
277 VPERM2F128,
277278
278279 // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
279280 // according to %al. An operator is needed so that this can be expanded
157157 def X86VPermilpd : SDNode<"X86ISD::VPERMILPD", SDTShuff2OpI>;
158158 def X86VPermilpdy : SDNode<"X86ISD::VPERMILPDY", SDTShuff2OpI>;
159159
160 def X86VPerm2f128 : SDNode<"X86ISD::VPERM2F128", SDTShuff3OpI>;
161
160162 //===----------------------------------------------------------------------===//
161163 // SSE Complex Patterns
162164 //===----------------------------------------------------------------------===//
56945694 VR256:$src1, (memopv8i32 addr:$src2), imm:$src3),
56955695 (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
56965696
5697 def : Pat<(v8f32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
5698 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
5699 def : Pat<(v8i32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
5700 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
5701 def : Pat<(v4i64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
5702 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
5703 def : Pat<(v4f64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
5704 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
5705 def : Pat<(v32i8 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
5706 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
5707 def : Pat<(v16i16 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
5708 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
5709
56975710 //===----------------------------------------------------------------------===//
56985711 // VZERO - Zero YMM registers
56995712 //
0 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
1
2 ; CHECK: vperm2f128 $1
3 define <8 x float> @A(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
4 entry:
5 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32>
6 ret <8 x float> %shuffle
7 }
8
9 ; CHECK: vperm2f128 $48
10 define <8 x float> @B(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
11 entry:
12 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32>
13 ret <8 x float> %shuffle
14 }
15
16 ; CHECK: vperm2f128 $0
17 define <8 x float> @C(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
18 entry:
19 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32>
20 ret <8 x float> %shuffle
21 }
22
23 ; CHECK: vperm2f128 $17
24 define <8 x float> @D(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
25 entry:
26 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32>
27 ret <8 x float> %shuffle
28 }
29
30 ; CHECK: vperm2f128 $17
31 define <32 x i8> @E(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
32 entry:
33 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
34 ret <32 x i8> %shuffle
35 }
36
37 ; CHECK: vperm2f128 $33
38 define <4 x i64> @E2(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
39 entry:
40 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32>
41 ret <4 x i64> %shuffle
42 }
43
44 ;;;; Cases with undef indices mixed in the mask
45
46 ; CHECK: vperm2f128 $33
47 define <8 x float> @F(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
48 entry:
49 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32>
50 ret <8 x float> %shuffle
51 }
52
53 ;;;; Cases we must not select vperm2f128
54
55 ; CHECK: _G
56 ; CHECK-NOT: vperm2f128
57 define <8 x float> @G(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
58 entry:
59 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32>
60 ret <8 x float> %shuffle
61 }