llvm.org GIT mirror: llvm / 324f4f1
Recognize canonical forms of vector shuffles where the same vector is used for both source operands. In the canonical form, the 2nd operand is changed to an undef and the shuffle mask is adjusted to only reference elements from the 1st operand. Radar 7434842.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@90417 91177308-0d34-0410-b5e6-96231b3b80d8

Bob Wilson, 10 years ago
2 changed files with 106 additions and 1 deletion.
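To make the canonical form concrete, here is a minimal sketch (a hypothetical illustration, not the actual SelectionDAG canonicalization code) of the mask rewrite the commit message describes: when both shuffle operands are the same vector, indices pointing into the second copy are folded back into the first operand, and the second operand becomes undef.

#include <cstdio>
#include <vector>

// Hypothetical illustration (not the actual SelectionDAG code): for a
// "vector_shuffle v, v" with NumElts elements per operand, fold mask
// indices that point into the second copy of v back into the first
// operand, yielding the canonical "vector_shuffle v, undef" form that
// the new is*_v_undef_Mask predicates below recognize.
static void canonicalizeSameSourceMask(std::vector<int> &Mask,
                                       unsigned NumElts) {
  for (int &Idx : Mask)
    if (Idx >= (int)NumElts)
      Idx -= NumElts; // element NumElts+i of (v, v) is element i of v
}

int main() {
  // VTRN result 0 of "vector_shuffle v, v" on a 4-element vector:
  std::vector<int> Mask = {0, 4, 2, 6};
  canonicalizeSameSourceMask(Mask, 4);
  for (int Idx : Mask)
    std::printf("%d ", Idx); // prints "0 0 2 2", the canonical form
  std::printf("\n");
  return 0;
}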
lib/Target/ARM/ARMISelLowering.cpp:

   return true;
 }
 
+/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
+static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+                                unsigned &WhichResult) {
+  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+  if (EltSz == 64)
+    return false;
+
+  unsigned NumElts = VT.getVectorNumElements();
+  WhichResult = (M[0] == 0 ? 0 : 1);
+  for (unsigned i = 0; i < NumElts; i += 2) {
+    if ((unsigned) M[i] != i + WhichResult ||
+        (unsigned) M[i+1] != i + WhichResult)
+      return false;
+  }
+  return true;
+}
+
 static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
                        unsigned &WhichResult) {
   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
@@ ... @@
   for (unsigned i = 0; i != NumElts; ++i) {
     if ((unsigned) M[i] != 2 * i + WhichResult)
       return false;
+  }
+
+  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+  if (VT.is64BitVector() && EltSz == 32)
+    return false;
+
+  return true;
+}
+
+/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
+static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+                                unsigned &WhichResult) {
+  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+  if (EltSz == 64)
+    return false;
+
+  unsigned Half = VT.getVectorNumElements() / 2;
+  WhichResult = (M[0] == 0 ? 0 : 1);
+  for (unsigned j = 0; j != 2; ++j) {
+    unsigned Idx = WhichResult;
+    for (unsigned i = 0; i != Half; ++i) {
+      if ((unsigned) M[i + j * Half] != Idx)
+        return false;
+      Idx += 2;
+    }
+  }
   }
 
   // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
@@ ... @@
 
   return true;
 }
+
+/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
+static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+                                unsigned &WhichResult) {
+  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+  if (EltSz == 64)
+    return false;
+
+  unsigned NumElts = VT.getVectorNumElements();
+  WhichResult = (M[0] == 0 ? 0 : 1);
+  unsigned Idx = WhichResult * NumElts / 2;
+  for (unsigned i = 0; i != NumElts; i += 2) {
+    if ((unsigned) M[i] != Idx ||
+        (unsigned) M[i+1] != Idx)
+      return false;
+    Idx += 1;
+  }
+
+  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+  if (VT.is64BitVector() && EltSz == 32)
+    return false;
+
+  return true;
+}
+
 
 static SDValue BuildSplat(SDValue Val, EVT VT, SelectionDAG &DAG, DebugLoc dl) {
   // Canonicalize all-zeros and all-ones vectors.
@@ ... @@
           isVEXTMask(M, VT, ReverseVEXT, Imm) ||
           isVTRNMask(M, VT, WhichResult) ||
           isVUZPMask(M, VT, WhichResult) ||
-          isVZIPMask(M, VT, WhichResult));
+          isVZIPMask(M, VT, WhichResult) ||
+          isVTRN_v_undef_Mask(M, VT, WhichResult) ||
+          isVUZP_v_undef_Mask(M, VT, WhichResult) ||
+          isVZIP_v_undef_Mask(M, VT, WhichResult));
 }
 
 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
@@ ... @@
   if (isVZIPMask(ShuffleMask, VT, WhichResult))
     return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
                        V1, V2).getValue(WhichResult);
+
+  if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
+    return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+                       V1, V1).getValue(WhichResult);
+  if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+    return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+                       V1, V1).getValue(WhichResult);
+  if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+    return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+                       V1, V1).getValue(WhichResult);
 
   // If the shuffle is not directly supported and it has 4 elements, use
   // the PerfectShuffle-generated table to synthesize it from other shuffles.
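In the lowering above, the same input V1 is passed as both operands of the two-result VTRN/VUZP/VZIP node, and getValue(WhichResult) picks whichever of the two results the mask asked for. The three predicates themselves are easy to exercise in isolation; below is a minimal standalone sketch (plain std::vector in place of SmallVectorImpl, EVT element-size checks dropped, so a simplification rather than the patch's exact code) that accepts exactly the example masks from the doc comments:

#include <cstdio>
#include <vector>

// Simplified stand-ins for the patch's predicates.
static bool isVTRNvUndef(const std::vector<int> &M, unsigned &Which) {
  Which = (M[0] == 0 ? 0 : 1);
  for (unsigned i = 0; i < M.size(); i += 2)
    if ((unsigned)M[i] != i + Which || (unsigned)M[i + 1] != i + Which)
      return false;
  return true;
}

static bool isVUZPvUndef(const std::vector<int> &M, unsigned &Which) {
  unsigned Half = M.size() / 2;
  Which = (M[0] == 0 ? 0 : 1);
  for (unsigned j = 0; j != 2; ++j) {
    unsigned Idx = Which;
    for (unsigned i = 0; i != Half; ++i) {
      if ((unsigned)M[i + j * Half] != Idx)
        return false;
      Idx += 2;
    }
  }
  return true;
}

static bool isVZIPvUndef(const std::vector<int> &M, unsigned &Which) {
  Which = (M[0] == 0 ? 0 : 1);
  unsigned Idx = Which * M.size() / 2;
  for (unsigned i = 0; i < M.size(); i += 2) {
    if ((unsigned)M[i] != Idx || (unsigned)M[i + 1] != Idx)
      return false;
    ++Idx;
  }
  return true;
}

int main() {
  unsigned Which;
  // The doc comments' example masks, all on 4-element vectors:
  std::printf("VTRN <0,0,2,2>: %d\n", isVTRNvUndef({0, 0, 2, 2}, Which)); // 1
  std::printf("VUZP <0,2,0,2>: %d\n", isVUZPvUndef({0, 2, 0, 2}, Which)); // 1
  std::printf("VZIP <0,0,1,1>: %d\n", isVZIPvUndef({0, 0, 1, 1}, Which)); // 1
  return 0;
}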
New test file:

; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s

target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
target triple = "armv7-apple-darwin10"

%struct.int16x8_t = type { <8 x i16> }
%struct.int16x8x2_t = type { [2 x %struct.int16x8_t] }

define arm_apcscc void @t(%struct.int16x8x2_t* noalias nocapture sret %agg.result, <8 x i16> %tmp.0, %struct.int16x8x2_t* nocapture %dst) nounwind {
entry:
;CHECK: vtrn.16
  %0 = shufflevector <8 x i16> %tmp.0, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  %1 = shufflevector <8 x i16> %tmp.0, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  %agg.result1218.0 = getelementptr %struct.int16x8x2_t* %agg.result, i32 0, i32 0, i32 0, i32 0 ; <<8 x i16>*>
  store <8 x i16> %0, <8 x i16>* %agg.result1218.0, align 16
  %agg.result12.1.0 = getelementptr %struct.int16x8x2_t* %agg.result, i32 0, i32 0, i32 1, i32 0 ; <<8 x i16>*>
  store <8 x i16> %1, <8 x i16>* %agg.result12.1.0, align 16
  ret void
}
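Per the RUN line, llc compiles this IR for Cortex-A8 and FileCheck verifies that a vtrn.16 is emitted: the two shufflevector masks are the WhichResult = 0 and WhichResult = 1 forms accepted by isVTRN_v_undef_Mask for <8 x i16>, so both stores can be fed from the two results of a single VTRN of %tmp.0 with itself.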