llvm.org GIT mirror llvm / ee1d801
[Power9] Exploit vector integer extend instructions when indices aren't correct. This patch adds on to the exploitation added by https://reviews.llvm.org/D33510. This now catches build vector nodes where the inputs are coming from sign extended vector extract elements where the indices used by the vector extract are not correct. We can still use the new hardware instructions by adding a shuffle to move the elements to the correct indices. I introduced a new PPCISD node here because adding a vector_shuffle and changing the elements of the vector_extracts was getting undone by another DAG combine. Commit on behalf of Zaara Syeda (syzaara@ca.ibm.com) Differential Revision: https://reviews.llvm.org/D34009 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@307169 91177308-0d34-0410-b5e6-96231b3b80d8 Tony Jiang 2 years ago
5 changed file(s) with 445 addition(s) and 58 deletion(s). Raw diff Collapse all Expand all
11671167 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
11681168 case PPCISD::STXSIX: return "PPCISD::STXSIX";
11691169 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1170 case PPCISD::SExtVElems: return "PPCISD::SExtVElems";
11701171 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
11711172 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
11721173 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1131011311 return SDValue();
1131111312 }
1131211313
11314 // This function adds the required vector_shuffle needed to get
11315 // the elements of the vector extract in the correct position
11316 // as specified by the CorrectElems encoding.
11317 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
11318 SDValue Input, uint64_t Elems,
11319 uint64_t CorrectElems) {
11320 SDLoc dl(N);
11321
11322 unsigned NumElems = Input.getValueType().getVectorNumElements();
11323 SmallVector ShuffleMask(NumElems, -1);
11324
11325 // Knowing the element indices being extracted from the original
11326 // vector and the order in which they're being inserted, just put
11327 // them at element indices required for the instruction.
11328 for (unsigned i = 0; i < N->getNumOperands(); i++) {
11329 if (DAG.getDataLayout().isLittleEndian())
11330 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
11331 else
11332 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
11333 CorrectElems = CorrectElems >> 8;
11334 Elems = Elems >> 8;
11335 }
11336
11337 SDValue Shuffle =
11338 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
11339 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
11340
11341 EVT Ty = N->getValueType(0);
11342 SDValue BV = DAG.getNode(PPCISD::SExtVElems, dl, Ty, Shuffle);
11343 return BV;
11344 }
11345
11346 // Look for build vector patterns where input operands come from sign
11347 // extended vector_extract elements of specific indices. If the correct indices
11348 // aren't used, add a vector shuffle to fix up the indices and create a new
11349 // PPCISD:SExtVElems node which selects the vector sign extend instructions
11350 // during instruction selection.
11351 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
11352 // This array encodes the indices that the vector sign extend instructions
11353 // extract from when extending from one type to another for both BE and LE.
11354 // The right nibble of each byte corresponds to the LE incides.
11355 // and the left nibble of each byte corresponds to the BE incides.
11356 // For example: 0x3074B8FC byte->word
11357 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
11358 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
11359 // For example: 0x000070F8 byte->double word
11360 // For LE: the allowed indices are: 0x0,0x8
11361 // For BE: the allowed indices are: 0x7,0xF
11362 uint64_t TargetElems[] = {
11363 0x3074B8FC, // b->w
11364 0x000070F8, // b->d
11365 0x10325476, // h->w
11366 0x00003074, // h->d
11367 0x00001032, // w->d
11368 };
11369
11370 uint64_t Elems = 0;
11371 int Index;
11372 SDValue Input;
11373
11374 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
11375 if (!Op)
11376 return false;
11377 if (Op.getOpcode() != ISD::SIGN_EXTEND)
11378 return false;
11379
11380 SDValue Extract = Op.getOperand(0);
11381 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
11382 return false;
11383
11384 ConstantSDNode *ExtOp = dyn_cast(Extract.getOperand(1));
11385 if (!ExtOp)
11386 return false;
11387
11388 Index = ExtOp->getZExtValue();
11389 if (Input && Input != Extract.getOperand(0))
11390 return false;
11391
11392 if (!Input)
11393 Input = Extract.getOperand(0);
11394
11395 Elems = Elems << 8;
11396 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
11397 Elems |= Index;
11398
11399 return true;
11400 };
11401
11402 // If the build vector operands aren't sign extended vector extracts,
11403 // of the same input vector, then return.
11404 for (unsigned i = 0; i < N->getNumOperands(); i++) {
11405 if (!isSExtOfVecExtract(N->getOperand(i))) {
11406 return SDValue();
11407 }
11408 }
11409
11410 // If the vector extract indicies are not correct, add the appropriate
11411 // vector_shuffle.
11412 int TgtElemArrayIdx;
11413 int InputSize = Input.getValueType().getScalarSizeInBits();
11414 int OutputSize = N->getValueType(0).getScalarSizeInBits();
11415 if (InputSize + OutputSize == 40)
11416 TgtElemArrayIdx = 0;
11417 else if (InputSize + OutputSize == 72)
11418 TgtElemArrayIdx = 1;
11419 else if (InputSize + OutputSize == 48)
11420 TgtElemArrayIdx = 2;
11421 else if (InputSize + OutputSize == 80)
11422 TgtElemArrayIdx = 3;
11423 else if (InputSize + OutputSize == 96)
11424 TgtElemArrayIdx = 4;
11425 else
11426 return SDValue();
11427
11428 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
11429 CorrectElems = DAG.getDataLayout().isLittleEndian()
11430 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
11431 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
11432 if (Elems != CorrectElems) {
11433 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
11434 }
11435
11436 // Regular lowering will catch cases where a shuffle is not needed.
11437 return SDValue();
11438 }
11439
1131311440 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
1131411441 DAGCombinerInfo &DCI) const {
1131511442 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
1133611463 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
1133711464 if (Reduced)
1133811465 return Reduced;
11466
11467 // If we're building a vector out of extended elements from another vector
11468 // we have P9 vector integer extend instructions.
11469 if (Subtarget.hasP9Altivec()) {
11470 Reduced = combineBVOfVecSExt(N, DAG);
11471 if (Reduced)
11472 return Reduced;
11473 }
11474
1133911475
1134011476 if (N->getValueType(0) != MVT::v2f64)
1134111477 return SDValue();
6565 /// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in
6666 /// VSFRC that is sign-extended from ByteWidth to a 64-bit integer.
6767 VEXTS,
68
69 /// SExtVElems, takes an input vector of a smaller type and sign
70 /// extends to an output vector of a larger type.
71 SExtVElems,
6872
6973 /// Reciprocal estimate instructions (unary FP ops).
7074 FRE, FRSQRTE,
3030 ]>;
3131 def SDT_PPCVexts : SDTypeProfile<1, 2, [
3232 SDTCisVT<0, f64>, SDTCisVT<1, f64>, SDTCisPtrTy<2>
33 ]>;
34 def SDT_PPCSExtVElems : SDTypeProfile<1, 1, [
35 SDTCisVec<0>, SDTCisVec<1>
3336 ]>;
3437
3538 def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>,
130133 def PPCstxsix : SDNode<"PPCISD::STXSIX", SDT_PPCstxsix,
131134 [SDNPHasChain, SDNPMayStore]>;
132135 def PPCVexts : SDNode<"PPCISD::VEXTS", SDT_PPCVexts, []>;
136 def PPCSExtVElems : SDNode<"PPCISD::SExtVElems", SDT_PPCSExtVElems, []>;
133137
134138 // Extract FPSCR (not modeled at the DAG level).
135139 def PPCmffs : SDNode<"PPCISD::MFFS",
27282728 }
27292729
27302730 def ByteToWord {
2731 dag A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8));
2732 dag A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8));
2733 dag A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 8)), i8));
2734 dag A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 12)), i8));
2731 dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8));
2732 dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8));
2733 dag LE_A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 8)), i8));
2734 dag LE_A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 12)), i8));
2735 dag BE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 3)), i8));
2736 dag BE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 7)), i8));
2737 dag BE_A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 11)), i8));
2738 dag BE_A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 15)), i8));
27352739 }
27362740
27372741 def ByteToDWord {
2738 dag A0 = (i64 (sext_inreg
2739 (i64 (anyext (i32 (vector_extract v16i8:$A, 0)))), i8));
2740 dag A1 = (i64 (sext_inreg
2741 (i64 (anyext (i32 (vector_extract v16i8:$A, 8)))), i8));
2742 dag LE_A0 = (i64 (sext_inreg
2743 (i64 (anyext (i32 (vector_extract v16i8:$A, 0)))), i8));
2744 dag LE_A1 = (i64 (sext_inreg
2745 (i64 (anyext (i32 (vector_extract v16i8:$A, 8)))), i8));
2746 dag BE_A0 = (i64 (sext_inreg
2747 (i64 (anyext (i32 (vector_extract v16i8:$A, 7)))), i8));
2748 dag BE_A1 = (i64 (sext_inreg
2749 (i64 (anyext (i32 (vector_extract v16i8:$A, 15)))), i8));
27422750 }
27432751
27442752 def HWordToWord {
2745 dag A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 0)), i16));
2746 dag A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 2)), i16));
2747 dag A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 4)), i16));
2748 dag A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 6)), i16));
2753 dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 0)), i16));
2754 dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 2)), i16));
2755 dag LE_A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 4)), i16));
2756 dag LE_A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 6)), i16));
2757 dag BE_A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 1)), i16));
2758 dag BE_A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 3)), i16));
2759 dag BE_A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 5)), i16));
2760 dag BE_A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 7)), i16));
27492761 }
27502762
27512763 def HWordToDWord {
2752 dag A0 = (i64 (sext_inreg
2753 (i64 (anyext (i32 (vector_extract v8i16:$A, 0)))), i16));
2754 dag A1 = (i64 (sext_inreg
2755 (i64 (anyext (i32 (vector_extract v8i16:$A, 4)))), i16));
2764 dag LE_A0 = (i64 (sext_inreg
2765 (i64 (anyext (i32 (vector_extract v8i16:$A, 0)))), i16));
2766 dag LE_A1 = (i64 (sext_inreg
2767 (i64 (anyext (i32 (vector_extract v8i16:$A, 4)))), i16));
2768 dag BE_A0 = (i64 (sext_inreg
2769 (i64 (anyext (i32 (vector_extract v8i16:$A, 3)))), i16));
2770 dag BE_A1 = (i64 (sext_inreg
2771 (i64 (anyext (i32 (vector_extract v8i16:$A, 7)))), i16));
27562772 }
27572773
27582774 def WordToDWord {
2759 dag A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 0))));
2760 dag A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 2))));
2775 dag LE_A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 0))));
2776 dag LE_A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 2))));
2777 dag BE_A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 1))));
2778 dag BE_A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 3))));
27612779 }
27622780
27632781 def FltToIntLoad {
30153033 // P9 Altivec instructions that can be used to build vectors.
30163034 // Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete
30173035 // with complexities of existing build vector patterns in this file.
3036 let Predicates = [HasP9Altivec, IsLittleEndian] in {
3037 def : Pat<(v2i64 (build_vector WordToDWord.LE_A0, WordToDWord.LE_A1)),
3038 (v2i64 (VEXTSW2D $A))>;
3039 def : Pat<(v2i64 (build_vector HWordToDWord.LE_A0, HWordToDWord.LE_A1)),
3040 (v2i64 (VEXTSH2D $A))>;
3041 def : Pat<(v4i32 (build_vector HWordToWord.LE_A0, HWordToWord.LE_A1,
3042 HWordToWord.LE_A2, HWordToWord.LE_A3)),
3043 (v4i32 (VEXTSH2W $A))>;
3044 def : Pat<(v4i32 (build_vector ByteToWord.LE_A0, ByteToWord.LE_A1,
3045 ByteToWord.LE_A2, ByteToWord.LE_A3)),
3046 (v4i32 (VEXTSB2W $A))>;
3047 def : Pat<(v2i64 (build_vector ByteToDWord.LE_A0, ByteToDWord.LE_A1)),
3048 (v2i64 (VEXTSB2D $A))>;
3049 }
3050
3051 let Predicates = [HasP9Altivec, IsBigEndian] in {
3052 def : Pat<(v2i64 (build_vector WordToDWord.BE_A0, WordToDWord.BE_A1)),
3053 (v2i64 (VEXTSW2D $A))>;
3054 def : Pat<(v2i64 (build_vector HWordToDWord.BE_A0, HWordToDWord.BE_A1)),
3055 (v2i64 (VEXTSH2D $A))>;
3056 def : Pat<(v4i32 (build_vector HWordToWord.BE_A0, HWordToWord.BE_A1,
3057 HWordToWord.BE_A2, HWordToWord.BE_A3)),
3058 (v4i32 (VEXTSH2W $A))>;
3059 def : Pat<(v4i32 (build_vector ByteToWord.BE_A0, ByteToWord.BE_A1,
3060 ByteToWord.BE_A2, ByteToWord.BE_A3)),
3061 (v4i32 (VEXTSB2W $A))>;
3062 def : Pat<(v2i64 (build_vector ByteToDWord.BE_A0, ByteToDWord.BE_A1)),
3063 (v2i64 (VEXTSB2D $A))>;
3064 }
3065
30183066 let Predicates = [HasP9Altivec] in {
3019 def : Pat<(v2i64 (build_vector WordToDWord.A0, WordToDWord.A1)),
3067 def: Pat<(v2i64 (PPCSExtVElems v16i8:$A)),
3068 (v2i64 (VEXTSB2D $A))>;
3069 def: Pat<(v2i64 (PPCSExtVElems v8i16:$A)),
3070 (v2i64 (VEXTSH2D $A))>;
3071 def: Pat<(v2i64 (PPCSExtVElems v4i32:$A)),
30203072 (v2i64 (VEXTSW2D $A))>;
3021 def : Pat<(v2i64 (build_vector HWordToDWord.A0, HWordToDWord.A1)),
3022 (v2i64 (VEXTSH2D $A))>;
3023 def : Pat<(v4i32 (build_vector HWordToWord.A0, HWordToWord.A1,
3024 HWordToWord.A2, HWordToWord.A3)),
3073 def: Pat<(v4i32 (PPCSExtVElems v16i8:$A)),
3074 (v4i32 (VEXTSB2W $A))>;
3075 def: Pat<(v4i32 (PPCSExtVElems v8i16:$A)),
30253076 (v4i32 (VEXTSH2W $A))>;
3026 def : Pat<(v4i32 (build_vector ByteToWord.A0, ByteToWord.A1,
3027 ByteToWord.A2, ByteToWord.A3)),
3028 (v4i32 (VEXTSB2W $A))>;
3029 def : Pat<(v2i64 (build_vector ByteToDWord.A0, ByteToDWord.A1)),
3030 (v2i64 (VEXTSB2D $A))>;
3031 }
3032 }
3077 }
3078 }
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc -verify-machineinstrs -mcpu=pwr9 < %s | FileCheck %s -check-prefix=PWR9
2 target triple = "powerpc64le-unknown-linux-gnu"
3
4 define <4 x i32> @vextsb2w(<16 x i8> %a) {
5 ; PWR9-LABEL: vextsb2w:
6 ; PWR9: # BB#0: # %entry
7 ; PWR9-NEXT: vextsb2w 2, 2
8 ; PWR9-NEXT: blr
1 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-gnu-linux -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-LE
2 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-gnu-linux -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-BE
3
4 define <4 x i32> @vextsb2wLE(<16 x i8> %a) {
5 ; CHECK-LE-LABEL: vextsb2wLE:
6 ; CHECK-LE: # BB#0: # %entry
7 ; CHECK-LE-NEXT: vextsb2w 2, 2
8 ; CHECK-LE-NEXT: blr
9 ; CHECK-BE-LABEL: vextsb2wLE:
10 ; CHECK-BE: # BB#0: # %entry
11 ; CHECK-BE: vperm 2, 2, 2, 3
12 ; CHECK-BE-NEXT: vextsb2w 2, 2
13 ; CHECK-BE-NEXT: blr
14
915 entry:
1016 %vecext = extractelement <16 x i8> %a, i32 0
1117 %conv = sext i8 %vecext to i32
2228 ret <4 x i32> %vecinit9
2329 }
2430
25 define <2 x i64> @vextsb2d(<16 x i8> %a) {
26 ; PWR9-LABEL: vextsb2d:
27 ; PWR9: # BB#0: # %entry
28 ; PWR9-NEXT: vextsb2d 2, 2
29 ; PWR9-NEXT: blr
31 define <2 x i64> @vextsb2dLE(<16 x i8> %a) {
32 ; CHECK-LE-LABEL: vextsb2dLE:
33 ; CHECK-LE: # BB#0: # %entry
34 ; CHECK-LE-NEXT: vextsb2d 2, 2
35 ; CHECK-LE-NEXT: blr
36 ; CHECK-BE-LABEL: vextsb2dLE:
37 ; CHECK-BE: # BB#0: # %entry
38 ; CHECK-BE: vperm 2, 2, 2, 3
39 ; CHECK-BE-NEXT: vextsb2d 2, 2
40 ; CHECK-BE-NEXT: blr
41
3042 entry:
3143 %vecext = extractelement <16 x i8> %a, i32 0
3244 %conv = sext i8 %vecext to i64
3749 ret <2 x i64> %vecinit3
3850 }
3951
40 define <4 x i32> @vextsh2w(<8 x i16> %a) {
41 ; PWR9-LABEL: vextsh2w:
42 ; PWR9: # BB#0: # %entry
43 ; PWR9-NEXT: vextsh2w 2, 2
44 ; PWR9-NEXT: blr
52 define <4 x i32> @vextsh2wLE(<8 x i16> %a) {
53 ; CHECK-LE-LABEL: vextsh2wLE:
54 ; CHECK-LE: # BB#0: # %entry
55 ; CHECK-LE-NEXT: vextsh2w 2, 2
56 ; CHECK-LE-NEXT: blr
57 ; CHECK-BE-LABEL: vextsh2wLE:
58 ; CHECK-BE: # BB#0: # %entry
59 ; CHECK-BE: vperm 2, 2, 2, 3
60 ; CHECK-BE-NEXT: vextsh2w 2, 2
61 ; CHECK-BE-NEXT: blr
62
4563 entry:
4664 %vecext = extractelement <8 x i16> %a, i32 0
4765 %conv = sext i16 %vecext to i32
5876 ret <4 x i32> %vecinit9
5977 }
6078
61 define <2 x i64> @vextsh2d(<8 x i16> %a) {
62 ; PWR9-LABEL: vextsh2d:
63 ; PWR9: # BB#0: # %entry
64 ; PWR9-NEXT: vextsh2d 2, 2
65 ; PWR9-NEXT: blr
79 define <2 x i64> @vextsh2dLE(<8 x i16> %a) {
80 ; CHECK-LE-LABEL: vextsh2dLE:
81 ; CHECK-LE: # BB#0: # %entry
82 ; CHECK-LE-NEXT: vextsh2d 2, 2
83 ; CHECK-LE-NEXT: blr
84 ; CHECK-BE-LABEL: vextsh2dLE:
85 ; CHECK-BE: # BB#0: # %entry
86 ; CHECK-BE: vperm 2, 2, 2, 3
87 ; CHECK-BE-NEXT: vextsh2d 2, 2
88 ; CHECK-BE-NEXT: blr
89
6690 entry:
6791 %vecext = extractelement <8 x i16> %a, i32 0
6892 %conv = sext i16 %vecext to i64
7397 ret <2 x i64> %vecinit3
7498 }
7599
76 define <2 x i64> @vextsw2d(<4 x i32> %a) {
77 ; PWR9-LABEL: vextsw2d:
78 ; PWR9: # BB#0: # %entry
79 ; PWR9-NEXT: vextsw2d 2, 2
80 ; PWR9-NEXT: blr
100 define <2 x i64> @vextsw2dLE(<4 x i32> %a) {
101 ; CHECK-LE-LABEL: vextsw2dLE:
102 ; CHECK-LE: # BB#0: # %entry
103 ; CHECK-LE-NEXT: vextsw2d 2, 2
104 ; CHECK-LE-NEXT: blr
105 ; CHECK-BE-LABEL: vextsw2dLE:
106 ; CHECK-BE: # BB#0: # %entry
107 ; CHECK-BE: vmrgew
108 ; CHECK-BE-NEXT: vextsw2d 2, 2
109 ; CHECK-BE-NEXT: blr
110
81111 entry:
82112 %vecext = extractelement <4 x i32> %a, i32 0
83113 %conv = sext i32 %vecext to i64
87117 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
88118 ret <2 x i64> %vecinit3
89119 }
120
121 define <4 x i32> @vextsb2wBE(<16 x i8> %a) {
122 ; CHECK-BE-LABEL: vextsb2wBE:
123 ; CHECK-BE: # BB#0: # %entry
124 ; CHECK-BE-NEXT: vextsb2w 2, 2
125 ; CHECK-BE-NEXT: blr
126 ; CHECK-LE-LABEL: vextsb2wBE:
127 ; CHECK-LE: # BB#0: # %entry
128 ; CHECK-LE-NEXT: vsldoi 2, 2, 2, 13
129 ; CHECK-LE-NEXT: vextsb2w 2, 2
130 ; CHECK-LE-NEXT: blr
131 entry:
132 %vecext = extractelement <16 x i8> %a, i32 3
133 %conv = sext i8 %vecext to i32
134 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
135 %vecext1 = extractelement <16 x i8> %a, i32 7
136 %conv2 = sext i8 %vecext1 to i32
137 %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
138 %vecext4 = extractelement <16 x i8> %a, i32 11
139 %conv5 = sext i8 %vecext4 to i32
140 %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
141 %vecext7 = extractelement <16 x i8> %a, i32 15
142 %conv8 = sext i8 %vecext7 to i32
143 %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
144 ret <4 x i32> %vecinit9
145 }
146
147 define <2 x i64> @vextsb2dBE(<16 x i8> %a) {
148 ; CHECK-BE-LABEL: vextsb2dBE:
149 ; CHECK-BE: # BB#0: # %entry
150 ; CHECK-BE-NEXT: vextsb2d 2, 2
151 ; CHECK-BE-NEXT: blr
152 ; CHECK-LE-LABEL: vextsb2dBE:
153 ; CHECK-LE: # BB#0: # %entry
154 ; CHECK-LE-NEXT: vsldoi 2, 2, 2, 9
155 ; CHECK-LE-NEXT: vextsb2d 2, 2
156 ; CHECK-LE-NEXT: blr
157 entry:
158 %vecext = extractelement <16 x i8> %a, i32 7
159 %conv = sext i8 %vecext to i64
160 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
161 %vecext1 = extractelement <16 x i8> %a, i32 15
162 %conv2 = sext i8 %vecext1 to i64
163 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
164 ret <2 x i64> %vecinit3
165 }
166
167 define <4 x i32> @vextsh2wBE(<8 x i16> %a) {
168 ; CHECK-BE-LABEL: vextsh2wBE:
169 ; CHECK-BE: # BB#0: # %entry
170 ; CHECK-BE-NEXT: vextsh2w 2, 2
171 ; CHECK-BE-NEXT: blr
172 ; CHECK-LE-LABEL: vextsh2wBE:
173 ; CHECK-LE: # BB#0: # %entry
174 ; CHECK-LE-NEXT: vsldoi 2, 2, 2, 14
175 ; CHECK-LE-NEXT: vextsh2w 2, 2
176 ; CHECK-LE-NEXT: blr
177 entry:
178 %vecext = extractelement <8 x i16> %a, i32 1
179 %conv = sext i16 %vecext to i32
180 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
181 %vecext1 = extractelement <8 x i16> %a, i32 3
182 %conv2 = sext i16 %vecext1 to i32
183 %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
184 %vecext4 = extractelement <8 x i16> %a, i32 5
185 %conv5 = sext i16 %vecext4 to i32
186 %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
187 %vecext7 = extractelement <8 x i16> %a, i32 7
188 %conv8 = sext i16 %vecext7 to i32
189 %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
190 ret <4 x i32> %vecinit9
191 }
192
193 define <2 x i64> @vextsh2dBE(<8 x i16> %a) {
194 ; CHECK-BE-LABEL: vextsh2dBE:
195 ; CHECK-BE: # BB#0: # %entry
196 ; CHECK-BE-NEXT: vextsh2d 2, 2
197 ; CHECK-BE-NEXT: blr
198 ; CHECK-LE-LABEL: vextsh2dBE:
199 ; CHECK-LE: # BB#0: # %entry
200 ; CHECK-LE-NEXT: vsldoi 2, 2, 2, 10
201 ; CHECK-LE-NEXT: vextsh2d 2, 2
202 ; CHECK-LE-NEXT: blr
203 entry:
204 %vecext = extractelement <8 x i16> %a, i32 3
205 %conv = sext i16 %vecext to i64
206 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
207 %vecext1 = extractelement <8 x i16> %a, i32 7
208 %conv2 = sext i16 %vecext1 to i64
209 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
210 ret <2 x i64> %vecinit3
211 }
212
213 define <2 x i64> @vextsw2dBE(<4 x i32> %a) {
214 ; CHECK-BE-LABEL: vextsw2dBE:
215 ; CHECK-BE: # BB#0: # %entry
216 ; CHECK-BE-NEXT: vextsw2d 2, 2
217 ; CHECK-BE-NEXT: blr
218 ; CHECK-LE-LABEL: vextsw2dBE:
219 ; CHECK-LE: # BB#0: # %entry
220 ; CHECK-LE-NEXT: vsldoi 2, 2, 2, 12
221 ; CHECK-LE-NEXT: vextsw2d 2, 2
222 ; CHECK-LE-NEXT: blr
223 entry:
224 %vecext = extractelement <4 x i32> %a, i32 1
225 %conv = sext i32 %vecext to i64
226 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
227 %vecext1 = extractelement <4 x i32> %a, i32 3
228 %conv2 = sext i32 %vecext1 to i64
229 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
230 ret <2 x i64> %vecinit3
231 }
232
233 define <2 x i64> @vextDiffVectors(<4 x i32> %a, <4 x i32> %b) {
234 ; CHECK-LE-LABEL: vextDiffVectors:
235 ; CHECK-LE: # BB#0: # %entry
236 ; CHECK-LE-NOT: vextsw2d
237
238 ; CHECK-BE-LABEL: vextDiffVectors:
239 ; CHECK-BE: # BB#0: # %entry
240 ; CHECK-BE-NOT: vextsw2d
241 entry:
242 %vecext = extractelement <4 x i32> %a, i32 0
243 %conv = sext i32 %vecext to i64
244 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
245 %vecext1 = extractelement <4 x i32> %b, i32 2
246 %conv2 = sext i32 %vecext1 to i64
247 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
248 ret <2 x i64> %vecinit3
249 }
250
251 define <8 x i16> @testInvalidExtend(<16 x i8> %a) {
252 entry:
253 ; CHECK-LE-LABEL: testInvalidExtend:
254 ; CHECK-LE: # BB#0: # %entry
255 ; CHECK-LE-NOT: vexts
256
257 ; CHECK-BE-LABEL: testInvalidExtend:
258 ; CHECK-BE: # BB#0: # %entry
259 ; CHECK-BE-NOT: vexts
260
261 %vecext = extractelement <16 x i8> %a, i32 0
262 %conv = sext i8 %vecext to i16
263 %vecinit = insertelement <8 x i16> undef, i16 %conv, i32 0
264 %vecext1 = extractelement <16 x i8> %a, i32 2
265 %conv2 = sext i8 %vecext1 to i16
266 %vecinit3 = insertelement <8 x i16> %vecinit, i16 %conv2, i32 1
267 %vecext4 = extractelement <16 x i8> %a, i32 4
268 %conv5 = sext i8 %vecext4 to i16
269 %vecinit6 = insertelement <8 x i16> %vecinit3, i16 %conv5, i32 2
270 %vecext7 = extractelement <16 x i8> %a, i32 6
271 %conv8 = sext i8 %vecext7 to i16
272 %vecinit9 = insertelement <8 x i16> %vecinit6, i16 %conv8, i32 3
273 %vecext10 = extractelement <16 x i8> %a, i32 8
274 %conv11 = sext i8 %vecext10 to i16
275 %vecinit12 = insertelement <8 x i16> %vecinit9, i16 %conv11, i32 4
276 %vecext13 = extractelement <16 x i8> %a, i32 10
277 %conv14 = sext i8 %vecext13 to i16
278 %vecinit15 = insertelement <8 x i16> %vecinit12, i16 %conv14, i32 5
279 %vecext16 = extractelement <16 x i8> %a, i32 12
280 %conv17 = sext i8 %vecext16 to i16
281 %vecinit18 = insertelement <8 x i16> %vecinit15, i16 %conv17, i32 6
282 %vecext19 = extractelement <16 x i8> %a, i32 14
283 %conv20 = sext i8 %vecext19 to i16
284 %vecinit21 = insertelement <8 x i16> %vecinit18, i16 %conv20, i32 7
285 ret <8 x i16> %vecinit21
286 }