llvm.org GIT mirror llvm / 69d2b2a
InstCombine: form shuffles from wider range of insert/extractelements Sequences of insertelement/extractelements are sometimes used to build vectors; this code tries to put them back together into shuffles, but could only produce completely uniform shuffle types (<N x T> from two <N x T> sources). This should allow shuffles with different numbers of elements on the input and output sides as well. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@203229 91177308-0d34-0410-b5e6-96231b3b80d8 Tim Northover 5 years ago
3 changed file(s) with 139 addition(s) and 52 deletion(s). Raw diff Collapse all Expand all
33203320 (i64 (vector_extract (v2i64 VPR128:$Rm), 1)))))))],
33213321 NoItinerary>;
33223322 }
3323
3324 def : Pat<(v16i8 (int_aarch64_neon_vmull_p64
3325 (v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 1))),
3326 (v1i64 (extract_subvector (v2i64 VPR128:$Rm), (i64 1))))),
3327 (!cast(NAME # "_1q2d") VPR128:$Rn, VPR128:$Rm)>;
33233328 }
33243329
33253330 defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2", "NI_pmull_hi",
58775882 (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))))),
58785883 (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
58795884
5885 def : Pat<(ResTy (opnode (OpVTy FPRC:$Rn),
5886 (OpVTy (extract_subvector (VecOpTy VPRC:$MRm), OpImm:$Imm)))),
5887 (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
5888
58805889 //swapped operands
58815890 def : Pat<(ResTy (opnode
58825891 (OpVTy (scalar_to_vector
58835892 (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))),
58845893 (OpVTy FPRC:$Rn))),
5894 (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
5895
5896 def : Pat<(ResTy (opnode
5897 (OpVTy (extract_subvector (VecOpTy VPRC:$MRm), OpImm:$Imm)),
5898 (OpVTy FPRC:$Rn))),
58855899 (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
58865900 }
58875901
59745988 (ResTy (INST (ResTy ResFPRC:$Ra),
59755989 (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
59765990
5991 def : Pat<(ResTy (opnode
5992 (ResTy ResFPRC:$Ra),
5993 (ResTy (coreopnode (OpTy FPRC:$Rn),
5994 (OpTy (extract_subvector (OpVTy VPRC:$MRm), OpImm:$Imm)))))),
5995 (ResTy (INST (ResTy ResFPRC:$Ra),
5996 (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
5997
59775998 // swapped operands
59785999 def : Pat<(ResTy (opnode
59796000 (ResTy ResFPRC:$Ra),
59806001 (ResTy (coreopnode
59816002 (OpTy (scalar_to_vector
59826003 (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))),
6004 (OpTy FPRC:$Rn))))),
6005 (ResTy (INST (ResTy ResFPRC:$Ra),
6006 (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
6007
6008 def : Pat<(ResTy (opnode
6009 (ResTy ResFPRC:$Ra),
6010 (ResTy (coreopnode
6011 (OpTy (extract_subvector (OpVTy VPRC:$MRm), OpImm:$Imm)),
59836012 (OpTy FPRC:$Rn))))),
59846013 (ResTy (INST (ResTy ResFPRC:$Ra),
59856014 (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
325325 /// Otherwise, return false.
326326 static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
327327 SmallVectorImpl &Mask) {
328 assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
328 assert(LHS->getType() == RHS->getType() &&
329329 "Invalid CollectSingleShuffleElements");
330330 unsigned NumElts = V->getType()->getVectorNumElements();
331331
366366 return true;
367367 }
368368 } else if (ExtractElementInst *EI = dyn_cast(ScalarOp)){
369 if (isa(EI->getOperand(1)) &&
370 EI->getOperand(0)->getType() == V->getType()) {
369 if (isa(EI->getOperand(1))) {
371370 unsigned ExtractedIdx =
372371 cast(EI->getOperand(1))->getZExtValue();
372 unsigned NumLHSElts = LHS->getType()->getVectorNumElements();
373373
374374 // This must be extracting from either LHS or RHS.
375375 if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
385385 assert(EI->getOperand(0) == RHS);
386386 Mask[InsertedIdx % NumElts] =
387387 ConstantInt::get(Type::getInt32Ty(V->getContext()),
388 ExtractedIdx+NumElts);
388 ExtractedIdx + NumLHSElts);
389389 }
390390 return true;
391391 }
393393 }
394394 }
395395 }
396 // TODO: Handle shufflevector here!
397396
398397 return false;
399398 }
400399
401 /// CollectShuffleElements - We are building a shuffle of V, using RHS as the
402 /// RHS of the shuffle instruction, if it is not null. Return a shuffle mask
403 /// that computes V and the LHS value of the shuffle.
404 static Value *CollectShuffleElements(Value *V, SmallVectorImpl &Mask,
405 Value *&RHS) {
406 assert(V->getType()->isVectorTy() &&
407 (RHS == 0 || V->getType() == RHS->getType()) &&
408 "Invalid shuffle!");
400
401 /// We are building a shuffle to create V, which is a sequence of insertelement,
402 /// extractelement pairs. If PermittedRHS is set, then we must either use it or
403 /// not rely on the second vector source. Return an std::pair containing the
404 /// left and right vectors of the proposed shuffle (or 0), and set the Mask
405 /// parameter as required.
406 ///
407 /// Note: we intentionally don't try to fold earlier shuffles since they have
408 /// often been chosen carefully to be efficiently implementable on the target.
409 typedef std::pair ShuffleOps;
410
411 static ShuffleOps CollectShuffleElements(Value *V,
412 SmallVectorImpl &Mask,
413 Value *PermittedRHS) {
414 assert(V->getType()->isVectorTy() && "Invalid shuffle!");
409415 unsigned NumElts = cast(V->getType())->getNumElements();
410416
411417 if (isa(V)) {
412418 Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
413 return V;
419 return std::make_pair(
420 PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr);
414421 }
415422
416423 if (isa(V)) {
417424 Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
418 return V;
425 return std::make_pair(V, nullptr);
419426 }
420427
421428 if (InsertElementInst *IEI = dyn_cast(V)) {
425432 Value *IdxOp = IEI->getOperand(2);
426433
427434 if (ExtractElementInst *EI = dyn_cast(ScalarOp)) {
428 if (isa(EI->getOperand(1)) && isa(IdxOp) &&
429 EI->getOperand(0)->getType() == V->getType()) {
435 if (isa(EI->getOperand(1)) && isa(IdxOp)) {
430436 unsigned ExtractedIdx =
431437 cast(EI->getOperand(1))->getZExtValue();
432438 unsigned InsertedIdx = cast(IdxOp)->getZExtValue();
433439
434440 // Either the extracted from or inserted into vector must be RHSVec,
435441 // otherwise we'd end up with a shuffle of three inputs.
436 if (EI->getOperand(0) == RHS || RHS == 0) {
437 RHS = EI->getOperand(0);
438 Value *V = CollectShuffleElements(VecOp, Mask, RHS);
442 if (EI->getOperand(0) == PermittedRHS || PermittedRHS == 0) {
443 Value *RHS = EI->getOperand(0);
444 ShuffleOps LR = CollectShuffleElements(VecOp, Mask, RHS);
445 assert(LR.second == 0 || LR.second == RHS);
446
447 if (LR.first->getType() != RHS->getType()) {
448 // We tried our best, but we can't find anything compatible with RHS
449 // further up the chain. Return a trivial shuffle.
450 for (unsigned i = 0; i < NumElts; ++i)
451 Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), i);
452 return std::make_pair(V, nullptr);
453 }
454
455 unsigned NumLHSElts = RHS->getType()->getVectorNumElements();
439456 Mask[InsertedIdx % NumElts] =
440457 ConstantInt::get(Type::getInt32Ty(V->getContext()),
441 NumElts+ExtractedIdx);
442 return V;
458 NumLHSElts+ExtractedIdx);
459 return std::make_pair(LR.first, RHS);
443460 }
444461
445 if (VecOp == RHS) {
446 Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
447 // Update Mask to reflect that `ScalarOp' has been inserted at
448 // position `InsertedIdx' within the vector returned by IEI.
449 Mask[InsertedIdx % NumElts] = Mask[ExtractedIdx];
450
451 // Everything but the extracted element is replaced with the RHS.
452 for (unsigned i = 0; i != NumElts; ++i) {
453 if (i != InsertedIdx)
454 Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()),
455 NumElts+i);
456 }
457 return V;
462 if (VecOp == PermittedRHS) {
463 // We've gone as far as we can: anything on the other side of the
464 // extractelement will already have been converted into a shuffle.
465 unsigned NumLHSElts =
466 EI->getOperand(0)->getType()->getVectorNumElements();
467 for (unsigned i = 0; i != NumElts; ++i)
468 Mask.push_back(ConstantInt::get(
469 Type::getInt32Ty(V->getContext()),
470 i == InsertedIdx ? ExtractedIdx : NumLHSElts + i));
471 return std::make_pair(EI->getOperand(0), PermittedRHS);
458472 }
459473
460474 // If this insertelement is a chain that comes from exactly these two
461475 // vectors, return the vector and the effective shuffle.
462 if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask))
463 return EI->getOperand(0);
464 }
465 }
466 }
467 // TODO: Handle shufflevector here!
476 if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
477 CollectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
478 Mask))
479 return std::make_pair(EI->getOperand(0), PermittedRHS);
480 }
481 }
482 }
468483
469484 // Otherwise, can't do anything fancy. Return an identity vector.
470485 for (unsigned i = 0; i != NumElts; ++i)
471486 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
472 return V;
487 return std::make_pair(V, nullptr);
473488 }
474489
475490 Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
484499 // If the inserted element was extracted from some other vector, and if the
485500 // indexes are constant, try to turn this into a shufflevector operation.
486501 if (ExtractElementInst *EI = dyn_cast(ScalarOp)) {
487 if (isa(EI->getOperand(1)) && isa(IdxOp) &&
488 EI->getOperand(0)->getType() == IE.getType()) {
489 unsigned NumVectorElts = IE.getType()->getNumElements();
502 if (isa(EI->getOperand(1)) && isa(IdxOp)) {
503 unsigned NumInsertVectorElts = IE.getType()->getNumElements();
504 unsigned NumExtractVectorElts =
505 EI->getOperand(0)->getType()->getVectorNumElements();
490506 unsigned ExtractedIdx =
491507 cast(EI->getOperand(1))->getZExtValue();
492508 unsigned InsertedIdx = cast(IdxOp)->getZExtValue();
493509
494 if (ExtractedIdx >= NumVectorElts) // Out of range extract.
510 if (ExtractedIdx >= NumExtractVectorElts) // Out of range extract.
495511 return ReplaceInstUsesWith(IE, VecOp);
496512
497 if (InsertedIdx >= NumVectorElts) // Out of range insert.
513 if (InsertedIdx >= NumInsertVectorElts) // Out of range insert.
498514 return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType()));
499515
500516 // If we are extracting a value from a vector, then inserting it right
506522 // (and any insertelements it points to), into one big shuffle.
507523 if (!IE.hasOneUse() || !isa(IE.use_back())) {
508524 SmallVector Mask;
509 Value *RHS = 0;
510 Value *LHS = CollectShuffleElements(&IE, Mask, RHS);
511 if (RHS == 0) RHS = UndefValue::get(LHS->getType());
512 // We now have a shuffle of LHS, RHS, Mask.
513 return new ShuffleVectorInst(LHS, RHS, ConstantVector::get(Mask));
525 ShuffleOps LR = CollectShuffleElements(&IE, Mask, 0);
526
527 // The proposed shuffle may be trivial, in which case we shouldn't
528 // perform the combine.
529 if (LR.first != &IE && LR.second != &IE) {
530 // We now have a shuffle of LHS, RHS, Mask.
531 if (LR.second == 0) LR.second = UndefValue::get(LR.first->getType());
532 return new ShuffleVectorInst(LR.first, LR.second,
533 ConstantVector::get(Mask));
534 }
514535 }
515536 }
516537 }
0 ; RUN: opt -S -instcombine %s | FileCheck %s
1
2 define <1 x i8> @test1(<8 x i8> %in) {
3 ; CHECK-LABEL: @test1
4 ; CHECK: shufflevector <8 x i8> %in, <8 x i8> undef, <1 x i32>
5 %val = extractelement <8 x i8> %in, i32 5
6 %vec = insertelement <1 x i8> undef, i8 %val, i32 0
7 ret <1 x i8> %vec
8 }
9
10 define <4 x i16> @test2(<8 x i16> %in, <8 x i16> %in2) {
11 ; CHECK-LABEL: @test2
12 ; CHECK: shufflevector <8 x i16> %in2, <8 x i16> %in, <4 x i32>
13 %elt0 = extractelement <8 x i16> %in, i32 3
14 %elt1 = extractelement <8 x i16> %in, i32 1
15 %elt2 = extractelement <8 x i16> %in2, i32 0
16 %elt3 = extractelement <8 x i16> %in, i32 2
17
18 %vec.0 = insertelement <4 x i16> undef, i16 %elt0, i32 0
19 %vec.1 = insertelement <4 x i16> %vec.0, i16 %elt1, i32 1
20 %vec.2 = insertelement <4 x i16> %vec.1, i16 %elt2, i32 2
21 %vec.3 = insertelement <4 x i16> %vec.2, i16 %elt3, i32 3
22
23 ret <4 x i16> %vec.3
24 }
25
26 define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) #0 {
27 ; CHECK-LABEL: @test_vcopyq_lane_p64
28 ; CHECK: extractelement
29 ; CHECK: insertelement
30 ; CHECK-NOT: shufflevector
31 entry:
32 %elt = extractelement <1 x i64> %b, i32 0
33 %res = insertelement <2 x i64> %a, i64 %elt, i32 1
34 ret <2 x i64> %res
35 }
36