llvm.org GIT mirror llvm / 298d1a6
[DAG] Teach DAG to also reassociate vector operations. This commit teaches DAG to reassociate vector ops, which in turn enables constant folding of vector op chains that appear later on during custom lowering and DAG combine. Reviewed by Andrea Di Biagio. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199135 91177308-0d34-0410-b5e6-96231b3b80d8 — Juergen Ributzka, 6 years ago
4 changed file(s) with 82 addition(s) and 27 deletion(s). Raw diff Collapse all Expand all
14911491 unsigned &SplatBitSize, bool &HasAnyUndefs,
14921492 unsigned MinSplatBits = 0, bool isBigEndian = false);
14931493
1494 bool isConstant() const;
1495
14941496 static inline bool classof(const SDNode *N) {
14951497 return N->getOpcode() == ISD::BUILD_VECTOR;
14961498 }
609609 SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
610610 SDValue N0, SDValue N1) {
611611 EVT VT = N0.getValueType();
612 if (VT.isVector()) {
613 if (N0.getOpcode() == Opc) {
614 BuildVectorSDNode *L = dyn_cast<BuildVectorSDNode>(N0.getOperand(1));
615 if(L && L->isConstant()) {
616 BuildVectorSDNode *R = dyn_cast<BuildVectorSDNode>(N1);
617 if (R && R->isConstant()) {
618 // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
619 SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R);
620 return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
621 }
622
623 if (N0.hasOneUse()) {
624 // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
625 // use
626 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT,
627 N0.getOperand(0), N1);
628 AddToWorkList(OpNode.getNode());
629 return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
630 }
631 }
632 }
633
634 if (N1.getOpcode() == Opc) {
635 BuildVectorSDNode *R = dyn_cast<BuildVectorSDNode>(N1.getOperand(1));
636 if (R && R->isConstant()) {
637 BuildVectorSDNode *L = dyn_cast<BuildVectorSDNode>(N0);
638 if (L && L->isConstant()) {
639 // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
640 SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L);
641 return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
642 }
643 if (N1.hasOneUse()) {
644 // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one
645 // use
646 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT,
647 N1.getOperand(0), N0);
648 AddToWorkList(OpNode.getNode());
649 return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
650 }
651 }
652 }
653
654 return SDValue();
655 }
656
612657 if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) {
613658 if (isa<ConstantSDNode>(N1)) {
614659 // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
58675912 if (!LegalTypes &&
58685913 N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
58695914 VT.isVector()) {
5870 bool isSimple = true;
5871 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i)
5872 if (N0.getOperand(i).getOpcode() != ISD::UNDEF &&
5873 N0.getOperand(i).getOpcode() != ISD::Constant &&
5874 N0.getOperand(i).getOpcode() != ISD::ConstantFP) {
5875 isSimple = false;
5876 break;
5877 }
5915 bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();
58785916
58795917 EVT DestEltVT = N->getValueType(0).getVectorElementType();
58805918 assert(!DestEltVT.isVector() &&
1038010418 // this operation.
1038110419 if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
1038210420 RHS.getOpcode() == ISD::BUILD_VECTOR) {
10421 // Check if both vectors are constants. If not bail out.
10422 if (!cast<BuildVectorSDNode>(LHS)->isConstant() ||
10423 !cast<BuildVectorSDNode>(RHS)->isConstant())
10424 return SDValue();
10425
1038310426 SmallVector<SDValue, 8> Ops;
1038410427 for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
1038510428 SDValue LHSOp = LHS.getOperand(i);
1038610429 SDValue RHSOp = RHS.getOperand(i);
10387 // If these two elements can't be folded, bail out.
10388 if ((LHSOp.getOpcode() != ISD::UNDEF &&
10389 LHSOp.getOpcode() != ISD::Constant &&
10390 LHSOp.getOpcode() != ISD::ConstantFP) ||
10391 (RHSOp.getOpcode() != ISD::UNDEF &&
10392 RHSOp.getOpcode() != ISD::Constant &&
10393 RHSOp.getOpcode() != ISD::ConstantFP))
10394 break;
1039510430
1039610431 // Can't fold divide by zero.
1039710432 if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
65326532 return true;
65336533 }
65346534
/// Report whether this node is built purely from constant-like operands.
///
/// Walks every operand and returns false as soon as one is found whose opcode
/// is not ISD::UNDEF, ISD::Constant, or ISD::ConstantFP; returns true
/// otherwise. UNDEF operands are therefore accepted as "constant", and a node
/// with zero operands also yields true because the loop body never runs.
6535 bool BuildVectorSDNode::isConstant() const {
6536 for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
6537 unsigned Opc = getOperand(i).getOpcode();
// Any operand outside the three accepted opcodes disqualifies the node.
6538 if (Opc != ISD::UNDEF && Opc != ISD::Constant && Opc != ISD::ConstantFP)
6539 return false;
6540 }
6541 return true;
6542 }
6543
65356544 bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
65366545 // Find the first non-undef value in the shuffle mask.
65376546 unsigned i, e;
33 ;CHECK-LABEL: AGEP0:
44 define <4 x i32*> @AGEP0(i32* %ptr) nounwind {
55 entry:
6 ;CHECK-LABEL: AGEP0
7 ;CHECK: vbroadcast
8 ;CHECK-NEXT: vpaddd
9 ;CHECK-NEXT: ret
610 %vecinit.i = insertelement <4 x i32*> undef, i32* %ptr, i32 0
711 %vecinit2.i = insertelement <4 x i32*> %vecinit.i, i32* %ptr, i32 1
812 %vecinit4.i = insertelement <4 x i32*> %vecinit2.i, i32* %ptr, i32 2
913 %vecinit6.i = insertelement <4 x i32*> %vecinit4.i, i32* %ptr, i32 3
10 ;CHECK: padd
1114 %A2 = getelementptr <4 x i32*> %vecinit6.i, <4 x i32>
12 ;CHECK: padd
1315 %A3 = getelementptr <4 x i32*> %A2, <4 x i32>
1416 ret <4 x i32*> %A3
15 ;CHECK: ret
1617 }
1718
1819 ;CHECK-LABEL: AGEP1:
1920 define i32 @AGEP1(<4 x i32*> %param) nounwind {
2021 entry:
21 ;CHECK: padd
22 ;CHECK-LABEL: AGEP1
23 ;CHECK: vpaddd
24 ;CHECK-NEXT: vpextrd
25 ;CHECK-NEXT: movl
2226 %A2 = getelementptr <4 x i32*> %param, <4 x i32>
2327 %k = extractelement <4 x i32*> %A2, i32 3
2428 %v = load i32* %k
2933 ;CHECK-LABEL: AGEP2:
3034 define i32 @AGEP2(<4 x i32*> %param, <4 x i32> %off) nounwind {
3135 entry:
32 ;CHECK: pslld $2
33 ;CHECK: padd
36 ;CHECK-LABEL: AGEP2
37 ;CHECK: vpslld $2
38 ;CHECK-NEXT: vpadd
3439 %A2 = getelementptr <4 x i32*> %param, <4 x i32> %off
3540 %k = extractelement <4 x i32*> %A2, i32 3
3641 %v = load i32* %k
4146 ;CHECK-LABEL: AGEP3:
4247 define <4 x i32*> @AGEP3(<4 x i32*> %param, <4 x i32> %off) nounwind {
4348 entry:
44 ;CHECK: pslld $2
45 ;CHECK: padd
49 ;CHECK-LABEL: AGEP3
50 ;CHECK: vpslld $2
51 ;CHECK-NEXT: vpadd
4652 %A2 = getelementptr <4 x i32*> %param, <4 x i32> %off
4753 %v = alloca i32
4854 %k = insertelement <4 x i32*> %A2, i32* %v, i32 3
5359 ;CHECK-LABEL: AGEP4:
5460 define <4 x i16*> @AGEP4(<4 x i16*> %param, <4 x i32> %off) nounwind {
5561 entry:
62 ;CHECK-LABEL: AGEP4
5663 ; Multiply offset by two (add it to itself).
57 ;CHECK: padd
64 ;CHECK: vpadd
5865 ; add the base to the offset
59 ;CHECK: padd
66 ;CHECK-NEXT: vpadd
6067 %A = getelementptr <4 x i16*> %param, <4 x i32> %off
6168 ret <4 x i16*> %A
6269 ;CHECK: ret
6572 ;CHECK-LABEL: AGEP5:
6673 define <4 x i8*> @AGEP5(<4 x i8*> %param, <4 x i8> %off) nounwind {
6774 entry:
68 ;CHECK: paddd
75 ;CHECK-LABEL: AGEP5
76 ;CHECK: vpaddd
6977 %A = getelementptr <4 x i8*> %param, <4 x i8> %off
7078 ret <4 x i8*> %A
7179 ;CHECK: ret
7684 ;CHECK-LABEL: AGEP6:
7785 define <4 x i8*> @AGEP6(<4 x i8*> %param, <4 x i32> %off) nounwind {
7886 entry:
87 ;CHECK-LABEL: AGEP6
7988 ;CHECK-NOT: pslld
8089 %A = getelementptr <4 x i8*> %param, <4 x i32> %off
8190 ret <4 x i8*> %A