llvm.org GIT mirror llvm / ea2c50c
When merging consecutive stores, use vectors to store the constant zero. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@165267 91177308-0d34-0410-b5e6-96231b3b80d8 Nadav Rotem 7 years ago
2 changed file(s) with 96 addition(s) and 41 deletion(s). Raw diff Collapse all Expand all
75697569 if (!IsLoadSrc) {
75707570 unsigned LastConst = 0;
75717571 unsigned LastLegalType = 0;
7572 unsigned LastLegalVectorType = 0;
7573 bool NonZero = false;
75727574 for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
75737575 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
75747576 SDValue StoredVal = St->getValue();
7575 bool IsConst = (isa<ConstantSDNode>(StoredVal) ||
7576 isa<ConstantFPSDNode>(StoredVal));
7577 if (!IsConst)
7577
7578 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
7579 NonZero |= (C->getZExtValue() != 0);
7580 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
7581 NonZero |= C->getValueAPF().bitcastToAPInt().getZExtValue();
7582 } else {
7583 // Non constant.
75787584 break;
7585 }
75797586
75807587 // Mark this index as the largest legal constant.
75817588 LastConst = i;
75857592 EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
75867593 if (TLI.isTypeLegal(StoreTy))
75877594 LastLegalType = i+1;
7588 }
7595
7596 // Find a legal type for the vector store.
7597 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
7598 if (TLI.isTypeLegal(Ty))
7599 LastLegalVectorType = i + 1;
7600 }
7601
7602 // We only use vectors if the constant is known to be zero.
7603 if (NonZero)
7604 LastLegalVectorType = 0;
75897605
75907606 // Check if we found a legal integer type to store.
7591 if (LastLegalType == 0)
7607 if (LastLegalType == 0 && LastLegalVectorType == 0)
75927608 return false;
75937609
7594 // We add a +1 because the LastXXX variables refer to array location
7595 // while NumElem holds the size.
7596 unsigned NumElem = std::min(LastConsecutiveStore, LastConst) + 1;
7597 NumElem = std::min(LastLegalType, NumElem);
7610 bool UseVector = LastLegalVectorType > LastLegalType;
7611 unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
7612
7613 // Make sure we have something to merge.
7614 if (NumElem < 2)
7615 return false;
75987616
75997617 unsigned EarliestNodeUsed = 0;
76007618 for (unsigned i=0; i < NumElem; ++i) {
76087626
76097627 // The earliest Node in the DAG.
76107628 LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
7611
7612 // Make sure we have something to merge.
7613 if (NumElem < 2)
7614 return false;
7615
76167629 DebugLoc DL = StoreNodes[0].MemNode->getDebugLoc();
7617 unsigned StoreBW = NumElem * ElementSizeBytes * 8;
7618 APInt StoreInt(StoreBW, 0);
7619
7620 // Construct a single integer constant which is made of the smaller
7621 // constant inputs.
7622 bool IsLE = TLI.isLittleEndian();
7623 for (unsigned i = 0; i < NumElem ; ++i) {
7624 unsigned Idx = IsLE ?(NumElem - 1 - i) : i;
7625 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
7626 SDValue Val = St->getValue();
7627 StoreInt<<=ElementSizeBytes*8;
7628 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
7629 StoreInt|=C->getAPIntValue().zext(StoreBW);
7630 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
7631 StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
7632 } else {
7633 assert(false && "Invalid constant element type");
7630
7631 SDValue StoredVal;
7632 if (UseVector) {
7633 // Find a legal type for the vector store.
7634 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
7635 assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
7636 StoredVal = DAG.getConstant(0, Ty);
7637 } else {
7638 unsigned StoreBW = NumElem * ElementSizeBytes * 8;
7639 APInt StoreInt(StoreBW, 0);
7640
7641 // Construct a single integer constant which is made of the smaller
7642 // constant inputs.
7643 bool IsLE = TLI.isLittleEndian();
7644 for (unsigned i = 0; i < NumElem ; ++i) {
7645 unsigned Idx = IsLE ?(NumElem - 1 - i) : i;
7646 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
7647 SDValue Val = St->getValue();
7648 StoreInt<<=ElementSizeBytes*8;
7649 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
7650 StoreInt|=C->getAPIntValue().zext(StoreBW);
7651 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
7652 StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
7653 } else {
7654 assert(false && "Invalid constant element type");
7655 }
76347656 }
7635 }
7636
7637 // Create the new Load and Store operations.
7638 EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
7639 SDValue WideInt = DAG.getConstant(StoreInt, StoreTy);
7640 SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, WideInt,
7657
7658 // Create the new Load and Store operations.
7659 EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
7660 StoredVal = DAG.getConstant(StoreInt, StoreTy);
7661 }
7662
7663 SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
76417664 FirstInChain->getBasePtr(),
76427665 FirstInChain->getPointerInfo(),
76437666 false, false,
80268049 }
80278050
80288051 // Only perform this optimization before the types are legal, because we
8029 // don't want to perform this optimization multiple times.
8052 // don't want to perform this optimization on every DAGCombine invocation.
80308053 if (!LegalTypes && MergeConsecutiveStores(ST))
80318054 return SDValue(N, 0);
80328055
None ; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s
0 ; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx < %s | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
55 %struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }
66 %struct.B = type { i32, i32, i32, i32, i32, i32, i32, i32 }
77
8 ; Move all of the constants using a single vector store.
98 ; CHECK: merge_const_store
109 ; save 1,2,3 ... as one big integer.
1110 ; CHECK: movabsq $578437695752307201
4039 ret void
4140 }
4241
42 ; Move the constants using a single vector store.
43 ; CHECK: merge_const_store_vec
44 ; CHECK: vmovups %ymm0, (%rsi)
45 ; CHECK: ret
46 define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp {
47 %1 = icmp sgt i32 %count, 0
48 br i1 %1, label %.lr.ph, label %._crit_edge
49 .lr.ph:
50 %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
51 %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ]
52 %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
53 store i32 0, i32* %2, align 4
54 %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
55 store i32 0, i32* %3, align 4
56 %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
57 store i32 0, i32* %4, align 4
58 %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
59 store i32 0, i32* %5, align 4
60 %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
61 store i32 0, i32* %6, align 4
62 %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
63 store i32 0, i32* %7, align 4
64 %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
65 store i32 0, i32* %8, align 4
66 %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
67 store i32 0, i32* %9, align 4
68 %10 = add nsw i32 %i.02, 1
69 %11 = getelementptr inbounds %struct.B* %.01, i64 1
70 %exitcond = icmp eq i32 %10, %count
71 br i1 %exitcond, label %._crit_edge, label %.lr.ph
72 ._crit_edge:
73 ret void
74 }
75
4376 ; Move the first 4 constants as a single vector. Move the rest as scalars.
4477 ; CHECK: merge_nonconst_store
4578 ; CHECK: movl $67305985
222255 ret void
223256 }
224257
225
226258 ;CHECK: merge_loads_no_align
227259 ; load:
228260 ;CHECK: movl