llvm.org GIT mirror llvm / 24508d3
Revert r227242 - Merge vector stores into wider vector stores (PR21711). This commit creates an infinite loop in DAG combine in the LLVM test-suite for aarch64 with mcpu=cyclone (just having neon may be enough to expose this). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227272 91177308-0d34-0410-b5e6-96231b3b80d8 Quentin Colombet 5 years ago
2 changed file(s) with 32 addition(s) and 114 deletion(s). Raw diff Collapse all Expand all
381381 /// vector elements, try to merge them into one larger store.
382382 /// \return True if a merged store was created.
383383 bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
384 EVT MemVT, unsigned StoresToMerge,
384 EVT MemVT, unsigned NumElem,
385385 bool IsConstantSrc, bool UseVector);
386386
387387 /// Merge consecutive store operations into a wide store.
97299729
97309730 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
97319731 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
9732 unsigned StoresToMerge, bool IsConstantSrc, bool UseVector) {
9732 unsigned NumElem, bool IsConstantSrc, bool UseVector) {
97339733 // Make sure we have something to merge.
9734 if (StoresToMerge < 2)
9734 if (NumElem < 2)
97359735 return false;
97369736
97379737 int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
97389738 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
97399739 unsigned EarliestNodeUsed = 0;
97409740
9741 for (unsigned i=0; i < StoresToMerge; ++i) {
9741 for (unsigned i=0; i < NumElem; ++i) {
97429742 // Find a chain for the new wide-store operand. Notice that some
97439743 // of the store nodes that we found may not be selected for inclusion
97449744 // in the wide store. The chain we use needs to be the chain of the
97539753
97549754 SDValue StoredVal;
97559755 if (UseVector) {
9756 bool IsVec = MemVT.isVector();
9757 unsigned Elts = StoresToMerge;
9758 if (IsVec) {
9759 // When merging vector stores, get the total number of elements.
9760 Elts *= MemVT.getVectorNumElements();
9761 }
9762 // Get the type for the merged vector store.
9763 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
9756 // Find a legal type for the vector store.
9757 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
97649758 assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
9765
97669759 if (IsConstantSrc) {
97679760 // A vector store with a constant source implies that the constant is
97689761 // zero; we only handle merging stores of constant zeros because the zero
97729765 StoredVal = DAG.getConstant(0, Ty);
97739766 } else {
97749767 SmallVector<SDValue, 8> Ops;
9775 for (unsigned i = 0; i < StoresToMerge ; ++i) {
9768 for (unsigned i = 0; i < NumElem ; ++i) {
97769769 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
97779770 SDValue Val = St->getValue();
9778 // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.
9771 // All of the operands of a BUILD_VECTOR must have the same type.
97799772 if (Val.getValueType() != MemVT)
97809773 return false;
97819774 Ops.push_back(Val);
97829775 }
9776
97839777 // Build the extracted vector elements back into a vector.
9784 StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,
9785 DL, Ty, Ops);
9778 StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);
97869779 }
97879780 } else {
97889781 // We should always use a vector store when merging extracted vector
97899782 // elements, so this path implies a store of constants.
97909783 assert(IsConstantSrc && "Merged vector elements should use vector store");
97919784
9792 unsigned StoreBW = StoresToMerge * ElementSizeBytes * 8;
9785 unsigned StoreBW = NumElem * ElementSizeBytes * 8;
97939786 APInt StoreInt(StoreBW, 0);
97949787
97959788 // Construct a single integer constant which is made of the smaller
97969789 // constant inputs.
97979790 bool IsLE = TLI.isLittleEndian();
9798 for (unsigned i = 0; i < StoresToMerge ; ++i) {
9799 unsigned Idx = IsLE ? (StoresToMerge - 1 - i) : i;
9791 for (unsigned i = 0; i < NumElem ; ++i) {
9792 unsigned Idx = IsLE ? (NumElem - 1 - i) : i;
98009793 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
98019794 SDValue Val = St->getValue();
98029795 StoreInt <<= ElementSizeBytes*8;
98239816 // Replace the first store with the new store
98249817 CombineTo(EarliestOp, NewStore);
98259818 // Erase all other stores.
9826 for (unsigned i = 0; i < StoresToMerge ; ++i) {
9819 for (unsigned i = 0; i < NumElem ; ++i) {
98279820 if (StoreNodes[i].MemNode == EarliestOp)
98289821 continue;
98299822 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
98469839 }
98479840
98489841 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
9842 EVT MemVT = St->getMemoryVT();
9843 int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
98499844 bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes().
98509845 hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
9846
9847 // Don't merge vectors into wider inputs.
9848 if (MemVT.isVector() || !MemVT.isSimple())
9849 return false;
98519850
98529851 // Perform an early exit check. Do not bother looking at stored values that
98539852 // are not constants, loads, or extracted vector elements.
98559854 bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
98569855 bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
98579856 isa<ConstantFPSDNode>(StoredVal);
9858 bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
9859 StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
9857 bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
98609858
9861 if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
9859 if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
98629860 return false;
9863
9864 EVT MemVT = St->getMemoryVT();
9865 int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
9866
9867 // Don't merge vectors into wider vectors if the source data comes from loads.
9868 // TODO: This restriction can be lifted by using logic similar to the
9869 // ExtractVecSrc case.
9870 // There's no such thing as a vector constant node; that merge case should be
9871 // handled by looking through a BUILD_VECTOR source with all constant inputs.
9872 if (MemVT.isVector() && IsLoadSrc)
9873 return false;
9874
9875 if (!MemVT.isSimple())
9876 return false;
9877
98789861
98799862 // Only look at ends of store sequences.
98809863 SDValue Chain = SDValue(St, 0);
1007010053
1007110054 // When extracting multiple vector elements, try to store them
1007210055 // in one vector store rather than a sequence of scalar stores.
10073 if (IsExtractVecSrc) {
10074 unsigned StoresToMerge = 0;
10075 bool IsVec = MemVT.isVector();
10056 if (IsExtractVecEltSrc) {
10057 unsigned NumElem = 0;
1007610058 for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
1007710059 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
10078 unsigned StoreValOpcode = St->getValue().getOpcode();
10060 SDValue StoredVal = St->getValue();
1007910061 // This restriction could be loosened.
1008010062 // Bail out if any stored values are not elements extracted from a vector.
1008110063 // It should be possible to handle mixed sources, but load sources need
1008210064 // more careful handling (see the block of code below that handles
1008310065 // consecutive loads).
10084 if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
10085 StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
10066 if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
1008610067 return false;
1008710068
1008810069 // Find a legal type for the vector store.
10089 unsigned Elts = i + 1;
10090 if (IsVec) {
10091 // When merging vector stores, get the total number of elements.
10092 Elts *= MemVT.getVectorNumElements();
10093 }
10094 if (TLI.isTypeLegal(EVT::getVectorVT(*DAG.getContext(),
10095 MemVT.getScalarType(), Elts)))
10096 StoresToMerge = i + 1;
10097 }
10098
10099 return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, StoresToMerge,
10070 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
10071 if (TLI.isTypeLegal(Ty))
10072 NumElem = i + 1;
10073 }
10074
10075 return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
1010010076 false, true);
1010110077 }
1010210078
467467 ; CHECK-NEXT: retq
468468 }
469469
470 ; PR21711 - Merge vector stores into wider vector stores.
471 define void @merge_vec_extract_stores(<8 x float> %v1, <8 x float> %v2, <4 x float>* %ptr) {
472 %idx0 = getelementptr inbounds <4 x float>* %ptr, i64 3
473 %idx1 = getelementptr inbounds <4 x float>* %ptr, i64 4
474 %idx2 = getelementptr inbounds <4 x float>* %ptr, i64 5
475 %idx3 = getelementptr inbounds <4 x float>* %ptr, i64 6
476 %shuffle0 = shufflevector <8 x float> %v1, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
477 %shuffle1 = shufflevector <8 x float> %v1, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
478 %shuffle2 = shufflevector <8 x float> %v2, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
479 %shuffle3 = shufflevector <8 x float> %v2, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
480 store <4 x float> %shuffle0, <4 x float>* %idx0, align 16
481 store <4 x float> %shuffle1, <4 x float>* %idx1, align 16
482 store <4 x float> %shuffle2, <4 x float>* %idx2, align 16
483 store <4 x float> %shuffle3, <4 x float>* %idx3, align 16
484 ret void
485
486 ; CHECK-LABEL: merge_vec_extract_stores
487 ; CHECK: vmovups %ymm0, 48(%rdi)
488 ; CHECK-NEXT: vmovups %ymm1, 80(%rdi)
489 ; CHECK-NEXT: vzeroupper
490 ; CHECK-NEXT: retq
491 }
492
493 ; Merging vector stores when sourced from vector loads is not currently handled.
494 define void @merge_vec_stores_from_loads(<4 x float>* %v, <4 x float>* %ptr) {
495 %load_idx0 = getelementptr inbounds <4 x float>* %v, i64 0
496 %load_idx1 = getelementptr inbounds <4 x float>* %v, i64 1
497 %v0 = load <4 x float>* %load_idx0
498 %v1 = load <4 x float>* %load_idx1
499 %store_idx0 = getelementptr inbounds <4 x float>* %ptr, i64 0
500 %store_idx1 = getelementptr inbounds <4 x float>* %ptr, i64 1
501 store <4 x float> %v0, <4 x float>* %store_idx0, align 16
502 store <4 x float> %v1, <4 x float>* %store_idx1, align 16
503 ret void
504
505 ; CHECK-LABEL: merge_vec_stores_from_loads
506 ; CHECK: vmovaps
507 ; CHECK-NEXT: vmovaps
508 ; CHECK-NEXT: vmovaps
509 ; CHECK-NEXT: vmovaps
510 ; CHECK-NEXT: retq
511 }
512
513 ; Merging vector stores when sourced from a constant vector is not currently handled.
514 define void @merge_vec_stores_of_constants(<4 x i32>* %ptr) {
515 %idx0 = getelementptr inbounds <4 x i32>* %ptr, i64 3
516 %idx1 = getelementptr inbounds <4 x i32>* %ptr, i64 4
517 store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>* %idx0, align 16
518 store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>* %idx1, align 16
519 ret void
520
521 ; CHECK-LABEL: merge_vec_stores_of_constants
522 ; CHECK: vxorps
523 ; CHECK-NEXT: vmovaps
524 ; CHECK-NEXT: vmovaps
525 ; CHECK-NEXT: retq
526 }
527
528470 ; This is a minimized test based on real code that was failing.
529471 ; We could merge stores (and loads) like this...
530472