llvm.org GIT mirror llvm / 3c3cd10
merge consecutive stores of extracted vector elements

Add a path to DAGCombiner::MergeConsecutiveStores() to combine multiple scalar stores when the store operands are extracted vector elements. This is a partial fix for PR21711 ( http://llvm.org/bugs/show_bug.cgi?id=21711 ).

For the new test case, codegen improves from:

    vmovss %xmm0, (%rdi)
    vextractps $1, %xmm0, 4(%rdi)
    vextractps $2, %xmm0, 8(%rdi)
    vextractps $3, %xmm0, 12(%rdi)
    vextractf128 $1, %ymm0, %xmm0
    vmovss %xmm0, 16(%rdi)
    vextractps $1, %xmm0, 20(%rdi)
    vextractps $2, %xmm0, 24(%rdi)
    vextractps $3, %xmm0, 28(%rdi)
    vzeroupper
    retq

To:

    vmovups %ymm0, (%rdi)
    vzeroupper
    retq

Patch reviewed by Nadav Rotem.

Differential Revision: http://reviews.llvm.org/D6698

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224611 91177308-0d34-0410-b5e6-96231b3b80d8

Sanjay Patel 5 years ago
2 changed file(s) with 108 addition(s) and 4 deletion(s). Raw diff Collapse all Expand all
94979497 return false;
94989498
94999499 // Perform an early exit check. Do not bother looking at stored values that
9500 // are not constants or loads.
9500 // are not constants, loads, or extracted vector elements.
95019501 SDValue StoredVal = St->getValue();
95029502 bool IsLoadSrc = isa(StoredVal);
9503 if (!isa(StoredVal) && !isa(StoredVal) &&
9504 !IsLoadSrc)
9503 bool IsConstantSrc = isa(StoredVal) ||
9504 isa(StoredVal);
9505 bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
9506
9507 if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
95059508 return false;
95069509
95079510 // Only look at ends of store sequences.
96439646 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
96449647
96459648 // Store the constants into memory as one consecutive store.
9646 if (!IsLoadSrc) {
9649 if (IsConstantSrc) {
96479650 unsigned LastLegalType = 0;
96489651 unsigned LastLegalVectorType = 0;
96499652 bool NonZero = false;
97659768 // When we change its chain to be St's chain they become identical,
97669769 // get CSEed and the net result is that X is now a use of St.
97679770 // Since we know that St is redundant, just iterate.
9771 while (!St->use_empty())
9772 DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
9773 deleteAndRecombine(St);
9774 }
9775
9776 return true;
9777 }
9778
9779 // When extracting multiple vector elements, try to store them
9780 // in one vector store rather than a sequence of scalar stores.
9781 if (IsExtractVecEltSrc) {
9782 unsigned NumElem = 0;
9783 for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
9784 // Find a legal type for the vector store.
9785 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
9786 if (TLI.isTypeLegal(Ty))
9787 NumElem = i + 1;
9788 }
9789
9790 // Make sure we have a legal type and something to merge.
9791 if (NumElem < 2)
9792 return false;
9793
9794 unsigned EarliestNodeUsed = 0;
9795 for (unsigned i=0; i < NumElem; ++i) {
9796 // Find a chain for the new wide-store operand. Notice that some
9797 // of the store nodes that we found may not be selected for inclusion
9798 // in the wide store. The chain we use needs to be the chain of the
9799 // earliest store node which is *used* and replaced by the wide store.
9800 if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
9801 EarliestNodeUsed = i;
9802 }
9803
9804 // The earliest Node in the DAG.
9805 LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
9806 SDLoc DL(StoreNodes[0].MemNode);
9807
9808 SDValue StoredVal;
9809
9810 // Find a legal type for the vector store.
9811 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
9812
9813 SmallVector Ops;
9814 for (unsigned i = 0; i < NumElem ; ++i) {
9815 StoreSDNode *St = cast(StoreNodes[i].MemNode);
9816 SDValue Val = St->getValue();
9817 // All of the operands of a BUILD_VECTOR must have the same type.
9818 if (Val.getValueType() != MemVT)
9819 return false;
9820 Ops.push_back(Val);
9821 }
9822
9823 // Build the extracted vector elements back into a vector.
9824 StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);
9825
9826 SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
9827 FirstInChain->getBasePtr(),
9828 FirstInChain->getPointerInfo(),
9829 false, false,
9830 FirstInChain->getAlignment());
9831
9832 // Replace the first store with the new store
9833 CombineTo(EarliestOp, NewStore);
9834 // Erase all other stores.
9835 for (unsigned i = 0; i < NumElem ; ++i) {
9836 if (StoreNodes[i].MemNode == EarliestOp)
9837 continue;
9838 StoreSDNode *St = cast(StoreNodes[i].MemNode);
97689839 while (!St->use_empty())
97699840 DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
97709841 deleteAndRecombine(St);
433433 ;
434434 ret void
435435 }
436
437 define void @merge_vec_element_store(<8 x float> %v, float* %ptr) {
438 %vecext0 = extractelement <8 x float> %v, i32 0
439 %vecext1 = extractelement <8 x float> %v, i32 1
440 %vecext2 = extractelement <8 x float> %v, i32 2
441 %vecext3 = extractelement <8 x float> %v, i32 3
442 %vecext4 = extractelement <8 x float> %v, i32 4
443 %vecext5 = extractelement <8 x float> %v, i32 5
444 %vecext6 = extractelement <8 x float> %v, i32 6
445 %vecext7 = extractelement <8 x float> %v, i32 7
446 %arrayidx1 = getelementptr inbounds float* %ptr, i64 1
447 %arrayidx2 = getelementptr inbounds float* %ptr, i64 2
448 %arrayidx3 = getelementptr inbounds float* %ptr, i64 3
449 %arrayidx4 = getelementptr inbounds float* %ptr, i64 4
450 %arrayidx5 = getelementptr inbounds float* %ptr, i64 5
451 %arrayidx6 = getelementptr inbounds float* %ptr, i64 6
452 %arrayidx7 = getelementptr inbounds float* %ptr, i64 7
453 store float %vecext0, float* %ptr, align 4
454 store float %vecext1, float* %arrayidx1, align 4
455 store float %vecext2, float* %arrayidx2, align 4
456 store float %vecext3, float* %arrayidx3, align 4
457 store float %vecext4, float* %arrayidx4, align 4
458 store float %vecext5, float* %arrayidx5, align 4
459 store float %vecext6, float* %arrayidx6, align 4
460 store float %vecext7, float* %arrayidx7, align 4
461 ret void
462
463 ; CHECK-LABEL: merge_vec_element_store
464 ; CHECK: vmovups
465 ; CHECK-NEXT: vzeroupper
466 ; CHECK-NEXT: retq
467 }
468