llvm.org GIT mirror llvm / e2d124d
Reapply "SLPVectorizer: Ignore users that are insertelements we can reschedule them" This commit reapplies 205018. After 205855 we should correctly vectorize intrinsics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205965 91177308-0d34-0410-b5e6-96231b3b80d8 Arnold Schwaighofer 5 years ago
2 changed file(s) with 89 addition(s) and 30 deletion(s). Raw diff Collapse all Expand all
365365 /// A negative number means that this is profitable.
366366 int getTreeCost();
367367
368 /// Construct a vectorizable tree that starts at \p Roots and is possibly
369 /// used by a reduction of \p RdxOps.
370 void buildTree(ArrayRef Roots, ValueSet *RdxOps = 0);
368 /// Construct a vectorizable tree that starts at \p Roots, ignoring the users
369 /// in \p UserIgnoreLst for the purpose of scheduling and extraction.
370 void buildTree(ArrayRef Roots,
371 ArrayRef UserIgnoreLst = None);
371372
372373 /// Clear the internal data structures that are created by 'buildTree'.
373374 void deleteTree() {
374 RdxOps = 0;
375375 VectorizableTree.clear();
376376 ScalarToTreeEntry.clear();
377377 MustGather.clear();
527527 /// Numbers instructions in different blocks.
528528 DenseMap BlocksNumbers;
529529
530 /// Reduction operators.
531 ValueSet *RdxOps;
530 /// List of users to ignore during scheduling and that don't need extracting.
531 ArrayRef UserIgnoreList;
532532
533533 // Analysis and block reference.
534534 Function *F;
542542 IRBuilder<> Builder;
543543 };
544544
545 void BoUpSLP::buildTree(ArrayRef Roots, ValueSet *Rdx) {
545 void BoUpSLP::buildTree(ArrayRef Roots,
546 ArrayRef UserIgnoreLst) {
546547 deleteTree();
547 RdxOps = Rdx;
548 UserIgnoreList = UserIgnoreLst;
548549 if (!getSameType(Roots))
549550 return;
550551 buildTree_rec(Roots, 0);
576577 if (!UserInst)
577578 continue;
578579
579 // Ignore uses that are part of the reduction.
580 if (Rdx && std::find(Rdx->begin(), Rdx->end(), UserInst) != Rdx->end())
580 // Ignore users in the user ignore list.
581 if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UserInst) !=
582 UserIgnoreList.end())
581583 continue;
582584
583585 DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane " <<
708710 continue;
709711 }
710712
711 // This user is part of the reduction.
712 if (RdxOps && RdxOps->count(UI))
713 // Ignore users in the user ignore list.
714 if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UI) !=
715 UserIgnoreList.end())
713716 continue;
714717
715718 // Make sure that we can schedule this unknown user.
17481751 DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");
17491752
17501753 assert((ScalarToTreeEntry.count(U) ||
1751 // It is legal to replace the reduction users by undef.
1752 (RdxOps && RdxOps->count(U))) &&
1754 // It is legal to replace users in the ignore list with undef.
1755 (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), U) !=
1756 UserIgnoreList.end())) &&
17531757 "Replacing out-of-tree value with undef");
17541758 }
17551759 #endif
19531957 bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R);
19541958
19551959 /// \brief Try to vectorize a list of operands.
1960 /// \param BuildVector A list of users to ignore for the purpose of
1961 /// scheduling and that don't need extracting.
19561962 /// \returns true if a value was vectorized.
1957 bool tryToVectorizeList(ArrayRef VL, BoUpSLP &R);
1963 bool tryToVectorizeList(ArrayRef VL, BoUpSLP &R,
1964 ArrayRef BuildVector = None);
19581965
19591966 /// \brief Try to vectorize a chain that may start at the operands of \p V.
19601967 bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);
21272134 return tryToVectorizeList(VL, R);
21282135 }
21292136
2130 bool SLPVectorizer::tryToVectorizeList(ArrayRef VL, BoUpSLP &R) {
2137 bool SLPVectorizer::tryToVectorizeList(ArrayRef VL, BoUpSLP &R,
2138 ArrayRef BuildVector) {
21312139 if (VL.size() < 2)
21322140 return false;
21332141
21772185 << "\n");
21782186 ArrayRef Ops = VL.slice(i, OpsWidth);
21792187
2180 R.buildTree(Ops);
2188 ArrayRef BuildVectorSlice;
2189 if (!BuildVector.empty())
2190 BuildVectorSlice = BuildVector.slice(i, OpsWidth);
2191
2192 R.buildTree(Ops, BuildVectorSlice);
21812193 int Cost = R.getTreeCost();
21822194
21832195 if (Cost < -SLPCostThreshold) {
21842196 DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
2185 R.vectorizeTree();
2186
2197 Value *VectorizedRoot = R.vectorizeTree();
2198
2199 // Reconstruct the build vector by extracting the vectorized root. This
2200 // way we handle the case where some elements of the vector are undefined.
2201 // (return (insertelt <4 x i32> (insertelt undef (opd0) 0) (opd1) 2))
2202 if (!BuildVectorSlice.empty()) {
2203 Instruction *InsertAfter = cast(VectorizedRoot);
2204 for (auto &V : BuildVectorSlice) {
2205 InsertElementInst *IE = cast(V);
2206 IRBuilder<> Builder(++BasicBlock::iterator(InsertAfter));
2207 Instruction *Extract = cast(
2208 Builder.CreateExtractElement(VectorizedRoot, IE->getOperand(2)));
2209 IE->setOperand(1, Extract);
2210 IE->removeFromParent();
2211 IE->insertAfter(Extract);
2212 InsertAfter = IE;
2213 }
2214 }
21872215 // Move to the next bundle.
21882216 i += VF - 1;
21892217 Changed = true;
22922320 /// *p =
22932321 ///
22942322 class HorizontalReduction {
2295 SmallPtrSet ReductionOps;
2323 SmallVector ReductionOps;
22962324 SmallVector ReducedVals;
22972325
22982326 BinaryOperator *ReductionRoot;
23862414 // We need to be able to reassociate the adds.
23872415 if (!TreeN->isAssociative())
23882416 return false;
2389 ReductionOps.insert(TreeN);
2417 ReductionOps.push_back(TreeN);
23902418 }
23912419 // Retract.
23922420 Stack.pop_back();
24232451
24242452 for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) {
24252453 ArrayRef ValsToReduce(&ReducedVals[i], ReduxWidth);
2426 V.buildTree(ValsToReduce, &ReductionOps);
2454 V.buildTree(ValsToReduce, ReductionOps);
24272455
24282456 // Estimate cost.
24292457 int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]);
25422570 ///
25432571 /// Returns true if it matches
25442572 ///
2545 static bool findBuildVector(InsertElementInst *IE,
2546 SmallVectorImpl &Ops) {
2547 if (!isa(IE->getOperand(0)))
2573 static bool findBuildVector(InsertElementInst *FirstInsertElem,
2574 SmallVectorImpl &BuildVector,
2575 SmallVectorImpl &BuildVectorOpds) {
2576 if (!isa(FirstInsertElem->getOperand(0)))
25482577 return false;
25492578
2579 InsertElementInst *IE = FirstInsertElem;
25502580 while (true) {
2551 Ops.push_back(IE->getOperand(1));
2581 BuildVector.push_back(IE);
2582 BuildVectorOpds.push_back(IE->getOperand(1));
25522583
25532584 if (IE->use_empty())
25542585 return false;
27182749 }
27192750
27202751 // Try to vectorize trees that start at insertelement instructions.
2721 if (InsertElementInst *IE = dyn_cast(it)) {
2722 SmallVector Ops;
2723 if (!findBuildVector(IE, Ops))
2752 if (InsertElementInst *FirstInsertElem = dyn_cast(it)) {
2753 SmallVector BuildVector;
2754 SmallVector BuildVectorOpds;
2755 if (!findBuildVector(FirstInsertElem, BuildVector, BuildVectorOpds))
27242756 continue;
27252757
2726 if (tryToVectorizeList(Ops, R)) {
2758 // Vectorize starting with the build vector operands ignoring the
2759 // BuildVector instructions for the purpose of scheduling and user
2760 // extraction.
2761 if (tryToVectorizeList(BuildVectorOpds, R, BuildVector)) {
27272762 Changed = true;
27282763 it = BB->begin();
27292764 e = BB->end();
194194 ret <4 x float> %rb
195195 }
196196
197 ; Make sure that vectorization happens even if insertelement operations
198 ; must be rescheduled. The case here is from compiling Julia.
199 define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
200 ; CHECK-LABEL: @reschedule_extract(
201 ; CHECK: %1 = fadd <4 x float> %a, %b
202 %a0 = extractelement <4 x float> %a, i32 0
203 %b0 = extractelement <4 x float> %b, i32 0
204 %c0 = fadd float %a0, %b0
205 %v0 = insertelement <4 x float> undef, float %c0, i32 0
206 %a1 = extractelement <4 x float> %a, i32 1
207 %b1 = extractelement <4 x float> %b, i32 1
208 %c1 = fadd float %a1, %b1
209 %v1 = insertelement <4 x float> %v0, float %c1, i32 1
210 %a2 = extractelement <4 x float> %a, i32 2
211 %b2 = extractelement <4 x float> %b, i32 2
212 %c2 = fadd float %a2, %b2
213 %v2 = insertelement <4 x float> %v1, float %c2, i32 2
214 %a3 = extractelement <4 x float> %a, i32 3
215 %b3 = extractelement <4 x float> %b, i32 3
216 %c3 = fadd float %a3, %b3
217 %v3 = insertelement <4 x float> %v2, float %c3, i32 3
218 ret <4 x float> %v3
219 }
220
197221 ; Check that cost model for vectorization takes credit for
198222 ; instructions that are erased.
199223 define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {