llvm.org GIT mirror llvm / d69d9f2
SLPVectorization: Add basic support for cross-basic-block SLP vectorization. We collect gather sequences when we vectorize basic blocks. Gather sequences are excellent hints for the vectorization of other basic blocks. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184444 91177308-0d34-0410-b5e6-96231b3b80d8 Nadav Rotem 7 years ago
4 changed file(s) with 137 addition(s) and 16 deletion(s). Raw diff Collapse all Expand all
9898 }
9999
100100 // Try to hoist some of the scalarization code to the preheader.
101 if (BBChanged) hoistGatherSequence(LI, BB, R);
101 if (BBChanged) {
102 hoistGatherSequence(LI, BB, R);
103 Changed |= vectorizeUsingGatherHints(R.getGatherSeqInstructions());
104 }
102105
103106 Changed |= BBChanged;
104107 }
129132 /// \brief Try to vectorize a chain that starts at two arithmetic instrs.
130133 bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R);
131134
132 /// \brief Try to vectorize a list of operands.
133 bool tryToVectorizeList(ArrayRef VL, BoUpSLP &R);
135 /// \brief Try to vectorize a list of operands. If \p NeedExtracts is true
136 /// then we calculate the cost of extracting the scalars from the vector.
137 /// \returns true if a value was vectorized.
138 bool tryToVectorizeList(ArrayRef VL, BoUpSLP &R, bool NeedExtracts);
134139
135140 /// \brief Try to vectorize a chain that may start at the operands of \V;
136141 bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);
141146 /// \brief Try to hoist gather sequences outside of the loop in cases where
142147 /// all of the sources are loop invariant.
143148 void hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, BoUpSLP &R);
149
150 /// \brief Try to vectorize additional sequences in different basic blocks
151 /// based on values that we gathered in previous blocks. The list \p Gathers
152 /// holds the gather InsertElement instructions that were generated during
153 /// vectorization.
154 /// \returns True if some code was vectorized.
155 bool vectorizeUsingGatherHints(BoUpSLP::InstrList &Gathers);
144156
145157 /// \brief Scan the basic block and look for patterns that are likely to start
146158 /// a vectorization chain.
178190 bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
179191 if (!A || !B) return false;
180192 Value *VL[] = { A, B };
181 return tryToVectorizeList(VL, R);
182 }
183
184 bool SLPVectorizer::tryToVectorizeList(ArrayRef VL, BoUpSLP &R) {
193 return tryToVectorizeList(VL, R, true);
194 }
195
196 bool SLPVectorizer::tryToVectorizeList(ArrayRef VL, BoUpSLP &R,
197 bool NeedExtracts) {
185198 if (VL.size() < 2)
186199 return false;
187200
203216 }
204217
205218 int Cost = R.getTreeCost(VL);
206 int ExtrCost = R.getScalarizationCost(VL);
219 int ExtrCost = NeedExtracts ? R.getScalarizationCost(VL) : 0;
207220 DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost <<
208221 " Cost of extract:" << ExtrCost << ".\n");
209222 if ((Cost+ExtrCost) >= -SLPCostThreshold) return false;
306319 }
307320
308321 if (Incoming.size() > 1)
309 Changed |= tryToVectorizeList(Incoming, R);
322 Changed |= tryToVectorizeList(Incoming, R, true);
310323 }
311324
312325 return Changed;
328341 return Changed;
329342 }
330343
344 bool SLPVectorizer::vectorizeUsingGatherHints(BoUpSLP::InstrList &Gathers) {
345 SmallVector Seq;
346 bool Changed = false;
347 for (int i = 0, e = Gathers.size(); i < e; ++i) {
348 InsertElementInst *IEI = dyn_cast_or_null(Gathers[i]);
349 // A NULL entry separates gather sequences; collect values until we hit one.
350 if (IEI) {
351 if (Instruction *I = dyn_cast(IEI->getOperand(1)))
352 Seq.push_back(I);
353 } else {
354 // End of a gather sequence: try to vectorize the values collected so far.
355 if (!Seq.size())
356 continue;
357
358 Instruction *I = cast(Seq[0]);
359 BasicBlock *BB = I->getParent();
360
361 DEBUG(dbgs()<<"SLP: Inspecting a gather list of size " << Seq.size() <<
362 " in " << BB->getName() << ".\n");
363 // The scan below keeps its own index and bound, leaving the outer loop's intact.
364 // Check if the gathered values have multiple uses. If they only have one
365 // user then we know that the insert/extract pair will go away.
366 bool HasMultipleUsers = false;
367 for (int j = 0, n = Seq.size(); j < n; ++j) {
368 if (!Seq[j]->hasOneUse()) {
369 HasMultipleUsers = true;
370 break;
371 }
372 }
373
374 BoUpSLP BO(BB, SE, DL, TTI, AA, LI->getLoopFor(BB));
375
376 if (tryToVectorizeList(Seq, BO, HasMultipleUsers)) {
377 DEBUG(dbgs()<<"SLP: Vectorized a gather list of len " << Seq.size() <<
378 " in " << BB->getName() << ".\n");
379 Changed = true;
380 }
381
382 Seq.clear();
383 }
384 }
385
386 return Changed;
387 }
388
331389 void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB,
332390 BoUpSLP &R) {
333391 // Check if this block is inside a loop.
343401 // Mark the insertion point for the block.
344402 Instruction *Location = PreHeader->getTerminator();
345403
346 BoUpSLP::ValueList &Gathers = R.getGatherSeqInstructions();
347 for (BoUpSLP::ValueList::iterator it = Gathers.begin(), e = Gathers.end();
404 BoUpSLP::InstrList &Gathers = R.getGatherSeqInstructions();
405 for (BoUpSLP::InstrList::iterator it = Gathers.begin(), e = Gathers.end();
348406 it != e; ++it) {
349 InsertElementInst *Insert = dyn_cast(*it);
407 InsertElementInst *Insert = dyn_cast_or_null(*it);
350408
351409 // The InsertElement sequence can be simplified into a constant.
410 // Also Ignore NULL pointers because they are only here to separate
411 // sequences.
352412 if (!Insert)
353413 continue;
354414
730730 // Remember that this instruction is used as part of a 'gather' sequence.
731731 // The caller of the bottom-up slp vectorizer can try to hoist the sequence
732732 // if the users are outside of the basic block.
733 GatherInstructions.push_back(Vec);
734 }
733 if (InsertElementInst *IEI = dyn_cast(Vec))
734 GatherInstructions.push_back(IEI);
735 }
736
737 // Mark the end of the gather sequence.
738 GatherInstructions.push_back(0);
735739
736740 for (unsigned i = 0; i < Ty->getNumElements(); ++i)
737741 VectorizedValues[VL[i]] = Vec;
3333 /// Bottom Up SLP vectorization utility class.
3434 struct BoUpSLP {
3535 typedef SmallVector ValueList;
36 typedef SmallVector InstrList;
3637 typedef SmallPtrSet ValueSet;
3738 typedef SmallVector StoreList;
3839 static const int max_cost = 1<<20;
7778 /// \returns the list of new instructions that were added in order to collect
7879 /// scalars into vectors. This list can be used to further optimize the gather
7980 /// sequences.
80 ValueList &getGatherSeqInstructions() {return GatherInstructions; }
81 InstrList &getGatherSeqInstructions() {return GatherInstructions; }
8182
8283 private:
8384 /// \brief This method contains the recursive part of getTreeCost.
165166 /// A list of instructions that are used when gathering scalars into vectors.
166167 /// In many cases these instructions can be hoisted outside of the BB.
167168 /// Iterating over this list is faster than calling LICM.
168 ValueList GatherInstructions;
169 /// Notice: We insert NULL ptrs to separate between the different gather
170 /// sequences.
171 InstrList GatherInstructions;
169172
170173 /// Instruction builder to construct the vectorized tree.
171174 IRBuilder<> Builder;
0 ; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
1
2 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
3 target triple = "x86_64-apple-macosx10.8.0"
4
5 ; int foo(double *A, float *B, int g) {
6 ; float B0 = B[0];
7 ; float B1 = B[1]; <----- BasicBlock #1
8 ; B0 += 5;
9 ; B1 += 8;
10 ;
11 ; if (g) bar();
12 ;
13 ; A[0] += B0; <------- BasicBlock #3
14 ; A[1] += B1;
15 ; }
16
17
18 ;CHECK: @foo
19 ;CHECK: load <2 x float>
20 ;CHECK: fadd <2 x float>
21 ;CHECK: call i32
22 ;CHECK: load <2 x double>
23 ;CHECK: fadd <2 x double>
24 ;CHECK: store <2 x double>
25 ;CHECK: ret
26 define i32 @foo(double* nocapture %A, float* nocapture %B, i32 %g) {
27 entry:
28 %0 = load float* %B, align 4
29 %arrayidx1 = getelementptr inbounds float* %B, i64 1
30 %1 = load float* %arrayidx1, align 4
31 %add = fadd float %0, 5.000000e+00
32 %add2 = fadd float %1, 8.000000e+00
33 %tobool = icmp eq i32 %g, 0
34 br i1 %tobool, label %if.end, label %if.then
35 ; Taken only when %g != 0; the call to @bar sits between the two blocks under test.
36 if.then:
37 %call = tail call i32 (...)* @bar()
38 br label %if.end
39 ; Both paths join here; %add and %add2 are the values computed in the entry block.
40 if.end:
41 %conv = fpext float %add to double
42 %2 = load double* %A, align 8
43 %add4 = fadd double %conv, %2
44 store double %add4, double* %A, align 8
45 %conv5 = fpext float %add2 to double
46 %arrayidx6 = getelementptr inbounds double* %A, i64 1
47 %3 = load double* %arrayidx6, align 8
48 %add7 = fadd double %conv5, %3
49 store double %add7, double* %arrayidx6, align 8
50 ret i32 undef
51 }
52
53 declare i32 @bar(...)