llvm.org GIT mirror
SLPVectorization: Add basic support for cross-basic-block SLP vectorization. We collect gather sequences when we vectorize basic blocks. Gather sequences are excellent hints for vectorization of other basic blocks. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184444 91177308-0d34-0410-b5e6-96231b3b80d8 Nadav Rotem 7 years ago
4 changed file(s) with 137 addition(s) and 16 deletion(s).
 98 98 } 99 99 100 100 // Try to hoist some of the scalarization code to the preheader. 101 if (BBChanged) hoistGatherSequence(LI, BB, R);⏎ 101 if (BBChanged) {⏎ 102 hoistGatherSequence(LI, BB, R); 103 Changed |= vectorizeUsingGatherHints(R.getGatherSeqInstructions()); 104 } 102 105 103 106 Changed |= BBChanged; 104 107 } 129 132 /// \brief Try to vectorize a chain that starts at two arithmetic instrs. 130 133 bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R); 131 134 132 /// \brief Try to vectorize a list of operands. 133 bool tryToVectorizeList(ArrayRef VL, BoUpSLP &R);⏎ 135 /// \brief Try to vectorize a list of operands. If \p NeedExtracts is true⏎ 136 /// then we calculate the cost of extracting the scalars from the vector. 137 /// \returns true if a value was vectorized. 138 bool tryToVectorizeList(ArrayRef VL, BoUpSLP &R, bool NeedExtracts); 134 139 135 140 /// \brief Try to vectorize a chain that may start at the operands of \V; 136 141 bool tryToVectorize(BinaryOperator *V, BoUpSLP &R); 141 146 /// \brief Try to hoist gather sequences outside of the loop in cases where 142 147 /// all of the sources are loop invariant. 143 148 void hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, BoUpSLP &R); 149 150 /// \brief Try to vectorize additional sequences in different basic blocks 151 /// based on values that we gathered in previous blocks. The list \p Gathers 152 /// holds the gather InsertElement instructions that were generated during 153 /// vectorization. 154 /// \returns True if some code was vectorized. 155 bool vectorizeUsingGatherHints(BoUpSLP::InstrList &Gathers); 144 156 145 157 /// \brief Scan the basic block and look for patterns that are likely to start 146 158 /// a vectorization chain. 
178 190 bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) { 179 191 if (!A || !B) return false; 180 192 Value *VL[] = { A, B }; 181 return tryToVectorizeList(VL, R); 182 } 183 184 bool SLPVectorizer::tryToVectorizeList(ArrayRef VL, BoUpSLP &R) {⏎ 193 return tryToVectorizeList(VL, R, true);⏎ 194 } 195 196 bool SLPVectorizer::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, 197 bool NeedExtracts) { 185 198 if (VL.size() < 2) 186 199 return false; 187 200 203 216 } 204 217 205 218 int Cost = R.getTreeCost(VL); 206 int ExtrCost = R.getScalarizationCost(VL);⏎ 219 int ExtrCost = NeedExtracts ? R.getScalarizationCost(VL) : 0;⏎ 207 220 DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost << 208 221 " Cost of extract:" << ExtrCost << ".\n"); 209 222 if ((Cost+ExtrCost) >= -SLPCostThreshold) return false; 306 319 } 307 320 308 321 if (Incoming.size() > 1) 309 Changed |= tryToVectorizeList(Incoming, R);⏎ 322 Changed |= tryToVectorizeList(Incoming, R, true);⏎ 310 323 } 311 324 312 325 return Changed; 328 341 return Changed; 329 342 } 330 343 344 bool SLPVectorizer::vectorizeUsingGatherHints(BoUpSLP::InstrList &Gathers) { 345 SmallVector Seq; 346 bool Changed = false; 347 for (int i = 0, e = Gathers.size(); i < e; ++i) { 348 InsertElementInst *IEI = dyn_cast_or_null(Gathers[i]); 349 350 if (IEI) { 351 if (Instruction *I = dyn_cast(IEI->getOperand(1))) 352 Seq.push_back(I); 353 } else { 354 355 if (!Seq.size()) 356 continue; 357 358 Instruction *I = cast(Seq[0]); 359 BasicBlock *BB = I->getParent(); 360 361 DEBUG(dbgs()<<"SLP: Inspecting a gather list of size " << Seq.size() << 362 " in " << BB->getName() << ".\n"); 363 364 // Check if the gathered values have multiple uses. If they only have one 365 // user then we know that the insert/extract pair will go away. 
366 bool HasMultipleUsers = false; 367 for (int i=0; e = Seq.size(), i < e; ++i) { 368 if (!Seq[i]->hasOneUse()) { 369 HasMultipleUsers = true; 370 break; 371 } 372 } 373 374 BoUpSLP BO(BB, SE, DL, TTI, AA, LI->getLoopFor(BB)); 375 376 if (tryToVectorizeList(Seq, BO, HasMultipleUsers)) { 377 DEBUG(dbgs()<<"SLP: Vectorized a gather list of len " << Seq.size() << 378 " in " << BB->getName() << ".\n"); 379 Changed = true; 380 } 381 382 Seq.clear(); 383 } 384 } 385 386 return Changed; 387 } 388 331 389 void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, 332 390 BoUpSLP &R) { 333 391 // Check if this block is inside a loop. 343 401 // Mark the insertion point for the block. 344 402 Instruction *Location = PreHeader->getTerminator(); 345 403 346 BoUpSLP::ValueList &Gathers = R.getGatherSeqInstructions(); 347 for (BoUpSLP::ValueList::iterator it = Gathers.begin(), e = Gathers.end();⏎ 404 BoUpSLP::InstrList &Gathers = R.getGatherSeqInstructions();⏎ 405 for (BoUpSLP::InstrList::iterator it = Gathers.begin(), e = Gathers.end(); 348 406 it != e; ++it) { 349 InsertElementInst *Insert = dyn_cast(*it);⏎ 407 InsertElementInst *Insert = dyn_cast_or_null(*it);⏎ 350 408 351 409 // The InsertElement sequence can be simplified into a constant. 410 // Also Ignore NULL pointers because they are only here to separate 411 // sequences. 352 412 if (!Insert) 353 413 continue; 354 414
 730 730 // Remember that this instruction is used as part of a 'gather' sequence. 731 731 // The caller of the bottom-up slp vectorizer can try to hoist the sequence 732 732 // if the users are outside of the basic block. 733 GatherInstructions.push_back(Vec); 734 }⏎ 733 if (InsertElementInst *IEI = dyn_cast(Vec))⏎ 734 GatherInstructions.push_back(IEI); 735 } 736 737 // Mark the end of the gather sequence. 738 GatherInstructions.push_back(0); 735 739 736 740 for (unsigned i = 0; i < Ty->getNumElements(); ++i) 737 741 VectorizedValues[VL[i]] = Vec;
 33 33 /// Bottom Up SLP vectorization utility class. 34 34 struct BoUpSLP { 35 35 typedef SmallVector ValueList; 36 typedef SmallVector InstrList; 36 37 typedef SmallPtrSet ValueSet; 37 38 typedef SmallVector StoreList; 38 39 static const int max_cost = 1<<20; 77 78 /// \returns the list of new instructions that were added in order to collect 78 79 /// scalars into vectors. This list can be used to further optimize the gather 79 80 /// sequences. 80 ValueList &getGatherSeqInstructions() {return GatherInstructions; }⏎ 81 InstrList &getGatherSeqInstructions() {return GatherInstructions; }⏎ 81 82 82 83 private: 83 84 /// \brief This method contains the recursive part of getTreeCost. 165 166 /// A list of instructions that are used when gathering scalars into vectors. 166 167 /// In many cases these instructions can be hoisted outside of the BB. 167 168 /// Iterating over this list is faster than calling LICM. 168 ValueList GatherInstructions;⏎ 169 /// Notice: We insert NULL ptrs to separate between the different gather⏎ 170 /// sequences. 171 InstrList GatherInstructions; 169 172 170 173 /// Instruction builder to construct the vectorized tree. 171 174 IRBuilder<> Builder;
 0 ; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s 1 2 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" 3 target triple = "x86_64-apple-macosx10.8.0" 4 5 ; int foo(double *A, float *B, int g) { 6 ; float B0 = B[0]; 7 ; float B1 = B[1]; <----- BasicBlock #1 8 ; B0 += 5; 9 ; B1 += 8; 10 ; 11 ; if (g) bar(); 12 ; 13 ; A[0] += B0; <------- BasicBlock #3 14 ; A[1] += B1; 15 ; } 16 17 18 ;CHECK: @foo 19 ;CHECK: load <2 x float> 20 ;CHECK: fadd <2 x float> 21 ;CHECK: call i32 22 ;CHECK: load <2 x double> 23 ;CHECK: fadd <2 x double> 24 ;CHECK: store <2 x double> 25 ;CHECK: ret 26 define i32 @foo(double* nocapture %A, float* nocapture %B, i32 %g) { 27 entry: 28 %0 = load float* %B, align 4 29 %arrayidx1 = getelementptr inbounds float* %B, i64 1 30 %1 = load float* %arrayidx1, align 4 31 %add = fadd float %0, 5.000000e+00 32 %add2 = fadd float %1, 8.000000e+00 33 %tobool = icmp eq i32 %g, 0 34 br i1 %tobool, label %if.end, label %if.then 35 36 if.then: 37 %call = tail call i32 (...)* @bar() 38 br label %if.end 39 40 if.end: 41 %conv = fpext float %add to double 42 %2 = load double* %A, align 8 43 %add4 = fadd double %conv, %2 44 store double %add4, double* %A, align 8 45 %conv5 = fpext float %add2 to double 46 %arrayidx6 = getelementptr inbounds double* %A, i64 1 47 %3 = load double* %arrayidx6, align 8 48 %add7 = fadd double %conv5, %3 49 store double %add7, double* %arrayidx6, align 8 50 ret i32 undef 51 } 52 53 declare i32 @bar(...)