llvm.org GIT mirror llvm / 0b82799
Clang-format the SLP vectorizer. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184446 91177308-0d34-0410-b5e6-96231b3b80d8 Nadav Rotem 7 years ago
3 changed file(s) with 261 addition(s) and 204 deletion(s). Raw diff Collapse all Expand all
4040 using namespace llvm;
4141
4242 static cl::opt<int>
43 SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
44 cl::desc("Only vectorize trees if the gain is above this "
45 "number. (gain = -cost of vectorization)"));
43 SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
44 cl::desc("Only vectorize trees if the gain is above this "
45 "number. (gain = -cost of vectorization)"));
4646 namespace {
4747
4848 /// The SLPVectorizer Pass.
4949 struct SLPVectorizer : public FunctionPass {
50 typedef MapVector<Value*, BoUpSLP::StoreList> StoreListMap;
50 typedef MapVector<Value *, BoUpSLP::StoreList> StoreListMap;
5151
5252 /// Pass identification, replacement for typeid
5353 static char ID;
7777 if (!DL)
7878 return false;
7979
80 DEBUG(dbgs()<<"SLP: Analyzing blocks in " << F.getName() << ".\n");
80 DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n");
8181
8282 for (Function::iterator it = F.begin(), e = F.end(); it != e; ++it) {
8383 BasicBlock *BB = it;
9393 // Vectorize trees that end at stores.
9494 if (unsigned count = collectStores(BB, R)) {
9595 (void)count;
96 DEBUG(dbgs()<<"SLP: Found " << count << " stores to vectorize.\n");
96 DEBUG(dbgs() << "SLP: Found " << count << " stores to vectorize.\n");
9797 BBChanged |= vectorizeStoreChains(R);
9898 }
9999
107107 }
108108
109109 if (Changed) {
110 DEBUG(dbgs()<<"SLP: vectorized \""<<F.getName()<<"\"\n");
110 DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n");
111111 DEBUG(verifyFunction(F));
112112 }
113113 return Changed;
130130 unsigned collectStores(BasicBlock *BB, BoUpSLP &R);
131131
132132 /// \brief Try to vectorize a chain that starts at two arithmetic instrs.
133 bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R);
133 bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R);
134134
135135 /// \brief Try to vectorize a list of operands. If \p NeedExtracts is true
136136 /// then we calculate the cost of extracting the scalars from the vector.
138138 bool tryToVectorizeList(ArrayRef VL, BoUpSLP &R, bool NeedExtracts);
139139
140140 /// \brief Try to vectorize a chain that may start at the operands of \V;
141 bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);
141 bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);
142142
143143 /// \brief Vectorize the stores that were collected in StoreRefs.
144144 bool vectorizeStoreChains(BoUpSLP &R);
187187 return count;
188188 }
189189
190 bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
191 if (!A || !B) return false;
190 bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
191 if (!A || !B)
192 return false;
192193 Value *VL[] = { A, B };
193194 return tryToVectorizeList(VL, R, true);
194195 }
198199 if (VL.size() < 2)
199200 return false;
200201
201 DEBUG(dbgs()<<"SLP: Vectorizing a list of length = " << VL.size() << ".\n");
202 DEBUG(dbgs() << "SLP: Vectorizing a list of length = " << VL.size() << ".\n");
202203
203204 // Check that all of the parts are scalar instructions of the same type.
204205 Instruction *I0 = dyn_cast(VL[0]);
205 if (!I0) return 0;
206 if (!I0)
207 return 0;
206208
207209 unsigned Opcode0 = I0->getOpcode();
208210
216218 }
217219
218220 int Cost = R.getTreeCost(VL);
219 int ExtrCost = NeedExtracts ? R.getScalarizationCost(VL) : 0;
220 DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost <<
221 " Cost of extract:" << ExtrCost << ".\n");
222 if ((Cost+ExtrCost) >= -SLPCostThreshold) return false;
223 DEBUG(dbgs()<<"SLP: Vectorizing pair.\n");
221 int ExtrCost = NeedExtracts ? R.getScalarizationCost(VL) : 0;
222 DEBUG(dbgs() << "SLP: Cost of pair:" << Cost
223 << " Cost of extract:" << ExtrCost << ".\n");
224 if ((Cost + ExtrCost) >= -SLPCostThreshold)
225 return false;
226 DEBUG(dbgs() << "SLP: Vectorizing pair.\n");
224227 R.vectorizeArith(VL);
225228 return true;
226229 }
227230
228 bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
229 if (!V) return false;
231 bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
232 if (!V)
233 return false;
234
230235 // Try to vectorize V.
231236 if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))
232237 return true;
266271 bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
267272 bool Changed = false;
268273 for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
269 if (isa(it)) continue;
274 if (isa(it))
275 continue;
270276
271277 // Try to vectorize reductions that use PHINodes.
272278 if (PHINode *P = dyn_cast(it)) {
273279 // Check that the PHI is a reduction PHI.
274 if (P->getNumIncomingValues() != 2) return Changed;
275 Value *Rdx = (P->getIncomingBlock(0) == BB ? P->getIncomingValue(0) :
276 (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) :
277 0));
280 if (P->getNumIncomingValues() != 2)
281 return Changed;
282 Value *Rdx =
283 (P->getIncomingBlock(0) == BB
284 ? (P->getIncomingValue(0))
285 : (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) : 0));
278286 // Check if this is a Binary Operator.
279287 BinaryOperator *BI = dyn_cast_or_null(Rdx);
280288 if (!BI)
281289 continue;
282290
283291 Value *Inst = BI->getOperand(0);
284 if (Inst == P) Inst = BI->getOperand(1);
292 if (Inst == P)
293 Inst = BI->getOperand(1);
294
285295 Changed |= tryToVectorize(dyn_cast(Inst), R);
286296 continue;
287297 }
294304 }
295305 for (int i = 0; i < 2; ++i)
296306 if (BinaryOperator *BI = dyn_cast(CI->getOperand(i)))
297 Changed |= tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R);
307 Changed |=
308 tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R);
298309 continue;
299310 }
300311 }
302313 // Scan the PHINodes in our successors in search for pairing hints.
303314 for (succ_iterator it = succ_begin(BB), e = succ_end(BB); it != e; ++it) {
304315 BasicBlock *Succ = *it;
305 SmallVector*, 4> Incoming;
316 SmallVector *, 4> Incoming;
306317
307318 // Collect the incoming values from the PHIs.
308319 for (BasicBlock::iterator instr = Succ->begin(), ie = Succ->end();
321332 if (Incoming.size() > 1)
322333 Changed |= tryToVectorizeList(Incoming, R, true);
323334 }
324
335
325336 return Changed;
326337 }
327338
333344 if (it->second.size() < 2)
334345 continue;
335346
336 DEBUG(dbgs()<<"SLP: Analyzing a store chain of length " <<
337 it->second.size() << ".\n");
347 DEBUG(dbgs() << "SLP: Analyzing a store chain of length "
348 << it->second.size() << ".\n");
338349
339350 Changed |= R.vectorizeStores(it->second, -SLPCostThreshold);
340351 }
342353 }
343354
344355 bool SLPVectorizer::vectorizeUsingGatherHints(BoUpSLP::InstrList &Gathers) {
345 SmallVector*, 4> Seq;
356 SmallVector *, 4> Seq;
346357 bool Changed = false;
347358 for (int i = 0, e = Gathers.size(); i < e; ++i) {
348359 InsertElementInst *IEI = dyn_cast_or_null(Gathers[i]);
358369 Instruction *I = cast(Seq[0]);
359370 BasicBlock *BB = I->getParent();
360371
361 DEBUG(dbgs()<<"SLP: Inspecting a gather list of size " << Seq.size() <<
362 " in " << BB->getName() << ".\n");
372 DEBUG(dbgs() << "SLP: Inspecting a gather list of size " << Seq.size()
373 << " in " << BB->getName() << ".\n");
363374
364375 // Check if the gathered values have multiple uses. If they only have one
365376 // user then we know that the insert/extract pair will go away.
366377 bool HasMultipleUsers = false;
367 for (int i=0; e = Seq.size(), i < e; ++i) {
378 for (int i = 0; e = Seq.size(), i < e; ++i) {
368379 if (!Seq[i]->hasOneUse()) {
369380 HasMultipleUsers = true;
370381 break;
374385 BoUpSLP BO(BB, SE, DL, TTI, AA, LI->getLoopFor(BB));
375386
376387 if (tryToVectorizeList(Seq, BO, HasMultipleUsers)) {
377 DEBUG(dbgs()<<"SLP: Vectorized a gather list of len " << Seq.size() <<
378 " in " << BB->getName() << ".\n");
388 DEBUG(dbgs() << "SLP: Vectorized a gather list of len " << Seq.size()
389 << " in " << BB->getName() << ".\n");
379390 Changed = true;
380391 }
381392
417428 // hoist this instruction.
418429 Instruction *CurrVec = dyn_cast(Insert->getOperand(0));
419430 Instruction *NewElem = dyn_cast(Insert->getOperand(1));
420 if (CurrVec && L->contains(CurrVec)) continue;
421 if (NewElem && L->contains(NewElem)) continue;
431 if (CurrVec && L->contains(CurrVec))
432 continue;
433 if (NewElem && L->contains(NewElem))
434 continue;
422435
423436 // We can hoist this instruction. Move it to the pre-header.
424437 Insert->moveBefore(Location);
437450 INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)
438451
439452 namespace llvm {
440 Pass *createSLPVectorizerPass() {
441 return new SLPVectorizer();
442 }
443 }
444
453 Pass *createSLPVectorizerPass() { return new SLPVectorizer(); }
454 }
4444 namespace llvm {
4545
4646 BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl,
47 TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp) :
48 Builder(S->getContext()), BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) {
47 TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp)
48 : Builder(S->getContext()), BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) {
4949 numberInstructions();
5050 }
5151
5454 InstrIdx.clear();
5555 InstrVec.clear();
5656 // Number the instructions in the block.
57 for (BasicBlock::iterator it=BB->begin(), e=BB->end(); it != e; ++it) {
57 for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
5858 InstrIdx[it] = Loc++;
5959 InstrVec.push_back(it);
6060 assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation");
6262 }
6363
6464 Value *BoUpSLP::getPointerOperand(Value *I) {
65 if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand();
66 if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand();
65 if (LoadInst *LI = dyn_cast<LoadInst>(I))
66 return LI->getPointerOperand();
67 if (StoreInst *SI = dyn_cast<StoreInst>(I))
68 return SI->getPointerOperand();
6769 return 0;
6870 }
6971
7072 unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
71 if (LoadInst *L=dyn_cast<LoadInst>(I)) return L->getPointerAddressSpace();
72 if (StoreInst *S=dyn_cast<StoreInst>(I)) return S->getPointerAddressSpace();
73 if (LoadInst *L = dyn_cast<LoadInst>(I))
74 return L->getPointerAddressSpace();
75 if (StoreInst *S = dyn_cast<StoreInst>(I))
76 return S->getPointerAddressSpace();
7377 return -1;
7478 }
7579
8084 unsigned ASB = getAddressSpaceOperand(B);
8185
8286 // Check that the address spaces match and that the pointers are valid.
83 if (!PtrA || !PtrB || (ASA != ASB)) return false;
87 if (!PtrA || !PtrB || (ASA != ASB))
88 return false;
8489
8590 // Check that A and B are of the same type.
86 if (PtrA->getType() != PtrB->getType()) return false;
91 if (PtrA->getType() != PtrB->getType())
92 return false;
8793
8894 // Calculate the distance.
8995 const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
9298 const SCEVConstant *ConstOffSCEV = dyn_cast(OffsetSCEV);
9399
94100 // Non constant distance.
95 if (!ConstOffSCEV) return false;
101 if (!ConstOffSCEV)
102 return false;
96103
97104 int64_t Offset = ConstOffSCEV->getValue()->getSExtValue();
98105 Type *Ty = cast(PtrA->getType())->getElementType();
104111
105112 bool BoUpSLP::vectorizeStoreChain(ArrayRef Chain, int CostThreshold) {
106113 unsigned ChainLen = Chain.size();
107 DEBUG(dbgs()<<"SLP: Analyzing a store chain of length " <<ChainLen<<"\n");
114 DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen
115 << "\n");
108116 Type *StoreTy = cast(Chain[0])->getValueOperand()->getType();
109117 unsigned Sz = DL->getTypeSizeInBits(StoreTy);
110118 unsigned VF = MinVecRegSize / Sz;
111119
112 if (!isPowerOf2_32(Sz) || VF < 2) return false;
120 if (!isPowerOf2_32(Sz) || VF < 2)
121 return false;
113122
114123 bool Changed = false;
115124 // Look for profitable vectorizable trees at all offsets, starting at zero.
116125 for (unsigned i = 0, e = ChainLen; i < e; ++i) {
117 if (i + VF > e) break;
118 DEBUG(dbgs()<<"SLP: Analyzing " << VF << " stores at offset "<< i << "\n");
126 if (i + VF > e)
127 break;
128 DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i
129 << "\n");
119130 ArrayRef Operands = Chain.slice(i, VF);
120131
121132 int Cost = getTreeCost(Operands);
122133 DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
123134 if (Cost < CostThreshold) {
124135 DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
125 Builder.SetInsertPoint(getInsertionPoint(getLastIndex(Operands,VF)));
136 Builder.SetInsertPoint(getInsertionPoint(getLastIndex(Operands, VF)));
126137 vectorizeTree(Operands, VF);
127138 i += VF - 1;
128139 Changed = true;
134145
135146 int Cost = getTreeCost(Chain);
136147 if (Cost < CostThreshold) {
137 DEBUG(dbgs() << "SLP: Found store chain cost = "<< Cost <<" for size = " <<
138 ChainLen << "\n");
148 DEBUG(dbgs() << "SLP: Found store chain cost = " << Cost
149 << " for size = " << ChainLen << "\n");
139150 Builder.SetInsertPoint(getInsertionPoint(getLastIndex(Chain, ChainLen)));
140151 vectorizeTree(Chain, ChainLen);
141152 return true;
145156 }
146157
147158 bool BoUpSLP::vectorizeStores(ArrayRef Stores, int costThreshold) {
148 SetVector Heads, Tails;
149 SmallDenseMap ConsecutiveChain;
159 SetVector Heads, Tails;
160 SmallDenseMap ConsecutiveChain;
150161
151162 // We may run into multiple chains that merge into a single chain. We mark the
152163 // stores that we vectorized so that we don't visit the same store twice.
157168 // all of the pairs of loads that follow each other.
158169 for (unsigned i = 0, e = Stores.size(); i < e; ++i)
159170 for (unsigned j = 0; j < e; ++j) {
160 if (i == j) continue;
171 if (i == j)
172 continue;
173
161174 if (isConsecutiveAccess(Stores[i], Stores[j])) {
162175 Tails.insert(Stores[j]);
163176 Heads.insert(Stores[i]);
166179 }
167180
168181 // For stores that start but don't end a link in the chain:
169 for (SetVector<Value*>::iterator it = Heads.begin(), e = Heads.end();
182 for (SetVector<Value *>::iterator it = Heads.begin(), e = Heads.end();
170183 it != e; ++it) {
171 if (Tails.count(*it)) continue;
184 if (Tails.count(*it))
185 continue;
172186
173187 // We found a store instr that starts a chain. Now follow the chain and try
174188 // to vectorize it.
176190 Value *I = *it;
177191 // Collect the chain into a list.
178192 while (Tails.count(I) || Heads.count(I)) {
179 if (VectorizedStores.count(I)) break;
193 if (VectorizedStores.count(I))
194 break;
180195 Operands.push_back(I);
181196 // Move to the next value in the chain.
182197 I = ConsecutiveChain[I];
211226 }
212227
213228 AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) {
214 if (StoreInst *SI = dyn_cast<StoreInst>(I)) return AA->getLocation(SI);
215 if (LoadInst *LI = dyn_cast<LoadInst>(I)) return AA->getLocation(LI);
229 if (StoreInst *SI = dyn_cast<StoreInst>(I))
230 return AA->getLocation(SI);
231 if (LoadInst *LI = dyn_cast<LoadInst>(I))
232 return AA->getLocation(LI);
216233 return AliasAnalysis::Location();
217234 }
218235
223240 /// the source may alias.
224241 for (++I; I != E; ++I) {
225242 // Ignore store instructions that are marked as 'ignore'.
226 if (MemBarrierIgnoreList.count(I)) continue;
243 if (MemBarrierIgnoreList.count(I))
244 continue;
227245 if (Src->mayWriteToMemory()) /* Write */ {
228 if (!I->mayReadOrWriteMemory()) continue;
246 if (!I->mayReadOrWriteMemory())
247 continue;
229248 } else /* Read */ {
230 if (!I->mayWriteToMemory()) continue;
249 if (!I->mayWriteToMemory())
250 continue;
231251 }
232252 AliasAnalysis::Location A = getLocation(&*I);
233253 AliasAnalysis::Location B = getLocation(Src);
243263 Instruction *Loc = getInsertionPoint(LastIdx);
244264 Builder.SetInsertPoint(Loc);
245265
246 assert(getFirstUserIndex(Operands, Operands.size()) > LastIdx &&
266 assert(getFirstUserIndex(Operands, Operands.size()) > LastIdx &&
247267 "Vectorizing with in-tree users");
248268
249269 Value *Vec = vectorizeTree(Operands, Operands.size());
282302
283303 // Check that instructions with multiple users can be vectorized. Mark unsafe
284304 // instructions.
285 for (SetVector::iterator it = MultiUserVals.begin(),
286 e = MultiUserVals.end(); it != e; ++it) {
305 for (SetVector::iterator it = MultiUserVals.begin(),
306 e = MultiUserVals.end();
307 it != e; ++it) {
287308 // Check that all of the users of this instr are within the tree
288309 // and that they are all from the same lane.
289310 int Lane = -1;
290311 for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();
291312 I != E; ++I) {
292313 if (LaneMap.find(*I) == LaneMap.end()) {
293 DEBUG(dbgs()<<"SLP: Instr " << **it << " has multiple users.\n");
314 DEBUG(dbgs() << "SLP: Instr " << **it << " has multiple users.\n");
294315
295316 // We don't have an ordering problem if the user is not in this basic
296317 // block.
304325 int Idx = InstrIdx[Inst];
305326 if (Idx < LastRootIndex) {
306327 MustScalarize.insert(*it);
307 DEBUG(dbgs()<<"SLP: Adding to MustScalarize "
308 "because of an unsafe out of tree usage.\n");
328 DEBUG(dbgs() << "SLP: Adding to MustScalarize "
329 "because of an unsafe out of tree usage.\n");
309330 break;
310331 }
311332
312
313 DEBUG(dbgs()<<"SLP: Adding to MustExtract "
314 "because of a safe out of tree usage.\n");
333 DEBUG(dbgs() << "SLP: Adding to MustExtract "
334 "because of a safe out of tree usage.\n");
315335 MustExtract.insert(*it);
316336 continue;
317337 }
318 if (Lane == -1) Lane = LaneMap[*I];
338 if (Lane == -1)
339 Lane = LaneMap[*I];
319340 if (Lane != LaneMap[*I]) {
320341 MustScalarize.insert(*it);
321 DEBUG(dbgs()<<"SLP: Adding " << **it <<
322 " to MustScalarize because multiple lane use it: "
323 << Lane << " and " << LaneMap[*I] << ".\n");
342 DEBUG(dbgs() << "SLP: Adding " << **it
343 << " to MustScalarize because multiple lane use it: "
344 << Lane << " and " << LaneMap[*I] << ".\n");
324345 break;
325346 }
326347 }
359380 }
360381
361382 void BoUpSLP::getTreeUses_rec(ArrayRef VL, unsigned Depth) {
362 if (Depth == RecursionMaxDepth) return;
383 if (Depth == RecursionMaxDepth)
384 return;
363385
364386 // Don't handle vectors.
365 if (VL[0]->getType()->isVectorTy()) return;
387 if (VL[0]->getType()->isVectorTy())
388 return;
389
366390 if (StoreInst *SI = dyn_cast(VL[0]))
367 if (SI->getValueOperand()->getType()->isVectorTy()) return;
391 if (SI->getValueOperand()->getType()->isVectorTy())
392 return;
368393
369394 // Check if all of the operands are constants.
370395 bool AllConst = true;
374399 AllSameScalar &= (VL[0] == VL[i]);
375400 Instruction *I = dyn_cast(VL[i]);
376401 // If one of the instructions is out of this BB, we need to scalarize all.
377 if (I && I->getParent() != BB) return;
402 if (I && I->getParent() != BB)
403 return;
378404 }
379405
380406 // If all of the operands are identical or constant we have a simple solution.
381 if (AllConst || AllSameScalar) return;
407 if (AllConst || AllSameScalar)
408 return;
382409
383410 // Scalarize unknown structures.
384411 Instruction *VL0 = dyn_cast(VL[0]);
385 if (!VL0) return;
412 if (!VL0)
413 return;
386414
387415 unsigned Opcode = VL0->getOpcode();
388416 for (unsigned i = 0, e = VL.size(); i < e; ++i) {
389417 Instruction *I = dyn_cast(VL[i]);
390418 // If not all of the instructions are identical then we have to scalarize.
391 if (!I || Opcode != I->getOpcode()) return;
419 if (!I || Opcode != I->getOpcode())
420 return;
392421 }
393422
394423 for (int i = 0, e = VL.size(); i < e; ++i) {
395424 // Check that the instruction is only used within
396425 // one lane.
397 if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) return;
426 if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i)
427 return;
398428 // Make this instruction as 'seen' and remember the lane.
399429 LaneMap[VL[i]] = i;
400430 }
406436 // within our tree. At depth zero we have no local users, only external
407437 // users that we don't care about.
408438 if (Depth && I && I->getNumUses() > 1) {
409 DEBUG(dbgs()<<"SLP: Adding to MultiUserVals "
410 "because it has multiple users:" << *I << " \n");
439 DEBUG(dbgs() << "SLP: Adding to MultiUserVals "
440 "because it has multiple users:" << *I << " \n");
411441 MultiUserVals.insert(I);
412442 }
413443 }
414444
415445 switch (Opcode) {
416 case Instruction::ExtractElement: {
417 VectorType *VecTy = VectorType::get(VL[0]->getType(), VL.size());
418 // No need to follow ExtractElements that are going to be optimized away.
419 if (CanReuseExtract(VL, VL.size(), VecTy)) return;
420 // Fall through.
421 }
422 case Instruction::ZExt:
423 case Instruction::SExt:
424 case Instruction::FPToUI:
425 case Instruction::FPToSI:
426 case Instruction::FPExt:
427 case Instruction::PtrToInt:
428 case Instruction::IntToPtr:
429 case Instruction::SIToFP:
430 case Instruction::UIToFP:
431 case Instruction::Trunc:
432 case Instruction::FPTrunc:
433 case Instruction::BitCast:
434 case Instruction::Select:
435 case Instruction::ICmp:
436 case Instruction::FCmp:
437 case Instruction::Add:
438 case Instruction::FAdd:
439 case Instruction::Sub:
440 case Instruction::FSub:
441 case Instruction::Mul:
442 case Instruction::FMul:
443 case Instruction::UDiv:
444 case Instruction::SDiv:
445 case Instruction::FDiv:
446 case Instruction::URem:
447 case Instruction::SRem:
448 case Instruction::FRem:
449 case Instruction::Shl:
450 case Instruction::LShr:
451 case Instruction::AShr:
452 case Instruction::And:
453 case Instruction::Or:
454 case Instruction::Xor: {
455 for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
456 ValueList Operands;
457 // Prepare the operand vector.
458 for (unsigned j = 0; j < VL.size(); ++j)
459 Operands.push_back(cast(VL[j])->getOperand(i));
460
461 getTreeUses_rec(Operands, Depth+1);
462 }
446 case Instruction::ExtractElement: {
447 VectorType *VecTy = VectorType::get(VL[0]->getType(), VL.size());
448 // No need to follow ExtractElements that are going to be optimized away.
449 if (CanReuseExtract(VL, VL.size(), VecTy))
463450 return;
464 }
465 case Instruction::Store: {
451 // Fall through.
452 }
453 case Instruction::ZExt:
454 case Instruction::SExt:
455 case Instruction::FPToUI:
456 case Instruction::FPToSI:
457 case Instruction::FPExt:
458 case Instruction::PtrToInt:
459 case Instruction::IntToPtr:
460 case Instruction::SIToFP:
461 case Instruction::UIToFP:
462 case Instruction::Trunc:
463 case Instruction::FPTrunc:
464 case Instruction::BitCast:
465 case Instruction::Select:
466 case Instruction::ICmp:
467 case Instruction::FCmp:
468 case Instruction::Add:
469 case Instruction::FAdd:
470 case Instruction::Sub:
471 case Instruction::FSub:
472 case Instruction::Mul:
473 case Instruction::FMul:
474 case Instruction::UDiv:
475 case Instruction::SDiv:
476 case Instruction::FDiv:
477 case Instruction::URem:
478 case Instruction::SRem:
479 case Instruction::FRem:
480 case Instruction::Shl:
481 case Instruction::LShr:
482 case Instruction::AShr:
483 case Instruction::And:
484 case Instruction::Or:
485 case Instruction::Xor: {
486 for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
466487 ValueList Operands;
488 // Prepare the operand vector.
467489 for (unsigned j = 0; j < VL.size(); ++j)
468 Operands.push_back(cast(VL[j])->getOperand(0));
469 getTreeUses_rec(Operands, Depth+1);
470 return;
471 }
472 default:
490 Operands.push_back(cast(VL[j])->getOperand(i));
491
492 getTreeUses_rec(Operands, Depth + 1);
493 }
494 return;
495 }
496 case Instruction::Store: {
497 ValueList Operands;
498 for (unsigned j = 0; j < VL.size(); ++j)
499 Operands.push_back(cast(VL[j])->getOperand(0));
500 getTreeUses_rec(Operands, Depth + 1);
501 return;
502 }
503 default:
473504 return;
474505 }
475506 }
481512 ScalarTy = SI->getValueOperand()->getType();
482513
483514 /// Don't mess with vectors.
484 if (ScalarTy->isVectorTy()) return max_cost;
515 if (ScalarTy->isVectorTy())
516 return max_cost;
517
485518 VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
486519
487 if (Depth == RecursionMaxDepth) return getScalarizationCost(VecTy);
520 if (Depth == RecursionMaxDepth)
521 return getScalarizationCost(VecTy);
488522
489523 // Check if all of the operands are constants.
490524 bool AllConst = true;
502536 }
503537
504538 // Is this a simple vector constant.
505 if (AllConst) return 0;
539 if (AllConst)
540 return 0;
506541
507542 // If all of the operands are identical we can broadcast them.
508543 Instruction *VL0 = dyn_cast(VL[0]);
522557 if (MustScalarizeFlag)
523558 return getScalarizationCost(VecTy);
524559
525 if (!VL0) return getScalarizationCost(VecTy);
560 if (!VL0)
561 return getScalarizationCost(VecTy);
526562 assert(VL0->getParent() == BB && "Wrong BB");
527563
528564 unsigned Opcode = VL0->getOpcode();
529565 for (unsigned i = 0, e = VL.size(); i < e; ++i) {
530566 Instruction *I = dyn_cast(VL[i]);
531567 // If not all of the instructions are identical then we have to scalarize.
532 if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy);
568 if (!I || Opcode != I->getOpcode())
569 return getScalarizationCost(VecTy);
533570 }
534571
535572 // Check if it is safe to sink the loads or the stores.
537574 int MaxIdx = getLastIndex(VL, VL.size());
538575 Instruction *Last = InstrVec[MaxIdx];
539576
540 for (unsigned i = 0, e = VL.size(); i < e; ++i ) {
541 if (VL[i] == Last) continue;
577 for (unsigned i = 0, e = VL.size(); i < e; ++i) {
578 if (VL[i] == Last)
579 continue;
542580 Value *Barrier = isUnsafeToSink(cast(VL[i]), Last);
543581 if (Barrier) {
544 DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " <<
545 *Last << "\n because of " << *Barrier << "\n");
582 DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " << *Last
583 << "\n because of " << *Barrier << "\n");
546584 return max_cost;
547585 }
548586 }
553591 for (unsigned i = 0, e = VL.size(); i < e; ++i)
554592 if (MustExtract.count(VL[i]))
555593 ExternalUserExtractCost +=
556 TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);
594 TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);
557595
558596 switch (Opcode) {
559597 case Instruction::ExtractElement: {
584622 return getScalarizationCost(VecTy);
585623 }
586624
587 Cost += getTreeCost_rec(Operands, Depth+1);
588 if (Cost >= max_cost) return max_cost;
625 Cost += getTreeCost_rec(Operands, Depth + 1);
626 if (Cost >= max_cost)
627 return max_cost;
589628
590629 // Calculate the cost of this instruction.
591630 int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(),
634673 for (unsigned j = 0; j < VL.size(); ++j)
635674 Operands.push_back(cast(VL[j])->getOperand(i));
636675
637 Cost += getTreeCost_rec(Operands, Depth+1);
638 if (Cost >= max_cost) return max_cost;
676 Cost += getTreeCost_rec(Operands, Depth + 1);
677 if (Cost >= max_cost)
678 return max_cost;
639679 }
640680
641681 // Calculate the cost of this instruction.
644684 if (Opcode == Instruction::FCmp || Opcode == Instruction::ICmp ||
645685 Opcode == Instruction::Select) {
646686 VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size());
647 ScalarCost = VecTy->getNumElements() *
648 TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty());
687 ScalarCost =
688 VecTy->getNumElements() *
689 TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty());
649690 VecCost = TTI->getCmpSelInstrCost(Opcode, VecTy, MaskTy);
650691 } else {
651692 ScalarCost = VecTy->getNumElements() *
652 TTI->getArithmeticInstrCost(Opcode, ScalarTy);
693 TTI->getArithmeticInstrCost(Opcode, ScalarTy);
653694 VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
654695 }
655696 Cost += (VecCost - ScalarCost);
657698 }
658699 case Instruction::Load: {
659700 // If we are scalarize the loads, add the cost of forming the vector.
660 for (unsigned i = 0, e = VL.size()-1; i < e; ++i)
661 if (!isConsecutiveAccess(VL[i], VL[i+1]))
701 for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
702 if (!isConsecutiveAccess(VL[i], VL[i + 1]))
662703 return getScalarizationCost(VecTy);
663704
664705 // Cost of wide load - cost of scalar loads.
665706 int ScalarLdCost = VecTy->getNumElements() *
666 TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
707 TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
667708 int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
668709 return VecLdCost - ScalarLdCost + ExternalUserExtractCost;
669710 }
670711 case Instruction::Store: {
671712 // We know that we can merge the stores. Calculate the cost.
672713 int ScalarStCost = VecTy->getNumElements() *
673 TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
674 int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1,0);
714 TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
715 int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
675716 int StoreCost = VecStCost - ScalarStCost;
676717
677718 ValueList Operands;
691732
692733 int BoUpSLP::getLastIndex(ArrayRef VL, unsigned VF) {
693734 int MaxIdx = InstrIdx[BB->getFirstNonPHI()];
694 for (unsigned i = 0; i < VF; ++i )
735 for (unsigned i = 0; i < VF; ++i)
695736 MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
696737 return MaxIdx;
697738 }
715756 int BoUpSLP::getLastIndex(Instruction *I, Instruction *J) {
716757 assert(I->getParent() == BB && "Invalid parent for instruction I");
717758 assert(J->getParent() == BB && "Invalid parent for instruction J");
718 return std::max(InstrIdx[I],InstrIdx[J]);
759 return std::max(InstrIdx[I], InstrIdx[J]);
719760 }
720761
721762 Instruction *BoUpSLP::getInsertionPoint(unsigned Index) {
724765
725766 Value *BoUpSLP::Scalarize(ArrayRef VL, VectorType *Ty) {
726767 Value *Vec = UndefValue::get(Ty);
727 for (unsigned i=0; i < Ty->getNumElements(); ++i) {
768 for (unsigned i = 0; i < Ty->getNumElements(); ++i) {
728769 // Generate the 'InsertElement' instruction.
729770 Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
730771 // Remember that this instruction is used as part of a 'gather' sequence.
747788 Value *V = vectorizeTree_rec(VL, VF);
748789
749790 int LastInstrIdx = getLastIndex(VL, VL.size());
750 for (SetVector::iterator it = MustExtract.begin(),
751 e = MustExtract.end(); it != e; ++it) {
791 for (SetVector::iterator it = MustExtract.begin(),
792 e = MustExtract.end();
793 it != e; ++it) {
752794 Instruction *I = cast(*it);
753795
754796 // This is a scalarized value, so we can use the original value.
769811 ++U) {
770812 Instruction *UI = cast(*U);
771813 if (UI->getParent() != I->getParent() || InstrIdx[UI] > LastInstrIdx)
772 UI->replaceUsesOfWith(I ,Extract);
814 UI->replaceUsesOfWith(I, Extract);
773815 Replaced = true;
774816 }
775817 assert(Replaced && "Must replace at least one outside user");
813855 return Scalarize(VL, VecTy);
814856
815857 if (VectorizedValues.count(VL0)) {
816 Value * Vec = VectorizedValues[VL0];
858 Value *Vec = VectorizedValues[VL0];
817859 for (int i = 0; i < VF; ++i)
818860 VectorizedValues[VL[i]] = Vec;
819861 return Vec;
885927 VectorizedValues[VL[i]] = V;
886928
887929 return V;
888
889930 }
890931 case Instruction::Select: {
891932 ValueList TrueVec, FalseVec, CondVec;
932973 Value *LHS = vectorizeTree_rec(LHSVL, VF);
933974 Value *RHS = vectorizeTree_rec(RHSVL, VF);
934975 BinaryOperator *BinOp = cast(VL0);
935 Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS,RHS);
976 Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS);
936977
937978 for (int i = 0; i < VF; ++i)
938979 VectorizedValues[VL[i]] = V;
945986
946987 // Check if all of the loads are consecutive.
947988 for (unsigned i = 1, e = VF; i < e; ++i)
948 if (!isConsecutiveAccess(VL[i-1], VL[i]))
989 if (!isConsecutiveAccess(VL[i - 1], VL[i]))
949990 return Scalarize(VL, VecTy);
950991
951992 // Loads are inserted at the head of the tree because we don't want to sink
9711012 ValueOp.push_back(cast(VL[i])->getValueOperand());
9721013
9731014 Value *VecValue = vectorizeTree_rec(ValueOp, VF);
974 Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
975 VecTy->getPointerTo());
1015 Value *VecPtr =
1016 Builder.CreateBitCast(SI->getPointerOperand(), VecTy->getPointerTo());
9761017 Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);
9771018
9781019 for (int i = 0; i < VF; ++i)
2424
2525 namespace llvm {
2626
27 class BasicBlock; class Instruction; class Type;
28 class VectorType; class StoreInst; class Value;
29 class ScalarEvolution; class DataLayout;
30 class TargetTransformInfo; class AliasAnalysis;
27 class BasicBlock;
28 class Instruction;
29 class Type;
30 class VectorType;
31 class StoreInst;
32 class Value;
33 class ScalarEvolution;
34 class DataLayout;
35 class TargetTransformInfo;
36 class AliasAnalysis;
3137 class Loop;
3238
3339 /// Bottom Up SLP vectorization utility class.
34 struct BoUpSLP {
35 typedef SmallVector ValueList;
36 typedef SmallVector InstrList;
37 typedef SmallPtrSet ValueSet;
38 typedef SmallVector StoreList;
39 static const int max_cost = 1<<20;
40 struct BoUpSLP {
41 typedef SmallVector ValueList;
42 typedef SmallVector InstrList;
43 typedef SmallPtrSet ValueSet;
44 typedef SmallVector StoreList;
45 static const int max_cost = 1 << 20;
4046
4147 // \brief C'tor.
4248 BoUpSLP(BasicBlock *Bb, ScalarEvolution *Se, DataLayout *Dl,
43 TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp);
49 TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp);
4450
4551 /// \brief Take the pointer operand from the Load/Store instruction.
4652 /// \returns NULL if this is not a valid Load/Store instruction.
7278 bool vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold);
7379
7480 /// \brief Vectorize a group of scalars into a vector tree.
75 /// \returns the vectorized value.
81 /// \returns the vectorized value.
7682 Value *vectorizeArith(ArrayRef<Value *> Operands);
7783
7884 /// \returns the list of new instructions that were added in order to collect
7985 /// scalars into vectors. This list can be used to further optimize the gather
8086 /// sequences.
81 InstrList &getGatherSeqInstructions() {return GatherInstructions; }
87 InstrList &getGatherSeqInstructions() { return GatherInstructions; }
8288
8389 private:
8490 /// \brief This method contains the recursive part of getTreeCost.
129135
130136 private:
131137 /// Maps instructions to numbers and back.
132 SmallDenseMap<Value*, int> InstrIdx;
138 SmallDenseMap<Value *, int> InstrIdx;
133139 /// Maps integers to Instructions.
134 std::vector<Instruction*> InstrVec;
140 std::vector<Instruction *> InstrVec;
135141
136142 // -- containers that are used during getTreeCost -- //
137143
143149 /// Contains values that have users outside of the vectorized graph.
144150 /// We need to generate extract instructions for these values.
145151 /// NOTICE: The vectorization methods also use this set.
146 SetVector<Value*> MustExtract;
152 SetVector<Value *> MustExtract;
147153
148154 /// Contains a list of values that are used outside the current tree. This
149155 /// set must be reset between runs.
150 SetVector<Value*> MultiUserVals;
156 SetVector<Value *> MultiUserVals;
151157 /// Maps values in the tree to the vector lanes that uses them. This map must
152158 /// be reset between runs of getCost.
153 std::map<Value*, int> LaneMap;
159 std::map<Value *, int> LaneMap;
154160 /// A list of instructions to ignore while sinking
155161 /// memory instructions. This map must be reset between runs of getCost.
156162 ValueSet MemBarrierIgnoreList;
158164 // -- Containers that are used during vectorizeTree -- //
159165
160166 /// Maps between the first scalar to the vector. This map must be reset
161 ///between runs.
162 DenseMap<Value*, Value*> VectorizedValues;
167 /// between runs.
168 DenseMap<Value *, Value *> VectorizedValues;
163169
164170 // -- Containers that are used after vectorization by the caller -- //
165171
168174 /// Iterating over this list is faster than calling LICM.
169175 /// Notice: We insert NULL ptrs to separate between the different gather
170176 /// sequences.
171 InstrList GatherInstructions;
177 InstrList GatherInstructions;
172178
173179 /// Instruction builder to construct the vectorized tree.
174180 IRBuilder<> Builder;