llvm.org GIT mirror llvm / bef377b
Introduce the VectorizeConfig class, with which we can control the behavior of the BBVectorizePass without using command line options. As pointed out by Hal, we can ask the TargetLoweringInfo for the architecture-specific VectorizeConfig to perform vectorization with architecture-specific information. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154096 91177308-0d34-0410-b5e6-96231b3b80d8 Hongbin Zheng 8 years ago
2 changed file(s) with 126 addition(s) and 34 deletion(s). Raw diff Collapse all Expand all
1919 class BasicBlockPass;
2020
2121 //===----------------------------------------------------------------------===//
/// @brief Configuration knobs for the basic-block vectorizer.
struct VectorizeConfig {
  //===--------------------------------------------------------------------===//
  // Target architecture related parameters

  /// @brief Width, in bits, of the target's native vector registers.
  unsigned VectorBits;

  /// @brief If true, integer values are not vectorized.
  bool NoInts;

  /// @brief If true, floating-point values are not vectorized.
  bool NoFloats;

  /// @brief If true, casting (conversion) operations are not vectorized.
  bool NoCasts;

  /// @brief If true, floating-point math intrinsics are not vectorized.
  bool NoMath;

  /// @brief If true, the fused-multiply-add intrinsic is not vectorized.
  bool NoFMA;

  /// @brief If true, loads and stores are not vectorized.
  bool NoMemOps;

  /// @brief If true, only aligned loads and stores are generated.
  bool AlignedOnly;

  //===--------------------------------------------------------------------===//
  // Misc parameters

  /// @brief Chain depth required before vectorization is attempted.
  unsigned ReqChainDepth;

  /// @brief Maximum distance over which to search for candidate
  /// instruction pairs.
  unsigned SearchLimit;

  /// @brief Largest number of candidate pairs for which the full
  /// cycle check is still performed.
  unsigned MaxCandPairsForCycleCheck;

  /// @brief If true, replicating one element to a pair breaks the chain.
  bool SplatBreaksChain;

  /// @brief Upper bound on the number of pairable instructions per group.
  unsigned MaxInsts;

  /// @brief Upper bound on the number of pairing iterations.
  unsigned MaxIter;

  /// @brief If true, do not boost the chain-depth contribution of loads
  /// and stores.
  bool NoMemOpBoost;

  /// @brief If true, use a fast (but less thorough) instruction dependency
  /// analysis.
  bool FastDep;

  /// @brief Initialize the VectorizeConfig from command line options.
  VectorizeConfig();
};
82
83 //===----------------------------------------------------------------------===//
2284 //
2385 // BBVectorize - A basic-block vectorization pass.
2486 //
25 BasicBlockPass *createBBVectorizePass();
87 BasicBlockPass *
88 createBBVectorizePass(const VectorizeConfig &C = VectorizeConfig());
2689
2790 //===----------------------------------------------------------------------===//
2891 /// @brief Vectorize the BasicBlock.
3497 ///
3598 /// @return True if the BB is changed, false otherwise.
3699 ///
37 bool vectorizeBasicBlock(Pass *P, BasicBlock &BB);
100 bool vectorizeBasicBlock(Pass *P, BasicBlock &BB,
101 const VectorizeConfig &C = VectorizeConfig());
38102
39103 } // End llvm namespace
40104
139139 namespace {
140140 struct BBVectorize : public BasicBlockPass {
141141 static char ID; // Pass identification, replacement for typeid
142 BBVectorize() : BasicBlockPass(ID) {
142
143 VectorizeConfig Config;
144
145 BBVectorize(const VectorizeConfig &C = VectorizeConfig())
146 : BasicBlockPass(ID), Config(C) {
143147 initializeBBVectorizePass(*PassRegistry::getPassRegistry());
144148 }
145149
146 BBVectorize(Pass *P) : BasicBlockPass(ID) {
150 BBVectorize(Pass *P, const VectorizeConfig &C)
151 : BasicBlockPass(ID), Config(C) {
147152 AA = &P->getAnalysis();
148153 SE = &P->getAnalysis();
149154 TD = P->getAnalysisIfAvailable();
290295 // Iterate a sufficient number of times to merge types of size 1 bit,
291296 // then 2 bits, then 4, etc. up to half of the target vector width of the
292297 // target vector register.
293 for (unsigned v = 2, n = 1; v <= VectorBits && (!MaxIter || n <= MaxIter);
298 for (unsigned v = 2, n = 1;
299 v <= Config.VectorBits && (!Config.MaxIter || n <= Config.MaxIter);
294300 v *= 2, ++n) {
295 DEBUG(dbgs() << "BBV: fusing loop #" << n <<
301 DEBUG(dbgs() << "BBV: fusing loop #" << n <<
296302 " for " << BB.getName() << " in " <<
297303 BB.getParent()->getName() << "...\n");
298304 if (vectorizePairs(BB))
342348 // candidate chains where longer chains are considered to be better.
343349 // Note: when this function returns 0, the resulting instructions are
344350 // not actually fused.
345 static inline size_t getDepthFactor(Value *V) {
351 inline size_t getDepthFactor(Value *V) {
346352 // InsertElement and ExtractElement have a depth factor of zero. This is
347353 // for two reasons: First, they cannot be usefully fused. Second, because
348354 // the pass generates a lot of these, they can confuse the simple metric
356362
357363 // Give a load or store half of the required depth so that load/store
358364 // pairs will vectorize.
359 if (!NoMemOpBoost && (isa(V) || isa(V)))
360 return ReqChainDepth/2;
365 if (!Config.NoMemOpBoost && (isa(V) || isa(V)))
366 return Config.ReqChainDepth/2;
361367
362368 return 1;
363369 }
430436 case Intrinsic::exp:
431437 case Intrinsic::exp2:
432438 case Intrinsic::pow:
433 return !NoMath;
439 return !Config.NoMath;
434440 case Intrinsic::fma:
435 return !NoFMA;
441 return !Config.NoFMA;
436442 }
437443 }
438444
526532 } else if (LoadInst *L = dyn_cast(I)) {
527533 // Vectorize simple loads if possbile:
528534 IsSimpleLoadStore = L->isSimple();
529 if (!IsSimpleLoadStore || NoMemOps)
535 if (!IsSimpleLoadStore || Config.NoMemOps)
530536 return false;
531537 } else if (StoreInst *S = dyn_cast(I)) {
532538 // Vectorize simple stores if possbile:
533539 IsSimpleLoadStore = S->isSimple();
534 if (!IsSimpleLoadStore || NoMemOps)
540 if (!IsSimpleLoadStore || Config.NoMemOps)
535541 return false;
536542 } else if (CastInst *C = dyn_cast(I)) {
537543 // We can vectorize casts, but not casts of pointer types, etc.
538 if (NoCasts)
544 if (Config.NoCasts)
539545 return false;
540546
541547 Type *SrcTy = C->getSrcTy();
575581 !(VectorType::isValidElementType(T2) || T2->isVectorTy()))
576582 return false;
577583
578 if (NoInts && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy()))
584 if (Config.NoInts && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy()))
579585 return false;
580586
581 if (NoFloats && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy()))
587 if (Config.NoFloats && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy()))
582588 return false;
583589
584 if (T1->getPrimitiveSizeInBits() > VectorBits/2 ||
585 T2->getPrimitiveSizeInBits() > VectorBits/2)
590 if (T1->getPrimitiveSizeInBits() > Config.VectorBits/2 ||
591 T2->getPrimitiveSizeInBits() > Config.VectorBits/2)
586592 return false;
587593
588594 return true;
610616 LI->isVolatile() != LJ->isVolatile() ||
611617 LI->getOrdering() != LJ->getOrdering() ||
612618 LI->getSynchScope() != LJ->getSynchScope())
613 return false;
619 return false;
614620 } else if ((SI = dyn_cast(I)) && (SJ = dyn_cast(J))) {
615621 if (SI->getValueOperand()->getType() !=
616622 SJ->getValueOperand()->getType() ||
631637 int64_t OffsetInElmts = 0;
632638 if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
633639 OffsetInElmts) && abs64(OffsetInElmts) == 1) {
634 if (AlignedOnly) {
640 if (Config.AlignedOnly) {
635641 Type *aType = isa(I) ?
636642 cast(I)->getValueOperand()->getType() : I->getType();
637643 // An aligned load or store is possible only if the instruction
752758 AliasSetTracker WriteSet(*AA);
753759 bool JAfterStart = IAfterStart;
754760 BasicBlock::iterator J = llvm::next(I);
755 for (unsigned ss = 0; J != E && ss <= SearchLimit; ++J, ++ss) {
761 for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) {
756762 if (J == Start) JAfterStart = true;
757763
758764 // Determine if J uses I, if so, exit the loop.
759 bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !FastDep);
760 if (FastDep) {
765 bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !Config.FastDep);
766 if (Config.FastDep) {
761767 // Note: For this heuristic to be effective, independent operations
762768 // must tend to be intermixed. This is likely to be true from some
763769 // kinds of grouped loop unrolling (but not the generic LLVM pass),
795801 // If we have already found too many pairs, break here and this function
796802 // will be called again starting after the last instruction selected
797803 // during this invocation.
798 if (PairableInsts.size() >= MaxInsts) {
804 if (PairableInsts.size() >= Config.MaxInsts) {
799805 ShouldContinue = true;
800806 break;
801807 }
840846 ConnectedPairs.insert(VPPair(P, ValuePair(*J, *I)));
841847 }
842848
843 if (SplatBreaksChain) continue;
849 if (Config.SplatBreaksChain) continue;
844850 // Look for cases where just the first value in the pair is used by
845851 // both members of another pair (splatting).
846852 for (Value::use_iterator J = P.first->use_begin(); J != E; ++J) {
849855 }
850856 }
851857
852 if (SplatBreaksChain) return;
858 if (Config.SplatBreaksChain) return;
853859 // Look for cases where just the second value in the pair is used by
854860 // both members of another pair (splatting).
855861 for (Value::use_iterator I = P.second->use_begin(),
12791285 << *J->first << " <-> " << *J->second << "} of depth " <<
12801286 MaxDepth << " and size " << PrunedTree.size() <<
12811287 " (effective size: " << EffSize << ")\n");
1282 if (MaxDepth >= ReqChainDepth && EffSize > BestEffSize) {
1288 if (MaxDepth >= Config.ReqChainDepth && EffSize > BestEffSize) {
12831289 BestMaxDepth = MaxDepth;
12841290 BestEffSize = EffSize;
12851291 BestTree = PrunedTree;
12951301 std::multimap &ConnectedPairs,
12961302 DenseSet &PairableInstUsers,
12971303 DenseMap& ChosenPairs) {
1298 bool UseCycleCheck = CandidatePairs.size() <= MaxCandPairsForCycleCheck;
1304 bool UseCycleCheck =
1305 CandidatePairs.size() <= Config.MaxCandPairsForCycleCheck;
12991306 std::multimap PairableInstUserMap;
13001307 for (std::vector::iterator I = PairableInsts.begin(),
13011308 E = PairableInsts.end(); I != E; ++I) {
15461553 unsigned IID = F->getIntrinsicID();
15471554 if (o == NumOperands-1) {
15481555 BasicBlock &BB = *I->getParent();
1549
1556
15501557 Module *M = BB.getParent()->getParent();
15511558 Type *ArgType = I->getType();
15521559 Type *VArgType = getVecTypeForPair(ArgType);
1553
1560
15541561 // FIXME: is it safe to do this here?
15551562 ReplacedOperands[o] = Intrinsic::getDeclaration(M,
15561563 (Intrinsic::ID) IID, VArgType);
18661873 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
18671874 INITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
18681875
1869 BasicBlockPass *llvm::createBBVectorizePass() {
1870 return new BBVectorize();
1876 BasicBlockPass *llvm::createBBVectorizePass(const VectorizeConfig &C) {
1877 return new BBVectorize(C);
18711878 }
18721879
1873 bool llvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB) {
1874 BBVectorize BBVectorizer(P);
1880 bool
1881 llvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB, const VectorizeConfig &C) {
1882 BBVectorize BBVectorizer(P, C);
18751883 return BBVectorizer.vectorizeBB(BB);
18761884 }
1885
1886 //===----------------------------------------------------------------------===//
1887 VectorizeConfig::VectorizeConfig() {
1888 VectorBits = ::VectorBits;
1889 NoInts = ::NoInts;
1890 NoFloats = ::NoFloats;
1891 NoCasts = ::NoCasts;
1892 NoMath = ::NoMath;
1893 NoFMA = ::NoFMA;
1894 NoMemOps = ::NoMemOps;
1895 AlignedOnly = ::AlignedOnly;
1896 ReqChainDepth= ::ReqChainDepth;
1897 SearchLimit = ::SearchLimit;
1898 MaxCandPairsForCycleCheck = ::MaxCandPairsForCycleCheck;
1899 SplatBreaksChain = ::SplatBreaksChain;
1900 MaxInsts = ::MaxInsts;
1901 MaxIter = ::MaxIter;
1902 NoMemOpBoost = ::NoMemOpBoost;
1903 FastDep = ::FastDep;
1904 }