llvm.org GIT mirror llvm / 71667f5
Merging r324195: ------------------------------------------------------------------------ r324195 | mcrosier | 2018-02-04 16:42:24 +0100 (Sun, 04 Feb 2018) | 12 lines [LV] Use Demanded Bits and ValueTracking for reduction type-shrinking The type-shrinking logic in reduction detection, although narrow in scope, is also rather ad-hoc, which has led to bugs (e.g., PR35734). This patch modifies the approach to rely on the demanded bits and value tracking analyses, if available. We currently perform type-shrinking separately for reductions and other instructions in the loop. Long-term, we should probably think about computing minimal bit widths in a more complete way for the loops we want to vectorize. PR35734 Differential Revision: https://reviews.llvm.org/D42309 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_60@325508 91177308-0d34-0410-b5e6-96231b3b80d8 Hans Wennborg 1 year, 6 months ago
4 changed file(s) with 211 addition(s) and 103 deletion(s). Raw diff Collapse all Expand all
2020 #include "llvm/ADT/SmallVector.h"
2121 #include "llvm/ADT/StringRef.h"
2222 #include "llvm/Analysis/AliasAnalysis.h"
23 #include "llvm/Analysis/DemandedBits.h"
2324 #include "llvm/Analysis/EHPersonalities.h"
2425 #include "llvm/Analysis/TargetTransformInfo.h"
2526 #include "llvm/IR/Dominators.h"
171172 Value *Left, Value *Right);
172173
173174 /// Returns true if Phi is a reduction of type Kind and adds it to the
174 /// RecurrenceDescriptor.
175 /// RecurrenceDescriptor. If either \p DB is non-null or \p AC and \p DT are
176 /// non-null, the minimal bit width needed to compute the reduction will be
177 /// computed.
175178 static bool AddReductionVar(PHINode *Phi, RecurrenceKind Kind, Loop *TheLoop,
176179 bool HasFunNoNaNAttr,
177 RecurrenceDescriptor &RedDes);
178
179 /// Returns true if Phi is a reduction in TheLoop. The RecurrenceDescriptor is
180 /// returned in RedDes.
180 RecurrenceDescriptor &RedDes,
181 DemandedBits *DB = nullptr,
182 AssumptionCache *AC = nullptr,
183 DominatorTree *DT = nullptr);
184
185 /// Returns true if Phi is a reduction in TheLoop. The RecurrenceDescriptor
186 /// is returned in RedDes. If either \p DB is non-null or \p AC and \p DT are
187 /// non-null, the minimal bit width needed to compute the reduction will be
188 /// computed.
181189 static bool isReductionPHI(PHINode *Phi, Loop *TheLoop,
182 RecurrenceDescriptor &RedDes);
190 RecurrenceDescriptor &RedDes,
191 DemandedBits *DB = nullptr,
192 AssumptionCache *AC = nullptr,
193 DominatorTree *DT = nullptr);
183194
184195 /// Returns true if Phi is a first-order recurrence. A first-order recurrence
185196 /// is a non-reduction recurrence relation in which the value of the
216227
217228 /// Returns true if the recurrence kind is an arithmetic kind.
218229 static bool isArithmeticRecurrenceKind(RecurrenceKind Kind);
219
220 /// Determines if Phi may have been type-promoted. If Phi has a single user
221 /// that ANDs the Phi with a type mask, return the user. RT is updated to
222 /// account for the narrower bit width represented by the mask, and the AND
223 /// instruction is added to CI.
224 static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,
225 SmallPtrSetImpl &Visited,
226 SmallPtrSetImpl &CI);
227
228 /// Returns true if all the source operands of a recurrence are either
229 /// SExtInsts or ZExtInsts. This function is intended to be used with
230 /// lookThroughAnd to determine if the recurrence has been type-promoted. The
231 /// source operands are added to CI, and IsSigned is updated to indicate if
232 /// all source operands are SExtInsts.
233 static bool getSourceExtensionKind(Instruction *Start, Instruction *Exit,
234 Type *RT, bool &IsSigned,
235 SmallPtrSetImpl &Visited,
236 SmallPtrSetImpl &CI);
237230
238231 /// Returns the type of the recurrence. This type can be narrower than the
239232 /// actual type of the Phi if the recurrence has been type-promoted.
2222 #include "llvm/Analysis/ScalarEvolutionExpander.h"
2323 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
2424 #include "llvm/Analysis/TargetTransformInfo.h"
25 #include "llvm/Analysis/ValueTracking.h"
2526 #include "llvm/IR/Dominators.h"
2627 #include "llvm/IR/Instructions.h"
2728 #include "llvm/IR/Module.h"
2930 #include "llvm/IR/ValueHandle.h"
3031 #include "llvm/Pass.h"
3132 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/KnownBits.h"
3234 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
3335
3436 using namespace llvm;
7678 return false;
7779 }
7880
79 Instruction *
80 RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT,
81 SmallPtrSetImpl &Visited,
82 SmallPtrSetImpl &CI) {
81 /// Determines if Phi may have been type-promoted. If Phi has a single user
82 /// that ANDs the Phi with a type mask, return the user. RT is updated to
83 /// account for the narrower bit width represented by the mask, and the AND
84 /// instruction is added to CI.
85 static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,
86 SmallPtrSetImpl &Visited,
87 SmallPtrSetImpl &CI) {
8388 if (!Phi->hasOneUse())
8489 return Phi;
8590
100105 return Phi;
101106 }
102107
103 bool RecurrenceDescriptor::getSourceExtensionKind(
104 Instruction *Start, Instruction *Exit, Type *RT, bool &IsSigned,
105 SmallPtrSetImpl &Visited,
106 SmallPtrSetImpl &CI) {
108 /// Compute the minimal bit width needed to represent a reduction whose exit
109 /// instruction is given by Exit.
/// The result pairs an integer type of the computed width (rounded up to a
/// power of two) with a flag that is true when a sext, rather than a zext,
/// should be used to restore the original type. \p DB is consulted first if
/// it is non-null; value tracking is only tried when \p AC and \p DT are both
/// non-null and demanded bits did not narrow the width.
110 static std::pair computeRecurrenceType(Instruction *Exit,
111 DemandedBits *DB,
112 AssumptionCache *AC,
113 DominatorTree *DT) {
114 bool IsSigned = false;
115 const DataLayout &DL = Exit->getModule()->getDataLayout();
116 uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());
117
118 if (DB) {
119 // Use the demanded bits analysis to determine the bits that are live out
120 // of the exit instruction, rounding up to the nearest power of two. If the
121 // use of demanded bits results in a smaller bit width, we know the value
122 // must be positive (i.e., IsSigned = false), because if this were not the
123 // case, the sign bit would have been demanded.
124 auto Mask = DB->getDemandedBits(Exit);
125 MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros();
126 }
127
128 if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
129 // If demanded bits wasn't able to limit the bit width, we can try to use
130 // value tracking instead. This can be the case, for example, if the value
131 // may be negative.
132 auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT);
133 auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());
134 MaxBitWidth = NumTypeBits - NumSignBits;
135 KnownBits Bits = computeKnownBits(Exit, DL);
136 if (!Bits.isNonNegative()) {
137 // If the value is not known to be non-negative, we set IsSigned to true,
138 // meaning that we will use sext instructions instead of zext
139 // instructions to restore the original type.
140 IsSigned = true;
141 if (!Bits.isNegative())
142 // If the value is not known to be negative, we don't know what the
143 // upper bit is, and therefore, we don't know what kind of extend we
144 // will need. In this case, just increase the bit width by one bit and
145 // use sext.
146 ++MaxBitWidth;
147 }
148 }
// The width is rounded up to a power of two before the integer type is built.
149 if (!isPowerOf2_64(MaxBitWidth))
150 MaxBitWidth = NextPowerOf2(MaxBitWidth);
151
152 return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
153 IsSigned);
154 }
155
156 /// Collect cast instructions that can be ignored in the vectorizer's cost
157 /// model, given a reduction exit value and the minimal type in which the
158 /// reduction can be represented.
159 static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit,
160 Type *RecurrenceType,
161 SmallPtrSetImpl &Casts) {
107162
108163 SmallVector Worklist;
109 bool FoundOneOperand = false;
110 unsigned DstSize = RT->getPrimitiveSizeInBits();
164 SmallPtrSet Visited;
111165 Worklist.push_back(Exit);
112166
113 // Traverse the instructions in the reduction expression, beginning with the
114 // exit value.
115167 while (!Worklist.empty()) {
116 Instruction *I = Worklist.pop_back_val();
117 for (Use &U : I->operands()) {
118
119 // Terminate the traversal if the operand is not an instruction, or we
120 // reach the starting value.
121 Instruction *J = dyn_cast(U.get());
122 if (!J || J == Start)
123 continue;
124
125 // Otherwise, investigate the operation if it is also in the expression.
126 if (Visited.count(J)) {
127 Worklist.push_back(J);
168 Instruction *Val = Worklist.pop_back_val();
169 Visited.insert(Val);
170 if (auto *Cast = dyn_cast(Val))
171 if (Cast->getSrcTy() == RecurrenceType) {
172 // If the source type of a cast instruction is equal to the recurrence
173 // type, it will be eliminated, and should be ignored in the vectorizer
174 // cost model.
175 Casts.insert(Cast);
128176 continue;
129177 }
130178
131 // If the operand is not in Visited, it is not a reduction operation, but
132 // it does feed into one. Make sure it is either a single-use sign- or
133 // zero-extend instruction.
134 CastInst *Cast = dyn_cast(J);
135 bool IsSExtInst = isa(J);
136 if (!Cast || !Cast->hasOneUse() || !(isa(J) || IsSExtInst))
137 return false;
138
139 // Ensure the source type of the extend is no larger than the reduction
140 // type. It is not necessary for the types to be identical.
141 unsigned SrcSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
142 if (SrcSize > DstSize)
143 return false;
144
145 // Furthermore, ensure that all such extends are of the same kind.
146 if (FoundOneOperand) {
147 if (IsSigned != IsSExtInst)
148 return false;
149 } else {
150 FoundOneOperand = true;
151 IsSigned = IsSExtInst;
152 }
153
154 // Lastly, if the source type of the extend matches the reduction type,
155 // add the extend to CI so that we can avoid accounting for it in the
156 // cost model.
157 if (SrcSize == DstSize)
158 CI.insert(Cast);
159 }
160 }
161 return true;
179 // Add all operands to the work list if they are loop-varying values that
180 // we haven't yet visited.
181 for (Value *O : cast(Val)->operands())
182 if (auto *I = dyn_cast(O))
183 if (TheLoop->contains(I) && !Visited.count(I))
184 Worklist.push_back(I);
185 }
162186 }
163187
164188 bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
165189 Loop *TheLoop, bool HasFunNoNaNAttr,
166 RecurrenceDescriptor &RedDes) {
190 RecurrenceDescriptor &RedDes,
191 DemandedBits *DB,
192 AssumptionCache *AC,
193 DominatorTree *DT) {
167194 if (Phi->getNumIncomingValues() != 2)
168195 return false;
169196
352379 if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
353380 return false;
354381
355 // If we think Phi may have been type-promoted, we also need to ensure that
356 // all source operands of the reduction are either SExtInsts or ZEstInsts. If
357 // so, we will be able to evaluate the reduction in the narrower bit width.
358 if (Start != Phi)
359 if (!getSourceExtensionKind(Start, ExitInstruction, RecurrenceType,
360 IsSigned, VisitedInsts, CastInsts))
382 if (Start != Phi) {
383 // If the starting value is not the same as the phi node, we speculatively
384 // looked through an 'and' instruction when evaluating a potential
385 // arithmetic reduction to determine if it may have been type-promoted.
386 //
387 // We now compute the minimal bit width that is required to represent the
388 // reduction. If this is the same width that was indicated by the 'and', we
389 // can represent the reduction in the smaller type. The 'and' instruction
390 // will be eliminated since it will essentially be a cast instruction that
391 // can be ignored in the cost model. If we compute a different type than we
392 // did when evaluating the 'and', the 'and' will not be eliminated, and we
393 // will end up with different kinds of operations in the recurrence
394 // expression (e.g., RK_IntegerAND, RK_IntegerADD). We give up if this is
395 // the case.
396 //
397 // The vectorizer relies on InstCombine to perform the actual
398 // type-shrinking. It does this by inserting instructions to truncate the
399 // exit value of the reduction to the width indicated by RecurrenceType and
400 // then extend this value back to the original width. If IsSigned is false,
401 // a 'zext' instruction will be generated; otherwise, a 'sext' will be
402 // used.
403 //
404 // TODO: We should not rely on InstCombine to rewrite the reduction in the
405 // smaller type. We should just generate a correctly typed expression
406 // to begin with.
407 Type *ComputedType;
408 std::tie(ComputedType, IsSigned) =
409 computeRecurrenceType(ExitInstruction, DB, AC, DT);
410 if (ComputedType != RecurrenceType)
361411 return false;
412
413 // The recurrence expression will be represented in a narrower type. If
414 // there are any cast instructions that will be unnecessary, collect them
415 // in CastInsts. Note that the 'and' instruction was already included in
416 // this list.
417 //
418 // TODO: A better way to represent this may be to tag in some way all the
419 // instructions that are a part of the reduction. The vectorizer cost
420 // model could then apply the recurrence type to these instructions,
421 // without needing a white list of instructions to ignore.
422 collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts);
423 }
362424
363425 // We found a reduction var if we have reached the original phi node and we
364426 // only have a single instruction with out-of-loop users.
479541 return false;
480542 }
481543 bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
482 RecurrenceDescriptor &RedDes) {
544 RecurrenceDescriptor &RedDes,
545 DemandedBits *DB, AssumptionCache *AC,
546 DominatorTree *DT) {
483547
484548 BasicBlock *Header = TheLoop->getHeader();
485549 Function &F = *Header->getParent();
486550 bool HasFunNoNaNAttr =
487551 F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
488552
489 if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes)) {
553 if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
554 AC, DT)) {
490555 DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
491556 return true;
492557 }
493 if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes)) {
558 if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
559 AC, DT)) {
494560 DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
495561 return true;
496562 }
497 if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes)) {
563 if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes, DB,
564 AC, DT)) {
498565 DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
499566 return true;
500567 }
501 if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes)) {
568 if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
569 AC, DT)) {
502570 DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
503571 return true;
504572 }
505 if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes)) {
573 if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes, DB,
574 AC, DT)) {
506575 DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
507576 return true;
508577 }
509 if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr,
510 RedDes)) {
578 if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, RedDes,
579 DB, AC, DT)) {
511580 DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n");
512581 return true;
513582 }
514 if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes)) {
583 if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
584 AC, DT)) {
515585 DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
516586 return true;
517587 }
518 if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes)) {
588 if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
589 AC, DT)) {
519590 DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
520591 return true;
521592 }
522 if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes)) {
593 if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes, DB,
594 AC, DT)) {
523595 DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi << "\n");
524596 return true;
525597 }
15411541 const TargetTransformInfo *TTI,
15421542 std::function *GetLAA, LoopInfo *LI,
15431543 OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R,
1544 LoopVectorizeHints *H)
1544 LoopVectorizeHints *H, DemandedBits *DB, AssumptionCache *AC)
15451545 : TheLoop(L), PSE(PSE), TLI(TLI), TTI(TTI), DT(DT), GetLAA(GetLAA),
1546 ORE(ORE), InterleaveInfo(PSE, L, DT, LI), Requirements(R), Hints(H) {}
1546 ORE(ORE), InterleaveInfo(PSE, L, DT, LI), Requirements(R), Hints(H),
1547 DB(DB), AC(AC) {}
15471548
15481549 /// ReductionList contains the reduction descriptors for all
15491550 /// of the reductions that were found in the loop.
18311832
18321833 /// Used to emit an analysis of any legality issues.
18331834 LoopVectorizeHints *Hints;
1835
1836 /// The demanded bits analysis is used to compute the minimum type size in
1837 /// which a reduction can be computed.
1838 DemandedBits *DB;
1839
1840 /// The assumption cache analysis is used to compute the minimum type size in
1841 /// which a reduction can be computed.
1842 AssumptionCache *AC;
18341843
18351844 /// While vectorizing these instructions we have to generate a
18361845 /// call to the appropriate masked intrinsic
52995308 }
53005309
53015310 RecurrenceDescriptor RedDes;
5302 if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes)) {
5311 if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
5312 DT)) {
53035313 if (RedDes.hasUnsafeAlgebra())
53045314 Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
53055315 AllowedExit.insert(RedDes.getLoopExitInstr());
85138523 // Check if it is legal to vectorize the loop.
85148524 LoopVectorizationRequirements Requirements(*ORE);
85158525 LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, TTI, GetLAA, LI, ORE,
8516 &Requirements, &Hints);
8526 &Requirements, &Hints, DB, AC);
85178527 if (!LVL.canVectorize()) {
85188528 DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
85198529 emitMissedWarning(F, L, Hints, ORE);
1313 ; CHECK-NEXT: [[TMP17]] = zext <4 x i8> [[TMP16]] to <4 x i32>
1414 ; CHECK-NEXT: br i1 {{.*}}, label %middle.block, label %vector.body
1515 ;
16 define void @PR34687(i1 %c, i32 %x, i32 %n) {
16 define i8 @PR34687(i1 %c, i32 %x, i32 %n) {
1717 entry:
1818 br label %for.body
1919
3535
3636 for.end:
3737 %tmp2 = phi i32 [ %r.next, %if.end ]
38 ret void
38 %tmp3 = trunc i32 %tmp2 to i8
39 ret i8 %tmp3
3940 }
41
42 ; CHECK-LABEL: @PR35734(
43 ; CHECK: vector.ph:
44 ; CHECK: [[TMP3:%.*]] = insertelement <4 x i32> zeroinitializer, i32 %y, i32 0
45 ; CHECK-NEXT: br label %vector.body
46 ; CHECK: vector.body:
47 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
48 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP3]], %vector.ph ], [ [[TMP9:%.*]], %vector.body ]
49 ; CHECK: [[TMP5:%.*]] = and <4 x i32> [[VEC_PHI]],
50 ; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]],
51 ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
52 ; CHECK: [[TMP8:%.*]] = trunc <4 x i32> [[TMP6]] to <4 x i1>
53 ; CHECK-NEXT: [[TMP9]] = sext <4 x i1> [[TMP8]] to <4 x i32>
54 ; CHECK-NEXT: br i1 {{.*}}, label %middle.block, label %vector.body
55 ;
56 define i32 @PR35734(i32 %x, i32 %y) {
57 entry:
58 br label %for.body
59
60 for.body:
61 %i = phi i32 [ %x, %entry ], [ %i.next, %for.body ]
62 %r = phi i32 [ %y, %entry ], [ %r.next, %for.body ]
63 %tmp0 = and i32 %r, 1
64 %r.next = add i32 %tmp0, -1
65 %i.next = add nsw i32 %i, 1
66 %cond = icmp sgt i32 %i, 77
67 br i1 %cond, label %for.end, label %for.body
68
69 for.end:
70 %tmp1 = phi i32 [ %r.next, %for.body ]
71 ret i32 %tmp1
72 }