llvm.org GIT mirror llvm / 81db61a
Factor the code for collecting IV users out of LSR into an IVUsers class, and generalize it so that it can be used by IndVarSimplify. Implement the base IndVarSimplify transformation code using IVUsers. This removes TestOrigIVForWrap and associated code, as ScalarEvolution now has enough builtin overflow detection and folding logic to handle all the same cases, and more. Run "opt -iv-users -analyze -disable-output" on your favorite loop for an example of what IVUsers does. This lets IndVarSimplify eliminate IV casts and compute trip counts in more cases. Also, this happens to finally fix the remaining testcases in PR1301. Now that IndVarSimplify is being more aggressive, it occasionally runs into the problem where ScalarEvolutionExpander's code for avoiding duplicate expansions makes it difficult to ensure that all expanded instructions dominate all the instructions that will use them. As a temporary measure, IndVarSimplify now uses a FixUsesBeforeDefs function to fix up instructions inserted by SCEVExpander. Fortunately, this code is contained, and can be easily removed once a more comprehensive solution is available. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@71535 91177308-0d34-0410-b5e6-96231b3b80d8 Dan Gohman 10 years ago
13 changed file(s) with 1402 addition(s) and 970 deletion(s). Raw diff Collapse all Expand all
0 //===- llvm/Analysis/IVUsers.h - Induction Variable Users -------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements bookkeeping for "interesting" users of expressions
10 // computed from induction variables.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef LLVM_ANALYSIS_IVUSERS_H
15 #define LLVM_ANALYSIS_IVUSERS_H
16
17 #include "llvm/Analysis/LoopPass.h"
18 #include "llvm/Analysis/ScalarEvolution.h"
19 #include
20 #include
21
22 namespace llvm {
23
24 class DominatorTree;
25 class Instruction;
26 class Value;
27 class IVUsersOfOneStride;
28
29 /// IVStrideUse - Keep track of one use of a strided induction variable, where
30 /// the stride is stored externally. The Offset member keeps track of the
31 /// offset from the IV, User is the actual user of the operand, and
32 /// 'OperandValToReplace' is the operand of the User that is the use.
33 class IVStrideUse : public CallbackVH, public ilist_node {
34 public:
35 IVStrideUse(IVUsersOfOneStride *parent,
36 const SCEVHandle &offset,
37 Instruction* U, Value *O, bool issigned)
38 : CallbackVH(U), Parent(parent), Offset(offset),
39 OperandValToReplace(O), IsSigned(issigned),
40 IsUseOfPostIncrementedValue(false) {
41 }
42
43 /// getUser - Return the user instruction for this use.
44 Instruction *getUser() const {
45 return cast(getValPtr());
46 }
47
48 /// setUser - Assign a new user instruction for this use.
49 void setUser(Instruction *NewUser) {
50 setValPtr(NewUser);
51 }
52
53 /// getParent - Return a pointer to the IVUsersOfOneStride that owns
54 /// this IVStrideUse.
55 IVUsersOfOneStride *getParent() const { return Parent; }
56
57 /// getOffset - Return the offset to add to a theoeretical induction
58 /// variable that starts at zero and counts up by the stride to compute
59 /// the value for the use. This always has the same type as the stride,
60 /// which may need to be casted to match the type of the use.
61 SCEVHandle getOffset() const { return Offset; }
62
63 /// setOffset - Assign a new offset to this use.
64 void setOffset(SCEVHandle Val) {
65 Offset = Val;
66 }
67
68 /// getOperandValToReplace - Return the Value of the operand in the user
69 /// instruction that this IVStrideUse is representing.
70 Value *getOperandValToReplace() const {
71 return OperandValToReplace;
72 }
73
74 /// setOperandValToReplace - Assign a new Value as the operand value
75 /// to replace.
76 void setOperandValToReplace(Value *Op) {
77 OperandValToReplace = Op;
78 }
79
80 /// isSigned - The stride (and thus also the Offset) of this use may be in
81 /// a narrower type than the use itself (OperandValToReplace->getType()).
82 /// When this is the case, isSigned() indicates whether the IV expression
83 /// should be signed-extended instead of zero-extended to fit the type of
84 /// the use.
85 bool isSigned() const { return IsSigned; }
86
87 /// isUseOfPostIncrementedValue - True if this should use the
88 /// post-incremented version of this IV, not the preincremented version.
89 /// This can only be set in special cases, such as the terminating setcc
90 /// instruction for a loop or uses dominated by the loop.
91 bool isUseOfPostIncrementedValue() const {
92 return IsUseOfPostIncrementedValue;
93 }
94
95 /// setIsUseOfPostIncrmentedValue - set the flag that indicates whether
96 /// this is a post-increment use.
97 void setIsUseOfPostIncrementedValue(bool Val) {
98 IsUseOfPostIncrementedValue = Val;
99 }
100
101 private:
102 /// Parent - a pointer to the IVUsersOfOneStride that owns this IVStrideUse.
103 IVUsersOfOneStride *Parent;
104
105 /// Offset - The offset to add to the base induction expression.
106 SCEVHandle Offset;
107
108 /// OperandValToReplace - The Value of the operand in the user instruction
109 /// that this IVStrideUse is representing.
110 WeakVH OperandValToReplace;
111
112 /// IsSigned - Determines whether the replacement value is sign or
113 /// zero extended to the type of the use.
114 bool IsSigned;
115
116 /// IsUseOfPostIncrementedValue - True if this should use the
117 /// post-incremented version of this IV, not the preincremented version.
118 bool IsUseOfPostIncrementedValue;
119
120 /// Deleted - Implementation of CallbackVH virtual function to
121 /// recieve notification when the User is deleted.
122 virtual void deleted();
123 };
124
125 template<> struct ilist_traits
126 : public ilist_default_traits {
127 // createSentinel is used to get hold of a node that marks the end of
128 // the list...
129 // The sentinel is relative to this instance, so we use a non-static
130 // method.
131 IVStrideUse *createSentinel() const {
132 // since i(p)lists always publicly derive from the corresponding
133 // traits, placing a data member in this class will augment i(p)list.
134 // But since the NodeTy is expected to publicly derive from
135 // ilist_node, there is a legal viable downcast from it
136 // to NodeTy. We use this trick to superpose i(p)list with a "ghostly"
137 // NodeTy, which becomes the sentinel. Dereferencing the sentinel is
138 // forbidden (save the ilist_node) so no one will ever notice
139 // the superposition.
140 return static_cast(&Sentinel);
141 }
142 static void destroySentinel(IVStrideUse*) {}
143
144 IVStrideUse *provideInitialHead() const { return createSentinel(); }
145 IVStrideUse *ensureHead(IVStrideUse*) const { return createSentinel(); }
146 static void noteHead(IVStrideUse*, IVStrideUse*) {}
147
148 private:
149 mutable ilist_node Sentinel;
150 };
151
152 /// IVUsersOfOneStride - This structure keeps track of all instructions that
153 /// have an operand that is based on the trip count multiplied by some stride.
154 struct IVUsersOfOneStride : public ilist_node {
155 private:
156 IVUsersOfOneStride(const IVUsersOfOneStride &I); // do not implement
157 void operator=(const IVUsersOfOneStride &I); // do not implement
158
159 public:
160 IVUsersOfOneStride() : Stride(0) {}
161
162 explicit IVUsersOfOneStride(const SCEV *stride) : Stride(stride) {}
163
164 /// Stride - The stride for all the contained IVStrideUses. This is
165 /// a constant for affine strides.
166 const SCEV *Stride;
167
168 /// Users - Keep track of all of the users of this stride as well as the
169 /// initial value and the operand that uses the IV.
170 ilist Users;
171
172 void addUser(const SCEVHandle &Offset,Instruction *User, Value *Operand,
173 bool isSigned) {
174 Users.push_back(new IVStrideUse(this, Offset, User, Operand, isSigned));
175 }
176 };
177
178 class IVUsers : public LoopPass {
179 friend class IVStrideUserVH;
180 Loop *L;
181 LoopInfo *LI;
182 DominatorTree *DT;
183 ScalarEvolution *SE;
184 SmallPtrSet Processed;
185
186 public:
187 /// IVUses - A list of all tracked IV uses of induction variable expressions
188 /// we are interested in.
189 ilist IVUses;
190
191 /// IVUsesByStride - A mapping from the strides in StrideOrder to the
192 /// uses in IVUses.
193 std::map IVUsesByStride;
194
195 /// StrideOrder - An ordering of the keys in IVUsesByStride that is stable:
196 /// We use this to iterate over the IVUsesByStride collection without being
197 /// dependent on random ordering of pointers in the process.
198 SmallVector StrideOrder;
199
200 private:
201 virtual void getAnalysisUsage(AnalysisUsage &AU) const;
202
203 virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
204
205 virtual void releaseMemory();
206
207 public:
208 static char ID; // Pass ID, replacement for typeid
209 IVUsers();
210
211 /// AddUsersIfInteresting - Inspect the specified Instruction. If it is a
212 /// reducible SCEV, recursively add its users to the IVUsesByStride set and
213 /// return true. Otherwise, return false.
214 bool AddUsersIfInteresting(Instruction *I);
215
216 /// getReplacementExpr - Return a SCEV expression which computes the
217 /// value of the OperandValToReplace of the given IVStrideUse.
218 SCEVHandle getReplacementExpr(const IVStrideUse &U) const;
219
220 void print(raw_ostream &OS, const Module* = 0) const;
221 virtual void print(std::ostream &OS, const Module* = 0) const;
222 void print(std::ostream *OS, const Module* M = 0) const {
223 if (OS) print(*OS, M);
224 }
225
226 /// dump - This method is used for debugging.
227 void dump() const;
228 };
229
230 Pass *createIVUsersPass();
231
232 }
233
234 #endif
0 //===- IVUsers.cpp - Induction Variable Users -------------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements bookkeeping for "interesting" users of expressions
10 // computed from induction variables.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #define DEBUG_TYPE "iv-users"
15 #include "llvm/Analysis/IVUsers.h"
16 #include "llvm/Constants.h"
17 #include "llvm/Instructions.h"
18 #include "llvm/Type.h"
19 #include "llvm/DerivedTypes.h"
20 #include "llvm/Analysis/Dominators.h"
21 #include "llvm/Analysis/LoopInfo.h"
22 #include "llvm/Analysis/LoopPass.h"
23 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include
28 using namespace llvm;
29
30 char IVUsers::ID = 0;
31 static RegisterPass
32 X("iv-users", "Induction Variable Users", false, true);
33
34 Pass *llvm::createIVUsersPass() {
35 return new IVUsers();
36 }
37
38 /// containsAddRecFromDifferentLoop - Determine whether expression S involves a
39 /// subexpression that is an AddRec from a loop other than L. An outer loop
40 /// of L is OK, but not an inner loop nor a disjoint loop.
41 static bool containsAddRecFromDifferentLoop(SCEVHandle S, Loop *L) {
42 // This is very common, put it first.
43 if (isa(S))
44 return false;
45 if (const SCEVCommutativeExpr *AE = dyn_cast(S)) {
46 for (unsigned int i=0; i< AE->getNumOperands(); i++)
47 if (containsAddRecFromDifferentLoop(AE->getOperand(i), L))
48 return true;
49 return false;
50 }
51 if (const SCEVAddRecExpr *AE = dyn_cast(S)) {
52 if (const Loop *newLoop = AE->getLoop()) {
53 if (newLoop == L)
54 return false;
55 // if newLoop is an outer loop of L, this is OK.
56 if (!LoopInfoBase::isNotAlreadyContainedIn(L, newLoop))
57 return false;
58 }
59 return true;
60 }
61 if (const SCEVUDivExpr *DE = dyn_cast(S))
62 return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
63 containsAddRecFromDifferentLoop(DE->getRHS(), L);
64 #if 0
65 // SCEVSDivExpr has been backed out temporarily, but will be back; we'll
66 // need this when it is.
67 if (const SCEVSDivExpr *DE = dyn_cast(S))
68 return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
69 containsAddRecFromDifferentLoop(DE->getRHS(), L);
70 #endif
71 if (const SCEVCastExpr *CE = dyn_cast(S))
72 return containsAddRecFromDifferentLoop(CE->getOperand(), L);
73 return false;
74 }
75
76 /// getSCEVStartAndStride - Compute the start and stride of this expression,
77 /// returning false if the expression is not a start/stride pair, or true if it
78 /// is. The stride must be a loop invariant expression, but the start may be
79 /// a mix of loop invariant and loop variant expressions. The start cannot,
80 /// however, contain an AddRec from a different loop, unless that loop is an
81 /// outer loop of the current loop.
82 static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, Loop *UseLoop,
83 SCEVHandle &Start, SCEVHandle &Stride,
84 bool &isSigned,
85 ScalarEvolution *SE, DominatorTree *DT) {
86 SCEVHandle TheAddRec = Start; // Initialize to zero.
87 bool isSExt = false;
88 bool isZExt = false;
89
90 // If the outer level is an AddExpr, the operands are all start values except
91 // for a nested AddRecExpr.
92 if (const SCEVAddExpr *AE = dyn_cast(SH)) {
93 for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i)
94 if (const SCEVAddRecExpr *AddRec =
95 dyn_cast(AE->getOperand(i))) {
96 if (AddRec->getLoop() == L)
97 TheAddRec = SE->getAddExpr(AddRec, TheAddRec);
98 else
99 return false; // Nested IV of some sort?
100 } else {
101 Start = SE->getAddExpr(Start, AE->getOperand(i));
102 }
103
104 } else if (const SCEVZeroExtendExpr *Z = dyn_cast(SH)) {
105 TheAddRec = Z->getOperand();
106 isZExt = true;
107 } else if (const SCEVSignExtendExpr *S = dyn_cast(SH)) {
108 TheAddRec = S->getOperand();
109 isSExt = true;
110 } else if (isa(SH)) {
111 TheAddRec = SH;
112 } else {
113 return false; // not analyzable.
114 }
115
116 const SCEVAddRecExpr *AddRec = dyn_cast(TheAddRec);
117 if (!AddRec || AddRec->getLoop() != L) return false;
118
119 // Use getSCEVAtScope to attempt to simplify other loops out of
120 // the picture.
121 SCEVHandle AddRecStart = AddRec->getStart();
122 SCEVHandle BetterAddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop);
123 if (!isa(BetterAddRecStart))
124 AddRecStart = BetterAddRecStart;
125
126 // FIXME: If Start contains an SCEVAddRecExpr from a different loop, other
127 // than an outer loop of the current loop, reject it. LSR has no concept of
128 // operating on more than one loop at a time so don't confuse it with such
129 // expressions.
130 if (containsAddRecFromDifferentLoop(AddRecStart, L))
131 return false;
132
133 if (isSExt || isZExt)
134 Start = SE->getTruncateExpr(Start, AddRec->getType());
135
136 Start = SE->getAddExpr(Start, AddRecStart);
137
138 if (!isa(AddRec->getStepRecurrence(*SE))) {
139 // If stride is an instruction, make sure it dominates the loop preheader.
140 // Otherwise we could end up with a use before def situation.
141 BasicBlock *Preheader = L->getLoopPreheader();
142 if (!AddRec->getStepRecurrence(*SE)->dominates(Preheader, DT))
143 return false;
144
145 DOUT << "[" << L->getHeader()->getName()
146 << "] Variable stride: " << *AddRec << "\n";
147 }
148
149 Stride = AddRec->getStepRecurrence(*SE);
150 isSigned = isSExt;
151 return true;
152 }
153
154 /// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
155 /// and now we need to decide whether the user should use the preinc or post-inc
156 /// value. If this user should use the post-inc version of the IV, return true.
157 ///
158 /// Choosing wrong here can break dominance properties (if we choose to use the
159 /// post-inc value when we cannot) or it can end up adding extra live-ranges to
160 /// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
161 /// should use the post-inc value).
162 static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV,
163 Loop *L, LoopInfo *LI, DominatorTree *DT,
164 Pass *P) {
165 // If the user is in the loop, use the preinc value.
166 if (L->contains(User->getParent())) return false;
167
168 BasicBlock *LatchBlock = L->getLoopLatch();
169
170 // Ok, the user is outside of the loop. If it is dominated by the latch
171 // block, use the post-inc value.
172 if (DT->dominates(LatchBlock, User->getParent()))
173 return true;
174
175 // There is one case we have to be careful of: PHI nodes. These little guys
176 // can live in blocks that are not dominated by the latch block, but (since
177 // their uses occur in the predecessor block, not the block the PHI lives in)
178 // should still use the post-inc value. Check for this case now.
179 PHINode *PN = dyn_cast(User);
180 if (!PN) return false; // not a phi, not dominated by latch block.
181
182 // Look at all of the uses of IV by the PHI node. If any use corresponds to
183 // a block that is not dominated by the latch block, give up and use the
184 // preincremented value.
185 unsigned NumUses = 0;
186 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
187 if (PN->getIncomingValue(i) == IV) {
188 ++NumUses;
189 if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
190 return false;
191 }
192
193 // Okay, all uses of IV by PN are in predecessor blocks that really are
194 // dominated by the latch block. Use the post-incremented value.
195 return true;
196 }
197
198 /// AddUsersIfInteresting - Inspect the specified instruction. If it is a
199 /// reducible SCEV, recursively add its users to the IVUsesByStride set and
200 /// return true. Otherwise, return false.
201 bool IVUsers::AddUsersIfInteresting(Instruction *I) {
202 if (!SE->isSCEVable(I->getType()))
203 return false; // Void and FP expressions cannot be reduced.
204
205 // LSR is not APInt clean, do not touch integers bigger than 64-bits.
206 if (SE->getTypeSizeInBits(I->getType()) > 64)
207 return false;
208
209 if (!Processed.insert(I))
210 return true; // Instruction already handled.
211
212 // Get the symbolic expression for this instruction.
213 SCEVHandle ISE = SE->getSCEV(I);
214 if (isa(ISE)) return false;
215
216 // Get the start and stride for this expression.
217 Loop *UseLoop = LI->getLoopFor(I->getParent());
218 SCEVHandle Start = SE->getIntegerSCEV(0, ISE->getType());
219 SCEVHandle Stride = Start;
220 bool isSigned;
221
222 if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, isSigned, SE, DT))
223 return false; // Non-reducible symbolic expression, bail out.
224
225 SmallPtrSet UniqueUsers;
226 for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
227 UI != E; ++UI) {
228 Instruction *User = cast(*UI);
229 if (!UniqueUsers.insert(User))
230 continue;
231
232 // Do not infinitely recurse on PHI nodes.
233 if (isa(User) && Processed.count(User))
234 continue;
235
236 // Descend recursively, but not into PHI nodes outside the current loop.
237 // It's important to see the entire expression outside the loop to get
238 // choices that depend on addressing mode use right, although we won't
239 // consider references ouside the loop in all cases.
240 // If User is already in Processed, we don't want to recurse into it again,
241 // but do want to record a second reference in the same instruction.
242 bool AddUserToIVUsers = false;
243 if (LI->getLoopFor(User->getParent()) != L) {
244 if (isa(User) || Processed.count(User) ||
245 !AddUsersIfInteresting(User)) {
246 DOUT << "FOUND USER in other loop: " << *User
247 << " OF SCEV: " << *ISE << "\n";
248 AddUserToIVUsers = true;
249 }
250 } else if (Processed.count(User) ||
251 !AddUsersIfInteresting(User)) {
252 DOUT << "FOUND USER: " << *User
253 << " OF SCEV: " << *ISE << "\n";
254 AddUserToIVUsers = true;
255 }
256
257 if (AddUserToIVUsers) {
258 IVUsersOfOneStride *StrideUses = IVUsesByStride[Stride];
259 if (!StrideUses) { // First occurrence of this stride?
260 StrideOrder.push_back(Stride);
261 StrideUses = new IVUsersOfOneStride(Stride);
262 IVUses.push_back(StrideUses);
263 IVUsesByStride[Stride] = StrideUses;
264 }
265
266 // Okay, we found a user that we cannot reduce. Analyze the instruction
267 // and decide what to do with it. If we are a use inside of the loop, use
268 // the value before incrementation, otherwise use it after incrementation.
269 if (IVUseShouldUsePostIncValue(User, I, L, LI, DT, this)) {
270 // The value used will be incremented by the stride more than we are
271 // expecting, so subtract this off.
272 SCEVHandle NewStart = SE->getMinusSCEV(Start, Stride);
273 StrideUses->addUser(NewStart, User, I, isSigned);
274 StrideUses->Users.back().setIsUseOfPostIncrementedValue(true);
275 DOUT << " USING POSTINC SCEV, START=" << *NewStart<< "\n";
276 } else {
277 StrideUses->addUser(Start, User, I, isSigned);
278 }
279 }
280 }
281 return true;
282 }
283
284 IVUsers::IVUsers()
285 : LoopPass(&ID) {
286 }
287
288 void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {
289 AU.addRequired();
290 AU.addRequired();
291 AU.addRequired();
292 AU.setPreservesAll();
293 }
294
295 bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
296
297 L = l;
298 LI = &getAnalysis();
299 DT = &getAnalysis();
300 SE = &getAnalysis();
301
302 // Find all uses of induction variables in this loop, and categorize
303 // them by stride. Start by finding all of the PHI nodes in the header for
304 // this loop. If they are induction variables, inspect their uses.
305 for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I)
306 AddUsersIfInteresting(I);
307
308 return false;
309 }
310
311 /// getReplacementExpr - Return a SCEV expression which computes the
312 /// value of the OperandValToReplace of the given IVStrideUse.
313 SCEVHandle IVUsers::getReplacementExpr(const IVStrideUse &U) const {
314 const Type *UseTy = U.getOperandValToReplace()->getType();
315 // Start with zero.
316 SCEVHandle RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType());
317 // Create the basic add recurrence.
318 RetVal = SE->getAddRecExpr(RetVal, U.getParent()->Stride, L);
319 // Add the offset in a separate step, because it may be loop-variant.
320 RetVal = SE->getAddExpr(RetVal, U.getOffset());
321 // For uses of post-incremented values, add an extra stride to compute
322 // the actual replacement value.
323 if (U.isUseOfPostIncrementedValue())
324 RetVal = SE->getAddExpr(RetVal, U.getParent()->Stride);
325 // Evaluate the expression out of the loop, if possible.
326 if (!L->contains(U.getUser()->getParent())) {
327 SCEVHandle ExitVal = SE->getSCEVAtScope(RetVal, L->getParentLoop());
328 if (!isa(ExitVal) && ExitVal->isLoopInvariant(L))
329 RetVal = ExitVal;
330 }
331 // Promote the result to the type of the use.
332 if (SE->getTypeSizeInBits(RetVal->getType()) !=
333 SE->getTypeSizeInBits(UseTy)) {
334 if (U.isSigned())
335 RetVal = SE->getSignExtendExpr(RetVal, UseTy);
336 else
337 RetVal = SE->getZeroExtendExpr(RetVal, UseTy);
338 }
339 return RetVal;
340 }
341
342 void IVUsers::print(raw_ostream &OS, const Module *M) const {
343 OS << "IV Users for loop ";
344 WriteAsOperand(OS, L->getHeader(), false);
345 if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
346 OS << " with backedge-taken count "
347 << *SE->getBackedgeTakenCount(L);
348 }
349 OS << ":\n";
350
351 for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) {
352 std::map::const_iterator SI =
353 IVUsesByStride.find(StrideOrder[Stride]);
354 assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
355 OS << " Stride " << *SI->first->getType() << " " << *SI->first << ":\n";
356
357 for (ilist::const_iterator UI = SI->second->Users.begin(),
358 E = SI->second->Users.end(); UI != E; ++UI) {
359 OS << " ";
360 WriteAsOperand(OS, UI->getOperandValToReplace(), false);
361 OS << " = ";
362 OS << *getReplacementExpr(*UI);
363 if (UI->isUseOfPostIncrementedValue())
364 OS << " (post-inc)";
365 OS << " in ";
366 UI->getUser()->print(OS);
367 }
368 }
369 }
370
371 void IVUsers::print(std::ostream &o, const Module *M) const {
372 raw_os_ostream OS(o);
373 print(OS, M);
374 }
375
376 void IVUsers::dump() const {
377 print(errs());
378 }
379
380 void IVUsers::releaseMemory() {
381 IVUsesByStride.clear();
382 StrideOrder.clear();
383 Processed.clear();
384 }
385
// deleted - CallbackVH hook invoked when the tracked User instruction is
// destroyed.  Erasing from the owning ilist deletes this node itself, so
// nothing may touch 'this' afterwards.
void IVStrideUse::deleted() {
  // Remove this user from the list.
  Parent->Users.erase(this);
  // this now dangles!
}
748748
749749 //===---------------------------------------------------------------------===//
750750
751 We should be able to evaluate this loop:
752
753 int test(int x_offs) {
754 while (x_offs > 4)
755 x_offs -= 4;
756 return x_offs;
757 }
758
759 //===---------------------------------------------------------------------===//
760
761751 Reassociate should turn things like:
762752
763753 int factorial(int X) {
4242 #include "llvm/Constants.h"
4343 #include "llvm/Instructions.h"
4444 #include "llvm/Type.h"
45 #include "llvm/Analysis/Dominators.h"
46 #include "llvm/Analysis/IVUsers.h"
4547 #include "llvm/Analysis/ScalarEvolutionExpander.h"
4648 #include "llvm/Analysis/LoopInfo.h"
4749 #include "llvm/Analysis/LoopPass.h"
5052 #include "llvm/Support/Debug.h"
5153 #include "llvm/Support/GetElementPtrTypeIterator.h"
5254 #include "llvm/Transforms/Utils/Local.h"
55 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
5356 #include "llvm/Support/CommandLine.h"
5457 #include "llvm/ADT/SmallVector.h"
5558 #include "llvm/ADT/SetVector.h"
56 #include "llvm/ADT/SmallPtrSet.h"
5759 #include "llvm/ADT/Statistic.h"
60 #include "llvm/ADT/STLExtras.h"
5861 using namespace llvm;
5962
6063 STATISTIC(NumRemoved , "Number of aux indvars removed");
6467
6568 namespace {
6669 class VISIBILITY_HIDDEN IndVarSimplify : public LoopPass {
70 IVUsers *IU;
6771 LoopInfo *LI;
6872 ScalarEvolution *SE;
6973 bool Changed;
7579 virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
7680
7781 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
82 AU.addRequired();
7883 AU.addRequired();
7984 AU.addRequiredID(LCSSAID);
8085 AU.addRequiredID(LoopSimplifyID);
8186 AU.addRequired();
87 AU.addRequired();
8288 AU.addPreserved();
8389 AU.addPreservedID(LoopSimplifyID);
90 AU.addPreserved();
8491 AU.addPreservedID(LCSSAID);
8592 AU.setPreservesCFG();
8693 }
8996
9097 void RewriteNonIntegerIVs(Loop *L);
9198
92 void LinearFunctionTestReplace(Loop *L, SCEVHandle BackedgeTakenCount,
99 ICmpInst *LinearFunctionTestReplace(Loop *L, SCEVHandle BackedgeTakenCount,
93100 Value *IndVar,
94101 BasicBlock *ExitingBlock,
95102 BranchInst *BI,
96103 SCEVExpander &Rewriter);
97104 void RewriteLoopExitValues(Loop *L, const SCEV *BackedgeTakenCount);
98105
99 void DeleteTriviallyDeadInstructions(SmallPtrSet &Insts);
100
101 void HandleFloatingPointIV(Loop *L, PHINode *PH,
102 SmallPtrSet &DeadInsts);
106 void RewriteIVExpressions(Loop *L, const Type *LargestType,
107 SCEVExpander &Rewriter);
108
109 void SinkUnusedInvariants(Loop *L, SCEVExpander &Rewriter);
110
111 void FixUsesBeforeDefs(Loop *L, SCEVExpander &Rewriter);
112
113 void HandleFloatingPointIV(Loop *L, PHINode *PH);
103114 };
104115 }
105116
109120
110121 Pass *llvm::createIndVarSimplifyPass() {
111122 return new IndVarSimplify();
112 }
113
114 /// DeleteTriviallyDeadInstructions - If any of the instructions is the
115 /// specified set are trivially dead, delete them and see if this makes any of
116 /// their operands subsequently dead.
117 void IndVarSimplify::
118 DeleteTriviallyDeadInstructions(SmallPtrSet &Insts) {
119 while (!Insts.empty()) {
120 Instruction *I = *Insts.begin();
121 Insts.erase(I);
122 if (isInstructionTriviallyDead(I)) {
123 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
124 if (Instruction *U = dyn_cast(I->getOperand(i)))
125 Insts.insert(U);
126 DOUT << "INDVARS: Deleting: " << *I;
127 I->eraseFromParent();
128 Changed = true;
129 }
130 }
131123 }
132124
133125 /// LinearFunctionTestReplace - This method rewrites the exit condition of the
135127 /// variable. This pass is able to rewrite the exit tests of any loop where the
136128 /// SCEV analysis can determine a loop-invariant trip count of the loop, which
137129 /// is actually a much broader range than just linear tests.
138 void IndVarSimplify::LinearFunctionTestReplace(Loop *L,
130 ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
139131 SCEVHandle BackedgeTakenCount,
140132 Value *IndVar,
141133 BasicBlock *ExitingBlock,
195187 << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
196188 << " RHS:\t" << *RHS << "\n";
197189
198 Value *Cond = new ICmpInst(Opcode, CmpIndVar, ExitCnt, "exitcond", BI);
199 BI->setCondition(Cond);
190 ICmpInst *Cond = new ICmpInst(Opcode, CmpIndVar, ExitCnt, "exitcond", BI);
191
192 Instruction *OrigCond = cast(BI->getCondition());
193 OrigCond->replaceAllUsesWith(Cond);
194 RecursivelyDeleteTriviallyDeadInstructions(OrigCond);
195
200196 ++NumLFTR;
201197 Changed = true;
198 return Cond;
202199 }
203200
204201 /// RewriteLoopExitValues - Check to see if this loop has a computable
206203 /// final value of any expressions that are recurrent in the loop, and
207204 /// substitute the exit values from the loop into any instructions outside of
208205 /// the loop that use the final values of the current expressions.
206 ///
207 /// This is mostly redundant with the regular IndVarSimplify activities that
208 /// happen later, except that it's more powerful in some cases, because it's
209 /// able to brute-force evaluate arbitrary instructions as long as they have
210 /// constant operands at the beginning of the loop.
209211 void IndVarSimplify::RewriteLoopExitValues(Loop *L,
210212 const SCEV *BackedgeTakenCount) {
213 // Verify the input to the pass in already in LCSSA form.
214 assert(L->isLCSSAForm());
215
211216 BasicBlock *Preheader = L->getLoopPreheader();
212217
213218 // Scan all of the instructions in the loop, looking at those that have
225230 BlockToInsertInto = Preheader;
226231 BasicBlock::iterator InsertPt = BlockToInsertInto->getFirstNonPHI();
227232
228 bool HasConstantItCount = isa(BackedgeTakenCount);
229
230 SmallPtrSet InstructionsToDelete;
231233 std::map ExitValues;
232234
233235 // Find all values that are computed inside the loop, but used outside of it.
267269 if (!L->contains(Inst->getParent()))
268270 continue;
269271
270 // We require that this value either have a computable evolution or that
271 // the loop have a constant iteration count. In the case where the loop
272 // has a constant iteration count, we can sometimes force evaluation of
273 // the exit value through brute force.
274 SCEVHandle SH = SE->getSCEV(Inst);
275 if (!SH->hasComputableLoopEvolution(L) && !HasConstantItCount)
276 continue; // Cannot get exit evolution for the loop value.
277
278272 // Okay, this instruction has a user outside of the current loop
279273 // and varies predictably *inside* the loop. Evaluate the value it
280274 // contains when the loop exits, if possible.
281 SCEVHandle ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
275 SCEVHandle SH = SE->getSCEV(Inst);
276 SCEVHandle ExitValue = SE->getSCEVAtScope(SH, L->getParentLoop());
282277 if (isa(ExitValue) ||
283278 !ExitValue->isLoopInvariant(L))
284279 continue;
297292
298293 PN->setIncomingValue(i, ExitVal);
299294
300 // If this instruction is dead now, schedule it to be removed.
301 if (Inst->use_empty())
302 InstructionsToDelete.insert(Inst);
295 // If this instruction is dead now, delete it.
296 RecursivelyDeleteTriviallyDeadInstructions(Inst);
303297
304298 // See if this is a single-entry LCSSA PHI node. If so, we can (and
305299 // have to) remove
307301 // in the loop, so we don't need an LCSSA phi node anymore.
308302 if (NumPreds == 1) {
309303 PN->replaceAllUsesWith(ExitVal);
310 PN->eraseFromParent();
304 RecursivelyDeleteTriviallyDeadInstructions(PN);
311305 break;
312306 }
313307 }
314308 }
315309 }
316
317 DeleteTriviallyDeadInstructions(InstructionsToDelete);
318310 }
319311
320312 void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
324316 //
325317 BasicBlock *Header = L->getHeader();
326318
327 SmallPtrSet DeadInsts;
328 for (BasicBlock::iterator I = Header->begin(); isa(I); ++I) {
329 PHINode *PN = cast(I);
330 HandleFloatingPointIV(L, PN, DeadInsts);
331 }
319 SmallVector PHIs;
320 for (BasicBlock::iterator I = Header->begin();
321 PHINode *PN = dyn_cast(I); ++I)
322 PHIs.push_back(PN);
323
324 for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
325 if (PHINode *PN = dyn_cast_or_null(PHIs[i]))
326 HandleFloatingPointIV(L, PN);
332327
333328 // If the loop previously had floating-point IV, ScalarEvolution
334329 // may not have been able to compute a trip count. Now that we've done some
335330 // re-writing, the trip count may be computable.
336331 if (Changed)
337332 SE->forgetLoopBackedgeTakenCount(L);
338
339 if (!DeadInsts.empty())
340 DeleteTriviallyDeadInstructions(DeadInsts);
341 }
342
343 /// getEffectiveIndvarType - Determine the widest type that the
344 /// induction-variable PHINode Phi is cast to.
345 ///
346 static const Type *getEffectiveIndvarType(const PHINode *Phi,
347 const ScalarEvolution *SE) {
348 const Type *Ty = Phi->getType();
349
350 for (Value::use_const_iterator UI = Phi->use_begin(), UE = Phi->use_end();
351 UI != UE; ++UI) {
352 const Type *CandidateType = NULL;
353 if (const ZExtInst *ZI = dyn_cast(UI))
354 CandidateType = ZI->getDestTy();
355 else if (const SExtInst *SI = dyn_cast(UI))
356 CandidateType = SI->getDestTy();
357 else if (const IntToPtrInst *IP = dyn_cast(UI))
358 CandidateType = IP->getDestTy();
359 else if (const PtrToIntInst *PI = dyn_cast(UI))
360 CandidateType = PI->getDestTy();
361 if (CandidateType &&
362 SE->isSCEVable(CandidateType) &&
363 SE->getTypeSizeInBits(CandidateType) > SE->getTypeSizeInBits(Ty))
364 Ty = CandidateType;
365 }
366
367 return Ty;
368 }
369
370 /// TestOrigIVForWrap - Analyze the original induction variable that
371 /// controls the loop's iteration to determine whether it would ever
372 /// undergo signed or unsigned overflow.
373 ///
374 /// In addition to setting the NoSignedWrap and NoUnsignedWrap
375 /// variables to true when appropriate (they are not set to false here),
376 /// return the PHI for this induction variable. Also record the initial
377 /// and final values and the increment; these are not meaningful unless
378 /// either NoSignedWrap or NoUnsignedWrap is true, and are always meaningful
379 /// in that case, although the final value may be 0 indicating a nonconstant.
380 ///
381 /// TODO: This duplicates a fair amount of ScalarEvolution logic.
382 /// Perhaps this can be merged with
383 /// ScalarEvolution::getBackedgeTakenCount
384 /// and/or ScalarEvolution::get{Sign,Zero}ExtendExpr.
385 ///
386 static const PHINode *TestOrigIVForWrap(const Loop *L,
387 const BranchInst *BI,
388 const Instruction *OrigCond,
389 const ScalarEvolution &SE,
390 bool &NoSignedWrap,
391 bool &NoUnsignedWrap,
392 const ConstantInt* &InitialVal,
393 const ConstantInt* &IncrVal,
394 const ConstantInt* &LimitVal) {
395 // Verify that the loop is sane and find the exit condition.
396 const ICmpInst *Cmp = dyn_cast(OrigCond);
397 if (!Cmp) return 0;
398
399 const Value *CmpLHS = Cmp->getOperand(0);
400 const Value *CmpRHS = Cmp->getOperand(1);
401 const BasicBlock *TrueBB = BI->getSuccessor(0);
402 const BasicBlock *FalseBB = BI->getSuccessor(1);
403 ICmpInst::Predicate Pred = Cmp->getPredicate();
404
405 // Canonicalize a constant to the RHS.
406 if (isa(CmpLHS)) {
407 Pred = ICmpInst::getSwappedPredicate(Pred);
408 std::swap(CmpLHS, CmpRHS);
409 }
410 // Canonicalize SLE to SLT.
411 if (Pred == ICmpInst::ICMP_SLE)
412 if (const ConstantInt *CI = dyn_cast(CmpRHS))
413 if (!CI->getValue().isMaxSignedValue()) {
414 CmpRHS = ConstantInt::get(CI->getValue() + 1);
415 Pred = ICmpInst::ICMP_SLT;
416 }
417 // Canonicalize SGT to SGE.
418 if (Pred == ICmpInst::ICMP_SGT)
419 if (const ConstantInt *CI = dyn_cast(CmpRHS))
420 if (!CI->getValue().isMaxSignedValue()) {
421 CmpRHS = ConstantInt::get(CI->getValue() + 1);
422 Pred = ICmpInst::ICMP_SGE;
423 }
424 // Canonicalize SGE to SLT.
425 if (Pred == ICmpInst::ICMP_SGE) {
426 std::swap(TrueBB, FalseBB);
427 Pred = ICmpInst::ICMP_SLT;
428 }
429 // Canonicalize ULE to ULT.
430 if (Pred == ICmpInst::ICMP_ULE)
431 if (const ConstantInt *CI = dyn_cast(CmpRHS))
432 if (!CI->getValue().isMaxValue()) {
433 CmpRHS = ConstantInt::get(CI->getValue() + 1);
434 Pred = ICmpInst::ICMP_ULT;
435 }
436 // Canonicalize UGT to UGE.
437 if (Pred == ICmpInst::ICMP_UGT)
438 if (const ConstantInt *CI = dyn_cast(CmpRHS))
439 if (!CI->getValue().isMaxValue()) {
440 CmpRHS = ConstantInt::get(CI->getValue() + 1);
441 Pred = ICmpInst::ICMP_UGE;
442 }
443 // Canonicalize UGE to ULT.
444 if (Pred == ICmpInst::ICMP_UGE) {
445 std::swap(TrueBB, FalseBB);
446 Pred = ICmpInst::ICMP_ULT;
447 }
448 // For now, analyze only LT loops for signed overflow.
449 if (Pred != ICmpInst::ICMP_SLT && Pred != ICmpInst::ICMP_ULT)
450 return 0;
451
452 bool isSigned = Pred == ICmpInst::ICMP_SLT;
453
454 // Get the increment instruction. Look past casts if we will
455 // be able to prove that the original induction variable doesn't
456 // undergo signed or unsigned overflow, respectively.
457 const Value *IncrInst = CmpLHS;
458 if (isSigned) {
459 if (const SExtInst *SI = dyn_cast(CmpLHS)) {
460 if (!isa(CmpRHS) ||
461 !cast(CmpRHS)->getValue()
462 .isSignedIntN(SE.getTypeSizeInBits(IncrInst->getType())))
463 return 0;
464 IncrInst = SI->getOperand(0);
465 }
466 } else {
467 if (const ZExtInst *ZI = dyn_cast(CmpLHS)) {
468 if (!isa(CmpRHS) ||
469 !cast(CmpRHS)->getValue()
470 .isIntN(SE.getTypeSizeInBits(IncrInst->getType())))
471 return 0;
472 IncrInst = ZI->getOperand(0);
473 }
474 }
475
476 // For now, only analyze induction variables that have simple increments.
477 const BinaryOperator *IncrOp = dyn_cast(IncrInst);
478 if (!IncrOp || IncrOp->getOpcode() != Instruction::Add)
479 return 0;
480 IncrVal = dyn_cast(IncrOp->getOperand(1));
481 if (!IncrVal)
482 return 0;
483
484 // Make sure the PHI looks like a normal IV.
485 const PHINode *PN = dyn_cast(IncrOp->getOperand(0));
486 if (!PN || PN->getNumIncomingValues() != 2)
487 return 0;
488 unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
489 unsigned BackEdge = !IncomingEdge;
490 if (!L->contains(PN->getIncomingBlock(BackEdge)) ||
491 PN->getIncomingValue(BackEdge) != IncrOp)
492 return 0;
493 if (!L->contains(TrueBB))
494 return 0;
495
496 // For now, only analyze loops with a constant start value, so that
497 // we can easily determine if the start value is not a maximum value
498 // which would wrap on the first iteration.
499 InitialVal = dyn_cast(PN->getIncomingValue(IncomingEdge));
500 if (!InitialVal)
501 return 0;
502
503 // The upper limit need not be a constant; we'll check later.
504 LimitVal = dyn_cast(CmpRHS);
505
506 // We detect the impossibility of wrapping in two cases, both of
507 // which require starting with a non-max value:
508 // - The IV counts up by one, and the loop iterates only while it remains
509 // less than a limiting value (any) in the same type.
510 // - The IV counts up by a positive increment other than 1, and the
511 // constant limiting value + the increment is less than the max value
512 // (computed as max-increment to avoid overflow)
513 if (isSigned && !InitialVal->getValue().isMaxSignedValue()) {
514 if (IncrVal->equalsInt(1))
515 NoSignedWrap = true; // LimitVal need not be constant
516 else if (LimitVal) {
517 uint64_t numBits = LimitVal->getValue().getBitWidth();
518 if (IncrVal->getValue().sgt(APInt::getNullValue(numBits)) &&
519 (APInt::getSignedMaxValue(numBits) - IncrVal->getValue())
520 .sgt(LimitVal->getValue()))
521 NoSignedWrap = true;
522 }
523 } else if (!isSigned && !InitialVal->getValue().isMaxValue()) {
524 if (IncrVal->equalsInt(1))
525 NoUnsignedWrap = true; // LimitVal need not be constant
526 else if (LimitVal) {
527 uint64_t numBits = LimitVal->getValue().getBitWidth();
528 if (IncrVal->getValue().ugt(APInt::getNullValue(numBits)) &&
529 (APInt::getMaxValue(numBits) - IncrVal->getValue())
530 .ugt(LimitVal->getValue()))
531 NoUnsignedWrap = true;
532 }
533 }
534 return PN;
535 }
536
537 static Value *getSignExtendedTruncVar(const SCEVAddRecExpr *AR,
538 ScalarEvolution *SE,
539 const Type *LargestType, Loop *L,
540 const Type *myType,
541 SCEVExpander &Rewriter) {
542 SCEVHandle ExtendedStart =
543 SE->getSignExtendExpr(AR->getStart(), LargestType);
544 SCEVHandle ExtendedStep =
545 SE->getSignExtendExpr(AR->getStepRecurrence(*SE), LargestType);
546 SCEVHandle ExtendedAddRec =
547 SE->getAddRecExpr(ExtendedStart, ExtendedStep, L);
548 if (LargestType != myType)
549 ExtendedAddRec = SE->getTruncateExpr(ExtendedAddRec, myType);
550 return Rewriter.expandCodeFor(ExtendedAddRec, myType);
551 }
552
553 static Value *getZeroExtendedTruncVar(const SCEVAddRecExpr *AR,
554 ScalarEvolution *SE,
555 const Type *LargestType, Loop *L,
556 const Type *myType,
557 SCEVExpander &Rewriter) {
558 SCEVHandle ExtendedStart =
559 SE->getZeroExtendExpr(AR->getStart(), LargestType);
560 SCEVHandle ExtendedStep =
561 SE->getZeroExtendExpr(AR->getStepRecurrence(*SE), LargestType);
562 SCEVHandle ExtendedAddRec =
563 SE->getAddRecExpr(ExtendedStart, ExtendedStep, L);
564 if (LargestType != myType)
565 ExtendedAddRec = SE->getTruncateExpr(ExtendedAddRec, myType);
566 return Rewriter.expandCodeFor(ExtendedAddRec, myType);
567 }
568
569 /// allUsesAreSameTyped - See whether all Uses of I are instructions
570 /// with the same Opcode and the same type.
571 static bool allUsesAreSameTyped(unsigned int Opcode, Instruction *I) {
572 const Type* firstType = NULL;
573 for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
574 UI != UE; ++UI) {
575 Instruction *II = dyn_cast(*UI);
576 if (!II || II->getOpcode() != Opcode)
577 return false;
578 if (!firstType)
579 firstType = II->getType();
580 else if (firstType != II->getType())
581 return false;
582 }
583 return true;
584333 }
585334
586335 bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
336 IU = &getAnalysis();
587337 LI = &getAnalysis();
588338 SE = &getAnalysis();
589339 Changed = false;
593343 RewriteNonIntegerIVs(L);
594344
595345 BasicBlock *Header = L->getHeader();
596 BasicBlock *ExitingBlock = L->getExitingBlock();
597 SmallPtrSet DeadInsts;
598
599 // Verify the input to the pass in already in LCSSA form.
600 assert(L->isLCSSAForm());
346 BasicBlock *ExitingBlock = L->getExitingBlock(); // may be null
347 SCEVHandle BackedgeTakenCount = SE->getBackedgeTakenCount(L);
601348
602349 // Check to see if this loop has a computable loop-invariant execution count.
603350 // If so, this means that we can compute the final value of any expressions
605352 // loop into any instructions outside of the loop that use the final values of
606353 // the current expressions.
607354 //
608 SCEVHandle BackedgeTakenCount = SE->getBackedgeTakenCount(L);
609355 if (!isa(BackedgeTakenCount))
610356 RewriteLoopExitValues(L, BackedgeTakenCount);
611357
612 // Next, analyze all of the induction variables in the loop, canonicalizing
613 // auxillary induction variables.
614 std::vector > IndVars;
615
616 for (BasicBlock::iterator I = Header->begin(); isa(I); ++I) {
617 PHINode *PN = cast(I);
618 if (SE->isSCEVable(PN->getType())) {
619 SCEVHandle SCEV = SE->getSCEV(PN);
358 // Compute the type of the largest recurrence expression, and decide whether
359 // a canonical induction variable should be inserted.
360 const Type *LargestType = 0;
361 bool NeedCannIV = false;
362 if (!isa(BackedgeTakenCount)) {
363 LargestType = BackedgeTakenCount->getType();
364 LargestType = SE->getEffectiveSCEVType(LargestType);
365 // If we have a known trip count and a single exit block, we'll be
366 // rewriting the loop exit test condition below, which requires a
367 // canonical induction variable.
368 if (ExitingBlock)
369 NeedCannIV = true;
370 }
371 for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
372 SCEVHandle Stride = IU->StrideOrder[i];
373 const Type *Ty = SE->getEffectiveSCEVType(Stride->getType());
374 if (!LargestType ||
375 SE->getTypeSizeInBits(Ty) >
376 SE->getTypeSizeInBits(LargestType))
377 LargestType = Ty;
378
379 std::map::iterator SI =
380 IU->IVUsesByStride.find(IU->StrideOrder[i]);
381 assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
382
383 if (!SI->second->Users.empty())
384 NeedCannIV = true;
385 }
386
387 // Create a rewriter object which we'll use to transform the code with.
388 SCEVExpander Rewriter(*SE, *LI);
389
390 // Now that we know the largest of of the induction variable expressions
391 // in this loop, insert a canonical induction variable of the largest size.
392 Value *IndVar = 0;
393 if (NeedCannIV) {
394 IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L,LargestType);
395 ++NumInserted;
396 Changed = true;
397 DOUT << "INDVARS: New CanIV: " << *IndVar;
398 }
399
400 // If we have a trip count expression, rewrite the loop's exit condition
401 // using it. We can currently only handle loops with a single exit.
402 ICmpInst *NewICmp = 0;
403 if (!isa(BackedgeTakenCount) && ExitingBlock) {
404 assert(NeedCannIV &&
405 "LinearFunctionTestReplace requires a canonical induction variable");
406 // Can't rewrite non-branch yet.
407 if (BranchInst *BI = dyn_cast(ExitingBlock->getTerminator()))
408 NewICmp = LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
409 ExitingBlock, BI, Rewriter);
410 }
411
412 Rewriter.setInsertionPoint(Header->getFirstNonPHI());
413
414 // Rewrite IV-derived expressions.
415 RewriteIVExpressions(L, LargestType, Rewriter);
416
417 // Loop-invariant instructions in the preheader that aren't used in the
418 // loop may be sunk below the loop to reduce register pressure.
419 SinkUnusedInvariants(L, Rewriter);
420
421 // Reorder instructions to avoid use-before-def conditions.
422 FixUsesBeforeDefs(L, Rewriter);
423
424 // For completeness, inform IVUsers of the IV use in the newly-created
425 // loop exit test instruction.
426 if (NewICmp)
427 IU->AddUsersIfInteresting(cast(NewICmp->getOperand(0)));
428
429 // Clean up dead instructions.
430 DeleteDeadPHIs(L->getHeader());
431 // Check a post-condition.
432 assert(L->isLCSSAForm() && "Indvars did not leave the loop in lcssa form!");
433 return Changed;
434 }
435
436 void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
437 SCEVExpander &Rewriter) {
438 SmallVector DeadInsts;
439
440 // Rewrite all induction variable expressions in terms of the canonical
441 // induction variable.
442 //
443 // If there were induction variables of other sizes or offsets, manually
444 // add the offsets to the primary induction variable and cast, avoiding
445 // the need for the code evaluation methods to insert induction variables
446 // of different sizes.
447 for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
448 SCEVHandle Stride = IU->StrideOrder[i];
449
450 std::map::iterator SI =
451 IU->IVUsesByStride.find(IU->StrideOrder[i]);
452 assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
453 ilist &List = SI->second->Users;
454 for (ilist::iterator UI = List.begin(),
455 E = List.end(); UI != E; ++UI) {
456 SCEVHandle Offset = UI->getOffset();
457 Value *Op = UI->getOperandValToReplace();
458 Instruction *User = UI->getUser();
459 bool isSigned = UI->isSigned();
460
461 // Compute the final addrec to expand into code.
462 SCEVHandle AR = IU->getReplacementExpr(*UI);
463
620464 // FIXME: It is an extremely bad idea to indvar substitute anything more
621465 // complex than affine induction variables. Doing so will put expensive
622466 // polynomial evaluations inside of the loop, and the str reduction pass
623467 // currently can only reduce affine polynomials. For now just disable
624 // indvar subst on anything more complex than an affine addrec.
625 if (const SCEVAddRecExpr *AR = dyn_cast(SCEV))
626 if (AR->getLoop() == L && AR->isAffine())
627 IndVars.push_back(std::make_pair(PN, SCEV));
628 }
629 }
630
631 // Compute the type of the largest recurrence expression, and collect
632 // the set of the types of the other recurrence expressions.
633 const Type *LargestType = 0;
634 SmallSetVector SizesToInsert;
635 if (!isa(BackedgeTakenCount)) {
636 LargestType = BackedgeTakenCount->getType();
637 LargestType = SE->getEffectiveSCEVType(LargestType);
638 SizesToInsert.insert(LargestType);
639 }
640 for (unsigned i = 0, e = IndVars.size(); i != e; ++i) {
641 const PHINode *PN = IndVars[i].first;
642 const Type *PNTy = PN->getType();
643 PNTy = SE->getEffectiveSCEVType(PNTy);
644 SizesToInsert.insert(PNTy);
645 const Type *EffTy = getEffectiveIndvarType(PN, SE);
646 EffTy = SE->getEffectiveSCEVType(EffTy);
647 SizesToInsert.insert(EffTy);
648 if (!LargestType ||
649 SE->getTypeSizeInBits(EffTy) >
650 SE->getTypeSizeInBits(LargestType))
651 LargestType = EffTy;
652 }
653
654 // Create a rewriter object which we'll use to transform the code with.
655 SCEVExpander Rewriter(*SE, *LI);
656
657 // Now that we know the largest of of the induction variables in this loop,
658 // insert a canonical induction variable of the largest size.
659 Value *IndVar = 0;
660 if (!SizesToInsert.empty()) {
661 IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L,LargestType);
662 ++NumInserted;
663 Changed = true;
664 DOUT << "INDVARS: New CanIV: " << *IndVar;
665 }
666
667 // If we have a trip count expression, rewrite the loop's exit condition
668 // using it. We can currently only handle loops with a single exit.
669 bool NoSignedWrap = false;
670 bool NoUnsignedWrap = false;
671 const ConstantInt* InitialVal, * IncrVal, * LimitVal;
672 const PHINode *OrigControllingPHI = 0;
673 if (!isa(BackedgeTakenCount) && ExitingBlock)
674 // Can't rewrite non-branch yet.
675 if (BranchInst *BI = dyn_cast(ExitingBlock->getTerminator())) {
676 if (Instruction *OrigCond = dyn_cast(BI->getCondition())) {
677 // Determine if the OrigIV will ever undergo overflow.
678 OrigControllingPHI =
679 TestOrigIVForWrap(L, BI, OrigCond, *SE,
680 NoSignedWrap, NoUnsignedWrap,
681 InitialVal, IncrVal, LimitVal);
682
683 // We'll be replacing the original condition, so it'll be dead.
684 DeadInsts.insert(OrigCond);
685 }
686
687 LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
688 ExitingBlock, BI, Rewriter);
689 }
690
691 // Now that we have a canonical induction variable, we can rewrite any
692 // recurrences in terms of the induction variable. Start with the auxillary
693 // induction variables, and recursively rewrite any of their uses.
694 BasicBlock::iterator InsertPt = Header->getFirstNonPHI();
695 Rewriter.setInsertionPoint(InsertPt);
696
697 // If there were induction variables of other sizes, cast the primary
698 // induction variable to the right size for them, avoiding the need for the
699 // code evaluation methods to insert induction variables of different sizes.
700 for (unsigned i = 0, e = SizesToInsert.size(); i != e; ++i) {
701 const Type *Ty = SizesToInsert[i];
702 if (Ty != LargestType) {
703 Instruction *New = new TruncInst(IndVar, Ty, "indvar", InsertPt);
704 Rewriter.addInsertedValue(New, SE->getSCEV(New));
705 DOUT << "INDVARS: Made trunc IV for type " << *Ty << ": "
706 << *New << "\n";
707 }
708 }
709
710 // Rewrite all induction variables in terms of the canonical induction
711 // variable.
712 while (!IndVars.empty()) {
713 PHINode *PN = IndVars.back().first;
714 const SCEVAddRecExpr *AR = cast(IndVars.back().second);
715 Value *NewVal = Rewriter.expandCodeFor(AR, PN->getType());
716 DOUT << "INDVARS: Rewrote IV '" << *AR << "' " << *PN
717 << " into = " << *NewVal << "\n";
718 NewVal->takeName(PN);
719
720 /// If the new canonical induction variable is wider than the original,
721 /// and the original has uses that are casts to wider types, see if the
722 /// truncate and extend can be omitted.
723 if (PN == OrigControllingPHI && PN->getType() != LargestType)
724 for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
725 UI != UE; ++UI) {
726 Instruction *UInst = dyn_cast(*UI);
727 if (UInst && isa(UInst) && NoSignedWrap) {
728 Value *TruncIndVar = getSignExtendedTruncVar(AR, SE, LargestType, L,
729 UInst->getType(), Rewriter);
730 UInst->replaceAllUsesWith(TruncIndVar);
731 DeadInsts.insert(UInst);
732 }
733 // See if we can figure out sext(i+constant) doesn't wrap, so we can
734 // use a larger add. This is common in subscripting.
735 if (UInst && UInst->getOpcode()==Instruction::Add &&
736 !UInst->use_empty() &&
737 allUsesAreSameTyped(Instruction::SExt, UInst) &&
738 isa(UInst->getOperand(1)) &&
739 NoSignedWrap && LimitVal) {
740 uint64_t oldBitSize = LimitVal->getValue().getBitWidth();
741 uint64_t newBitSize = LargestType->getPrimitiveSizeInBits();
742 ConstantInt* AddRHS = dyn_cast(UInst->getOperand(1));
743 if (((APInt::getSignedMaxValue(oldBitSize) - IncrVal->getValue()) -
744 AddRHS->getValue()).sgt(LimitVal->getValue())) {
745 // We've determined this is (i+constant) and it won't overflow.
746 if (isa(UInst->use_begin())) {
747 SExtInst* oldSext = dyn_cast(UInst->use_begin());
748 uint64_t truncSize = oldSext->getType()->getPrimitiveSizeInBits();
749 Value *TruncIndVar = getSignExtendedTruncVar(AR, SE, LargestType,
750 L, oldSext->getType(), Rewriter);
751 APInt APnewAddRHS = APInt(AddRHS->getValue()).sext(newBitSize);
752 if (newBitSize > truncSize)
753 APnewAddRHS = APnewAddRHS.trunc(truncSize);
754 ConstantInt* newAddRHS =ConstantInt::get(APnewAddRHS);
755 Value *NewAdd =
756 BinaryOperator::CreateAdd(TruncIndVar, newAddRHS,
757 UInst->getName()+".nosex", UInst);
758 for (Value::use_iterator UI2 = UInst->use_begin(),
759 UE2 = UInst->use_end(); UI2 != UE2; ++UI2) {
760 Instruction *II = dyn_cast(UI2);
761 II->replaceAllUsesWith(NewAdd);
762 DeadInsts.insert(II);
763 }
764 DeadInsts.insert(UInst);
765 }
468 // indvar subst on anything more complex than an affine addrec, unless
469 // it can be expanded to a trivial value.
470 if (!Stride->isLoopInvariant(L) &&
471 !isa(AR) &&
472 L->contains(User->getParent()))
473 continue;
474
475 Value *NewVal = 0;
476 if (AR->isLoopInvariant(L)) {
477 BasicBlock::iterator I = Rewriter.getInsertionPoint();
478 // Expand loop-invariant values in the loop preheader. They will
479 // be sunk to the exit block later, if possible.
480 NewVal =
481 Rewriter.expandCodeFor(AR, LargestType,
482 L->getLoopPreheader()->getTerminator());
483 Rewriter.setInsertionPoint(I);
484 ++NumReplaced;
485 } else {
486 const Type *IVTy = Offset->getType();
487 const Type *UseTy = Op->getType();
488
489 // Promote the Offset and Stride up to the canonical induction
490 // variable's bit width.
491 SCEVHandle PromotedOffset = Offset;
492 SCEVHandle PromotedStride = Stride;
493 if (SE->getTypeSizeInBits(IVTy) != SE->getTypeSizeInBits(LargestType)) {
494 // It doesn't matter for correctness whether zero or sign extension
495 // is used here, since the value is truncated away below, but if the
496 // value is signed, sign extension is more likely to be folded.
497 if (isSigned) {
498 PromotedOffset = SE->getSignExtendExpr(PromotedOffset, LargestType);
499 PromotedStride = SE->getSignExtendExpr(PromotedStride, LargestType);
500 } else {
501 PromotedOffset = SE->getZeroExtendExpr(PromotedOffset, LargestType);
502 // If the stride is obviously negative, use sign extension to
503 // produce things like x-1 instead of x+255.
504 if (isa(PromotedStride) &&
505 cast(PromotedStride)
506 ->getValue()->getValue().isNegative())
507 PromotedStride = SE->getSignExtendExpr(PromotedStride,
508 LargestType);
509 else
510 PromotedStride = SE->getZeroExtendExpr(PromotedStride,
511 LargestType);
766512 }
767513 }
768 // Try for sext(i | constant). This is safe as long as the
769 // high bit of the constant is not set.
770 if (UInst && UInst->getOpcode()==Instruction::Or &&
771 !UInst->use_empty() &&
772 allUsesAreSameTyped(Instruction::SExt, UInst) && NoSignedWrap &&
773 isa(UInst->getOperand(1))) {
774 ConstantInt* RHS = dyn_cast(UInst->getOperand(1));
775 if (!RHS->getValue().isNegative()) {
776 uint64_t newBitSize = LargestType->getPrimitiveSizeInBits();
777 SExtInst* oldSext = dyn_cast(UInst->use_begin());
778 uint64_t truncSize = oldSext->getType()->getPrimitiveSizeInBits();
779 Value *TruncIndVar = getSignExtendedTruncVar(AR, SE, LargestType,
780 L, oldSext->getType(), Rewriter);
781 APInt APnewOrRHS = APInt(RHS->getValue()).sext(newBitSize);
782 if (newBitSize > truncSize)
783 APnewOrRHS = APnewOrRHS.trunc(truncSize);
784 ConstantInt* newOrRHS =ConstantInt::get(APnewOrRHS);
785 Value *NewOr =
786 BinaryOperator::CreateOr(TruncIndVar, newOrRHS,
787 UInst->getName()+".nosex", UInst);
788 for (Value::use_iterator UI2 = UInst->use_begin(),
789 UE2 = UInst->use_end(); UI2 != UE2; ++UI2) {
790 Instruction *II = dyn_cast(UI2);
791 II->replaceAllUsesWith(NewOr);
792 DeadInsts.insert(II);
793 }
794 DeadInsts.insert(UInst);
514
515 // Create the SCEV representing the offset from the canonical
516 // induction variable, still in the canonical induction variable's
517 // type, so that all expanded arithmetic is done in the same type.
518 SCEVHandle NewAR = SE->getAddRecExpr(SE->getIntegerSCEV(0, LargestType),
519 PromotedStride, L);
520 // Add the PromotedOffset as a separate step, because it may not be
521 // loop-invariant.
522 NewAR = SE->getAddExpr(NewAR, PromotedOffset);
523
524 // Expand the addrec into instructions.
525 Value *V = Rewriter.expandCodeFor(NewAR, LargestType);
526
527 // Insert an explicit cast if necessary to truncate the value
528 // down to the original stride type. This is done outside of
529 // SCEVExpander because in SCEV expressions, a truncate of an
530 // addrec is always folded.
531 if (LargestType != IVTy) {
532 if (SE->getTypeSizeInBits(IVTy) != SE->getTypeSizeInBits(LargestType))
533 NewAR = SE->getTruncateExpr(NewAR, IVTy);
534 if (Rewriter.isInsertedExpression(NewAR))
535 V = Rewriter.expandCodeFor(NewAR, IVTy);
536 else {
537 V = Rewriter.InsertCastOfTo(CastInst::getCastOpcode(V, false,
538 IVTy, false),
539 V, IVTy);
540 assert(!isa(V) && !isa(V) &&
541 "LargestType wasn't actually the largest type!");
542 // Force the rewriter to use this trunc whenever this addrec
543 // appears so that it doesn't insert new phi nodes or
544 // arithmetic in a different type.
545 Rewriter.addInsertedValue(V, NewAR);
795546 }
796547 }
797 // A zext of a signed variable known not to overflow is still safe.
798 if (UInst && isa(UInst) && (NoUnsignedWrap || NoSignedWrap)) {
799 Value *TruncIndVar = getZeroExtendedTruncVar(AR, SE, LargestType, L,
800 UInst->getType(), Rewriter);
801 UInst->replaceAllUsesWith(TruncIndVar);
802 DeadInsts.insert(UInst);
803 }
804 // If we have zext(i&constant), it's always safe to use the larger
805 // variable. This is not common but is a bottleneck in Openssl.
806 // (RHS doesn't have to be constant. There should be a better approach
807 // than bottom-up pattern matching for this...)
808 if (UInst && UInst->getOpcode()==Instruction::And &&
809 !UInst->use_empty() &&
810 allUsesAreSameTyped(Instruction::ZExt, UInst) &&
811 isa(UInst->getOperand(1))) {
812 uint64_t newBitSize = LargestType->getPrimitiveSizeInBits();
813 ConstantInt* AndRHS = dyn_cast(UInst->getOperand(1));
814 ZExtInst* oldZext = dyn_cast(UInst->use_begin());
815 uint64_t truncSize = oldZext->getType()->getPrimitiveSizeInBits();
816 Value *TruncIndVar = getSignExtendedTruncVar(AR, SE, LargestType,
817 L, oldZext->getType(), Rewriter);
818 APInt APnewAndRHS = APInt(AndRHS->getValue()).zext(newBitSize);
819 if (newBitSize > truncSize)
820 APnewAndRHS = APnewAndRHS.trunc(truncSize);
821 ConstantInt* newAndRHS = ConstantInt::get(APnewAndRHS);
822 Value *NewAnd =
823 BinaryOperator::CreateAnd(TruncIndVar, newAndRHS,
824 UInst->getName()+".nozex", UInst);
825 for (Value::use_iterator UI2 = UInst->use_begin(),
826 UE2 = UInst->use_end(); UI2 != UE2; ++UI2) {
827 Instruction *II = dyn_cast(UI2);
828 II->replaceAllUsesWith(NewAnd);
829 DeadInsts.insert(II);
830 }
831 DeadInsts.insert(UInst);
832 }
833 // If we have zext((i+constant)&constant), we can use the larger
834 // variable even if the add does overflow. This works whenever the
835 // constant being ANDed is the same size as i, which it presumably is.
836 // We don't need to restrict the expression being and'ed to i+const,
837 // but we have to promote everything in it, so it's convenient.
838 // zext((i | constant)&constant) is also valid and accepted here.
839 if (UInst && (UInst->getOpcode()==Instruction::Add ||
840 UInst->getOpcode()==Instruction::Or) &&
841 UInst->hasOneUse() &&
842 isa(UInst->getOperand(1))) {
843 uint64_t newBitSize = LargestType->getPrimitiveSizeInBits();
844 ConstantInt* AddRHS = dyn_cast(UInst->getOperand(1));
845 Instruction *UInst2 = dyn_cast(UInst->use_begin());
846 if (UInst2 && UInst2->getOpcode() == Instruction::And &&
847 !UInst2->use_empty() &&
848 allUsesAreSameTyped(Instruction::ZExt, UInst2) &&
849 isa(UInst2->getOperand(1))) {
850 ZExtInst* oldZext = dyn_cast(UInst2->use_begin());
851 uint64_t truncSize = oldZext->getType()->getPrimitiveSizeInBits();
852 Value *TruncIndVar = getSignExtendedTruncVar(AR, SE, LargestType,
853 L, oldZext->getType(), Rewriter);
854 ConstantInt* AndRHS = dyn_cast(UInst2->getOperand(1));
855 APInt APnewAddRHS = APInt(AddRHS->getValue()).zext(newBitSize);
856 if (newBitSize > truncSize)
857 APnewAddRHS = APnewAddRHS.trunc(truncSize);
858 ConstantInt* newAddRHS = ConstantInt::get(APnewAddRHS);
859 Value *NewAdd = ((UInst->getOpcode()==Instruction::Add) ?
860 BinaryOperator::CreateAdd(TruncIndVar, newAddRHS,
861 UInst->getName()+".nozex", UInst2) :
862 BinaryOperator::CreateOr(TruncIndVar, newAddRHS,
863 UInst->getName()+".nozex", UInst2));
864 APInt APcopy2 = APInt(AndRHS->getValue());
865 ConstantInt* newAndRHS = ConstantInt::get(APcopy2.zext(newBitSize));
866 Value *NewAnd =
867 BinaryOperator::CreateAnd(NewAdd, newAndRHS,
868 UInst->getName()+".nozex", UInst2);
869 for (Value::use_iterator UI2 = UInst2->use_begin(),
870 UE2 = UInst2->use_end(); UI2 != UE2; ++UI2) {
871 Instruction *II = dyn_cast(UI2);
872 II->replaceAllUsesWith(NewAnd);
873 DeadInsts.insert(II);
548
549 DOUT << "INDVARS: Made offset-and-trunc IV for offset "
550 << *IVTy << " " << *Offset << ": ";
551 DEBUG(WriteAsOperand(*DOUT, V, false));
552 DOUT << "\n";
553
554 // Now expand it into actual Instructions and patch it into place.
555 NewVal = Rewriter.expandCodeFor(AR, UseTy);
556 }
557
558 // Patch the new value into place.
559 if (Op->hasName())
560 NewVal->takeName(Op);
561 User->replaceUsesOfWith(Op, NewVal);
562 UI->setOperandValToReplace(NewVal);
563 DOUT << "INDVARS: Rewrote IV '" << *AR << "' " << *Op
564 << " into = " << *NewVal << "\n";
565 ++NumRemoved;
566 Changed = true;
567
568 // The old value may be dead now.
569 DeadInsts.push_back(Op);
570 }
571 }
572
573 // Now that we're done iterating through lists, clean up any instructions
574 // which are now dead.
575 while (!DeadInsts.empty()) {
576 Instruction *Inst = dyn_cast_or_null(DeadInsts.pop_back_val());
577 if (Inst)
578 RecursivelyDeleteTriviallyDeadInstructions(Inst);
579 }
580 }
581
582 /// If there's a single exit block, sink any loop-invariant values that
583 /// were defined in the preheader but not used inside the loop into the
584 /// exit block to reduce register pressure in the loop.
585 void IndVarSimplify::SinkUnusedInvariants(Loop *L, SCEVExpander &Rewriter) {
586 BasicBlock *ExitBlock = L->getExitBlock();
587 if (!ExitBlock) return;
588
589 Instruction *NonPHI = ExitBlock->getFirstNonPHI();
590 BasicBlock *Preheader = L->getLoopPreheader();
591 BasicBlock::iterator I = Preheader->getTerminator();
592 while (I != Preheader->begin()) {
593 --I;
594 // New instructions were inserted at the end of the preheader. Only
595 // consider those new instructions.
596 if (!Rewriter.isInsertedInstruction(I))
597 break;
598 // Determine if there is a use in or before the loop (direct or
599 // otherwise).
600 bool UsedInLoop = false;
601 for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
602 UI != UE; ++UI) {
603 BasicBlock *UseBB = cast(UI)->getParent();
604 if (PHINode *P = dyn_cast(UI)) {
605 unsigned i =
606 PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
607 UseBB = P->getIncomingBlock(i);
608 }
609 if (UseBB == Preheader || L->contains(UseBB)) {
610 UsedInLoop = true;
611 break;
612 }
613 }
614 // If there is, the def must remain in the preheader.
615 if (UsedInLoop)
616 continue;
617 // Otherwise, sink it to the exit block.
618 Instruction *ToMove = I;
619 bool Done = false;
620 if (I != Preheader->begin())
621 --I;
622 else
623 Done = true;
624 ToMove->moveBefore(NonPHI);
625 if (Done)
626 break;
627 }
628 }
629
630 /// Re-schedule the inserted instructions to put defs before uses. This
631 /// fixes problems that arrise when SCEV expressions contain loop-variant
632 /// values unrelated to the induction variable which are defined inside the
633 /// loop. FIXME: It would be better to insert instructions in the right
634 /// place so that this step isn't needed.
635 void IndVarSimplify::FixUsesBeforeDefs(Loop *L, SCEVExpander &Rewriter) {
636 // Visit all the blocks in the loop in pre-order dom-tree dfs order.
637 DominatorTree *DT = &getAnalysis();
638 std::map NumPredsLeft;
639 SmallVector Worklist;
640 Worklist.push_back(DT->getNode(L->getHeader()));
641 do {
642 DomTreeNode *Node = Worklist.pop_back_val();
643 for (DomTreeNode::iterator I = Node->begin(), E = Node->end(); I != E; ++I)
644 if (L->contains((*I)->getBlock()))
645 Worklist.push_back(*I);
646 BasicBlock *BB = Node->getBlock();
647 // Visit all the instructions in the block top down.
648 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
649 // Count the number of operands that aren't properly dominating.
650 unsigned NumPreds = 0;
651 if (Rewriter.isInsertedInstruction(I) && !isa(I))
652 for (User::op_iterator OI = I->op_begin(), OE = I->op_end();
653 OI != OE; ++OI)
654 if (Instruction *Inst = dyn_cast(OI))
655 if (L->contains(Inst->getParent()) && !NumPredsLeft.count(Inst))
656 ++NumPreds;
657 NumPredsLeft[I] = NumPreds;
658 // Notify uses of the position of this instruction, and move the
659 // users (and their dependents, recursively) into place after this
660 // instruction if it is their last outstanding operand.
661 for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
662 UI != UE; ++UI) {
663 Instruction *Inst = cast(UI);
664 std::map::iterator Z = NumPredsLeft.find(Inst);
665 if (Z != NumPredsLeft.end() && Z->second != 0 && --Z->second == 0) {
666 SmallVector UseWorkList;
667 UseWorkList.push_back(Inst);
668 BasicBlock::iterator InsertPt = next(I);
669 while (isa(InsertPt)) ++InsertPt;
670 do {
671 Instruction *Use = UseWorkList.pop_back_val();
672 Use->moveBefore(InsertPt);
673 NumPredsLeft.erase(Use);
674 for (Value::use_iterator IUI = Use->use_begin(),
675 IUE = Use->use_end(); IUI != IUE; ++IUI) {
676 Instruction *IUIInst = cast(IUI);
677 if (L->contains(IUIInst->getParent()) &&
678 Rewriter.isInsertedInstruction(IUIInst) &&
679 !isa(IUIInst))
680 UseWorkList.push_back(IUIInst);
874681 }
875 DeadInsts.insert(UInst);
876 DeadInsts.insert(UInst2);
877 }
682 } while (!UseWorkList.empty());
878683 }
879684 }
880
881 // Replace the old PHI Node with the inserted computation.
882 PN->replaceAllUsesWith(NewVal);
883 DeadInsts.insert(PN);
884 IndVars.pop_back();
885 ++NumRemoved;
886 Changed = true;
887 }
888
889 DeleteTriviallyDeadInstructions(DeadInsts);
890 assert(L->isLCSSAForm());
891 return Changed;
685 }
686 } while (!Worklist.empty());
892687 }
893688
894689 /// Return true if it is OK to use SIToFPInst for an inducation variable
932727 /// for(int i = 0; i < 10000; ++i)
933728 /// bar((double)i);
934729 ///
935 void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH,
936 SmallPtrSet &DeadInsts) {
730 void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
937731
938732 unsigned IncomingEdge = L->contains(PH->getIncomingBlock(0));
939733 unsigned BackEdge = IncomingEdge^1;
1040834 ICmpInst *NewEC = new ICmpInst(NewPred, LHS, RHS, EC->getNameStart(),
1041835 EC->getParent()->getTerminator());
1042836
837 // In the following deltions, PH may become dead and may be deleted.
838 // Use a WeakVH to observe whether this happens.
839 WeakVH WeakPH = PH;
840
1043841 // Delete old, floating point, exit comparision instruction.
1044842 EC->replaceAllUsesWith(NewEC);
1045 DeadInsts.insert(EC);
843 RecursivelyDeleteTriviallyDeadInstructions(EC);
1046844
1047845 // Delete old, floating point, increment instruction.
1048846 Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
1049 DeadInsts.insert(Incr);
1050
1051 // Replace floating induction variable. Give SIToFPInst preference over
1052 // UIToFPInst because it is faster on platforms that are widely used.
1053 if (useSIToFPInst(*InitValue, *EV, newInitValue, intEV)) {
1054 SIToFPInst *Conv = new SIToFPInst(NewPHI, PH->getType(), "indvar.conv",
1055 PH->getParent()->getFirstNonPHI());
1056 PH->replaceAllUsesWith(Conv);
1057 } else {
1058 UIToFPInst *Conv = new UIToFPInst(NewPHI, PH->getType(), "indvar.conv",
1059 PH->getParent()->getFirstNonPHI());
1060 PH->replaceAllUsesWith(Conv);
1061 }
1062 DeadInsts.insert(PH);
1063 }
1064
847 RecursivelyDeleteTriviallyDeadInstructions(Incr);
848
849 // Replace floating induction variable, if it isn't already deleted.
850 // Give SIToFPInst preference over UIToFPInst because it is faster on
851 // platforms that are widely used.
852 if (WeakPH && !PH->use_empty()) {
853 if (useSIToFPInst(*InitValue, *EV, newInitValue, intEV)) {
854 SIToFPInst *Conv = new SIToFPInst(NewPHI, PH->getType(), "indvar.conv",
855 PH->getParent()->getFirstNonPHI());
856 PH->replaceAllUsesWith(Conv);
857 } else {
858 UIToFPInst *Conv = new UIToFPInst(NewPHI, PH->getType(), "indvar.conv",
859 PH->getParent()->getFirstNonPHI());
860 PH->replaceAllUsesWith(Conv);
861 }
862 RecursivelyDeleteTriviallyDeadInstructions(PH);
863 }
864
865 // Add a new IVUsers entry for the newly-created integer PHI.
866 IU->AddUsersIfInteresting(NewPHI);
867 }
1919 #include "llvm/Type.h"
2020 #include "llvm/DerivedTypes.h"
2121 #include "llvm/Analysis/Dominators.h"
22 #include "llvm/Analysis/IVUsers.h"
2223 #include "llvm/Analysis/LoopInfo.h"
2324 #include "llvm/Analysis/LoopPass.h"
2425 #include "llvm/Analysis/ScalarEvolutionExpander.h"
5253
5354 struct BasedUser;
5455
55 /// IVStrideUse - Keep track of one use of a strided induction variable, where
56 /// the stride is stored externally. The Offset member keeps track of the
57 /// offset from the IV, User is the actual user of the operand, and
58 /// 'OperandValToReplace' is the operand of the User that is the use.
59 struct VISIBILITY_HIDDEN IVStrideUse {
60 SCEVHandle Offset;
61 Instruction *User;
62 Value *OperandValToReplace;
63
64 // isUseOfPostIncrementedValue - True if this should use the
65 // post-incremented version of this IV, not the preincremented version.
66 // This can only be set in special cases, such as the terminating setcc
67 // instruction for a loop or uses dominated by the loop.
68 bool isUseOfPostIncrementedValue;
69
70 IVStrideUse(const SCEVHandle &Offs, Instruction *U, Value *O)
71 : Offset(Offs), User(U), OperandValToReplace(O),
72 isUseOfPostIncrementedValue(false) {}
73 };
74
75 /// IVUsersOfOneStride - This structure keeps track of all instructions that
76 /// have an operand that is based on the trip count multiplied by some stride.
77 /// The stride for all of these users is common and kept external to this
78 /// structure.
79 struct VISIBILITY_HIDDEN IVUsersOfOneStride {
80 /// Users - Keep track of all of the users of this stride as well as the
81 /// initial value and the operand that uses the IV.
82 std::vector Users;
83
84 void addUser(const SCEVHandle &Offset,Instruction *User, Value *Operand) {
85 Users.push_back(IVStrideUse(Offset, User, Operand));
86 }
87 };
88
8956 /// IVInfo - This structure keeps track of one IV expression inserted during
9057 /// StrengthReduceStridedIVUsers. It contains the stride, the common base, as
9158 /// well as the PHI node and increment value created for rewrite.
10976 };
11077
11178 class VISIBILITY_HIDDEN LoopStrengthReduce : public LoopPass {
79 IVUsers *IU;
11280 LoopInfo *LI;
11381 DominatorTree *DT;
11482 ScalarEvolution *SE;
11583 bool Changed;
11684
117 /// IVUsesByStride - Keep track of all uses of induction variables that we
118 /// are interested in. The key of the map is the stride of the access.
119 std::map IVUsesByStride;
120
12185 /// IVsByStride - Keep track of all IVs that have been inserted for a
12286 /// particular stride.
12387 std::map IVsByStride;
12690 /// reused (nor should they be rewritten to reuse other strides).
12791 SmallSet StrideNoReuse;
12892
129 /// StrideOrder - An ordering of the keys in IVUsesByStride that is stable:
130 /// We use this to iterate over the IVUsesByStride collection without being
131 /// dependent on random ordering of pointers in the process.
132 SmallVector StrideOrder;
133
13493 /// DeadInsts - Keep track of instructions we may have made dead, so that
13594 /// we can remove them after we are done working.
136 SmallVector<Instruction*, 16> DeadInsts;
95 SmallVector<WeakVH, 16> DeadInsts;
13796
13897 /// TLI - Keep a pointer of a TargetLowering to consult for determining
13998 /// transformation profitability.
160119 AU.addRequired();
161120 AU.addRequired();
162121 AU.addPreserved();
122 AU.addRequired();
123 AU.addPreserved();
163124 }
164125
165126 private:
166 bool AddUsersIfInteresting(Instruction *I, Loop *L,
167 SmallPtrSet &Processed);
168127 ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond,
169128 IVStrideUse* &CondUse,
170129 const SCEVHandle* &CondStride);
190149 const std::vector& UsersToProcess);
191150 bool ValidScale(bool, int64_t,
192151 const std::vector& UsersToProcess);
152 bool ValidOffset(bool, int64_t, int64_t,
153 const std::vector& UsersToProcess);
193154 SCEVHandle CollectIVUsers(const SCEVHandle &Stride,
194155 IVUsersOfOneStride &Uses,
195156 Loop *L,
241202 void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {
242203 if (DeadInsts.empty()) return;
243204
244 // Sort the deadinsts list so that we can trivially eliminate duplicates as we
245 // go. The code below never adds a non-dead instruction to the worklist, but
246 // callers may not be so careful.
247 array_pod_sort(DeadInsts.begin(), DeadInsts.end());
248
249 // Drop duplicate instructions and those with uses.
250 for (unsigned i = 0, e = DeadInsts.size()-1; i < e; ++i) {
251 Instruction *I = DeadInsts[i];
252 if (!I->use_empty()) DeadInsts[i] = 0;
253 while (i != e && DeadInsts[i+1] == I)
254 DeadInsts[++i] = 0;
255 }
256
257205 while (!DeadInsts.empty()) {
258 Instruction *I = DeadInsts.back();
206 Instruction *I = dyn_cast_or_null(DeadInsts.back());
259207 DeadInsts.pop_back();
260208
261209 if (I == 0 || !isInstructionTriviallyDead(I))
310258 if (const SCEVCastExpr *CE = dyn_cast(S))
311259 return containsAddRecFromDifferentLoop(CE->getOperand(), L);
312260 return false;
313 }
314
315 /// getSCEVStartAndStride - Compute the start and stride of this expression,
316 /// returning false if the expression is not a start/stride pair, or true if it
317 /// is. The stride must be a loop invariant expression, but the start may be
318 /// a mix of loop invariant and loop variant expressions. The start cannot,
319 /// however, contain an AddRec from a different loop, unless that loop is an
320 /// outer loop of the current loop.
321 static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L,
322 SCEVHandle &Start, SCEVHandle &Stride,
323 ScalarEvolution *SE, DominatorTree *DT) {
324 SCEVHandle TheAddRec = Start; // Initialize to zero.
325
326 // If the outer level is an AddExpr, the operands are all start values except
327 // for a nested AddRecExpr.
328 if (const SCEVAddExpr *AE = dyn_cast(SH)) {
329 for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i)
330 if (const SCEVAddRecExpr *AddRec =
331 dyn_cast(AE->getOperand(i))) {
332 if (AddRec->getLoop() == L)
333 TheAddRec = SE->getAddExpr(AddRec, TheAddRec);
334 else
335 return false; // Nested IV of some sort?
336 } else {
337 Start = SE->getAddExpr(Start, AE->getOperand(i));
338 }
339
340 } else if (isa(SH)) {
341 TheAddRec = SH;
342 } else {
343 return false; // not analyzable.
344 }
345
346 const SCEVAddRecExpr *AddRec = dyn_cast(TheAddRec);
347 if (!AddRec || AddRec->getLoop() != L) return false;
348
349 // FIXME: Generalize to non-affine IV's.
350 if (!AddRec->isAffine()) return false;
351
352 // If Start contains an SCEVAddRecExpr from a different loop, other than an
353 // outer loop of the current loop, reject it. SCEV has no concept of
354 // operating on more than one loop at a time so don't confuse it with such
355 // expressions.
356 if (containsAddRecFromDifferentLoop(AddRec->getOperand(0), L))
357 return false;
358
359 Start = SE->getAddExpr(Start, AddRec->getOperand(0));
360
361 if (!isa(AddRec->getOperand(1))) {
362 // If stride is an instruction, make sure it dominates the loop preheader.
363 // Otherwise we could end up with a use before def situation.
364 BasicBlock *Preheader = L->getLoopPreheader();
365 if (!AddRec->getOperand(1)->dominates(Preheader, DT))
366 return false;
367
368 DOUT << "[" << L->getHeader()->getName()
369 << "] Variable stride: " << *AddRec << "\n";
370 }
371
372 Stride = AddRec->getOperand(1);
373 return true;
374 }
375
376 /// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
377 /// and now we need to decide whether the user should use the preinc or post-inc
378 /// value. If this user should use the post-inc version of the IV, return true.
379 ///
380 /// Choosing wrong here can break dominance properties (if we choose to use the
381 /// post-inc value when we cannot) or it can end up adding extra live-ranges to
382 /// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
383 /// should use the post-inc value).
384 static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV,
385 Loop *L, DominatorTree *DT, Pass *P,
386 SmallVectorImpl &DeadInsts){
387 // If the user is in the loop, use the preinc value.
388 if (L->contains(User->getParent())) return false;
389
390 BasicBlock *LatchBlock = L->getLoopLatch();
391
392 // Ok, the user is outside of the loop. If it is dominated by the latch
393 // block, use the post-inc value.
394 if (DT->dominates(LatchBlock, User->getParent()))
395 return true;
396
397 // There is one case we have to be careful of: PHI nodes. These little guys
398 // can live in blocks that do not dominate the latch block, but (since their
399 // uses occur in the predecessor block, not the block the PHI lives in) should
400 // still use the post-inc value. Check for this case now.
401 PHINode *PN = dyn_cast(User);
402 if (!PN) return false; // not a phi, not dominated by latch block.
403
404 // Look at all of the uses of IV by the PHI node. If any use corresponds to
405 // a block that is not dominated by the latch block, give up and use the
406 // preincremented value.
407 unsigned NumUses = 0;
408 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
409 if (PN->getIncomingValue(i) == IV) {
410 ++NumUses;
411 if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
412 return false;
413 }
414
415 // Okay, all uses of IV by PN are in predecessor blocks that really are
416 // dominated by the latch block. Use the post-incremented value.
417 return true;
418261 }
419262
420263 /// isAddressUse - Returns true if the specified instruction is using the
466309 return UseTy;
467310 }
468311
469 /// AddUsersIfInteresting - Inspect the specified instruction. If it is a
470 /// reducible SCEV, recursively add its users to the IVUsesByStride set and
471 /// return true. Otherwise, return false.
472 bool LoopStrengthReduce::AddUsersIfInteresting(Instruction *I, Loop *L,
473 SmallPtrSet &Processed) {
474 if (!SE->isSCEVable(I->getType()))
475 return false; // Void and FP expressions cannot be reduced.
476
477 // LSR is not APInt clean, do not touch integers bigger than 64-bits.
478 if (SE->getTypeSizeInBits(I->getType()) > 64)
479 return false;
480
481 if (!Processed.insert(I))
482 return true; // Instruction already handled.
483
484 // Get the symbolic expression for this instruction.
485 SCEVHandle ISE = SE->getSCEV(I);
486 if (isa(ISE)) return false;
487
488 // Get the start and stride for this expression.
489 SCEVHandle Start = SE->getIntegerSCEV(0, ISE->getType());
490 SCEVHandle Stride = Start;
491 if (!getSCEVStartAndStride(ISE, L, Start, Stride, SE, DT))
492 return false; // Non-reducible symbolic expression, bail out.
493
494 std::vector IUsers;
495 // Collect all I uses now because IVUseShouldUsePostIncValue may
496 // invalidate use_iterator.
497 for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI)
498 IUsers.push_back(cast(*UI));
499
500 for (unsigned iused_index = 0, iused_size = IUsers.size();
501 iused_index != iused_size; ++iused_index) {
502
503 Instruction *User = IUsers[iused_index];
504
505 // Do not infinitely recurse on PHI nodes.
506 if (isa(User) && Processed.count(User))
507 continue;
508
509 // Descend recursively, but not into PHI nodes outside the current loop.
510 // It's important to see the entire expression outside the loop to get
511 // choices that depend on addressing mode use right, although we won't
512 // consider references ouside the loop in all cases.
513 // If User is already in Processed, we don't want to recurse into it again,
514 // but do want to record a second reference in the same instruction.
515 bool AddUserToIVUsers = false;
516 if (LI->getLoopFor(User->getParent()) != L) {
517 if (isa(User) || Processed.count(User) ||
518 !AddUsersIfInteresting(User, L, Processed)) {
519 DOUT << "FOUND USER in other loop: " << *User
520 << " OF SCEV: " << *ISE << "\n";
521 AddUserToIVUsers = true;
522 }
523 } else if (Processed.count(User) ||
524 !AddUsersIfInteresting(User, L, Processed)) {
525 DOUT << "FOUND USER: " << *User
526 << " OF SCEV: " << *ISE << "\n";
527 AddUserToIVUsers = true;
528 }
529
530 if (AddUserToIVUsers) {
531 IVUsersOfOneStride &StrideUses = IVUsesByStride[Stride];
532 if (StrideUses.Users.empty()) // First occurrence of this stride?
533 StrideOrder.push_back(Stride);
534
535 // Okay, we found a user that we cannot reduce. Analyze the instruction
536 // and decide what to do with it. If we are a use inside of the loop, use
537 // the value before incrementation, otherwise use it after incrementation.
538 if (IVUseShouldUsePostIncValue(User, I, L, DT, this, DeadInsts)) {
539 // The value used will be incremented by the stride more than we are
540 // expecting, so subtract this off.
541 SCEVHandle NewStart = SE->getMinusSCEV(Start, Stride);
542 StrideUses.addUser(NewStart, User, I);
543 StrideUses.Users.back().isUseOfPostIncrementedValue = true;
544 DOUT << " USING POSTINC SCEV, START=" << *NewStart<< "\n";
545 } else {
546 StrideUses.addUser(Start, User, I);
547 }
548 }
549 }
550 return true;
551 }
552
553312 namespace {
554313 /// BasedUser - For a particular base value, keep information about how we've
555314 /// partitioned the expression so far.
570329 /// EmittedBase.
571330 Value *OperandValToReplace;
572331
332 /// isSigned - The stride (and thus also the Base) of this use may be in
333 /// a narrower type than the use itself (OperandValToReplace->getType()).
334 /// When this is the case, the isSigned field indicates whether the
335 /// IV expression should be signed-extended instead of zero-extended to
336 /// fit the type of the use.
337 bool isSigned;
338
573339 /// Imm - The immediate value that should be added to the base immediately
574340 /// before Inst, because it will be folded into the imm field of the
575341 /// instruction. This is also sometimes used for loop-variant values that
588354 bool isUseOfPostIncrementedValue;
589355
590356 BasedUser(IVStrideUse &IVSU, ScalarEvolution *se)
591 : SE(se), Base(IVSU.Offset), Inst(IVSU.User),
592 OperandValToReplace(IVSU.OperandValToReplace),
357 : SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()),
358 OperandValToReplace(IVSU.getOperandValToReplace()),
359 isSigned(IVSU.isSigned()),
593360 Imm(SE->getIntegerSCEV(0, Base->getType())),
594 isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue) {}
361 isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {}
595362
596363 // Once we rewrite the code to insert the new IVs we want, update the
597364 // operands of Inst to use the new expression 'NewBase', with 'Imm' added
599366 void RewriteInstructionToUseNewBase(const SCEVHandle &NewBase,
600367 Instruction *InsertPt,
601368 SCEVExpander &Rewriter, Loop *L, Pass *P,
602 SmallVectorImpl> &DeadInsts);
369 SmallVectorImpl> &DeadInsts);
603370
604371 Value *InsertCodeForBaseAtPosition(const SCEVHandle &NewBase,
605372 const Type *Ty,
637404 InsertLoop = InsertLoop->getParentLoop();
638405 }
639406
640 Value *Base = Rewriter.expandCodeFor(NewBase, Ty, BaseInsertPt);
407 Value *Base = Rewriter.expandCodeFor(NewBase, NewBase->getType(),
408 BaseInsertPt);
409
410 SCEVHandle NewValSCEV = SE->getUnknown(Base);
641411
642412 // If there is no immediate value, skip the next part.
643 if (Imm->isZero())
644 return Base;
645
646 // If we are inserting the base and imm values in the same block, make sure to
647 // adjust the IP position if insertion reused a result.
648 if (IP == BaseInsertPt)
649 IP = Rewriter.getInsertionPoint();
650
651 // Always emit the immediate (if non-zero) into the same block as the user.
652 SCEVHandle NewValSCEV = SE->getAddExpr(SE->getUnknown(Base), Imm);
413 if (!Imm->isZero()) {
414 // If we are inserting the base and imm values in the same block, make sure
415 // to adjust the IP position if insertion reused a result.
416 if (IP == BaseInsertPt)
417 IP = Rewriter.getInsertionPoint();
418
419 // Always emit the immediate (if non-zero) into the same block as the user.
420 NewValSCEV = SE->getAddExpr(NewValSCEV, Imm);
421 }
422
423 if (isSigned)
424 NewValSCEV = SE->getTruncateOrSignExtend(NewValSCEV, Ty);
425 else
426 NewValSCEV = SE->getTruncateOrZeroExtend(NewValSCEV, Ty);
427
653428 return Rewriter.expandCodeFor(NewValSCEV, Ty, IP);
654429 }
655430
663438 void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase,
664439 Instruction *NewBasePt,
665440 SCEVExpander &Rewriter, Loop *L, Pass *P,
666 SmallVectorImpl<Instruction*> &DeadInsts){
441 SmallVectorImpl<WeakVH> &DeadInsts) {
667442 if (!isa(Inst)) {
668443 // By default, insert code at the user instruction.
669444 BasicBlock::iterator InsertPt = Inst;
1157932 return true;
1158933 }
1159934
935 /// ValidOffset - Check whether the given Offset is valid for all loads and
936 /// stores in UsersToProcess.
937 ///
938 bool LoopStrengthReduce::ValidOffset(bool HasBaseReg,
939 int64_t Offset,
940 int64_t Scale,
941 const std::vector& UsersToProcess) {
942 if (!TLI)
943 return true;
944
945 for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) {
946 // If this is a load or other access, pass the type of the access in.
947 const Type *AccessTy = Type::VoidTy;
948 if (isAddressUse(UsersToProcess[i].Inst,
949 UsersToProcess[i].OperandValToReplace))
950 AccessTy = getAccessType(UsersToProcess[i].Inst);
951 else if (isa(UsersToProcess[i].Inst))
952 continue;
953
954 TargetLowering::AddrMode AM;
955 if (const SCEVConstant *SC = dyn_cast(UsersToProcess[i].Imm))
956 AM.BaseOffs = SC->getValue()->getSExtValue();
957 AM.BaseOffs = (uint64_t)AM.BaseOffs + (uint64_t)Offset;
958 AM.HasBaseReg = HasBaseReg || !UsersToProcess[i].Base->isZero();
959 AM.Scale = Scale;
960
961 // If load[imm+r*scale] is illegal, bail out.
962 if (!TLI->isLegalAddressingMode(AM, AccessTy))
963 return false;
964 }
965 return true;
966 }
967
1160968 /// RequiresTypeConversion - Returns true if converting Ty1 to Ty2 is not
1161969 /// a nop.
1162970 bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1,
11951003
11961004 if (const SCEVConstant *SC = dyn_cast(Stride)) {
11971005 int64_t SInt = SC->getValue()->getSExtValue();
1198 for (unsigned NewStride = 0, e = StrideOrder.size(); NewStride != e;
1199 ++NewStride) {
1006 for (unsigned NewStride = 0, e = IU->StrideOrder.size();
1007 NewStride != e; ++NewStride) {
12001008 std::map::iterator SI =
1201 IVsByStride.find(StrideOrder[NewStride]);
1009 IVsByStride.find(IU->StrideOrder[NewStride]);
12021010 if (SI == IVsByStride.end() || !isa(SI->first) ||
12031011 StrideNoReuse.count(SI->first))
12041012 continue;
12141022 // multiplications.
12151023 if (Scale == 1 ||
12161024 (AllUsesAreAddresses &&
1217 ValidScale(HasBaseReg, Scale, UsersToProcess)))
1025 ValidScale(HasBaseReg, Scale, UsersToProcess))) {
1026 // Prefer to reuse an IV with a base of zero.
12181027 for (std::vector::iterator II = SI->second.IVs.begin(),
12191028 IE = SI->second.IVs.end(); II != IE; ++II)
1220 // FIXME: Only handle base == 0 for now.
1221 // Only reuse previous IV if it would not require a type conversion.
1029 // Only reuse previous IV if it would not require a type conversion
1030 // and if the base difference can be folded.
12221031 if (II->Base->isZero() &&
12231032 !RequiresTypeConversion(II->Base->getType(), Ty)) {
12241033 IV = *II;
12251034 return SE->getIntegerSCEV(Scale, Stride->getType());
12261035 }
1036 // Otherwise, settle for an IV with a foldable base.
1037 if (AllUsesAreAddresses)
1038 for (std::vector::iterator II = SI->second.IVs.begin(),
1039 IE = SI->second.IVs.end(); II != IE; ++II)
1040 // Only reuse previous IV if it would not require a type conversion
1041 // and if the base difference can be folded.
1042 if (SE->getEffectiveSCEVType(II->Base->getType()) ==
1043 SE->getEffectiveSCEVType(Ty) &&
1044 isa(II->Base)) {
1045 int64_t Base =
1046 cast(II->Base)->getValue()->getSExtValue();
1047 if (Base > INT32_MIN && Base <= INT32_MAX &&
1048 ValidOffset(HasBaseReg, -Base * Scale,
1049 Scale, UsersToProcess)) {
1050 IV = *II;
1051 return SE->getIntegerSCEV(Scale, Stride->getType());
1052 }
1053 }
1054 }
12271055 }
12281056 } else if (AllUsesAreOutsideLoop) {
12291057 // Accept nonconstant strides here; it is really really right to substitute
12301058 // an existing IV if we can.
1231 for (unsigned NewStride = 0, e = StrideOrder.size(); NewStride != e;
1232 ++NewStride) {
1059 for (unsigned NewStride = 0, e = IU->StrideOrder.size();
1060 NewStride != e; ++NewStride) {
12331061 std::map::iterator SI =
1234 IVsByStride.find(StrideOrder[NewStride]);
1062 IVsByStride.find(IU->StrideOrder[NewStride]);
12351063 if (SI == IVsByStride.end() || !isa(SI->first))
12361064 continue;
12371065 int64_t SSInt = cast(SI->first)->getValue()->getSExtValue();
12481076 }
12491077 // Special case, old IV is -1*x and this one is x. Can treat this one as
12501078 // -1*old.
1251 for (unsigned NewStride = 0, e = StrideOrder.size(); NewStride != e;
1252 ++NewStride) {
1079 for (unsigned NewStride = 0, e = IU->StrideOrder.size();
1080 NewStride != e; ++NewStride) {
12531081 std::map::iterator SI =
1254 IVsByStride.find(StrideOrder[NewStride]);
1082 IVsByStride.find(IU->StrideOrder[NewStride]);
12551083 if (SI == IVsByStride.end())
12561084 continue;
12571085 if (const SCEVMulExpr *ME = dyn_cast(SI->first))
13021130 bool &AllUsesAreAddresses,
13031131 bool &AllUsesAreOutsideLoop,
13041132 std::vector &UsersToProcess) {
1133 // FIXME: Generalize to non-affine IV's.
1134 if (!Stride->isLoopInvariant(L))
1135 return SE->getIntegerSCEV(0, Stride->getType());
1136
13051137 UsersToProcess.reserve(Uses.Users.size());
1306 for (unsigned i = 0, e = Uses.Users.size(); i != e; ++i) {
1307 UsersToProcess.push_back(BasedUser(Uses.Users[i], SE));
1308
1138 for (ilist::iterator I = Uses.Users.begin(),
1139 E = Uses.Users.end(); I != E; ++I) {
1140 UsersToProcess.push_back(BasedUser(*I, SE));
1141
13091142 // Move any loop variant operands from the offset field to the immediate
13101143 // field of the use, so that we don't try to use something before it is
13111144 // computed.
14031236 // TODO: For now, don't do full strength reduction if there could
14041237 // potentially be greater-stride multiples of the current stride
14051238 // which could reuse the current stride IV.
1406 if (StrideOrder.back() != Stride)
1239 if (IU->StrideOrder.back() != Stride)
14071240 return false;
14081241
14091242 // Iterate through the uses to find conditions that automatically rule out
18521685
18531686 SCEVHandle RewriteExpr = SE->getUnknown(RewriteOp);
18541687
1855 if (SE->getTypeSizeInBits(RewriteOp->getType()) !=
1856 SE->getTypeSizeInBits(ReplacedTy)) {
1688 if (SE->getEffectiveSCEVType(RewriteOp->getType()) !=
1689 SE->getEffectiveSCEVType(ReplacedTy)) {
18571690 assert(SE->getTypeSizeInBits(RewriteOp->getType()) >
18581691 SE->getTypeSizeInBits(ReplacedTy) &&
18591692 "Unexpected widening cast!");
18831716 // it here.
18841717 if (!ReuseIV.Base->isZero()) {
18851718 SCEVHandle typedBase = ReuseIV.Base;
1886 if (SE->getTypeSizeInBits(RewriteExpr->getType()) !=
1887 SE->getTypeSizeInBits(ReuseIV.Base->getType())) {
1719 if (SE->getEffectiveSCEVType(RewriteExpr->getType()) !=
1720 SE->getEffectiveSCEVType(ReuseIV.Base->getType())) {
18881721 // It's possible the original IV is a larger type than the new IV,
18891722 // in which case we have to truncate the Base. We checked in
18901723 // RequiresTypeConversion that this is valid.
19281761
19291762 // Mark old value we replaced as possibly dead, so that it is eliminated
19301763 // if we just replaced the last use of that value.
1931 DeadInsts.push_back(cast(User.OperandValToReplace));
1764 DeadInsts.push_back(User.OperandValToReplace);
19321765
19331766 UsersToProcess.pop_back();
19341767 ++NumReduced;
19481781 /// false.
19491782 bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
19501783 const SCEVHandle *&CondStride) {
1951 for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e && !CondUse;
1952 ++Stride) {
1953 std::map::iterator SI =
1954 IVUsesByStride.find(StrideOrder[Stride]);
1955 assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
1956
1957 for (std::vector::iterator UI = SI->second.Users.begin(),
1958 E = SI->second.Users.end(); UI != E; ++UI)
1959 if (UI->User == Cond) {
1784 for (unsigned Stride = 0, e = IU->StrideOrder.size();
1785 Stride != e && !CondUse; ++Stride) {
1786 std::map::iterator SI =
1787 IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
1788 assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
1789
1790 for (ilist::iterator UI = SI->second->Users.begin(),
1791 E = SI->second->Users.end(); UI != E; ++UI)
1792 if (UI->getUser() == Cond) {
19601793 // NOTE: we could handle setcc instructions with multiple uses here, but
19611794 // InstCombine does it as well for simple uses, it's not clear that it
19621795 // occurs enough in real life to handle.
1963 CondUse = &*UI;
1796 CondUse = UI;
19641797 CondStride = &SI->first;
19651798 return true;
19661799 }
20211854 ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
20221855 IVStrideUse* &CondUse,
20231856 const SCEVHandle* &CondStride) {
2024 if (StrideOrder.size() < 2 ||
2025 IVUsesByStride[*CondStride].Users.size() != 1)
1857 // If there's only one stride in the loop, there's nothing to do here.
1858 if (IU->StrideOrder.size() < 2)
20261859 return Cond;
1860 // If there are other users of the condition's stride, don't bother
1861 // trying to change the condition because the stride will still
1862 // remain.
1863 std::map::iterator I =
1864 IU->IVUsesByStride.find(*CondStride);
1865 if (I == IU->IVUsesByStride.end() ||
1866 I->second->Users.size() != 1)
1867 return Cond;
1868 // Only handle constant strides for now.
20271869 const SCEVConstant *SC = dyn_cast(*CondStride);
20281870 if (!SC) return Cond;
20291871
20501892 return Cond;
20511893
20521894 // Look for a suitable stride / iv as replacement.
2053 for (unsigned i = 0, e = StrideOrder.size(); i != e; ++i) {
2054 std::map::iterator SI =
2055 IVUsesByStride.find(StrideOrder[i]);
1895 for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
1896 std::map::iterator SI =
1897 IU->IVUsesByStride.find(IU->StrideOrder[i]);
20561898 if (!isa(SI->first))
20571899 continue;
20581900 int64_t SSInt = cast(SI->first)->getValue()->getSExtValue();
20681910 // Check for overflow.
20691911 if (!Mul.isSignedIntN(BitWidth))
20701912 continue;
1913 // Check for overflow in the stride's type too.
1914 if (!Mul.isSignedIntN(SE->getTypeSizeInBits(SI->first->getType())))
1915 continue;
20711916
20721917 // Watch out for overflow.
20731918 if (ICmpInst::isSignedPredicate(Predicate) &&
20781923 continue;
20791924 // Pick the best iv to use trying to avoid a cast.
20801925 NewCmpLHS = NULL;
2081 for (std::vector::iterator UI = SI->second.Users.begin(),
2082 E = SI->second.Users.end(); UI != E; ++UI) {
2083 NewCmpLHS = UI->OperandValToReplace;
1926 for (ilist::iterator UI = SI->second->Users.begin(),
1927 E = SI->second->Users.end(); UI != E; ++UI) {
1928 Value *Op = UI->getOperandValToReplace();
1929
1930 // If the IVStrideUse implies a cast, check for an actual cast which
1931 // can be used to find the original IV expression.
1932 if (SE->getEffectiveSCEVType(Op->getType()) !=
1933 SE->getEffectiveSCEVType(SI->first->getType())) {
1934 CastInst *CI = dyn_cast(Op);
1935 // If it's not a simple cast, it's complicated.
1936 if (!CI)
1937 continue;
1938 // If it's a cast from a type other than the stride type,
1939 // it's complicated.
1940 if (CI->getOperand(0)->getType() != SI->first->getType())
1941 continue;
1942 // Ok, we found the IV expression in the stride's type.
1943 Op = CI->getOperand(0);
1944 }
1945
1946 NewCmpLHS = Op;
20841947 if (NewCmpLHS->getType() == CmpTy)
20851948 break;
20861949 }
21041967 // Don't rewrite if use offset is non-constant and the new type is
21051968 // of a different type.
21061969 // FIXME: too conservative?
2107 if (NewTyBits != TyBits && !isa(CondUse->Offset))
1970 if (NewTyBits != TyBits && !isa(CondUse->getOffset()))
21081971 continue;
21091972
21101973 bool AllUsesAreAddresses = true;
21111974 bool AllUsesAreOutsideLoop = true;
21121975 std::vector UsersToProcess;
2113 SCEVHandle CommonExprs = CollectIVUsers(SI->first, SI->second, L,
1976 SCEVHandle CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
21141977 AllUsesAreAddresses,
21151978 AllUsesAreOutsideLoop,
21161979 UsersToProcess);
21261989 if (Scale < 0 && !Cond->isEquality())
21271990 Predicate = ICmpInst::getSwappedPredicate(Predicate);
21281991
2129 NewStride = &StrideOrder[i];
1992 NewStride = &IU->StrideOrder[i];
21301993 if (!isa(NewCmpTy))
21311994 NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal);
21321995 else {
21341997 NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy);
21351998 }
21361999 NewOffset = TyBits == NewTyBits
2137 ? SE->getMulExpr(CondUse->Offset,
2000 ? SE->getMulExpr(CondUse->getOffset(),
21382001 SE->getConstant(ConstantInt::get(CmpTy, Scale)))
21392002 : SE->getConstant(ConstantInt::get(NewCmpIntTy,
2140 cast(CondUse->Offset)->getValue()->getSExtValue()*Scale));
2003 cast(CondUse->getOffset())->getValue()
2004 ->getSExtValue()*Scale));
21412005 break;
21422006 }
21432007 }
21642028 OldCond);
21652029
21662030 // Remove the old compare instruction. The old indvar is probably dead too.
2167 DeadInsts.push_back(cast(CondUse->OperandValToReplace));
2031 DeadInsts.push_back(CondUse->getOperandValToReplace());
21682032 OldCond->replaceAllUsesWith(Cond);
21692033 OldCond->eraseFromParent();
21702034
2171 IVUsesByStride[*CondStride].Users.pop_back();
2172 IVUsesByStride[*NewStride].addUser(NewOffset, Cond, NewCmpLHS);
2173 CondUse = &IVUsesByStride[*NewStride].Users.back();
2035 IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS, false);
2036 CondUse = &IU->IVUsesByStride[*NewStride]->Users.back();
21742037 CondStride = NewStride;
21752038 ++NumEliminated;
21762039 Changed = true;
22862149
22872150 // Delete the max calculation instructions.
22882151 Cond->replaceAllUsesWith(NewCond);
2152 CondUse->setUser(NewCond);
2153 Instruction *Cmp = cast(Sel->getOperand(0));
22892154 Cond->eraseFromParent();
2290 Instruction *Cmp = cast(Sel->getOperand(0));
22912155 Sel->eraseFromParent();
22922156 if (Cmp->use_empty())
22932157 Cmp->eraseFromParent();
2294 CondUse->User = NewCond;
22952158 return NewCond;
22962159 }
22972160
23032166 if (isa(BackedgeTakenCount))
23042167 return;
23052168
2306 for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e;
2169 for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e;
23072170 ++Stride) {
2308 std::map::iterator SI =
2309 IVUsesByStride.find(StrideOrder[Stride]);
2310 assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
2171 std::map::iterator SI =
2172 IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
2173 assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
23112174 if (!isa(SI->first))
23122175 continue;
23132176
2314 for (std::vector::iterator UI = SI->second.Users.begin(),
2315 E = SI->second.Users.end(); UI != E; /* empty */) {
2316 std::vector::iterator CandidateUI = UI;
2177 for (ilist::iterator UI = SI->second->Users.begin(),
2178 E = SI->second->Users.end(); UI != E; /* empty */) {
2179 ilist::iterator CandidateUI = UI;
23172180 ++UI;
2318 Instruction *ShadowUse = CandidateUI->User;
2181 Instruction *ShadowUse = CandidateUI->getUser();
23192182 const Type *DestTy = NULL;
23202183
23212184 /* If shadow use is a int->float cast then insert a second IV
23302193 for (unsigned i = 0; i < n; ++i, ++d)
23312194 foo(d);
23322195 */
2333 if (UIToFPInst *UCast = dyn_cast(CandidateUI->User))
2196 if (UIToFPInst *UCast = dyn_cast(CandidateUI->getUser()))
23342197 DestTy = UCast->getDestTy();
2335 else if (SIToFPInst *SCast = dyn_cast(CandidateUI->User))
2198 else if (SIToFPInst *SCast = dyn_cast(CandidateUI->getUser()))
23362199 DestTy = SCast->getDestTy();
23372200 if (!DestTy) continue;
23382201
23992262 /* Remove cast operation */
24002263 ShadowUse->replaceAllUsesWith(NewPH);
24012264 ShadowUse->eraseFromParent();
2402 SI->second.Users.erase(CandidateUI);
24032265 NumShadow++;
24042266 break;
24052267 }
24492311 // transform the icmp to use post-inc iv. Otherwise do so only if it would
24502312 // not reuse another iv and its iv would be reused by other uses. We are
24512313 // optimizing for the case where the icmp is the only use of the iv.
2452 IVUsersOfOneStride &StrideUses = IVUsesByStride[*CondStride];
2453 for (unsigned i = 0, e = StrideUses.Users.size(); i != e; ++i) {
2454 if (StrideUses.Users[i].User == Cond)
2314 IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[*CondStride];
2315 for (ilist::iterator I = StrideUses.Users.begin(),
2316 E = StrideUses.Users.end(); I != E; ++I) {
2317 if (I->getUser() == Cond)
24552318 continue;
2456 if (!StrideUses.Users[i].isUseOfPostIncrementedValue)
2319 if (!I->isUseOfPostIncrementedValue())
24572320 return;
24582321 }
24592322
24622325 // StrengthReduceStridedIVUsers?
24632326 if (const SCEVConstant *SC = dyn_cast(*CondStride)) {
24642327 int64_t SInt = SC->getValue()->getSExtValue();
2465 for (unsigned NewStride = 0, ee = StrideOrder.size(); NewStride != ee;
2328 for (unsigned NewStride = 0, ee = IU->StrideOrder.size(); NewStride != ee;
24662329 ++NewStride) {
2467 std::map::iterator SI =
2468 IVUsesByStride.find(StrideOrder[NewStride]);
2330 std::map::iterator SI =
2331 IU->IVUsesByStride.find(IU->StrideOrder[NewStride]);
24692332 if (!isa(SI->first) || SI->first == *CondStride)
24702333 continue;
24712334 int64_t SSInt =
24782341 bool AllUsesAreAddresses = true;
24792342 bool AllUsesAreOutsideLoop = true;
24802343 std::vector UsersToProcess;
2481 SCEVHandle CommonExprs = CollectIVUsers(SI->first, SI->second, L,
2344 SCEVHandle CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
24822345 AllUsesAreAddresses,
24832346 AllUsesAreOutsideLoop,
24842347 UsersToProcess);
25172380 LatchBlock->getInstList().insert(TermBr, Cond);
25182381
25192382 // Clone the IVUse, as the old use still exists!
2520 IVUsesByStride[*CondStride].addUser(CondUse->Offset, Cond,
2521 CondUse->OperandValToReplace);
2522 CondUse = &IVUsesByStride[*CondStride].Users.back();
2383 IU->IVUsesByStride[*CondStride]->addUser(CondUse->getOffset(), Cond,
2384 CondUse->getOperandValToReplace(),
2385 false);
2386 CondUse = &IU->IVUsesByStride[*CondStride]->Users.back();
25232387 }
25242388 }
25252389
25262390 // If we get to here, we know that we can transform the setcc instruction to
25272391 // use the post-incremented version of the IV, allowing us to coalesce the
25282392 // live ranges for the IV correctly.
2529 CondUse->Offset = SE->getMinusSCEV(CondUse->Offset, *CondStride);
2530 CondUse->isUseOfPostIncrementedValue = true;
2393 CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), *CondStride));
2394 CondUse->setIsUseOfPostIncrementedValue(true);
25312395 Changed = true;
25322396
25332397 ++NumLoopCond;
26432507
26442508 bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
26452509
2510 IU = &getAnalysis();
26462511 LI = &getAnalysis();
26472512 DT = &getAnalysis();
26482513 SE = &getAnalysis();
26492514 Changed = false;
26502515
2651 // Find all uses of induction variables in this loop, and categorize
2652 // them by stride. Start by finding all of the PHI nodes in the header for
2653 // this loop. If they are induction variables, inspect their uses.
2654 SmallPtrSet Processed; // Don't reprocess instructions.
2655 for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I)
2656 AddUsersIfInteresting(I, L, Processed);
2657
2658 if (!IVUsesByStride.empty()) {
2516 if (!IU->IVUsesByStride.empty()) {
26592517 #ifndef NDEBUG
26602518 DOUT << "\nLSR on \"" << L->getHeader()->getParent()->getNameStart()
26612519 << "\" ";
26632521 #endif
26642522
26652523 // Sort the StrideOrder so we process larger strides first.
2666 std::stable_sort(StrideOrder.begin(), StrideOrder.end(), StrideCompare(SE));
2524 std::stable_sort(IU->StrideOrder.begin(), IU->StrideOrder.end(),
2525 StrideCompare(SE));
26672526
26682527 // Optimize induction variables. Some indvar uses can be transformed to use
26692528 // strides that will be needed for other purposes. A common example of this
26942553 // Also, note that we iterate over IVUsesByStride indirectly by using
26952554 // StrideOrder. This extra layer of indirection makes the ordering of
26962555 // strides deterministic - not dependent on map order.
2697 for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) {
2698 std::map::iterator SI =
2699 IVUsesByStride.find(StrideOrder[Stride]);
2700 assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
2701 StrengthReduceStridedIVUsers(SI->first, SI->second, L);
2556 for (unsigned Stride = 0, e = IU->StrideOrder.size();
2557 Stride != e; ++Stride) {
2558 std::map::iterator SI =
2559 IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
2560 assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
2561 // FIXME: Generalize to non-affine IV's.
2562 if (!SI->first->isLoopInvariant(L))
2563 continue;
2564 StrengthReduceStridedIVUsers(SI->first, *SI->second, L);
27022565 }
27032566 }
27042567
27072570 OptimizeLoopCountIV(L);
27082571
27092572 // We're done analyzing this loop; release all the state we built up for it.
2710 IVUsesByStride.clear();
27112573 IVsByStride.clear();
2712 StrideOrder.clear();
27132574 StrideNoReuse.clear();
27142575
27152576 // Clean up after ourselves
0 ; RUN: llvm-as < %s | llc -march=x86-64 -f -o %t
11 ; RUN: grep inc %t | count 1
22 ; RUN: grep dec %t | count 2
3 ; RUN: grep addq %t | count 13
4 ; RUN: grep leaq %t | count 8
5 ; RUN: grep leal %t | count 4
6 ; RUN: grep movq %t | count 5
3 ; RUN: grep addq %t | count 8
4 ; RUN: grep addb %t | count 2
5 ; RUN: grep leaq %t | count 12
6 ; RUN: grep leal %t | count 2
7 ; RUN: grep movq %t | count 4
78
89 ; IV users in each of the loops from other loops shouldn't cause LSR
910 ; to insert new induction variables. Previously it would create a
22 ; RUN: not grep movz %t
33 ; RUN: not grep sar %t
44 ; RUN: not grep shl %t
5 ; RUN: grep add %t | count 6
6 ; RUN: grep inc %t | count 2
7 ; RUN: grep dec %t | count 4
5 ; RUN: grep add %t | count 2
6 ; RUN: grep inc %t | count 4
7 ; RUN: grep dec %t | count 2
88 ; RUN: grep lea %t | count 2
99
1010 ; Optimize away zext-inreg and sext-inreg on the loop induction
1111 ; variable using trip-count information.
12 ; Also, the loop-reversal algorithm kicks in twice.
1312
1413 define void @count_up(double* %d, i64 %n) nounwind {
1514 entry:
77
88 bb2: ; preds = %bb3, %entry
99 %B_addr.0.rec = phi i64 [ %indvar.next154, %bb3 ], [ 0, %entry ] ; [#uses=2]
10 br i1 false, label %bb3, label %bb4
10 %z = icmp slt i64 %B_addr.0.rec, 20000
11 br i1 %z, label %bb3, label %bb4
1112
1213 bb3: ; preds = %bb2
1314 %indvar.next154 = add i64 %B_addr.0.rec, 1 ; [#uses=1]
1617 bb4: ; preds = %bb2
1718 %B_addr.0 = getelementptr float* %B, i64 %B_addr.0.rec ; [#uses=1]
1819 %t1 = ptrtoint float* %B_addr.0 to i64 ; [#uses=1]
19 %t2 = and i64 %t1, 15 ; [#uses=1]
20 %t2 = and i64 %t1, 4294967295 ; [#uses=1]
2021 %t3 = icmp eq i64 %t2, 0 ; [#uses=1]
2122 br i1 %t3, label %bb5, label %bb10.preheader
2223
2425 br label %bb9
2526
2627 bb5: ; preds = %bb4
27 unreachable
28 ret float 7.0
2829
2930 bb9: ; preds = %bb10.preheader
3031 %t5 = getelementptr float* %B, i64 0 ; [#uses=1]
None ; RUN: llvm-as < %s | opt -indvars | llvm-dis | not grep {sext}
1 ; RUN: llvm-as < %s | opt -indvars | llvm-dis | not grep {zext}
0 ; RUN: llvm-as < %s | opt -indvars -instcombine | llvm-dis | not grep {\[sz\]ext}
21 ; ModuleID = ''
32 ;extern int *a, *b, *c, *d, *e, *f; /* 64 bit */
43 ;extern int K[256];
0 ; RUN: llvm-as < %s | opt -indvars | llvm-dis > %t
1 ; RUN: grep phi %t | count 4
2 ; RUN: grep {= phi i32} %t | count 4
3 ; RUN: not grep {sext i} %t
4 ; RUN: not grep {zext i} %t
5 ; RUN: not grep {trunc i} %t
6 ; RUN: not grep {add i8} %t
7 ; PR1301
8
9 ; Do a bunch of analysis and prove that the loops can use an i32 trip
10 ; count without casting.
11
12 ; ModuleID = 'ada.bc'
13 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
14 target triple = "i686-pc-linux-gnu"
15
16 define void @kinds__sbytezero([256 x i32]* nocapture %a) nounwind {
17 bb.thread:
18 %tmp46 = getelementptr [256 x i32]* %a, i32 0, i32 0 ; [#uses=1]
19 store i32 0, i32* %tmp46
20 br label %bb
21
22 bb: ; preds = %bb, %bb.thread
23 %i.0.reg2mem.0 = phi i8 [ -128, %bb.thread ], [ %tmp8, %bb ] ; [#uses=1]
24 %tmp8 = add i8 %i.0.reg2mem.0, 1 ; [#uses=3]
25 %tmp1 = sext i8 %tmp8 to i32 ; [#uses=1]
26 %tmp3 = add i32 %tmp1, 128 ; [#uses=1]
27 %tmp4 = getelementptr [256 x i32]* %a, i32 0, i32 %tmp3 ; [#uses=1]
28 store i32 0, i32* %tmp4
29 %0 = icmp eq i8 %tmp8, 127 ; [#uses=1]
30 br i1 %0, label %return, label %bb
31
32 return: ; preds = %bb
33 ret void
34 }
35
36 define void @kinds__ubytezero([256 x i32]* nocapture %a) nounwind {
37 bb.thread:
38 %tmp35 = getelementptr [256 x i32]* %a, i32 0, i32 0 ; [#uses=1]
39 store i32 0, i32* %tmp35
40 br label %bb
41
42 bb: ; preds = %bb, %bb.thread
43 %i.0.reg2mem.0 = phi i8 [ 0, %bb.thread ], [ %tmp7, %bb ] ; [#uses=1]
44 %tmp7 = add i8 %i.0.reg2mem.0, 1 ; [#uses=3]
45 %tmp1 = zext i8 %tmp7 to i32 ; [#uses=1]
46 %tmp3 = getelementptr [256 x i32]* %a, i32 0, i32 %tmp1 ; [#uses=1]
47 store i32 0, i32* %tmp3
48 %0 = icmp eq i8 %tmp7, -1 ; [#uses=1]
49 br i1 %0, label %return, label %bb
50
51 return: ; preds = %bb
52 ret void
53 }
54
55 define void @kinds__srangezero([21 x i32]* nocapture %a) nounwind {
56 bb.thread:
57 br label %bb
58
59 bb: ; preds = %bb, %bb.thread
60 %i.0.reg2mem.0 = phi i8 [ -10, %bb.thread ], [ %tmp7, %bb ] ; [#uses=2]
61 %tmp12 = sext i8 %i.0.reg2mem.0 to i32 ; [#uses=1]
62 %tmp4 = add i32 %tmp12, 10 ; [#uses=1]
63 %tmp5 = getelementptr [21 x i32]* %a, i32 0, i32 %tmp4 ; [#uses=1]
64 store i32 0, i32* %tmp5
65 %tmp7 = add i8 %i.0.reg2mem.0, 1 ; [#uses=2]
66 %0 = icmp sgt i8 %tmp7, 10 ; [#uses=1]
67 br i1 %0, label %return, label %bb
68
69 return: ; preds = %bb
70 ret void
71 }
72
73 define void @kinds__urangezero([21 x i32]* nocapture %a) nounwind {
74 bb.thread:
75 br label %bb
76
77 bb: ; preds = %bb, %bb.thread
78 %i.0.reg2mem.0 = phi i8 [ 10, %bb.thread ], [ %tmp7, %bb ] ; [#uses=2]
79 %tmp12 = sext i8 %i.0.reg2mem.0 to i32 ; [#uses=1]
80 %tmp4 = add i32 %tmp12, -10 ; [#uses=1]
81 %tmp5 = getelementptr [21 x i32]* %a, i32 0, i32 %tmp4 ; [#uses=1]
82 store i32 0, i32* %tmp5
83 %tmp7 = add i8 %i.0.reg2mem.0, 1 ; [#uses=2]
84 %0 = icmp sgt i8 %tmp7, 30 ; [#uses=1]
85 br i1 %0, label %return, label %bb
86
87 return: ; preds = %bb
88 ret void
89 }
0 ; RUN: llvm-as < %s | opt -indvars | llvm-dis > %t
1 ; RUN: not grep and %t
2 ; RUN: not grep zext %t
3
4 target datalayout = "-p:64:64:64"
5
6 define void @foo(double* %d, i64 %n) nounwind {
7 entry:
8 br label %loop
9
10 loop:
11 %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
12 %indvar.i8 = and i64 %indvar, 255
13 %t0 = getelementptr double* %d, i64 %indvar.i8
14 %t1 = load double* %t0
15 %t2 = mul double %t1, 0.1
16 store double %t2, double* %t0
17 %indvar.i24 = and i64 %indvar, 16777215
18 %t3 = getelementptr double* %d, i64 %indvar.i24
19 %t4 = load double* %t3
20 %t5 = mul double %t4, 2.3
21 store double %t5, double* %t3
22 %t6 = getelementptr double* %d, i64 %indvar
23 %t7 = load double* %t6
24 %t8 = mul double %t7, 4.5
25 store double %t8, double* %t6
26 %indvar.next = add i64 %indvar, 1
27 %exitcond = icmp eq i64 %indvar.next, 10
28 br i1 %exitcond, label %return, label %loop
29
30 return:
31 ret void
32 }
0 ; RUN: llvm-as < %s | opt -indvars -loop-deletion | llvm-dis | grep phi | count 1
1
2 ; Indvars should be able to evaluate this loop, allowing loop deletion
3 ; to delete it.
4
5 define i32 @test(i32 %x_offs) nounwind readnone {
6 entry:
7 %0 = icmp sgt i32 %x_offs, 4 ; [#uses=1]
8 br i1 %0, label %bb.nph, label %bb2
9
10 bb.nph: ; preds = %entry
11 br label %bb
12
13 bb: ; preds = %bb1, %bb.nph
14 %x_offs_addr.01 = phi i32 [ %1, %bb1 ], [ %x_offs, %bb.nph ] ; [#uses=1]
15 %1 = add i32 %x_offs_addr.01, -4 ; [#uses=3]
16 br label %bb1
17
18 bb1: ; preds = %bb
19 %2 = icmp sgt i32 %1, 4 ; [#uses=1]
20 br i1 %2, label %bb, label %bb1.bb2_crit_edge
21
22 bb1.bb2_crit_edge: ; preds = %bb1
23 br label %bb2
24
25 bb2: ; preds = %bb1.bb2_crit_edge, %entry
26 %x_offs_addr.0.lcssa = phi i32 [ %1, %bb1.bb2_crit_edge ], [ %x_offs, %entry ] ; [#uses=1]
27 ret i32 %x_offs_addr.0.lcssa
28 }
None ; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | grep mul | count 3
0 ; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | grep {mul.*%lsr.iv} | count 2
11 ; The multiply in bb2 must not be reduced to an add, as the sext causes the
22 ; %1 argument to become negative after a while.
33 ; ModuleID = ''