llvm.org GIT mirror llvm / 16129fc
code hoisting pass based on GVN This pass hoists duplicated computations in the program. The primary goal of gvn-hoist is to reduce the size of functions before inline heuristics to reduce the total cost of function inlining. Pass written by Sebastian Pop, Aditya Kumar, Xiaoyu Hu, and Brian Rzycki. Important algorithmic contributions by Daniel Berlin under the form of reviews. Differential Revision: http://reviews.llvm.org/D19338 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275401 91177308-0d34-0410-b5e6-96231b3b80d8 Sebastian Pop 4 years ago
12 changed file(s) with 1643 addition(s) and 4 deletion(s). Raw diff Collapse all Expand all
119119 void initializeEdgeBundlesPass(PassRegistry&);
120120 void initializeEfficiencySanitizerPass(PassRegistry&);
121121 void initializeEliminateAvailableExternallyLegacyPassPass(PassRegistry &);
122 void initializeGVNHoistLegacyPassPass(PassRegistry &);
122123 void initializeExpandISelPseudosPass(PassRegistry&);
123124 void initializeExpandPostRAPass(PassRegistry&);
124125 void initializeExternalAAWrapperPassPass(PassRegistry&);
159159 (void) llvm::createConstantHoistingPass();
160160 (void) llvm::createCodeGenPreparePass();
161161 (void) llvm::createEarlyCSEPass();
162 (void) llvm::createGVNHoistPass();
162163 (void) llvm::createMergedLoadStoreMotionPass();
163164 (void) llvm::createGVNPass();
164165 (void) llvm::createMemCpyOptPass();
5757 AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
5858 MemoryDependenceResults &getMemDep() const { return *MD; }
5959
60 private:
61 friend class gvn::GVNLegacyPass;
62
6360 struct Expression;
64 friend struct DenseMapInfo;
6561
6662 /// This class holds the mapping between values and value numbers. It is used
6763 /// as an efficient mechanism to determine the expression-wise equivalence of
10298 uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
10399 void verifyRemoved(const Value *) const;
104100 };
101
102 private:
103 friend class gvn::GVNLegacyPass;
104 friend struct DenseMapInfo;
105105
106106 MemoryDependenceResults *MD;
107107 DominatorTree *DT;
227227 /// loads are eliminated by the pass.
228228 FunctionPass *createGVNPass(bool NoLoads = false);
229229
230 /// \brief A simple and fast domtree-based GVN pass to hoist common expressions
231 /// from sibling branches.
232 struct GVNHoistPass : PassInfoMixin {
233 /// \brief Run the pass over the function.
234 PreservedAnalyses run(Function &F, AnalysisManager &AM);
235 };
236
230237 }
231238
232239 #endif
325325
326326 //===----------------------------------------------------------------------===//
327327 //
328 // GVNHoist - This pass performs a simple and fast GVN pass over the dominator
329 // tree to hoist common expressions from sibling branches.
330 //
331 FunctionPass *createGVNHoistPass();
332
333 //===----------------------------------------------------------------------===//
334 //
328335 // MergedLoadStoreMotion - This pass merges loads and stores in diamonds. Loads
329336 // are hoisted into the header, while stores sink into the footer.
330337 //
132132 FUNCTION_PASS("dce", DCEPass())
133133 FUNCTION_PASS("dse", DSEPass())
134134 FUNCTION_PASS("early-cse", EarlyCSEPass())
135 FUNCTION_PASS("gvn-hoist", GVNHoistPass())
135136 FUNCTION_PASS("instcombine", InstCombinePass())
136137 FUNCTION_PASS("instsimplify", InstSimplifierPass())
137138 FUNCTION_PASS("invalidate", InvalidateAllAnalysesPass())
222222 FPM.add(createCFGSimplificationPass());
223223 FPM.add(createSROAPass());
224224 FPM.add(createEarlyCSEPass());
225 FPM.add(createGVNHoistPass());
225226 FPM.add(createLowerExpectIntrinsicPass());
226227 }
227228
1111 Float2Int.cpp
1212 GuardWidening.cpp
1313 GVN.cpp
14 GVNHoist.cpp
1415 InductiveRangeCheckElimination.cpp
1516 IndVarSimplify.cpp
1617 JumpThreading.cpp
0 //===- GVNHoist.cpp - Hoist scalar and load expressions -------------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass hoists expressions from branches to a common dominator. It uses
10 // GVN (global value numbering) to discover expressions computing the same
11 // values. The primary goal is to reduce the code size, and in some
12 // cases reduce critical path (by exposing more ILP).
13 // Hoisting may affect the performance in some cases. To mitigate that, hoisting
14 // is disabled in the following cases.
15 // 1. Scalars across calls.
16 // 2. geps when corresponding load/store cannot be hoisted.
17 //===----------------------------------------------------------------------===//
18
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/SmallPtrSet.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/Analysis/ValueTracking.h"
23 #include "llvm/Transforms/Scalar.h"
24 #include "llvm/Transforms/Scalar/GVN.h"
25 #include "llvm/Transforms/Utils/MemorySSA.h"
26 #include
27 #include
28 #include
29
30 using namespace llvm;
31
32 #define DEBUG_TYPE "gvn-hoist"
33
34 STATISTIC(NumHoisted, "Number of instructions hoisted");
35 STATISTIC(NumRemoved, "Number of instructions removed");
36 STATISTIC(NumLoadsHoisted, "Number of loads hoisted");
37 STATISTIC(NumLoadsRemoved, "Number of loads removed");
38 STATISTIC(NumStoresHoisted, "Number of stores hoisted");
39 STATISTIC(NumStoresRemoved, "Number of stores removed");
40 STATISTIC(NumCallsHoisted, "Number of calls hoisted");
41 STATISTIC(NumCallsRemoved, "Number of calls removed");
42
43 static cl::opt
44 MaxHoistedThreshold("gvn-max-hoisted", cl::Hidden, cl::init(-1),
45 cl::desc("Max number of instructions to hoist "
46 "(default unlimited = -1)"));
47 static cl::opt MaxNumberOfBBSInPath(
48 "gvn-hoist-max-bbs", cl::Hidden, cl::init(4),
49 cl::desc("Max number of basic blocks on the path between "
50 "hoisting locations (default = 4, unlimited = -1)"));
51
52 static int HoistedCtr = 0;
53
54 namespace {
55
56 // Provides a sorting function based on the execution order of two instructions.
57 struct SortByDFSIn {
58 private:
59 DenseMap &DFSNumber;
60
61 public:
62 SortByDFSIn(DenseMap &D) : DFSNumber(D) {}
63
64 // Returns true when A executes before B.
65 bool operator()(const Instruction *A, const Instruction *B) const {
66 assert(A != B);
67 const BasicBlock *BA = A->getParent();
68 const BasicBlock *BB = B->getParent();
69 unsigned NA = DFSNumber[BA];
70 unsigned NB = DFSNumber[BB];
71 if (NA < NB)
72 return true;
73 if (NA == NB) {
74 // Sort them in the order they occur in the same basic block.
75 BasicBlock::const_iterator AI(A), BI(B);
76 return std::distance(AI, BI) < 0;
77 }
78 return false;
79 }
80 };
81
82 // A map from a VN (value number) to all the instructions with that VN.
83 typedef DenseMap> VNtoInsns;
84
85 // Records all scalar instructions candidate for code hoisting.
86 class InsnInfo {
87 VNtoInsns VNtoScalars;
88
89 public:
90 // Inserts I and its value number in VNtoScalars.
91 void insert(Instruction *I, GVN::ValueTable &VN) {
92 // Scalar instruction.
93 unsigned V = VN.lookupOrAdd(I);
94 VNtoScalars[V].push_back(I);
95 }
96
97 const VNtoInsns &getVNTable() const { return VNtoScalars; }
98 };
99
100 // Records all load instructions candidate for code hoisting.
101 class LoadInfo {
102 VNtoInsns VNtoLoads;
103
104 public:
105 // Insert Load and the value number of its memory address in VNtoLoads.
106 void insert(LoadInst *Load, GVN::ValueTable &VN) {
107 if (Load->isSimple()) {
108 unsigned V = VN.lookupOrAdd(Load->getPointerOperand());
109 VNtoLoads[V].push_back(Load);
110 }
111 }
112
113 const VNtoInsns &getVNTable() const { return VNtoLoads; }
114 };
115
116 // Records all store instructions candidate for code hoisting.
117 class StoreInfo {
118 VNtoInsns VNtoStores;
119
120 public:
121 // Insert the Store and a hash number of the store address and the stored
122 // value in VNtoStores.
123 void insert(StoreInst *Store, GVN::ValueTable &VN) {
124 if (!Store->isSimple())
125 return;
126 // Hash the store address and the stored value.
127 Value *Ptr = Store->getPointerOperand();
128 Value *Val = Store->getValueOperand();
129 VNtoStores[hash_combine(VN.lookupOrAdd(Ptr), VN.lookupOrAdd(Val))]
130 .push_back(Store);
131 }
132
133 const VNtoInsns &getVNTable() const { return VNtoStores; }
134 };
135
136 // Records all call instructions candidate for code hoisting.
137 class CallInfo {
138 VNtoInsns VNtoCallsScalars;
139 VNtoInsns VNtoCallsLoads;
140 VNtoInsns VNtoCallsStores;
141
142 public:
143 // Insert Call and its value numbering in one of the VNtoCalls* containers.
144 void insert(CallInst *Call, GVN::ValueTable &VN) {
145 // A call that doesNotAccessMemory is handled as a Scalar,
146 // onlyReadsMemory will be handled as a Load instruction,
147 // all other calls will be handled as stores.
148 unsigned V = VN.lookupOrAdd(Call);
149
150 if (Call->doesNotAccessMemory())
151 VNtoCallsScalars[V].push_back(Call);
152 else if (Call->onlyReadsMemory())
153 VNtoCallsLoads[V].push_back(Call);
154 else
155 VNtoCallsStores[V].push_back(Call);
156 }
157
158 const VNtoInsns &getScalarVNTable() const { return VNtoCallsScalars; }
159
160 const VNtoInsns &getLoadVNTable() const { return VNtoCallsLoads; }
161
162 const VNtoInsns &getStoreVNTable() const { return VNtoCallsStores; }
163 };
164
165 typedef DenseMap BBSideEffectsSet;
166 typedef SmallVector SmallVecInsn;
167 typedef SmallVectorImpl SmallVecImplInsn;
168
169 // This pass hoists common computations across branches sharing common
170 // dominator. The primary goal is to reduce the code size, and in some
171 // cases reduce critical path (by exposing more ILP).
172 class GVNHoist {
173 public:
174 GVN::ValueTable VN;
175 DominatorTree *DT;
176 AliasAnalysis *AA;
177 MemoryDependenceResults *MD;
178 const bool OptForMinSize;
179 DenseMap DFSNumber;
180 BBSideEffectsSet BBSideEffects;
181 MemorySSA *MSSA;
182 enum InsKind { Unknown, Scalar, Load, Store };
183
184 GVNHoist(DominatorTree *Dt, AliasAnalysis *Aa, MemoryDependenceResults *Md,
185 bool OptForMinSize)
186 : DT(Dt), AA(Aa), MD(Md), OptForMinSize(OptForMinSize) {}
187
188 // Return true when there are exception handling in BB.
189 bool hasEH(const BasicBlock *BB) {
190 auto It = BBSideEffects.find(BB);
191 if (It != BBSideEffects.end())
192 return It->second;
193
194 if (BB->isEHPad() || BB->hasAddressTaken()) {
195 BBSideEffects[BB] = true;
196 return true;
197 }
198
199 if (BB->getTerminator()->mayThrow()) {
200 BBSideEffects[BB] = true;
201 return true;
202 }
203
204 BBSideEffects[BB] = false;
205 return false;
206 }
207
208 // Return true when all paths from A to the end of the function pass through
209 // either B or C.
210 bool hoistingFromAllPaths(const BasicBlock *A, const BasicBlock *B,
211 const BasicBlock *C) {
212 // We fully copy the WL in order to be able to remove items from it.
213 SmallPtrSet WL;
214 WL.insert(B);
215 WL.insert(C);
216
217 for (auto It = df_begin(A), E = df_end(A); It != E;) {
218 // There exists a path from A to the exit of the function if we are still
219 // iterating in DF traversal and we removed all instructions from the work
220 // list.
221 if (WL.empty())
222 return false;
223
224 const BasicBlock *BB = *It;
225 if (WL.erase(BB)) {
226 // Stop DFS traversal when BB is in the work list.
227 It.skipChildren();
228 continue;
229 }
230
231 // Check for end of function, calls that do not return, etc.
232 if (!isGuaranteedToTransferExecutionToSuccessor(BB->getTerminator()))
233 return false;
234
235 // Increment DFS traversal when not skipping children.
236 ++It;
237 }
238
239 return true;
240 }
241
242 /* Return true when I1 appears before I2 in the instructions of BB. */
243 bool firstInBB(BasicBlock *BB, const Instruction *I1, const Instruction *I2) {
244 for (Instruction &I : *BB) {
245 if (&I == I1)
246 return true;
247 if (&I == I2)
248 return false;
249 }
250
251 llvm_unreachable("I1 and I2 not found in BB");
252 }
253 // Return true when there are users of Def in BB.
254 bool hasMemoryUseOnPath(MemoryAccess *Def, const BasicBlock *BB,
255 const Instruction *OldPt) {
256 Value::user_iterator UI = Def->user_begin();
257 Value::user_iterator UE = Def->user_end();
258 const BasicBlock *DefBB = Def->getBlock();
259 const BasicBlock *OldBB = OldPt->getParent();
260
261 for (; UI != UE; ++UI)
262 if (MemoryUse *U = dyn_cast(*UI)) {
263 BasicBlock *UBB = U->getBlock();
264 // Only analyze uses in BB.
265 if (BB != UBB)
266 continue;
267
268 // A use in the same block as the Def is on the path.
269 if (UBB == DefBB) {
270 assert(MSSA->locallyDominates(Def, U) && "def not dominating use");
271 return true;
272 }
273
274 if (UBB != OldBB)
275 return true;
276
277 // It is only harmful to hoist when the use is before OldPt.
278 if (firstInBB(UBB, U->getMemoryInst(), OldPt))
279 return true;
280 }
281
282 return false;
283 }
284
285 // Return true when there are exception handling or loads of memory Def
286 // between OldPt and NewPt.
287
288 // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and
289 // return true when the counter NBBsOnAllPaths reaces 0, except when it is
290 // initialized to -1 which is unlimited.
291 bool hasEHOrLoadsOnPath(const Instruction *NewPt, const Instruction *OldPt,
292 MemoryAccess *Def, int &NBBsOnAllPaths) {
293 const BasicBlock *NewBB = NewPt->getParent();
294 const BasicBlock *OldBB = OldPt->getParent();
295 assert(DT->dominates(NewBB, OldBB) && "invalid path");
296 assert(DT->dominates(Def->getBlock(), NewBB) &&
297 "def does not dominate new hoisting point");
298
299 // Walk all basic blocks reachable in depth-first iteration on the inverse
300 // CFG from OldBB to NewBB. These blocks are all the blocks that may be
301 // executed between the execution of NewBB and OldBB. Hoisting an expression
302 // from OldBB into NewBB has to be safe on all execution paths.
303 for (auto I = idf_begin(OldBB), E = idf_end(OldBB); I != E;) {
304 if (*I == NewBB) {
305 // Stop traversal when reaching HoistPt.
306 I.skipChildren();
307 continue;
308 }
309
310 // Impossible to hoist with exceptions on the path.
311 if (hasEH(*I))
312 return true;
313
314 // Check that we do not move a store past loads.
315 if (hasMemoryUseOnPath(Def, *I, OldPt))
316 return true;
317
318 // Stop walk once the limit is reached.
319 if (NBBsOnAllPaths == 0)
320 return true;
321
322 // -1 is unlimited number of blocks on all paths.
323 if (NBBsOnAllPaths != -1)
324 --NBBsOnAllPaths;
325
326 ++I;
327 }
328
329 return false;
330 }
331
332 // Return true when there are exception handling between HoistPt and BB.
333 // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and
334 // return true when the counter NBBsOnAllPaths reaches 0, except when it is
335 // initialized to -1 which is unlimited.
336 bool hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *BB,
337 int &NBBsOnAllPaths) {
338 assert(DT->dominates(HoistPt, BB) && "Invalid path");
339
340 // Walk all basic blocks reachable in depth-first iteration on
341 // the inverse CFG from BBInsn to NewHoistPt. These blocks are all the
342 // blocks that may be executed between the execution of NewHoistPt and
343 // BBInsn. Hoisting an expression from BBInsn into NewHoistPt has to be safe
344 // on all execution paths.
345 for (auto I = idf_begin(BB), E = idf_end(BB); I != E;) {
346 if (*I == HoistPt) {
347 // Stop traversal when reaching NewHoistPt.
348 I.skipChildren();
349 continue;
350 }
351
352 // Impossible to hoist with exceptions on the path.
353 if (hasEH(*I))
354 return true;
355
356 // Stop walk once the limit is reached.
357 if (NBBsOnAllPaths == 0)
358 return true;
359
360 // -1 is unlimited number of blocks on all paths.
361 if (NBBsOnAllPaths != -1)
362 --NBBsOnAllPaths;
363
364 ++I;
365 }
366
367 return false;
368 }
369
370 // Return true when it is safe to hoist a memory load or store U from OldPt
371 // to NewPt.
372 bool safeToHoistLdSt(const Instruction *NewPt, const Instruction *OldPt,
373 MemoryUseOrDef *U, InsKind K, int &NBBsOnAllPaths) {
374
375 // In place hoisting is safe.
376 if (NewPt == OldPt)
377 return true;
378
379 const BasicBlock *NewBB = NewPt->getParent();
380 const BasicBlock *OldBB = OldPt->getParent();
381 const BasicBlock *UBB = U->getBlock();
382
383 // Check for dependences on the Memory SSA.
384 MemoryAccess *D = U->getDefiningAccess();
385 BasicBlock *DBB = D->getBlock();
386 if (DT->properlyDominates(NewBB, DBB))
387 // Cannot move the load or store to NewBB above its definition in DBB.
388 return false;
389
390 if (NewBB == DBB && !MSSA->isLiveOnEntryDef(D))
391 if (MemoryUseOrDef *UD = dyn_cast(D))
392 if (firstInBB(DBB, NewPt, UD->getMemoryInst()))
393 // Cannot move the load or store to NewPt above its definition in D.
394 return false;
395
396 // Check for unsafe hoistings due to side effects.
397 if (K == InsKind::Store) {
398 if (hasEHOrLoadsOnPath(NewPt, OldPt, D, NBBsOnAllPaths))
399 return false;
400 } else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths))
401 return false;
402
403 if (UBB == NewBB) {
404 if (DT->properlyDominates(DBB, NewBB))
405 return true;
406 assert(UBB == DBB);
407 assert(MSSA->locallyDominates(D, U));
408 }
409
410 // No side effects: it is safe to hoist.
411 return true;
412 }
413
414 // Return true when it is safe to hoist scalar instructions from BB1 and BB2
415 // to HoistBB.
416 bool safeToHoistScalar(const BasicBlock *HoistBB, const BasicBlock *BB1,
417 const BasicBlock *BB2, int &NBBsOnAllPaths) {
418 // Check that the hoisted expression is needed on all paths. When HoistBB
419 // already contains an instruction to be hoisted, the expression is needed
420 // on all paths. Enable scalar hoisting at -Oz as it is safe to hoist
421 // scalars to a place where they are partially needed.
422 if (!OptForMinSize && BB1 != HoistBB &&
423 !hoistingFromAllPaths(HoistBB, BB1, BB2))
424 return false;
425
426 if (hasEHOnPath(HoistBB, BB1, NBBsOnAllPaths) ||
427 hasEHOnPath(HoistBB, BB2, NBBsOnAllPaths))
428 return false;
429
430 // Safe to hoist scalars from BB1 and BB2 to HoistBB.
431 return true;
432 }
433
434 // Each element of a hoisting list contains the basic block where to hoist and
435 // a list of instructions to be hoisted.
436 typedef std::pair HoistingPointInfo;
437 typedef SmallVector HoistingPointList;
438
439 // Partition InstructionsToHoist into a set of candidates which can share a
440 // common hoisting point. The partitions are collected in HPL. IsScalar is
441 // true when the instructions in InstructionsToHoist are scalars. IsLoad is
442 // true when the InstructionsToHoist are loads, false when they are stores.
443 void partitionCandidates(SmallVecImplInsn &InstructionsToHoist,
444 HoistingPointList &HPL, InsKind K) {
445 // No need to sort for two instructions.
446 if (InstructionsToHoist.size() > 2) {
447 SortByDFSIn Pred(DFSNumber);
448 std::sort(InstructionsToHoist.begin(), InstructionsToHoist.end(), Pred);
449 }
450
451 int NBBsOnAllPaths = MaxNumberOfBBSInPath;
452
453 SmallVecImplInsn::iterator II = InstructionsToHoist.begin();
454 SmallVecImplInsn::iterator Start = II;
455 Instruction *HoistPt = *II;
456 BasicBlock *HoistBB = HoistPt->getParent();
457 MemoryUseOrDef *UD;
458 if (K != InsKind::Scalar)
459 UD = cast(MSSA->getMemoryAccess(HoistPt));
460
461 for (++II; II != InstructionsToHoist.end(); ++II) {
462 Instruction *Insn = *II;
463 BasicBlock *BB = Insn->getParent();
464 BasicBlock *NewHoistBB;
465 Instruction *NewHoistPt;
466
467 if (BB == HoistBB) {
468 NewHoistBB = HoistBB;
469 NewHoistPt = firstInBB(BB, Insn, HoistPt) ? Insn : HoistPt;
470 } else {
471 NewHoistBB = DT->findNearestCommonDominator(HoistBB, BB);
472 if (NewHoistBB == BB)
473 NewHoistPt = Insn;
474 else if (NewHoistBB == HoistBB)
475 NewHoistPt = HoistPt;
476 else
477 NewHoistPt = NewHoistBB->getTerminator();
478 }
479
480 if (K == InsKind::Scalar) {
481 if (safeToHoistScalar(NewHoistBB, HoistBB, BB, NBBsOnAllPaths)) {
482 // Extend HoistPt to NewHoistPt.
483 HoistPt = NewHoistPt;
484 HoistBB = NewHoistBB;
485 continue;
486 }
487 } else {
488 // When NewBB already contains an instruction to be hoisted, the
489 // expression is needed on all paths.
490 // Check that the hoisted expression is needed on all paths: it is
491 // unsafe to hoist loads to a place where there may be a path not
492 // loading from the same address: for instance there may be a branch on
493 // which the address of the load may not be initialized.
494 if ((HoistBB == NewHoistBB || BB == NewHoistBB ||
495 hoistingFromAllPaths(NewHoistBB, HoistBB, BB)) &&
496 // Also check that it is safe to move the load or store from HoistPt
497 // to NewHoistPt, and from Insn to NewHoistPt.
498 safeToHoistLdSt(NewHoistPt, HoistPt, UD, K, NBBsOnAllPaths) &&
499 safeToHoistLdSt(NewHoistPt, Insn,
500 cast(MSSA->getMemoryAccess(Insn)),
501 K, NBBsOnAllPaths)) {
502 // Extend HoistPt to NewHoistPt.
503 HoistPt = NewHoistPt;
504 HoistBB = NewHoistBB;
505 continue;
506 }
507 }
508
509 // At this point it is not safe to extend the current hoisting to
510 // NewHoistPt: save the hoisting list so far.
511 if (std::distance(Start, II) > 1)
512 HPL.push_back(std::make_pair(HoistBB, SmallVecInsn(Start, II)));
513
514 // Start over from BB.
515 Start = II;
516 if (K != InsKind::Scalar)
517 UD = cast(MSSA->getMemoryAccess(*Start));
518 HoistPt = Insn;
519 HoistBB = BB;
520 NBBsOnAllPaths = MaxNumberOfBBSInPath;
521 }
522
523 // Save the last partition.
524 if (std::distance(Start, II) > 1)
525 HPL.push_back(std::make_pair(HoistBB, SmallVecInsn(Start, II)));
526 }
527
528 // Initialize HPL from Map.
529 void computeInsertionPoints(const VNtoInsns &Map, HoistingPointList &HPL,
530 InsKind K) {
531 for (VNtoInsns::const_iterator It = Map.begin(); It != Map.end(); ++It) {
532 if (MaxHoistedThreshold != -1 && ++HoistedCtr > MaxHoistedThreshold)
533 return;
534
535 const SmallVecInsn &V = It->second;
536 if (V.size() < 2)
537 continue;
538
539 // Compute the insertion point and the list of expressions to be hoisted.
540 SmallVecInsn InstructionsToHoist;
541 for (auto I : V)
542 if (!hasEH(I->getParent()))
543 InstructionsToHoist.push_back(I);
544
545 if (InstructionsToHoist.size())
546 partitionCandidates(InstructionsToHoist, HPL, K);
547 }
548 }
549
550 // Return true when all operands of Instr are available at insertion point
551 // HoistPt. When limiting the number of hoisted expressions, one could hoist
552 // a load without hoisting its access function. So before hoisting any
553 // expression, make sure that all its operands are available at insert point.
554 bool allOperandsAvailable(const Instruction *I,
555 const BasicBlock *HoistPt) const {
556 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
557 const Value *Op = I->getOperand(i);
558 const Instruction *Inst = dyn_cast(Op);
559 if (Inst && !DT->dominates(Inst->getParent(), HoistPt))
560 return false;
561 }
562
563 return true;
564 }
565
566 Instruction *firstOfTwo(Instruction *I, Instruction *J) const {
567 for (Instruction &I1 : *I->getParent())
568 if (&I1 == I || &I1 == J)
569 return &I1;
570 llvm_unreachable("Both I and J must be from same BB");
571 }
572
573 // Replace the use of From with To in Insn.
574 void replaceUseWith(Instruction *Insn, Value *From, Value *To) const {
575 for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
576 UI != UE;) {
577 Use &U = *UI++;
578 if (U.getUser() == Insn) {
579 U.set(To);
580 return;
581 }
582 }
583 llvm_unreachable("should replace exactly once");
584 }
585
586 bool makeOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt) const {
587 // Check whether the GEP of a ld/st can be synthesized at HoistPt.
588 Instruction *Gep = nullptr;
589 Instruction *Val = nullptr;
590 if (LoadInst *Ld = dyn_cast(Repl))
591 Gep = dyn_cast(Ld->getPointerOperand());
592 if (StoreInst *St = dyn_cast(Repl)) {
593 Gep = dyn_cast(St->getPointerOperand());
594 Val = dyn_cast(St->getValueOperand());
595 }
596
597 if (!Gep || !isa(Gep))
598 return false;
599
600 // Check whether we can compute the Gep at HoistPt.
601 if (!allOperandsAvailable(Gep, HoistPt))
602 return false;
603
604 // Also check that the stored value is available.
605 if (Val && !allOperandsAvailable(Val, HoistPt))
606 return false;
607
608 // Copy the gep before moving the ld/st.
609 Instruction *ClonedGep = Gep->clone();
610 ClonedGep->insertBefore(HoistPt->getTerminator());
611 replaceUseWith(Repl, Gep, ClonedGep);
612
613 // Also copy Val when it is a gep: geps are not hoisted by default.
614 if (Val && isa(Val)) {
615 Instruction *ClonedVal = Val->clone();
616 ClonedVal->insertBefore(HoistPt->getTerminator());
617 replaceUseWith(Repl, Val, ClonedVal);
618 }
619
620 return true;
621 }
622
623 std::pair hoist(HoistingPointList &HPL) {
624 unsigned NI = 0, NL = 0, NS = 0, NC = 0, NR = 0;
625 for (const HoistingPointInfo &HP : HPL) {
626 // Find out whether we already have one of the instructions in HoistPt,
627 // in which case we do not have to move it.
628 BasicBlock *HoistPt = HP.first;
629 const SmallVecInsn &InstructionsToHoist = HP.second;
630 Instruction *Repl = nullptr;
631 for (Instruction *I : InstructionsToHoist)
632 if (I->getParent() == HoistPt) {
633 // If there are two instructions in HoistPt to be hoisted in place:
634 // update Repl to be the first one, such that we can rename the uses
635 // of the second based on the first.
636 Repl = !Repl ? I : firstOfTwo(Repl, I);
637 }
638
639 if (Repl) {
640 // Repl is already in HoistPt: it remains in place.
641 assert(allOperandsAvailable(Repl, HoistPt) &&
642 "instruction depends on operands that are not available");
643 } else {
644 // When we do not find Repl in HoistPt, select the first in the list
645 // and move it to HoistPt.
646 Repl = InstructionsToHoist.front();
647
648 // We can move Repl in HoistPt only when all operands are available.
649 // The order in which hoistings are done may influence the availability
650 // of operands.
651 if (!allOperandsAvailable(Repl, HoistPt) &&
652 !makeOperandsAvailable(Repl, HoistPt))
653 continue;
654 Repl->moveBefore(HoistPt->getTerminator());
655 }
656
657 if (isa(Repl))
658 ++NL;
659 else if (isa(Repl))
660 ++NS;
661 else if (isa(Repl))
662 ++NC;
663 else // Scalar
664 ++NI;
665
666 // Remove and rename all other instructions.
667 for (Instruction *I : InstructionsToHoist)
668 if (I != Repl) {
669 ++NR;
670 if (isa(Repl))
671 ++NumLoadsRemoved;
672 else if (isa(Repl))
673 ++NumStoresRemoved;
674 else if (isa(Repl))
675 ++NumCallsRemoved;
676 I->replaceAllUsesWith(Repl);
677 I->eraseFromParent();
678 }
679 }
680
681 NumHoisted += NL + NS + NC + NI;
682 NumRemoved += NR;
683 NumLoadsHoisted += NL;
684 NumStoresHoisted += NS;
685 NumCallsHoisted += NC;
686 return {NI, NL + NC + NS};
687 }
688
689 // Hoist all expressions. Returns Number of scalars hoisted
690 // and number of non-scalars hoisted.
691 std::pair hoistExpressions(Function &F) {
692 InsnInfo II;
693 LoadInfo LI;
694 StoreInfo SI;
695 CallInfo CI;
696 for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
697 for (Instruction &I1 : *BB) {
698 if (LoadInst *Load = dyn_cast(&I1))
699 LI.insert(Load, VN);
700 else if (StoreInst *Store = dyn_cast(&I1))
701 SI.insert(Store, VN);
702 else if (CallInst *Call = dyn_cast(&I1)) {
703 if (IntrinsicInst *Intr = dyn_cast(Call)) {
704 if (isa(Intr) ||
705 Intr->getIntrinsicID() == Intrinsic::assume)
706 continue;
707 }
708 if (Call->mayHaveSideEffects()) {
709 if (!OptForMinSize)
710 break;
711 // We may continue hoisting across calls which write to memory.
712 if (Call->mayThrow())
713 break;
714 }
715 CI.insert(Call, VN);
716 } else if (OptForMinSize || !isa(&I1))
717 // Do not hoist scalars past calls that may write to memory because
718 // that could result in spills later. geps are handled separately.
719 // TODO: We can relax this for targets like AArch64 as they have more
720 // registers than X86.
721 II.insert(&I1, VN);
722 }
723 }
724
725 HoistingPointList HPL;
726 computeInsertionPoints(II.getVNTable(), HPL, InsKind::Scalar);
727 computeInsertionPoints(LI.getVNTable(), HPL, InsKind::Load);
728 computeInsertionPoints(SI.getVNTable(), HPL, InsKind::Store);
729 computeInsertionPoints(CI.getScalarVNTable(), HPL, InsKind::Scalar);
730 computeInsertionPoints(CI.getLoadVNTable(), HPL, InsKind::Load);
731 computeInsertionPoints(CI.getStoreVNTable(), HPL, InsKind::Store);
732 return hoist(HPL);
733 }
734
735 bool run(Function &F) {
736 VN.setDomTree(DT);
737 VN.setAliasAnalysis(AA);
738 VN.setMemDep(MD);
739 bool Res = false;
740
741 unsigned I = 0;
742 for (const BasicBlock *BB : depth_first(&F.getEntryBlock()))
743 DFSNumber.insert(std::make_pair(BB, ++I));
744
745 // FIXME: use lazy evaluation of VN to avoid the fix-point computation.
746 while (1) {
747 // FIXME: only compute MemorySSA once. We need to update the analysis in
748 // the same time as transforming the code.
749 MemorySSA M(F, AA, DT);
750 MSSA = &M;
751
752 auto HoistStat = hoistExpressions(F);
753 if (HoistStat.first + HoistStat.second == 0) {
754 return Res;
755 }
756 if (HoistStat.second > 0) {
757 // To address a limitation of the current GVN, we need to rerun the
758 // hoisting after we hoisted loads in order to be able to hoist all
759 // scalars dependent on the hoisted loads. Same for stores.
760 VN.clear();
761 }
762 Res = true;
763 }
764
765 return Res;
766 }
767 };
768
769 class GVNHoistLegacyPass : public FunctionPass {
770 public:
771 static char ID;
772
773 GVNHoistLegacyPass() : FunctionPass(ID) {
774 initializeGVNHoistLegacyPassPass(*PassRegistry::getPassRegistry());
775 }
776
777 bool runOnFunction(Function &F) override {
778 auto &DT = getAnalysis().getDomTree();
779 auto &AA = getAnalysis().getAAResults();
780 auto &MD = getAnalysis().getMemDep();
781
782 GVNHoist G(&DT, &AA, &MD, F.optForMinSize());
783 return G.run(F);
784 }
785
786 void getAnalysisUsage(AnalysisUsage &AU) const override {
787 AU.addRequired();
788 AU.addRequired();
789 AU.addRequired();
790 AU.addPreserved();
791 }
792 };
793 } // namespace
794
795 PreservedAnalyses GVNHoistPass::run(Function &F,
796 AnalysisManager &AM) {
797 DominatorTree &DT = AM.getResult(F);
798 AliasAnalysis &AA = AM.getResult(F);
799 MemoryDependenceResults &MD = AM.getResult(F);
800
801 GVNHoist G(&DT, &AA, &MD, F.optForMinSize());
802 if (!G.run(F))
803 return PreservedAnalyses::all();
804
805 PreservedAnalyses PA;
806 PA.preserve();
807 return PA;
808 }
809
810 char GVNHoistLegacyPass::ID = 0;
811 INITIALIZE_PASS_BEGIN(GVNHoistLegacyPass, "gvn-hoist",
812 "Early GVN Hoisting of Expressions", false, false)
813 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
814 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
815 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
816 INITIALIZE_PASS_END(GVNHoistLegacyPass, "gvn-hoist",
817 "Early GVN Hoisting of Expressions", false, false)
818
819 FunctionPass *llvm::createGVNHoistPass() { return new GVNHoistLegacyPass(); }
4343 initializeGuardWideningLegacyPassPass(Registry);
4444 initializeGVNLegacyPassPass(Registry);
4545 initializeEarlyCSELegacyPassPass(Registry);
46 initializeGVNHoistLegacyPassPass(Registry);
4647 initializeFlattenCFGPassPass(Registry);
4748 initializeInductiveRangeCheckEliminationPass(Registry);
4849 initializeIndVarSimplifyLegacyPassPass(Registry);
235236 unwrap(PM)->add(createEarlyCSEPass());
236237 }
237238
239 void LLVMAddGVNHoistLegacyPass(LLVMPassManagerRef PM) {
240 unwrap(PM)->add(createGVNHoistPass());
241 }
242
238243 void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) {
239244 unwrap(PM)->add(createTypeBasedAAWrapperPass());
240245 }
; RUN: opt -gvn-hoist -S < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Check that all "or" expressions are hoisted.
; CHECK-LABEL: @encode
; CHECK: or i32
; CHECK-NOT: or i32

define i8* @encode(i8* %p, i32 %v) {
entry:
  %p.addr = alloca i8*, align 8
  %v.addr = alloca i32, align 4
  store i8* %p, i8** %p.addr, align 8
  store i32 %v, i32* %v.addr, align 4
  %0 = load i32, i32* %v.addr, align 4
  %cmp = icmp ult i32 %0, 23
  br i1 %cmp, label %if.then, label %if.else

if.then:                                          ; preds = %entry
  %1 = load i32, i32* %v.addr, align 4
  %or = or i32 %1, 128
  %conv = trunc i32 %or to i8
  %2 = load i8*, i8** %p.addr, align 8
  %incdec.ptr = getelementptr inbounds i8, i8* %2, i32 1
  store i8* %incdec.ptr, i8** %p.addr, align 8
  store i8 %conv, i8* %2, align 1
  br label %if.end15

if.else:                                          ; preds = %entry
  %3 = load i32, i32* %v.addr, align 4
  %cmp1 = icmp ult i32 %3, 42
  br i1 %cmp1, label %if.then3, label %if.else9

if.then3:                                         ; preds = %if.else
  %4 = load i32, i32* %v.addr, align 4
  %or4 = or i32 %4, 128
  %conv5 = trunc i32 %or4 to i8
  %5 = load i8*, i8** %p.addr, align 8
  %incdec.ptr6 = getelementptr inbounds i8, i8* %5, i32 1
  store i8* %incdec.ptr6, i8** %p.addr, align 8
  store i8 %conv5, i8* %5, align 1
  %6 = load i32, i32* %v.addr, align 4
  %conv7 = trunc i32 %6 to i8
  %7 = load i8*, i8** %p.addr, align 8
  %incdec.ptr8 = getelementptr inbounds i8, i8* %7, i32 1
  store i8* %incdec.ptr8, i8** %p.addr, align 8
  store i8 %conv7, i8* %7, align 1
  br label %if.end

if.else9:                                         ; preds = %if.else
  %8 = load i32, i32* %v.addr, align 4
  %or10 = or i32 %8, 128
  %conv11 = trunc i32 %or10 to i8
  %9 = load i8*, i8** %p.addr, align 8
  %incdec.ptr12 = getelementptr inbounds i8, i8* %9, i32 1
  store i8* %incdec.ptr12, i8** %p.addr, align 8
  store i8 %conv11, i8* %9, align 1
  %10 = load i32, i32* %v.addr, align 4
  %shr = lshr i32 %10, 7
  %conv13 = trunc i32 %shr to i8
  %11 = load i8*, i8** %p.addr, align 8
  %incdec.ptr14 = getelementptr inbounds i8, i8* %11, i32 1
  store i8* %incdec.ptr14, i8** %p.addr, align 8
  store i8 %conv13, i8* %11, align 1
  br label %if.end

if.end:                                           ; preds = %if.else9, %if.then3
  br label %if.end15

if.end15:                                         ; preds = %if.end, %if.then
  %12 = load i8*, i8** %p.addr, align 8
  ret i8* %12
}
; RUN: opt -gvn-hoist -S < %s | FileCheck %s
target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Check that all "sub" expressions are hoisted.
; CHECK-LABEL: @fun
; CHECK: sub i64
; CHECK-NOT: sub i64

define i64 @fun(i8* %out, i8* %end) {
  %1 = icmp ult i8* %out, %end
  br i1 %1, label %2, label %6

; <label>:2                                       ; preds = %0
  %3 = ptrtoint i8* %end to i64
  %4 = ptrtoint i8* %out to i64
  %5 = sub i64 %3, %4
  br label %10

; <label>:6                                       ; preds = %0
  %7 = ptrtoint i8* %out to i64
  %8 = ptrtoint i8* %end to i64
  %9 = sub i64 %8, %7
  br label %10

; <label>:10                                      ; preds = %6, %2
  %.in = phi i64 [ %5, %2 ], [ %9, %6 ]
  %11 = add i64 %.in, 257
  ret i64 %11
}
0 ; RUN: opt -gvn-hoist -S < %s | FileCheck %s
1 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
2 target triple = "x86_64-unknown-linux-gnu"
3
4 @GlobalVar = internal global float 1.000000e+00
5
6 ; Check that all scalar expressions are hoisted.
7 ;
8 ; CHECK-LABEL: @scalarsHoisting
9 ; CHECK: fsub
10 ; CHECK: fmul
11 ; CHECK: fsub
12 ; CHECK: fmul
13 ; CHECK-NOT: fmul
14 ; CHECK-NOT: fsub
15 define float @scalarsHoisting(float %d, float %min, float %max, float %a) {
16 entry:
17 %div = fdiv float 1.000000e+00, %d
18 %cmp = fcmp oge float %div, 0.000000e+00
19 br i1 %cmp, label %if.then, label %if.else
20
21 if.then: ; preds = %entry
22 %sub = fsub float %min, %a
23 %mul = fmul float %sub, %div
24 %sub1 = fsub float %max, %a
25 %mul2 = fmul float %sub1, %div
26 br label %if.end
27
28 if.else: ; preds = %entry
29 %sub3 = fsub float %max, %a
30 %mul4 = fmul float %sub3, %div
31 %sub5 = fsub float %min, %a
32 %mul6 = fmul float %sub5, %div
33 br label %if.end
34
35 if.end: ; preds = %if.else, %if.then
36 %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
37 %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
38 %add = fadd float %tmax.0, %tmin.0
39 ret float %add
40 }
41
42 ; Check that all loads and scalars depending on the loads are hoisted.
43 ; Check that getelementptr computation gets hoisted before the load.
44 ;
45 ; CHECK-LABEL: @readsAndScalarsHoisting
46 ; CHECK: load
47 ; CHECK: load
48 ; CHECK: load
49 ; CHECK: fsub
50 ; CHECK: fmul
51 ; CHECK: fsub
52 ; CHECK: fmul
53 ; CHECK-NOT: load
54 ; CHECK-NOT: fmul
55 ; CHECK-NOT: fsub
56 define float @readsAndScalarsHoisting(float %d, float* %min, float* %max, float* %a) {
57 entry:
58 %div = fdiv float 1.000000e+00, %d
59 %cmp = fcmp oge float %div, 0.000000e+00
60 br i1 %cmp, label %if.then, label %if.else
61
62 if.then: ; preds = %entry
63 %A = getelementptr float, float* %min, i32 1
64 %0 = load float, float* %A, align 4
65 %1 = load float, float* %a, align 4
66 %sub = fsub float %0, %1
67 %mul = fmul float %sub, %div
68 %2 = load float, float* %max, align 4
69 %sub1 = fsub float %2, %1
70 %mul2 = fmul float %sub1, %div
71 br label %if.end
72
73 if.else: ; preds = %entry
74 %3 = load float, float* %max, align 4
75 %4 = load float, float* %a, align 4
76 %sub3 = fsub float %3, %4
77 %mul4 = fmul float %sub3, %div
78 %B = getelementptr float, float* %min, i32 1
79 %5 = load float, float* %B, align 4
80 %sub5 = fsub float %5, %4
81 %mul6 = fmul float %sub5, %div
82 br label %if.end
83
84 if.end: ; preds = %if.else, %if.then
85 %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
86 %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
87 %add = fadd float %tmax.0, %tmin.0
88 ret float %add
89 }
90
91 ; Check that we do not hoist loads after a store: the first two loads will be
92 ; hoisted, and then the third load will not be hoisted.
93 ;
94 ; CHECK-LABEL: @readsAndWrites
95 ; CHECK: load
96 ; CHECK: load
97 ; CHECK: fsub
98 ; CHECK: fmul
99 ; CHECK: store
100 ; CHECK: load
101 ; CHECK: fsub
102 ; CHECK: fmul
103 ; CHECK: load
104 ; CHECK: fsub
105 ; CHECK: fmul
106 ; CHECK-NOT: load
107 ; CHECK-NOT: fmul
108 ; CHECK-NOT: fsub
109 define float @readsAndWrites(float %d, float* %min, float* %max, float* %a) {
110 entry:
111 %div = fdiv float 1.000000e+00, %d
112 %cmp = fcmp oge float %div, 0.000000e+00
113 br i1 %cmp, label %if.then, label %if.else
114
115 if.then: ; preds = %entry
116 %0 = load float, float* %min, align 4
117 %1 = load float, float* %a, align 4
118 store float %0, float* @GlobalVar
119 %sub = fsub float %0, %1
120 %mul = fmul float %sub, %div
121 %2 = load float, float* %max, align 4
122 %sub1 = fsub float %2, %1
123 %mul2 = fmul float %sub1, %div
124 br label %if.end
125
126 if.else: ; preds = %entry
127 %3 = load float, float* %max, align 4
128 %4 = load float, float* %a, align 4
129 %sub3 = fsub float %3, %4
130 %mul4 = fmul float %sub3, %div
131 %5 = load float, float* %min, align 4
132 %sub5 = fsub float %5, %4
133 %mul6 = fmul float %sub5, %div
134 br label %if.end
135
136 if.end: ; preds = %if.else, %if.then
137 %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
138 %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
139 %add = fadd float %tmax.0, %tmin.0
140 ret float %add
141 }
142
143 ; Check that we do hoist loads when the store is above the insertion point.
144 ;
145 ; CHECK-LABEL: @readsAndWriteAboveInsertPt
146 ; CHECK: load
147 ; CHECK: load
148 ; CHECK: load
149 ; CHECK: fsub
150 ; CHECK: fsub
151 ; CHECK: fmul
152 ; CHECK: fmul
153 ; CHECK-NOT: load
154 ; CHECK-NOT: fmul
155 ; CHECK-NOT: fsub
156 define float @readsAndWriteAboveInsertPt(float %d, float* %min, float* %max, float* %a) {
157 entry:
158 %div = fdiv float 1.000000e+00, %d
159 store float 0.000000e+00, float* @GlobalVar
160 %cmp = fcmp oge float %div, 0.000000e+00
161 br i1 %cmp, label %if.then, label %if.else
162
163 if.then: ; preds = %entry
164 %0 = load float, float* %min, align 4
165 %1 = load float, float* %a, align 4
166 %sub = fsub float %0, %1
167 %mul = fmul float %sub, %div
168 %2 = load float, float* %max, align 4
169 %sub1 = fsub float %2, %1
170 %mul2 = fmul float %sub1, %div
171 br label %if.end
172
173 if.else: ; preds = %entry
174 %3 = load float, float* %max, align 4
175 %4 = load float, float* %a, align 4
176 %sub3 = fsub float %3, %4
177 %mul4 = fmul float %sub3, %div
178 %5 = load float, float* %min, align 4
179 %sub5 = fsub float %5, %4
180 %mul6 = fmul float %sub5, %div
181 br label %if.end
182
183 if.end: ; preds = %if.else, %if.then
184 %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
185 %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
186 %add = fadd float %tmax.0, %tmin.0
187 ret float %add
188 }
189
190 ; Check that dependent expressions are hoisted.
191 ; CHECK-LABEL: @dependentScalarsHoisting
192 ; CHECK: fsub
193 ; CHECK: fadd
194 ; CHECK: fdiv
195 ; CHECK: fmul
196 ; CHECK-NOT: fsub
197 ; CHECK-NOT: fadd
198 ; CHECK-NOT: fdiv
199 ; CHECK-NOT: fmul
200 define float @dependentScalarsHoisting(float %a, float %b, i1 %c) {
201 entry:
202 br i1 %c, label %if.then, label %if.else
203
204 if.then:
205 %d = fsub float %b, %a
206 %e = fadd float %d, %a
207 %f = fdiv float %e, %a
208 %g = fmul float %f, %a
209 br label %if.end
210
211 if.else:
212 %h = fsub float %b, %a
213 %i = fadd float %h, %a
214 %j = fdiv float %i, %a
215 %k = fmul float %j, %a
216 br label %if.end
217
218 if.end:
219 %r = phi float [ %g, %if.then ], [ %k, %if.else ]
220 ret float %r
221 }
222
223 ; Check that all independent expressions are hoisted.
224 ; CHECK-LABEL: @independentScalarsHoisting
225 ; CHECK: fmul
226 ; CHECK: fadd
227 ; CHECK: fdiv
228 ; CHECK: fsub
229 ; CHECK-NOT: fsub
230 ; CHECK-NOT: fdiv
231 ; CHECK-NOT: fmul
232 define float @independentScalarsHoisting(float %a, float %b, i1 %c) {
233 entry:
234 br i1 %c, label %if.then, label %if.else
235
236 if.then:
237 %d = fadd float %b, %a
238 %e = fsub float %b, %a
239 %f = fdiv float %b, %a
240 %g = fmul float %b, %a
241 br label %if.end
242
243 if.else:
244 %i = fadd float %b, %a
245 %h = fsub float %b, %a
246 %j = fdiv float %b, %a
247 %k = fmul float %b, %a
248 br label %if.end
249
250 if.end:
251 %p = phi float [ %d, %if.then ], [ %i, %if.else ]
252 %q = phi float [ %e, %if.then ], [ %h, %if.else ]
253 %r = phi float [ %f, %if.then ], [ %j, %if.else ]
254 %s = phi float [ %g, %if.then ], [ %k, %if.else ]
255 %t = fadd float %p, %q
256 %u = fadd float %r, %s
257 %v = fadd float %t, %u
258 ret float %v
259 }
260
261 ; Check that we hoist load and scalar expressions in triangles.
262 ; CHECK-LABEL: @triangleHoisting
263 ; CHECK: load
264 ; CHECK: load
265 ; CHECK: load
266 ; CHECK: fsub
267 ; CHECK: fsub
268 ; CHECK: fmul
269 ; CHECK: fmul
270 ; CHECK-NOT: load
271 ; CHECK-NOT: fmul
272 ; CHECK-NOT: fsub
273 define float @triangleHoisting(float %d, float* %min, float* %max, float* %a) {
274 entry:
275 %div = fdiv float 1.000000e+00, %d
276 %cmp = fcmp oge float %div, 0.000000e+00
277 br i1 %cmp, label %if.then, label %if.end
278
279 if.then: ; preds = %entry
280 %0 = load float, float* %min, align 4
281 %1 = load float, float* %a, align 4
282 %sub = fsub float %0, %1
283 %mul = fmul float %sub, %div
284 %2 = load float, float* %max, align 4
285 %sub1 = fsub float %2, %1
286 %mul2 = fmul float %sub1, %div
287 br label %if.end
288
289 if.end: ; preds = %entry
290 %p1 = phi float [ %mul2, %if.then ], [ 0.000000e+00, %entry ]
291 %p2 = phi float [ %mul, %if.then ], [ 0.000000e+00, %entry ]
292 %3 = load float, float* %max, align 4
293 %4 = load float, float* %a, align 4
294 %sub3 = fsub float %3, %4
295 %mul4 = fmul float %sub3, %div
296 %5 = load float, float* %min, align 4
297 %sub5 = fsub float %5, %4
298 %mul6 = fmul float %sub5, %div
299
300 %x = fadd float %p1, %mul6
301 %y = fadd float %p2, %mul4
302 %z = fadd float %x, %y
303 ret float %z
304 }
305
306 ; Check that we hoist load and scalar expressions in dominator.
307 ; CHECK-LABEL: @dominatorHoisting
308 ; CHECK: load
309 ; CHECK: load
310 ; CHECK: fsub
311 ; CHECK: fmul
312 ; CHECK: load
313 ; CHECK: fsub
314 ; CHECK: fmul
315 ; CHECK-NOT: load
316 ; CHECK-NOT: fmul
317 ; CHECK-NOT: fsub
318 define float @dominatorHoisting(float %d, float* %min, float* %max, float* %a) {
319 entry:
320 %div = fdiv float 1.000000e+00, %d
321 %0 = load float, float* %min, align 4
322 %1 = load float, float* %a, align 4
323 %sub = fsub float %0, %1
324 %mul = fmul float %sub, %div
325 %2 = load float, float* %max, align 4
326 %sub1 = fsub float %2, %1
327 %mul2 = fmul float %sub1, %div
328 %cmp = fcmp oge float %div, 0.000000e+00
329 br i1 %cmp, label %if.then, label %if.end
330
331 if.then: ; preds = %entry
332 %3 = load float, float* %max, align 4
333 %4 = load float, float* %a, align 4
334 %sub3 = fsub float %3, %4
335 %mul4 = fmul float %sub3, %div
336 %5 = load float, float* %min, align 4
337 %sub5 = fsub float %5, %4
338 %mul6 = fmul float %sub5, %div
339 br label %if.end
340
341 if.end: ; preds = %entry
342 %p1 = phi float [ %mul4, %if.then ], [ 0.000000e+00, %entry ]
343 %p2 = phi float [ %mul6, %if.then ], [ 0.000000e+00, %entry ]
344
345 %x = fadd float %p1, %mul2
346 %y = fadd float %p2, %mul
347 %z = fadd float %x, %y
348 ret float %z
349 }
350
351 ; Check that we hoist load and scalar expressions in dominator.
352 ; CHECK-LABEL: @domHoisting
353 ; CHECK: load
354 ; CHECK: load
355 ; CHECK: fsub
356 ; CHECK: fmul
357 ; CHECK: load
358 ; CHECK: fsub
359 ; CHECK: fmul
360 ; CHECK-NOT: load
361 ; CHECK-NOT: fmul
362 ; CHECK-NOT: fsub
363 define float @domHoisting(float %d, float* %min, float* %max, float* %a) {
364 entry:
365 %div = fdiv float 1.000000e+00, %d
366 %0 = load float, float* %min, align 4
367 %1 = load float, float* %a, align 4
368 %sub = fsub float %0, %1
369 %mul = fmul float %sub, %div
370 %2 = load float, float* %max, align 4
371 %sub1 = fsub float %2, %1
372 %mul2 = fmul float %sub1, %div
373 %cmp = fcmp oge float %div, 0.000000e+00
374 br i1 %cmp, label %if.then, label %if.else
375
376 if.then:
377 %3 = load float, float* %max, align 4
378 %4 = load float, float* %a, align 4
379 %sub3 = fsub float %3, %4
380 %mul4 = fmul float %sub3, %div
381 %5 = load float, float* %min, align 4
382 %sub5 = fsub float %5, %4
383 %mul6 = fmul float %sub5, %div
384 br label %if.end
385
386 if.else:
387 %6 = load float, float* %max, align 4
388 %7 = load float, float* %a, align 4
389 %sub9 = fsub float %6, %7
390 %mul10 = fmul float %sub9, %div
391 %8 = load float, float* %min, align 4
392 %sub12 = fsub float %8, %7
393 %mul13 = fmul float %sub12, %div
394 br label %if.end
395
396 if.end:
397 %p1 = phi float [ %mul4, %if.then ], [ %mul10, %if.else ]
398 %p2 = phi float [ %mul6, %if.then ], [ %mul13, %if.else ]
399
400 %x = fadd float %p1, %mul2
401 %y = fadd float %p2, %mul
402 %z = fadd float %x, %y
403 ret float %z
404 }
405
406 ; Check that we do not hoist loads past stores within a same basic block.
407 ; CHECK-LABEL: @noHoistInSingleBBWithStore
408 ; CHECK: load
409 ; CHECK: store
410 ; CHECK: load
411 ; CHECK: store
412 define i32 @noHoistInSingleBBWithStore() {
413 entry:
414 %D = alloca i32, align 4
415 %0 = bitcast i32* %D to i8*
416 %bf = load i8, i8* %0, align 4
417 %bf.clear = and i8 %bf, -3
418 store i8 %bf.clear, i8* %0, align 4
419 %bf1 = load i8, i8* %0, align 4
420 %bf.clear1 = and i8 %bf1, 1
421 store i8 %bf.clear1, i8* %0, align 4
422 ret i32 0
423 }
424
425 ; Check that we do not hoist loads past calls within a same basic block.
426 ; CHECK-LABEL: @noHoistInSingleBBWithCall
427 ; CHECK: load
428 ; CHECK: call
429 ; CHECK: load
430 declare void @foo()
431 define i32 @noHoistInSingleBBWithCall() {
432 entry:
433 %D = alloca i32, align 4
434 %0 = bitcast i32* %D to i8*
435 %bf = load i8, i8* %0, align 4
436 %bf.clear = and i8 %bf, -3
437 call void @foo()
438 %bf1 = load i8, i8* %0, align 4
439 %bf.clear1 = and i8 %bf1, 1
440 ret i32 0
441 }
442
443 ; Check that we do not hoist loads past stores in any branch of a diamond.
444 ; CHECK-LABEL: @noHoistInDiamondWithOneStore1
445 ; CHECK: fdiv
446 ; CHECK: fcmp
447 ; CHECK: br
448 define float @noHoistInDiamondWithOneStore1(float %d, float* %min, float* %max, float* %a) {
449 entry:
450 %div = fdiv float 1.000000e+00, %d
451 %cmp = fcmp oge float %div, 0.000000e+00
452 br i1 %cmp, label %if.then, label %if.else
453
454 if.then: ; preds = %entry
455 store float 0.000000e+00, float* @GlobalVar
456 %0 = load float, float* %min, align 4
457 %1 = load float, float* %a, align 4
458 %sub = fsub float %0, %1
459 %mul = fmul float %sub, %div
460 %2 = load float, float* %max, align 4
461 %sub1 = fsub float %2, %1
462 %mul2 = fmul float %sub1, %div
463 br label %if.end
464
465 if.else: ; preds = %entry
466 ; There are no side effects on the if.else branch.
467 %3 = load float, float* %max, align 4
468 %4 = load float, float* %a, align 4
469 %sub3 = fsub float %3, %4
470 %mul4 = fmul float %sub3, %div
471 %5 = load float, float* %min, align 4
472 %sub5 = fsub float %5, %4
473 %mul6 = fmul float %sub5, %div
474 br label %if.end
475
476 if.end: ; preds = %if.else, %if.then
477 %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
478 %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
479
480 %6 = load float, float* %max, align 4
481 %7 = load float, float* %a, align 4
482 %sub6 = fsub float %6, %7
483 %mul7 = fmul float %sub6, %div
484 %8 = load float, float* %min, align 4
485 %sub8 = fsub float %8, %7
486 %mul9 = fmul float %sub8, %div
487
488 %add = fadd float %tmax.0, %tmin.0
489 ret float %add
490 }
491
492 ; Check that we do not hoist loads past stores from half diamond.
493 ; CHECK-LABEL: @noHoistInHalfDiamondPastStore
494 ; CHECK: load
495 ; CHECK-NEXT: load
496 ; CHECK-NEXT: store
497 ; CHECK-NEXT: br
498 ; CHECK: load
499 ; CHECK: load
500 ; CHECK: load
501 ; CHECK: br
502 define float @noHoistInHalfDiamondPastStore(float %d, float* %min, float* %max, float* %a) {
503 entry:
504 %div = fdiv float 1.000000e+00, %d
505 %cmp = fcmp oge float %div, 0.000000e+00
506 %0 = load float, float* %min, align 4
507 %1 = load float, float* %a, align 4
508
509 ; Loads should not be hoisted above this store.
510 store float 0.000000e+00, float* @GlobalVar
511
512 br i1 %cmp, label %if.then, label %if.end
513
514 if.then:
515 ; There are no side effects on the if.then branch.
516 %2 = load float, float* %max, align 4
517 %3 = load float, float* %a, align 4
518 %sub3 = fsub float %2, %3
519 %mul4 = fmul float %sub3, %div
520 %4 = load float, float* %min, align 4
521 %sub5 = fsub float %4, %3
522 %mul6 = fmul float %sub5, %div
523 br label %if.end
524
525 if.end:
526 %tmax.0 = phi float [ %mul4, %if.then ], [ %0, %entry ]
527 %tmin.0 = phi float [ %mul6, %if.then ], [ %1, %entry ]
528
529 %add = fadd float %tmax.0, %tmin.0
530 ret float %add
531 }
532
533 ; Check that we do not hoist loads past a store in any branch of a diamond.
534 ; CHECK-LABEL: @noHoistInDiamondWithOneStore2
535 ; CHECK: fdiv
536 ; CHECK: fcmp
537 ; CHECK: br
538 define float @noHoistInDiamondWithOneStore2(float %d, float* %min, float* %max, float* %a) {
539 entry:
540 %div = fdiv float 1.000000e+00, %d
541 %cmp = fcmp oge float %div, 0.000000e+00
542 br i1 %cmp, label %if.then, label %if.else
543
544 if.then: ; preds = %entry
545 ; There are no side effects on the if.then branch.
546 %0 = load float, float* %min, align 4
547 %1 = load float, float* %a, align 4
548 %sub = fsub float %0, %1
549 %mul = fmul float %sub, %div
550 %2 = load float, float* %max, align 4
551 %sub1 = fsub float %2, %1
552 %mul2 = fmul float %sub1, %div
553 br label %if.end
554
555 if.else: ; preds = %entry
556 store float 0.000000e+00, float* @GlobalVar
557 %3 = load float, float* %max, align 4
558 %4 = load float, float* %a, align 4
559 %sub3 = fsub float %3, %4
560 %mul4 = fmul float %sub3, %div
561 %5 = load float, float* %min, align 4
562 %sub5 = fsub float %5, %4
563 %mul6 = fmul float %sub5, %div
564 br label %if.end
565
566 if.end: ; preds = %if.else, %if.then
567 %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
568 %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
569
570 %6 = load float, float* %max, align 4
571 %7 = load float, float* %a, align 4
572 %sub6 = fsub float %6, %7
573 %mul7 = fmul float %sub6, %div
574 %8 = load float, float* %min, align 4
575 %sub8 = fsub float %8, %7
576 %mul9 = fmul float %sub8, %div
577
578 %add = fadd float %tmax.0, %tmin.0
579 ret float %add
580 }
581
582 ; Check that we do not hoist loads outside a loop containing stores.
583 ; CHECK-LABEL: @noHoistInLoopsWithStores
584 ; CHECK: fdiv
585 ; CHECK: fcmp
586 ; CHECK: br
587 define float @noHoistInLoopsWithStores(float %d, float* %min, float* %max, float* %a) {
588 entry:
589 %div = fdiv float 1.000000e+00, %d
590 %cmp = fcmp oge float %div, 0.000000e+00
591 br i1 %cmp, label %do.body, label %if.else
592
593 do.body:
594 %0 = load float, float* %min, align 4
595 %1 = load float, float* %a, align 4
596
597 ; It is unsafe to hoist the loads outside the loop because of the store.
598 store float 0.000000e+00, float* @GlobalVar
599
600 %sub = fsub float %0, %1
601 %mul = fmul float %sub, %div
602 %2 = load float, float* %max, align 4
603 %sub1 = fsub float %2, %1
604 %mul2 = fmul float %sub1, %div
605 br label %while.cond
606
607 while.cond:
608 %cmp1 = fcmp oge float %mul2, 0.000000e+00
609 br i1 %cmp1, label %if.end, label %do.body
610
611 if.else:
612 %3 = load float, float* %max, align 4
613 %4 = load float, float* %a, align 4
614 %sub3 = fsub float %3, %4
615 %mul4 = fmul float %sub3, %div
616 %5 = load float, float* %min, align 4
617 %sub5 = fsub float %5, %4
618 %mul6 = fmul float %sub5, %div
619 br label %if.end
620
621 if.end:
622 %tmax.0 = phi float [ %mul2, %while.cond ], [ %mul6, %if.else ]
623 %tmin.0 = phi float [ %mul, %while.cond ], [ %mul4, %if.else ]
624
625 %add = fadd float %tmax.0, %tmin.0
626 ret float %add
627 }
628
629 ; Check that we hoist stores: all the instructions from the then branch
630 ; should be hoisted.
631 ; CHECK-LABEL: @hoistStores
632 ; CHECK: zext
633 ; CHECK: trunc
634 ; CHECK: getelementptr
635 ; CHECK: load
636 ; CHECK: getelementptr
637 ; CHECK: store
638 ; CHECK: load
639 ; CHECK: load
640 ; CHECK: zext
641 ; CHECK: add
642 ; CHECK: store
643 ; CHECK: br
644 ; CHECK: if.then
645 ; CHECK: br
646
647 %struct.foo = type { i16* }
648
649 define void @hoistStores(%struct.foo* %s, i32* %coord, i1 zeroext %delta) {
650 entry:
651 %frombool = zext i1 %delta to i8
652 %tobool = trunc i8 %frombool to i1
653 br i1 %tobool, label %if.then, label %if.else
654
655 if.then: ; preds = %entry
656 %p = getelementptr inbounds %struct.foo, %struct.foo* %s, i32 0, i32 0
657 %0 = load i16*, i16** %p, align 8
658 %incdec.ptr = getelementptr inbounds i16, i16* %0, i32 1
659 store i16* %incdec.ptr, i16** %p, align 8
660 %1 = load i16, i16* %0, align 2
661 %conv = zext i16 %1 to i32
662 %2 = load i32, i32* %coord, align 4
663 %add = add i32 %2, %conv
664 store i32 %add, i32* %coord, align 4
665 br label %if.end
666
667 if.else: ; preds = %entry
668 %p1 = getelementptr inbounds %struct.foo, %struct.foo* %s, i32 0, i32 0
669 %3 = load i16*, i16** %p1, align 8
670 %incdec.ptr2 = getelementptr inbounds i16, i16* %3, i32 1
671 store i16* %incdec.ptr2, i16** %p1, align 8
672 %4 = load i16, i16* %3, align 2
673 %conv3 = zext i16 %4 to i32
674 %5 = load i32, i32* %coord, align 4
675 %add4 = add i32 %5, %conv3
676 store i32 %add4, i32* %coord, align 4
677 %6 = load i16*, i16** %p1, align 8
678 %incdec.ptr6 = getelementptr inbounds i16, i16* %6, i32 1
679 store i16* %incdec.ptr6, i16** %p1, align 8
680 %7 = load i16, i16* %6, align 2
681 %conv7 = zext i16 %7 to i32
682 %shl = shl i32 %conv7, 8
683 %8 = load i32, i32* %coord, align 4
684 %add8 = add i32 %8, %shl
685 store i32 %add8, i32* %coord, align 4
686 br label %if.end
687
688 if.end: ; preds = %if.else, %if.then
689 ret void
690 }