llvm.org GIT mirror llvm / 02cc44c
Temporarily Revert "Add basic loop fusion pass." As it's causing some bot failures (and per request from kbarton). This reverts commit r358543/ab70da07286e618016e78247e4a24fcb84077fda. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358546 91177308-0d34-0410-b5e6-96231b3b80d8 Eric Christopher 5 months ago
4868 changed file(s) with 0 addition(s) and 575521 deletion(s). Raw diff Collapse all Expand all
218218 void initializeLoopDistributeLegacyPass(PassRegistry&);
219219 void initializeLoopExtractorPass(PassRegistry&);
220220 void initializeLoopGuardWideningLegacyPassPass(PassRegistry&);
221 void initializeLoopFuseLegacyPass(PassRegistry&);
222221 void initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry&);
223222 void initializeLoopInfoWrapperPassPass(PassRegistry&);
224223 void initializeLoopInstSimplifyLegacyPassPass(PassRegistry&);
+0
-30
include/llvm/Transforms/Scalar/LoopFuse.h less more
None //===- LoopFuse.h - Loop Fusion Pass ----------------------------*- C++ -*-===//
1 //
2 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3 // See https://llvm.org/LICENSE.txt for license information.
4 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5 //
6 //===----------------------------------------------------------------------===//
7 ///
8 /// \file
9 /// This file implements the Loop Fusion pass.
10 ///
11 //===----------------------------------------------------------------------===//
12
13 #ifndef LLVM_TRANSFORMS_SCALAR_LOOPFUSE_H
14 #define LLVM_TRANSFORMS_SCALAR_LOOPFUSE_H
15
16 #include "llvm/IR/PassManager.h"
17
18 namespace llvm {
19
20 class Function;
21
22 class LoopFusePass : public PassInfoMixin {
23 public:
24 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
25 };
26
27 } // end namespace llvm
28
29 #endif // LLVM_TRANSFORMS_SCALAR_LOOPFUSE_H
459459
460460 //===----------------------------------------------------------------------===//
461461 //
462 // LoopFuse - Fuse loops.
463 //
464 FunctionPass *createLoopFusePass();
465
466 //===----------------------------------------------------------------------===//
467 //
468462 // LoopLoadElimination - Perform loop-aware load elimination.
469463 //
470464 FunctionPass *createLoopLoadEliminationPass();
121121 #include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
122122 #include "llvm/Transforms/Scalar/LoopDeletion.h"
123123 #include "llvm/Transforms/Scalar/LoopDistribute.h"
124 #include "llvm/Transforms/Scalar/LoopFuse.h"
125124 #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
126125 #include "llvm/Transforms/Scalar/LoopInstSimplify.h"
127126 #include "llvm/Transforms/Scalar/LoopLoadElimination.h"
196196 FUNCTION_PASS("lcssa", LCSSAPass())
197197 FUNCTION_PASS("loop-data-prefetch", LoopDataPrefetchPass())
198198 FUNCTION_PASS("loop-load-elim", LoopLoadEliminationPass())
199 FUNCTION_PASS("loop-fuse", LoopFusePass())
200199 FUNCTION_PASS("loop-distribute", LoopDistributePass())
201200 FUNCTION_PASS("loop-vectorize", LoopVectorizePass())
202201 FUNCTION_PASS("pgo-memop-opt", PGOMemOPSizeOpt())
2727 LoopDeletion.cpp
2828 LoopDataPrefetch.cpp
2929 LoopDistribute.cpp
30 LoopFuse.cpp
3130 LoopIdiomRecognize.cpp
3231 LoopInstSimplify.cpp
3332 LoopInterchange.cpp
+0
-1212
lib/Transforms/Scalar/LoopFuse.cpp less more
None //===- LoopFuse.cpp - Loop Fusion Pass ------------------------------------===//
1 //
2 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3 // See https://llvm.org/LICENSE.txt for license information.
4 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5 //
6 //===----------------------------------------------------------------------===//
7 ///
8 /// \file
9 /// This file implements the loop fusion pass.
10 /// The implementation is largely based on the following document:
11 ///
12 /// Code Transformations to Augment the Scope of Loop Fusion in a
13 /// Production Compiler
14 /// Christopher Mark Barton
15 /// MSc Thesis
16 /// https://webdocs.cs.ualberta.ca/~amaral/thesis/ChristopherBartonMSc.pdf
17 ///
18 /// The general approach taken is to collect sets of control flow equivalent
19 /// loops and test whether they can be fused. The necessary conditions for
20 /// fusion are:
21 /// 1. The loops must be adjacent (there cannot be any statements between
22 /// the two loops).
23 /// 2. The loops must be conforming (they must execute the same number of
24 /// iterations).
25 /// 3. The loops must be control flow equivalent (if one loop executes, the
26 /// other is guaranteed to execute).
27 /// 4. There cannot be any negative distance dependencies between the loops.
28 /// If all of these conditions are satisfied, it is safe to fuse the loops.
29 ///
30 /// This implementation creates FusionCandidates that represent the loop and the
31 /// necessary information needed by fusion. It then operates on the fusion
32 /// candidates, first confirming that the candidate is eligible for fusion. The
33 /// candidates are then collected into control flow equivalent sets, sorted in
34 /// dominance order. Each set of control flow equivalent candidates is then
35 /// traversed, attempting to fuse pairs of candidates in the set. If all
36 /// requirements for fusion are met, the two candidates are fused, creating a
37 /// new (fused) candidate which is then added back into the set to consider for
38 /// additional fusion.
39 ///
40 /// This implementation currently does not make any modifications to remove
41 /// conditions for fusion. Code transformations to make loops conform to each of
42 /// the conditions for fusion are discussed in more detail in the document
43 /// above. These can be added to the current implementation in the future.
44 //===----------------------------------------------------------------------===//
45
46 #include "llvm/Transforms/Scalar/LoopFuse.h"
47 #include "llvm/ADT/Statistic.h"
48 #include "llvm/Analysis/DependenceAnalysis.h"
49 #include "llvm/Analysis/DomTreeUpdater.h"
50 #include "llvm/Analysis/LoopInfo.h"
51 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
52 #include "llvm/Analysis/PostDominators.h"
53 #include "llvm/Analysis/ScalarEvolution.h"
54 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
55 #include "llvm/IR/Function.h"
56 #include "llvm/IR/Verifier.h"
57 #include "llvm/Pass.h"
58 #include "llvm/Support/Debug.h"
59 #include "llvm/Support/raw_ostream.h"
60 #include "llvm/Transforms/Scalar.h"
61 #include "llvm/Transforms/Utils.h"
62 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
63
64 using namespace llvm;
65
66 #define DEBUG_TYPE "loop-fusion"
67
68 STATISTIC(FuseCounter, "Count number of loop fusions performed");
69 STATISTIC(NumFusionCandidates, "Number of candidates for loop fusion");
70 STATISTIC(InvalidPreheader, "Loop has invalid preheader");
71 STATISTIC(InvalidHeader, "Loop has invalid header");
72 STATISTIC(InvalidExitingBlock, "Loop has invalid exiting blocks");
73 STATISTIC(InvalidExitBlock, "Loop has invalid exit block");
74 STATISTIC(InvalidLatch, "Loop has invalid latch");
75 STATISTIC(InvalidLoop, "Loop is invalid");
76 STATISTIC(AddressTakenBB, "Basic block has address taken");
77 STATISTIC(MayThrowException, "Loop may throw an exception");
78 STATISTIC(ContainsVolatileAccess, "Loop contains a volatile access");
79 STATISTIC(NotSimplifiedForm, "Loop is not in simplified form");
80 STATISTIC(InvalidDependencies, "Dependencies prevent fusion");
81 STATISTIC(InvalidTripCount,
82 "Loop does not have invariant backedge taken count");
83 STATISTIC(UncomputableTripCount, "SCEV cannot compute trip count of loop");
84 STATISTIC(NonEqualTripCount, "Candidate trip counts are not the same");
85 STATISTIC(NonAdjacent, "Candidates are not adjacent");
86 STATISTIC(NonEmptyPreheader, "Candidate has a non-empty preheader");
87
/// Choice of dependence analysis for loop fusion.
enum FusionDependenceAnalysisChoice {
  /// Use the scalar evolution interface.
  FUSION_DEPENDENCE_ANALYSIS_SCEV,
  /// Use the dependence analysis interface.
  FUSION_DEPENDENCE_ANALYSIS_DA,
  /// Use all available analyses.
  FUSION_DEPENDENCE_ANALYSIS_ALL
};
93
94 static cl::opt FusionDependenceAnalysis(
95 "loop-fusion-dependence-analysis",
96 cl::desc("Which dependence analysis should loop fusion use?"),
97 cl::values(clEnumValN(FUSION_DEPENDENCE_ANALYSIS_SCEV, "scev",
98 "Use the scalar evolution interface"),
99 clEnumValN(FUSION_DEPENDENCE_ANALYSIS_DA, "da",
100 "Use the dependence analysis interface"),
101 clEnumValN(FUSION_DEPENDENCE_ANALYSIS_ALL, "all",
102 "Use all available analyses")),
103 cl::Hidden, cl::init(FUSION_DEPENDENCE_ANALYSIS_ALL), cl::ZeroOrMore);
104
105 #ifndef NDEBUG
106 static cl::opt
107 VerboseFusionDebugging("loop-fusion-verbose-debug",
108 cl::desc("Enable verbose debugging for Loop Fusion"),
109 cl::Hidden, cl::init(false), cl::ZeroOrMore);
110 #endif
111
112 /// This class is used to represent a candidate for loop fusion. When it is
113 /// constructed, it checks the conditions for loop fusion to ensure that it
114 /// represents a valid candidate. It caches several parts of a loop that are
115 /// used throughout loop fusion (e.g., loop preheader, loop header, etc) instead
116 /// of continually querying the underlying Loop to retrieve these values. It is
117 /// assumed these will not change throughout loop fusion.
118 ///
119 /// The invalidate method should be used to indicate that the FusionCandidate is
120 /// no longer a valid candidate for fusion. Similarly, the isValid() method can
121 /// be used to ensure that the FusionCandidate is still valid for fusion.
122 struct FusionCandidate {
123 /// Cache of parts of the loop used throughout loop fusion. These should not
124 /// need to change throughout the analysis and transformation.
125 /// These parts are cached to avoid repeatedly looking up in the Loop class.
126
127 /// Preheader of the loop this candidate represents
128 BasicBlock *Preheader;
129 /// Header of the loop this candidate represents
130 BasicBlock *Header;
131 /// Blocks in the loop that exit the loop
132 BasicBlock *ExitingBlock;
133 /// The successor block of this loop (where the exiting blocks go to)
134 BasicBlock *ExitBlock;
135 /// Latch of the loop
136 BasicBlock *Latch;
137 /// The loop that this fusion candidate represents
138 Loop *L;
139 /// Vector of instructions in this loop that read from memory
140 SmallVector MemReads;
141 /// Vector of instructions in this loop that write to memory
142 SmallVector MemWrites;
143 /// Are all of the members of this fusion candidate still valid
144 bool Valid;
145
146 /// Dominator and PostDominator trees are needed for the
147 /// FusionCandidateCompare function, required by FusionCandidateSet to
148 /// determine where the FusionCandidate should be inserted into the set. These
149 /// are used to establish ordering of the FusionCandidates based on dominance.
150 const DominatorTree *DT;
151 const PostDominatorTree *PDT;
152
153 FusionCandidate(Loop *L, const DominatorTree *DT,
154 const PostDominatorTree *PDT)
155 : Preheader(L->getLoopPreheader()), Header(L->getHeader()),
156 ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()),
157 Latch(L->getLoopLatch()), L(L), Valid(true), DT(DT), PDT(PDT) {
158
159 // Walk over all blocks in the loop and check for conditions that may
160 // prevent fusion. For each block, walk over all instructions and collect
161 // the memory reads and writes If any instructions that prevent fusion are
162 // found, invalidate this object and return.
163 for (BasicBlock *BB : L->blocks()) {
164 if (BB->hasAddressTaken()) {
165 AddressTakenBB++;
166 invalidate();
167 return;
168 }
169
170 for (Instruction &I : *BB) {
171 if (I.mayThrow()) {
172 MayThrowException++;
173 invalidate();
174 return;
175 }
176 if (StoreInst *SI = dyn_cast(&I)) {
177 if (SI->isVolatile()) {
178 ContainsVolatileAccess++;
179 invalidate();
180 return;
181 }
182 }
183 if (LoadInst *LI = dyn_cast(&I)) {
184 if (LI->isVolatile()) {
185 ContainsVolatileAccess++;
186 invalidate();
187 return;
188 }
189 }
190 if (I.mayWriteToMemory())
191 MemWrites.push_back(&I);
192 if (I.mayReadFromMemory())
193 MemReads.push_back(&I);
194 }
195 }
196 }
197
198 /// Check if all members of the class are valid.
199 bool isValid() const {
200 return Preheader && Header && ExitingBlock && ExitBlock && Latch && L &&
201 !L->isInvalid() && Valid;
202 }
203
204 /// Verify that all members are in sync with the Loop object.
205 void verify() const {
206 assert(isValid() && "Candidate is not valid!!");
207 assert(!L->isInvalid() && "Loop is invalid!");
208 assert(Preheader == L->getLoopPreheader() && "Preheader is out of sync");
209 assert(Header == L->getHeader() && "Header is out of sync");
210 assert(ExitingBlock == L->getExitingBlock() &&
211 "Exiting Blocks is out of sync");
212 assert(ExitBlock == L->getExitBlock() && "Exit block is out of sync");
213 assert(Latch == L->getLoopLatch() && "Latch is out of sync");
214 }
215
216 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
217 LLVM_DUMP_METHOD void dump() const {
218 dbgs() << "\tPreheader: " << (Preheader ? Preheader->getName() : "nullptr")
219 << "\n"
220 << "\tHeader: " << (Header ? Header->getName() : "nullptr") << "\n"
221 << "\tExitingBB: "
222 << (ExitingBlock ? ExitingBlock->getName() : "nullptr") << "\n"
223 << "\tExitBB: " << (ExitBlock ? ExitBlock->getName() : "nullptr")
224 << "\n"
225 << "\tLatch: " << (Latch ? Latch->getName() : "nullptr") << "\n";
226 }
227 #endif
228
229 private:
230 // This is only used internally for now, to clear the MemWrites and MemReads
231 // list and setting Valid to false. I can't envision other uses of this right
232 // now, since once FusionCandidates are put into the FusionCandidateSet they
233 // are immutable. Thus, any time we need to change/update a FusionCandidate,
234 // we must create a new one and insert it into the FusionCandidateSet to
235 // ensure the FusionCandidateSet remains ordered correctly.
236 void invalidate() {
237 MemWrites.clear();
238 MemReads.clear();
239 Valid = false;
240 }
241 };
242
243 inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
244 const FusionCandidate &FC) {
245 if (FC.isValid())
246 OS << FC.Preheader->getName();
247 else
248 OS << "";
249
250 return OS;
251 }
252
253 struct FusionCandidateCompare {
254 /// Comparison functor to sort two Control Flow Equivalent fusion candidates
255 /// into dominance order.
256 /// If LHS dominates RHS and RHS post-dominates LHS, return true;
257 /// IF RHS dominates LHS and LHS post-dominates RHS, return false;
258 bool operator()(const FusionCandidate &LHS,
259 const FusionCandidate &RHS) const {
260 const DominatorTree *DT = LHS.DT;
261 const PostDominatorTree *PDT = LHS.PDT;
262
263 assert(DT && PDT && "Expecting valid dominator tree");
264
265 if (DT->dominates(LHS.Preheader, RHS.Preheader)) {
266 // Verify RHS Postdominates LHS
267 assert(PDT->dominates(RHS.Preheader, LHS.Preheader));
268 return true;
269 }
270
271 if (DT->dominates(RHS.Preheader, LHS.Preheader)) {
272 // RHS dominates LHS
273 // Verify LHS post-dominates RHS
274 assert(PDT->dominates(LHS.Preheader, RHS.Preheader));
275 return false;
276 }
277 // If LHS does not dominate RHS and RHS does not dominate LHS then there is
278 // no dominance relationship between the two FusionCandidates. Thus, they
279 // should not be in the same set together.
280 llvm_unreachable(
281 "No dominance relationship between these fusion candidates!");
282 }
283 };
284
285 namespace {
286 using LoopVector = SmallVector;
287
288 // Set of Control Flow Equivalent (CFE) Fusion Candidates, sorted in dominance
289 // order. Thus, if FC0 comes *before* FC1 in a FusionCandidateSet, then FC0
290 // dominates FC1 and FC1 post-dominates FC0.
291 // std::set was chosen because we want a sorted data structure with stable
292 // iterators. A subsequent patch to loop fusion will enable fusing non-ajdacent
293 // loops by moving intervening code around. When this intervening code contains
294 // loops, those loops will be moved also. The corresponding FusionCandidates
295 // will also need to be moved accordingly. As this is done, having stable
296 // iterators will simplify the logic. Similarly, having an efficient insert that
297 // keeps the FusionCandidateSet sorted will also simplify the implementation.
298 using FusionCandidateSet = std::set;
299 using FusionCandidateCollection = SmallVector;
300 } // namespace
301
302 inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
303 const FusionCandidateSet &CandSet) {
304 for (auto IT : CandSet)
305 OS << IT << "\n";
306
307 return OS;
308 }
309
310 static void
311 printFusionCandidates(const FusionCandidateCollection &FusionCandidates) {
312 LLVM_DEBUG(dbgs() << "Fusion Candidates: \n");
313 for (const auto &CandidateSet : FusionCandidates) {
314 LLVM_DEBUG({
315 dbgs() << "*** Fusion Candidate Set ***\n";
316 dbgs() << CandidateSet;
317 dbgs() << "****************************\n";
318 });
319 }
320 }
321
322 /// Collect all loops in function at the same nest level, starting at the
323 /// outermost level.
324 ///
325 /// This data structure collects all loops at the same nest level for a
326 /// given function (specified by the LoopInfo object). It starts at the
327 /// outermost level.
328 struct LoopDepthTree {
329 using LoopsOnLevelTy = SmallVector;
330 using iterator = LoopsOnLevelTy::iterator;
331 using const_iterator = LoopsOnLevelTy::const_iterator;
332
333 LoopDepthTree(LoopInfo &LI) : Depth(1) {
334 if (!LI.empty())
335 LoopsOnLevel.emplace_back(LoopVector(LI.rbegin(), LI.rend()));
336 }
337
338 /// Test whether a given loop has been removed from the function, and thus is
339 /// no longer valid.
340 bool isRemovedLoop(const Loop *L) const { return RemovedLoops.count(L); }
341
342 /// Record that a given loop has been removed from the function and is no
343 /// longer valid.
344 void removeLoop(const Loop *L) { RemovedLoops.insert(L); }
345
346 /// Descend the tree to the next (inner) nesting level
347 void descend() {
348 LoopsOnLevelTy LoopsOnNextLevel;
349
350 for (const LoopVector &LV : *this)
351 for (Loop *L : LV)
352 if (!isRemovedLoop(L) && L->begin() != L->end())
353 LoopsOnNextLevel.emplace_back(LoopVector(L->begin(), L->end()));
354
355 LoopsOnLevel = LoopsOnNextLevel;
356 RemovedLoops.clear();
357 Depth++;
358 }
359
360 bool empty() const { return size() == 0; }
361 size_t size() const { return LoopsOnLevel.size() - RemovedLoops.size(); }
362 unsigned getDepth() const { return Depth; }
363
364 iterator begin() { return LoopsOnLevel.begin(); }
365 iterator end() { return LoopsOnLevel.end(); }
366 const_iterator begin() const { return LoopsOnLevel.begin(); }
367 const_iterator end() const { return LoopsOnLevel.end(); }
368
369 private:
370 /// Set of loops that have been removed from the function and are no longer
371 /// valid.
372 SmallPtrSet RemovedLoops;
373
374 /// Depth of the current level, starting at 1 (outermost loops).
375 unsigned Depth;
376
377 /// Vector of loops at the current depth level that have the same parent loop
378 LoopsOnLevelTy LoopsOnLevel;
379 };
380
381 #ifndef NDEBUG
382 static void printLoopVector(const LoopVector &LV) {
383 dbgs() << "****************************\n";
384 for (auto L : LV)
385 printLoop(*L, dbgs());
386 dbgs() << "****************************\n";
387 }
388 #endif
389
390 static void reportLoopFusion(const FusionCandidate &FC0,
391 const FusionCandidate &FC1,
392 OptimizationRemarkEmitter &ORE) {
393 using namespace ore;
394 ORE.emit(
395 OptimizationRemark(DEBUG_TYPE, "LoopFusion", FC0.Preheader->getParent())
396 << "Fused " << NV("Cand1", StringRef(FC0.Preheader->getName()))
397 << " with " << NV("Cand2", StringRef(FC1.Preheader->getName())));
398 }
399
400 struct LoopFuser {
401 private:
402 // Sets of control flow equivalent fusion candidates for a given nest level.
403 FusionCandidateCollection FusionCandidates;
404
405 LoopDepthTree LDT;
406 DomTreeUpdater DTU;
407
408 LoopInfo &LI;
409 DominatorTree &DT;
410 DependenceInfo &DI;
411 ScalarEvolution &SE;
412 PostDominatorTree &PDT;
413 OptimizationRemarkEmitter &ORE;
414
415 public:
416 LoopFuser(LoopInfo &LI, DominatorTree &DT, DependenceInfo &DI,
417 ScalarEvolution &SE, PostDominatorTree &PDT,
418 OptimizationRemarkEmitter &ORE, const DataLayout &DL)
419 : LDT(LI), DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy), LI(LI),
420 DT(DT), DI(DI), SE(SE), PDT(PDT), ORE(ORE) {}
421
422 /// This is the main entry point for loop fusion. It will traverse the
423 /// specified function and collect candidate loops to fuse, starting at the
424 /// outermost nesting level and working inwards.
425 bool fuseLoops(Function &F) {
426 #ifndef NDEBUG
427 if (VerboseFusionDebugging) {
428 LI.print(dbgs());
429 }
430 #endif
431
432 LLVM_DEBUG(dbgs() << "Performing Loop Fusion on function " << F.getName()
433 << "\n");
434 bool Changed = false;
435
436 while (!LDT.empty()) {
437 LLVM_DEBUG(dbgs() << "Got " << LDT.size() << " loop sets for depth "
438 << LDT.getDepth() << "\n";);
439
440 for (const LoopVector &LV : LDT) {
441 assert(LV.size() > 0 && "Empty loop set was build!");
442
443 // Skip singleton loop sets as they do not offer fusion opportunities on
444 // this level.
445 if (LV.size() == 1)
446 continue;
447 #ifndef NDEBUG
448 if (VerboseFusionDebugging) {
449 LLVM_DEBUG({
450 dbgs() << " Visit loop set (#" << LV.size() << "):\n";
451 printLoopVector(LV);
452 });
453 }
454 #endif
455
456 collectFusionCandidates(LV);
457 Changed |= fuseCandidates();
458 }
459
460 // Finished analyzing candidates at this level.
461 // Descend to the next level and clear all of the candidates currently
462 // collected. Note that it will not be possible to fuse any of the
463 // existing candidates with new candidates because the new candidates will
464 // be at a different nest level and thus not be control flow equivalent
465 // with all of the candidates collected so far.
466 LLVM_DEBUG(dbgs() << "Descend one level!\n");
467 LDT.descend();
468 FusionCandidates.clear();
469 }
470
471 if (Changed)
472 LLVM_DEBUG(dbgs() << "Function after Loop Fusion: \n"; F.dump(););
473
474 #ifndef NDEBUG
475 assert(DT.verify());
476 assert(PDT.verify());
477 LI.verify(DT);
478 SE.verify();
479 #endif
480
481 LLVM_DEBUG(dbgs() << "Loop Fusion complete\n");
482 return Changed;
483 }
484
485 private:
486 /// Determine if two fusion candidates are control flow equivalent.
487 ///
488 /// Two fusion candidates are control flow equivalent if when one executes,
489 /// the other is guaranteed to execute. This is determined using dominators
490 /// and post-dominators: if A dominates B and B post-dominates A then A and B
491 /// are control-flow equivalent.
492 bool isControlFlowEquivalent(const FusionCandidate &FC0,
493 const FusionCandidate &FC1) const {
494 assert(FC0.Preheader && FC1.Preheader && "Expecting valid preheaders");
495
496 if (DT.dominates(FC0.Preheader, FC1.Preheader))
497 return PDT.dominates(FC1.Preheader, FC0.Preheader);
498
499 if (DT.dominates(FC1.Preheader, FC0.Preheader))
500 return PDT.dominates(FC0.Preheader, FC1.Preheader);
501
502 return false;
503 }
504
505 /// Determine if a fusion candidate (representing a loop) is eligible for
506 /// fusion. Note that this only checks whether a single loop can be fused - it
507 /// does not check whether it is *legal* to fuse two loops together.
508 bool eligibleForFusion(const FusionCandidate &FC) const {
509 if (!FC.isValid()) {
510 LLVM_DEBUG(dbgs() << "FC " << FC << " has invalid CFG requirements!\n");
511 if (!FC.Preheader)
512 InvalidPreheader++;
513 if (!FC.Header)
514 InvalidHeader++;
515 if (!FC.ExitingBlock)
516 InvalidExitingBlock++;
517 if (!FC.ExitBlock)
518 InvalidExitBlock++;
519 if (!FC.Latch)
520 InvalidLatch++;
521 if (FC.L->isInvalid())
522 InvalidLoop++;
523
524 return false;
525 }
526
527 // Require ScalarEvolution to be able to determine a trip count.
528 if (!SE.hasLoopInvariantBackedgeTakenCount(FC.L)) {
529 LLVM_DEBUG(dbgs() << "Loop " << FC.L->getName()
530 << " trip count not computable!\n");
531 InvalidTripCount++;
532 return false;
533 }
534
535 if (!FC.L->isLoopSimplifyForm()) {
536 LLVM_DEBUG(dbgs() << "Loop " << FC.L->getName()
537 << " is not in simplified form!\n");
538 NotSimplifiedForm++;
539 return false;
540 }
541
542 return true;
543 }
544
545 /// Iterate over all loops in the given loop set and identify the loops that
546 /// are eligible for fusion. Place all eligible fusion candidates into Control
547 /// Flow Equivalent sets, sorted by dominance.
548 void collectFusionCandidates(const LoopVector &LV) {
549 for (Loop *L : LV) {
550 FusionCandidate CurrCand(L, &DT, &PDT);
551 if (!eligibleForFusion(CurrCand))
552 continue;
553
554 // Go through each list in FusionCandidates and determine if L is control
555 // flow equivalent with the first loop in that list. If it is, append LV.
556 // If not, go to the next list.
557 // If no suitable list is found, start another list and add it to
558 // FusionCandidates.
559 bool FoundSet = false;
560
561 for (auto &CurrCandSet : FusionCandidates) {
562 if (isControlFlowEquivalent(*CurrCandSet.begin(), CurrCand)) {
563 CurrCandSet.insert(CurrCand);
564 FoundSet = true;
565 #ifndef NDEBUG
566 if (VerboseFusionDebugging)
567 LLVM_DEBUG(dbgs() << "Adding " << CurrCand
568 << " to existing candidate set\n");
569 #endif
570 break;
571 }
572 }
573 if (!FoundSet) {
574 // No set was found. Create a new set and add to FusionCandidates
575 #ifndef NDEBUG
576 if (VerboseFusionDebugging)
577 LLVM_DEBUG(dbgs() << "Adding " << CurrCand << " to new set\n");
578 #endif
579 FusionCandidateSet NewCandSet;
580 NewCandSet.insert(CurrCand);
581 FusionCandidates.push_back(NewCandSet);
582 }
583 NumFusionCandidates++;
584 }
585 }
586
587 /// Determine if it is beneficial to fuse two loops.
588 ///
589 /// For now, this method simply returns true because we want to fuse as much
590 /// as possible (primarily to test the pass). This method will evolve, over
591 /// time, to add heuristics for profitability of fusion.
592 bool isBeneficialFusion(const FusionCandidate &FC0,
593 const FusionCandidate &FC1) {
594 return true;
595 }
596
597 /// Determine if two fusion candidates have the same trip count (i.e., they
598 /// execute the same number of iterations).
599 ///
600 /// Note that for now this method simply returns a boolean value because there
601 /// are no mechanisms in loop fusion to handle different trip counts. In the
602 /// future, this behaviour can be extended to adjust one of the loops to make
603 /// the trip counts equal (e.g., loop peeling). When this is added, this
604 /// interface may need to change to return more information than just a
605 /// boolean value.
606 bool identicalTripCounts(const FusionCandidate &FC0,
607 const FusionCandidate &FC1) const {
608 const SCEV *TripCount0 = SE.getBackedgeTakenCount(FC0.L);
609 if (isa(TripCount0)) {
610 UncomputableTripCount++;
611 LLVM_DEBUG(dbgs() << "Trip count of first loop could not be computed!");
612 return false;
613 }
614
615 const SCEV *TripCount1 = SE.getBackedgeTakenCount(FC1.L);
616 if (isa(TripCount1)) {
617 UncomputableTripCount++;
618 LLVM_DEBUG(dbgs() << "Trip count of second loop could not be computed!");
619 return false;
620 }
621 LLVM_DEBUG(dbgs() << "\tTrip counts: " << *TripCount0 << " & "
622 << *TripCount1 << " are "
623 << (TripCount0 == TripCount1 ? "identical" : "different")
624 << "\n");
625
626 return (TripCount0 == TripCount1);
627 }
628
629 /// Walk each set of control flow equivalent fusion candidates and attempt to
630 /// fuse them. This does a single linear traversal of all candidates in the
631 /// set. The conditions for legal fusion are checked at this point. If a pair
632 /// of fusion candidates passes all legality checks, they are fused together
633 /// and a new fusion candidate is created and added to the FusionCandidateSet.
634 /// The original fusion candidates are then removed, as they are no longer
635 /// valid.
636 bool fuseCandidates() {
637 bool Fused = false;
638 LLVM_DEBUG(printFusionCandidates(FusionCandidates));
639 for (auto &CandidateSet : FusionCandidates) {
640 if (CandidateSet.size() < 2)
641 continue;
642
643 LLVM_DEBUG(dbgs() << "Attempting fusion on Candidate Set:\n"
644 << CandidateSet << "\n");
645
646 for (auto FC0 = CandidateSet.begin(); FC0 != CandidateSet.end(); ++FC0) {
647 assert(!LDT.isRemovedLoop(FC0->L) &&
648 "Should not have removed loops in CandidateSet!");
649 auto FC1 = FC0;
650 for (++FC1; FC1 != CandidateSet.end(); ++FC1) {
651 assert(!LDT.isRemovedLoop(FC1->L) &&
652 "Should not have removed loops in CandidateSet!");
653
654 LLVM_DEBUG(dbgs() << "Attempting to fuse candidate \n"; FC0->dump();
655 dbgs() << " with\n"; FC1->dump(); dbgs() << "\n");
656
657 FC0->verify();
658 FC1->verify();
659
660 if (!identicalTripCounts(*FC0, *FC1)) {
661 LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip "
662 "counts. Not fusing.\n");
663 NonEqualTripCount++;
664 continue;
665 }
666
667 if (!isAdjacent(*FC0, *FC1)) {
668 LLVM_DEBUG(dbgs()
669 << "Fusion candidates are not adjacent. Not fusing.\n");
670 NonAdjacent++;
671 continue;
672 }
673
674 // For now we skip fusing if the second candidate has any instructions
675 // in the preheader. This is done because we currently do not have the
676 // safety checks to determine if it is save to move the preheader of
677 // the second candidate past the body of the first candidate. Once
678 // these checks are added, this condition can be removed.
679 if (!isEmptyPreheader(*FC1)) {
680 LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty "
681 "preheader. Not fusing.\n");
682 NonEmptyPreheader++;
683 continue;
684 }
685
686 if (!dependencesAllowFusion(*FC0, *FC1)) {
687 LLVM_DEBUG(dbgs() << "Memory dependencies do not allow fusion!\n");
688 continue;
689 }
690
691 bool BeneficialToFuse = isBeneficialFusion(*FC0, *FC1);
692 LLVM_DEBUG(dbgs()
693 << "\tFusion appears to be "
694 << (BeneficialToFuse ? "" : "un") << "profitable!\n");
695 if (!BeneficialToFuse)
696 continue;
697
698 // All analysis has completed and has determined that fusion is legal
699 // and profitable. At this point, start transforming the code and
700 // perform fusion.
701
702 LLVM_DEBUG(dbgs() << "\tFusion is performed: " << *FC0 << " and "
703 << *FC1 << "\n");
704
705 // Report fusion to the Optimization Remarks.
706 // Note this needs to be done *before* performFusion because
707 // performFusion will change the original loops, making it not
708 // possible to identify them after fusion is complete.
709 reportLoopFusion(*FC0, *FC1, ORE);
710
711 FusionCandidate FusedCand(performFusion(*FC0, *FC1), &DT, &PDT);
712 FusedCand.verify();
713 assert(eligibleForFusion(FusedCand) &&
714 "Fused candidate should be eligible for fusion!");
715
716 // Notify the loop-depth-tree that these loops are not valid objects
717 // anymore.
718 LDT.removeLoop(FC1->L);
719
720 CandidateSet.erase(FC0);
721 CandidateSet.erase(FC1);
722
723 auto InsertPos = CandidateSet.insert(FusedCand);
724
725 assert(InsertPos.second &&
726 "Unable to insert TargetCandidate in CandidateSet!");
727
728 // Reset FC0 and FC1 the new (fused) candidate. Subsequent iterations
729 // of the FC1 loop will attempt to fuse the new (fused) loop with the
730 // remaining candidates in the current candidate set.
731 FC0 = FC1 = InsertPos.first;
732
733 LLVM_DEBUG(dbgs() << "Candidate Set (after fusion): " << CandidateSet
734 << "\n");
735
736 Fused = true;
737 }
738 }
739 }
740 return Fused;
741 }
742
743 /// Rewrite all additive recurrences in a SCEV to use a new loop.
744 class AddRecLoopReplacer : public SCEVRewriteVisitor {
745 public:
746 AddRecLoopReplacer(ScalarEvolution &SE, const Loop &OldL, const Loop &NewL,
747 bool UseMax = true)
748 : SCEVRewriteVisitor(SE), Valid(true), UseMax(UseMax), OldL(OldL),
749 NewL(NewL) {}
750
751 const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
752 const Loop *ExprL = Expr->getLoop();
753 SmallVector Operands;
754 if (ExprL == &OldL) {
755 Operands.append(Expr->op_begin(), Expr->op_end());
756 return SE.getAddRecExpr(Operands, &NewL, Expr->getNoWrapFlags());
757 }
758
759 if (OldL.contains(ExprL)) {
760 bool Pos = SE.isKnownPositive(Expr->getStepRecurrence(SE));
761 if (!UseMax || !Pos || !Expr->isAffine()) {
762 Valid = false;
763 return Expr;
764 }
765 return visit(Expr->getStart());
766 }
767
768 for (const SCEV *Op : Expr->operands())
769 Operands.push_back(visit(Op));
770 return SE.getAddRecExpr(Operands, ExprL, Expr->getNoWrapFlags());
771 }
772
773 bool wasValidSCEV() const { return Valid; }
774
775 private:
776 bool Valid, UseMax;
777 const Loop &OldL, &NewL;
778 };
779
780 /// Return false if the access functions of \p I0 and \p I1 could cause
781 /// a negative dependence.
782 bool accessDiffIsPositive(const Loop &L0, const Loop &L1, Instruction &I0,
783 Instruction &I1, bool EqualIsInvalid) {
784 Value *Ptr0 = getLoadStorePointerOperand(&I0);
785 Value *Ptr1 = getLoadStorePointerOperand(&I1);
786 if (!Ptr0 || !Ptr1)
787 return false;
788
789 const SCEV *SCEVPtr0 = SE.getSCEVAtScope(Ptr0, &L0);
790 const SCEV *SCEVPtr1 = SE.getSCEVAtScope(Ptr1, &L1);
791 #ifndef NDEBUG
792 if (VerboseFusionDebugging)
793 LLVM_DEBUG(dbgs() << " Access function check: " << *SCEVPtr0 << " vs "
794 << *SCEVPtr1 << "\n");
795 #endif
796 AddRecLoopReplacer Rewriter(SE, L0, L1);
797 SCEVPtr0 = Rewriter.visit(SCEVPtr0);
798 #ifndef NDEBUG
799 if (VerboseFusionDebugging)
800 LLVM_DEBUG(dbgs() << " Access function after rewrite: " << *SCEVPtr0
801 << " [Valid: " << Rewriter.wasValidSCEV() << "]\n");
802 #endif
803 if (!Rewriter.wasValidSCEV())
804 return false;
805
806 // TODO: isKnownPredicate doesnt work well when one SCEV is loop carried (by
807 // L0) and the other is not. We could check if it is monotone and test
808 // the beginning and end value instead.
809
810 BasicBlock *L0Header = L0.getHeader();
811 auto HasNonLinearDominanceRelation = [&](const SCEV *S) {
812 const SCEVAddRecExpr *AddRec = dyn_cast(S);
813 if (!AddRec)
814 return false;
815 return !DT.dominates(L0Header, AddRec->getLoop()->getHeader()) &&
816 !DT.dominates(AddRec->getLoop()->getHeader(), L0Header);
817 };
818 if (SCEVExprContains(SCEVPtr1, HasNonLinearDominanceRelation))
819 return false;
820
821 ICmpInst::Predicate Pred =
822 EqualIsInvalid ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_SGE;
823 bool IsAlwaysGE = SE.isKnownPredicate(Pred, SCEVPtr0, SCEVPtr1);
824 #ifndef NDEBUG
825 if (VerboseFusionDebugging)
826 LLVM_DEBUG(dbgs() << " Relation: " << *SCEVPtr0
827 << (IsAlwaysGE ? " >= " : " may < ") << *SCEVPtr1
828 << "\n");
829 #endif
830 return IsAlwaysGE;
831 }
832
833 /// Return true if the dependences between @p I0 (in @p L0) and @p I1 (in
834 /// @p L1) allow loop fusion of @p L0 and @p L1. The dependence analyses
835 /// specified by @p DepChoice are used to determine this.
836 bool dependencesAllowFusion(const FusionCandidate &FC0,
837 const FusionCandidate &FC1, Instruction &I0,
838 Instruction &I1, bool AnyDep,
839 FusionDependenceAnalysisChoice DepChoice) {
840 #ifndef NDEBUG
841 if (VerboseFusionDebugging) {
842 LLVM_DEBUG(dbgs() << "Check dep: " << I0 << " vs " << I1 << " : "
843 << DepChoice << "\n");
844 }
845 #endif
846 switch (DepChoice) {
847 case FUSION_DEPENDENCE_ANALYSIS_SCEV:
848 return accessDiffIsPositive(*FC0.L, *FC1.L, I0, I1, AnyDep);
849 case FUSION_DEPENDENCE_ANALYSIS_DA: {
850 auto DepResult = DI.depends(&I0, &I1, true);
851 if (!DepResult)
852 return true;
853 #ifndef NDEBUG
854 if (VerboseFusionDebugging) {
855 LLVM_DEBUG(dbgs() << "DA res: "; DepResult->dump(dbgs());
856 dbgs() << " [#l: " << DepResult->getLevels() << "][Ordered: "
857 << (DepResult->isOrdered() ? "true" : "false")
858 << "]\n");
859 LLVM_DEBUG(dbgs() << "DepResult Levels: " << DepResult->getLevels()
860 << "\n");
861 }
862 #endif
863
864 if (DepResult->getNextPredecessor() || DepResult->getNextSuccessor())
865 LLVM_DEBUG(
866 dbgs() << "TODO: Implement pred/succ dependence handling!\n");
867
868 // TODO: Can we actually use the dependence info analysis here?
869 return false;
870 }
871
872 case FUSION_DEPENDENCE_ANALYSIS_ALL:
873 return dependencesAllowFusion(FC0, FC1, I0, I1, AnyDep,
874 FUSION_DEPENDENCE_ANALYSIS_SCEV) ||
875 dependencesAllowFusion(FC0, FC1, I0, I1, AnyDep,
876 FUSION_DEPENDENCE_ANALYSIS_DA);
877 }
878
879 llvm_unreachable("Unknown fusion dependence analysis choice!");
880 }
881
882 /// Perform a dependence check and return if @p FC0 and @p FC1 can be fused.
883 bool dependencesAllowFusion(const FusionCandidate &FC0,
884 const FusionCandidate &FC1) {
885 LLVM_DEBUG(dbgs() << "Check if " << FC0 << " can be fused with " << FC1
886 << "\n");
887 assert(FC0.L->getLoopDepth() == FC1.L->getLoopDepth());
888 assert(DT.dominates(FC0.Preheader, FC1.Preheader));
889
890 for (Instruction *WriteL0 : FC0.MemWrites) {
891 for (Instruction *WriteL1 : FC1.MemWrites)
892 if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *WriteL1,
893 /* AnyDep */ false,
894 FusionDependenceAnalysis)) {
895 InvalidDependencies++;
896 return false;
897 }
898 for (Instruction *ReadL1 : FC1.MemReads)
899 if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *ReadL1,
900 /* AnyDep */ false,
901 FusionDependenceAnalysis)) {
902 InvalidDependencies++;
903 return false;
904 }
905 }
906
907 for (Instruction *WriteL1 : FC1.MemWrites) {
908 for (Instruction *WriteL0 : FC0.MemWrites)
909 if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *WriteL1,
910 /* AnyDep */ false,
911 FusionDependenceAnalysis)) {
912 InvalidDependencies++;
913 return false;
914 }
915 for (Instruction *ReadL0 : FC0.MemReads)
916 if (!dependencesAllowFusion(FC0, FC1, *ReadL0, *WriteL1,
917 /* AnyDep */ false,
918 FusionDependenceAnalysis)) {
919 InvalidDependencies++;
920 return false;
921 }
922 }
923
924 // Walk through all uses in FC1. For each use, find the reaching def. If the
925 // def is located in FC0 then it is is not safe to fuse.
926 for (BasicBlock *BB : FC1.L->blocks())
927 for (Instruction &I : *BB)
928 for (auto &Op : I.operands())
929 if (Instruction *Def = dyn_cast(Op))
930 if (FC0.L->contains(Def->getParent())) {
931 InvalidDependencies++;
932 return false;
933 }
934
935 return true;
936 }
937
938 /// Determine if the exit block of \p FC0 is the preheader of \p FC1. In this
939 /// case, there is no code in between the two fusion candidates, thus making
940 /// them adjacent.
941 bool isAdjacent(const FusionCandidate &FC0,
942 const FusionCandidate &FC1) const {
943 return FC0.ExitBlock == FC1.Preheader;
944 }
945
946 bool isEmptyPreheader(const FusionCandidate &FC) const {
947 return FC.Preheader->size() == 1;
948 }
949
950 /// Fuse two fusion candidates, creating a new fused loop.
951 ///
952 /// This method contains the mechanics of fusing two loops, represented by \p
953 /// FC0 and \p FC1. It is assumed that \p FC0 dominates \p FC1 and \p FC1
954 /// postdominates \p FC0 (making them control flow equivalent). It also
955 /// assumes that the other conditions for fusion have been met: adjacent,
956 /// identical trip counts, and no negative distance dependencies exist that
957 /// would prevent fusion. Thus, there is no checking for these conditions in
958 /// this method.
959 ///
960 /// Fusion is performed by rewiring the CFG to update successor blocks of the
961 /// components of the loops. Specifically, the following changes are done:
962 ///
963 /// 1. The preheader of \p FC1 is removed as it is no longer necessary
964 /// (because it is currently only a single statement block).
965 /// 2. The latch of \p FC0 is modified to jump to the header of \p FC1.
966 /// 3. The latch of \p FC1 is modified to jump to the header of \p FC0.
967 /// 4. All blocks from \p FC1 are removed from FC1 and added to FC0.
968 ///
969 /// All of these modifications are done with dominator tree updates, thus
970 /// keeping the dominator (and post dominator) information up-to-date.
971 ///
972 /// This can be improved in the future by actually merging blocks during
973 /// fusion. For example, the preheader of \p FC1 can be merged with the
974 /// preheader of \p FC0. This would allow loops with more than a single
975 /// statement in the preheader to be fused. Similarly, the latch blocks of the
976 /// two loops could also be fused into a single block. This will require
977 /// analysis to prove it is safe to move the contents of the block past
978 /// existing code, which currently has not been implemented.
///
/// \returns the loop of \p FC0, which after fusion also owns the blocks and
/// child loops that used to belong to the loop of \p FC1. The loop of \p FC1
/// is erased from LoopInfo, and ScalarEvolution is told to forget both loops.
979 Loop *performFusion(const FusionCandidate &FC0, const FusionCandidate &FC1) {
980 assert(FC0.isValid() && FC1.isValid() &&
981 "Expecting valid fusion candidates");
982
983 LLVM_DEBUG(dbgs() << "Fusion Candidate 0: \n"; FC0.dump();
984 dbgs() << "Fusion Candidate 1: \n"; FC1.dump(););
985
// The candidates must be adjacent and the preheader of FC1 must contain a
// single instruction whose only successor is FC1's header.
986 assert(FC1.Preheader == FC0.ExitBlock);
987 assert(FC1.Preheader->size() == 1 &&
988 FC1.Preheader->getSingleSuccessor() == FC1.Header);
989
990 // Remember the phi nodes originally in the header of FC0 in order to rewire
991 // them later. However, this is only necessary if the new loop carried
992 // values might not dominate the exiting branch. While we do not generally
993 // test if this is the case but simply insert intermediate phi nodes, we
994 // need to make sure these intermediate phi nodes have different
995 // predecessors. To this end, we filter the special case where the exiting
996 // block is the latch block of the first loop. Nothing needs to be done
997 // anyway as all loop carried values dominate the latch and thereby also the
998 // exiting branch.
999 SmallVector OriginalFC0PHIs;
1000 if (FC0.ExitingBlock != FC0.Latch)
1001 for (PHINode &PHI : FC0.Header->phis())
1002 OriginalFC0PHIs.push_back(&PHI);
1003
1004 // Replace incoming blocks for header PHIs first.
1005 FC1.Preheader->replaceSuccessorsPhiUsesWith(FC0.Preheader);
1006 FC0.Latch->replaceSuccessorsPhiUsesWith(FC1.Latch);
1007
1008 // Then modify the control flow and update DT and PDT.
1009 SmallVector TreeUpdates;
1010
1011 // The old exiting block of the first loop (FC0) has to jump to the header
1012 // of the second as we need to execute the code in the second header block
1013 // regardless of the trip count. That is, if the trip count is 0, so the
1014 // back edge is never taken, we still have to execute both loop headers,
1015 // especially (but not only!) if the second is a do-while style loop.
1016 // However, doing so might invalidate the phi nodes of the first loop as
1017 // the new values do only need to dominate their latch and not the exiting
1018 // predicate. To remedy this potential problem we always introduce phi
1019 // nodes in the header of the second loop later that select the loop carried
1020 // value, if the second header was reached through an old latch of the
1021 // first, or undef otherwise. This is sound as exiting the first implies the
1022 // second will exit too, __without__ taking the back-edge. (Their
1023 // trip-counts are equal after all.)
1024 // KB: Would this sequence be simpler to just make FC0.ExitingBlock go
1025 // to FC1.Header? I think this is basically what the three sequences are
1026 // trying to accomplish; however, doing this directly in the CFG may mean
1027 // the DT/PDT becomes invalid
1028 FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC1.Preheader,
1029 FC1.Header);
1030 TreeUpdates.emplace_back(DominatorTree::UpdateType(
1031 DominatorTree::Delete, FC0.ExitingBlock, FC1.Preheader));
1032 TreeUpdates.emplace_back(DominatorTree::UpdateType(
1033 DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
1034
1035 // The pre-header of L1 is not necessary anymore.
1036 assert(pred_begin(FC1.Preheader) == pred_end(FC1.Preheader));
1037 FC1.Preheader->getTerminator()->eraseFromParent();
1038 new UnreachableInst(FC1.Preheader->getContext(), FC1.Preheader);
1039 TreeUpdates.emplace_back(DominatorTree::UpdateType(
1040 DominatorTree::Delete, FC1.Preheader, FC1.Header));
1041
1042 // Moves the phi nodes from the second to the first loop's header block.
// Phi nodes without any use are erased instead of being moved.
1043 while (PHINode *PHI = dyn_cast(&FC1.Header->front())) {
1044 if (SE.isSCEVable(PHI->getType()))
1045 SE.forgetValue(PHI);
1046 if (PHI->hasNUsesOrMore(1))
1047 PHI->moveBefore(&*FC0.Header->getFirstInsertionPt());
1048 else
1049 PHI->eraseFromParent();
1050 }
1051
1052 // Introduce new phi nodes in the second loop header to ensure
1053 // exiting the first and jumping to the header of the second does not break
1054 // the SSA property of the phis originally in the first loop. See also the
1055 // comment above.
1056 Instruction *L1HeaderIP = &FC1.Header->front();
1057 for (PHINode *LCPHI : OriginalFC0PHIs) {
1058 int L1LatchBBIdx = LCPHI->getBasicBlockIndex(FC1.Latch);
1059 assert(L1LatchBBIdx >= 0 &&
1060 "Expected loop carried value to be rewired at this point!");
1061
1062 Value *LCV = LCPHI->getIncomingValue(L1LatchBBIdx);
1063
// The new phi yields the loop carried value when coming from FC0's latch
// and undef when coming from FC0's exiting block.
1064 PHINode *L1HeaderPHI = PHINode::Create(
1065 LCV->getType(), 2, LCPHI->getName() + ".afterFC0", L1HeaderIP);
1066 L1HeaderPHI->addIncoming(LCV, FC0.Latch);
1067 L1HeaderPHI->addIncoming(UndefValue::get(LCV->getType()),
1068 FC0.ExitingBlock);
1069
1070 LCPHI->setIncomingValue(L1LatchBBIdx, L1HeaderPHI);
1071 }
1072
1073 // Replace latch terminator destinations.
1074 FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
1075 FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
1076
1077 // If FC0.Latch and FC0.ExitingBlock are the same then we have already
1078 // performed the updates above.
1079 if (FC0.Latch != FC0.ExitingBlock)
1080 TreeUpdates.emplace_back(DominatorTree::UpdateType(
1081 DominatorTree::Insert, FC0.Latch, FC1.Header));
1082
1083 TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete,
1084 FC0.Latch, FC0.Header));
1085 TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Insert,
1086 FC1.Latch, FC0.Header));
1087 TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete,
1088 FC1.Latch, FC1.Header));
1089
1090 // Update DT/PDT
1091 DTU.applyUpdates(TreeUpdates);
1092
// The old preheader of FC1 is removed from LoopInfo and deleted through the
// updater.
1093 LI.removeBlock(FC1.Preheader);
1094 DTU.deleteBB(FC1.Preheader);
1095 DTU.flush();
1096
1097 // Is there a way to keep SE up-to-date so we don't need to forget the loops
1098 // and rebuild the information in subsequent passes of fusion?
1099 SE.forgetLoop(FC1.L);
1100 SE.forgetLoop(FC0.L);
1101
1102 // Merge the loops.
// Iterate over a copy of FC1's block list because the loop removes blocks
// from FC1 while walking it.
1103 SmallVector Blocks(FC1.L->block_begin(),
1104 FC1.L->block_end());
1105 for (BasicBlock *BB : Blocks) {
1106 FC0.L->addBlockEntry(BB);
1107 FC1.L->removeBlockFromLoop(BB);
// Only blocks whose innermost loop was FC1 itself are re-mapped to FC0.
1108 if (LI.getLoopFor(BB) != FC1.L)
1109 continue;
1110 LI.changeLoopFor(BB, FC0.L);
1111 }
// Re-parent every child loop of FC1 under FC0.
1112 while (!FC1.L->empty()) {
1113 const auto &ChildLoopIt = FC1.L->begin();
1114 Loop *ChildLoop = *ChildLoopIt;
1115 FC1.L->removeChildLoop(ChildLoopIt);
1116 FC0.L->addChildLoop(ChildLoop);
1117 }
1118
1119 // Delete the now empty loop L1.
1120 LI.erase(FC1.L);
1121
// In builds with assertions enabled, verify the function and the analyses
// that were updated in place.
1122 #ifndef NDEBUG
1123 assert(!verifyFunction(*FC0.Header->getParent(), &errs()));
1124 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1125 assert(PDT.verify());
1126 LI.verify(DT);
1127 SE.verify();
1128 #endif
1129
1130 FuseCounter++;
1131
1132 LLVM_DEBUG(dbgs() << "Fusion done:\n");
1133
1134 return FC0.L;
1135 }
1136 };
1137
// Function pass wrapper around LoopFuser: it collects the analyses the fuser
// needs and runs LoopFuser::fuseLoops on the function.
1138 struct LoopFuseLegacy : public FunctionPass {
1139
// Pass identifier handed to the FunctionPass constructor.
1140 static char ID;
1141
// Registers the pass with the global pass registry on construction.
1142 LoopFuseLegacy() : FunctionPass(ID) {
1143 initializeLoopFuseLegacyPass(*PassRegistry::getPassRegistry());
1144 }
1145
// Declares the pass dependencies: the pass identified by LoopSimplifyID plus
// six required analyses, four of which are also declared as preserved.
1146 void getAnalysisUsage(AnalysisUsage &AU) const override {
1147 AU.addRequiredID(LoopSimplifyID);
1148 AU.addRequired();
1149 AU.addRequired();
1150 AU.addRequired();
1151 AU.addRequired();
1152 AU.addRequired();
1153 AU.addRequired();
1154
1155 AU.addPreserved();
1156 AU.addPreserved();
1157 AU.addPreserved();
1158 AU.addPreserved();
1159 }
1160
// Returns false without touching the function when skipFunction(F) says so;
// otherwise returns whatever LoopFuser::fuseLoops reports.
1161 bool runOnFunction(Function &F) override {
1162 if (skipFunction(F))
1163 return false;
// Loop info, dominator tree, dependence info, scalar evolution,
// post-dominator tree and optimization remark emitter, in that order.
1164 auto &LI = getAnalysis().getLoopInfo();
1165 auto &DT = getAnalysis().getDomTree();
1166 auto &DI = getAnalysis().getDI();
1167 auto &SE = getAnalysis().getSE();
1168 auto &PDT = getAnalysis().getPostDomTree();
1169 auto &ORE = getAnalysis().getORE();
1170
1171 const DataLayout &DL = F.getParent()->getDataLayout();
1172 LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL);
1173 return LF.fuseLoops(F);
1174 }
1175 };
1176
// Entry point of LoopFusePass: fetches six analysis results for F from the
// analysis manager, runs LoopFuser::fuseLoops and reports which analyses
// remain valid afterwards.
1177 PreservedAnalyses LoopFusePass::run(Function &F, FunctionAnalysisManager &AM) {
1178 auto &LI = AM.getResult(F);
1179 auto &DT = AM.getResult(F);
1180 auto &DI = AM.getResult(F);
1181 auto &SE = AM.getResult(F);
1182 auto &PDT = AM.getResult(F);
1183 auto &ORE = AM.getResult(F);
1184
1185 const DataLayout &DL = F.getParent()->getDataLayout();
1186 LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL);
1187 bool Changed = LF.fuseLoops(F);
// Nothing was fused, so every analysis is still valid.
1188 if (!Changed)
1189 return PreservedAnalyses::all();
1190
// Something was fused: only the four analyses marked below are reported as
// preserved.
1191 PreservedAnalyses PA;
1192 PA.preserve();
1193 PA.preserve();
1194 PA.preserve();
1195 PA.preserve();
1196 return PA;
1197 }
1198
// Definition of the static pass identifier that the LoopFuseLegacy
// constructor passes to FunctionPass.
1199 char LoopFuseLegacy::ID = 0;
1200
// Pass registration for LoopFuseLegacy, using the strings "loop-fusion" and
// "Loop Fusion", together with the six passes it declares as dependencies.
1201 INITIALIZE_PASS_BEGIN(LoopFuseLegacy, "loop-fusion", "Loop Fusion", false,
1202 false)
1203 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
1204 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
1205 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1206 INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
1207 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
1208 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
1209 INITIALIZE_PASS_END(LoopFuseLegacy, "loop-fusion", "Loop Fusion", false, false)
1210
// Factory function: returns a newly heap-allocated LoopFuseLegacy instance.
1211 FunctionPass *llvm::createLoopFusePass() { return new LoopFuseLegacy(); }
6161 initializeJumpThreadingPass(Registry);
6262 initializeLegacyLICMPassPass(Registry);
6363 initializeLegacyLoopSinkPassPass(Registry);
64 initializeLoopFuseLegacyPass(Registry);
6564 initializeLoopDataPrefetchLegacyPassPass(Registry);
6665 initializeLoopDeletionLegacyPassPass(Registry);
6766 initializeLoopAccessLegacyAnalysisPass(Registry);
+0
-11
test/Transforms/ADCE/2002-01-31-UseStuckAround.ll less more
None ; RUN: opt < %s -adce
1
2 define i32 @"main"(i32 %argc) {
3 br label %2
4
5 %retval = phi i32 [ %argc, %2 ] ; [#uses=2]
6 %two = add i32 %retval, %retval ; [#uses=1]
7 ret i32 %two
8
9 br label %1
10 }
+0
-16
test/Transforms/ADCE/2002-05-22-PHITest.ll less more
None ; It is illegal to remove BB1 because it will mess up the PHI node!
1 ;
2 ; RUN: opt < %s -adce -S | grep BB1
3
4 define i32 @test(i1 %C, i32 %A, i32 %B) {
5 ;
6 br i1 %C, label %BB1, label %BB2
7
8 BB1: ; preds = %0
9 br label %BB2
10
11 BB2: ; preds = %BB1, %0
12 %R = phi i32 [ %A, %0 ], [ %B, %BB1 ] ; [#uses=1]
13 ret i32 %R
14 }
15
+0
-35
test/Transforms/ADCE/2002-05-23-ZeroArgPHITest.ll less more
None ; This testcase contains an entire loop that should be removed. The only thing
1 ; left is the store instruction in BB0. The problem this testcase was running
2 ; into was that when the reg109 PHI was getting zero predecessors, it was
3 ; removed even though there were uses still around. Now the uses are filled
4 ; in with a dummy value before the PHI is deleted.
5 ;
6 ; RUN: opt < %s -S -adce | grep bb1
7 ; RUN: opt < %s -S -adce -adce-remove-loops | FileCheck %s
8
9 %node_t = type { double*, %node_t*, %node_t**, double**, double*, i32, i32 }
10
11 define void @localize_local(%node_t* %nodelist) {
12 bb0:
13 %nodelist.upgrd.1 = alloca %node_t* ; <%node_t**> [#uses=2]
14 store %node_t* %nodelist, %node_t** %nodelist.upgrd.1
15 br label %bb1
16
17 bb1: ; preds = %bb0
18 %reg107 = load %node_t*, %node_t** %nodelist.upgrd.1 ; <%node_t*> [#uses=2]
19 %cond211 = icmp eq %node_t* %reg107, null ; [#uses=1]
20 ; CHECK: br label %bb3
21 br i1 %cond211, label %bb3, label %bb2
22
23 bb2: ; preds = %bb2, %bb1
24 %reg109 = phi %node_t* [ %reg110, %bb2 ], [ %reg107, %bb1 ] ; <%node_t*> [#uses=1]
25 %reg212 = getelementptr %node_t, %node_t* %reg109, i64 0, i32 1 ; <%node_t**> [#uses=1]
26 %reg110 = load %node_t*, %node_t** %reg212 ; <%node_t*> [#uses=2]
27 %cond213 = icmp ne %node_t* %reg110, null ; [#uses=1]
28 ; CHECK: br label %bb3
29 br i1 %cond213, label %bb2, label %bb3
30
31 bb3: ; preds = %bb2, %bb1
32 ret void
33 }
34
+0
-19
test/Transforms/ADCE/2002-05-28-Crash-distilled.ll less more
None ; This testcase is a distilled form of: 2002-05-28-Crash.ll
1
2 ; RUN: opt < %s -adce
3 ; RUN: opt < %s -adce -adce-remove-loops -S | FileCheck %s
4
5 define float @test(i32 %i) {
6 %F = sitofp i32 %i to float ; [#uses=1]
7 %I = bitcast i32 %i to i32 ; [#uses=1]
8 br label %Loop
9
10 Loop: ; preds = %Loop, %0
11 %B = icmp ne i32 %I, 0 ; [#uses=1]
12 ; CHECK: br label %Out
13 br i1 %B, label %Out, label %Loop
14
15 Out: ; preds = %Loop
16 ret float %F
17 }
18
+0
-56
test/Transforms/ADCE/2002-05-28-Crash.ll less more
None ; This testcase is distilled from the GNU rx package. The loop should be
1 ; removed but causes a problem when ADCE does. The source function is:
2 ; int rx_bitset_empty (int size, rx_Bitset set) {
3 ; int x;
4 ; RX_subset s;
5 ; s = set[0];
6 ; set[0] = 1;
7 ; for (x = rx_bitset_numb_subsets(size) - 1; !set[x]; --x)
8 ; ;
9 ; set[0] = s;
10 ; return !s;
11 ;}
12 ;
13 ; RUN: opt < %s -adce
14 ; RUN: opt < %s -adce -adce-remove-loops -S | FileCheck %s
15
16 define i32 @rx_bitset_empty(i32 %size, i32* %set) {
17 bb1:
18 %reg110 = load i32, i32* %set ; [#uses=2]
19 store i32 1, i32* %set
20 %cast112 = sext i32 %size to i64 ; [#uses=1]
21 %reg113 = add i64 %cast112, 31 ; [#uses=1]
22 %reg114 = lshr i64 %reg113, 5 ; [#uses=2]
23 %cast109 = trunc i64 %reg114 to i32 ; [#uses=1]
24 %reg129 = add i32 %cast109, -1 ; [#uses=1]
25 %reg114-idxcast = trunc i64 %reg114 to i32 ; [#uses=1]
26 %reg114-idxcast-offset = add i32 %reg114-idxcast, 1073741823 ; [#uses=1]
27 %reg114-idxcast-offset.upgrd.1 = zext i32 %reg114-idxcast-offset to i64 ; [#uses=1]
28 %reg124 = getelementptr i32, i32* %set, i64 %reg114-idxcast-offset.upgrd.1 ; [#uses=1]
29 %reg125 = load i32, i32* %reg124 ; [#uses=1]
30 %cond232 = icmp ne i32 %reg125, 0 ; [#uses=1]
31 ; CHECK: br label %bb3
32 br i1 %cond232, label %bb3, label %bb2
33
34 bb2: ; preds = %bb2, %bb1
35 %cann-indvar = phi i32 [ 0, %bb1 ], [ %add1-indvar, %bb2 ] ; [#uses=2]
36 %reg130-scale = mul i32 %cann-indvar, -1 ; [#uses=1]
37 %reg130 = add i32 %reg130-scale, %reg129 ; [#uses=1]
38 %add1-indvar = add i32 %cann-indvar, 1 ; [#uses=1]
39 %reg130-idxcast = bitcast i32 %reg130 to i32 ; [#uses=1]
40 %reg130-idxcast-offset = add i32 %reg130-idxcast, 1073741823 ; [#uses=1]
41 %reg130-idxcast-offset.upgrd.2 = zext i32 %reg130-idxcast-offset to i64 ; [#uses=1]
42 %reg118 = getelementptr i32, i32* %set, i64 %reg130-idxcast-offset.upgrd.2 ; [#uses=1]
43 %reg119 = load i32, i32* %reg118 ; [#uses=1]
44 %cond233 = icmp eq i32 %reg119, 0 ; [#uses=1]
45 br i1 %cond233, label %bb2, label %bb3
46
47 bb3: ; preds = %bb2, %bb1
48 store i32 %reg110, i32* %set
49 %cast126 = zext i32 %reg110 to i64 ; [#uses=1]
50 %reg127 = add i64 %cast126, -1 ; [#uses=1]
51 %reg128 = lshr i64 %reg127, 63 ; [#uses=1]
52 %cast120 = trunc i64 %reg128 to i32 ; [#uses=1]
53 ret i32 %cast120
54 }
55
+0
-14
test/Transforms/ADCE/2002-07-17-AssertionFailure.ll less more
None ; This testcase fails because ADCE does not correctly delete the chain of
1 ; three instructions that are dead here. Ironically, if there were a dead basic
2 ; block in this function, it would work fine, but that would be the part we
3 ; have to fix now, wouldn't it....
4 ;
5 ; RUN: opt < %s -adce -S | FileCheck %s
6
7 define void @foo(i8* %reg5481) {
8 %cast611 = bitcast i8* %reg5481 to i8** ; [#uses=1]
9 %reg162 = load i8*, i8** %cast611 ; [#uses=1]
10 ; CHECK-NOT: ptrtoint
11 ptrtoint i8* %reg162 to i32 ; :1 [#uses=0]
12 ret void
13 }
+0
-50
test/Transforms/ADCE/2002-07-17-PHIAssertion.ll less more
None ; This testcase was extracted from the gzip SPEC benchmark
1 ;
2 ; RUN: opt < %s -adce | FileCheck %s
3
4 @bk = external global i32 ; [#uses=2]
5 @hufts = external global i32 ; [#uses=1]
6
7 define i32 @inflate() {
8 bb0:
9 br label %bb2
10
11 bb2: ; preds = %bb6, %bb0
12 %reg128 = phi i32 [ %reg130, %bb6 ], [ 0, %bb0 ] ; [#uses=2]
13 br i1 true, label %bb4, label %bb3
14
15 bb3: ; preds = %bb2
16 br label %UnifiedExitNode
17
18 ; CHECK-NOT: bb4:
19 ; CHECK-NOT: bb5:
20 bb4: ; preds = %bb2
21 %reg117 = load i32, i32* @hufts ; [#uses=2]
22 %cond241 = icmp ule i32 %reg117, %reg128 ; [#uses=1]
23 br i1 %cond241, label %bb6, label %bb5
24
25 bb5: ; preds = %bb4
26 br label %bb6
27
28 bb6: ; preds = %bb5, %bb4
29 %reg130 = phi i32 [ %reg117, %bb5 ], [ %reg128, %bb4 ] ; [#uses=1]
30 br i1 false, label %bb2, label %bb7
31
32 bb7: ; preds = %bb6
33 %reg126 = load i32, i32* @bk ; [#uses=1]
34 %cond247 = icmp ule i32 %reg126, 7 ; [#uses=1]
35 br i1 %cond247, label %bb9, label %bb8
36
37 bb8: ; preds = %bb8, %bb7
38 %reg119 = load i32, i32* @bk ; [#uses=1]
39 %cond256 = icmp ugt i32 %reg119, 7 ; [#uses=1]
40 br i1 %cond256, label %bb8, label %bb9
41
42 bb9: ; preds = %bb8, %bb7
43 br label %UnifiedExitNode
44
45 UnifiedExitNode: ; preds = %bb9, %bb3
46 %UnifiedRetVal = phi i32 [ 7, %bb3 ], [ 0, %bb9 ] ; [#uses=1]
47 ret i32 %UnifiedRetVal
48 }
49
+0
-10
test/Transforms/ADCE/2002-07-29-Segfault.ll less more
None ; RUN: opt < %s -adce -disable-output
1 ; RUN: opt < %s -adce -disable-output -adce-remove-loops
2
3 define void @test() {
4 br label %BB3
5
6 BB3: ; preds = %BB3, %0
7 br label %BB3
8 }
9
+0
-30
test/Transforms/ADCE/2003-01-22-PredecessorProblem.ll less more
None ; Testcase reduced from 197.parser by bugpoint
1 ; RUN: opt < %s -adce
2 ; RUN: opt < %s -adce -adce-remove-loops -S | FileCheck %s
3
4 define void @conjunction_prune() {
5 ;
6 br label %bb19
7
8 bb19: ; preds = %bb23, %bb22, %0
9 %reg205 = phi i8* [ null, %bb22 ], [ null, %bb23 ], [ null, %0 ] ; [#uses=1]
10 ; CHECK: br label %bb22
11 br i1 false, label %bb21, label %bb22
12
13 bb21: ; preds = %bb19
14 %cast455 = bitcast i8* %reg205 to i8** ; [#uses=0]
15 ; CHECK: br label %bb22
16 br label %bb22
17
18 bb22: ; preds = %bb21, %bb19
19 ; CHECK: br label %bb23
20 br i1 false, label %bb19, label %bb23
21
22 bb23: ; preds = %bb22
23 ; CHECK: br label %bb28
24 br i1 false, label %bb19, label %bb28
25
26 bb28: ; preds = %bb23
27 ret void
28 }
29
+0
-37
test/Transforms/ADCE/2003-04-25-PHIPostDominateProblem.ll less more
None ; This testcase caused an assertion failure because a PHI node did not have
1 ; entries for its postdominator. But I think this can only happen when the
2 ; PHI node is dead, so we just avoid patching up dead PHI nodes.
3
4 ; RUN: opt < %s -adce -S | FileCheck %s
5 ; RUN: opt < %s -adce -adce-remove-loops -S | FileCheck %s
6
7 target datalayout = "e-p:32:32"
8
9 define void @dead_test8() {
10 entry:
11 br label %loopentry
12
13 loopentry: ; preds = %endif, %entry
14 %k.1 = phi i32 [ %k.0, %endif ], [ 0, %entry ] ; [#uses=1]
15 br i1 false, label %no_exit, label %return
16
17 no_exit: ; preds = %loopentry
18 ; CHECK: br label %then
19 br i1 false, label %then, label %else
20
21 then: ; preds = %no_exit
22 br label %endif
23
24 else: ; preds = %no_exit
25 %dec = add i32 %k.1, -1 ; [#uses=1]
26 br label %endif
27
28 endif: ; preds = %else, %then
29 %k.0 = phi i32 [ %dec, %else ], [ 0, %then ] ; [#uses=1]
30 store i32 2, i32* null
31 br label %loopentry
32
33 return: ; preds = %loopentry
34 ret void
35 }
36
+0
-29
test/Transforms/ADCE/2003-06-11-InvalidCFG.ll less more
None ; RUN: opt < %s -adce -disable-output
1 ; RUN: opt < %s -adce -adce-remove-loops -disable-output
2
3 @G = external global i32* ; [#uses=1]
4
5 declare void @Fn(i32*)
6
7 define i32 @main(i32 %argc.1, i8** %argv.1) {
8 entry:
9 br label %endif.42
10
11 endif.42: ; preds = %shortcirc_done.12, %then.66, %endif.42, %entry
12 br i1 false, label %endif.65, label %endif.42
13
14 then.66: ; preds = %shortcirc_done.12
15 call void @Fn( i32* %tmp.2846 )
16 br label %endif.42
17
18 endif.65: ; preds = %endif.42
19 %tmp.2846 = load i32*, i32** @G ; [#uses=1]
20 br i1 false, label %shortcirc_next.12, label %shortcirc_done.12
21
22 shortcirc_next.12: ; preds = %endif.65
23 br label %shortcirc_done.12
24
25 shortcirc_done.12: ; preds = %shortcirc_next.12, %endif.65
26 br i1 false, label %then.66, label %endif.42
27 }
28
+0
-94
test/Transforms/ADCE/2003-06-24-BadSuccessor.ll less more
None ; RUN: opt < %s -adce -disable-output
1 ; RUN: opt < %s -adce -adce-remove-loops=true -disable-output
2
3 target datalayout = "e-p:32:32"
4 %struct..CppObjTypeDesc = type { i32, i16, i16 }
5 %struct..TypeToken = type { i32, i16, i16 }
6
7 define i32 @C_ReFaxToDb() {
8 entry:
9 br i1 false, label %endif.0, label %then.0
10
11 then.0: ; preds = %entry
12 ret i32 0
13
14 endif.0: ; preds = %entry
15 br i1 false, label %then.11, label %then.4
16
17 then.4: ; preds = %endif.0
18 ret i32 0
19
20 then.11: ; preds = %endif.0
21 br i1 false, label %loopentry.0, label %else.2
22
23 loopentry.0: ; preds = %loopentry.1, %endif.14, %then.11
24 br i1 false, label %endif.14, label %loopexit.0
25
26 endif.14: ; preds = %loopentry.0
27 br i1 false, label %loopentry.1, label %loopentry.0
28
29 loopentry.1: ; preds = %then.53, %endif.14
30 %SubArrays.10 = phi i32* [ %SubArrays.8, %then.53 ], [ null, %endif.14 ] ; [#uses=3]
31 br i1 false, label %no_exit.1, label %loopentry.0
32
33 no_exit.1: ; preds = %loopentry.1
34 ; CHECK: switch
35 switch i32 0, label %label.17 [
36 i32 2, label %label.11
37 i32 19, label %label.10
38 ]
39
40 label.10: ; preds = %no_exit.1
41 br i1 false, label %then.43, label %endif.43
42
43 then.43: ; preds = %label.10
44 br i1 false, label %then.44, label %endif.44
45
46 then.44: ; preds = %then.43
47 br i1 false, label %shortcirc_next.4, label %endif.45
48
49 shortcirc_next.4: ; preds = %then.44
50 br i1 false, label %no_exit.2, label %loopexit.2
51
52 no_exit.2: ; preds = %shortcirc_next.4
53 %tmp.897 = getelementptr i32, i32* %SubArrays.10, i64 0 ; [#uses=1]
54 %tmp.899 = load i32, i32* %tmp.897 ; [#uses=1]
55 store i32 %tmp.899, i32* null
56 ret i32 0
57
58 loopexit.2: ; preds = %shortcirc_next.4
59 ret i32 0
60
61 endif.45: ; preds = %then.44
62 ret i32 0
63
64 endif.44: ; preds = %then.43
65 ret i32 0
66
67 endif.43: ; preds = %label.10
68 ret i32 0
69
70 label.11: ; preds = %no_exit.1
71 ret i32 0
72
73 label.17: ; preds = %no_exit.1
74 br i1 false, label %then.53, label %shortcirc_next.7
75
76 shortcirc_next.7: ; preds = %label.17
77 br i1 false, label %then.53, label %shortcirc_next.8
78
79 shortcirc_next.8: ; preds = %shortcirc_next.7
80 ret i32 0
81
82 then.53: ; preds = %shortcirc_next.7, %label.17
83 %SubArrays.8 = phi i32* [ %SubArrays.10, %shortcirc_next.7 ], [ %SubArrays.10, %label.17 ] ; [#uses=1]
84 %tmp.1023 = load i32, i32* null ; [#uses=1]
85 switch i32 %tmp.1023, label %loopentry.1 [
86 ]
87
88 loopexit.0: ; preds = %loopentry.0
89 ret i32 0
90
91 else.2: ; preds = %then.11
92 ret i32 0
93 }
+0
-44
test/Transforms/ADCE/2003-06-24-BasicFunctionality.ll less more
None ; RUN: opt < %s -adce -S | FileCheck %s
1 ; RUN: opt < %s -adce -adce-remove-loops -S | FileCheck %s
2
3 define void @dead_test8(i32* %data.1, i32 %idx.1) {
4 entry:
5 %tmp.1 = load i32, i32* %data.1 ; [#uses=2]
6 %tmp.41 = icmp sgt i32 %tmp.1, 0 ; [#uses=1]
7 br i1 %tmp.41, label %no_exit.preheader, label %return
8
9 no_exit.preheader: ; preds = %entry
10 %tmp.11 = getelementptr i32, i32* %data.1, i64 1 ; [#uses=1]
11 %tmp.22-idxcast = sext i32 %idx.1 to i64 ; [#uses=1]
12 %tmp.28 = getelementptr i32, i32* %data.1, i64 %tmp.22-idxcast ; [#uses=1]
13 br label %no_exit
14
15 no_exit: ; preds = %endif, %no_exit.preheader
16 %k.1 = phi i32 [ %k.0, %endif ], [ 0, %no_exit.preheader ] ; [#uses=3]
17 %i.0 = phi i32 [ %inc.1, %endif ], [ 0, %no_exit.preheader ] ; [#uses=1]
18 %tmp.12 = load i32, i32* %tmp.11 ; [#uses=1]
19 %tmp.14 = sub i32 0, %tmp.12 ; [#uses=1]
20 ; CHECK-NOT: %tmp.161
21 %tmp.161 = icmp ne i32 %k.1, %tmp.14 ; [#uses=1]
22 ; CHECK: br label %then
23 br i1 %tmp.161, label %then, label %else
24
25 then: ; preds = %no_exit
26 %inc.0 = add i32 %k.1, 1 ; [#uses=1]
27 br label %endif
28
29 else: ; preds = %no_exit
30 %dec = add i32 %k.1, -1 ; [#uses=1]
31 br label %endif
32
33 endif: ; preds = %else, %then
34 %k.0 = phi i32 [ %dec, %else ], [ %inc.0, %then ] ; [#uses=1]
35 store i32 2, i32* %tmp.28
36 %inc.1 = add i32 %i.0, 1 ; [#uses=2]
37 %tmp.4 = icmp slt i32 %inc.1, %tmp.1 ; [#uses=1]
38 br i1 %tmp.4, label %no_exit, label %return
39
40 return: ; preds = %endif, %entry
41 ret void
42 }
43
+0
-22
test/Transforms/ADCE/2003-09-10-UnwindInstFail.ll less more
None ; RUN: opt < %s -adce -disable-output
1
2 define void @test() personality i32 (...)* @__gxx_personality_v0 {
3 br i1 false, label %then, label %endif
4
5 then: ; preds = %0
6 invoke void null( i8* null )
7 to label %invoke_cont unwind label %invoke_catch
8
9 invoke_catch: ; preds = %then
10 %exn = landingpad {i8*, i32}
11 cleanup
12 resume { i8*, i32 } %exn
13
14 invoke_cont: ; preds = %then
15 ret void
16
17 endif: ; preds = %0
18 ret void
19 }
20
21 declare i32 @__gxx_personality_v0(...)
+0
-10
test/Transforms/ADCE/2003-09-15-InfLoopCrash.ll less more
None ; RUN: opt < %s -adce -disable-output
1 ; RUN: opt < %s -adce -adce-remove-loops -disable-output
2
3 define i32 @main() {
4 br label %loop
5
6 loop: ; preds = %loop, %0
7 br label %loop
8 }
9
+0
-20
test/Transforms/ADCE/2003-11-16-MissingPostDominanceInfo.ll less more
None ; RUN: opt < %s -adce -simplifycfg -S | grep call
1 ; RUN: opt < %s -adce -adce-remove-loops -simplifycfg -S | grep call
2
3 declare void @exit(i32)
4
5 define i32 @main(i32 %argc) {
6 %C = icmp eq i32 %argc, 1 ; [#uses=2]
7 br i1 %C, label %Cond, label %Done
8
9 Cond: ; preds = %0
10 br i1 %C, label %Loop, label %Done
11
12 Loop: ; preds = %Loop, %Cond
13 call void @exit( i32 0 )
14 br label %Loop
15
16 Done: ; preds = %Cond, %0
17 ret i32 1
18 }
19
+0
-17
test/Transforms/ADCE/2004-05-04-UnreachableBlock.ll less more
None ; RUN: opt < %s -adce -disable-output
1 ; RUN: opt < %s -adce -adce-remove-loops -disable-output
2
3 define void @test() {
4 entry:
5 br label %UnifiedReturnBlock
6
7 UnifiedReturnBlock: ; preds = %invoke_catch.0, %entry
8 ret void
9
10 invoke_catch.0: ; No predecessors!
11 br i1 false, label %UnifiedUnwindBlock, label %UnifiedReturnBlock
12
13 UnifiedUnwindBlock: ; preds = %invoke_catch.0
14 unreachable
15 }
16
+0
-52
test/Transforms/ADCE/2005-02-17-PHI-Invoke-Crash.ll less more
None ; RUN: opt < %s -adce -disable-output
1
2 declare void @strlen()
3
4 declare void @_ZN10QByteArray6resizeEi()
5
6 declare void @q_atomic_decrement()
7
8 define void @_ZNK10QByteArray13leftJustifiedEicb() personality i32 (...)* @__gxx_personality_v0 {
9 entry:
10 invoke void @strlen( )
11 to label %tmp.3.i.noexc unwind label %invoke_catch.0
12
13 tmp.3.i.noexc: ; preds = %entry
14 br i1 false, label %then.0, label %else.0
15
16 invoke_catch.0: ; preds = %entry
17 %exn.0 = landingpad {i8*, i32}
18 cleanup
19 invoke void @q_atomic_decrement( )
20 to label %tmp.1.i.i183.noexc unwind label %terminate
21
22 tmp.1.i.i183.noexc: ; preds = %invoke_catch.0
23 ret void
24
25 then.0: ; preds = %tmp.3.i.noexc
26 invoke void @_ZN10QByteArray6resizeEi( )
27 to label %invoke_cont.1 unwind label %invoke_catch.1
28
29 invoke_catch.1: ; preds = %then.0
30 %exn.1 = landingpad {i8*, i32}
31 cleanup
32 invoke void @q_atomic_decrement( )
33 to label %tmp.1.i.i162.noexc unwind label %terminate
34
35 tmp.1.i.i162.noexc: ; preds = %invoke_catch.1
36 ret void
37
38 invoke_cont.1: ; preds = %then.0
39 ret void
40
41 else.0: ; preds = %tmp.3.i.noexc
42 ret void
43
44 terminate: ; preds = %invoke_catch.1, %invoke_catch.0
45 %dbg.0.1 = phi { }* [ null, %invoke_catch.1 ], [ null, %invoke_catch.0 ] ; <{ }*> [#uses=0]
46 %exn = landingpad {i8*, i32}
47 cleanup
48 unreachable
49 }
50
51 declare i32 @__gxx_personality_v0(...)
+0
-55
test/Transforms/ADCE/2016-09-06.ll less more
None ; RUN: opt < %s -sroa -adce -adce-remove-loops -S | FileCheck %s
1 ; ModuleID = 'test1.bc'
2 source_filename = "test1.c"
3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
4 target triple = "x86_64-unknown-linux-gnu"
5
6 ; Function Attrs: nounwind uwtable
7 define i32 @foo(i32, i32, i32) #0 {
8 %4 = alloca i32, align 4
9 %5 = alloca i32, align 4
10 %6 = alloca i32, align 4
11 %7 = alloca i32, align 4
12 %8 = alloca i32, align 4
13 store i32 %0, i32* %4, align 4
14 store i32 %1, i32* %5, align 4
15 store i32 %2, i32* %6, align 4
16 store i32 0, i32* %7, align 4
17 %9 = load i32, i32* %5, align 4
18 %I10 = icmp ne i32 %9, 0
19 br i1 %I10, label %B11, label %B21
20
21 B11:
22 store i32 0, i32* %8, align 4
23 br label %B12
24
25 B12:
26 %I13 = load i32, i32* %8, align 4
27 %I14 = load i32, i32* %6, align 4
28 %I15 = icmp slt i32 %I13, %I14
29 ; CHECK: br label %B20
30 br i1 %I15, label %B16, label %B20
31
32 B16:
33 br label %B17
34
35 B17:
36 %I18 = load i32, i32* %8, align 4
37 %I19 = add nsw i32 %I18, 1
38 store i32 %I19, i32* %8, align 4
39 br label %B12
40
41 B20:
42 store i32 1, i32* %7, align 4
43 br label %B21
44
45 B21:
46 %I22 = load i32, i32* %7, align 4
47 ret i32 %I22
48 }
49
50 attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
51
52 !llvm.ident = !{!0}
53
54 !0 = !{!"clang version 4.0.0"}
+0
-24
test/Transforms/ADCE/2017-08-21-DomTree-deletions.ll less more
None ; RUN: opt < %s -adce | llvm-dis
1 ; RUN: opt < %s -adce -verify-dom-info | llvm-dis
2
3 define void @foo() {
4 entry:
5 br label %switch
6 switch: ; preds = %entry
7 switch i32 undef, label %default [
8 i32 2, label %two
9 i32 5, label %five
10 i32 4, label %four
11 ]
12 four: ; preds = %switch
13 br label %exit
14 five: ; preds = %switch
15 br label %exit
16 two: ; preds = %switch
17 br label %exit
18 default: ; preds = %switch
19 br label %exit
20 exit: ; preds = %default, %two, %five, %four
21 ret void
22 }
23
+0
-18
test/Transforms/ADCE/basictest.ll less more
None ; RUN: opt < %s -adce -simplifycfg | llvm-dis
1 ; RUN: opt < %s -passes=adce | llvm-dis
2
3 define i32 @Test(i32 %A, i32 %B) {
4 BB1:
5 br label %BB4
6
7 BB2: ; No predecessors!
8 br label %BB3
9
10 BB3: ; preds = %BB4, %BB2
11 %ret = phi i32 [ %X, %BB4 ], [ %B, %BB2 ] ; [#uses=1]
12 ret i32 %ret
13
14 BB4: ; preds = %BB1
15 %X = phi i32 [ %A, %BB1 ] ; [#uses=1]
16 br label %BB3
17 }
+0
-102
test/Transforms/ADCE/basictest1.ll less more
None ; RUN: opt < %s -adce -S | FileCheck %s
1 ; RUN: opt < %s -adce -adce-remove-loops -S | FileCheck %s
2
3 %FILE = type { i32, i8*, i8*, i8, i8, i32, i32, i32 }
4 %spec_fd_t = type { i32, i32, i32, i8* }
5 @__iob = external global [20 x %FILE] ; <[20 x %FILE]*> [#uses=1]
6 @dbglvl = global i32 4 ; [#uses=3]
7 @spec_fd = external global [3 x %spec_fd_t] ; <[3 x %spec_fd_t]*> [#uses=4]
8 @.LC9 = internal global [34 x i8] c"spec_read: fd=%d, > MAX_SPEC_FD!\0A\00" ; <[34 x i8]*> [#uses=1]
9 @.LC10 = internal global [4 x i8] c"EOF\00" ; <[4 x i8]*> [#uses=1]
10 @.LC11 = internal global [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
11 @.LC12 = internal global [17 x i8] c"spec_getc: %d = \00" ; <[17 x i8]*> [#uses=1]
12
13 declare i32 @fprintf(%FILE*, i8*, ...)
14
15 declare void @exit(i32)
16
17 declare i32 @remove(i8*)
18
19 declare i32 @fputc(i32, %FILE*)
20
21 declare i32 @fwrite(i8*, i32, i32, %FILE*)
22
23 declare void @perror(i8*)
24
25 define i32 @spec_getc(i32 %fd) {
26 %reg109 = load i32, i32* @dbglvl ; [#uses=1]
27 %cond266 = icmp sle i32 %reg109, 4 ; [#uses=1]
28 ; CHECK: br label %bb3
29 br i1 %cond266, label %bb3, label %bb2
30
31 bb2: ; preds = %0
32 %cast273 = getelementptr [17 x i8], [17 x i8]* @.LC12, i64 0, i64 0 ; [#uses=0]
33 br label %bb3
34
35 bb3: ; preds = %bb2, %0
36 %cond267 = icmp sle i32 %fd, 3 ; [#uses=1]
37 br i1 %cond267, label %bb5, label %bb4
38
39 bb4: ; preds = %bb3
40 %reg111 = getelementptr [20 x %FILE], [20 x %FILE]* @__iob, i64 0, i64 1, i32 3 ; [#uses=1]
41 %cast274 = getelementptr [34 x i8], [34 x i8]* @.LC9, i64 0, i64 0 ; [#uses=0]
42 %cast282 = bitcast i8* %reg111 to %FILE* ; <%FILE*> [#uses=0]
43 call void @exit( i32 1 )
44 br label %UnifiedExitNode
45
46 bb5: ; preds = %bb3
47 %reg107-idxcast1 = sext i32 %fd to i64 ; [#uses=2]
48 %reg107-idxcast2 = sext i32 %fd to i64 ; [#uses=1]
49 %reg1311 = getelementptr [3 x %spec_fd_t], [3 x %spec_fd_t]* @spec_fd, i64 0, i64 %reg107-idxcast2 ; <%spec_fd_t*> [#uses=1]
50 %idx1 = getelementptr [3 x %spec_fd_t], [3 x %spec_fd_t]* @spec_fd, i64 0, i64 %reg107-idxcast1, i32 2 ; [#uses=1]
51 %reg1321 = load i32, i32* %idx1 ; [#uses=3]
52 %idx2 = getelementptr %spec_fd_t, %spec_fd_t* %reg1311, i64 0, i32 1 ; [#uses=1]
53 %reg1331 = load i32, i32* %idx2 ; [#uses=1]
54 %cond270 = icmp slt i32 %reg1321, %reg1331 ; [#uses=1]
55 br i1 %cond270, label %bb9, label %bb6
56
57 bb6: ; preds = %bb5
58 %reg134 = load i32, i32* @dbglvl ; [#uses=1]
59 %cond271 = icmp sle i32 %reg134, 4 ; [#uses=1]
60 ; CHECK: br label %bb8
61 br i1 %cond271, label %bb8, label %bb7
62
63 bb7: ; preds = %bb6
64 %cast277 = getelementptr [4 x i8], [4 x i8]* @.LC10, i64 0, i64 0 ; [#uses=0]
65 br label %bb8
66
67 bb8: ; preds = %bb7, %bb6
68 br label %UnifiedExitNode
69
70 bb9: ; preds = %bb5
71 %reg107-idxcast3 = sext i32 %fd to i64 ; [#uses=1]
72 %idx3 = getelementptr [3 x %spec_fd_t], [3 x %spec_fd_t]* @spec_fd, i64 0, i64 %reg107-idxcast3, i32 3 ; [#uses=1]
73 %reg1601 = load i8*, i8** %idx3 ; [#uses=1]
74 %reg132-idxcast1 = sext i32 %reg1321 to i64 ; [#uses=1]
75 %idx4 = getelementptr i8, i8* %reg1601, i64 %reg132-idxcast1 ; [#uses=1]
76 %reg1621 = load i8, i8* %idx4 ; [#uses=2]
77 %cast108 = zext i8 %reg1621 to i64 ; [#uses=0]
78 %reg157 = add i32 %reg1321, 1 ; [#uses=1]
79 %idx5 = getelementptr [3 x %spec_fd_t], [3 x %spec_fd_t]* @spec_fd, i64 0, i64 %reg107-idxcast1, i32 2 ; [#uses=1]
80 store i32 %reg157, i32* %idx5
81 %reg163 = load i32, i32* @dbglvl ; [#uses=1]
82 %cond272 = icmp sle i32 %reg163, 4 ; [#uses=1]
83 ; CHECK: br label %bb11
84 br i1 %cond272, label %bb11, label %bb10
85
86 bb10: ; preds = %bb9
87 %cast279 = getelementptr [4 x i8], [4 x i8]* @.LC11, i64 0, i64 0 ; [#uses=0]
88 br label %bb11
89
90 bb11: ; preds = %bb10, %bb9
91 %cast291 = zext i8 %reg1621 to i32 ; [#uses=1]
92 br label %UnifiedExitNode
93
94 UnifiedExitNode: ; preds = %bb11, %bb8, %bb4
95 %UnifiedRetVal = phi i32 [ 42, %bb4 ], [ -1, %bb8 ], [ %cast291, %bb11 ] ; [#uses=1]
96 ret i32 %UnifiedRetVal
97 }
98
99 declare i32 @puts(i8*)
100
101 declare i32 @printf(i8*, ...)
+0
-102
test/Transforms/ADCE/basictest2.ll less more
None ; RUN: opt < %s -adce -disable-output
1 ; RUN: opt < %s -adce -adce-remove-loops -S | FileCheck %s
2
3 %FILE = type { i32, i8*, i8*, i8, i8, i32, i32, i32 }
4 %spec_fd_t = type { i32, i32, i32, i8* }
5 @__iob = external global [20 x %FILE] ; <[20 x %FILE]*> [#uses=1]
6 @dbglvl = global i32 4 ; [#uses=3]
7 @spec_fd = external global [3 x %spec_fd_t] ; <[3 x %spec_fd_t]*> [#uses=4]
8 @.LC9 = internal global [34 x i8] c"spec_read: fd=%d, > MAX_SPEC_FD!\0A\00" ; <[34 x i8]*> [#uses=1]
9 @.LC10 = internal global [4 x i8] c"EOF\00" ; <[4 x i8]*> [#uses=1]
10 @.LC11 = internal global [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
11 @.LC12 = internal global [17 x i8] c"spec_getc: %d = \00" ; <[17 x i8]*> [#uses=1]
12
13 declare i32 @fprintf(%FILE*, i8*, ...)
14
15 declare void @exit(i32)
16
17 declare i32 @remove(i8*)
18
19 declare i32 @fputc(i32, %FILE*)
20
21 declare i32 @fwrite(i8*, i32, i32, %FILE*)
22
23 declare void @perror(i8*)
24
25 define i32 @spec_getc(i32 %fd) {
26 %reg109 = load i32, i32* @dbglvl ; [#uses=1]
27 %cond266 = icmp sle i32 %reg109, 4 ; [#uses=1]
28 ; CHECK: br label %bb3
29 br i1 %cond266, label %bb3, label %bb2
30
31 bb2: ; preds = %0
32 %cast273 = getelementptr [17 x i8], [17 x i8]* @.LC12, i64 0, i64 0 ; [#uses=0]
33 br label %bb3
34
35 bb3: ; preds = %bb2, %0
36 %cond267 = icmp sle i32 %fd, 3 ; [#uses=0]
37 br label %bb5
38
39 bb4: ; No predecessors!
40 %reg111 = getelementptr [20 x %FILE], [20 x %FILE]* @__iob, i64 0, i64 1, i32 3 ; [#uses=1]
41 %cast274 = getelementptr [34 x i8], [34 x i8]* @.LC9, i64 0, i64 0 ; [#uses=0]
42 %cast282 = bitcast i8* %reg111 to %FILE* ; <%FILE*> [#uses=0]
43 call void @exit( i32 1 )
44 br label %UnifiedExitNode
45
46 bb5: ; preds = %bb3
47 %reg107-idxcast1 = sext i32 %fd to i64 ; [#uses=2]
48 %reg107-idxcast2 = sext i32 %fd to i64 ; [#uses=1]
49 %reg1311 = getelementptr [3 x %spec_fd_t], [3 x %spec_fd_t]* @spec_fd, i64 0, i64 %reg107-idxcast2 ; <%spec_fd_t*> [#uses=1]
50 %idx1 = getelementptr [3 x %spec_fd_t], [3 x %spec_fd_t]* @spec_fd, i64 0, i64 %reg107-idxcast1, i32 2 ; [#uses=1]
51 %reg1321 = load i32, i32* %idx1 ; [#uses=3]
52 %idx2 = getelementptr %spec_fd_t, %spec_fd_t* %reg1311, i64 0, i32 1 ; [#uses=1]
53 %reg1331 = load i32, i32* %idx2 ; [#uses=1]
54 %cond270 = icmp slt i32 %reg1321, %reg1331 ; [#uses=1]
55 br i1 %cond270, label %bb9, label %bb6
56
57 bb6: ; preds = %bb5
58 %reg134 = load i32, i32* @dbglvl ; [#uses=1]
59 %cond271 = icmp sle i32 %reg134, 4 ; [#uses=1]
60 ; CHECK: br label %bb8
61 br i1 %cond271, label %bb8, label %bb7
62
63 bb7: ; preds = %bb6
64 %cast277 = getelementptr [4 x i8], [4 x i8]* @.LC10, i64 0, i64 0 ; [#uses=0]
65 br label %bb8
66
67 bb8: ; preds = %bb7, %bb6
68 br label %UnifiedExitNode
69
70 bb9: ; preds = %bb5
71 %reg107-idxcast3 = sext i32 %fd to i64 ; [#uses=1]
72 %idx3 = getelementptr [3 x %spec_fd_t], [3 x %spec_fd_t]* @spec_fd, i64 0, i64 %reg107-idxcast3, i32 3 ; [#uses=1]
73 %reg1601 = load i8*, i8** %idx3 ; [#uses=1]
74 %reg132-idxcast1 = sext i32 %reg1321 to i64 ; [#uses=1]
75 %idx4 = getelementptr i8, i8* %reg1601, i64 %reg132-idxcast1 ; [#uses=1]
76 %reg1621 = load i8, i8* %idx4 ; [#uses=2]
77 %cast108 = zext i8 %reg1621 to i64 ; [#uses=0]
78 %reg157 = add i32 %reg1321, 1 ; [#uses=1]
79 %idx5 = getelementptr [3 x %spec_fd_t], [3 x %spec_fd_t]* @spec_fd, i64 0, i64 %reg107-idxcast1, i32 2 ; [#uses=1]
80 store i32 %reg157, i32* %idx5
81 %reg163 = load i32, i32* @dbglvl ; [#uses=1]
82 %cond272 = icmp sle i32 %reg163, 4 ; [#uses=1]
83 ; CHECK: br label %bb11
84 br i1 %cond272, label %bb11, label %bb10
85
86 bb10: ; preds = %bb9
87 %cast279 = getelementptr [4 x i8], [4 x i8]* @.LC11, i64 0, i64 0 ; [#uses=0]
88 br label %bb11
89
90 bb11: ; preds = %bb10, %bb9
91 %cast291 = zext i8 %reg1621 to i32 ; [#uses=1]
92 br label %UnifiedExitNode
93
94 UnifiedExitNode: ; preds = %bb11, %bb8, %bb4
95 %UnifiedRetVal = phi i32 [ 42, %bb4 ], [ -1, %bb8 ], [ %cast291, %bb11 ] ; [#uses=1]
96 ret i32 %UnifiedRetVal
97 }
98
99 declare i32 @puts(i8*)
100
101 declare i32 @printf(i8*, ...)
+0
-8
test/Transforms/ADCE/dce_pure_call.ll less more
None ; RUN: opt -adce -S < %s | not grep call
1
2 declare i32 @strlen(i8*) readonly nounwind
3
4 define void @test() {
5 call i32 @strlen( i8* null ) ; :1 [#uses=0]
6 ret void
7 }
+0
-19
test/Transforms/ADCE/dce_pure_invoke.ll less more
None ; RUN: opt < %s -adce -S | grep null
1
2 declare i32 @strlen(i8*) readnone
3
4 define i32 @test() personality i32 (...)* @__gxx_personality_v0 {
5 ; invoke of pure function should not be deleted!
6 invoke i32 @strlen( i8* null ) readnone
7 to label %Cont unwind label %Other ; :1 [#uses=0]
8
9 Cont: ; preds = %0
10 ret i32 0
11
12 Other: ; preds = %0
13 %exn = landingpad {i8*, i32}
14 cleanup
15 ret i32 1
16 }
17
18 declare i32 @__gxx_personality_v0(...)
+0
-100
test/Transforms/ADCE/debug-info-intrinsic.ll less more
None ; RUN: opt -adce -S < %s | FileCheck %s
1 ; Test that debug info intrinsics in dead scopes get eliminated by -adce.
2
3 ; Generated with 'clang -g -S -emit-llvm | opt -mem2reg -inline' at r262899
4 ; (before -adce was augmented) and then hand-reduced. This was the input:
5 ;
6 ;;void sink(void);
7 ;;
8 ;;void variable_in_unused_subscope(void) {
9 ;; { int i = 0; }
10 ;; sink();
11 ;;}
12 ;;
13 ;;void variable_in_parent_scope(void) {
14 ;; int i = 0;
15 ;; { sink(); }
16 ;;}
17 ;;
18 ;;static int empty_function_with_unused_variable(void) {
19 ;; { int i = 0; }
20 ;; return 0;
21 ;;}
22 ;;
23 ;;void calls_empty_function_with_unused_variable_in_unused_subscope(void) {
24 ;; { empty_function_with_unused_variable(); }
25 ;; sink();
26 ;;}
27
28 declare void @llvm.dbg.value(metadata, metadata, metadata)
29
30 declare void @sink()
31
32 ; CHECK-LABEL: define void @variable_in_unused_subscope(
33 define void @variable_in_unused_subscope() !dbg !4 {
34 ; CHECK-NEXT: entry:
35 ; CHECK-NEXT: call void @sink
36 ; CHECK-NEXT: ret void
37 entry:
38 call void @llvm.dbg.value(metadata i32 0, metadata !15, metadata !17), !dbg !18
39 call void @sink(), !dbg !19
40 ret void, !dbg !20
41 }
42
43 ; CHECK-LABEL: define void @variable_in_parent_scope(
44 define void @variable_in_parent_scope() !dbg !7 {
45 ; CHECK-NEXT: entry:
46 ; CHECK-NEXT: call void @llvm.dbg.value
47 ; CHECK-NEXT: call void @sink
48 ; CHECK-NEXT: ret void
49 entry:
50 call void @llvm.dbg.value(metadata i32 0, metadata !21, metadata !17), !dbg !22
51 call void @sink(), !dbg !23
52 ret void, !dbg !25
53 }
54
55 ; CHECK-LABEL: define void @calls_empty_function_with_unused_variable_in_unused_subscope(
56 define void @calls_empty_function_with_unused_variable_in_unused_subscope() !dbg !8 {
57 ; CHECK-NEXT: entry:
58 ; CHECK-NEXT: call void @sink
59 ; CHECK-NEXT: ret void
60 entry:
61 call void @llvm.dbg.value(metadata i32 0, metadata !26, metadata !17), !dbg !28
62 call void @sink(), !dbg !31
63 ret void, !dbg !32
64 }
65
66 !llvm.dbg.cu = !{!0}
67 !llvm.module.flags = !{!14}
68
69 !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
70 !1 = !DIFile(filename: "t.c", directory: "/path/to/test/Transforms/ADCE")
71 !2 = !{}
72 !4 = distinct !DISubprogram(name: "variable_in_unused_subscope", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
73 !5 = !DISubroutineType(types: !6)
74 !6 = !{null}
75 !7 = distinct !DISubprogram(name: "variable_in_parent_scope", scope: !1, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
76 !8 = distinct !DISubprogram(name: "calls_empty_function_with_unused_variable_in_unused_subscope", scope: !1, file: !1, line: 18, type: !5, isLocal: false, isDefinition: true, scopeLine: 18, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
77 !10 = distinct !DISubprogram(name: "empty_function_with_unused_variable", scope: !1, file: !1, line: 13, type: !11, isLocal: true, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
78 !11 = !DISubroutineType(types: !12)
79 !12 = !{!13}
80 !13 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
81 !14 = !{i32 2, !"Debug Info Version", i32 3}
82 !15 = !DILocalVariable(name: "i", scope: !16, file: !1, line: 4, type: !13)
83 !16 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 3)
84 !17 = !DIExpression()
85 !18 = !DILocation(line: 4, column: 9, scope: !16)
86 !19 = !DILocation(line: 5, column: 3, scope: !4)
87 !20 = !DILocation(line: 6, column: 1, scope: !4)
88 !21 = !DILocalVariable(name: "i", scope: !7, file: !1, line: 9, type: !13)
89 !22 = !DILocation(line: 9, column: 7, scope: !7)
90 !23 = !DILocation(line: 10, column: 5, scope: !24)
91 !24 = distinct !DILexicalBlock(scope: !7, file: !1, line: 10, column: 3)
92 !25 = !DILocation(line: 11, column: 1, scope: !7)
93 !26 = !DILocalVariable(name: "i", scope: !27, file: !1, line: 14, type: !13)
94 !27 = distinct !DILexicalBlock(scope: !10, file: !1, line: 14, column: 3)
95 !28 = !DILocation(line: 14, column: 9, scope: !27, inlinedAt: !29)
96 !29 = distinct !DILocation(line: 19, column: 5, scope: !30)
97 !30 = distinct !DILexicalBlock(scope: !8, file: !1, line: 19, column: 3)
98 !31 = !DILocation(line: 20, column: 3, scope: !8)
99 !32 = !DILocation(line: 21, column: 1, scope: !8)
+0
-19
test/Transforms/ADCE/delete-profiling-calls-to-constant.ll less more
None ; RUN: opt < %s -adce -S | FileCheck %s
1 ; RUN: opt < %s -passes=adce -S | FileCheck %s
2
3 ; Verify that a call to instrument a constant is deleted.
4
5 @__profc_foo = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", align 8
6 @__profd_foo = private global { i64, i64, i64*, i8*, i8*, i32, [1 x i16] } { i64 6699318081062747564, i64 0, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_foo, i32 0, i32 0), i8* bitcast (i32 ()* @foo to i8*), i8* null, i32 1, [1 x i16] [i16 1] }, section "__llvm_prf_data", align 8
7
8 define i32 @foo() {
9 ; CHECK-NOT: call void @__llvm_profile_instrument_target
10 entry:
11 tail call void @__llvm_profile_instrument_target(i64 ptrtoint (i32 (i32)* @bar to i64), i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [1 x i16] }* @__profd_foo to i8*), i32 0)
12 %call = tail call i32 @bar(i32 21)
13 ret i32 %call
14 }
15
16 declare i32 @bar(i32)
17
18 declare void @__llvm_profile_instrument_target(i64, i8*, i32)
+0
-39
test/Transforms/ADCE/domtree-DoubleDeletion.ll less more
None ; RUN: opt < %s -gvn -simplifycfg -adce | llvm-dis
1 ; RUN: opt < %s -gvn -simplifycfg -adce -verify-dom-info | llvm-dis
2
3 ; This test makes sure that the DominatorTree properly handles
4 ; deletion of edges that go to forward-unreachable regions.
5 ; In this case, %land.end is already forward unreachable when
6 ; the DT gets informed about the deletion of %entry -> %land.end.
7
8 @a = common global i32 0, align 4
9
10 define i32 @main() {
11 entry:
12 %retval = alloca i32, align 4
13 store i32 0, i32* %retval, align 4
14 %0 = load i32, i32* @a, align 4
15 %cmp = icmp ne i32 %0, 1
16 br i1 %cmp, label %land.rhs, label %land.end4
17
18 land.rhs: ; preds = %entry
19 %1 = load i32, i32* @a, align 4
20 %tobool = icmp ne i32 %1, 0
21 br i1 %tobool, label %land.rhs1, label %land.end
22
23 land.rhs1: ; preds = %land.rhs
24 br label %land.end
25
26 land.end: ; preds = %land.rhs1, %land.rhs
27 %2 = phi i1 [ false, %land.rhs ], [ true, %land.rhs1 ]
28 %land.ext = zext i1 %2 to i32
29 %conv = trunc i32 %land.ext to i16
30 %conv2 = sext i16 %conv to i32
31 %tobool3 = icmp ne i32 %conv2, 0
32 br label %land.end4
33
34 land.end4: ; preds = %land.end, %entry
35 %3 = phi i1 [ false, %entry ], [ %tobool3, %land.end ]
36 %land.ext5 = zext i1 %3 to i32
37 ret i32 0
38 }
+0
-5
test/Transforms/ADCE/unreachable-function.ll less more
None ; RUN: opt < %s -adce -disable-output
1
2 define void @test() {
3 unreachable
4 }
+0
-18
test/Transforms/ADCE/unreachable.ll less more
None ; RUN: opt < %s -adce -simplifycfg | llvm-dis
1 ; RUN: opt < %s -passes=adce | llvm-dis
2
3 define i32 @Test(i32 %A, i32 %B) {
4 BB1:
5 br label %BB4
6
7 BB2: ; No predecessors!
8 br label %BB3
9
10 BB3: ; preds = %BB4, %BB2
11 %ret = phi i32 [ %X, %BB4 ], [ %B, %BB2 ] ; [#uses=1]
12 ret i32 %ret
13
14 BB4: ; preds = %BB1
15 %X = phi i32 [ %A, %BB1 ] ; [#uses=1]
16 br label %BB3
17 }
+0
-62
test/Transforms/AddDiscriminators/basic.ll less more
None ; RUN: opt < %s -add-discriminators -S | FileCheck %s
1 ; RUN: opt < %s -passes=add-discriminators -S | FileCheck %s
2
3 ; Basic DWARF discriminator test. All the instructions in block
4 ; 'if.then' should have a different discriminator value than
5 ; the conditional branch at the end of block 'entry'.
6 ;
7 ; Original code:
8 ;
9 ; void foo(int i) {
10 ; int x;
11 ; if (i < 10) x = i;
12 ; }
13
14 define void @foo(i32 %i) #0 !dbg !4 {
15 entry:
16 %i.addr = alloca i32, align 4
17 %x = alloca i32, align 4
18 store i32 %i, i32* %i.addr, align 4
19 %0 = load i32, i32* %i.addr, align 4, !dbg !10
20 %cmp = icmp slt i32 %0, 10, !dbg !10
21 br i1 %cmp, label %if.then, label %if.end, !dbg !10
22
23 if.then: ; preds = %entry
24 %1 = load i32, i32* %i.addr, align 4, !dbg !10
25 ; CHECK: %1 = load i32, i32* %i.addr, align 4, !dbg ![[THEN:[0-9]+]]
26
27 store i32 %1, i32* %x, align 4, !dbg !10
28 ; CHECK: store i32 %1, i32* %x, align 4, !dbg ![[THEN]]
29
30 br label %if.end, !dbg !10
31 ; CHECK: br label %if.end, !dbg ![[THEN]]
32
33 if.end: ; preds = %if.then, %entry
34 ret void, !dbg !12
35 ; CHECK: ret void, !dbg ![[END:[0-9]+]]
36 }
37
38 attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
39
40 !llvm.dbg.cu = !{!0}
41 !llvm.module.flags = !{!7, !8}
42 !llvm.ident = !{!9}
43
44 !0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
45 !1 = !DIFile(filename: "basic.c", directory: ".")
46 !2 = !{}
47 !4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, retainedNodes: !2)
48 !5 = !DIFile(filename: "basic.c", directory: ".")
49 !6 = !DISubroutineType(types: !2)
50 !7 = !{i32 2, !"Dwarf Version", i32 4}
51 !8 = !{i32 1, !"Debug Info Version", i32 3}
52 !9 = !{!"clang version 3.5 "}
53 !10 = !DILocation(line: 3, scope: !11)
54 !11 = distinct !DILexicalBlock(line: 3, column: 0, file: !1, scope: !4)
55 !12 = !DILocation(line: 4, scope: !4)
56
57 ; CHECK: ![[FOO:[0-9]+]] = distinct !DISubprogram(name: "foo"
58 ; CHECK: ![[BLOCK:[0-9]+]] = distinct !DILexicalBlock(scope: ![[FOO]],{{.*}} line: 3)
59 ; CHECK: ![[THEN]] = !DILocation(line: 3, scope: ![[BLOCKFILE:[0-9]+]])
60 ; CHECK: ![[BLOCKFILE]] = !DILexicalBlockFile(scope: ![[BLOCK]],{{.*}} discriminator: 2)
61 ; CHECK: ![[END]] = !DILocation(line: 4, scope: ![[FOO]])
+0
-50
test/Transforms/AddDiscriminators/call-nested.ll less more
None ; RUN: opt < %s -add-discriminators -S | FileCheck %s
1 ; RUN: opt < %s -passes=add-discriminators -S | FileCheck %s
2
3 ; Discriminator support for calls that are defined in one line:
4 ; #1 int foo(int, int);
5 ; #2 int bar();
6 ; #3 int baz() {
7 ; #4 return foo(bar(),
8 ; #5 bar());
9 ; #6 }
10
11 ; Function Attrs: uwtable
12 define i32 @_Z3bazv() #0 !dbg !4 {
13 %1 = call i32 @_Z3barv(), !dbg !11
14 ; CHECK: %1 = call i32 @_Z3barv(), !dbg ![[CALL0:[0-9]+]]
15 %2 = call i32 @_Z3barv(), !dbg !12
16 ; CHECK: %2 = call i32 @_Z3barv(), !dbg ![[CALL1:[0-9]+]]
17 %3 = call i32 @_Z3fooii(i32 %1, i32 %2), !dbg !13
18 ; CHECK: %3 = call i32 @_Z3fooii(i32 %1, i32 %2), !dbg ![[CALL2:[0-9]+]]
19 ret i32 %3, !dbg !14
20 }
21
22 declare i32 @_Z3fooii(i32, i32) #1
23
24 declare i32 @_Z3barv() #1
25
26 attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
27 attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
28
29 !llvm.dbg.cu = !{!0}
30 !llvm.module.flags = !{!8, !9}
31 !llvm.ident = !{!10}
32
33 !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.9.0 (trunk 266269)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
34 !1 = !DIFile(filename: "test.cc", directory: "")
35 !2 = !{}
36 !4 = distinct !DISubprogram(name: "baz", linkageName: "_Z3bazv", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2)
37 !5 = !DISubroutineType(types: !6)
38 !6 = !{!7}
39 !7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
40 !8 = !{i32 2, !"Dwarf Version", i32 4}
41 !9 = !{i32 2, !"Debug Info Version", i32 3}
42 !10 = !{!"clang version 3.9.0 (trunk 266269)"}
43 !11 = !DILocation(line: 4, column: 14, scope: !4)
44 !12 = !DILocation(line: 5, column: 14, scope: !4)
45 !13 = !DILocation(line: 4, column: 10, scope: !4)
46 !14 = !DILocation(line: 4, column: 3, scope: !4)
47
48 ; CHECK: ![[CALL2]] = !DILocation(line: 4, column: 10, scope: ![[CALL2BLOCK:[0-9]+]])
49 ; CHECK: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 2)
+0
-54
test/Transforms/AddDiscriminators/call.ll less more
None ; RUN: opt < %s -add-discriminators -S | FileCheck %s
1 ; RUN: opt < %s -passes=add-discriminators -S | FileCheck %s
2
3 ; Discriminator support for calls that are defined in one line:
4 ; #1 void bar();
5 ; #2
6 ; #3 void foo() {
7 ; #4 bar();bar()/*discriminator 2*/;bar()/*discriminator 4*/;
8 ; #5 }
9
10 ; Function Attrs: uwtable
11 define void @_Z3foov() #0 !dbg !4 {
12 call void @_Z3barv(), !dbg !10
13 ; CHECK: call void @_Z3barv(), !dbg ![[CALL0:[0-9]+]]
14 %a = alloca [100 x i8], align 16
15 %b = bitcast [100 x i8]* %a to i8*
16 call void @llvm.lifetime.start.p0i8(i64 100, i8* %b), !dbg !11
17 call void @llvm.lifetime.end.p0i8(i64 100, i8* %b), !dbg !11
18 call void @_Z3barv(), !dbg !11
19 ; CHECK: call void @_Z3barv(), !dbg ![[CALL1:[0-9]+]]
20 call void @_Z3barv(), !dbg !12
21 ; CHECK: call void @_Z3barv(), !dbg ![[CALL2:[0-9]+]]
22 ret void, !dbg !13
23 }
24
25 declare void @_Z3barv() #1
26 declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) nounwind argmemonly
27 declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) nounwind argmemonly
28
29 attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
30 attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
31
32 !llvm.dbg.cu = !{!0}
33 !llvm.module.flags = !{!7, !8}
34 !llvm.ident = !{!9}
35
36 !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 250915) (llvm/trunk 251830)", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
37 !1 = !DIFile(filename: "c.cc", directory: "/tmp")
38 !2 = !{}
39 !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
40 !5 = !DISubroutineType(types: !6)
41 !6 = !{null}
42 !7 = !{i32 2, !"Dwarf Version", i32 4}
43 !8 = !{i32 2, !"Debug Info Version", i32 3}
44 !9 = !{!"clang version 3.8.0 (trunk 250915) (llvm/trunk 251830)"}
45 !10 = !DILocation(line: 4, column: 3, scope: !4)
46 !11 = !DILocation(line: 4, column: 9, scope: !4)
47 !12 = !DILocation(line: 4, column: 15, scope: !4)
48 !13 = !DILocation(line: 5, column: 1, scope: !4)
49
50 ; CHECK: ![[CALL1]] = !DILocation(line: 4, column: 9, scope: ![[CALL1BLOCK:[0-9]+]])
51 ; CHECK: ![[CALL1BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 2)
52 ; CHECK: ![[CALL2]] = !DILocation(line: 4, column: 15, scope: ![[CALL2BLOCK:[0-9]+]])
53 ; CHECK: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 4)
+0
-33
test/Transforms/AddDiscriminators/dbg-declare-discriminator.ll less more
None ; RUN: opt -S -add-discriminators < %s | FileCheck %s
1 ; RUN: opt -S -passes=add-discriminators < %s | FileCheck %s
2
3 declare void @llvm.dbg.declare(metadata, metadata, metadata)
4
5 ; This checks whether the add-discriminators pass produces valid metadata on
6 ; llvm.dbg.declare instructions.
7 ;
8 ; CHECK-LABEL: @test_valid_metadata
9 define void @test_valid_metadata() {
10 %a = alloca i8
11 call void @llvm.dbg.declare(metadata i8* %a, metadata !2, metadata !5), !dbg !6
12 %b = alloca i8
13 call void @llvm.dbg.declare(metadata i8* %b, metadata !9, metadata !5), !dbg !11
14 ret void
15 }
16
17 !llvm.module.flags = !{!0, !1}
18 !llvm.dbg.cu = !{!12}
19
20 !0 = !{i32 2, !"Dwarf Version", i32 4}
21 !1 = !{i32 2, !"Debug Info Version", i32 3}
22 !2 = !DILocalVariable(scope: !3)
23 !3 = distinct !DISubprogram(scope: null, file: !4, isLocal: false, isDefinition: true, isOptimized: false, unit: !12)
24 !4 = !DIFile(filename: "a.cpp", directory: "/tmp")
25 !5 = !DIExpression()
26 !6 = !DILocation(line: 0, scope: !3, inlinedAt: !7)
27 !7 = distinct !DILocation(line: 0, scope: !8)
28 !8 = distinct !DISubprogram(linkageName: "test_valid_metadata", scope: null, isLocal: false, isDefinition: true, isOptimized: false, unit: !12)
29 !9 = !DILocalVariable(scope: !10)
30 !10 = distinct !DISubprogram(scope: null, file: !4, isLocal: false, isDefinition: true, isOptimized: false, unit: !12)
31 !11 = !DILocation(line: 0, scope: !10)
32 !12 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: FullDebug, file: !4)
+0
-72
test/Transforms/AddDiscriminators/diamond.ll less more
None ; RUN: opt < %s -add-discriminators -S | FileCheck %s
1 ; RUN: opt < %s -passes=add-discriminators -S | FileCheck %s
2
3 ; Discriminator support for diamond-shaped CFG:
4 ; #1 void bar(int);
5 ; #2
6 ; #3 void foo(int i) {
7 ; #4 if (i > 10)
8 ; #5 bar(5); else bar(3);
9 ; #6 }
10
11 ; bar(5): discriminator 0
12 ; bar(3): discriminator 2
13
14 ; Function Attrs: uwtable
15 define void @_Z3fooi(i32 %i) #0 !dbg !4 {
16 %1 = alloca i32, align 4
17 store i32 %i, i32* %1, align 4
18 call void @llvm.dbg.declare(metadata i32* %1, metadata !11, metadata !12), !dbg !13
19 %2 = load i32, i32* %1, align 4, !dbg !14
20 %3 = icmp sgt i32 %2, 10, !dbg !16
21 br i1 %3, label %4, label %5, !dbg !17
22
23 ; <label>:4
24 call void @_Z3bari(i32 5), !dbg !18
25 br label %6, !dbg !18
26
27 ; <label>:5
28 call void @_Z3bari(i32 3), !dbg !19
29 ; CHECK: call void @_Z3bari(i32 3), !dbg ![[ELSE:[0-9]+]]
30 br label %6
31
32 ; <label>:6
33 ret void, !dbg !20
34 }
35
36 ; Function Attrs: nounwind readnone
37 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
38
39 declare void @_Z3bari(i32) #2
40
41 attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
42 attributes #1 = { nounwind readnone }
43 attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
44
45 !llvm.dbg.cu = !{!0}
46 !llvm.module.flags = !{!8, !9}
47 !llvm.ident = !{!10}
48
49 !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 253273)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
50 !1 = !DIFile(filename: "a.cc", directory: "/tmp")
51 !2 = !{}
52 !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
53 !5 = !DISubroutineType(types: !6)
54 !6 = !{null, !7}
55 !7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
56 !8 = !{i32 2, !"Dwarf Version", i32 4}
57 !9 = !{i32 2, !"Debug Info Version", i32 3}
58 !10 = !{!"clang version 3.8.0 (trunk 253273)"}
59 !11 = !DILocalVariable(name: "i", arg: 1, scope: !4, file: !1, line: 3, type: !7)
60 !12 = !DIExpression()
61 !13 = !DILocation(line: 3, column: 14, scope: !4)
62 !14 = !DILocation(line: 4, column: 7, scope: !15)
63 !15 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 7)
64 !16 = !DILocation(line: 4, column: 9, scope: !15)
65 !17 = !DILocation(line: 4, column: 7, scope: !4)
66 !18 = !DILocation(line: 5, column: 5, scope: !15)
67 !19 = !DILocation(line: 5, column: 18, scope: !15)
68 !20 = !DILocation(line: 6, column: 1, scope: !4)
69
70 ; CHECK: ![[ELSE]] = !DILocation(line: 5, column: 18, scope: ![[ELSEBLOCK:[0-9]+]])
71 ; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 2)
+0
-83
test/Transforms/AddDiscriminators/first-only.ll less more
None ; RUN: opt < %s -add-discriminators -S | FileCheck %s
1 ; RUN: opt < %s -passes=add-discriminators -S | FileCheck %s
2
3 ; Test that the only instructions that receive a new discriminator in
4 ; the block 'if.then' are those that share the same line number as
5 ; the branch in 'entry'.
6 ;
7 ; Original code:
8 ;
9 ; void foo(int i) {
10 ; int x, y;
11 ; if (i < 10) { x = i;
12 ; y = -i;
13 ; }
14 ; }
15
16 define void @foo(i32 %i) #0 !dbg !4 {
17 entry:
18 %i.addr = alloca i32, align 4
19 %x = alloca i32, align 4
20 %y = alloca i32, align 4
21 store i32 %i, i32* %i.addr, align 4
22 %0 = load i32, i32* %i.addr, align 4, !dbg !10
23 %cmp = icmp slt i32 %0, 10, !dbg !10
24 br i1 %cmp, label %if.then, label %if.end, !dbg !10
25
26 if.then: ; preds = %entry
27 %1 = load i32, i32* %i.addr, align 4, !dbg !12
28 store i32 %1, i32* %x, align 4, !dbg !12
29
30 %2 = load i32, i32* %i.addr, align 4, !dbg !14
31 ; CHECK: %2 = load i32, i32* %i.addr, align 4, !dbg ![[THEN:[0-9]+]]
32
33 %sub = sub nsw i32 0, %2, !dbg !14
34 ; CHECK: %sub = sub nsw i32 0, %2, !dbg ![[THEN]]
35
36 store i32 %sub, i32* %y, align 4, !dbg !14
37 ; CHECK: store i32 %sub, i32* %y, align 4, !dbg ![[THEN]]
38
39 br label %if.end, !dbg !15
40 ; CHECK: br label %if.end, !dbg ![[BR:[0-9]+]]
41
42 if.end: ; preds = %if.then, %entry
43 ret void, !dbg !16
44 ; CHECK: ret void, !dbg ![[END:[0-9]+]]
45 }
46
47 attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
48
49 !llvm.dbg.cu = !{!0}
50 !llvm.module.flags = !{!7, !8}
51 !llvm.ident = !{!9}
52
53 !0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 (trunk 199750) (llvm/trunk 199751)", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
54 !1 = !DIFile(filename: "first-only.c", directory: ".")
55 !2 = !{}
56 !4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, retainedNodes: !2)
57 !5 = !DIFile(filename: "first-only.c", directory: ".")
58 !6 = !DISubroutineType(types: !{null})
59 !7 = !{i32 2, !"Dwarf Version", i32 4}
60 !8 = !{i32 1, !"Debug Info Version", i32 3}
61 !9 = !{!"clang version 3.5 (trunk 199750) (llvm/trunk 199751)"}
62 !10 = !DILocation(line: 3, scope: !11)
63
64 !11 = distinct !DILexicalBlock(line: 3, column: 0, file: !1, scope: !4)
65 ; CHECK: ![[FOO:[0-9]+]] = distinct !DISubprogram(name: "foo"
66 ; CHECK: ![[BLOCK1:[0-9]+]] = distinct !DILexicalBlock(scope: ![[FOO]],{{.*}} line: 3)
67
68 !12 = !DILocation(line: 3, scope: !13)
69
70 !13 = distinct !DILexicalBlock(line: 3, column: 0, file: !1, scope: !11)
71 ; CHECK: !DILexicalBlockFile(scope: ![[BLOCK2:[0-9]+]],{{.*}} discriminator: 2)
72
73 !14 = !DILocation(line: 4, scope: !13)
74 ; CHECK: ![[BLOCK2]] = distinct !DILexicalBlock(scope: ![[BLOCK1]],{{.*}} line: 3)
75
76 !15 = !DILocation(line: 5, scope: !13)
77 ; CHECK: ![[THEN]] = !DILocation(line: 4, scope: ![[BLOCK2]])
78
79 !16 = !DILocation(line: 6, scope: !4)
80 ; CHECK: ![[BR]] = !DILocation(line: 5, scope: ![[BLOCK2]])
81 ; CHECK: ![[END]] = !DILocation(line: 6, scope: ![[FOO]])
82
+0
-83
test/Transforms/AddDiscriminators/inlined.ll less more
None ; RUN: opt < %s -add-discriminators -S | FileCheck %s
1 ;
2 ; Generated at -O3 from:
3 ; g();f(){for(;;){g();}}g(){__builtin___memset_chk(0,0,0,__builtin_object_size(1,0));}
4 ; The fact that everything is on one line is significant!
5 ;
6 ; This test ensures that inline info isn't dropped even if the call site and the
7 ; inlined function are defined on the same line.
8 source_filename = "t.c"
9 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
10 target triple = "arm64-apple-ios"
11
12 ; Function Attrs: noreturn nounwind ssp
13 define i32 @f() local_unnamed_addr #0 !dbg !7 {
14 entry:
15 %0 = tail call i64 @llvm.objectsize.i64.p0i8(i8* inttoptr (i64 1 to i8*), i1 false) #2, !dbg !11
16 br label %for.cond, !dbg !18
17
18 for.cond: ; preds = %for.cond, %entry
19 ; CHECK: %call.i
20 %call.i = tail call i8* @__memset_chk(i8* null, i32 0, i64 0, i64 %0) #2, !dbg !19
21 ; CHECK: br label %for.cond, !dbg ![[BR:[0-9]+]]
22 br label %for.cond, !dbg !20, !llvm.loop !21
23 }
24
25 ; Function Attrs: nounwind ssp
26 define i32 @g() local_unnamed_addr #1 !dbg !12 {
27 entry:
28 %0 = tail call i64 @llvm.objectsize.i64.p0i8(i8* inttoptr (i64 1 to i8*), i1 false), !dbg !22
29 %call = tail call i8* @__memset_chk(i8* null, i32 0, i64 0, i64 %0) #2, !dbg !23
30 ret i32 undef, !dbg !24
31 }
32
33 ; Function Attrs: nounwind
34 declare i8* @__memset_chk(i8*, i32, i64, i64) local_unnamed_addr #2
35
36 ; Function Attrs: nounwind readnone
37 declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) #3
38
39 attributes #0 = { noreturn nounwind ssp }
40 attributes #1 = { nounwind ssp }
41 attributes #2 = { nounwind }
42 attributes #3 = { nounwind readnone }
43
44 !llvm.dbg.cu = !{!0}
45 !llvm.module.flags = !{!3, !4, !5}
46 !llvm.ident = !{!6}
47
48 !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "LLVM version 4.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
49 !1 = !DIFile(filename: "t.c", directory: "/")
50 !2 = !{}
51 !3 = !{i32 2, !"Dwarf Version", i32 4}
52 !4 = !{i32 2, !"Debug Info Version", i32 3}
53 !5 = !{i32 1, !"PIC Level", i32 2}
54 !6 = !{!"LLVM version 4.0.0"}
55 ; CHECK: ![[F:.*]] = distinct !DISubprogram(name: "f",
56 !7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !2)
57 !8 = !DISubroutineType(types: !9)
58 !9 = !{!10}
59 !10 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
60 !11 = !DILocation(line: 1, column: 56, scope: !12, inlinedAt: !13)
61 !12 = distinct !DISubprogram(name: "g", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !2)
62 !13 = distinct !DILocation(line: 1, column: 17, scope: !14)
63 ; CHECK: ![[BF:.*]] = !DILexicalBlockFile(scope: ![[LB1:[0-9]+]],
64 ; CHECK-SAME: discriminator: 2)
65 !14 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 2)
66 ; CHECK: ![[LB1]] = distinct !DILexicalBlock(scope: ![[LB2:[0-9]+]],
67 ; CHECK-SAME: line: 1, column: 16)
68 !15 = distinct !DILexicalBlock(scope: !16, file: !1, line: 1, column: 16)
69 ; CHECK: ![[LB2]] = distinct !DILexicalBlock(scope: ![[LB3:[0-9]+]],
70 ; CHECK-SAME: line: 1, column: 9)
71 !16 = distinct !DILexicalBlock(scope: !17, file: !1, line: 1, column: 9)
72 ; CHECK: ![[LB3]] = distinct !DILexicalBlock(scope: ![[F]],
73 ; CHECK-SAME: line: 1, column: 9)
74 !17 = distinct !DILexicalBlock(scope: !7, file: !1, line: 1, column: 9)
75 !18 = !DILocation(line: 1, column: 9, scope: !7)
76 !19 = !DILocation(line: 1, column: 27, scope: !12, inlinedAt: !13)
77 ; CHECK: ![[BR]] = !DILocation(line: 1, column: 9, scope: !14)
78 !20 = !DILocation(line: 1, column: 9, scope: !14)
79 !21 = distinct !{!21, !18}
80 !22 = !DILocation(line: 1, column: 56, scope: !12)
81 !23 = !DILocation(line: 1, column: 27, scope: !12)
82 !24 = !DILocation(line: 1, column: 84, scope: !12)
+0
-134
test/Transforms/AddDiscriminators/invoke.ll less more
None ; RUN: opt < %s -add-discriminators -S | FileCheck %s
1 ; ModuleID = 'invoke.bc'
2 source_filename = "invoke.cpp"
3 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
4 target triple = "x86_64-apple-macosx10.14.0"
5
6 ; Function Attrs: ssp uwtable
7 define void @_Z3foov() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg !8 {
8 entry:
9 %exn.slot = alloca i8*
10 %ehselector.slot = alloca i32
11 ; CHECK: call void @_Z12bar_noexceptv({{.*}} !dbg ![[CALL1:[0-9]+]]
12 call void @_Z12bar_noexceptv() #4, !dbg !11
13 ; CHECK: call void @_Z12bar_noexceptv({{.*}} !dbg ![[CALL2:[0-9]+]]
14 call void @_Z12bar_noexceptv() #4, !dbg !13
15 invoke void @_Z3barv()
16 ; CHECK: unwind label {{.*}} !dbg ![[INVOKE:[0-9]+]]
17 to label %invoke.cont unwind label %lpad, !dbg !14
18
19 invoke.cont: ; preds = %entry
20 br label %try.cont, !dbg !15
21
22 lpad: ; preds = %entry
23 %0 = landingpad { i8*, i32 }
24 catch i8* null, !dbg !16
25 %1 = extractvalue { i8*, i32 } %0, 0, !dbg !16
26 store i8* %1, i8** %exn.slot, align 8, !dbg !16
27 %2 = extractvalue { i8*, i32 } %0, 1, !dbg !16
28 store i32 %2, i32* %ehselector.slot, align 4, !dbg !16
29 br label %catch, !dbg !16
30
31 catch: ; preds = %lpad
32 %exn = load i8*, i8** %exn.slot, align 8, !dbg !15
33 %3 = call i8* @__cxa_begin_catch(i8* %exn) #4, !dbg !15
34 invoke void @__cxa_rethrow() #5
35 to label %unreachable unwind label %lpad1, !dbg !17
36
37 lpad1: ; preds = %catch
38 %4 = landingpad { i8*, i32 }
39 cleanup, !dbg !19
40 %5 = extractvalue { i8*, i32 } %4, 0, !dbg !19
41 store i8* %5, i8** %exn.slot, align 8, !dbg !19
42 %6 = extractvalue { i8*, i32 } %4, 1, !dbg !19
43 store i32 %6, i32* %ehselector.slot, align 4, !dbg !19
44 invoke void @__cxa_end_catch()
45 to label %invoke.cont2 unwind label %terminate.lpad, !dbg !20
46
47 invoke.cont2: ; preds = %lpad1
48 br label %eh.resume, !dbg !20
49
50 try.cont: ; preds = %invoke.cont
51 ret void, !dbg !21
52
53 eh.resume: ; preds = %invoke.cont2
54 %exn3 = load i8*, i8** %exn.slot, align 8, !dbg !20
55 %sel = load i32, i32* %ehselector.slot, align 4, !dbg !20
56 %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn3, 0, !dbg !20
57 %lpad.val4 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1, !dbg !20
58 resume { i8*, i32 } %lpad.val4, !dbg !20
59
60 terminate.lpad: ; preds = %lpad1
61 %7 = landingpad { i8*, i32 }
62 catch i8* null, !dbg !20
63 %8 = extractvalue { i8*, i32 } %7, 0, !dbg !20
64 call void @__clang_call_terminate(i8* %8) #6, !dbg !20
65 unreachable, !dbg !20
66
67 unreachable: ; preds = %catch
68 unreachable
69 }
70
71 ; Function Attrs: nounwind
72 declare void @_Z12bar_noexceptv() #1
73
74 declare void @_Z3barv() #2
75
76 declare i32 @__gxx_personality_v0(...)
77
78 declare i8* @__cxa_begin_catch(i8*)
79
80 declare void @__cxa_rethrow()
81
82 declare void @__cxa_end_catch()
83
84 ; Function Attrs: noinline noreturn nounwind
85 define linkonce_odr hidden void @__clang_call_terminate(i8*) #3 {
86 %2 = call i8* @__cxa_begin_catch(i8* %0) #4
87 call void @_ZSt9terminatev() #6
88 unreachable
89 }
90
91 declare void @_ZSt9terminatev()
92
93 attributes #0 = { ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
94 attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
95 attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
96 attributes #3 = { noinline noreturn nounwind }
97 attributes #4 = { nounwind }
98 attributes #5 = { noreturn }
99 attributes #6 = { noreturn nounwind }
100
101 !llvm.dbg.cu = !{!0}
102 !llvm.module.flags = !{!3, !4, !5, !6}
103 !llvm.ident = !{!7}
104
105 ; CHECK: ![[CALL1]] = !DILocation(line: 7, column: 5, scope: ![[SCOPE1:[0-9]+]])
106 ; CHECK: ![[SCOPE1]] = distinct !DILexicalBlock(scope: !8, file: !1, line: 6, column: 7)
107 ; CHECK: ![[CALL2]] = !DILocation(line: 7, column: 21, scope: ![[SCOPE2:[0-9]+]])
108 ; CHECK: ![[SCOPE2]] = !DILexicalBlockFile(scope: ![[SCOPE1]], file: !1, discriminator: 2)
109 ; CHECK: ![[INVOKE]] = !DILocation(line: 7, column: 37, scope: ![[SCOPE3:[0-9]+]])
110 ; CHECK: ![[SCOPE3]] = !DILexicalBlockFile(scope: ![[SCOPE1]], file: !1, discriminator: 4)
111
112 !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 8.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: GNU)
113 !1 = !DIFile(filename: "invoke.cpp", directory: "examples")
114 !2 = !{}
115 !3 = !{i32 2, !"Dwarf Version", i32 4}
116 !4 = !{i32 2, !"Debug Info Version", i32 3}
117 !5 = !{i32 1, !"wchar_size", i32 4}
118 !6 = !{i32 7, !"PIC Level", i32 2}
119 !7 = !{!"clang version 8.0.0"}
120 !8 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 5, type: !9, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
121 !9 = !DISubroutineType(types: !10)
122 !10 = !{null}
123 !11 = !DILocation(line: 7, column: 5, scope: !12)
124 !12 = distinct !DILexicalBlock(scope: !8, file: !1, line: 6, column: 7)
125 !13 = !DILocation(line: 7, column: 21, scope: !12)
126 !14 = !DILocation(line: 7, column: 37, scope: !12)
127 !15 = !DILocation(line: 8, column: 3, scope: !12)
128 !16 = !DILocation(line: 12, column: 1, scope: !12)
129 !17 = !DILocation(line: 10, column: 5, scope: !18)
130 !18 = distinct !DILexicalBlock(scope: !8, file: !1, line: 9, column: 15)
131 !19 = !DILocation(line: 12, column: 1, scope: !18)
132 !20 = !DILocation(line: 11, column: 3, scope: !18)
133 !21 = !DILocation(line: 12, column: 1, scope: !8)
+0
-104
test/Transforms/AddDiscriminators/memcpy-discriminator.ll less more
None ; RUN: opt < %s -add-discriminators -sroa -S | FileCheck %s
1
2 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
3
4 ; Test case obtained from the following C code:
5
6 ; struct A {
7 ; int field1;
8 ; short field2;
9 ; };
10 ;
11 ; struct B {
12 ; struct A field1;
13 ; int field2;
14 ; };
15 ;
16 ;
17 ; extern struct B g_b;
18 ; extern int bar(struct B b, int c);
19 ;
20 ; int foo(int cond) {
21 ; int result = cond ? bar(g_b, 33) : 42;
22 ; return result;
23 ; }
24
25 ; In this test, global variable g_b is passed by copy to function bar. That
26 ; copy is located on the stack (see alloca %g_b.coerce), and it is initialized
27 ; by a memcpy call.
28 ;
29 ; SROA would split alloca %g_b.coerce into two (smaller disjoint) slices:
30 ; slice [0,8) and slice [8, 12). Users of the original alloca are rewritten
31 ; as users of the new alloca slices.
32 ; In particular, the memcpy is rewritten by SROA as two load/store pairs.
33 ;
34 ; Later on, mem2reg successfully promotes the new alloca slices to registers,
35 ; and loads %3 and %5 are made redundant by the loads obtained from the memcpy
36 ; intrinsic expansion.
37 ;
38 ; If pass AddDiscriminators doesn't assign a discriminator to the intrinsic
39 ; memcpy call, then the loads obtained from the memcpy expansion would not have
40 ; a correct discriminator.
41 ;
42 ; This test checks that the two new loads inserted by SROA in %cond.true
43 ; correctly reference a debug location with a non-zero discriminator. This test
44 ; also checks that the same discriminator is used by all instructions from
45 ; basic block %cond.true.
46
47 %struct.B = type { %struct.A, i32 }
48 %struct.A = type { i32, i16 }
49
50 @g_b = external global %struct.B, align 4
51
52 define i32 @foo(i32 %cond) #0 !dbg !5 {
53 entry:
54 %g_b.coerce = alloca { i64, i32 }, align 4
55 %tobool = icmp ne i32 %cond, 0, !dbg !7
56 br i1 %tobool, label %cond.true, label %cond.end, !dbg !7
57
58 cond.true:
59 ; CHECK-LABEL: cond.true:
60 ; CHECK: load i64, {{.*}}, !dbg ![[LOC:[0-9]+]]
61 ; CHECK-NEXT: load i32, {{.*}}, !dbg ![[LOC]]
62 ; CHECK-NEXT: %call = call i32 @bar({{.*}}), !dbg ![[LOC]]
63 ; CHECK-NEXT: br label %cond.end, !dbg ![[BR_LOC:[0-9]+]]
64
65 ; CHECK-DAG: ![[LOC]] = !DILocation(line: 16, column: 23, scope: ![[SCOPE:[0-9]+]])
66 ; CHECK-DAG: ![[SCOPE]] = !DILexicalBlockFile({{.*}}, discriminator: 2)
67 ; CHECK-DAG: ![[BR_LOC]] = !DILocation(line: 16, column: 16, scope: ![[SCOPE]])
68
69 %0 = bitcast { i64, i32 }* %g_b.coerce to i8*, !dbg !8
70 %1 = bitcast %struct.B* @g_b to i8*, !dbg !8
71 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 12, i1 false), !dbg !8
72 %2 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %g_b.coerce, i32 0, i32 0, !dbg !8
73 %3 = load i64, i64* %2, align 4, !dbg !8
74 %4 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %g_b.coerce, i32 0, i32 1, !dbg !8
75 %5 = load i32, i32* %4, align 4, !dbg !8
76 %call = call i32 @bar(i64 %3, i32 %5, i32 33), !dbg !8
77 br label %cond.end, !dbg !7
78
79 cond.end: ; preds = %entry, %cond.true
80 %cond1 = phi i32 [ %call, %cond.true ], [ 42, %entry ], !dbg !7
81 ret i32 %cond1, !dbg !9
82 }
83
84 declare i32 @bar(i64, i32, i32)
85
86 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
87
88 attributes #0 = { noinline nounwind uwtable }
89 attributes #1 = { argmemonly nounwind }
90
91 !llvm.dbg.cu = !{!0}
92 !llvm.module.flags = !{!3, !4}
93
94 !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2)
95 !1 = !DIFile(filename: "test.c", directory: ".")
96 !2 = !{}
97 !3 = !{i32 2, !"Dwarf Version", i32 4}
98 !4 = !{i32 2, !"Debug Info Version", i32 3}
99 !5 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 15, type: !6, isLocal: false, isDefinition: true, scopeLine: 15, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
100 !6 = !DISubroutineType(types: !2)
101 !7 = !DILocation(line: 16, column: 16, scope: !5)
102 !8 = !DILocation(line: 16, column: 23, scope: !5)
103 !9 = !DILocation(line: 17, column: 3, scope: !5)
+0
-72
test/Transforms/AddDiscriminators/multiple.ll less more
None ; RUN: opt < %s -add-discriminators -S | FileCheck %s
1 ; RUN: opt < %s -passes=add-discriminators -S | FileCheck %s
2
3 ; Discriminator support for multiple CFG paths on the same line.
4 ;
5 ; void foo(int i) {
6 ; int x;
7 ; if (i < 10) x = i; else x = -i;
8 ; }
9 ;
10 ; The two stores inside the if-then-else line must have different discriminator
11 ; values.
12
13 define void @foo(i32 %i) #0 !dbg !4 {
14 entry:
15 %i.addr = alloca i32, align 4
16 %x = alloca i32, align 4
17 store i32 %i, i32* %i.addr, align 4
18 %0 = load i32, i32* %i.addr, align 4, !dbg !10
19 %cmp = icmp slt i32 %0, 10, !dbg !10
20 br i1 %cmp, label %if.then, label %if.else, !dbg !10
21
22 if.then: ; preds = %entry
23 %1 = load i32, i32* %i.addr, align 4, !dbg !10
24 ; CHECK: %1 = load i32, i32* %i.addr, align 4, !dbg ![[THEN:[0-9]+]]
25
26 store i32 %1, i32* %x, align 4, !dbg !10
27 ; CHECK: store i32 %1, i32* %x, align 4, !dbg ![[THEN]]
28
29 br label %if.end, !dbg !10
30 ; CHECK: br label %if.end, !dbg ![[THEN]]
31
32 if.else: ; preds = %entry
33 %2 = load i32, i32* %i.addr, align 4, !dbg !10
34 ; CHECK: %2 = load i32, i32* %i.addr, align 4, !dbg ![[ELSE:[0-9]+]]
35
36 %sub = sub nsw i32 0, %2, !dbg !10
37 ; CHECK: %sub = sub nsw i32 0, %2, !dbg ![[ELSE]]
38
39 store i32 %sub, i32* %x, align 4, !dbg !10
40 ; CHECK: store i32 %sub, i32* %x, align 4, !dbg ![[ELSE]]
41
42 br label %if.end
43
44 if.end: ; preds = %if.else, %if.then
45 ret void, !dbg !12
46 }
47
48 attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
49
50 !llvm.dbg.cu = !{!0}
51 !llvm.module.flags = !{!7, !8}
52 !llvm.ident = !{!9}
53
54 !0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 (trunk 199750) (llvm/trunk 199751)", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
55 !1 = !DIFile(filename: "multiple.c", directory: ".")
56 !2 = !{}
57 !4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, retainedNodes: !2)
58 !5 = !DIFile(filename: "multiple.c", directory: ".")
59 !6 = !DISubroutineType(types: !{null, !13})
60 !13 = !DIBasicType(encoding: DW_ATE_signed, name: "int", size: 32, align: 32)
61 !7 = !{i32 2, !"Dwarf Version", i32 4}
62 !8 = !{i32 1, !"Debug Info Version", i32 3}
63 !9 = !{!"clang version 3.5 (trunk 199750) (llvm/trunk 199751)"}
64 !10 = !DILocation(line: 3, scope: !11)
65 !11 = distinct !DILexicalBlock(line: 3, column: 0, file: !1, scope: !4)
66 !12 = !DILocation(line: 4, scope: !4)
67
68 ; CHECK: ![[THEN]] = !DILocation(line: 3, scope: ![[THENBLOCK:[0-9]+]])
69 ; CHECK: ![[THENBLOCK]] = !DILexicalBlockFile(scope: ![[SCOPE:[0-9]+]],{{.*}} discriminator: 2)
70 ; CHECK: ![[ELSE]] = !DILocation(line: 3, scope: ![[ELSEBLOCK:[0-9]+]])
71 ; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile(scope: ![[SCOPE]],{{.*}} discriminator: 4)
+0
-76
test/Transforms/AddDiscriminators/no-discriminators.ll less more
None ; RUN: opt < %s -add-discriminators -S | FileCheck %s
1 ; RUN: opt < %s -passes=add-discriminators -S | FileCheck %s
2
3 ; We should not generate discriminators for DWARF versions prior to 4.
4 ;
5 ; Original code:
6 ;
7 ; int foo(long i) {
8 ; if (i < 5) return 2; else return 90;
9 ; }
10 ;
11 ; None of the !dbg nodes associated with the if() statement should be
12 ; altered. If they are, it means that the discriminators pass added a
13 ; new lexical scope.
14
15 define i32 @foo(i64 %i) #0 !dbg !4 {
16 entry:
17 %retval = alloca i32, align 4
18 %i.addr = alloca i64, align 8
19 store i64 %i, i64* %i.addr, align 8
20 call void @llvm.dbg.declare(metadata i64* %i.addr, metadata !13, metadata !DIExpression()), !dbg !14
21 %0 = load i64, i64* %i.addr, align 8, !dbg !15
22 ; CHECK: %0 = load i64, i64* %i.addr, align 8, !dbg ![[ENTRY:[0-9]+]]
23 %cmp = icmp slt i64 %0, 5, !dbg !15
24 ; CHECK: %cmp = icmp slt i64 %0, 5, !dbg ![[ENTRY:[0-9]+]]
25 br i1 %cmp, label %if.then, label %if.else, !dbg !15
26 ; CHECK: br i1 %cmp, label %if.then, label %if.else, !dbg ![[ENTRY:[0-9]+]]
27
28 if.then: ; preds = %entry
29 store i32 2, i32* %retval, !dbg !15
30 br label %return, !dbg !15
31
32 if.else: ; preds = %entry
33 store i32 90, i32* %retval, !dbg !15
34 br label %return, !dbg !15
35
36 return: ; preds = %if.else, %if.then
37 %1 = load i32, i32* %retval, !dbg !17
38 ret i32 %1, !dbg !17
39 }
40
41 ; Function Attrs: nounwind readnone
42 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
43
44 attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
45 attributes #1 = { nounwind readnone }
46
47 ; We should be able to add discriminators even in the absence of llvm.dbg.cu.
48 ; When using sample profiles, the front end will generate line tables but it
49 ; does not generate llvm.dbg.cu to prevent codegen from emitting debug info
50 ; to the final binary.
51 !llvm.dbg.cu = !{!0}
52 !llvm.module.flags = !{!10, !11}
53 !llvm.ident = !{!12}
54
55 ; CHECK: !{i32 2, !"Dwarf Version", i32 2}
56 !0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: FullDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
57 !1 = !DIFile(filename: "no-discriminators", directory: ".")
58 !2 = !{}
59 !4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, retainedNodes: !2)
60 ; CHECK: ![[FOO:[0-9]+]] = distinct !DISubprogram(name: "foo"
61 !5 = !DIFile(filename: "no-discriminators", directory: ".")
62 !6 = !DISubroutineType(types: !7)
63 !7 = !{!8, !9}
64 !8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
65 !9 = !DIBasicType(tag: DW_TAG_base_type, name: "long int", size: 64, align: 64, encoding: DW_ATE_signed)
66 !10 = !{i32 2, !"Dwarf Version", i32 2}
67 !11 = !{i32 1, !"Debug Info Version", i32 3}
68 !12 = !{!"clang version 3.5.0 "}
69 !13 = !DILocalVariable(name: "i", line: 1, arg: 1, scope: !4, file: !5, type: !9)
70 !14 = !DILocation(line: 1, scope: !4)
71 !15 = !DILocation(line: 2, scope: !16)
72 ; CHECK: ![[ENTRY]] = !DILocation(line: 2, scope: ![[BLOCK:[0-9]+]])
73 !16 = distinct !DILexicalBlock(line: 2, column: 0, file: !1, scope: !4)
74 ; CHECK: ![[BLOCK]] = distinct !DILexicalBlock(scope: ![[FOO]],{{.*}} line: 2)
75 !17 = !DILocation(line: 3, scope: !4)
+0
-101
test/Transforms/AddDiscriminators/oneline.ll less more
None ; RUN: opt < %s -add-discriminators -S | FileCheck %s
1 ; RUN: opt < %s -passes=add-discriminators -S | FileCheck %s
2
3 ; Discriminator support for code that is written in one line:
4 ; #1 int foo(int i) {
5 ; #2 if (i == 3 || i == 5) return 100; else return 99;
6 ; #3 }
7
8 ; i == 3: discriminator 0
9 ; i == 5: discriminator 2
10 ; return 100: discriminator 4
11 ; return 99: discriminator 6
12
13 define i32 @_Z3fooi(i32 %i) #0 !dbg !4 {
14 %1 = alloca i32, align 4
15 %2 = alloca i32, align 4
16 store i32 %i, i32* %2, align 4, !tbaa !13
17 call void @llvm.dbg.declare(metadata i32* %2, metadata !9, metadata !17), !dbg !18
18 %3 = load i32, i32* %2, align 4, !dbg !19, !tbaa !13
19 %4 = icmp eq i32 %3, 3, !dbg !21
20 br i1 %4, label %8, label %5, !dbg !22
21
22 ; <label>:5
23 %6 = load i32, i32* %2, align 4, !dbg !23, !tbaa !13
24 ; CHECK: %6 = load i32, i32* %2, align 4, !dbg ![[THEN1:[0-9]+]],{{.*}}
25
26 %7 = icmp eq i32 %6, 5, !dbg !24
27 ; CHECK: %7 = icmp eq i32 %6, 5, !dbg ![[THEN2:[0-9]+]]
28
29 br i1 %7, label %8, label %9, !dbg !25
30 ; CHECK: br i1 %7, label %8, label %9, !dbg ![[THEN3:[0-9]+]]
31
32 ; <label>:8
33 store i32 100, i32* %1, align 4, !dbg !26
34 ; CHECK: store i32 100, i32* %1, align 4, !dbg ![[ELSE:[0-9]+]]
35
36 br label %10, !dbg !26
37 ; CHECK: br label %10, !dbg ![[ELSE]]
38
39 ; <label>:9
40 store i32 99, i32* %1, align 4, !dbg !27
41 ; CHECK: store i32 99, i32* %1, align 4, !dbg ![[COMBINE:[0-9]+]]
42
43 br label %10, !dbg !27
44 ; CHECK: br label %10, !dbg ![[COMBINE]]
45
46 ; <label>:10
47 %11 = load i32, i32* %1, align 4, !dbg !28
48 ret i32 %11, !dbg !28
49 }
50
51 ; Function Attrs: nounwind readnone
52 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
53
54 attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
55 attributes #1 = { nounwind readnone }
56
57 !llvm.dbg.cu = !{!0}
58 !llvm.module.flags = !{!10, !11}
59 !llvm.ident = !{!12}
60
61 !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 250915)", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
62 !1 = !DIFile(filename: "a.cc", directory: "/usr/local/google/home/dehao/discr")
63 !2 = !{}
64 !4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !8)
65 !5 = !DISubroutineType(types: !6)
66 !6 = !{!7, !7}
67 !7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
68 !8 = !{!9}
69 !9 = !DILocalVariable(name: "i", arg: 1, scope: !4, file: !1, line: 1, type: !7)
70 !10 = !{i32 2, !"Dwarf Version", i32 4}
71 !11 = !{i32 2, !"Debug Info Version", i32 3}
72 !12 = !{!"clang version 3.8.0 (trunk 250915)"}
73 !13 = !{!14, !14, i64 0}
74 !14 = !{!"int", !15, i64 0}
75 !15 = !{!"omnipotent char", !16, i64 0}
76 !16 = !{!"Simple C/C++ TBAA"}
77 !17 = !DIExpression()
78 !18 = !DILocation(line: 1, column: 13, scope: !4)
79 !19 = !DILocation(line: 2, column: 7, scope: !20)
80 !20 = distinct !DILexicalBlock(scope: !4, file: !1, line: 2, column: 7)
81 !21 = !DILocation(line: 2, column: 9, scope: !20)
82 !22 = !DILocation(line: 2, column: 14, scope: !20)
83 !23 = !DILocation(line: 2, column: 17, scope: !20)
84 !24 = !DILocation(line: 2, column: 19, scope: !20)
85 !25 = !DILocation(line: 2, column: 7, scope: !4)
86 !26 = !DILocation(line: 2, column: 25, scope: !20)
87 !27 = !DILocation(line: 2, column: 42, scope: !20)
88 !28 = !DILocation(line: 3, column: 1, scope: !4)
89
90 ; CHECK: ![[F:.*]] = distinct !DISubprogram(name: "foo",
91 ; CHECK: ![[IF:.*]] = distinct !DILexicalBlock(scope: ![[F]],{{.*}}line: 2, column: 7)
92 ; CHECK: ![[THEN1]] = !DILocation(line: 2, column: 17, scope: ![[THENBLOCK:[0-9]+]])
93 ; CHECK: ![[THENBLOCK]] = !DILexicalBlockFile(scope: ![[IF]],{{.*}} discriminator: 2)
94 ; CHECK: ![[THEN2]] = !DILocation(line: 2, column: 19, scope: ![[THENBLOCK]])
95 ; CHECK: ![[THEN3]] = !DILocation(line: 2, column: 7, scope: ![[BRBLOCK:[0-9]+]])
96 ; CHECK: ![[BRBLOCK]] = !DILexicalBlockFile(scope: ![[F]],{{.*}} discriminator: 2)
97 ; CHECK: ![[ELSE]] = !DILocation(line: 2, column: 25, scope: ![[ELSEBLOCK:[0-9]+]])
98 ; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile(scope: ![[IF]],{{.*}} discriminator: 4)
99 ; CHECK: ![[COMBINE]] = !DILocation(line: 2, column: 42, scope: ![[COMBINEBLOCK:[0-9]+]])
100 ; CHECK: ![[COMBINEBLOCK]] = !DILexicalBlockFile(scope: ![[IF]],{{.*}} discriminator: 6)
+0
-235
test/Transforms/AggressiveInstCombine/masked-cmp.ll less more
None ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt < %s -aggressive-instcombine -S | FileCheck %s
2
3 ; PR37098 - https://bugs.llvm.org/show_bug.cgi?id=37098
4
5 define i32 @anyset_two_bit_mask(i32 %x) {
6 ; CHECK-LABEL: @anyset_two_bit_mask(
7 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 9
8 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
9 ; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
10 ; CHECK-NEXT: ret i32 [[TMP3]]
11 ;
12 %s = lshr i32 %x, 3
13 %o = or i32 %s, %x
14 %r = and i32 %o, 1
15 ret i32 %r
16 }
17
18 define i32 @anyset_four_bit_mask(i32 %x) {
19 ; CHECK-LABEL: @anyset_four_bit_mask(
20 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 297
21 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
22 ; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
23 ; CHECK-NEXT: ret i32 [[TMP3]]
24 ;
25 %t1 = lshr i32 %x, 3
26 %t2 = lshr i32 %x, 5
27 %t3 = lshr i32 %x, 8
28 %o1 = or i32 %t1, %x
29 %o2 = or i32 %t2, %t3
30 %o3 = or i32 %o1, %o2
31 %r = and i32 %o3, 1
32 ret i32 %r
33 }
34
35 ; We're not testing the LSB here, so all of the 'or' operands are shifts.
36
37 define i32 @anyset_three_bit_mask_all_shifted_bits(i32 %x) {
38 ; CHECK-LABEL: @anyset_three_bit_mask_all_shifted_bits(
39 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 296
40 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
41 ; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
42 ; CHECK-NEXT: ret i32 [[TMP3]]
43 ;
44 %t1 = lshr i32 %x, 3
45 %t2 = lshr i32 %x, 5
46 %t3 = lshr i32 %x, 8
47 %o2 = or i32 %t2, %t3
48 %o3 = or i32 %t1, %o2
49 %r = and i32 %o3, 1
50 ret i32 %r
51 }
52
53 ; Recognize the 'and' sibling pattern (all-bits-set). The 'and 1' may not be at the end.
54
55 define i32 @allset_two_bit_mask(i32 %x) {
56 ; CHECK-LABEL: @allset_two_bit_mask(
57 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 129
58 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 129
59 ; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
60 ; CHECK-NEXT: ret i32 [[TMP3]]
61 ;
62 %s = lshr i32 %x, 7
63 %o = and i32 %s, %x
64 %r = and i32 %o, 1
65 ret i32 %r
66 }
67
68 define i64 @allset_four_bit_mask(i64 %x) {
69 ; CHECK-LABEL: @allset_four_bit_mask(
70 ; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 30
71 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 30
72 ; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i64
73 ; CHECK-NEXT: ret i64 [[TMP3]]
74 ;
75 %t1 = lshr i64 %x, 1
76 %t2 = lshr i64 %x, 2
77 %t3 = lshr i64 %x, 3
78 %t4 = lshr i64 %x, 4
79 %a1 = and i64 %t4, 1
80 %a2 = and i64 %t2, %a1
81 %a3 = and i64 %a2, %t1
82 %r = and i64 %a3, %t3
83 ret i64 %r
84 }
85
86 declare void @use(i32)
87
88 ; negative test - extra use means the transform would increase instruction count
89
90 define i32 @allset_two_bit_mask_multiuse(i32 %x) {
91 ; CHECK-LABEL: @allset_two_bit_mask_multiuse(
92 ; CHECK-NEXT: [[S:%.*]] = lshr i32 [[X:%.*]], 7
93 ; CHECK-NEXT: [[O:%.*]] = and i32 [[S]], [[X]]
94 ; CHECK-NEXT: [[R:%.*]] = and i32 [[O]], 1
95 ; CHECK-NEXT: call void @use(i32 [[O]])
96 ; CHECK-NEXT: ret i32 [[R]]
97 ;
98 %s = lshr i32 %x, 7
99 %o = and i32 %s, %x
100 %r = and i32 %o, 1
101 call void @use(i32 %o)
102 ret i32 %r
103 }
104
105 ; negative test - missing 'and 1' mask, so more than the low bit is used here
106
107 define i8 @allset_three_bit_mask_no_and1(i8 %x) {
108 ; CHECK-LABEL: @allset_three_bit_mask_no_and1(
109 ; CHECK-NEXT: [[T1:%.*]] = lshr i8 [[X:%.*]], 1
110 ; CHECK-NEXT: [[T2:%.*]] = lshr i8 [[X]], 2
111 ; CHECK-NEXT: [[T3:%.*]] = lshr i8 [[X]], 3
112 ; CHECK-NEXT: [[A2:%.*]] = and i8 [[T1]], [[T2]]
113 ; CHECK-NEXT: [[R:%.*]] = and i8 [[A2]], [[T3]]
114 ; CHECK-NEXT: ret i8 [[R]]
115 ;
116 %t1 = lshr i8 %x, 1
117 %t2 = lshr i8 %x, 2
118 %t3 = lshr i8 %x, 3
119 %a2 = and i8 %t1, %t2
120 %r = and i8 %a2, %t3
121 ret i8 %r
122 }
123
124 ; This test demonstrates that the transform can be large. If the implementation
125 ; is slow or explosive (stack overflow due to recursion), it should be made efficient.
126
127 define i64 @allset_40_bit_mask(i64 %x) {
128 ; CHECK-LABEL: @allset_40_bit_mask(
129 ; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 2199023255550
130 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 2199023255550
131 ; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i64
132 ; CHECK-NEXT: ret i64 [[TMP3]]
133 ;
134 %t1 = lshr i64 %x, 1
135 %t2 = lshr i64 %x, 2
136 %t3 = lshr i64 %x, 3
137 %t4 = lshr i64 %x, 4
138 %t5 = lshr i64 %x, 5
139 %t6 = lshr i64 %x, 6
140 %t7 = lshr i64 %x, 7
141 %t8 = lshr i64 %x, 8
142 %t9 = lshr i64 %x, 9
143 %t10 = lshr i64 %x, 10
144 %t11 = lshr i64 %x, 11
145 %t12 = lshr i64 %x, 12
146 %t13 = lshr i64 %x, 13
147 %t14 = lshr i64 %x, 14
148 %t15 = lshr i64 %x, 15
149 %t16 = lshr i64 %x, 16
150 %t17 = lshr i64 %x, 17
151 %t18 = lshr i64 %x, 18
152 %t19 = lshr i64 %x, 19
153 %t20 = lshr i64 %x, 20
154 %t21 = lshr i64 %x, 21
155 %t22 = lshr i64 %x, 22
156 %t23 = lshr i64 %x, 23
157 %t24 = lshr i64 %x, 24
158 %t25 = lshr i64 %x, 25
159 %t26 = lshr i64 %x, 26
160 %t27 = lshr i64 %x, 27
161 %t28 = lshr i64 %x, 28
162 %t29 = lshr i64 %x, 29
163 %t30 = lshr i64 %x, 30
164 %t31 = lshr i64 %x, 31
165 %t32 = lshr i64 %x, 32
166 %t33 = lshr i64 %x, 33
167 %t34 = lshr i64 %x, 34
168 %t35 = lshr i64 %x, 35
169 %t36 = lshr i64 %x, 36
170 %t37 = lshr i64 %x, 37
171 %t38 = lshr i64 %x, 38
172 %t39 = lshr i64 %x, 39
173 %t40 = lshr i64 %x, 40
174
175 %a1 = and i64 %t1, 1
176 %a2 = and i64 %t2, %a1
177 %a3 = and i64 %t3, %a2
178 %a4 = and i64 %t4, %a3
179 %a5 = and i64 %t5, %a4
180 %a6 = and i64 %t6, %a5
181 %a7 = and i64 %t7, %a6
182 %a8 = and i64 %t8, %a7
183 %a9 = and i64 %t9, %a8
184 %a10 = and i64 %t10, %a9
185 %a11 = and i64 %t11, %a10
186 %a12 = and i64 %t12, %a11
187 %a13 = and i64 %t13, %a12
188 %a14 = and i64 %t14, %a13
189 %a15 = and i64 %t15, %a14
190 %a16 = and i64 %t16, %a15
191 %a17 = and i64 %t17, %a16
192 %a18 = and i64 %t18, %a17
193 %a19 = and i64 %t19, %a18
194 %a20 = and i64 %t20, %a19
195 %a21 = and i64 %t21, %a20
196 %a22 = and i64 %t22, %a21
197 %a23 = and i64 %t23, %a22
198 %a24 = and i64 %t24, %a23
199 %a25 = and i64 %t25, %a24
200 %a26 = and i64 %t26, %a25
201 %a27 = and i64 %t27, %a26
202 %a28 = and i64 %t28, %a27
203 %a29 = and i64 %t29, %a28
204 %a30 = and i64 %t30, %a29
205 %a31 = and i64 %t31, %a30
206 %a32 = and i64 %t32, %a31
207 %a33 = and i64 %t33, %a32
208 %a34 = and i64 %t34, %a33
209 %a35 = and i64 %t35, %a34
210 %a36 = and i64 %t36, %a35
211 %a37 = and i64 %t37, %a36
212 %a38 = and i64 %t38, %a37
213 %a39 = and i64 %t39, %a38
214 %a40 = and i64 %t40, %a39
215
216 ret i64 %a40
217 }
218
219 ; Verify that unsimplified code doesn't crash:
220 ; https://bugs.llvm.org/show_bug.cgi?id=37446
221
222 define i32 @PR37446(i32 %x) {
223 ; CHECK-LABEL: @PR37446(
224 ; CHECK-NEXT: [[SHR:%.*]] = lshr i32 1, 33
225 ; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHR]], 15
226 ; CHECK-NEXT: [[AND1:%.*]] = and i32 [[AND]], [[X:%.*]]
227 ; CHECK-NEXT: ret i32 [[AND1]]
228 ;
229 %shr = lshr i32 1, 33
230 %and = and i32 %shr, 15
231 %and1 = and i32 %and, %x
232 ret i32 %and1
233 }
234
+0
-476
test/Transforms/AggressiveInstCombine/rotate.ll less more
None ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt < %s -aggressive-instcombine -S | FileCheck %s
2
3 ; https://bugs.llvm.org/show_bug.cgi?id=34924
4
5 define i32 @rotl(i32 %a, i32 %b) {
6 ; CHECK-LABEL: @rotl(
7 ; CHECK-NEXT: entry:
8 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
9 ; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
10 ; CHECK: rotbb: