llvm.org GIT mirror llvm / 57e6b2d
SimplifyCFG: Use parallel-and and parallel-or mode to consolidate branch conditions. Merge consecutive if-regions if they contain identical statements. Both transformations reduce the number of branches. The transformations are guarded by a target hook and are currently enabled only for +R600, but their correctness has been tested on the X86 target using a variety of CPU benchmarks. Patch by: Mei Ye git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187278 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 6 years ago
17 changed file(s) with 633 addition(s) and 44 deletion(s). Raw diff Collapse all Expand all
169169 /// The returned cost is defined in terms of \c TargetCostConstants, see its
170170 /// comments for a detailed explanation of the cost values.
171171 virtual unsigned getUserCost(const User *U) const;
172
173 /// \brief hasBranchDivergence - Return true if branch divergence exists.
174 /// Branch divergence has a significantly negative impact on GPU performance
175 /// when threads in the same wavefront take different paths due to conditional
176 /// branches.
177 virtual bool hasBranchDivergence() const;
172178
173179 /// \brief Test whether calls to a function lower to actual program function
174180 /// calls.
8585 void initializeCFGOnlyPrinterPass(PassRegistry&);
8686 void initializeCFGOnlyViewerPass(PassRegistry&);
8787 void initializeCFGPrinterPass(PassRegistry&);
88 void initializeCFGSimplifyPassPass(PassRegistry&);
88 void initializeCFGOptimizePass(PassRegistry&);
89 void initializeCFGCanonicalizePass(PassRegistry&);
8990 void initializeStructurizeCFGPass(PassRegistry&);
9091 void initializeCFGViewerPass(PassRegistry&);
9192 void initializeCalculateSpillWeightsPass(PassRegistry&);
195195 // CFGSimplification - Merge basic blocks, eliminate unreachable blocks,
196196 // simplify terminator instructions, etc...
197197 //
198 FunctionPass *createCFGSimplificationPass();
198 FunctionPass *createCFGSimplificationPass(bool IsTargetAware = false);
199199
200200 //===----------------------------------------------------------------------===//
201201 //
3838 class TargetLibraryInfo;
3939 class TargetTransformInfo;
4040 class DIBuilder;
41 class AliasAnalysis;
4142
4243 template class SmallVectorImpl;
4344
135136 /// the basic block that was pointed to.
136137 ///
137138 bool SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
138 const DataLayout *TD = 0);
139 const DataLayout *TD = 0, AliasAnalysis *AA = 0);
139140
140141 /// FoldBranchToCommonDest - If this basic block is ONLY a setcc and a branch,
141142 /// and if a predecessor branches to us and one of our successors, fold the
8585
8686 unsigned TargetTransformInfo::getUserCost(const User *U) const {
8787 return PrevTTI->getUserCost(U);
88 }
89
90 bool TargetTransformInfo::hasBranchDivergence() const {
91 return PrevTTI->hasBranchDivergence();
8892 }
8993
9094 bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
419423 U->getOperand(0)->getType() : 0);
420424 }
421425
426 bool hasBranchDivergence() const { return false; }
427
422428 bool isLoweredToCall(const Function *F) const {
423429 // FIXME: These should almost certainly not be handled here, and instead
424430 // handled with the help of TLI or the target itself. This was largely
6363 return (TargetTransformInfo*)this;
6464 return this;
6565 }
66
67 virtual bool hasBranchDivergence() const;
6668
6769 /// \name Scalar TTI Implementations
6870 /// @{
123125 return new BasicTTI(TM);
124126 }
125127
128 bool BasicTTI::hasBranchDivergence() const { return false; }
126129
127130 bool BasicTTI::isLegalAddImmediate(int64_t imm) const {
128131 return getTLI()->isLegalAddImmediate(imm);
4343 FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
4444 FunctionPass *createAMDGPUIndirectAddressingPass(TargetMachine &tm);
4545 FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
46
47 /// \brief Creates an AMDGPU-specific Target Transformation Info pass.
48 ImmutablePass *
49 createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM);
4650
4751 extern Target TheAMDGPUTarget;
4852
104104 return new AMDGPUPassConfig(this, PM);
105105 }
106106
107 //===----------------------------------------------------------------------===//
108 // AMDGPU Analysis Pass Setup
109 //===----------------------------------------------------------------------===//
110
111 void AMDGPUTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
112 // Add first the target-independent BasicTTI pass, then our AMDGPU pass. This
113 // allows the AMDGPU pass to delegate to the target independent layer when
114 // appropriate.
115 PM.add(createBasicTargetTransformInfoPass(this));
116 PM.add(createAMDGPUTargetTransformInfoPass(this));
117 }
118
107119 bool
108120 AMDGPUPassConfig::addPreISel() {
109121 const AMDGPUSubtarget &ST = TM->getSubtarget();
6060 }
6161 virtual const DataLayout *getDataLayout() const { return &Layout; }
6262 virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
63
64 /// \brief Register R600 analysis passes with a pass manager.
65 virtual void addAnalysisPasses(PassManagerBase &PM);
6366 };
6467
6568 } // End namespace llvm
0 //===-- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass ---------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a TargetTransformInfo analysis pass specific to the
11 /// AMDGPU target machine. It uses the target's detailed information to provide
12 /// more precise answers to certain TTI queries, while letting the target
13 /// independent and default TTI implementations handle the rest.
14 ///
15 //===----------------------------------------------------------------------===//
16
17 #define DEBUG_TYPE "AMDGPUtti"
18 #include "AMDGPU.h"
19 #include "AMDGPUTargetMachine.h"
20 #include "llvm/Analysis/TargetTransformInfo.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Target/TargetLowering.h"
23 #include "llvm/Target/CostTable.h"
24 using namespace llvm;
25
26 // Declare the pass initialization routine locally as target-specific passes
27 // don't have a target-wide initialization entry point, and so we rely on the
28 // pass constructor initialization.
29 namespace llvm {
30 void initializeAMDGPUTTIPass(PassRegistry &);
31 }
32
33 namespace {
34
35 class AMDGPUTTI : public ImmutablePass, public TargetTransformInfo {
36 const AMDGPUTargetMachine *TM;
37 const AMDGPUSubtarget *ST;
38 const AMDGPUTargetLowering *TLI;
39
40 /// Estimate the overhead of scalarizing an instruction. Insert and Extract
41 /// are set if the result needs to be inserted and/or extracted from vectors.
42 unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
43 // NOTE(review): getScalarizationOverhead is only declared; no definition or caller appears in the part of this file shown here.
44 public:
45 AMDGPUTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) { // Must never run: the body aborts immediately.
46 llvm_unreachable("This pass cannot be directly constructed");
47 }
48
49 AMDGPUTTI(const AMDGPUTargetMachine *TM)
50 : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
51 TLI(TM->getTargetLowering()) {
52 initializeAMDGPUTTIPass(*PassRegistry::getPassRegistry());
53 }
54
55 virtual void initializePass() { pushTTIStack(this); }
56
57 virtual void finalizePass() { popTTIStack(); }
58
59 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
60 TargetTransformInfo::getAnalysisUsage(AU);
61 }
62
63 /// Pass identification.
64 static char ID;
65
66 /// Provide necessary pointer adjustments for the two base classes.
67 virtual void *getAdjustedAnalysisPointer(const void *ID) {
68 if (ID == &TargetTransformInfo::ID)
69 return (TargetTransformInfo *)this;
70 return this;
71 }
72 /// @{
73 virtual bool hasBranchDivergence() const;
74
75 /// @}
76 };
77
78 } // end anonymous namespace
79
80 INITIALIZE_AG_PASS(AMDGPUTTI, TargetTransformInfo, "AMDGPUtti",
81 "AMDGPU Target Transform Info", true, true, false)
82 char AMDGPUTTI::ID = 0;
83
84 ImmutablePass *
85 llvm::createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM) {
86 return new AMDGPUTTI(TM);
87 }
88
89 bool AMDGPUTTI::hasBranchDivergence() const { return true; }
2222 AMDGPUMachineFunction.cpp
2323 AMDGPUSubtarget.cpp
2424 AMDGPUTargetMachine.cpp
25 AMDGPUTargetTransformInfo.cpp
2526 AMDGPUISelLowering.cpp
2627 AMDGPUConvertToISA.cpp
2728 AMDGPUInstrInfo.cpp
234234 }
235235
236236 MPM.add(createAggressiveDCEPass()); // Delete dead instructions
237 MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
237 MPM.add(createCFGSimplificationPass(true)); // Merge & remove BBs
238238 MPM.add(createInstructionCombiningPass()); // Clean up after everything.
239239
240240 // As an experimental mode, run any vectorization passes in a separate
370370 PM.add(createJumpThreadingPass());
371371
372372 // Delete basic blocks, which optimization passes may have killed.
373 PM.add(createCFGSimplificationPass());
373 PM.add(createCFGSimplificationPass(true));
374374
375375 // Now that we have optimized the program, discard unreachable functions.
376376 PM.add(createGlobalDCEPass());
5656 initializeSROAPass(Registry);
5757 initializeSROA_DTPass(Registry);
5858 initializeSROA_SSAUpPass(Registry);
59 initializeCFGSimplifyPassPass(Registry);
59 initializeCFGCanonicalizePass(Registry);
60 initializeCFGOptimizePass(Registry);
6061 initializeStructurizeCFGPass(Registry);
6162 initializeSinkingPass(Registry);
6263 initializeTailCallElimPass(Registry);
2626 #include "llvm/ADT/SmallVector.h"
2727 #include "llvm/ADT/Statistic.h"
2828 #include "llvm/Analysis/TargetTransformInfo.h"
29 #include "llvm/Analysis/AliasAnalysis.h"
2930 #include "llvm/IR/Attributes.h"
3031 #include "llvm/IR/Constants.h"
3132 #include "llvm/IR/DataLayout.h"
4041 STATISTIC(NumSimpl, "Number of blocks simplified");
4142
4243 namespace {
43 struct CFGSimplifyPass : public FunctionPass {
44 static char ID; // Pass identification, replacement for typeid
45 CFGSimplifyPass() : FunctionPass(ID) {
46 initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
47 }
48
49 virtual bool runOnFunction(Function &F);
50
51 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
52 AU.addRequired();
53 }
54 };
55 }
56
57 char CFGSimplifyPass::ID = 0;
58 INITIALIZE_PASS_BEGIN(CFGSimplifyPass, "simplifycfg", "Simplify the CFG",
59 false, false)
44 struct CFGSimplifyPass : public FunctionPass {
45 CFGSimplifyPass(char &ID, bool isTargetAware)
46 : FunctionPass(ID), IsTargetAware(isTargetAware) {}
47 virtual bool runOnFunction(Function &F);
48
49 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
50 AU.addRequired();
51 }
52 private:
53 AliasAnalysis *AA;
54 bool IsTargetAware; // Should the pass be target-aware?
55 };
56
57 // CFGSimplifyPass that does optimizations.
58 struct CFGOptimize : public CFGSimplifyPass {
59 static char ID; // Pass identification, replacement for typeid
60 public:
61 CFGOptimize() : CFGSimplifyPass(ID, true) {
62 initializeCFGOptimizePass(*PassRegistry::getPassRegistry());
63 }
64 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
65 AU.addRequired();
66 AU.addRequired();
67 }
68 };
69
70 // CFGSimplifyPass that does canonicalizations.
71 struct CFGCanonicalize : public CFGSimplifyPass {
72 static char ID; // Pass identification, replacement for typeid
73 public:
74 CFGCanonicalize() : CFGSimplifyPass(ID, false) {
75 initializeCFGCanonicalizePass(*PassRegistry::getPassRegistry());
76 }
77 };
78 }
79
80 char CFGCanonicalize::ID = 0;
81 char CFGOptimize::ID = 0;
82 INITIALIZE_PASS_BEGIN(CFGCanonicalize, "simplifycfg", "Simplify the CFG", false,
83 false)
6084 INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
61 INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG",
62 false, false)
85 INITIALIZE_PASS_END(CFGCanonicalize, "simplifycfg", "Simplify the CFG", false,
86 false)
87 INITIALIZE_PASS_BEGIN(CFGOptimize, "optimizecfg", "optimize the CFG", false,
88 false)
89 INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
90 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
91 INITIALIZE_PASS_END(CFGOptimize, "optimizecfg", "Optimize the CFG", false,
92 false)
6393
6494 // Public interface to the CFGSimplification pass
65 FunctionPass *llvm::createCFGSimplificationPass() {
66 return new CFGSimplifyPass();
95 FunctionPass *llvm::createCFGSimplificationPass(bool IsTargetAware) {
96 if (IsTargetAware)
97 return new CFGOptimize();
98 else
99 return new CFGCanonicalize();
67100 }
68101
69102 /// changeToUnreachable - Insert an unreachable instruction before the specified
300333 /// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function,
301334 /// iterating until no more changes are made.
302335 static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
303 const DataLayout *TD) {
336 const DataLayout *TD, AliasAnalysis *AA) {
304337 bool Changed = false;
305338 bool LocalChange = true;
306339 while (LocalChange) {
309342 // Loop over all of the basic blocks and remove them if they are unneeded...
310343 //
311344 for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
312 if (SimplifyCFG(BBIt++, TTI, TD)) {
345 if (SimplifyCFG(BBIt++, TTI, TD, AA)) {
313346 LocalChange = true;
314347 ++NumSimpl;
315348 }
323356 // simplify the CFG.
324357 //
325358 bool CFGSimplifyPass::runOnFunction(Function &F) {
359 if (IsTargetAware)
360 AA = &getAnalysis();
361 else
362 AA = NULL;
326363 const TargetTransformInfo &TTI = getAnalysis();
327364 const DataLayout *TD = getAnalysisIfAvailable();
328365 bool EverChanged = removeUnreachableBlocksFromFn(F);
329366 EverChanged |= mergeEmptyReturnBlocks(F);
330 EverChanged |= iterativelySimplifyCFG(F, TTI, TD);
367 EverChanged |= iterativelySimplifyCFG(F, TTI, TD, AA);
331368
332369 // If neither pass changed anything, we're done.
333370 if (!EverChanged) return false;
341378 return true;
342379
343380 do {
344 EverChanged = iterativelySimplifyCFG(F, TTI, TD);
381 EverChanged = iterativelySimplifyCFG(F, TTI, TD, AA);
345382 EverChanged |= removeUnreachableBlocksFromFn(F);
346383 } while (EverChanged);
347384
1818 #include "llvm/ADT/SmallPtrSet.h"
1919 #include "llvm/ADT/SmallVector.h"
2020 #include "llvm/ADT/Statistic.h"
21 #include "llvm/Analysis/AliasAnalysis.h"
2122 #include "llvm/Analysis/InstructionSimplify.h"
2223 #include "llvm/Analysis/TargetTransformInfo.h"
2324 #include "llvm/Analysis/ValueTracking.h"
6465 HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
6566 cl::desc("Hoist conditional stores if an unconditional store preceeds"));
6667
68 static cl::opt
69 ParallelAndOr("simplifycfg-parallel-and-or", cl::Hidden, cl::init(true),
70 cl::desc("Use parallel-and-or mode for branch conditions"));
71
6772 STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
6873 STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
6974 STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
8994 class SimplifyCFGOpt {
9095 const TargetTransformInfo &TTI;
9196 const DataLayout *const TD;
97 AliasAnalysis *AA;
9298
9399 Value *isValueEqualityComparison(TerminatorInst *TI);
94100 BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
106112 bool SimplifyIndirectBr(IndirectBrInst *IBI);
107113 bool SimplifyUncondBranch(BranchInst *BI, IRBuilder <> &Builder);
108114 bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder);
115 /// \brief Use parallel-and or parallel-or to generate conditions for
116 /// conditional branches.
117 bool SimplifyParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0);
118 /// \brief If \param BB is the merge block of an if-region, attempt to merge
119 /// the if-region with an adjacent if-region upstream if two if-regions
120 /// contain identical instructions.
121 bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0);
122 /// \brief Compare a pair of blocks: \param Block1 and \param Block2, which
123 /// are from two if-regions whose entry blocks are \param Head1 and \param
124 /// Head2. \returns true if \param Block1 and \param Block2 contain identical
125 /// instructions, and have no memory reference alias with \param Head2.
126 /// This is used as a legality check for merging if-regions.
127 bool CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
128 BasicBlock *Block1, BasicBlock *Block2);
109129
110130 public:
111 SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout *TD)
112 : TTI(TTI), TD(TD) {}
131 SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout *TD,
132 AliasAnalysis *AA)
133 : TTI(TTI), TD(TD), AA(AA) {}
113134 bool run(BasicBlock *BB);
114135 };
115136 }
196217 }
197218
198219
199 /// GetIfCondition - Given a basic block (BB) with two predecessors (and at
200 /// least one PHI node in it), check to see if the merge at this block is due
220 /// GetIfCondition - Given a basic block (BB) with two predecessors,
221 /// check to see if the merge at this block is due
201222 /// to an "if condition". If so, return the boolean condition that determines
202223 /// which entry into BB will be taken. Also, return by references the block
203224 /// that will be entered from if the condition is true, and the block that will
207228 /// instructions in them.
208229 static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
209230 BasicBlock *&IfFalse) {
210 PHINode *SomePHI = cast(BB->begin());
211 assert(SomePHI->getNumIncomingValues() == 2 &&
212 "Function can only handle blocks with 2 predecessors!");
213 BasicBlock *Pred1 = SomePHI->getIncomingBlock(0);
214 BasicBlock *Pred2 = SomePHI->getIncomingBlock(1);
231 PHINode *SomePHI = dyn_cast(BB->begin());
232 BasicBlock *Pred1 = NULL;
233 BasicBlock *Pred2 = NULL;
234
235 if (SomePHI) {
236 if (SomePHI->getNumIncomingValues() != 2)
237 return NULL;
238 Pred1 = SomePHI->getIncomingBlock(0);
239 Pred2 = SomePHI->getIncomingBlock(1);
240 } else {
241 pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
242 if (PI == PE) // No predecessor
243 return NULL;
244 Pred1 = *PI++;
245 if (PI == PE) // Only one predecessor
246 return NULL;
247 Pred2 = *PI++;
248 if (PI != PE) // More than two predecessors
249 return NULL;
250 }
215251
216252 // We can only handle branches. Other control flow will be lowered to
217253 // branches if possible anyway.
40654101 return false;
40664102 }
40674103
4104 /// If \param [in] BB has more than one predecessor that is a conditional
4105 /// branch, attempt to use parallel and/or for the branch condition. \returns
4106 /// true on success.
4107 ///
4108 /// Before:
4109 ///   ......
4110 ///   %cmp10 = fcmp une float %tmp1, %tmp2
4111 ///   br i1 %cmp10, label %if.then, label %lor.rhs
4112 ///
4113 /// lor.rhs:
4114 ///   ......
4115 ///   %cmp11 = fcmp une float %tmp3, %tmp4
4116 ///   br i1 %cmp11, label %if.then, label %if.end
4117 ///
4118 /// if.end: // the merge block
4119 ///   ......
4120 ///
4121 /// if.then: // has two predecessors, both of which end in a conditional branch.
4122 ///   ......
4123 ///   br label %if.end;
4124 ///
4125 /// After:
4126 ///   ......
4127 ///   %cmp10 = fcmp une float %tmp1, %tmp2
4128 ///   ......
4129 ///   %cmp11 = fcmp une float %tmp3, %tmp4
4130 ///   %cmp12 = or i1 %cmp10, %cmp11 // parallel-or mode.
4131 ///   br i1 %cmp12, label %if.then, label %if.end
4132 ///
4133 /// if.end:
4134 ///   ......
4135 ///
4136 /// if.then:
4137 ///   ......
4138 ///   br label %if.end;
4139 ///
4140 /// Current implementation handles two cases.
4141 /// Case 1: \param BB is on the else-path.
4142 ///
4143 ///          BB1
4144 ///        /     |
4145 ///       BB2    |
4146 ///      /   \   |
4147 ///     BB3   \  |     where BB1, BB2 contain conditional branches.
4148 ///      \    |  /     BB3 contains an unconditional branch.
4149 ///       \   | /      BB4 corresponds to \param BB which is also the merge.
4150 ///  BB => BB4
4151 ///
4152 ///
4153 /// Corresponding source code:
4154 ///
4155 ///   if (a == b && c == d)
4156 ///     statement; // BB3
4157 ///
4158 /// Case 2: \param BB is on the then-path.
4159 ///
4160 ///            BB1
4161 ///           /  |
4162 ///          |  BB2
4163 ///          \  / |      where BB1, BB2 contain conditional branches.
4164 ///    BB => BB3  |      BB3 contains an unconditional branch and corresponds
4165 ///            \  /      to \param BB. BB4 is the merge.
4166 ///            BB4
4167 ///
4168 /// Corresponding source code:
4169 ///
4170 ///   if (a == b || c == d)
4171 ///     statement; // BB3
4172 ///
4173 /// In both cases, \param BB is the common successor of conditional branches.
4174 /// In Case 1, \param BB (BB4) has an unconditional branch (BB3) as
4175 /// its predecessor. In Case 2, \param BB (BB3) only has conditional branches
4176 /// as its predecessors.
4177 ///
4178 bool SimplifyCFGOpt::SimplifyParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
4179 Pass *P) { // NOTE(review): P is not referenced anywhere in this body.
4180 PHINode *PHI = dyn_cast(BB->begin());
4181 if (PHI)
4182 return false; // For simplicity, avoid cases containing PHI nodes.
4183
4184 BasicBlock *LastCondBlock = NULL;
4185 BasicBlock *FirstCondBlock = NULL;
4186 BasicBlock *UnCondBlock = NULL;
4187 int Idx = -1; // Successor index (0 or 1) through which every conditional predecessor reaches BB; -1 until the first one is seen.
4188
4189 // Check predecessors of \param BB.
4190 SmallPtrSet Preds(pred_begin(BB), pred_end(BB));
4191 for (SmallPtrSetIterator PI = Preds.begin(), PE = Preds.end();
4192 PI != PE; ++PI) {
4193 BasicBlock *Pred = *PI;
4194 BranchInst *PBI = dyn_cast(Pred->getTerminator());
4195
4196 // All predecessors should terminate with a branch.
4197 if (!PBI)
4198 return false;
4199
4200 BasicBlock *PP = Pred->getSinglePredecessor();
4201
4202 if (PBI->isUnconditional()) {
4203 // Case 1: Pred (BB3) ends in an unconditional branch. It must
4204 // have a single predecessor (BB2) that is also a predecessor
4205 // of \param BB (BB4), and its address must not be taken.
4206 // At most one such unconditional predecessor is allowed;
4207 // a second one makes the pattern unrecognizable.
4208 if (UnCondBlock || !PP || (Preds.count(PP) == 0) ||
4209 Pred->hasAddressTaken())
4210 return false;
4211
4212 UnCondBlock = Pred;
4213 continue;
4214 }
4215
4216 // Only conditional branches are allowed beyond this point.
4217 assert(PBI->isConditional());
4218
4219 // Condition's unique use should be the branch instruction.
4220 Value *PC = PBI->getCondition();
4221 if (!PC || !PC->hasOneUse())
4222 return false;
4223
4224 if (PP && Preds.count(PP)) {
4225 // These are internal condition blocks to be merged from, e.g.,
4226 // BB2 in both cases.
4227 // Their address must not be taken.
4228 if (Pred->hasAddressTaken())
4229 return false;
4230
4231 // Every instruction before the branch will be hoisted into the first
4232 // condition block, so each one must be safe to execute speculatively.
4233 for (BasicBlock::iterator BI = Pred->begin(), BE = PBI; BI != BE;) {
4234 Instruction *CI = BI++;
4235 if (isa(CI) ||
4236 !isSafeToSpeculativelyExecute(CI))
4237 return false;
4238 }
4239 } else {
4240 // This is the condition block to be merged into, e.g. BB1 in
4241 // both cases. There can be only one.
4242 if (FirstCondBlock)
4243 return false;
4244 FirstCondBlock = Pred;
4245 }
4246
4247 // Find whether BB is uniformly on the true (or false) path
4248 // for all of its predecessors.
4249 BasicBlock *PS1 = PBI->getSuccessor(0);
4250 BasicBlock *PS2 = PBI->getSuccessor(1);
4251 BasicBlock *PS = (PS1 == BB) ? PS2 : PS1;
4252 int CIdx = (PS1 == BB) ? 0 : 1;
4253
4254 if (Idx == -1)
4255 Idx = CIdx;
4256 else if (CIdx != Idx)
4257 return false;
4258
4259 // PS is the successor which is not BB. Check successors to identify
4260 // the last conditional branch.
4261 if (Preds.count(PS) == 0) {
4262 // Case 2.
4263 // BB must have a unique successor, and that successor must not start with a PHI node.
4264 TerminatorInst *TBB = BB->getTerminator();
4265 if (TBB->getNumSuccessors() != 1)
4266 return false;
4267
4268 BasicBlock *SBB = TBB->getSuccessor(0);
4269 PHI = dyn_cast(SBB->begin());
4270 if (PHI)
4271 return false;
4272
4273 // PS (BB4) should be BB's successor.
4274 if (SBB != PS)
4275 return false;
4276 LastCondBlock = Pred;
4277 } else {
4278 BranchInst *BPS = dyn_cast(PS->getTerminator());
4279 if (BPS && BPS->isUnconditional()) {
4280 // Case 1: PS (BB3) should end in an unconditional branch.
4281 LastCondBlock = Pred;
4282 }
4283 }
4284 }
4285
4286 if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock))
4287 return false;
4288
4289 // Do the transformation. NOTE(review): the dyn_cast result below is dereferenced without a null check; the loop above only assigns FirstCondBlock to a block ending in a conditional branch, which cast (as used inside the loop below) would state directly.
4290 BasicBlock *CB;
4291 bool Iteration = true;
4292 BasicBlock::iterator ItOld = Builder.GetInsertPoint();
4293 BranchInst *PBI = dyn_cast(FirstCondBlock->getTerminator());
4294 Value *PC = PBI->getCondition();
4295 do {
4296 CB = PBI->getSuccessor(1 - Idx); // The successor that is not BB, i.e. the block to fold in next.
4297 // Delete the conditional branch, then append all of CB's instructions (including its branch).
4298 FirstCondBlock->getInstList().pop_back();
4299 FirstCondBlock->getInstList().splice(FirstCondBlock->end(), CB->getInstList());
4300 PBI = cast(FirstCondBlock->getTerminator());
4301 Value *CC = PBI->getCondition();
4302 // Merge conditions.
4303 Builder.SetInsertPoint(PBI);
4304 Value *NC;
4305 if (Idx == 0)
4306 // Case 2, use parallel or.
4307 NC = Builder.CreateOr(PC, CC);
4308 else
4309 // Case 1, use parallel and.
4310 NC = Builder.CreateAnd(PC, CC);
4311
4312 PBI->replaceUsesOfWith(CC, NC);
4313 PC = NC;
4314 if (CB == LastCondBlock)
4315 Iteration = false;
4316 // Remove internal conditional branches.
4317 CB->dropAllReferences();
4318 // Make CB unreachable and let downstream cleanup delete the block.
4319 new UnreachableInst(CB->getContext(), CB);
4320 } while (Iteration);
4321
4322 Builder.SetInsertPoint(ItOld);
4323 DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock);
4324 return true;
4325 }
4326
4327 /// Compare blocks from two if-regions, where \param Head1 is the entry of the
4328 /// 1st if-region. \param Head2 is the entry of the 2nd if-region. \param
4329 /// Block1 is a block in the 1st if-region to compare. \param Block2 is a block
4330 /// in the 2nd if-region to compare. \returns true if \param Block1 and \param
4331 /// Block2 have identical instructions and do not have memory reference alias
4332 /// with \param Head2, or if both paths are empty (each block is its own head).
4333 ///
4334 bool SimplifyCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
4335 BasicBlock *Block1, BasicBlock *Block2) {
4336 TerminatorInst *PTI2 = Head2->getTerminator();
4337 Instruction *PBI2 = Head2->begin();
4338
4339 bool eq1 = (Block1 == Head1);
4340 bool eq2 = (Block2 == Head2);
4341 if (eq1 || eq2) {
4342 // An empty then-path or else-path: the paths match only if both are empty.
4343 return (eq1 == eq2);
4344 }
4345
4346 // Check whether instructions in Block1 and Block2 are identical
4347 // and do not alias with instructions in Head2. Terminators are excluded.
4348 BasicBlock::iterator iter1 = Block1->begin();
4349 BasicBlock::iterator end1 = Block1->getTerminator();
4350 BasicBlock::iterator iter2 = Block2->begin();
4351 BasicBlock::iterator end2 = Block2->getTerminator();
4352
4353 while (1) {
4354 if (iter1 == end1) {
4355 if (iter2 != end2) // Block2 has extra instructions.
4356 return false;
4357 break;
4358 }
4359
4360 if (!iter1->isIdenticalTo(iter2))
4361 return false;
4362
4363 // Illegal to remove instructions with side effects except
4364 // non-volatile stores.
4365 if (iter1->mayHaveSideEffects()) {
4366 Instruction *CurI = &*iter1;
4367 StoreInst *SI = dyn_cast(CurI);
4368 if (!SI || SI->isVolatile())
4369 return false;
4370 }
4371
4372 // For simplicity and speed, reject any instruction that may read memory,
4373 // so that no read-after-write dependency analysis is needed.
4374 if (iter1->mayReadFromMemory())
4375 return false;
4376
4377 if (iter1->mayWriteToMemory()) {
4378 for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) {
4379 if (BI->mayReadFromMemory() || BI->mayWriteToMemory()) {
4380 // Check alias with Head2; without alias analysis, conservatively refuse.
4381 if (!AA || AA->alias(iter1, BI))
4382 return false;
4383 }
4384 }
4385 }
4386 ++iter1;
4387 ++iter2;
4388 }
4389
4390 return true;
4391 }
4392
4393 /// Check whether \param BB is the merge block of an if-region. If yes, check
4394 /// whether there exists an adjacent if-region upstream, the two if-regions
4395 /// contain identical instructions and can be legally merged. \returns true if
4396 /// the two if-regions are merged.
4397 ///
4398 /// From:
4399 ///   if (a)
4400 ///     statement;
4401 ///   if (b)
4402 ///     statement;
4403 ///
4404 /// To:
4405 ///   if (a || b)
4406 ///     statement;
4407 ///
4408 bool SimplifyCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder,
4409 Pass *P) { // NOTE(review): P is not referenced anywhere in this body.
4410 BasicBlock *IfTrue2, *IfFalse2;
4411 Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2);
4412 Instruction *CInst2 = dyn_cast_or_null(IfCond2);
4413 if (!CInst2)
4414 return false;
4415 // The block defining the condition is used as the second region's entry. NOTE(review): this presumes the condition is computed in the same block as the branch that tests it - confirm.
4416 BasicBlock *SecondEntryBlock = CInst2->getParent();
4417 if (SecondEntryBlock->hasAddressTaken())
4418 return false;
4419
4420 BasicBlock *IfTrue1, *IfFalse1;
4421 Value *IfCond1 = GetIfCondition(SecondEntryBlock, IfTrue1, IfFalse1);
4422 Instruction *CInst1 = dyn_cast_or_null(IfCond1);
4423 if (!CInst1)
4424 return false;
4425
4426 BasicBlock *FirstEntryBlock = CInst1->getParent();
4427
4428 // Either then-path or else-path should be empty (an empty path is one whose block is the entry block itself).
4429 if ((IfTrue1 != FirstEntryBlock) && (IfFalse1 != FirstEntryBlock))
4430 return false;
4431 if ((IfTrue2 != SecondEntryBlock) && (IfFalse2 != SecondEntryBlock))
4432 return false;
4433
4434 TerminatorInst *PTI2 = SecondEntryBlock->getTerminator();
4435 Instruction *PBI2 = SecondEntryBlock->begin();
4436
4437 if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1, IfTrue2))
4438 return false;
4439
4440 if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfFalse1, IfFalse2))
4441 return false;
4442
4443 // Reject the merge if SecondEntryBlock has side effects or is not safe to speculate.
4444 for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) {
4445 Instruction *CI = BI;
4446 if (isa(CI) || CI->mayHaveSideEffects() ||
4447 !isSafeToSpeculativelyExecute(CI))
4448 return false;
4449 }
4450
4451 // Merge SecondEntryBlock into FirstEntryBlock: drop the first branch, then append the second block's instructions (including its branch).
4452 FirstEntryBlock->getInstList().pop_back();
4453 FirstEntryBlock->getInstList().splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList());
4454 BranchInst *PBI = dyn_cast(FirstEntryBlock->getTerminator()); // NOTE(review): dereferenced on the next line without a null check.
4455 Value *CC = PBI->getCondition();
4456 BasicBlock::iterator ItOld = Builder.GetInsertPoint();
4457 Builder.SetInsertPoint(PBI);
4458 Value *NC = Builder.CreateOr(CInst1, CC); // NOTE(review): always an OR, whichever path is the empty one; when the then-path is empty the statement runs on the false edge - confirm OR is still the right combination there.
4459 PBI->replaceUsesOfWith(CC, NC);
4460 Builder.SetInsertPoint(ItOld);
4461
4462 // Remove IfTrue1
4463 if (IfTrue1 != FirstEntryBlock) {
4464 IfTrue1->dropAllReferences();
4465 IfTrue1->eraseFromParent();
4466 }
4467
4468 // Remove IfFalse1
4469 if (IfFalse1 != FirstEntryBlock) {
4470 IfFalse1->dropAllReferences();
4471 IfFalse1->eraseFromParent();
4472 }
4473
4474 // Remove SecondEntryBlock, whose instructions now live in FirstEntryBlock.
4475 SecondEntryBlock->dropAllReferences();
4476 SecondEntryBlock->eraseFromParent();
4477 DEBUG(dbgs() << "If conditions merged into:\n" << *FirstEntryBlock);
4478 return true;
4479 }
4480
40684481 /// Check if passing a value to an instruction will cause undefined behavior.
40694482 static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
40704483 Constant *C = dyn_cast(V);
41674580 return true;
41684581
41694582 IRBuilder<> Builder(BB);
4583 // Whether to optimize conditional branches.
4584 bool OptCB = (ParallelAndOr && AA && TTI.hasBranchDivergence());
4585
4586 if (OptCB && SimplifyParallelAndOr(BB, Builder))
4587 return true;
41704588
41714589 // If there is a trivial two-entry PHI node in this basic block, and we can
41724590 // eliminate it, do so now.
41954613 if (SimplifyIndirectBr(IBI)) return true;
41964614 }
41974615
4616 if (OptCB && MergeIfRegion(BB, Builder))
4617 return true;
4618
41984619 return Changed;
41994620 }
42004621
42044625 /// of the CFG. It returns true if a modification was made.
42054626 ///
42064627 bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
4207 const DataLayout *TD) {
4208 return SimplifyCFGOpt(TTI, TD).run(BB);
4209 }
4628 const DataLayout *TD, AliasAnalysis *AA) {
4629 return SimplifyCFGOpt(TTI, TD, AA).run(BB);
4630 }
117117 initializeGVNPass(R);
118118 initializeMemCpyOptPass(R);
119119 initializeDCEPass(R);
120 initializeCFGSimplifyPassPass(R);
120 initializeCFGCanonicalizePass(R);
121121 }
122122
123123 bool LTOCodeGenerator::addModule(LTOModule* mod, std::string& errMsg) {
666666 FPasses.reset(new FunctionPassManager(M.get()));
667667 if (TD)
668668 FPasses->add(new DataLayout(*TD));
669 if (TM.get())
670 TM->addAnalysisPasses(*FPasses);
671
669672 }
670673
671674 if (PrintBreakpoints) {