llvm.org GIT mirror llvm / c38c850
[PGO] Control Height Reduction Summary: Control height reduction merges conditional blocks of code and reduces the number of conditional branches in the hot path based on profiles. if (hot_cond1) { // Likely true. do_stg_hot1(); } if (hot_cond2) { // Likely true. do_stg_hot2(); } -> if (hot_cond1 && hot_cond2) { // Hot path. do_stg_hot1(); do_stg_hot2(); } else { // Cold path. if (hot_cond1) { do_stg_hot1(); } if (hot_cond2) { do_stg_hot2(); } } This speeds up some internal benchmarks up to ~30%. Reviewers: davidxl Reviewed By: davidxl Subscribers: xbolva00, dmgreen, mehdi_amini, llvm-commits, mgorny Differential Revision: https://reviews.llvm.org/D50591 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@341386 91177308-0d34-0410-b5e6-96231b3b80d8 Hiroshi Yamauchi 1 year, 17 days ago
11 changed file(s) with 3983 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
102102 void initializeConstantHoistingLegacyPassPass(PassRegistry&);
103103 void initializeConstantMergeLegacyPassPass(PassRegistry&);
104104 void initializeConstantPropagationPass(PassRegistry&);
105 void initializeControlHeightReductionLegacyPassPass(PassRegistry&);
105106 void initializeCorrelatedValuePropagationPass(PassRegistry&);
106107 void initializeCostModelAnalysisPass(PassRegistry&);
107108 void initializeCrossDSOCFIPass(PassRegistry&);
8787 (void) llvm::createCalledValuePropagationPass();
8888 (void) llvm::createConstantMergePass();
8989 (void) llvm::createConstantPropagationPass();
90 (void) llvm::createControlHeightReductionLegacyPass();
9091 (void) llvm::createCostModelAnalysisPass();
9192 (void) llvm::createDeadArgEliminationPass();
9293 (void) llvm::createDeadCodeEliminationPass();
0 //===- ControlHeightReduction.h - Control Height Reduction ------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass merges conditional blocks of code and reduces the number of
10 // conditional branches in the hot paths based on profiles.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef LLVM_TRANSFORMS_INSTRUMENTATION_CONTROLHEIGHTREDUCTION_H
15 #define LLVM_TRANSFORMS_INSTRUMENTATION_CONTROLHEIGHTREDUCTION_H
16
17 #include "llvm/Analysis/LoopInfo.h"
18 #include "llvm/IR/PassManager.h"
19 #include "llvm/Transforms/Scalar/LoopPassManager.h"
20
21 namespace llvm {
22
23 class ControlHeightReductionPass :
24 public PassInfoMixin {
25 public:
26 ControlHeightReductionPass();
27 PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
28 };
29 } // end namespace llvm
30
31 #endif // LLVM_TRANSFORMS_INSTRUMENTATION_CONTROLHEIGHTREDUCTION_H
112112 /// This function returns a new pass that downgrades the debug info in the
113113 /// module to line tables only.
114114 ModulePass *createStripNonLineTableDebugInfoPass();
115
116 //===----------------------------------------------------------------------===//
117 //
118 // ControlHeightReduction - Merges conditional blocks of code and reduces the
119 // number of conditional branches in the hot paths based on profiles.
120 //
121 FunctionPass *createControlHeightReductionLegacyPass();
115122 }
116123
117124 #endif
8686 #include "llvm/Transforms/IPO/WholeProgramDevirt.h"
8787 #include "llvm/Transforms/InstCombine/InstCombine.h"
8888 #include "llvm/Transforms/Instrumentation/BoundsChecking.h"
89 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
8990 #include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
9091 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
9192 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
192193 static Regex DefaultAliasRegex(
193194 "^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$");
194195
196 static cl::opt
197 EnableCHR("enable-chr-npm", cl::init(true), cl::Hidden,
198 cl::desc("Enable control height reduction optimization (CHR)"));
199
195200 static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) {
196201 switch (Level) {
197202 case PassBuilder::O0:
484489 FPM.addPass(SimplifyCFGPass());
485490 FPM.addPass(InstCombinePass());
486491 invokePeepholeEPCallbacks(FPM, Level);
492
493 if (EnableCHR && Level == O3 && PGOOpt &&
494 (!PGOOpt->ProfileUseFile.empty() || !PGOOpt->SampleProfileFile.empty()))
495 FPM.addPass(ControlHeightReductionPass());
487496
488497 return FPM;
489498 }
147147 FUNCTION_PASS("break-crit-edges", BreakCriticalEdgesPass())
148148 FUNCTION_PASS("callsite-splitting", CallSiteSplittingPass())
149149 FUNCTION_PASS("consthoist", ConstantHoistingPass())
150 FUNCTION_PASS("chr", ControlHeightReductionPass())
150151 FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass())
151152 FUNCTION_PASS("dce", DCEPass())
152153 FUNCTION_PASS("div-rem-pairs", DivRemPairsPass())
151151 "enable-gvn-sink", cl::init(false), cl::Hidden,
152152 cl::desc("Enable the GVN sinking pass (default = off)"));
153153
154 static cl::opt
155 EnableCHR("enable-chr", cl::init(true), cl::Hidden,
156 cl::desc("Enable control height reduction optimization (CHR)"));
157
154158 PassManagerBuilder::PassManagerBuilder() {
155159 OptLevel = 2;
156160 SizeLevel = 0;
410414 // Clean up after everything.
411415 addInstructionCombiningPass(MPM);
412416 addExtensionsToPM(EP_Peephole, MPM);
417
418 if (EnableCHR && OptLevel >= 3 &&
419 (!PGOInstrUse.empty() || !PGOSampleUse.empty()))
420 MPM.add(createControlHeightReductionLegacyPass());
413421 }
414422
415423 void PassManagerBuilder::populateModulePassManager(
11 AddressSanitizer.cpp
22 BoundsChecking.cpp
33 CGProfile.cpp
4 ControlHeightReduction.cpp
45 DataFlowSanitizer.cpp
56 GCOVProfiling.cpp
67 MemorySanitizer.cpp
0 //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass merges conditional blocks of code and reduces the number of
10 // conditional branches in the hot paths based on profiles.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
15 #include "llvm/Transforms/Utils.h"
16 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
17 #include "llvm/Transforms/Utils/Cloning.h"
18 #include "llvm/Transforms/Utils/ValueMapper.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/DenseSet.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Analysis/BlockFrequencyInfo.h"
24 #include "llvm/Analysis/ProfileSummaryInfo.h"
25 #include "llvm/Analysis/RegionInfo.h"
26 #include "llvm/Analysis/RegionIterator.h"
27 #include "llvm/Analysis/ValueTracking.h"
28 #include "llvm/IR/CFG.h"
29 #include "llvm/IR/Dominators.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/IR/MDBuilder.h"
32 #include "llvm/Support/BranchProbability.h"
33 #include "llvm/Support/MemoryBuffer.h"
34 #include "llvm/Transforms/Scalar.h"
35
36 #include
37 #include
38 #include
#include <cstdlib>
39
40 using namespace llvm;
41
42 #define DEBUG_TYPE "chr"
43
44 #define CHR_DEBUG(X) LLVM_DEBUG(X)
45
46 static cl::opt ForceCHR("force-chr", cl::init(false), cl::Hidden,
47 cl::desc("Apply CHR for all functions"));
48
49 static cl::opt CHRBiasThreshold(
50 "chr-bias-threshold", cl::init(0.99), cl::Hidden,
51 cl::desc("CHR considers a branch bias greater than this ratio as biased"));
52
53 static cl::opt CHRMergeThreshold(
54 "chr-merge-threshold", cl::init(2), cl::Hidden,
55 cl::desc("CHR merges a group of N branches/selects where N >= this value"));
56
57 static cl::opt CHRModuleList(
58 "chr-module-list", cl::init(""), cl::Hidden,
59 cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
60
61 static cl::opt CHRFunctionList(
62 "chr-function-list", cl::init(""), cl::Hidden,
63 cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
64
65 static StringSet<> CHRModules;
66 static StringSet<> CHRFunctions;
67
68 static void ParseCHRFilterFiles() {
69 if (!CHRModuleList.empty()) {
70 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
71 if (!FileOrErr) {
72 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
73 std::exit(1);
74 }
75 StringRef Buf = FileOrErr->get()->getBuffer();
76 SmallVector Lines;
77 Buf.split(Lines, '\n');
78 for (StringRef Line : Lines) {
79 Line = Line.trim();
80 if (!Line.empty())
81 CHRModules.insert(Line);
82 }
83 }
84 if (!CHRFunctionList.empty()) {
85 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
86 if (!FileOrErr) {
87 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
88 std::exit(1);
89 }
90 StringRef Buf = FileOrErr->get()->getBuffer();
91 SmallVector Lines;
92 Buf.split(Lines, '\n');
93 for (StringRef Line : Lines) {
94 Line = Line.trim();
95 if (!Line.empty())
96 CHRFunctions.insert(Line);
97 }
98 }
99 }
100
101 namespace {
102 class ControlHeightReductionLegacyPass : public FunctionPass {
103 public:
104 static char ID;
105
106 ControlHeightReductionLegacyPass() : FunctionPass(ID) {
107 initializeControlHeightReductionLegacyPassPass(
108 *PassRegistry::getPassRegistry());
109 ParseCHRFilterFiles();
110 }
111
112 bool runOnFunction(Function &F) override;
113 void getAnalysisUsage(AnalysisUsage &AU) const override {
114 AU.addRequired();
115 AU.addRequired();
116 AU.addRequired();
117 AU.addRequired();
118 AU.addPreserved();
119 }
120 };
121 } // end anonymous namespace
122
123 char ControlHeightReductionLegacyPass::ID = 0;
124
125 INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass,
126 "chr",
127 "Reduce control height in the hot paths",
128 false, false)
129 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
130 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
131 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
132 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
133 INITIALIZE_PASS_END(ControlHeightReductionLegacyPass,
134 "chr",
135 "Reduce control height in the hot paths",
136 false, false)
137
138 FunctionPass *llvm::createControlHeightReductionLegacyPass() {
139 return new ControlHeightReductionLegacyPass();
140 }
141
142 namespace {
143
144 struct CHRStats {
145 CHRStats() : NumBranches(0), NumBranchesDelta(0),
146 WeightedNumBranchesDelta(0) {}
147 void print(raw_ostream &OS) const {
148 OS << "CHRStats: NumBranches " << NumBranches
149 << " NumBranchesDelta " << NumBranchesDelta
150 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
151 }
152 uint64_t NumBranches; // The original number of conditional branches /
153 // selects
154 uint64_t NumBranchesDelta; // The decrease of the number of conditional
155 // branches / selects in the hot paths due to CHR.
156 uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile
157 // count at the scope entry.
158 };
159
160 inline raw_ostream &operator<<(raw_ostream &OS, const CHRStats &Stats) {
161 Stats.print(OS);
162 return OS;
163 }
164
165 // RegInfo - some properties of a Region.
166 struct RegInfo {
167 RegInfo() : R(nullptr), HasBranch(false) {}
168 RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {}
169 Region *R;
170 bool HasBranch;
171 SmallVector Selects;
172 };
173
174 typedef DenseMap> HoistStopMapTy;
175
176 // CHRScope - a sequence of regions to CHR together. It corresponds to a
177 // sequence of conditional blocks. It can have subscopes which correspond to
178 // nested conditional blocks. Nested CHRScopes form a tree.
179 class CHRScope {
180 public:
181 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
182 assert(RI.R && "Null RegionIn");
183 RegInfos.push_back(RI);
184 }
185
186 Region *getParentRegion() {
187 assert(RegInfos.size() > 0 && "Empty CHRScope");
188 Region *Parent = RegInfos[0].R->getParent();
189 assert(Parent && "Unexpected to call this on the top-level region");
190 return Parent;
191 }
192
193 BasicBlock *getEntryBlock() {
194 assert(RegInfos.size() > 0 && "Empty CHRScope");
195 return RegInfos.front().R->getEntry();
196 }
197
198 BasicBlock *getExitBlock() {
199 assert(RegInfos.size() > 0 && "Empty CHRScope");
200 return RegInfos.back().R->getExit();
201 }
202
203 bool appendable(CHRScope *Next) {
204 // The next scope is appendable only if this scope is directly connected to
205 // it (which implies it post-dominates this scope) and this scope dominates
206 // it (no edge to the next scope outside this scope).
207 BasicBlock *NextEntry = Next->getEntryBlock();
208 if (getExitBlock() != NextEntry)
209 // Not directly connected.
210 return false;
211 Region *LastRegion = RegInfos.back().R;
212 for (BasicBlock *Pred : predecessors(NextEntry))
213 if (!LastRegion->contains(Pred))
214 // There's an edge going into the entry of the next scope from outside
215 // of this scope.
216 return false;
217 return true;
218 }
219
220 void append(CHRScope *Next) {
221 assert(RegInfos.size() > 0 && "Empty CHRScope");
222 assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
223 assert(getParentRegion() == Next->getParentRegion() &&
224 "Must be siblings");
225 assert(getExitBlock() == Next->getEntryBlock() &&
226 "Must be adjacent");
227 for (RegInfo &RI : Next->RegInfos)
228 RegInfos.push_back(RI);
229 for (CHRScope *Sub : Next->Subs)
230 Subs.push_back(Sub);
231 }
232
233 void addSub(CHRScope *SubIn) {
234 #ifndef NDEBUG
235 bool is_child = false;
236 for (RegInfo &RI : RegInfos)
237 if (RI.R == SubIn->getParentRegion()) {
238 is_child = true;
239 break;
240 }
241 assert(is_child && "Must be a child");
242 #endif
243 Subs.push_back(SubIn);
244 }
245
246 // Split this scope into two at the boundary region, which will belong to the
247 // tail, and return the tail.
248 CHRScope *split(Region *Boundary) {
249 assert(Boundary && "Boundary null");
250 assert(RegInfos.begin()->R != Boundary &&
251 "Can't be split at beginning");
252 auto BoundaryIt = std::find_if(RegInfos.begin(), RegInfos.end(),
253 [&Boundary](const RegInfo& RI) {
254 return Boundary == RI.R;
255 });
256 if (BoundaryIt == RegInfos.end())
257 return nullptr;
258 SmallVector TailRegInfos;
259 SmallVector TailSubs;
260 TailRegInfos.insert(TailRegInfos.begin(), BoundaryIt, RegInfos.end());
261 RegInfos.resize(BoundaryIt - RegInfos.begin());
262 DenseSet TailRegionSet;
263 for (RegInfo &RI : TailRegInfos)
264 TailRegionSet.insert(RI.R);
265 for (auto It = Subs.begin(); It != Subs.end(); ) {
266 CHRScope *Sub = *It;
267 assert(Sub && "null Sub");
268 Region *Parent = Sub->getParentRegion();
269 if (TailRegionSet.count(Parent)) {
270 TailSubs.push_back(Sub);
271 It = Subs.erase(It);
272 } else {
273 assert(std::find_if(RegInfos.begin(), RegInfos.end(),
274 [&Parent](const RegInfo& RI) {
275 return Parent == RI.R;
276 }) != RegInfos.end() &&
277 "Must be in head");
278 ++It;
279 }
280 }
281 assert(HoistStopMap.empty() && "MapHoistStops must be empty");
282 return new CHRScope(TailRegInfos, TailSubs);
283 }
284
285 bool contains(Instruction *I) const {
286 BasicBlock *Parent = I->getParent();
287 for (const RegInfo &RI : RegInfos)
288 if (RI.R->contains(Parent))
289 return true;
290 return false;
291 }
292
293 void print(raw_ostream &OS) const;
294
295 SmallVector RegInfos; // Regions that belong to this scope
296 SmallVector Subs; // Subscopes.
297
298 // The instruction at which to insert the CHR conditional branch (and hoist
299 // the dependent condition values).
300 Instruction *BranchInsertPoint;
301
302 // True-biased and false-biased regions (conditional blocks),
303 // respectively. Used only for the outermost scope and includes regions in
304 // subscopes. The rest are unbiased.
305 DenseSet TrueBiasedRegions;
306 DenseSet FalseBiasedRegions;
307 // Among the biased regions, the regions that get CHRed.
308 SmallVector CHRRegions;
309
310 // True-biased and false-biased selects, respectively. Used only for the
311 // outermost scope and includes ones in subscopes.
312 DenseSet TrueBiasedSelects;
313 DenseSet FalseBiasedSelects;
314
315 // Map from one of the above regions to the instructions to stop
316 // hoisting instructions at through use-def chains.
317 HoistStopMapTy HoistStopMap;
318
319 private:
320 CHRScope(SmallVector &RegInfosIn,
321 SmallVector &SubsIn)
322 : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {}
323 };
324
325 inline raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
326 Scope.print(OS);
327 return OS;
328 }
329
330 class CHR {
331 public:
332 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
333 ProfileSummaryInfo &PSIin, RegionInfo &RIin)
334 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin) {}
335
336 ~CHR() {
337 for (CHRScope *Scope : Scopes) {
338 delete Scope;
339 }
340 }
341
342 bool run();
343
344 private:
345 // See the comments in CHR::run() for the high level flow of the algorithm and
346 // what the following functions do.
347
348 void findScopes(SmallVectorImpl &Output) {
349 Region *R = RI.getTopLevelRegion();
350 CHRScope *Scope = findScopes(R, nullptr, nullptr, Output);
351 if (Scope) {
352 Output.push_back(Scope);
353 }
354 }
355 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
356 SmallVectorImpl &Scopes);
357 CHRScope *findScope(Region *R);
358 void checkScopeHoistable(CHRScope *Scope);
359
360 void splitScopes(SmallVectorImpl &Input,
361 SmallVectorImpl &Output);
362 SmallVector splitScope(CHRScope *Scope,
363 CHRScope *Outer,
364 DenseSet *OuterConditionValues,
365 Instruction *OuterInsertPoint,
366 SmallVectorImpl &Output,
367 DenseSet &Unhoistables);
368
369 void classifyBiasedScopes(SmallVectorImpl &Scopes);
370 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
371
372 void filterScopes(SmallVectorImpl &Input,
373 SmallVectorImpl &Output);
374
375 void setCHRRegions(SmallVectorImpl &Input,
376 SmallVectorImpl &Output);
377 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
378
379 void sortScopes(SmallVectorImpl &Input,
380 SmallVectorImpl &Output);
381
382 void transformScopes(SmallVectorImpl &CHRScopes);
383 void transformScopes(CHRScope *Scope, DenseSet &TrivialPHIs);
384 void cloneScopeBlocks(CHRScope *Scope,
385 BasicBlock *PreEntryBlock,
386 BasicBlock *ExitBlock,
387 Region *LastRegion,
388 ValueToValueMapTy &VMap);
389 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
390 BasicBlock *EntryBlock,
391 BasicBlock *NewEntryBlock,
392 ValueToValueMapTy &VMap);
393 void fixupBranchesAndSelects(CHRScope *Scope,
394 BasicBlock *PreEntryBlock,
395 BranchInst *MergedBR,
396 uint64_t ProfileCount);
397 void fixupBranch(Region *R,
398 CHRScope *Scope,
399 IRBuilder<> &IRB,
400 Value *&MergedCondition, BranchProbability &CHRBranchBias);
401 void fixupSelect(SelectInst* SI,
402 CHRScope *Scope,
403 IRBuilder<> &IRB,
404 Value *&MergedCondition, BranchProbability &CHRBranchBias);
405 void addToMergedCondition(bool IsTrueBiased, Value *Cond,
406 Instruction *BranchOrSelect,
407 CHRScope *Scope,
408 IRBuilder<> &IRB,
409 Value *&MergedCondition);
410
411 Function &F;
412 BlockFrequencyInfo &BFI;
413 DominatorTree &DT;
414 ProfileSummaryInfo &PSI;
415 RegionInfo &RI;
416 CHRStats Stats;
417
418 // All the true-biased regions in the function
419 DenseSet TrueBiasedRegionsGlobal;
420 // All the false-biased regions in the function
421 DenseSet FalseBiasedRegionsGlobal;
422 // All the true-biased selects in the function
423 DenseSet TrueBiasedSelectsGlobal;
424 // All the false-biased selects in the function
425 DenseSet FalseBiasedSelectsGlobal;
426 // A map from biased regions to their branch bias
427 DenseMap BranchBiasMap;
428 // A map from biased selects to their branch bias
429 DenseMap SelectBiasMap;
430 // All the scopes.
431 DenseSet Scopes;
432 };
433
434 } // end anonymous namespace
435
436 static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) {
437 if (ForceCHR)
438 return true;
439
440 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
441 if (CHRModules.count(F.getParent()->getName()))
442 return true;
443 StringRef Name = F.getName();
444 if (CHRFunctions.count(Name))
445 return true;
446 const char* DemangledName = nullptr;
447 int Status = -1;
448 DemangledName = abi::__cxa_demangle(Name.str().c_str(),
449 nullptr, nullptr, &Status);
450 return DemangledName && CHRFunctions.count(DemangledName);
451 }
452
453 assert(PSI.hasProfileSummary() && "Empty PSI?");
454 return PSI.isFunctionEntryHot(&F);
455 }
456
457 static void dumpIR(Function &F, const char *Label, CHRStats *Stats) {
458 std::string Name = F.getName().str();
459 const char *DemangledName = nullptr;
460 int Status = -1;
461 DemangledName = abi::__cxa_demangle(Name.c_str(),
462 nullptr, nullptr, &Status);
463 if (DemangledName == nullptr) {
464 DemangledName = "";
465 }
466 std::string ModuleName = F.getParent()->getName().str();
467 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "
468 << Name);
469 if (Stats)
470 CHR_DEBUG(dbgs() << " " << *Stats);
471 CHR_DEBUG(dbgs() << "\n");
472 CHR_DEBUG(F.dump());
473 }
474
475
476 void CHRScope::print(raw_ostream &OS) const {
477 assert(RegInfos.size() > 0 && "Empty CHRScope");
478 OS << "CHRScope[";
479 OS << RegInfos.size() << ", Regions[";
480 for (const RegInfo &RI : RegInfos) {
481 OS << RI.R->getNameStr();
482 if (RI.HasBranch)
483 OS << " B";
484 if (RI.Selects.size() > 0)
485 OS << " S" << RI.Selects.size();
486 OS << ", ";
487 }
488 if (RegInfos[0].R->getParent()) {
489 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
490 } else {
491 // top level region
492 OS << "]";
493 }
494 OS << ", Subs[";
495 for (CHRScope *Sub : Subs) {
496 OS << *Sub << ", ";
497 }
498 OS << "]]";
499 }
500
501 // Return true if the given instruction type can be hoisted by CHR.
502 static bool isHoistableInstructionType(Instruction *I) {
503 return isa(I) || isa(I) || isa(I) ||
504 isa(I) || isa(I) ||
505 isa(I) || isa(I) ||
506 isa(I) || isa(I) ||
507 isa(I);
508 }
509
510 // Return true if the given instruction can be hoisted by CHR.
511 static bool isHoistable(Instruction *I, DominatorTree &DT) {
512 if (!isHoistableInstructionType(I))
513 return false;
514 return isSafeToSpeculativelyExecute(I, nullptr, &DT);
515 }
516
517 // Recursively traverse the use-def chains of the given value and return a set
518 // of the unhoistable base values defined within the scope (excluding the
519 // first-region entry block) or the (hoistable or unhoistable) base values that
520 // are defined outside (including the first-region entry block) of the
521 // scope. The returned set doesn't include constants.
522 static std::set getBaseValues(Value *V,
523 DominatorTree &DT) {
524 std::set Result;
525 if (auto *I = dyn_cast(V)) {
526 // We don't stop at a block that's not in the Scope because we would miss some
527 // instructions that are based on the same base values if we stop there.
528 if (!isHoistable(I, DT)) {
529 Result.insert(I);
530 return Result;
531 }
532 // I is hoistable above the Scope.
533 for (Value *Op : I->operands()) {
534 std::set OpResult = getBaseValues(Op, DT);
535 Result.insert(OpResult.begin(), OpResult.end());
536 }
537 return Result;
538 }
539 if (isa(V)) {
540 Result.insert(V);
541 return Result;
542 }
543 // We don't include others like constants because those won't lead to any
544 // chance of folding of conditions (eg two bit checks merged into one check)
545 // after CHR.
546 return Result; // empty
547 }
548
549 // Return true if V is already hoisted or can be hoisted (along with its
550 // operands) above the insert point. When it returns true and HoistStops is
551 // non-null, the instructions to stop hoisting at through the use-def chains are
552 // inserted into HoistStops.
553 static bool
554 checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
555 DenseSet &Unhoistables,
556 DenseSet *HoistStops) {
557 assert(InsertPoint && "Null InsertPoint");
558 if (auto *I = dyn_cast(V)) {
559 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
560 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
561 if (Unhoistables.count(I)) {
562 // Don't hoist if they are not to be hoisted.
563 return false;
564 }
565 if (DT.dominates(I, InsertPoint)) {
566 // We are already above the insert point. Stop here.
567 if (HoistStops)
568 HoistStops->insert(I);
569 return true;
570 }
571 // We aren't above the insert point; check if we can hoist it above the
572 // insert point.
573 if (isHoistable(I, DT)) {
574 // Check operands first.
575 DenseSet OpsHoistStops;
576 bool AllOpsHoisted = true;
577 for (Value *Op : I->operands()) {
578 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops)) {
579 AllOpsHoisted = false;
580 break;
581 }
582 }
583 if (AllOpsHoisted) {
584 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
585 if (HoistStops)
586 HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
587 return true;
588 }
589 }
590 return false;
591 }
592 // Non-instructions are considered hoistable.
593 return true;
594 }
595
596 // Returns true and sets the true probability and false probability of an
597 // MD_prof metadata if it's well-formed.
598 static bool CheckMDProf(MDNode *MD, BranchProbability &TrueProb,
599 BranchProbability &FalseProb) {
600 if (!MD) return false;
601 MDString *MDName = cast(MD->getOperand(0));
602 if (MDName->getString() != "branch_weights" ||
603 MD->getNumOperands() != 3)
604 return false;
605 ConstantInt *TrueWeight = mdconst::extract(MD->getOperand(1));
606 ConstantInt *FalseWeight = mdconst::extract(MD->getOperand(2));
607 if (!TrueWeight || !FalseWeight)
608 return false;
609 APInt TrueWt = TrueWeight->getValue();
610 APInt FalseWt = FalseWeight->getValue();
611 APInt SumWt = TrueWt + FalseWt;
612 TrueProb = BranchProbability::getBranchProbability(TrueWt.getZExtValue(),
613 SumWt.getZExtValue());
614 FalseProb = BranchProbability::getBranchProbability(FalseWt.getZExtValue(),
615 SumWt.getZExtValue());
616 return true;
617 }
618
619 static BranchProbability getCHRBiasThreshold() {
620 return BranchProbability::getBranchProbability(
621 static_cast(CHRBiasThreshold * 1000000), 1000000);
622 }
623
624 // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >=
625 // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
626 // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
627 // false.
628 template
629 bool CheckBias(K *Key, BranchProbability TrueProb, BranchProbability FalseProb,
630 S &TrueSet, S &FalseSet, M &BiasMap) {
631 BranchProbability Threshold = getCHRBiasThreshold();
632 if (TrueProb >= Threshold) {
633 TrueSet.insert(Key);
634 BiasMap[Key] = TrueProb;
635 return true;
636 } else if (FalseProb >= Threshold) {
637 FalseSet.insert(Key);
638 BiasMap[Key] = FalseProb;
639 return true;
640 }
641 return false;
642 }
643
644 // Returns true and inserts a region into the right biased set and the map if the
645 // branch of the region is biased.
646 static bool CheckBiasedBranch(BranchInst *BI, Region *R,
647 DenseSet &TrueBiasedRegionsGlobal,
648 DenseSet &FalseBiasedRegionsGlobal,
649 DenseMap &BranchBiasMap) {
650 if (!BI->isConditional())
651 return false;
652 BranchProbability ThenProb, ElseProb;
653 if (!CheckMDProf(BI->getMetadata(LLVMContext::MD_prof),
654 ThenProb, ElseProb))
655 return false;
656 BasicBlock *IfThen = BI->getSuccessor(0);
657 BasicBlock *IfElse = BI->getSuccessor(1);
658 assert((IfThen == R->getExit() || IfElse == R->getExit()) &&
659 IfThen != IfElse &&
660 "Invariant from findScopes");
661 if (IfThen == R->getExit()) {
662 // Swap them so that IfThen/ThenProb means going into the conditional code
663 // and IfElse/ElseProb means skipping it.
664 std::swap(IfThen, IfElse);
665 std::swap(ThenProb, ElseProb);
666 }
667 CHR_DEBUG(dbgs() << "BI " << *BI << " ");
668 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ");
669 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n");
670 return CheckBias(R, ThenProb, ElseProb,
671 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
672 BranchBiasMap);
673 }
674
675 // Returns true and inserts a select into the right biased set and the map if the
676 // select is biased.
677 static bool CheckBiasedSelect(
678 SelectInst *SI, Region *R,
679 DenseSet &TrueBiasedSelectsGlobal,
680 DenseSet &FalseBiasedSelectsGlobal,
681 DenseMap &SelectBiasMap) {
682 BranchProbability TrueProb, FalseProb;
683 if (!CheckMDProf(SI->getMetadata(LLVMContext::MD_prof),
684 TrueProb, FalseProb))
685 return false;
686 CHR_DEBUG(dbgs() << "SI " << *SI << " ");
687 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ");
688 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n");
689 return CheckBias(SI, TrueProb, FalseProb,
690 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
691 SelectBiasMap);
692 }
693
694 // Returns the instruction at which to hoist the dependent condition values and
695 // insert the CHR branch for a region. This is the terminator branch in the
696 // entry block or the first select in the entry block, if any.
697 static Instruction* getBranchInsertPoint(RegInfo &RI) {
698 Region *R = RI.R;
699 BasicBlock *EntryBB = R->getEntry();
700 // The hoist point is by default the terminator of the entry block, which is
701 // the same as the branch instruction if RI.HasBranch is true.
702 Instruction *HoistPoint = EntryBB->getTerminator();
703 for (SelectInst *SI : RI.Selects) {
704 if (SI->getParent() == EntryBB) {
705 // Pick the first select in Selects in the entry block. Note Selects is
706 // sorted in the instruction order within a block (asserted below).
707 HoistPoint = SI;
708 break;
709 }
710 }
711 assert(HoistPoint && "Null HoistPoint");
712 #ifndef NDEBUG
713 // Check that HoistPoint is the first one in Selects in the entry block,
714 // if any.
715 DenseSet EntryBlockSelectSet;
716 for (SelectInst *SI : RI.Selects) {
717 if (SI->getParent() == EntryBB) {
718 EntryBlockSelectSet.insert(SI);
719 }
720 }
721 for (Instruction &I : *EntryBB) {
722 if (EntryBlockSelectSet.count(&I) > 0) {
723 assert(&I == HoistPoint &&
724 "HoistPoint must be the first one in Selects");
725 break;
726 }
727 }
728 #endif
729 return HoistPoint;
730 }
731
732 // Find a CHR scope in the given region.
//
// Returns nullptr when R's entry block belongs to a subregion or when a
// predecessor of the entry lies inside R (a loop). Otherwise a scope is
// created if (a) R has an exit and its entry block ends in a conditional
// branch with two distinct successors, one of which is the exit, and/or
// (b) a direct child block of R contains at least one select. Every scope
// created here is also inserted into Scopes, and checkScopeHoistable() is run
// on it before it is returned. Returns nullptr if neither (a) nor (b) holds.
733 CHRScope * CHR::findScope(Region *R) {
734 CHRScope *Result = nullptr;
735 BasicBlock *Entry = R->getEntry();
736 BasicBlock *Exit = R->getExit(); // null if top level.
737 assert(Entry && "Entry must not be null");
738 assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
739 "Only top level region has a null exit");
740 if (Entry)
741 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n");
742 else
743 CHR_DEBUG(dbgs() << "Entry null\n");
744 if (Exit)
745 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n");
746 else
747 CHR_DEBUG(dbgs() << "Exit null\n");
748 // Exclude cases where Entry is part of a subregion (hence it doesn't belong
749 // to this region).
750 bool EntryInSubregion = RI.getRegionFor(Entry) != R;
751 if (EntryInSubregion)
752 return nullptr;
753 // Exclude loops
754 for (BasicBlock *Pred : predecessors(Entry))
755 if (R->contains(Pred))
756 return nullptr;
757 if (Exit) {
758 // Try to find an if-then block (check if R is an if-then).
759 // if (cond) {
760 // ...
761 // }
762 auto *BI = dyn_cast(Entry->getTerminator());
763 if (BI)
764 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n");
765 else
766 CHR_DEBUG(dbgs() << "BI null\n");
767 if (BI && BI->isConditional()) {
768 BasicBlock *S0 = BI->getSuccessor(0);
769 BasicBlock *S1 = BI->getSuccessor(1);
770 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n");
771 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n");
772 if (S0 != S1 && (S0 == Exit || S1 == Exit)) {
// Note that the scope is created whether or not CheckBiasedBranch
// succeeds; only RI.HasBranch records its result. This local RI shadows
// the RI used for getRegionFor() above.
773 RegInfo RI(R);
774 RI.HasBranch = CheckBiasedBranch(
775 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
776 BranchBiasMap);
777 Result = new CHRScope(RI);
778 Scopes.insert(Result);
779 CHR_DEBUG(dbgs() << "Found a region with a branch\n");
780 ++Stats.NumBranches;
781 }
782 }
783 }
784 {
785 // Try to look for selects in the direct child blocks (as opposed to in
786 // subregions) of R.
787 // ...
788 // if (..) { // Some subregion
789 // ...
790 // }
791 // if (..) { // Some subregion
792 // ...
793 // }
794 // ...
795 // a = cond ? b : c;
796 // ...
797 SmallVector Selects;
798 for (RegionNode *E : R->elements()) {
799 if (E->isSubRegion())
800 continue;
801 // This returns the basic block of E if E is a direct child of R (not a
802 // subregion.)
803 BasicBlock *BB = E->getEntry();
804 // Need to push in the order to make it easier to find the first Select
805 // later.
806 for (Instruction &I : *BB) {
807 if (auto *SI = dyn_cast(&I)) {
808 Selects.push_back(SI);
// Selects are counted in Stats.NumBranches as well.
809 ++Stats.NumBranches;
810 }
811 }
812 }
813 if (Selects.size() > 0) {
// Keep only the selects that CheckBiasedSelect accepts for RI's region.
814 auto AddSelects = [&](RegInfo &RI) {
815 for (auto *SI : Selects)
816 if (CheckBiasedSelect(SI, RI.R,
817 TrueBiasedSelectsGlobal,
818 FalseBiasedSelectsGlobal,
819 SelectBiasMap))
820 RI.Selects.push_back(SI);
821 };
822 if (!Result) {
// A scope is created here even if AddSelects ends up adding no select.
823 CHR_DEBUG(dbgs() << "Found a select-only region\n");
824 RegInfo RI(R);
825 AddSelects(RI);
826 Result = new CHRScope(RI);
827 Scopes.insert(Result);
828 } else {
829 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
830 AddSelects(Result->RegInfos[0]);
831 }
832 }
833 }
834
835 if (Result) {
836 checkScopeHoistable(Result);
837 }
838 return Result;
839 }
840
841 // Check that any of the branch and the selects in the region could be
842 // hoisted above the CHR branch insert point (the most dominating of
843 // them, either the branch (at the end of the first block) or the first
844 // select in the first block). If the branch can't be hoisted, drop the
845 // selects in the first blocks.
846 //
847 // For example, for the following scope/region with selects, we want to insert
848 // the merged branch right before the first select in the first/entry block by
849 // hoisting c1, c2, c3, and c4.
850 //
851 // // Branch insert point here.
852 // a = c1 ? b : c; // Select 1
853 // d = c2 ? e : f; // Select 2
854 // if (c3) { // Branch
855 // ...
856 // c4 = foo() // A call.
857 // g = c4 ? h : i; // Select 3
858 // }
859 //
860 // But suppose we can't hoist c4 because it's dependent on the preceding
861 // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
862 // Select 2. If we can't hoist c3, we drop Selects 1 & 2.
//
// Only the first RegInfo of Scope is examined and its Selects vector is
// modified in place. Nothing is done when the region has neither a branch nor
// any select.
863 void CHR::checkScopeHoistable(CHRScope *Scope) {
864 RegInfo &RI = Scope->RegInfos[0];
865 Region *R = RI.R;
866 BasicBlock *EntryBB = R->getEntry();
867 auto *Branch = RI.HasBranch ?
868 cast(EntryBB->getTerminator()) : nullptr;
869 SmallVector &Selects = RI.Selects;
870 if (RI.HasBranch || !Selects.empty()) {
871 Instruction *InsertPoint = getBranchInsertPoint(RI);
872 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
873 // Avoid a data dependence from a select or a branch to a(nother)
874 // select. Note no instruction can data-depend on a branch (a branch
875 // instruction doesn't produce a value).
876 DenseSet Unhoistables;
877 // Initialize Unhoistables with the selects.
878 for (SelectInst *SI : Selects) {
879 Unhoistables.insert(SI);
880 }
881 // Remove Selects that can't be hoisted.
882 for (auto it = Selects.begin(); it != Selects.end(); ) {
883 SelectInst *SI = *it;
// The select that is itself the insert point is kept without a check.
884 if (SI == InsertPoint) {
885 ++it;
886 continue;
887 }
888 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
889 DT, Unhoistables, nullptr);
890 if (!IsHoistable) {
891 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
892 it = Selects.erase(it);
893 // Since we are dropping the select here, we also drop it from
894 // Unhoistables.
895 Unhoistables.erase(SI);
896 } else
897 ++it;
898 }
899 // Update InsertPoint after potentially removing selects.
900 InsertPoint = getBranchInsertPoint(RI);
901 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
902 if (RI.HasBranch && InsertPoint != Branch) {
903 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
904 DT, Unhoistables, nullptr);
905 if (!IsHoistable) {
906 // If the branch isn't hoistable, drop the selects in the entry
907 // block, preferring the branch, which makes the branch the hoist
908 // point.
909 assert(InsertPoint != Branch && "Branch must not be the hoist point");
910 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
911 CHR_DEBUG(
912 for (SelectInst *SI : Selects) {
913 dbgs() << "SI " << *SI << "\n";
914 });
915 Selects.erase(std::remove_if(Selects.begin(), Selects.end(),
916 [EntryBB](SelectInst *SI) {
917 return SI->getParent() == EntryBB;
918 }), Selects.end());
// Unhoistables is emptied entirely here, including selects that remain in
// Selects (those outside the entry block).
919 Unhoistables.clear();
920 InsertPoint = Branch;
921 }
922 }
923 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
924 #ifndef NDEBUG
// Debug-only sanity checks: whatever survived must not already be above the
// final insert point and must pass checkHoistValue against it.
925 if (RI.HasBranch) {
926 assert(!DT.dominates(Branch, InsertPoint) &&
927 "Branch can't be already above the hoist point");
928 assert(checkHoistValue(Branch->getCondition(), InsertPoint,
929 DT, Unhoistables, nullptr) &&
930 "checkHoistValue for branch");
931 }
932 for (auto *SI : Selects) {
933 assert(!DT.dominates(SI, InsertPoint) &&
934 "SI can't be already above the hoist point");
935 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
936 Unhoistables, nullptr) &&
937 "checkHoistValue for selects");
938 }
939 CHR_DEBUG(dbgs() << "Result\n");
940 if (RI.HasBranch) {
941 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
942 }
943 for (auto *SI : Selects) {
944 CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
945 }
946 #endif
947 }
948 }
949
950 // Traverse the region tree, find all nested scopes and merge them if possible.
//
// Calls findScope() on R, then recurses into each subregion of R in iteration
// order. Scopes found for consecutive subregions are chained together with
// append() as long as appendable() allows it; a subregion without a scope
// ends the current chain. The resulting chains become subscopes of R's own
// scope if it has one; otherwise they are pushed to the Scopes output
// parameter. Returns R's own scope, or nullptr if findScope() found none.
//
// NextRegion and ParentRegion are only forwarded by the recursive call; they
// are not otherwise read in this function body.
951 CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
952 SmallVectorImpl &Scopes) {
953 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
954 CHRScope *Result = findScope(R);
955 // Visit subscopes.
// ConsecutiveSubscope is the head of the chain currently being built.
956 CHRScope *ConsecutiveSubscope = nullptr;
957 SmallVector Subscopes;
958 for (auto It = R->begin(); It != R->end(); ++It) {
959 const std::unique_ptr &SubR = *It;
960 auto Next_It = std::next(It);
961 Region *NextSubR = Next_It != R->end() ? Next_It->get() : nullptr;
962 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()
963 << "\n");
964 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
965 if (SubCHRScope) {
966 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n");
967 } else {
968 CHR_DEBUG(dbgs() << "Subregion Scope null\n");
969 }
970 if (SubCHRScope) {
971 if (!ConsecutiveSubscope)
972 ConsecutiveSubscope = SubCHRScope;
973 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
// Can't extend the current chain: finish it and start a new one.
974 Subscopes.push_back(ConsecutiveSubscope);
975 ConsecutiveSubscope = SubCHRScope;
976 } else
977 ConsecutiveSubscope->append(SubCHRScope);
978 } else {
// No scope for this subregion: finish the current chain, if any.
979 if (ConsecutiveSubscope) {
980 Subscopes.push_back(ConsecutiveSubscope);
981 }
982 ConsecutiveSubscope = nullptr;
983 }
984 }
// Flush the chain that was still open when the loop ended.
985 if (ConsecutiveSubscope) {
986 Subscopes.push_back(ConsecutiveSubscope);
987 }
988 for (CHRScope *Sub : Subscopes) {
989 if (Result) {
990 // Combine it with the parent.
991 Result->addSub(Sub);
992 } else {
993 // Push Subscopes as they won't be combined with the parent.
994 Scopes.push_back(Sub);
995 }
996 }
997 return Result;
998 }
999
// Collect the condition values of the region described by RI: the condition
// of the entry block's terminator if RI.HasBranch is set, plus the condition
// of every select in RI.Selects. Duplicates collapse because the result is a
// set.
1000 static DenseSet getCHRConditionValuesForRegion(RegInfo &RI) {
1001 DenseSet ConditionValues;
1002 if (RI.HasBranch) {
1003 auto *BI = cast(RI.R->getEntry()->getTerminator());
1004 ConditionValues.insert(BI->getCondition());
1005 }
1006 for (SelectInst *SI : RI.Selects) {
1007 ConditionValues.insert(SI->getCondition());
1008 }
1009 return ConditionValues;
1010 }
1011
1012
1013 // Determine whether to split a scope depending on the sets of the branch
1014 // condition values of the previous region and the current region. We split
1015 // (return true) if 1) the condition values of the inner/lower scope can't be
1016 // hoisted up to the outer/upper scope, or 2) the two sets of the condition
1017 // values have an empty intersection (because the combined branch conditions
1018 // won't probably lead to a simpler combined condition).
//
// InsertPoint must be non-null (asserted). For 2), the comparison is made on
// the base values returned by getBaseValues() for each condition value, and
// it is skipped (no split) when either input set is empty.
1019 static bool shouldSplit(Instruction *InsertPoint,
1020 DenseSet &PrevConditionValues,
1021 DenseSet &ConditionValues,
1022 DominatorTree &DT,
1023 DenseSet &Unhoistables) {
1024 CHR_DEBUG(
1025 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
1026 for (Value *V : PrevConditionValues) {
1027 dbgs() << *V << ", ";
1028 }
1029 dbgs() << " ConditionValues ";
1030 for (Value *V : ConditionValues) {
1031 dbgs() << *V << ", ";
1032 }
1033 dbgs() << "\n");
1034 assert(InsertPoint && "Null InsertPoint");
1035 // If any of the condition values isn't hoistable to the hoist point, split.
1036 for (Value *V : ConditionValues) {
1037 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr)) {
1038 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
1039 return true; // Not hoistable, split.
1040 }
1041 }
1042 // If PrevConditionValues or ConditionValues is empty, don't split to avoid
1043 // unnecessary splits at scopes with no branch/selects. If
1044 // PrevConditionValues and ConditionValues don't intersect at all, split.
1045 if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
1046 // Use std::set as DenseSet doesn't work with set_intersection.
1047 std::set PrevBases, Bases;
1048 for (Value *V : PrevConditionValues) {
1049 std::set BaseValues = getBaseValues(V, DT);
1050 PrevBases.insert(BaseValues.begin(), BaseValues.end());
1051 }
1052 for (Value *V : ConditionValues) {
1053 std::set BaseValues = getBaseValues(V, DT);
1054 Bases.insert(BaseValues.begin(), BaseValues.end());
1055 }
1056 CHR_DEBUG(
1057 dbgs() << "PrevBases ";
1058 for (Value *V : PrevBases) {
1059 dbgs() << *V << ", ";
1060 }
1061 dbgs() << " Bases ";
1062 for (Value *V : Bases) {
1063 dbgs() << *V << ", ";
1064 }
1065 dbgs() << "\n");
1066 std::set Intersection;
1067 std::set_intersection(PrevBases.begin(), PrevBases.end(),
1068 Bases.begin(), Bases.end(),
1069 std::inserter(Intersection, Intersection.begin()));
1070 if (Intersection.empty()) {
1071 // Empty intersection, split.
1072 CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
1073 return true;
1074 }
1075 }
1076 CHR_DEBUG(dbgs() << "No split\n");
1077 return false; // Don't split.
1078 }
1079
// Insert into Output every select recorded in Scope's RegInfos and,
// recursively, in all of its subscopes. Output is only added to, never
// cleared.
1080 static void GetSelectsInScope(CHRScope *Scope,
1081 DenseSet &Output) {
1082 for (RegInfo &RI : Scope->RegInfos) {
1083 for (SelectInst *SI : RI.Selects) {
1084 Output.insert(SI);
1085 }
1086 }
1087 for (CHRScope *Sub : Scope->Subs) {
1088 GetSelectsInScope(Sub, Output);
1089 }
1090 }
1091
// Run splitScope() on every top-level scope in Input, collecting the
// resulting scopes in Output. Each input scope must not have its
// BranchInsertPoint set yet; each output scope must have it set (both
// asserted). The selects of the whole scope tree are passed to splitScope()
// as the initial Unhoistables set.
1092 void CHR::splitScopes(SmallVectorImpl &Input,
1093 SmallVectorImpl &Output) {
1094 for (CHRScope *Scope : Input) {
1095 assert(!Scope->BranchInsertPoint &&
1096 "BranchInsertPoint must not be set");
1097 DenseSet Unhoistables;
1098 GetSelectsInScope(Scope, Unhoistables);
// Top-level call: no outer scope, so the returned vector is empty
// (splitScope asserts this) and can be ignored.
1099 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
1100 }
1101 #ifndef NDEBUG
1102 for (CHRScope *Scope : Output) {
1103 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set");
1104 }
1105 #endif
1106 }
1107
// Split Scope into a sequence of scopes at the region boundaries where
// shouldSplit() returns true, then recursively do the same for the subscopes
// of each piece.
//
// Pieces that are split from the outer scope are appended to Output with
// their BranchInsertPoint set. Pieces that stay connected to Outer are
// returned so that the caller can install them as its subscopes.
//
// Outer, OuterConditionValues and OuterInsertPoint are all null for a
// top-level call; when Outer is non-null the other two must be non-null too
// (asserted). For a top-level call the returned vector is empty (asserted).
1108 SmallVector CHR::splitScope(
1109 CHRScope *Scope,
1110 CHRScope *Outer,
1111 DenseSet *OuterConditionValues,
1112 Instruction *OuterInsertPoint,
1113 SmallVectorImpl &Output,
1114 DenseSet &Unhoistables) {
1115 if (Outer) {
1116 assert(OuterConditionValues && "Null OuterConditionValues");
1117 assert(OuterInsertPoint && "Null OuterInsertPoint");
1118 }
// State of the piece currently being accumulated.
1119 bool PrevSplitFromOuter = true;
1120 DenseSet PrevConditionValues;
1121 Instruction *PrevInsertPoint = nullptr;
// Parallel vectors, one entry per finished piece.
1122 SmallVector Splits;
1123 SmallVector SplitsSplitFromOuter;
1124 SmallVector, 8> SplitsConditionValues;
1125 SmallVector SplitsInsertPoints;
// Iterate over a copy, presumably because Scope->split() below modifies
// Scope->RegInfos -- TODO confirm against CHRScope::split.
1126 SmallVector RegInfos(Scope->RegInfos); // Copy
1127 for (RegInfo &RI : RegInfos) {
1128 Instruction *InsertPoint = getBranchInsertPoint(RI);
1129 DenseSet ConditionValues = getCHRConditionValuesForRegion(RI);
1130 CHR_DEBUG(
1131 dbgs() << "ConditionValues ";
1132 for (Value *V : ConditionValues) {
1133 dbgs() << *V << ", ";
1134 }
1135 dbgs() << "\n");
1136 if (RI.R == RegInfos[0].R) {
1137 // First iteration. Check to see if we should split from the outer.
1138 if (Outer) {
1139 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
1140 CHR_DEBUG(dbgs() << "Should split from outer at "
1141 << RI.R->getNameStr() << "\n");
1142 if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
1143 ConditionValues, DT, Unhoistables)) {
1144 PrevConditionValues = ConditionValues;
1145 PrevInsertPoint = InsertPoint;
1146 } else {
1147 // Not splitting from the outer. Use the outer bases and insert
1148 // point. Union the bases.
1149 PrevSplitFromOuter = false;
1150 PrevConditionValues = *OuterConditionValues;
1151 PrevConditionValues.insert(ConditionValues.begin(),
1152 ConditionValues.end());
1153 PrevInsertPoint = OuterInsertPoint;
1154 }
1155 } else {
1156 CHR_DEBUG(dbgs() << "Outer null\n");
1157 PrevConditionValues = ConditionValues;
1158 PrevInsertPoint = InsertPoint;
1159 }
1160 } else {
1161 CHR_DEBUG(dbgs() << "Should split from prev at "
1162 << RI.R->getNameStr() << "\n");
1163 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
1164 DT, Unhoistables)) {
// Cut the scope at this region: record the finished head piece and continue
// with the tail, which always counts as split from the outer.
1165 CHRScope *Tail = Scope->split(RI.R);
1166 Scopes.insert(Tail);
1167 Splits.push_back(Scope);
1168 SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1169 SplitsConditionValues.push_back(PrevConditionValues);
1170 SplitsInsertPoints.push_back(PrevInsertPoint);
1171 Scope = Tail;
1172 PrevConditionValues = ConditionValues;
1173 PrevInsertPoint = InsertPoint;
1174 PrevSplitFromOuter = true;
1175 } else {
1176 // Not splitting. Union the bases. Keep the hoist point.
1177 PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end());
1178 }
1179 }
1180 }
// Record the last (or only) piece.
1181 Splits.push_back(Scope);
1182 SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1183 SplitsConditionValues.push_back(PrevConditionValues);
1184 assert(PrevInsertPoint && "Null PrevInsertPoint");
1185 SplitsInsertPoints.push_back(PrevInsertPoint);
1186 assert(Splits.size() == SplitsConditionValues.size() &&
1187 Splits.size() == SplitsSplitFromOuter.size() &&
1188 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
// Recurse into the subscopes of each piece, with the piece acting as the
// outer scope. Only the sub-pieces that stay connected are kept as Subs.
1189 for (size_t I = 0; I < Splits.size(); ++I) {
1190 CHRScope *Split = Splits[I];
1191 DenseSet &SplitConditionValues = SplitsConditionValues[I];
1192 Instruction *SplitInsertPoint = SplitsInsertPoints[I];
1193 SmallVector NewSubs;
1194 DenseSet SplitUnhoistables;
1195 GetSelectsInScope(Split, SplitUnhoistables);
1196 for (CHRScope *Sub : Split->Subs) {
1197 SmallVector SubSplits = splitScope(
1198 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
1199 SplitUnhoistables);
1200 NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end());
1201 }
1202 Split->Subs = NewSubs;
1203 }
// Distribute the pieces: independent ones go to Output, connected ones are
// returned to the caller.
1204 SmallVector Result;
1205 for (size_t I = 0; I < Splits.size(); ++I) {
1206 CHRScope *Split = Splits[I];
1207 if (SplitsSplitFromOuter[I]) {
1208 // Split from the outer.
1209 Output.push_back(Split);
1210 Split->BranchInsertPoint = SplitsInsertPoints[I];
1211 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
1212 << "\n");
1213 } else {
1214 // Connected to the outer.
1215 Result.push_back(Split);
1216 }
1217 }
1218 if (!Outer)
1219 assert(Result.empty() &&
1220 "If no outer (top-level), must return no nested ones");
1221 return Result;
1222 }
1223
// For each top-level scope in Scopes, fill in its TrueBiasedRegions,
// FalseBiasedRegions, TrueBiasedSelects and FalseBiasedSelects sets by
// walking the scope and its subscopes. All four sets are expected to start
// out empty for regions (asserted for the two region sets).
1224 void CHR::classifyBiasedScopes(SmallVectorImpl &Scopes) {
1225 for (CHRScope *Scope : Scopes) {
1226 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty");
// The scope is its own outermost scope at the top of the recursion.
1227 classifyBiasedScopes(Scope, Scope);
1228 CHR_DEBUG(
1229 dbgs() << "classifyBiasedScopes " << *Scope << "\n";
1230 dbgs() << "TrueBiasedRegions ";
1231 for (Region *R : Scope->TrueBiasedRegions) {
1232 dbgs() << R->getNameStr() << ", ";
1233 }
1234 dbgs() << "\n";
1235 dbgs() << "FalseBiasedRegions ";
1236 for (Region *R : Scope->FalseBiasedRegions) {
1237 dbgs() << R->getNameStr() << ", ";
1238 }
1239 dbgs() << "\n";
1240 dbgs() << "TrueBiasedSelects ";
1241 for (SelectInst *SI : Scope->TrueBiasedSelects) {
1242 dbgs() << *SI << ", ";
1243 }
1244 dbgs() << "\n";
1245 dbgs() << "FalseBiasedSelects ";
1246 for (SelectInst *SI : Scope->FalseBiasedSelects) {
1247 dbgs() << *SI << ", ";
1248 }
1249 dbgs() << "\n";);
1250 }
1251 }
1252
// Recursive helper: record on OutermostScope every branch region and every
// select found in Scope and its subscopes as true- or false-biased, according
// to the global bias sets. A branch region (RI.HasBranch) or a select that is
// in neither of its two global sets hits llvm_unreachable.
1253 void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
1254 for (RegInfo &RI : Scope->RegInfos) {
1255 if (RI.HasBranch) {
1256 Region *R = RI.R;
1257 if (TrueBiasedRegionsGlobal.count(R) > 0)
1258 OutermostScope->TrueBiasedRegions.insert(R);
1259 else if (FalseBiasedRegionsGlobal.count(R) > 0)
1260 OutermostScope->FalseBiasedRegions.insert(R);
1261 else
1262 llvm_unreachable("Must be biased");
1263 }
1264 for (SelectInst *SI : RI.Selects) {
1265 if (TrueBiasedSelectsGlobal.count(SI) > 0)
1266 OutermostScope->TrueBiasedSelects.insert(SI);
1267 else if (FalseBiasedSelectsGlobal.count(SI) > 0)
1268 OutermostScope->FalseBiasedSelects.insert(SI);
1269 else
1270 llvm_unreachable("Must be biased");
1271 }
1272 }
1273 for (CHRScope *Sub : Scope->Subs) {
1274 classifyBiasedScopes(Sub, OutermostScope);
1275 }
1276 }
1277
// Return true if the total number of biased regions and biased selects
// recorded on Scope is at least CHRMergeThreshold.
// NOTE(review): despite the function name, the bound is CHRMergeThreshold
// rather than the literal 2 -- presumably its default is 2; confirm.
1278 static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
1279 unsigned NumBiased = Scope->TrueBiasedRegions.size() +
1280 Scope->FalseBiasedRegions.size() +
1281 Scope->TrueBiasedSelects.size() +
1282 Scope->FalseBiasedSelects.size();
1283 return NumBiased >= CHRMergeThreshold;
1284 }
1285
// Append to Output, in order, the scopes of Input that pass
// hasAtLeastTwoBiasedBranches(); the others are dropped.
1286 void CHR::filterScopes(SmallVectorImpl &Input,
1287 SmallVectorImpl &Output) {
1288 for (CHRScope *Scope : Input) {
1289 // Filter out the ones with too few biased branches/selects to merge.
1290 if (!hasAtLeastTwoBiasedBranches(Scope)) {
1291 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
1292 << Scope->TrueBiasedRegions.size()
1293 << " falsy-regions " << Scope->FalseBiasedRegions.size()
1294 << " true-selects " << Scope->TrueBiasedSelects.size()
1295 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n");
1296 continue;
1297 }
1298 Output.push_back(Scope);
1299 }
1300 }
1301
// For each top-level scope in Input, fill in its CHRRegions and HoistStopMap
// by walking the scope and its subscopes, then append the scope to Output.
// Both CHRRegions and HoistStopMap must be empty on entry (asserted).
1302 void CHR::setCHRRegions(SmallVectorImpl &Input,
1303 SmallVectorImpl &Output) {
1304 for (CHRScope *Scope : Input) {
1305 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&
1306 "Empty");
// The scope is its own outermost scope at the top of the recursion.
1307 setCHRRegions(Scope, Scope);
1308 Output.push_back(Scope);
1309 CHR_DEBUG(
1310 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";
1311 for (auto pair : Scope->HoistStopMap) {
1312 Region *R = pair.first;
1313 dbgs() << "Region " << R->getNameStr() << "\n";
1314 for (Instruction *I : pair.second) {
1315 dbgs() << "HoistStop " << *I << "\n";
1316 }
1317 }
1318 dbgs() << "CHRRegions" << "\n";
1319 for (RegInfo &RI : Scope->CHRRegions) {
1320 dbgs() << RI.R->getNameStr() << "\n";
1321 });
1322 }
1323 }
1324
// Recursive helper: for every region of Scope that has a biased branch or at
// least one select, append its RegInfo to OutermostScope->CHRRegions and
// store the hoist stops collected by checkHoistValue() for its conditions in
// OutermostScope->HoistStopMap. All conditions are checked against
// OutermostScope->BranchInsertPoint and are expected to be hoistable
// (asserted). Subscopes are processed afterwards, recursively.
1325 void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
1326 DenseSet Unhoistables;
1327 // Put the biased selects in Unhoistables because they should stay where they
1328 // are and constant-folded after CHR (in case one biased select or a branch
1329 // can depend on another biased select.)
// Only the selects of this Scope's own regions are added here, not those of
// its subscopes.
1330 for (RegInfo &RI : Scope->RegInfos) {
1331 for (SelectInst *SI : RI.Selects) {
1332 Unhoistables.insert(SI);
1333 }
1334 }
1335 Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
1336 for (RegInfo &RI : Scope->RegInfos) {
1337 Region *R = RI.R;
// One HoistStops set per region, shared by its branch and its selects.
1338 DenseSet HoistStops;
1339 bool IsHoisted = false;
1340 if (RI.HasBranch) {
1341 assert((OutermostScope->TrueBiasedRegions.count(R) > 0 ||
1342 OutermostScope->FalseBiasedRegions.count(R) > 0) &&
1343 "Must be truthy or falsy");
1344 auto *BI = cast(R->getEntry()->getTerminator());
1345 // Note checkHoistValue fills in HoistStops.
1346 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
1347 Unhoistables, &HoistStops);
1348 assert(IsHoistable && "Must be hoistable");
1349 (void)(IsHoistable); // Unused in release build
1350 IsHoisted = true;
1351 }
1352 for (SelectInst *SI : RI.Selects) {
1353 assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 ||
1354 OutermostScope->FalseBiasedSelects.count(SI) > 0) &&
1355 "Must be true or false biased");
1356 // Note checkHoistValue fills in HoistStops.
1357 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
1358 Unhoistables, &HoistStops);
1359 assert(IsHoistable && "Must be hoistable");
1360 (void)(IsHoistable); // Unused in release build
1361 IsHoisted = true;
1362 }
// Regions with neither a branch nor a select are not recorded.
1363 if (IsHoisted) {
1364 OutermostScope->CHRRegions.push_back(RI);
1365 OutermostScope->HoistStopMap[R] = HoistStops;
1366 }
1367 }
1368 for (CHRScope *Sub : Scope->Subs)
1369 setCHRRegions(Sub, OutermostScope);
1370 }
1371
// Ordering predicate for scopes: Scope1 comes before Scope2 if the first
// region of Scope1 has a smaller getDepth() than the first region of Scope2.
1372 bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
1373 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
1374 }
1375
// Copy Input into Output and stable-sort Output with CHRScopeSorter, so
// scopes that compare equal keep their relative order from Input. Output is
// resized to Input's size first, so any previous contents are overwritten.
1376 void CHR::sortScopes(SmallVectorImpl &Input,
1377 SmallVectorImpl &Output) {
1378 Output.resize(Input.size());
1379 std::copy(Input.begin(), Input.end(), Output.begin());
1380 std::stable_sort(Output.begin(), Output.end(), CHRScopeSorter);
1381 }
1382
1383 // Hoist V (along with its operands, recursively) to right before HoistPoint,
1384 // unless hoisting has to stop at V. This function returns nothing.
//
// Nothing is done if V is not an instruction, is HoistPoint itself, is in
// the hoist-stop set recorded for R in HoistStopMap, is a phi contained in
// TrivialPHIs, or is already in HoistedSet. R must have an entry in
// HoistStopMap (asserted). Every instruction moved is added to HoistedSet.
1385 static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
1386 HoistStopMapTy &HoistStopMap,
1387 DenseSet &HoistedSet,
1388 DenseSet &TrivialPHIs) {
1389 auto IT = HoistStopMap.find(R);
1390 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
1391 DenseSet &HoistStops = IT->second;
1392 if (auto *I = dyn_cast(V)) {
1393 if (I == HoistPoint)
1394 return;
1395 if (HoistStops.count(I))
1396 return;
1397 if (auto *PN = dyn_cast(I))
1398 if (TrivialPHIs.count(PN))
1399 // The trivial phi inserted by the previous CHR scope could replace a
1400 // non-phi in HoistStops. Note that since this phi is at the exit of a
1401 // previous CHR scope, which dominates this scope, it's safe to stop
1402 // hoisting there.
1403 return;
1404 if (HoistedSet.count(I))
1405 // Already hoisted, return.
1406 return;
1407 assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
// Hoist the operands first so that they end up before I at the hoist point.
1408 for (Value *Op : I->operands()) {
1409 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs);
1410 }
1411 I->moveBefore(HoistPoint);
1412 HoistedSet.insert(I);
1413 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
1414 }
1415 }
1416
1417 // Hoist the dependent condition values of the branches and the selects in the
1418 // scope to the insert point.
//
// Walks Scope->CHRRegions and calls hoistValue() on the condition of each
// region's branch (if the region has one and is recorded as true- or
// false-biased) and on the condition of each of its biased selects. A single
// HoistedSet is shared across all regions so that no instruction is moved
// twice.
1419 static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
1420 DenseSet &TrivialPHIs) {
1421 DenseSet HoistedSet;
1422 for (const RegInfo &RI : Scope->CHRRegions) {
1423 Region *R = RI.R;
1424 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1425 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1426 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1427 auto *BI = cast(R->getEntry()->getTerminator());
1428 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1429 HoistedSet, TrivialPHIs);
1430 }
1431 for (SelectInst *SI : RI.Selects) {
1432 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1433 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
// Selects that are not recorded as biased are left alone.
1434 if (!(IsTrueBiased || IsFalseBiased))
1435 continue;
1436 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1437 HoistedSet, TrivialPHIs);
1438 }
1439 }
1440 }
1441
1442 // Negate the predicate of an ICmp if it's used only by branches or selects, by
1443 // swapping the operands of the branches or the selects. Returns true on success.
//
// ExcludedUser is skipped both when checking the users and when rewriting
// them. If any other user is neither a conditional branch nor a select whose
// condition is ICmp, nothing is modified and false is returned.
1444 static bool NegateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
1445 Instruction *ExcludedUser,
1446 CHRScope *Scope) {
// First pass: validate all users before changing anything.
1447 for (User *U : ICmp->users()) {
1448 if (U == ExcludedUser)
1449 continue;
1450 if (isa(U) && cast(U)->isConditional())
1451 continue;
1452 if (isa(U) && cast(U)->getCondition() == ICmp)
1453 continue;
1454 return false;
1455 }
// Second pass: flip every user so that its behavior is unchanged once the
// predicate is inverted below.
1456 for (User *U : ICmp->users()) {
1457 if (U == ExcludedUser)
1458 continue;
1459 if (auto *BI = dyn_cast(U)) {
1460 assert(BI->isConditional() && "Must be conditional");
1461 BI->swapSuccessors();
1462 // Don't need to swap this in terms of
1463 // TrueBiasedRegions/FalseBiasedRegions because true-biased/false-biased
1464 // mean whether the branch is likely to go into the if-then rather than
1465 // successor0/successor1 and because we can tell which edge is the then or
1466 // the else one by comparing the destination to the region exit block.
1467 continue;
1468 }
1469 if (auto *SI = dyn_cast(U)) {
1470 // Swap operands
1471 Value *TrueValue = SI->getTrueValue();
1472 Value *FalseValue = SI->getFalseValue();
1473 SI->setTrueValue(FalseValue);
1474 SI->setFalseValue(TrueValue);
1475 SI->swapProfMetadata();
// NOTE(review): the select is inserted into the opposite bias set but is not
// erased from the set it was found in, so afterwards it is a member of both
// TrueBiasedSelects and FalseBiasedSelects. Confirm that consumers of these
// sets tolerate this.
1476 if (Scope->TrueBiasedSelects.count(SI)) {
1477 assert(Scope->FalseBiasedSelects.count(SI) == 0 &&
1478 "Must not be already in");
1479 Scope->FalseBiasedSelects.insert(SI);
1480 } else if (Scope->FalseBiasedSelects.count(SI)) {
1481 assert(Scope->TrueBiasedSelects.count(SI) == 0 &&
1482 "Must not be already in");
1483 Scope->TrueBiasedSelects.insert(SI);
1484 }
1485 continue;
1486 }
1487 llvm_unreachable("Must be a branch or a select");
1488 }
1489 ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate()));
1490 return true;
1491 }
1492
1493 // A helper for transformScopes. Insert a trivial phi at the scope exit block
1494 // for a value that's defined in the scope but used outside it (meaning it's
1495 // alive at the exit block).
//
// The blocks "in the scope" are all blocks of the regions in Scope->RegInfos.
// For each instruction in those blocks that has at least one qualifying user,
// a new phi is created at the front of ExitBlock with the instruction as the
// incoming value for every predecessor, the phi is added to TrivialPHIs, and
// the qualifying users are rewritten to use the phi instead.
1496 static void insertTrivialPHIs(CHRScope *Scope,
1497 BasicBlock *EntryBlock, BasicBlock *ExitBlock,
1498 DenseSet &TrivialPHIs) {
// The set gives fast membership tests; the vector keeps a deterministic
// iteration order.
1499 DenseSet BlocksInScopeSet;
1500 SmallVector BlocksInScopeVec;
1501 for (RegInfo &RI : Scope->RegInfos) {
1502 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1503 // sub-Scopes.
1504 BlocksInScopeSet.insert(BB);
1505 BlocksInScopeVec.push_back(BB);
1506 }
1507 }
1508 CHR_DEBUG(
1509 dbgs() << "Inserting redudant phis\n";
1510 for (BasicBlock *BB : BlocksInScopeVec) {
1511 dbgs() << "BlockInScope " << BB->getName() << "\n";
1512 });
1513 for (BasicBlock *BB : BlocksInScopeVec) {
1514 for (Instruction &I : *BB) {
// Users of I that must be redirected to the new phi.
1515 SmallVector Users;
1516 for (User *U : I.users()) {
1517 if (auto *UI = dyn_cast(U)) {
1518 if (BlocksInScopeSet.count(UI->getParent()) == 0 &&
1519 // Unless there's already a phi for I at the exit block.
1520 !(isa(UI) && UI->getParent() == ExitBlock)) {
1521 CHR_DEBUG(dbgs() << "V " << I << "\n");
1522 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
1523 Users.push_back(UI);
1524 } else if (UI->getParent() == EntryBlock && isa(UI)) {
1525 // There's a loop backedge from a block that's dominated by this
1526 // scope to the entry block.
1527 CHR_DEBUG(dbgs() << "V " << I << "\n");
1528 CHR_DEBUG(dbgs()
1529 << "Used at entry block (for a back edge) by a phi user "
1530 << *UI << "\n");
1531 Users.push_back(UI);
1532 }
1533 }
1534 }
1535 if (Users.size() > 0) {
1536 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
1537 // ExitBlock. Replace I with the new phi in UI unless UI is another
1538 // phi at ExitBlock.
1539 unsigned PredCount = std::distance(pred_begin(ExitBlock),
1540 pred_end(ExitBlock));
1541 PHINode *PN = PHINode::Create(I.getType(), PredCount, "",
1542 &ExitBlock->front());
1543 for (BasicBlock *Pred : predecessors(ExitBlock)) {
1544 PN->addIncoming(&I, Pred);
1545 }
1546 TrivialPHIs.insert(PN);
1547 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
1548 for (Instruction *UI : Users) {
// Replace every operand slot of UI that refers to I.
1549 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
1550 if (UI->getOperand(J) == &I) {
1551 UI->setOperand(J, PN);
1552 }
1553 }
1554 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
1555 }
1556 }
1557 }
1558 }
1559 }
1560
1561 // Assert that all the CHR regions of the scope have a biased branch or select.
// The whole body is compiled only when NDEBUG is not defined; otherwise this
// function is empty.
1562 static void assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
1563 #ifndef NDEBUG
// True if RI's region is in one of the scope's biased-region sets, or if any
// of RI's selects is in one of the scope's biased-select sets.
1564 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
1565 if (Scope->TrueBiasedRegions.count(RI.R) ||
1566 Scope->FalseBiasedRegions.count(RI.R))
1567 return true;
1568 for (SelectInst *SI : RI.Selects)
1569 if (Scope->TrueBiasedSelects.count(SI) ||
1570 Scope->FalseBiasedSelects.count(SI))
1571 return true;
1572 return false;
1573 };
1574 for (RegInfo &RI : Scope->CHRRegions) {
1575 assert(HasBiasedBranchOrSelect(RI, Scope) &&
1576 "Must have biased branch or select");
1577 }
1578 #endif
1579 }
1580
1581 // Assert that all the condition values of the biased branches and selects have
1582 // been hoisted to the pre-entry block or outside of the scope.
//
// Condition values that are not instructions are not checked. Each checked
// condition value is also printed through CHR_DEBUG.
1583 static void assertBranchOrSelectConditionHoisted(CHRScope *Scope,
1584 BasicBlock *PreEntryBlock) {
1585 CHR_DEBUG(dbgs() << "Biased regions condition values \n");
1586 for (RegInfo &RI : Scope->CHRRegions) {
1587 Region *R = RI.R;
1588 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1589 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1590 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1591 auto *BI = cast(R->getEntry()->getTerminator());
1592 Value *V = BI->getCondition();
1593 CHR_DEBUG(dbgs() << *V << "\n");
1594 if (auto *I = dyn_cast(V)) {
1595 assert((I->getParent() == PreEntryBlock ||
1596 !Scope->contains(I)) &&
1597 "Must have been hoisted to PreEntryBlock or outside the scope");
1598 }
1599 }
1600 for (SelectInst *SI : RI.Selects) {
1601 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1602 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
// Selects that are not recorded as biased are not checked.
1603 if (!(IsTrueBiased || IsFalseBiased))
1604 continue;
1605 Value *V = SI->getCondition();
1606 CHR_DEBUG(dbgs() << *V << "\n");
1607 if (auto *I = dyn_cast(V)) {
1608 assert((I->getParent() == PreEntryBlock ||
1609 !Scope->contains(I)) &&
1610 "Must have been hoisted to PreEntryBlock or outside the scope");
1611 }
1612 }
1613 }
1614 }
1615
// Apply the CHR transformation to one scope. The steps, in order:
//   1. insert trivial phis at the exit block (only if there is one),
//   2. split the entry block at Scope->BranchInsertPoint,
//   3. clone the scope's blocks to form the cold path,
//   4. create the merged branch in the pre-entry block,
//   5. hoist the branch/select conditions to the pre-entry block,
//   6. build the merged condition and fix up the branches/selects.
// Scope must have at least one region (asserted). Phis inserted in step 1
// are added to TrivialPHIs.
1616 void CHR::transformScopes(CHRScope *Scope, DenseSet &TrivialPHIs) {
1617 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");
1618
1619 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
1620 Region *FirstRegion = Scope->RegInfos[0].R;
1621 BasicBlock *EntryBlock = FirstRegion->getEntry();
1622 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
1623 BasicBlock *ExitBlock = LastRegion->getExit();
// Read the entry block's profile count now, before the block is split below.
1624 Optional ProfileCount = BFI.getBlockProfileCount(EntryBlock);
1625
1626 if (ExitBlock) {
1627 // Insert a trivial phi at the exit block (where the CHR hot path and the
1628 // cold path merges) for a value that's defined in the scope but used
1629 // outside it (meaning it's alive at the exit block). We will add the
1630 // incoming values for the CHR cold paths to it below. Without this, we'd
1631 // miss updating phi's for such values unless there happens to already be a
1632 // phi for that value there.
1633 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
1634 }
1635
1636 // Split the entry block of the first region. The new block becomes the new
1637 // entry block of the first region. The old entry block becomes the block to
1638 // insert the CHR branch into. Note DT gets updated. Since DT gets updated
1639 // through the split, we update the entry of the first region after the split,
1640 // and Region only points to the entry and the exit blocks, rather than
1641 // keeping everything in a list or set, the blocks membership and the
1642 // entry/exit blocks of the region are still valid after the split.
1643 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
1644 << " at " << *Scope->BranchInsertPoint << "\n");
1645 BasicBlock *NewEntryBlock =
1646 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
1647 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1648 "NewEntryBlock's only pred must be EntryBlock");
1649 FirstRegion->replaceEntryRecursive(NewEntryBlock);
// From here on the old entry block is referred to as the pre-entry block.
1650 BasicBlock *PreEntryBlock = EntryBlock;
1651
1652 ValueToValueMapTy VMap;
1653 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
1654 // hot path (originals) and a cold path (clones) and update the PHIs at the
1655 // exit block.
1656 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
1657
1658 // Replace the old (placeholder) branch with the new (merged) conditional
1659 // branch.
1660 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
1661 NewEntryBlock, VMap);
1662
1663 #ifndef NDEBUG
1664 assertCHRRegionsHaveBiasedBranchOrSelect(Scope);
1665 #endif
1666
1667 // Hoist the conditional values of the branches/selects.
1668 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs);
1669
1670 #ifndef NDEBUG
1671 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
1672 #endif
1673
1674 // Create the combined branch condition and constant-fold the branches/selects
1675 // in the hot path.
// A profile count of 0 is passed when none is available for the entry block.
1676 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
1677 ProfileCount ? ProfileCount.getValue() : 0);
1678 }
1679
1680 // A helper for transformScopes. Clone the blocks in the scope (excluding the
1681 // PreEntryBlock) to split into a hot path and a cold path and update the PHIs
1682 // at the exit block.
//
// On return VMap maps every original block of the scope to its ".nonchr"
// clone. ExitBlock may be null; in that case the clones are not moved and no
// PHIs are touched. LastRegion decides which PHI predecessors belong to the
// scope.
1683 void CHR::cloneScopeBlocks(CHRScope *Scope,
1684 BasicBlock *PreEntryBlock,
1685 BasicBlock *ExitBlock,
1686 Region *LastRegion,
1687 ValueToValueMapTy &VMap) {
1688 // Clone all the blocks. The original blocks will be the hot-path
1689 // CHR-optimized code and the cloned blocks will be the original unoptimized
1690 // code. This is so that the block pointers from the
1691 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code
1692 // which CHR should apply to.
1693 SmallVector NewBlocks;
1694 for (RegInfo &RI : Scope->RegInfos)
1695 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1696 // sub-Scopes.
1697 assert(BB != PreEntryBlock && "Don't copy the preetntry block");
1698 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
1699 NewBlocks.push_back(NewBB);
1700 VMap[BB] = NewBB;
1701 }
1702
1703 // Place the cloned blocks right after the original blocks (right before the
1704 // exit block).
// NOTE(review): the spliced range runs from the first clone to F.end(), so
// this relies on the clones being the last blocks of F at this point --
// confirm against CloneBasicBlock's insertion position.
1705 if (ExitBlock)
1706 F.getBasicBlockList().splice(ExitBlock->getIterator(),
1707 F.getBasicBlockList(),
1708 NewBlocks[0]->getIterator(), F.end());
1709
1710 // Update the cloned blocks/instructions to refer to themselves.
// Operands without a VMap entry are tolerated (RF_IgnoreMissingLocals);
// presumably these are values defined outside the scope.
1711 for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
1712 for (Instruction &I : *NewBlocks[i])
1713 RemapInstruction(&I, VMap,
1714 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1715
1716 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for
1717 // the top-level region but we don't need to add PHIs. The trivial PHIs
1718 // inserted above will be updated here.
// NumOps is read once up front, so the incoming entries appended inside the
// loop are not visited again.
1719 if (ExitBlock)
1720 for (PHINode &PN : ExitBlock->phis())
1721 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
1722 ++I) {
1723 BasicBlock *Pred = PN.getIncomingBlock(I);
1724 if (LastRegion->contains(Pred)) {
// Use the cloned value when the incoming value was cloned; otherwise the
// original value is reused for the cold-path edge.
1725 Value *V = PN.getIncomingValue(I);
1726 auto It = VMap.find(V);
1727 if (It != VMap.end()) V = It->second;
1728 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
1729 PN.addIncoming(V, cast(VMap[Pred]));
1730 }
1731 }
1732 }
1733
1734 // A helper for transformScope. Replace the old (placeholder) branch with the
1735 // new (merged) conditional branch.
1736 BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
1737 BasicBlock *EntryBlock,
1738 BasicBlock *NewEntryBlock,
1739 ValueToValueMapTy &VMap) {
1740 BranchInst *OldBR = cast(PreEntryBlock->getTerminator());
1741 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&
1742 "SplitBlock did not work correctly!");
1743 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1744 "NewEntryBlock's only pred must be EntryBlock");
1745 assert(VMap.find(NewEntryBlock) != VMap.end() &&
1746 "NewEntryBlock must have been copied");
1747 OldBR->removeFromParent();
1748 OldBR->dropAllReferences();
1749 // The true predicate is a placeholder. It will be replaced later in
1750 // fixupBranchesAndSelects().
1751 BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
1752 cast(VMap[NewEntryBlock]),
1753 ConstantInt::getTrue(F.getContext()));
1754 PreEntryBlock->getInstList().push_back(NewBR);
1755 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1756 "NewEntryBlock's only pred must be EntryBlock");
1757 return NewBR;
1758 }
1759
1760 // A helper for transformScopes. Create the combined branch condition and
1761 // constant-fold the branches/selects in the hot path.
1762 void CHR::fixupBranchesAndSelects(CHRScope *Scope,
1763 BasicBlock *PreEntryBlock,
1764 BranchInst *MergedBR,
1765 uint64_t ProfileCount) {
1766 Value *MergedCondition = ConstantInt::getTrue(F.getContext());
1767 BranchProbability CHRBranchBias(1, 1);
1768 uint64_t NumCHRedBranches = 0;
1769 IRBuilder<> IRB(PreEntryBlock->getTerminator());
1770 for (RegInfo &RI : Scope->CHRRegions) {
1771 Region *R = RI.R;
1772 if (RI.HasBranch) {
1773 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
1774 ++NumCHRedBranches;
1775 }
1776 for (SelectInst *SI : RI.Selects) {
1777 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
1778 ++NumCHRedBranches;
1779 }
1780 }
1781 Stats.NumBranchesDelta += NumCHRedBranches - 1;
1782 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
1783 MergedBR->setCondition(MergedCondition);
1784 SmallVector Weights;
1785 Weights.push_back(static_cast(CHRBranchBias.scale(1000)));
1786 Weights.push_back(static_cast(CHRBranchBias.getCompl().scale(1000)));
1787 MDBuilder MDB(F.getContext());
1788 MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1789 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
1790 << "\n");
1791 }
1792
1793 // A helper for fixupBranchesAndSelects. Add to the combined branch condition
1794 // and constant-fold a branch in the hot path.
//
// The branch handled is the terminator of R's entry block. MergedCondition is
// extended in place through IRB, and CHRBranchBias is lowered to this branch's
// bias when that is smaller.
1795 void CHR::fixupBranch(Region *R, CHRScope *Scope,
1796 IRBuilder<> &IRB,
1797 Value *&MergedCondition,
1798 BranchProbability &CHRBranchBias) {
1799 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1800 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
1801 "Must be truthy or falsy");
1802 auto *BI = cast(R->getEntry()->getTerminator());
1803 assert(BranchBiasMap.find(R) != BranchBiasMap.end() &&
1804 "Must be in the bias map");
1805 BranchProbability Bias = BranchBiasMap[R];
1806 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1807 // Take the min.
1808 if (CHRBranchBias > Bias)
1809 CHRBranchBias = Bias;
// Exactly one of the two successors must be the region's exit block.
1810 BasicBlock *IfThen = BI->getSuccessor(1);
1811 BasicBlock *IfElse = BI->getSuccessor(0);
1812 BasicBlock *RegionExitBlock = R->getExit();
1813 assert(RegionExitBlock && "Null ExitBlock");
1814 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&
1815 IfThen != IfElse && "Invariant from findScopes");
1816 if (IfThen == RegionExitBlock) {
1817 // Swap them so that IfThen means going into it and IfElse means skipping
1818 // it.
1819 std::swap(IfThen, IfElse);
1820 }
1821 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
1822 << " IfElse " << IfElse->getName() << "\n");
1823 Value *Cond = BI->getCondition();
// A true-biased region is one whose hot direction goes into the region
// (IfThen); ConditionTrue records whether that direction is BI's first
// successor, i.e. the one taken when Cond is true.
1824 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
1825 bool ConditionTrue = HotTarget == BI->getSuccessor(0);
1826 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
1827 MergedCondition);
1828 // Constant-fold the branch in the hot path so it always goes to HotTarget.
// NOTE(review): the assert presumably guards against addToMergedCondition
// having changed BI's successors -- confirm.
1829 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
1830 "The successor shouldn't change");
1831 Value *NewCondition = ConditionTrue ?
1832 ConstantInt::getTrue(F.getContext()) :
1833 ConstantInt::getFalse(F.getContext());
1834 BI->setCondition(NewCondition);
1835 }
1836
1837 // A helper for fixupBranchesAndSelects. Add to the combined branch condition
1838 // and constant-fold a select in the hot path.
//
// SI must be recorded as true- or false-biased in Scope and have an entry in
// SelectBiasMap. MergedCondition is extended in place through IRB, and
// CHRBranchBias is lowered to this select's bias when that is smaller.
1839 void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
1840 IRBuilder<> &IRB,
1841 Value *&MergedCondition,
1842 BranchProbability &CHRBranchBias) {
1843 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1844 assert((IsTrueBiased ||
1845 Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
1846 assert(SelectBiasMap.find(SI) != SelectBiasMap.end() &&
1847 "Must be in the bias map");
1848 BranchProbability Bias = SelectBiasMap[SI];
1849 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1850 // Take the min.
1851 if (CHRBranchBias > Bias)
1852 CHRBranchBias = Bias;
// Fetch the condition before it is overwritten below; the merged condition
// needs the original value.
1853 Value *Cond = SI->getCondition();
1854 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
1855 MergedCondition);
// Pin the select to its biased operand with a constant condition.
1856 Value *NewCondition = IsTrueBiased ?
1857 ConstantInt::getTrue(F.getContext()) :
1858 ConstantInt::getFalse(F.getContext());
1859 SI->setCondition(NewCondition);
1860 }
1861
1862 // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
1863 // condition.
//
// IsTrueBiased says whether the hot direction corresponds to Cond being true.
// When it does, Cond is ANDed into MergedCondition as is; otherwise its
// negation is ANDed in. New instructions are created through IRB.
1864 void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
1865 Instruction *BranchOrSelect,
1866 CHRScope *Scope,
1867 IRBuilder<> &IRB,
1868 Value *&MergedCondition) {
1869 if (IsTrueBiased) {
1870 MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
1871 } else {
1872 // If Cond is an icmp and all users of Cond except for BranchOrSelect are
1873 // branches, negate the icmp predicate and swap the branch targets and avoid
1874 // inserting an Xor to negate Cond. When that succeeds, Cond itself is ANDed
// in, just like in the true-biased case.
1875 bool Done = false;
1876 if (auto *ICmp = dyn_cast(Cond))
1877 if (NegateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) {
1878 MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
1879 Done = true;
1880 }
// Fallback: negate explicitly as (true xor Cond).
1881 if (!Done) {
1882 Value *Negate = IRB.CreateXor(
1883 ConstantInt::getTrue(F.getContext()), Cond);
1884 MergedCondition = IRB.CreateAnd(MergedCondition, Negate);
1885 }
1886 }
1887 }
1888
// Apply the single-scope transformScopes to every scope in CHRScopes, in the
// order given. One TrivialPHIs set is shared by all scopes. The counter i only
// numbers the debug IR dumps.
1889 void CHR::transformScopes(SmallVectorImpl &CHRScopes) {
1890 unsigned i = 0;
1891 (void)(i); // Unused in release build.
1892 DenseSet TrivialPHIs;
1893 for (CHRScope *Scope : CHRScopes) {
1894 transformScopes(Scope, TrivialPHIs);
1895 CHR_DEBUG(
1896 std::ostringstream oss;
1897 oss << " after transformScopes " << i++;
1898 dumpIR(F, oss.str().c_str(), nullptr));
1899 }
1900 }
1901
// Debug helper: print Label followed by the number of scopes, then each scope
// on its own line, to dbgs().
1902 static void dumpScopes(SmallVectorImpl &Scopes, const char * Label) {
1903 dbgs() << Label << " " << Scopes.size() << "\n";
1904 for (CHRScope *Scope : Scopes) {
1905 dbgs() << *Scope << "\n";
1906 }
1907 }
1908
// Run CHR on the function F. Returns false right away when shouldApply(F, PSI)
// rejects the function. Otherwise the scopes go through find -> split ->
// classify -> filter -> set regions -> sort, and are transformed if any
// remain. Returns true iff the sorted scope list was non-empty, i.e. the
// transformation was applied.
1909 bool CHR::run() {
1910 if (!shouldApply(F, PSI))
1911 return false;
1912
1913 CHR_DEBUG(dumpIR(F, "before", nullptr));
1914
1915 bool Changed = false;
1916 {
1917 CHR_DEBUG(
1918 dbgs() << "RegionInfo:\n";
1919 RI.print(dbgs()));
1920
1921 // Recursively traverse the region tree and find regions that have biased
1922 // branches and/or selects and create scopes.
1923 SmallVector AllScopes;
1924 findScopes(AllScopes);
1925 CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));
1926
1927 // Split the scopes if 1) the condition values of the biased
1928 // branches/selects of the inner/lower scope can't be hoisted up to the
1929 // outermost/uppermost scope entry, or 2) the condition values of the biased
1930 // branches/selects in a scope (including subscopes) don't share at least
1931 // one common value.
1932 SmallVector SplitScopes;
1933 splitScopes(AllScopes, SplitScopes);
1934 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));
1935
1936 // After splitting, set the biased regions and selects of a scope (a tree
1937 // root) that include those of the subscopes.
1938 classifyBiasedScopes(SplitScopes);
1939 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");
1940
1941 // Filter out the scopes that have only one biased region or select (CHR
1942 // isn't useful in such a case).
1943 SmallVector FilteredScopes;
1944 filterScopes(SplitScopes, FilteredScopes);
1945 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));
1946
1947 // Set the regions to be CHR'ed and their hoist stops for each scope.
1948 SmallVector SetScopes;
1949 setCHRRegions(FilteredScopes, SetScopes);
1950 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));
1951
1952 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner
1953 // ones. We need to apply CHR from outer to inner so that we apply CHR only
1954 // to the hot path, rather than both hot and cold paths.
1955 SmallVector SortedScopes;
1956 sortScopes(SetScopes, SortedScopes);
1957 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));
1958
1959 CHR_DEBUG(
1960 dbgs() << "RegionInfo:\n";
1961 RI.print(dbgs()));
1962
1963 // Apply the CHR transformation.
1964 if (!SortedScopes.empty()) {
1965 transformScopes(SortedScopes);
1966 Changed = true;
1967 }
1968 }
1969
1970 if (Changed)
1971 CHR_DEBUG(dumpIR(F, "after", &Stats));
1972
1973 return Changed;
1974 }
1975
// Legacy pass manager entry point. Fetches the block frequency info, dominator
// tree, profile summary info and region info through getAnalysis, runs CHR on
// F with them, and returns CHR::run()'s result (true iff F was changed).
1976 bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) {
1977 BlockFrequencyInfo &BFI =
1978 getAnalysis().getBFI();
1979 DominatorTree &DT = getAnalysis().getDomTree();
1980 ProfileSummaryInfo &PSI =
1981 *getAnalysis().getPSI();
1982 RegionInfo &RI = getAnalysis().getRegionInfo();
1983 return CHR(F, BFI, DT, PSI, RI).run();
1984 }
1985
1986 namespace llvm {
1987
// Constructing the pass parses the CHR filter files.
1988 ControlHeightReductionPass::ControlHeightReductionPass() {
1989 ParseCHRFilterFiles();
1990 }
1991
// New pass manager entry point. Collects the function-level analyses from
// FAM and the profile summary from the module analysis manager, then runs CHR
// on F. Returns PreservedAnalyses::all() when F was not changed; otherwise a
// fresh PreservedAnalyses with one analysis marked preserved.
1992 PreservedAnalyses ControlHeightReductionPass::run(
1993 Function &F,
1994 FunctionAnalysisManager &FAM) {
1995 auto &BFI = FAM.getResult(F);
1996 auto &DT = FAM.getResult(F);
1997 auto &MAMProxy = FAM.getResult(F);
1998 auto &MAM = MAMProxy.getManager();
// NOTE(review): the cached result is dereferenced without a null check, so
// this presumably relies on the pipeline having computed the module-level
// profile summary beforehand -- confirm.
1999 auto &PSI = *MAM.getCachedResult(*F.getParent());
2000 auto &RI = FAM.getResult(F);
2001 bool Changed = CHR(F, BFI, DT, PSI, RI).run();
2002 if (!Changed)
2003 return PreservedAnalyses::all();
2004 auto PA = PreservedAnalyses();
2005 PA.preserve();
2006 return PA;
2007 }
2008
2009 } // namespace llvm
5858 initializeAddressSanitizerPass(Registry);
5959 initializeAddressSanitizerModulePass(Registry);
6060 initializeBoundsCheckingLegacyPassPass(Registry);
61 initializeControlHeightReductionLegacyPassPass(Registry);
6162 initializeGCOVProfilerLegacyPassPass(Registry);
6263 initializePGOInstrumentationGenLegacyPassPass(Registry);
6364 initializePGOInstrumentationUseLegacyPassPass(Registry);
0 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt < %s -chr -instcombine -simplifycfg -S | FileCheck %s
2 ; RUN: opt < %s -passes='require,function(chr,instcombine,simplify-cfg)' -S | FileCheck %s
3
4 declare void @foo()
5 declare void @bar()
6
7 ; Simple case.
8 ; Roughly,
9 ; t0 = *i
10 ; if ((t0 & 1) != 0) // Likely true
11 ; foo()
12 ; if ((t0 & 2) != 0) // Likely true
13 ; foo()
14 ; ->
15 ; t0 = *i
16 ; if ((t0 & 3) == 3) { // Likely true
17 ; foo()
18 ; foo()
19 ; } else {
20 ; if ((t0 & 1) != 0)
21 ; foo()
22 ; if ((t0 & 2) != 0)
23 ; foo()
24 ; }
; The CHECK lines give the expected output: bb0 is the hot path holding both
; calls behind one merged test, and the *.nonchr blocks are the cold-path copy
; of the two original tests. The instructions after the CHECK lines are the
; input.
25 define void @test_chr_1(i32* %i) !prof !14 {
26 ; CHECK-LABEL: @test_chr_1(
27 ; CHECK-NEXT: entry:
28 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
29 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3
30 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
31 ; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
32 ; CHECK: bb0:
33 ; CHECK-NEXT: call void @foo()
34 ; CHECK-NEXT: call void @foo()
35 ; CHECK-NEXT: br label [[BB3:%.*]]
36 ; CHECK: entry.split.nonchr:
37 ; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 1
38 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
39 ; CHECK-NEXT: br i1 [[TMP4]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16
40 ; CHECK: bb0.nonchr:
41 ; CHECK-NEXT: call void @foo()
42 ; CHECK-NEXT: br label [[BB1_NONCHR]]
43 ; CHECK: bb1.nonchr:
44 ; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 2
45 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
46 ; CHECK-NEXT: br i1 [[TMP6]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16
47 ; CHECK: bb2.nonchr:
48 ; CHECK-NEXT: call void @foo()
49 ; CHECK-NEXT: br label [[BB3]]
50 ; CHECK: bb3:
51 ; CHECK-NEXT: ret void
52 ;
53 entry:
54 %0 = load i32, i32* %i
55 %1 = and i32 %0, 1
56 %2 = icmp eq i32 %1, 0
57 br i1 %2, label %bb1, label %bb0, !prof !15
58
59 bb0:
60 call void @foo()
61 br label %bb1
62
63 bb1:
64 %3 = and i32 %0, 2
65 %4 = icmp eq i32 %3, 0
66 br i1 %4, label %bb3, label %bb2, !prof !15
67
68 bb2:
69 call void @foo()
70 br label %bb3
71
72 bb3:
73 ret void
74 }
75
76 ; Simple case with a cold block.
77 ; Roughly,
78 ; t0 = *i
79 ; if ((t0 & 1) != 0) // Likely true
80 ; foo()
81 ; if ((t0 & 2) == 0) // Likely false
82 ; bar()
83 ; if ((t0 & 4) != 0) // Likely true
84 ; foo()
85 ; ->
86 ; t0 = *i
87 ; if ((t0 & 7) == 7) { // Likely true
88 ; foo()
89 ; foo()
90 ; } else {
91 ; if ((t0 & 1) != 0)
92 ; foo()
93 ; if ((t0 & 2) == 0)
94 ; bar()
95 ; if ((t0 & 4) != 0)
96 ; foo()
97 ; }
; The CHECK lines give the expected output: the hot path (bb0) contains only
; the two foo() calls, while the call to bar() survives only in the cold-path
; copy (bb2.nonchr). The instructions after the CHECK lines are the input.
98 define void @test_chr_1_1(i32* %i) !prof !14 {
99 ; CHECK-LABEL: @test_chr_1_1(
100 ; CHECK-NEXT: entry:
101 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
102 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 7
103 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 7
104 ; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
105 ; CHECK: bb0:
106 ; CHECK-NEXT: call void @foo()
107 ; CHECK-NEXT: call void @foo()
108 ; CHECK-NEXT: br label [[BB5:%.*]]
109 ; CHECK: entry.split.nonchr:
110 ; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 1
111 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
112 ; CHECK-NEXT: br i1 [[TMP4]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16
113 ; CHECK: bb0.nonchr:
114 ; CHECK-NEXT: call void @foo()
115 ; CHECK-NEXT: br label [[BB1_NONCHR]]
116 ; CHECK: bb1.nonchr:
117 ; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 2
118 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
119 ; CHECK-NEXT: br i1 [[TMP6]], label [[BB2_NONCHR:%.*]], label [[BB3_NONCHR:%.*]], !prof !16
120 ; CHECK: bb2.nonchr:
121 ; CHECK-NEXT: call void @bar()
122 ; CHECK-NEXT: br label [[BB3_NONCHR]]
123 ; CHECK: bb3.nonchr:
124 ; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP0]], 4
125 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
126 ; CHECK-NEXT: br i1 [[TMP8]], label [[BB5]], label [[BB4_NONCHR:%.*]], !prof !16
127 ; CHECK: bb4.nonchr:
128 ; CHECK-NEXT: call void @foo()
129 ; CHECK-NEXT: br label [[BB5]]
130 ; CHECK: bb5:
131 ; CHECK-NEXT: ret void
132 ;
133 entry:
134 %0 = load i32, i32* %i
135 %1 = and i32 %0, 1
136 %2 = icmp eq i32 %1, 0
137 br i1 %2, label %bb1, label %bb0, !prof !15
138
139 bb0:
140 call void @foo()
141 br label %bb1
142
143 bb1:
144 %3 = and i32 %0, 2
145 %4 = icmp eq i32 %3, 0
146 br i1 %4, label %bb2, label %bb3, !prof !15
147
148 bb2:
149 call void @bar()
150 br label %bb3
151
152 bb3:
153 %5 = and i32 %0, 4
154 %6 = icmp eq i32 %5, 0
155 br i1 %6, label %bb5, label %bb4, !prof !15
156
157 bb4:
158 call void @foo()
159 br label %bb5
160
161 bb5:
162 ret void
163 }
164
165 ; With an aggregate bit check.
166 ; Roughly,
167 ; t0 = *i
168 ; if ((t0 & 255) != 0) // Likely true
169 ; if ((t0 & 1) != 0) // Likely true
170 ; foo()
171 ; if ((t0 & 2) != 0) // Likely true
172 ; foo()
173 ; ->
174 ; t0 = *i
175 ; if ((t0 & 3) == 3) { // Likely true
176 ; foo()
177 ; foo()
178 ; } else if ((t0 & 255) != 0) {
179 ; if ((t0 & 1) != 0)
180 ; foo()
181 ; if ((t0 & 2) != 0)
182 ; foo()
183 ; }
; The CHECK lines give the expected output: the hot-path test is only
; (t0 & 3) == 3, and the (t0 & 255) test remains on the cold path only
; (entry.split.nonchr). The instructions after the CHECK lines are the input.
184 define void @test_chr_2(i32* %i) !prof !14 {
185 ; CHECK-LABEL: @test_chr_2(
186 ; CHECK-NEXT: entry:
187 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
188 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3
189 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
190 ; CHECK-NEXT: br i1 [[TMP2]], label [[BB1:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
191 ; CHECK: bb1:
192 ; CHECK-NEXT: call void @foo()
193 ; CHECK-NEXT: call void @foo()
194 ; CHECK-NEXT: br label [[BB4:%.*]]
195 ; CHECK: entry.split.nonchr:
196 ; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 255
197 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
198 ; CHECK-NEXT: br i1 [[TMP4]], label [[BB4]], label [[BB0_NONCHR:%.*]], !prof !16
199 ; CHECK: bb0.nonchr:
200 ; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 1
201 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
202 ; CHECK-NEXT: br i1 [[TMP6]], label [[BB2_NONCHR:%.*]], label [[BB1_NONCHR:%.*]], !prof !16
203 ; CHECK: bb2.nonchr:
204 ; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP0]], 2
205 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
206 ; CHECK-NEXT: br i1 [[TMP8]], label [[BB4]], label [[BB3_NONCHR:%.*]], !prof !16
207 ; CHECK: bb3.nonchr:
208 ; CHECK-NEXT: call void @foo()
209 ; CHECK-NEXT: br label [[BB4]]
210 ; CHECK: bb1.nonchr:
211 ; CHECK-NEXT: call void @foo()
212 ; CHECK-NEXT: br label [[BB2_NONCHR]]
213 ; CHECK: bb4:
214 ; CHECK-NEXT: ret void
215 ;
216 entry:
217 %0 = load i32, i32* %i
218 %1 = and i32 %0, 255
219 %2 = icmp eq i32 %1, 0
220 br i1 %2, label %bb4, label %bb0, !prof !15
221
222 bb0:
223 %3 = and i32 %0, 1
224 %4 = icmp eq i32 %3, 0
225 br i1 %4, label %bb2, label %bb1, !prof !15
226
227 bb1:
228 call void @foo()
229 br label %bb2
230
231 bb2:
232 %5 = and i32 %0, 2
233 %6 = icmp eq i32 %5, 0
234 br i1 %6, label %bb4, label %bb3, !prof !15
235
236 bb3:
237 call void @foo()
238 br label %bb4
239
240 bb4:
241 ret void
242 }
243
244 ; Split case.
245 ; Roughly,
246 ; t1 = *i
247 ; if ((t1 & 1) != 0) // Likely true
248 ; foo()
249 ; if ((t1 & 2) != 0) // Likely true
250 ; foo()
251 ; t2 = *i
252 ; if ((t2 & 4) != 0) // Likely true
253 ; foo()
254 ; if ((t2 & 8) != 0) // Likely true
255 ; foo()
256 ; ->
257 ; t1 = *i
258 ; if ((t1 & 3) == 3) { // Likely true
259 ; foo()
260 ; foo()
261 ; } else {
262 ; if ((t1 & 1) != 0)
263 ; foo()
264 ; if ((t1 & 2) != 0)
265 ; foo()
266 ; }
267 ; t2 = *i
268 ; if ((t2 & 12) == 12) { // Likely true
269 ; foo()
270 ; foo()
271 ; } else {
272 ; if ((t2 & 4) != 0)
273 ; foo()
274 ; if ((t2 & 8) != 0)
275 ; foo()
276 ; }
; The CHECK lines give the expected output: two separately merged groups, one
; tested in entry on the first load and one tested in bb3 on the second load.
; The instructions after the CHECK lines are the input.
277 define void @test_chr_3(i32* %i) !prof !14 {
278 ; CHECK-LABEL: @test_chr_3(
279 ; CHECK-NEXT: entry:
280 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
281 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3
282 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
283 ; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
284 ; CHECK: bb0:
285 ; CHECK-NEXT: call void @foo()
286 ; CHECK-NEXT: call void @foo()
287 ; CHECK-NEXT: br label [[BB3:%.*]]
288 ; CHECK: entry.split.nonchr:
289 ; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 1
290 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
291 ; CHECK-NEXT: br i1 [[TMP4]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16
292 ; CHECK: bb0.nonchr:
293 ; CHECK-NEXT: call void @foo()
294 ; CHECK-NEXT: br label [[BB1_NONCHR]]
295 ; CHECK: bb1.nonchr:
296 ; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 2
297 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
298 ; CHECK-NEXT: br i1 [[TMP6]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16
299 ; CHECK: bb2.nonchr:
300 ; CHECK-NEXT: call void @foo()
301 ; CHECK-NEXT: br label [[BB3]]
302 ; CHECK: bb3:
303 ; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[I]], align 4
304 ; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[TMP7]], 12
305 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 12
306 ; CHECK-NEXT: br i1 [[TMP9]], label [[BB4:%.*]], label [[BB3_SPLIT_NONCHR:%.*]], !prof !15
307 ; CHECK: bb4:
308 ; CHECK-NEXT: call void @foo()
309 ; CHECK-NEXT: call void @foo()
310 ; CHECK-NEXT: br label [[BB7:%.*]]
311 ; CHECK: bb3.split.nonchr:
312 ; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP7]], 4
313 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0
314 ; CHECK-NEXT: br i1 [[TMP11]], label [[BB5_NONCHR:%.*]], label [[BB4_NONCHR:%.*]], !prof !16
315 ; CHECK: bb4.nonchr:
316 ; CHECK-NEXT: call void @foo()
317 ; CHECK-NEXT: br label [[BB5_NONCHR]]
318 ; CHECK: bb5.nonchr:
319 ; CHECK-NEXT: [[TMP12:%.*]] = and i32 [[TMP7]], 8
320 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0
321 ; CHECK-NEXT: br i1 [[TMP13]], label [[BB7]], label [[BB6_NONCHR:%.*]], !prof !16
322 ; CHECK: bb6.nonchr:
323 ; CHECK-NEXT: call void @foo()
324 ; CHECK-NEXT: br label [[BB7]]
325 ; CHECK: bb7:
326 ; CHECK-NEXT: ret void
327 ;
328 entry:
329 %0 = load i32, i32* %i
330 %1 = and i32 %0, 1
331 %2 = icmp eq i32 %1, 0
332 br i1 %2, label %bb1, label %bb0, !prof !15
333
334 bb0:
335 call void @foo()
336 br label %bb1
337
338 bb1:
339 %3 = and i32 %0, 2
340 %4 = icmp eq i32 %3, 0
341 br i1 %4, label %bb3, label %bb2, !prof !15
342
343 bb2:
344 call void @foo()
345 br label %bb3
346
347 bb3:
348 %5 = load i32, i32* %i
349 %6 = and i32 %5, 4
350 %7 = icmp eq i32 %6, 0
351 br i1 %7, label %bb5, label %bb4, !prof !15
352
353 bb4:
354 call void @foo()
355 br label %bb5
356
357 bb5:
358 %8 = and i32 %5, 8
359 %9 = icmp eq i32 %8, 0
360 br i1 %9, label %bb7, label %bb6, !prof !15
361
362 bb6:
363 call void @foo()
364 br label %bb7
365
366 bb7:
367 ret void
368 }
369
370 ; Selects.
371 ; Roughly,
372 ; t0 = *i
373 ; sum1 = ((t0 & 1) == 0) ? sum0 : (sum0 + 42) // Likely false
374 ; sum2 = ((t0 & 2) == 0) ? sum1 : (sum1 + 43) // Likely false
375 ; return sum2
376 ; ->
377 ; t0 = *i
378 ; if ((t0 & 3) == 3)
379 ; return sum0 + 85
380 ; else {
381 ; sum1 = ((t0 & 1) == 0) ? sum0 : (sum0 + 42)
382 ; sum2 = ((t0 & 2) == 0) ? sum1 : (sum1 + 43)
383 ; return sum2
384 ; }
; The CHECK lines give the expected output: entry.split is the hot path and
; returns sum0 + 85 without any select; entry.split.nonchr keeps both selects.
; The instructions after the CHECK lines are the input.
385 define i32 @test_chr_4(i32* %i, i32 %sum0) !prof !14 {
386 ; CHECK-LABEL: @test_chr_4(
387 ; CHECK-NEXT: entry:
388 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
389 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3
390 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
391 ; CHECK-NEXT: br i1 [[TMP2]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
392 ; CHECK: entry.split:
393 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SUM0:%.*]], 85
394 ; CHECK-NEXT: ret i32 [[TMP3]]
395 ; CHECK: entry.split.nonchr:
396 ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SUM0]], 42
397 ; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 1
398 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
399 ; CHECK-NEXT: [[SUM1_NONCHR:%.*]] = select i1 [[TMP6]], i32 [[SUM0]], i32 [[TMP4]], !prof !16
400 ; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP0]], 2
401 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
402 ; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[SUM1_NONCHR]], 43
403 ; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[TMP8]], i32 [[SUM1_NONCHR]], i32 [[TMP9]], !prof !16
404 ; CHECK-NEXT: ret i32 [[SUM2_NONCHR]]
405 ;
406 entry:
407 %0 = load i32, i32* %i
408 %1 = and i32 %0, 1
409 %2 = icmp eq i32 %1, 0
410 %3 = add i32 %sum0, 42
411 %sum1 = select i1 %2, i32 %sum0, i32 %3, !prof !15
412 %4 = and i32 %0, 2
413 %5 = icmp eq i32 %4, 0
414 %6 = add i32 %sum1, 43
415 %sum2 = select i1 %5, i32 %sum1, i32 %6, !prof !15
416 ret i32 %sum2
417 }
418
419 ; Selects + Brs
420 ; Roughly,
421 ; t0 = *i
422 ; if ((t0 & 255) != 0) { // Likely true
423 ; sum = ((t0 & 1) == 0) ? sum0 : (sum0 + 42) // Likely false
424 ; sum = ((t0 & 2) == 0) ? sum : (sum + 43) // Likely false
425 ; if ((t0 & 4) != 0) { // Likely true
426 ; sum3 = sum + 44
427 ; sum = ((t0 & 8) == 0) ? sum3 : (sum3 + 44) // Likely false
428 ; }
429 ; }
430 ; return sum
431 ; ->
432 ; t0 = *i
433 ; if ((t0 & 15) == 15) { // Likely true
434 ; sum = sum0 + 173
435 ; } else if ((t0 & 255) != 0) {
436 ; sum = ((t0 & 1) == 0) ? sum0 : (sum0 + 42)
437 ; sum = ((t0 & 2) == 0) ? sum : (sum + 43)
438 ; if ((t0 & 4) != 0) {
439 ; sum3 = sum + 44
440 ; sum = ((t0 & 8) == 0) ? sum3 : (sum3 + 44)
441 ; }
442 ; }
443 ; return sum
; The CHECK lines give the expected output: bb0 is the hot path and feeds
; sum0 + 173 into the phi in bb3; the *.nonchr blocks keep the original
; branches and selects. The instructions after the CHECK lines are the input.
444 define i32 @test_chr_5(i32* %i, i32 %sum0) !prof !14 {
445 ; CHECK-LABEL: @test_chr_5(
446 ; CHECK-NEXT: entry:
447 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
448 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 15
449 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 15
450 ; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
451 ; CHECK: bb0:
452 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SUM0:%.*]], 85
453 ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SUM0]], 173
454 ; CHECK-NEXT: br label [[BB3:%.*]]
455 ; CHECK: entry.split.nonchr:
456 ; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 255
457 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
458 ; CHECK-NEXT: br i1 [[TMP6]], label [[BB3]], label [[BB0_NONCHR:%.*]], !prof !16
459 ; CHECK: bb0.nonchr:
460 ; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP0]], 1
461 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
462 ; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[SUM0]], 42
463 ; CHECK-NEXT: [[SUM1_NONCHR:%.*]] = select i1 [[TMP8]], i32 [[SUM0]], i32 [[TMP9]], !prof !16
464 ; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP0]], 2
465 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0
466 ; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[SUM1_NONCHR]], 43
467 ; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[TMP11]], i32 [[SUM1_NONCHR]], i32 [[TMP12]], !prof !16
468 ; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP0]], 4
469 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 0
470 ; CHECK-NEXT: br i1 [[TMP14]], label [[BB3]], label [[BB1_NONCHR:%.*]], !prof !16
471 ; CHECK: bb1.nonchr:
472 ; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP0]], 8
473 ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0
474 ; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP16]], i32 44, i32 88, !prof !16
475 ; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]]
476 ; CHECK-NEXT: br label [[BB3]]
477 ; CHECK: bb3:
478 ; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[TMP4]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM2_NONCHR]], [[BB0_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ]
479 ; CHECK-NEXT: ret i32 [[SUM6]]
480 ;
481 entry:
482 %0 = load i32, i32* %i
483 %1 = and i32 %0, 255
484 %2 = icmp eq i32 %1, 0
485 br i1 %2, label %bb3, label %bb0, !prof !15
486
487 bb0:
488 %3 = and i32 %0, 1
489 %4 = icmp eq i32 %3, 0
490 %5 = add i32 %sum0, 42
491 %sum1 = select i1 %4, i32 %sum0, i32 %5, !prof !15
492 %6 = and i32 %0, 2
493 %7 = icmp eq i32 %6, 0
494 %8 = add i32 %sum1, 43
495 %sum2 = select i1 %7, i32 %sum1, i32 %8, !prof !15
496 %9 = and i32 %0, 4
497 %10 = icmp eq i32 %9, 0
498 br i1 %10, label %bb2, label %bb1, !prof !15
499
500 bb1:
501 %sum3 = add i32 %sum2, 44
502 %11 = and i32 %0, 8
503 %12 = icmp eq i32 %11, 0
504 %13 = add i32 %sum3, 44
505 %sum4 = select i1 %12, i32 %sum3, i32 %13, !prof !15
506 br label %bb2
507
508 bb2:
509 %sum5 = phi i32 [ %sum2, %bb0 ], [ %sum4, %bb1 ]
510 br label %bb3
511
512 bb3:
513 %sum6 = phi i32 [ %sum0, %entry ], [ %sum5, %bb2 ]
514 ret i32 %sum6
515 }
516
517 ; Selects + Brs with a scope split in the middle
518 ; Roughly,
519 ; t0 = *i
520 ; if ((t0 & 255) != 0) { // Likely true
521 ; sum = ((t0 & 1) == 0) ? sum0 : (sum0 + 42) // Likely false
522 ; sum = ((t0 & 2) == 0) ? sum : (sum + 43) // Likely false
523 ; if ((sum0 & 4) != 0) { // Likely true. The condition doesn't use t0.
524 ; sum3 = sum + 44
525 ; sum = ((t0 & 8) == 0) ? sum3 : (sum3 + 44) // Likely false
526 ; }
527 ; }
528 ; return sum
529 ; ->
530 ; t0 = *i
531 ; if ((sum0 & 4) != 0 & (t0 & 11) == 11) { // Likely true
532 ; sum = sum0 + 173
533 ; } else if ((t0 & 255) != 0) {
534 ; sum = ((t0 & 1) == 0) ? sum0 : (sum0 + 42)
535 ; sum = ((t0 & 2) == 0) ? sum : (sum + 43)
536 ; if ((sum0 & 4) != 0) {
537 ; sum3 = sum + 44
538 ; sum = ((t0 & 8) == 0) ? sum3 : (sum3 + 44)
539 ; }
540 ; }
541 ; return sum
542 define i32 @test_chr_5_1(i32* %i, i32 %sum0) !prof !14 {
543 ; CHECK-LABEL: @test_chr_5_1(
544 ; CHECK-NEXT: entry:
545 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
546 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[SUM0:%.*]], 4
547 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
548 ; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 11
549 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 11
550 ; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[TMP2]]
551 ; CHECK-NEXT: br i1 [[TMP5]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
552 ; CHECK: bb0:
553 ; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[SUM0]], 85
554 ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[SUM0]], 173
555 ; CHECK-NEXT: br label [[BB3:%.*]]
556 ; CHECK: entry.split.nonchr:
557 ; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[TMP0]], 255
558 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
559 ; CHECK-NEXT: br i1 [[TMP9]], label [[BB3]], label [[BB0_NONCHR:%.*]], !prof !16
560 ; CHECK: bb0.nonchr:
561 ; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP0]], 1
562 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0
563 ; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[SUM0]], 42
564 ; CHECK-NEXT: [[SUM1_NONCHR:%.*]] = select i1 [[TMP11]], i32 [[SUM0]], i32 [[TMP12]], !prof !16
565 ; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP0]], 2
566 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 0
567 ; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[SUM1_NONCHR]], 43
568 ; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[TMP14]], i32 [[SUM1_NONCHR]], i32 [[TMP15]], !prof !16
569 ; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[SUM0]], 4
570 ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0
571 ; CHECK-NEXT: br i1 [[TMP17]], label [[BB3]], label [[BB1_NONCHR:%.*]], !prof !16
572 ; CHECK: bb1.nonchr:
573 ; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP0]], 8
574 ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0
575 ; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP19]], i32 44, i32 88, !prof !16
576 ; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]]
577 ; CHECK-NEXT: br label [[BB3]]
578 ; CHECK: bb3:
579 ; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[TMP7]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM2_NONCHR]], [[BB0_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ]
580 ; CHECK-NEXT: ret i32 [[SUM6]]
581 ;
582 entry: ; Input IR starts here; the FileCheck lines above describe the expected output.
583 %0 = load i32, i32* %i ; t0 = *i
584 %1 = and i32 %0, 255
585 %2 = icmp eq i32 %1, 0
586 br i1 %2, label %bb3, label %bb0, !prof !15 ; bypass bb0..bb2 when (t0 & 255) == 0
587
588 bb0:
589 %3 = and i32 %0, 1
590 %4 = icmp eq i32 %3, 0
591 %5 = add i32 %sum0, 42
592 %sum1 = select i1 %4, i32 %sum0, i32 %5, !prof !15 ; sum1 = sum0 + 42 iff (t0 & 1) != 0
593 %6 = and i32 %0, 2
594 %7 = icmp eq i32 %6, 0
595 %8 = add i32 %sum1, 43
596 %sum2 = select i1 %7, i32 %sum1, i32 %8, !prof !15 ; sum2 = sum1 + 43 iff (t0 & 2) != 0
597 %9 = and i32 %sum0, 4 ; Split: this condition tests %sum0, whereas every other condition here tests %0
598 %10 = icmp eq i32 %9, 0
599 br i1 %10, label %bb2, label %bb1, !prof !15 ; bb1 runs only when (sum0 & 4) != 0
600
601 bb1:
602 %sum3 = add i32 %sum2, 44
603 %11 = and i32 %0, 8
604 %12 = icmp eq i32 %11, 0
605 %13 = add i32 %sum3, 44
606 %sum4 = select i1 %12, i32 %sum3, i32 %13, !prof !15 ; sum4 = sum3 + 44 iff (t0 & 8) != 0
607 br label %bb2
608
609 bb2:
610 %sum5 = phi i32 [ %sum2, %bb0 ], [ %sum4, %bb1 ] ; %sum2 if bb1 was skipped, otherwise %sum4
611 br label %bb3
612
613 bb3:
614 %sum6 = phi i32 [ %sum0, %entry ], [ %sum5, %bb2 ] ; %sum0 if the entry branch bypassed bb0
615 ret i32 %sum6
616 }
617
618 ; Selects + Brs, non-matching bases
619 ; Roughly,
620 ; i0 = *i
621 ; j0 = *j
622 ; if ((i0 & 255) != 0) { // Likely true
623 ; sum = ((i0 & 2) == 0) ? sum0 : (sum0 + 43) // Likely false
624 ; if ((j0 & 4) != 0) { // Likely true. The condition uses j0, not i0.
625 ; sum3 = sum + 44
626 ; sum = ((i0 & 8) == 0) ? sum3 : (sum3 + 44) // Likely false
627 ; }
628 ; }
629 ; return sum
630 ; ->
631 ; i0 = *i
632 ; j0 = *j
633 ; if ((j0 & 4) != 0 & (i0 & 10) == 10) { // Likely true
634 ; sum = sum0 + 131
635 ; } else if ((i0 & 255) != 0) {
636 ; sum = ((i0 & 2) == 0) ? sum0 : (sum0 + 43)
637 ; if ((j0 & 4) != 0) {
638 ; sum3 = sum + 44
639 ; sum = ((i0 & 8) == 0) ? sum3 : (sum3 + 44)
640 ; }
641 ; }
642 ; return sum
643 define i32 @test_chr_6(i32* %i, i32* %j, i32 %sum0) !prof !14 {
644 ; CHECK-LABEL: @test_chr_6(
645 ; CHECK-NEXT: entry:
646 ; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
647 ; CHECK-NEXT: [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4
648 ; CHECK-NEXT: [[V9:%.*]] = and i32 [[J0]], 4
649 ; CHECK-NEXT: [[V10:%.*]] = icmp ne i32 [[V9]], 0
650 ; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[I0]], 10
651 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 10
652 ; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[V10]]
653 ; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
654 ; CHECK: bb0:
655 ; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
656 ; CHECK-NEXT: [[V13:%.*]] = add i32 [[SUM0]], 131
657 ; CHECK-NEXT: br label [[BB3:%.*]]
658 ; CHECK: entry.split.nonchr:
659 ; CHECK-NEXT: [[V1:%.*]] = and i32 [[I0]], 255
660 ; CHECK-NEXT: [[V2:%.*]] = icmp eq i32 [[V1]], 0
661 ; CHECK-NEXT: br i1 [[V2]], label [[BB3]], label [[BB0_NONCHR:%.*]], !prof !16
662 ; CHECK: bb0.nonchr:
663 ; CHECK-NEXT: [[V3_NONCHR:%.*]] = and i32 [[I0]], 2
664 ; CHECK-NEXT: [[V4_NONCHR:%.*]] = icmp eq i32 [[V3_NONCHR]], 0
665 ; CHECK-NEXT: [[V8_NONCHR:%.*]] = add i32 [[SUM0]], 43
666 ; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[V4_NONCHR]], i32 [[SUM0]], i32 [[V8_NONCHR]], !prof !16
667 ; CHECK-NEXT: [[V9_NONCHR:%.*]] = and i32 [[J0]], 4
668 ; CHECK-NEXT: [[V10_NONCHR:%.*]] = icmp eq i32 [[V9_NONCHR]], 0
669 ; CHECK-NEXT: br i1 [[V10_NONCHR]], label [[BB3]], label [[BB1_NONCHR:%.*]], !prof !16
670 ; CHECK: bb1.nonchr:
671 ; CHECK-NEXT: [[V11_NONCHR:%.*]] = and i32 [[I0]], 8
672 ; CHECK-NEXT: [[V12_NONCHR:%.*]] = icmp eq i32 [[V11_NONCHR]], 0
673 ; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[V12_NONCHR]], i32 44, i32 88, !prof !16
674 ; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]]
675 ; CHECK-NEXT: br label [[BB3]]
676 ; CHECK: bb3:
677 ; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[V13]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM2_NONCHR]], [[BB0_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ]
678 ; CHECK-NEXT: ret i32 [[SUM6]]
679 ;
680 entry: ; Input IR: conditions below are built from two different loaded values, %i0 and %j0.
681 %i0 = load i32, i32* %i
682 %j0 = load i32, i32* %j
683 %v1 = and i32 %i0, 255
684 %v2 = icmp eq i32 %v1, 0
685 br i1 %v2, label %bb3, label %bb0, !prof !15 ; bypass bb0..bb2 when (i0 & 255) == 0
686
687 bb0:
688 %v3 = and i32 %i0, 2
689 %v4 = icmp eq i32 %v3, 0
690 %v8 = add i32 %sum0, 43
691 %sum2 = select i1 %v4, i32 %sum0, i32 %v8, !prof !15 ; sum2 = sum0 + 43 iff (i0 & 2) != 0
692 %v9 = and i32 %j0, 4 ; tests %j0, whereas the selects test %i0
693 %v10 = icmp eq i32 %v9, 0
694 br i1 %v10, label %bb2, label %bb1, !prof !15 ; bb1 runs only when (j0 & 4) != 0
695
696 bb1:
697 %sum3 = add i32 %sum2, 44
698 %v11 = and i32 %i0, 8
699 %v12 = icmp eq i32 %v11, 0
700 %v13 = add i32 %sum3, 44
701 %sum4 = select i1 %v12, i32 %sum3, i32 %v13, !prof !15 ; sum4 = sum3 + 44 iff (i0 & 8) != 0
702 br label %bb2
703
704 bb2:
705 %sum5 = phi i32 [ %sum2, %bb0 ], [ %sum4, %bb1 ] ; %sum2 if bb1 was skipped, otherwise %sum4
706 br label %bb3
707
708 bb3:
709 %sum6 = phi i32 [ %sum0, %entry ], [ %sum5, %bb2 ] ; %sum0 if the entry branch bypassed bb0
710 ret i32 %sum6
711 }
712
713 ; Selects + Brs, the branch condition can't be hoisted to be merged with a
714 ; select. No CHR happens.
715 ; Roughly,
716 ; i0 = *i
717 ; sum = ((i0 & 2) == 0) ? sum0 : (sum0 + 43) // Likely false
718 ; foo();
719 ; j0 = *j
720 ; if ((j0 & 4) != 0) { // Likely true
721 ; foo();
722 ; sum = sum + 44
723 ; }
724 ; return sum
725 ; ->
726 ; (no change)
727 define i32 @test_chr_7(i32* %i, i32* %j, i32 %sum0) !prof !14 {
728 ; CHECK-LABEL: @test_chr_7(
729 ; CHECK-NEXT: entry:
730 ; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
731 ; CHECK-NEXT: [[V3:%.*]] = and i32 [[I0]], 2
732 ; CHECK-NEXT: [[V4:%.*]] = icmp eq i32 [[V3]], 0
733 ; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
734 ; CHECK-NEXT: [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16
735 ; CHECK-NEXT: call void @foo()
736 ; CHECK-NEXT: [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4
737 ; CHECK-NEXT: [[V9:%.*]] = and i32 [[J0]], 4
738 ; CHECK-NEXT: [[V10:%.*]] = icmp eq i32 [[V9]], 0
739 ; CHECK-NEXT: br i1 [[V10]], label [[BB2:%.*]], label [[BB1:%.*]], !prof !16
740 ; CHECK: bb1:
741 ; CHECK-NEXT: call void @foo()
742 ; CHECK-NEXT: [[SUM4:%.*]] = add i32 [[SUM2]], 44
743 ; CHECK-NEXT: br label [[BB2]]
744 ; CHECK: bb2:
745 ; CHECK-NEXT: [[SUM5:%.*]] = phi i32 [ [[SUM2]], [[ENTRY:%.*]] ], [ [[SUM4]], [[BB1]] ]
746 ; CHECK-NEXT: ret i32 [[SUM5]]
747 ;
748 entry: ; Input IR: one select and one branch, with a call and a load between them.
749 %i0 = load i32, i32* %i
750 %v3 = and i32 %i0, 2
751 %v4 = icmp eq i32 %v3, 0
752 %v8 = add i32 %sum0, 43
753 %sum2 = select i1 %v4, i32 %sum0, i32 %v8, !prof !15 ; sum2 = sum0 + 43 iff (i0 & 2) != 0
754 call void @foo() ; sits between the select and the load that feeds the branch condition
755 %j0 = load i32, i32* %j
756 %v9 = and i32 %j0, 4
757 %v10 = icmp eq i32 %v9, 0
758 br i1 %v10, label %bb2, label %bb1, !prof !15 ; %v10 can't be hoisted above the preceding select
759
760 bb1:
761 call void @foo()
762 %sum4 = add i32 %sum2, 44
763 br label %bb2
764
765 bb2:
766 %sum5 = phi i32 [ %sum2, %entry ], [ %sum4, %bb1 ] ; %sum2 if bb1 was skipped, otherwise %sum4
767 ret i32 %sum5
768 }
769
770 ; Selects + Brs, the branch condition can't be hoisted to be merged with the
771 ; selects. Dropping the select.
772 ; Roughly,
773 ; i0 = *i
774 ; sum = ((i0 & 2) == 0) ? sum0 : (sum0 + 43) // Likely false
775 ; foo();
776 ; j0 = *j
777 ; if ((j0 & 4) != 0) // Likely true
778 ; foo()
779 ; if ((j0 & 8) != 0) // Likely true
780 ; foo()
781 ; return sum
782 ; ->
783 ; i0 = *i
784 ; sum = ((i0 & 2) == 0) ? sum0 : (sum0 + 43) // Likely false
785 ; foo();
786 ; j0 = *j
787 ; if ((j0 & 12) == 12) { // Likely true
788 ; foo()
789 ; foo()
790 ; } else {
791 ; if ((j0 & 4) != 0)
792 ; foo()
793 ; if ((j0 & 8) != 0)
794 ; foo()
795 ; }
796 ; return sum
797 define i32 @test_chr_7_1(i32* %i, i32* %j, i32 %sum0) !prof !14 {
798 ; CHECK-LABEL: @test_chr_7_1(
799 ; CHECK-NEXT: entry:
800 ; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
801 ; CHECK-NEXT: [[V3:%.*]] = and i32 [[I0]], 2
802 ; CHECK-NEXT: [[V4:%.*]] = icmp eq i32 [[V3]], 0
803 ; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
804 ; CHECK-NEXT: [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16
805 ; CHECK-NEXT: call void @foo()
806 ; CHECK-NEXT: [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4
807 ; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[J0]], 12
808 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 12
809 ; CHECK-NEXT: br i1 [[TMP1]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
810 ; CHECK: bb0:
811 ; CHECK-NEXT: call void @foo()
812 ; CHECK-NEXT: call void @foo()
813 ; CHECK-NEXT: br label [[BB3:%.*]]
814 ; CHECK: entry.split.nonchr:
815 ; CHECK-NEXT: [[V9:%.*]] = and i32 [[J0]], 4
816 ; CHECK-NEXT: [[V10:%.*]] = icmp eq i32 [[V9]], 0
817 ; CHECK-NEXT: br i1 [[V10]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16
818 ; CHECK: bb0.nonchr:
819 ; CHECK-NEXT: call void @foo()
820 ; CHECK-NEXT: br label [[BB1_NONCHR]]
821 ; CHECK: bb1.nonchr:
822 ; CHECK-NEXT: [[V11_NONCHR:%.*]] = and i32 [[J0]], 8
823 ; CHECK-NEXT: [[V12_NONCHR:%.*]] = icmp eq i32 [[V11_NONCHR]], 0
824 ; CHECK-NEXT: br i1 [[V12_NONCHR]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16
825 ; CHECK: bb2.nonchr:
826 ; CHECK-NEXT: call void @foo()
827 ; CHECK-NEXT: br label [[BB3]]
828 ; CHECK: bb3:
829 ; CHECK-NEXT: ret i32 [[SUM2]]
830 ;
831 entry: ; Input IR: one select followed, after a call and a load, by two branches on %j0.
832 %i0 = load i32, i32* %i
833 %v3 = and i32 %i0, 2
834 %v4 = icmp eq i32 %v3, 0
835 %v8 = add i32 %sum0, 43
836 %sum2 = select i1 %v4, i32 %sum0, i32 %v8, !prof !15 ; sum2 = sum0 + 43 iff (i0 & 2) != 0
837 call void @foo() ; sits between the select and the load that feeds both branch conditions
838 %j0 = load i32, i32* %j
839 %v9 = and i32 %j0, 4
840 %v10 = icmp eq i32 %v9, 0
841 br i1 %v10, label %bb1, label %bb0, !prof !15 ; %v10 can't be hoisted above the preceding select
842
843 bb0:
844 call void @foo() ; reached only when (j0 & 4) != 0
845 br label %bb1
846
847 bb1:
848 %v11 = and i32 %j0, 8
849 %v12 = icmp eq i32 %v11, 0
850 br i1 %v12, label %bb3, label %bb2, !prof !15 ; bb2 runs only when (j0 & 8) != 0
851
852 bb2:
853 call void @foo()
854 br label %bb3
855
856 bb3:
857 ret i32 %sum2 ; the select result is returned unchanged by either branch
858 }
859
860 ; Branches aren't biased enough. No CHR happens.
861 ; Roughly,
862 ; t0 = *i
863 ; if ((t0 & 1) != 0) // Not biased
864 ; foo()
865 ; if ((t0 & 2) != 0) // Not biased
866 ; foo()
867 ; ->
868 ; (no change)
869 define void @test_chr_8(i32* %i) !prof !14 {
870 ; CHECK-LABEL: @test_chr_8(
871 ; CHECK-NEXT: entry:
872 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
873 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1
874 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
875 ; CHECK-NEXT: br i1 [[TMP2]], label [[BB1:%.*]], label [[BB0:%.*]], !prof !17
876 ; CHECK: bb0:
877 ; CHECK-NEXT: call void @foo()
878 ; CHECK-NEXT: br label [[BB1]]
879 ; CHECK: bb1:
880 ; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 2
881 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
882 ; CHECK-NEXT: br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2:%.*]], !prof !17
883 ; CHECK: bb2:
884 ; CHECK-NEXT: call void @foo()
885 ; CHECK-NEXT: br label [[BB3]]
886 ; CHECK: bb3:
887 ; CHECK-NEXT: ret void
888 ;
889 entry: ; Input IR: two branches on bits of the same loaded value.
890 %0 = load i32, i32* %i ; t0 = *i
891 %1 = and i32 %0, 1
892 %2 = icmp eq i32 %1, 0
893 br i1 %2, label %bb1, label %bb0, !prof !16 ; annotated with !16, not the !15 used by the neighbouring tests
894
895 bb0:
896 call void @foo() ; reached only when (t0 & 1) != 0
897 br label %bb1
898
899 bb1:
900 %3 = and i32 %0, 2
901 %4 = icmp eq i32 %3, 0
902 br i1 %4, label %bb3, label %bb2, !prof !16 ; also annotated with !16
903
904 bb2:
905 call void @foo() ; reached only when (t0 & 2) != 0
906 br label %bb3
907
908 bb3:
909 ret void
910 }
911
912 ; With an existing phi at the exit.
913 ; Roughly,
914 ; t = *i
915 ; if ((t & 1) != 0) // Likely true
916 ; foo()
917 ; if ((t & 2) != 0) { // Likely true
918 ; t = *j
919 ; foo()
920 ; }
921 ; // There's a phi for t here.
922 ; return t
923 ; ->
924 ; t = *i
925 ; if ((t & 3) == 3) { // Likely true
926 ; foo()
927 ; t = *j
928 ; foo()
929 ; } else {
930 ; if ((t & 1) != 0)
931 ; foo()
932 ; if ((t & 2) != 0) {
933 ; t = *j
934 ; foo()
935 ; }
936 ; }
937 ; // There's a phi for t here.
938 ; return t
939 define i32 @test_chr_9(i32* %i, i32* %j) !prof !14 {
940 ; CHECK-LABEL: @test_chr_9(
941 ; CHECK-NEXT: entry:
942 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
943 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3
944 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
945 ; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
946 ; CHECK: bb0:
947 ; CHECK-NEXT: call void @foo()
948 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[J:%.*]], align 4
949 ; CHECK-NEXT: call void @foo()
950 ; CHECK-NEXT: br label [[BB3:%.*]]
951 ; CHECK: entry.split.nonchr:
952 ; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP0]], 1
953 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
954 ; CHECK-NEXT: br i1 [[TMP5]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16
955 ; CHECK: bb0.nonchr:
956 ; CHECK-NEXT: call void @foo()
957 ; CHECK-NEXT: br label [[BB1_NONCHR]]
958 ; CHECK: bb1.nonchr:
959 ; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP0]], 2
960 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
961 ; CHECK-NEXT: br i1 [[TMP7]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16
962 ; CHECK: bb2.nonchr:
963 ; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[J]], align 4
964 ; CHECK-NEXT: call void @foo()
965 ; CHECK-NEXT: br label [[BB3]]
966 ; CHECK: bb3:
967 ; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ [[TMP3]], [[BB0]] ], [ [[TMP0]], [[BB1_NONCHR]] ], [ [[TMP8]], [[BB2_NONCHR]] ]
968 ; CHECK-NEXT: ret i32 [[TMP9]]
969 ;
970 entry: ; Input IR: the exit block bb3 already contains a phi before the pass runs.
971 %0 = load i32, i32* %i ; t = *i
972 %1 = and i32 %0, 1
973 %2 = icmp eq i32 %1, 0
974 br i1 %2, label %bb1, label %bb0, !prof !15 ; bb0 runs only when (t & 1) != 0
975
976 bb0:
977 call void @foo()
978 br label %bb1
979
980 bb1:
981 %3 = and i32 %0, 2
982 %4 = icmp eq i32 %3, 0
983 br i1 %4, label %bb3, label %bb2, !prof !15 ; bb2 runs only when (t & 2) != 0
984
985 bb2:
986 %5 = load i32, i32* %j ; t = *j, only on this path
987 call void @foo()
988 br label %bb3
989
990 bb3:
991 %6 = phi i32 [ %0, %bb1 ], [ %5, %bb2 ] ; value from *i if bb2 was skipped, otherwise the value from *j
992 ret i32 %6
993 }
994
995 ; With no phi at the exit, but the exit needs a phi inserted after CHR.
996 ; Roughly,
997 ; t0 = *i
998 ; if ((t0 & 1) != 0) // Likely true
999 ; foo()
1000 ; t1 = *j
1001 ; if ((t0 & 2) != 0) // Likely true
1002 ; foo()
1003 ; return (t1 * 42) + (t1 - 99)
1004 ; ->
1005 ; t0 = *i
1006 ; if ((t0 & 3) == 3) { // Likely true
1007 ; foo()
1008 ; t1 = *j
1009 ; foo()
1010 ; } else {
1011 ; if ((t0 & 1) != 0)
1012 ; foo()
1013 ; t1 = *j
1014 ; if ((t0 & 2) != 0) {
1015 ; foo()
1016 ; }
1017 ; }
1018 ; // A new phi for t1 is inserted here.
1019 ; return (t1 * 42) + (t1 - 99)
1020 define i32 @test_chr_10(i32* %i, i32* %j) !prof !14 {
1021 ; CHECK-LABEL: @test_chr_10(
1022 ; CHECK-NEXT: entry:
1023 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
1024 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3
1025 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
1026 ; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
1027 ; CHECK: bb0:
1028 ; CHECK-NEXT: call void @foo()
1029 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[J:%.*]], align 4
1030 ; CHECK-NEXT: call void @foo()
1031 ; CHECK-NEXT: br label [[BB3:%.*]]
1032 ; CHECK: entry.split.nonchr:
1033 ; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP0]], 1
1034 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
1035 ; CHECK-NEXT: br i1 [[TMP5]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16
1036 ; CHECK: bb0.nonchr:
1037 ; CHECK-NEXT: call void @foo()
1038 ; CHECK-NEXT: br label [[BB1_NONCHR]]
1039 ; CHECK: bb1.nonchr:
1040 ; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[J]], align 4
1041 ; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP0]], 2
1042 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
1043 ; CHECK-NEXT: br i1 [[TMP8]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16
1044 ; CHECK: bb2.nonchr:
1045 ; CHECK-NEXT: call void @foo()
1046 ; CHECK-NEXT: br label [[BB3]]
1047 ; CHECK: bb3:
1048 ; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ [[TMP3]], [[BB0]] ], [ [[TMP6]], [[BB2_NONCHR]] ], [ [[TMP6]], [[BB1_NONCHR]] ]
1049 ; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 42
1050 ; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP9]], -99
1051 ; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
1052 ; CHECK-NEXT: ret i32 [[TMP12]]
1053 ;
1054 entry: ; Input IR: bb3 has no phi; it uses %3, which is defined in bb1, directly.
1055 %0 = load i32, i32* %i ; t0 = *i
1056 %1 = and i32 %0, 1
1057 %2 = icmp eq i32 %1, 0
1058 br i1 %2, label %bb1, label %bb0, !prof !15 ; bb0 runs only when (t0 & 1) != 0
1059
1060 bb0:
1061 call void @foo()
1062 br label %bb1
1063
1064 bb1:
1065 %3 = load i32, i32* %j ; t1 = *j, loaded between the two branches
1066 %4 = and i32 %0, 2 ; the second condition tests %0 (t0), not the freshly loaded %3
1067 %5 = icmp eq i32 %4, 0
1068 br i1 %5, label %bb3, label %bb2, !prof !15 ; bb2 runs only when (t0 & 2) != 0
1069
1070 bb2:
1071 call void @foo()
1072 br label %bb3
1073
1074 bb3:
1075 %6 = mul i32 %3, 42
1076 %7 = sub i32 %3, 99
1077 %8 = add i32 %6, %7 ; (t1 * 42) + (t1 - 99)
1078 ret i32 %8
1079 }
1080
1081 ; Test a case where there are two use-def chain paths to the same value (t0)
1082 ; from the branch condition. This is a regression test for an old bug that
1083 ; caused a bad hoisting that moves (hoists) a value (%conv) twice to the end of
1084 ; the %entry block (once for %div and once for %mul16) and put a use ahead of
1085 ; its definition like:
1086 ; %entry:
1087 ; ...
1088 ; %div = fdiv double 1.000000e+00, %conv
1089 ; %conv = sitofp i32 %0 to double
1090 ; %mul16 = fmul double %div, %conv
1091 ;
1092 ; Roughly,
1093 ; t0 = *i
1094 ; if ((t0 & 1) != 0) // Likely true
1095 ; foo()
1096 ; // there are two use-def paths from the branch condition to t0.
1097 ; if ((1.0 / t0) * t0 > 0) // Likely true
1098 ; foo()
1099 ; ->
1100 ; t0 = *i
1101 ; if ((t0 & 1) != 0 & (1.0 / t0) * t0 > 0) { // Likely true
1102 ; foo()
1103 ; foo()
1104 ; } else {
1105 ; if ((t0 & 1) != 0)
1106 ; foo()
1107 ; if ((1.0 / t0) * t0 > 0) // Likely true
1108 ; foo()
1109 ; }
1110 define void @test_chr_11(i32* %i, i32 %x) !prof !14 {
1111 ; CHECK-LABEL: @test_chr_11(
1112 ; CHECK-NEXT: entry:
1113 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
1114 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1
1115 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
1116 ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to double
1117 ; CHECK-NEXT: [[DIV:%.*]] = fdiv double 1.000000e+00, [[CONV]]
1118 ; CHECK-NEXT: [[MUL16:%.*]] = fmul double [[DIV]], [[CONV]]
1119 ; CHECK-NEXT: [[CONV717:%.*]] = fptosi double [[MUL16]] to i32
1120 ; CHECK-NEXT: [[CMP18:%.*]] = icmp sgt i32 [[CONV717]], 0
1121 ; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[CMP18]]
1122 ; CHECK-NEXT: br i1 [[TMP3]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
1123 ; CHECK: bb0:
1124 ; CHECK-NEXT: call void @foo()
1125 ; CHECK-NEXT: call void @foo()
1126 ; CHECK-NEXT: br label [[BB3:%.*]]
1127 ; CHECK: entry.split.nonchr:
1128 ; CHECK-NEXT: br i1 [[TMP2]], label [[BB0_NONCHR:%.*]], label [[BB1_NONCHR:%.*]], !prof !18
1129 ; CHECK: bb0.nonchr:
1130 ; CHECK-NEXT: call void @foo()
1131 ; CHECK-NEXT: br label [[BB1_NONCHR]]
1132 ; CHECK: bb1.nonchr:
1133 ; CHECK-NEXT: [[CONV_NONCHR:%.*]] = sitofp i32 [[TMP0]] to double
1134 ; CHECK-NEXT: [[DIV_NONCHR:%.*]] = fdiv double 1.000000e+00, [[CONV_NONCHR]]
1135 ; CHECK-NEXT: [[MUL16_NONCHR:%.*]] = fmul double [[DIV_NONCHR]], [[CONV_NONCHR]]
1136 ; CHECK-NEXT: [[CONV717_NONCHR:%.*]] = fptosi double [[MUL16_NONCHR]] to i32
1137 ; CHECK-NEXT: [[CMP18_NONCHR:%.*]] = icmp slt i32 [[CONV717_NONCHR]], 1
1138 ; CHECK-NEXT: br i1 [[CMP18_NONCHR]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16
1139 ; CHECK: bb2.nonchr:
1140 ; CHECK-NEXT: call void @foo()
1141 ; CHECK-NEXT: br label [[BB3]]
1142 ; CHECK: bb3:
1143 ; CHECK-NEXT: ret void
1144 ;
1145 entry: ; Input IR: the second branch condition reaches %0 through %conv along two operand chains.
1146 %0 = load i32, i32* %i ; t0 = *i
1147 %1 = and i32 %0, 1
1148 %2 = icmp eq i32 %1, 0
1149 br i1 %2, label %bb1, label %bb0, !prof !15 ; bb0 runs only when (t0 & 1) != 0
1150
1151 bb0:
1152 call void @foo()
1153 br label %bb1
1154
1155 bb1:
1156 %conv = sitofp i32 %0 to double ; used by both %div and %mul16 below
1157 %div = fdiv double 1.000000e+00, %conv ; first use of %conv
1158 %mul16 = fmul double %div, %conv ; second use of %conv, alongside %div
1159 %conv717 = fptosi double %mul16 to i32
1160 %cmp18 = icmp slt i32 %conv717, 1
1161 br i1 %cmp18, label %bb3, label %bb2, !prof !15 ; bb2 runs only when %conv717 >= 1
1162
1163 bb2:
1164 call void @foo()
1165 br label %bb3
1166
1167 bb3:
1168 ret void
1169 }
1170
1171 ; Selects + unrelated br only
1172 define i32 @test_chr_12(i32* %i, i32 %sum0) !prof !14 {
1173 ; CHECK-LABEL: @test_chr_12(
1174 ; CHECK-NEXT: entry:
1175 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
1176 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 255
1177 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
1178 ; CHECK-NEXT: br i1 [[TMP2]], label [[BB3:%.*]], label [[BB0:%.*]], !prof !16
1179 ; CHECK: bb0:
1180 ; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 1
1181 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
1182 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SUM0:%.*]], 42
1183 ; CHECK-NEXT: [[SUM1:%.*]] = select i1 [[TMP4]], i32 [[SUM0]], i32 [[TMP5]], !prof !16
1184 ; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP0]], 2
1185 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
1186 ; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SUM1]], 43
1187 ; CHECK-NEXT: [[SUM2:%.*]] = select i1 [[TMP7]], i32 [[SUM1]], i32 [[TMP8]], !prof !16
1188 ; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4
1189 ; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
1190 ; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP0]], 8
1191 ; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
1192 ; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP10]], [[TMP12]]
1193 ; CHECK-NEXT: br i1 [[TMP13]], label [[BB1:%.*]], label [[BB0_SPLIT_NONCHR:%.*]], !prof !15
1194 ; CHECK: bb1:
1195 ; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[SUM2]], 88
1196 ; CHECK-NEXT: br label [[BB3]]
1197 ; CHECK: bb0.split.nonchr:
1198 ; CHECK-NEXT: br i1 [[TMP10]], label [[BB1_NONCHR:%.*]], label [[BB3]], !prof !18
1199 ; CHECK: bb1.nonchr:
1200 ; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP0]], 8
1201 ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0
1202 ; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP16]], i32 44, i32 88, !prof !16
1203 ; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]]
1204 ; CHECK-NEXT: br label [[BB3]]
1205 ; CHECK: bb3:
1206 ; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[SUM0]], [[ENTRY:%.*]] ], [ [[TMP14]], [[BB1]] ], [ [[SUM2]], [[BB0_SPLIT_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ]
1207 ; CHECK-NEXT: ret i32 [[SUM6]]
1208 ;
1209 entry: ; Input IR: selects on bits of %0 plus a branch on a second, separate load of %i.
1210 %0 = load i32, i32* %i ; t0 = *i
1211 %1 = and i32 %0, 255
1212 %2 = icmp eq i32 %1, 0
1213 br i1 %2, label %bb3, label %bb0, !prof !15 ; bypass bb0..bb2 when (t0 & 255) == 0
1214
1215 bb0:
1216 %3 = and i32 %0, 1
1217 %4 = icmp eq i32 %3, 0
1218 %5 = add i32 %sum0, 42
1219 %sum1 = select i1 %4, i32 %sum0, i32 %5, !prof !15 ; sum1 = sum0 + 42 iff (t0 & 1) != 0
1220 %6 = and i32 %0, 2
1221 %7 = icmp eq i32 %6, 0
1222 %8 = add i32 %sum1, 43
1223 %sum2 = select i1 %7, i32 %sum1, i32 %8, !prof !15 ; sum2 = sum1 + 43 iff (t0 & 2) != 0
1224 %9 = load i32, i32* %i ; second load of *i; this value, not %0, feeds the branch below
1225 %10 = icmp eq i32 %9, 0
1226 br i1 %10, label %bb2, label %bb1, !prof !15 ; bb1 runs only when the reloaded value is non-zero
1227
1228 bb1:
1229 %sum3 = add i32 %sum2, 44
1230 %11 = and i32 %0, 8
1231 %12 = icmp eq i32 %11, 0
1232 %13 = add i32 %sum3, 44
1233 %sum4 = select i1 %12, i32 %sum3, i32 %13, !prof !15 ; sum4 = sum3 + 44 iff (t0 & 8) != 0
1234 br label %bb2
1235
1236 bb2:
1237 %sum5 = phi i32 [ %sum2, %bb0 ], [ %sum4, %bb1 ] ; %sum2 if bb1 was skipped, otherwise %sum4
1238 br label %bb3
1239
1240 bb3:
1241 %sum6 = phi i32 [ %sum0, %entry ], [ %sum5, %bb2 ] ; %sum0 if the entry branch bypassed bb0
1242 ret i32 %sum6
1243 }
1244
1245 ; In the second CHR, a condition value depends on a trivial phi that's inserted
1246 ; by the first CHR.
1247 ; Roughly,
1248 ; i0 = *i
1249 ; v2 = (z != 1) ? pred : true // Likely false
1250 ; if (z == 0 & pred) // Likely false
1251 ; foo()
1252 ; j0 = *j
1253 ; sum2 = ((i0 & 2) == j0) ? sum0 : (sum0 + 43) // Likely false
1254 ; sum3 = (i0 == j0) ? sum0 : (sum0 + 43) // Likely false
1255 ; foo()
1256 ; if ((i0 & 4) == 0) // Unbiased
1257 ; foo()
1258 ; return i0 + sum3
1259 ; ->
1260 ; i0 = *i
1261 ; if (z != 1 & (z == 0 & pred)) // First CHR
1262 ; foo()
1263 ; // A trivial phi for i0 is inserted here by the first CHR (which gets removed
1264 ; // later) and the subsequent branch condition (for the second CHR) uses it.
1265 ; j0 = *j
1266 ; if ((i0 & 2) != j0 & i0 != j0) { // Second CHR
1267 ; sum3 = sum0 + 43
1268 ; foo()
1269 ; if ((i0 & 4) == 0)
1270 ; foo()
1271 ; } else {
1272 ; sum3 = (i0 == j0) ? sum0 : (sum0 + 43)
1273 ; foo()
1274 ; if ((i0 & 4) == 0)
1275 ; foo()
1276 ; }
1277 ; return i0 + sum3
1278 define i32 @test_chr_14(i32* %i, i32* %j, i32 %sum0, i1 %pred, i32 %z) !prof !14 {
1279 ; CHECK-LABEL: @test_chr_14(
1280 ; CHECK-NEXT: entry:
1281 ; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
1282 ; CHECK-NEXT: [[V1:%.*]] = icmp ne i32 [[Z:%.*]], 1
1283 ; CHECK-NEXT: [[V0:%.*]] = icmp eq i32 [[Z]], 0
1284 ; CHECK-NEXT: [[V3_NONCHR:%.*]] = and i1 [[V0]], [[PRED:%.*]]
1285 ; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[V1]], [[V3_NONCHR]]
1286 ; CHECK-NEXT: br i1 [[OR_COND]], label [[BB0_NONCHR:%.*]], label [[BB1:%.*]], !prof !19
1287 ; CHECK: bb0.nonchr:
1288 ; CHECK-NEXT: call void @foo()
1289 ; CHECK-NEXT: br label [[BB1]]
1290 ; CHECK: bb1:
1291 ; CHECK-NEXT: [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4
1292 ; CHECK-NEXT: [[V6:%.*]] = and i32 [[I0]], 2
1293 ; CHECK-NEXT: [[V4:%.*]] = icmp ne i32 [[V6]], [[J0]]
1294 ; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
1295 ; CHECK-NEXT: [[V5:%.*]] = icmp ne i32 [[I0]], [[J0]]
1296 ; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[V4]], [[V5]]
1297 ; CHECK-NEXT: br i1 [[TMP0]], label [[BB1_SPLIT:%.*]], label [[BB1_SPLIT_NONCHR:%.*]], !prof !15
1298 ; CHECK: bb1.split:
1299 ; CHECK-NEXT: call void @foo()
1300 ; CHECK-NEXT: [[V9:%.*]] = and i32 [[I0]], 4
1301 ; CHECK-NEXT: [[V10:%.*]] = icmp eq i32 [[V9]], 0
1302 ; CHECK-NEXT: br i1 [[V10]], label [[BB3:%.*]], label [[BB2:%.*]]
1303 ; CHECK: bb2:
1304 ; CHECK-NEXT: call void @foo()
1305 ; CHECK-NEXT: br label [[BB3]]
1306 ; CHECK: bb1.split.nonchr:
1307 ; CHECK-NEXT: [[V5_NONCHR:%.*]] = icmp eq i32 [[I0]], [[J0]]
1308 ; CHECK-NEXT: [[SUM3_NONCHR:%.*]] = select i1 [[V5_NONCHR]], i32 [[SUM0]], i32 [[V8]