LLVM 19.0.0git
SimplifyCFG.cpp
Go to the documentation of this file.
1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
35#include "llvm/IR/Attributes.h"
36#include "llvm/IR/BasicBlock.h"
37#include "llvm/IR/CFG.h"
38#include "llvm/IR/Constant.h"
40#include "llvm/IR/Constants.h"
41#include "llvm/IR/DataLayout.h"
42#include "llvm/IR/DebugInfo.h"
44#include "llvm/IR/Function.h"
45#include "llvm/IR/GlobalValue.h"
47#include "llvm/IR/IRBuilder.h"
48#include "llvm/IR/InstrTypes.h"
49#include "llvm/IR/Instruction.h"
52#include "llvm/IR/LLVMContext.h"
53#include "llvm/IR/MDBuilder.h"
55#include "llvm/IR/Metadata.h"
56#include "llvm/IR/Module.h"
57#include "llvm/IR/NoFolder.h"
58#include "llvm/IR/Operator.h"
61#include "llvm/IR/Type.h"
62#include "llvm/IR/Use.h"
63#include "llvm/IR/User.h"
64#include "llvm/IR/Value.h"
65#include "llvm/IR/ValueHandle.h"
69#include "llvm/Support/Debug.h"
77#include <algorithm>
78#include <cassert>
79#include <climits>
80#include <cstddef>
81#include <cstdint>
82#include <iterator>
83#include <map>
84#include <optional>
85#include <set>
86#include <tuple>
87#include <utility>
88#include <vector>
89
90using namespace llvm;
91using namespace PatternMatch;
92
93#define DEBUG_TYPE "simplifycfg"
94
96 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
97
98 cl::desc("Temorary development switch used to gradually uplift SimplifyCFG "
99 "into preserving DomTree,"));
100
101// Chosen as 2 so as to be cheap, but still to have enough power to fold
102// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
103// To catch this, we need to fold a compare and a select, hence '2' being the
104// minimum reasonable default.
106 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
107 cl::desc(
108 "Control the amount of phi node folding to perform (default = 2)"));
109
111 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
112 cl::desc("Control the maximal total instruction cost that we are willing "
113 "to speculatively execute to fold a 2-entry PHI node into a "
114 "select (default = 4)"));
115
116static cl::opt<bool>
117 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
118 cl::desc("Hoist common instructions up to the parent block"));
119
121 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
122 cl::init(20),
123 cl::desc("Allow reordering across at most this many "
124 "instructions when hoisting"));
125
126static cl::opt<bool>
127 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
128 cl::desc("Sink common instructions down to the end block"));
129
131 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
132 cl::desc("Hoist conditional stores if an unconditional store precedes"));
133
135 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
136 cl::desc("Hoist conditional stores even if an unconditional store does not "
137 "precede - hoist multiple conditional stores into a single "
138 "predicated store"));
139
141 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
142 cl::desc("When merging conditional stores, do so even if the resultant "
143 "basic blocks are unlikely to be if-converted as a result"));
144
146 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
147 cl::desc("Allow exactly one expensive instruction to be speculatively "
148 "executed"));
149
151 "max-speculation-depth", cl::Hidden, cl::init(10),
152 cl::desc("Limit maximum recursion depth when calculating costs of "
153 "speculatively executed instructions"));
154
155static cl::opt<int>
156 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
157 cl::init(10),
158 cl::desc("Max size of a block which is still considered "
159 "small enough to thread through"));
160
161// Two is chosen to allow one negation and a logical combine.
163 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
164 cl::init(2),
165 cl::desc("Maximum cost of combining conditions when "
166 "folding branches"));
167
169 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
170 cl::init(2),
171 cl::desc("Multiplier to apply to threshold when determining whether or not "
172 "to fold branch to common destination when vector operations are "
173 "present"));
174
176 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
177 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
178
180 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
181 cl::desc("Limit cases to analyze when converting a switch to select"));
182
183STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
184STATISTIC(NumLinearMaps,
185 "Number of switch instructions turned into linear mapping");
186STATISTIC(NumLookupTables,
187 "Number of switch instructions turned into lookup tables");
189 NumLookupTablesHoles,
190 "Number of switch instructions turned into lookup tables (holes checked)");
191STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
192STATISTIC(NumFoldValueComparisonIntoPredecessors,
193 "Number of value comparisons folded into predecessor basic blocks");
194STATISTIC(NumFoldBranchToCommonDest,
195 "Number of branches folded into predecessor basic block");
197 NumHoistCommonCode,
198 "Number of common instruction 'blocks' hoisted up to the begin block");
199STATISTIC(NumHoistCommonInstrs,
200 "Number of common instructions hoisted up to the begin block");
201STATISTIC(NumSinkCommonCode,
202 "Number of common instruction 'blocks' sunk down to the end block");
203STATISTIC(NumSinkCommonInstrs,
204 "Number of common instructions sunk down to the end block");
205STATISTIC(NumSpeculations, "Number of speculative executed instructions");
206STATISTIC(NumInvokes,
207 "Number of invokes with empty resume blocks simplified into calls");
208STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
209STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
210
211namespace {
212
213// The first field contains the value that the switch produces when a certain
214// case group is selected, and the second field is a vector containing the
215// cases composing the case group.
216using SwitchCaseResultVectorTy =
218
219// The first field contains the phi node that generates a result of the switch
220// and the second field contains the value generated for a certain case in the
221// switch for that PHI.
222using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
223
224/// ValueEqualityComparisonCase - Represents a case of a switch.
225struct ValueEqualityComparisonCase {
227 BasicBlock *Dest;
228
229 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
230 : Value(Value), Dest(Dest) {}
231
232 bool operator<(ValueEqualityComparisonCase RHS) const {
233 // Comparing pointers is ok as we only rely on the order for uniquing.
234 return Value < RHS.Value;
235 }
236
237 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
238};
239
240class SimplifyCFGOpt {
242 DomTreeUpdater *DTU;
243 const DataLayout &DL;
244 ArrayRef<WeakVH> LoopHeaders;
246 bool Resimplify;
247
248 Value *isValueEqualityComparison(Instruction *TI);
249 BasicBlock *GetValueEqualityComparisonCases(
250 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
251 bool SimplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
252 BasicBlock *Pred,
253 IRBuilder<> &Builder);
254 bool PerformValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
255 Instruction *PTI,
256 IRBuilder<> &Builder);
257 bool FoldValueComparisonIntoPredecessors(Instruction *TI,
258 IRBuilder<> &Builder);
259
260 bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
261 bool simplifySingleResume(ResumeInst *RI);
262 bool simplifyCommonResume(ResumeInst *RI);
263 bool simplifyCleanupReturn(CleanupReturnInst *RI);
264 bool simplifyUnreachable(UnreachableInst *UI);
265 bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
266 bool simplifyIndirectBr(IndirectBrInst *IBI);
267 bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
268 bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
269 bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
270
271 bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
272 IRBuilder<> &Builder);
273
274 bool hoistCommonCodeFromSuccessors(BasicBlock *BB, bool EqTermsOnly);
275 bool hoistSuccIdenticalTerminatorToSwitchOrIf(
276 Instruction *TI, Instruction *I1,
277 SmallVectorImpl<Instruction *> &OtherSuccTIs);
278 bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
279 bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
280 BasicBlock *TrueBB, BasicBlock *FalseBB,
281 uint32_t TrueWeight, uint32_t FalseWeight);
282 bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
283 const DataLayout &DL);
284 bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
285 bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
286 bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
287
288public:
289 SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
290 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
291 const SimplifyCFGOptions &Opts)
292 : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
293 assert((!DTU || !DTU->hasPostDomTree()) &&
294 "SimplifyCFG is not yet capable of maintaining validity of a "
295 "PostDomTree, so don't ask for it.");
296 }
297
298 bool simplifyOnce(BasicBlock *BB);
299 bool run(BasicBlock *BB);
300
301 // Helper to set Resimplify and return change indication.
302 bool requestResimplify() {
303 Resimplify = true;
304 return true;
305 }
306};
307
308} // end anonymous namespace
309
310/// Return true if all the PHI nodes in the basic block \p BB
311/// receive compatible (identical) incoming values when coming from
312/// all of the predecessor blocks that are specified in \p IncomingBlocks.
313///
314/// Note that if the values aren't exactly identical, but \p EquivalenceSet
315/// is provided, and *both* of the values are present in the set,
316/// then they are considered equal.
318 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
319 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
320 assert(IncomingBlocks.size() == 2 &&
321 "Only for a pair of incoming blocks at the time!");
322
323 // FIXME: it is okay if one of the incoming values is an `undef` value,
324 // iff the other incoming value is guaranteed to be a non-poison value.
325 // FIXME: it is okay if one of the incoming values is a `poison` value.
326 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
327 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
328 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
329 if (IV0 == IV1)
330 return true;
331 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
332 EquivalenceSet->contains(IV1))
333 return true;
334 return false;
335 });
336}
337
338/// Return true if it is safe to merge these two
339/// terminator instructions together.
340static bool
342 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
343 if (SI1 == SI2)
344 return false; // Can't merge with self!
345
346 // It is not safe to merge these two switch instructions if they have a common
347 // successor, and if that successor has a PHI node, and if *that* PHI node has
348 // conflicting incoming values from the two switch blocks.
349 BasicBlock *SI1BB = SI1->getParent();
350 BasicBlock *SI2BB = SI2->getParent();
351
352 SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
353 bool Fail = false;
354 for (BasicBlock *Succ : successors(SI2BB)) {
355 if (!SI1Succs.count(Succ))
356 continue;
357 if (IncomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
358 continue;
359 Fail = true;
360 if (FailBlocks)
361 FailBlocks->insert(Succ);
362 else
363 break;
364 }
365
366 return !Fail;
367}
368
369/// Update PHI nodes in Succ to indicate that there will now be entries in it
370/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
371/// will be the same as those coming in from ExistPred, an existing predecessor
372/// of Succ.
373static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
374 BasicBlock *ExistPred,
375 MemorySSAUpdater *MSSAU = nullptr) {
376 for (PHINode &PN : Succ->phis())
377 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
378 if (MSSAU)
379 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
380 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
381}
382
383/// Compute an abstract "cost" of speculating the given instruction,
384/// which is assumed to be safe to speculate. TCC_Free means cheap,
385/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
386/// expensive.
388 const TargetTransformInfo &TTI) {
389 assert((!isa<Instruction>(I) ||
390 isSafeToSpeculativelyExecute(cast<Instruction>(I))) &&
391 "Instruction is not safe to speculatively execute!");
393}
394
395/// If we have a merge point of an "if condition" as accepted above,
396/// return true if the specified value dominates the block. We
397/// don't handle the true generality of domination here, just a special case
398/// which works well enough for us.
399///
400/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
401/// see if V (which must be an instruction) and its recursive operands
402/// that do not dominate BB have a combined cost lower than Budget and
403/// are non-trapping. If both are true, the instruction is inserted into the
404/// set and true is returned.
405///
406/// The cost for most non-trapping instructions is defined as 1 except for
407/// Select whose cost is 2.
408///
409/// After this function returns, Cost is increased by the cost of
410/// V plus its non-dominating operands. If that cost is greater than
411/// Budget, false is returned and Cost is undefined.
413 SmallPtrSetImpl<Instruction *> &AggressiveInsts,
415 InstructionCost Budget,
417 unsigned Depth = 0) {
418 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
419 // so limit the recursion depth.
420 // TODO: While this recursion limit does prevent pathological behavior, it
421 // would be better to track visited instructions to avoid cycles.
423 return false;
424
425 Instruction *I = dyn_cast<Instruction>(V);
426 if (!I) {
427 // Non-instructions dominate all instructions and can be executed
428 // unconditionally.
429 return true;
430 }
431 BasicBlock *PBB = I->getParent();
432
433 // We don't want to allow weird loops that might have the "if condition" in
434 // the bottom of this block.
435 if (PBB == BB)
436 return false;
437
438 // If this instruction is defined in a block that contains an unconditional
439 // branch to BB, then it must be in the 'conditional' part of the "if
440 // statement". If not, it definitely dominates the region.
441 BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
442 if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
443 return true;
444
445 // If we have seen this instruction before, don't count it again.
446 if (AggressiveInsts.count(I))
447 return true;
448
449 // Okay, it looks like the instruction IS in the "condition". Check to
450 // see if it's a cheap instruction to unconditionally compute, and if it
451 // only uses stuff defined outside of the condition. If so, hoist it out.
453 return false;
454
456
457 // Allow exactly one instruction to be speculated regardless of its cost
458 // (as long as it is safe to do so).
459 // This is intended to flatten the CFG even if the instruction is a division
460 // or other expensive operation. The speculation of an expensive instruction
461 // is expected to be undone in CodeGenPrepare if the speculation has not
462 // enabled further IR optimizations.
463 if (Cost > Budget &&
464 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
465 !Cost.isValid()))
466 return false;
467
468 // Okay, we can only really hoist these out if their operands do
469 // not take us over the cost threshold.
470 for (Use &Op : I->operands())
471 if (!dominatesMergePoint(Op, BB, AggressiveInsts, Cost, Budget, TTI,
472 Depth + 1))
473 return false;
474 // Okay, it's safe to do this! Remember this instruction.
475 AggressiveInsts.insert(I);
476 return true;
477}
478
479/// Extract ConstantInt from value, looking through IntToPtr
480/// and PointerNullValue. Return NULL if value is not a constant int.
482 // Normal constant int.
483 ConstantInt *CI = dyn_cast<ConstantInt>(V);
484 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy() ||
485 DL.isNonIntegralPointerType(V->getType()))
486 return CI;
487
488 // This is some kind of pointer constant. Turn it into a pointer-sized
489 // ConstantInt if possible.
490 IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
491
492 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
493 if (isa<ConstantPointerNull>(V))
494 return ConstantInt::get(PtrTy, 0);
495
496 // IntToPtr const int.
497 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
498 if (CE->getOpcode() == Instruction::IntToPtr)
499 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
500 // The constant is very likely to have the right type already.
501 if (CI->getType() == PtrTy)
502 return CI;
503 else
504 return cast<ConstantInt>(
505 ConstantFoldIntegerCast(CI, PtrTy, /*isSigned=*/false, DL));
506 }
507 return nullptr;
508}
509
510namespace {
511
512/// Given a chain of or (||) or and (&&) comparison of a value against a
513/// constant, this will try to recover the information required for a switch
514/// structure.
515/// It will depth-first traverse the chain of comparison, seeking for patterns
516/// like %a == 12 or %a < 4 and combine them to produce a set of integer
517/// representing the different cases for the switch.
518/// Note that if the chain is composed of '||' it will build the set of elements
519/// that matches the comparisons (i.e. any of this value validate the chain)
520/// while for a chain of '&&' it will build the set elements that make the test
521/// fail.
522struct ConstantComparesGatherer {
523 const DataLayout &DL;
524
525 /// Value found for the switch comparison
526 Value *CompValue = nullptr;
527
528 /// Extra clause to be checked before the switch
529 Value *Extra = nullptr;
530
531 /// Set of integers to match in switch
533
534 /// Number of comparisons matched in the and/or chain
535 unsigned UsedICmps = 0;
536
537 /// Construct and compute the result for the comparison instruction Cond
538 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
539 gather(Cond);
540 }
541
542 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
543 ConstantComparesGatherer &
544 operator=(const ConstantComparesGatherer &) = delete;
545
546private:
547 /// Try to set the current value used for the comparison, it succeeds only if
548 /// it wasn't set before or if the new value is the same as the old one
549 bool setValueOnce(Value *NewVal) {
550 if (CompValue && CompValue != NewVal)
551 return false;
552 CompValue = NewVal;
553 return (CompValue != nullptr);
554 }
555
556 /// Try to match Instruction "I" as a comparison against a constant and
557 /// populates the array Vals with the set of values that match (or do not
558 /// match depending on isEQ).
559 /// Return false on failure. On success, the Value the comparison matched
560 /// against is placed in CompValue.
561 /// If CompValue is already set, the function is expected to fail if a match
562 /// is found but the value compared to is different.
563 bool matchInstruction(Instruction *I, bool isEQ) {
564 // If this is an icmp against a constant, handle this as one of the cases.
565 ICmpInst *ICI;
566 ConstantInt *C;
567 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
568 (C = GetConstantInt(I->getOperand(1), DL)))) {
569 return false;
570 }
571
572 Value *RHSVal;
573 const APInt *RHSC;
574
575 // Pattern match a special case
576 // (x & ~2^z) == y --> x == y || x == y|2^z
577 // This undoes a transformation done by instcombine to fuse 2 compares.
578 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
579 // It's a little bit hard to see why the following transformations are
580 // correct. Here is a CVC3 program to verify them for 64-bit values:
581
582 /*
583 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
584 x : BITVECTOR(64);
585 y : BITVECTOR(64);
586 z : BITVECTOR(64);
587 mask : BITVECTOR(64) = BVSHL(ONE, z);
588 QUERY( (y & ~mask = y) =>
589 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
590 );
591 QUERY( (y | mask = y) =>
592 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
593 );
594 */
595
596 // Please note that each pattern must be a dual implication (<--> or
597 // iff). One directional implication can create spurious matches. If the
598 // implication is only one-way, an unsatisfiable condition on the left
599 // side can imply a satisfiable condition on the right side. Dual
600 // implication ensures that satisfiable conditions are transformed to
601 // other satisfiable conditions and unsatisfiable conditions are
602 // transformed to other unsatisfiable conditions.
603
604 // Here is a concrete example of a unsatisfiable condition on the left
605 // implying a satisfiable condition on the right:
606 //
607 // mask = (1 << z)
608 // (x & ~mask) == y --> (x == y || x == (y | mask))
609 //
610 // Substituting y = 3, z = 0 yields:
611 // (x & -2) == 3 --> (x == 3 || x == 2)
612
613 // Pattern match a special case:
614 /*
615 QUERY( (y & ~mask = y) =>
616 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
617 );
618 */
619 if (match(ICI->getOperand(0),
620 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
621 APInt Mask = ~*RHSC;
622 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
623 // If we already have a value for the switch, it has to match!
624 if (!setValueOnce(RHSVal))
625 return false;
626
627 Vals.push_back(C);
628 Vals.push_back(
629 ConstantInt::get(C->getContext(),
630 C->getValue() | Mask));
631 UsedICmps++;
632 return true;
633 }
634 }
635
636 // Pattern match a special case:
637 /*
638 QUERY( (y | mask = y) =>
639 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
640 );
641 */
642 if (match(ICI->getOperand(0),
643 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
644 APInt Mask = *RHSC;
645 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
646 // If we already have a value for the switch, it has to match!
647 if (!setValueOnce(RHSVal))
648 return false;
649
650 Vals.push_back(C);
651 Vals.push_back(ConstantInt::get(C->getContext(),
652 C->getValue() & ~Mask));
653 UsedICmps++;
654 return true;
655 }
656 }
657
658 // If we already have a value for the switch, it has to match!
659 if (!setValueOnce(ICI->getOperand(0)))
660 return false;
661
662 UsedICmps++;
663 Vals.push_back(C);
664 return ICI->getOperand(0);
665 }
666
667 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
668 ConstantRange Span =
670
671 // Shift the range if the compare is fed by an add. This is the range
672 // compare idiom as emitted by instcombine.
673 Value *CandidateVal = I->getOperand(0);
674 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
675 Span = Span.subtract(*RHSC);
676 CandidateVal = RHSVal;
677 }
678
679 // If this is an and/!= check, then we are looking to build the set of
680 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
681 // x != 0 && x != 1.
682 if (!isEQ)
683 Span = Span.inverse();
684
685 // If there are a ton of values, we don't want to make a ginormous switch.
686 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
687 return false;
688 }
689
690 // If we already have a value for the switch, it has to match!
691 if (!setValueOnce(CandidateVal))
692 return false;
693
694 // Add all values from the range to the set
695 for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
696 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
697
698 UsedICmps++;
699 return true;
700 }
701
702 /// Given a potentially 'or'd or 'and'd together collection of icmp
703 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
704 /// the value being compared, and stick the list constants into the Vals
705 /// vector.
706 /// One "Extra" case is allowed to differ from the other.
707 void gather(Value *V) {
708 bool isEQ = match(V, m_LogicalOr(m_Value(), m_Value()));
709
710 // Keep a stack (SmallVector for efficiency) for depth-first traversal
713
714 // Initialize
715 Visited.insert(V);
716 DFT.push_back(V);
717
718 while (!DFT.empty()) {
719 V = DFT.pop_back_val();
720
721 if (Instruction *I = dyn_cast<Instruction>(V)) {
722 // If it is a || (or && depending on isEQ), process the operands.
723 Value *Op0, *Op1;
724 if (isEQ ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
725 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
726 if (Visited.insert(Op1).second)
727 DFT.push_back(Op1);
728 if (Visited.insert(Op0).second)
729 DFT.push_back(Op0);
730
731 continue;
732 }
733
734 // Try to match the current instruction
735 if (matchInstruction(I, isEQ))
736 // Match succeed, continue the loop
737 continue;
738 }
739
740 // One element of the sequence of || (or &&) could not be match as a
741 // comparison against the same value as the others.
742 // We allow only one "Extra" case to be checked before the switch
743 if (!Extra) {
744 Extra = V;
745 continue;
746 }
747 // Failed to parse a proper sequence, abort now
748 CompValue = nullptr;
749 break;
750 }
751 }
752};
753
754} // end anonymous namespace
755
757 MemorySSAUpdater *MSSAU = nullptr) {
758 Instruction *Cond = nullptr;
759 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
760 Cond = dyn_cast<Instruction>(SI->getCondition());
761 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
762 if (BI->isConditional())
763 Cond = dyn_cast<Instruction>(BI->getCondition());
764 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
765 Cond = dyn_cast<Instruction>(IBI->getAddress());
766 }
767
768 TI->eraseFromParent();
769 if (Cond)
771}
772
773/// Return true if the specified terminator checks
774/// to see if a value is equal to constant integer value.
775Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
776 Value *CV = nullptr;
777 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
778 // Do not permit merging of large switch instructions into their
779 // predecessors unless there is only one predecessor.
780 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
781 CV = SI->getCondition();
782 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
783 if (BI->isConditional() && BI->getCondition()->hasOneUse())
784 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
785 if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL))
786 CV = ICI->getOperand(0);
787 }
788
789 // Unwrap any lossless ptrtoint cast.
790 if (CV) {
791 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
792 Value *Ptr = PTII->getPointerOperand();
793 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
794 CV = Ptr;
795 }
796 }
797 return CV;
798}
799
800/// Given a value comparison instruction,
801/// decode all of the 'cases' that it represents and return the 'default' block.
802BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases(
803 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
804 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
805 Cases.reserve(SI->getNumCases());
806 for (auto Case : SI->cases())
807 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
808 Case.getCaseSuccessor()));
809 return SI->getDefaultDest();
810 }
811
812 BranchInst *BI = cast<BranchInst>(TI);
813 ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
814 BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE);
815 Cases.push_back(ValueEqualityComparisonCase(
816 GetConstantInt(ICI->getOperand(1), DL), Succ));
817 return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
818}
819
820/// Given a vector of bb/value pairs, remove any entries
821/// in the list that match the specified block.
822static void
824 std::vector<ValueEqualityComparisonCase> &Cases) {
825 llvm::erase(Cases, BB);
826}
827
828/// Return true if there are any keys in C1 that exist in C2 as well.
829static bool ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
830 std::vector<ValueEqualityComparisonCase> &C2) {
831 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
832
833 // Make V1 be smaller than V2.
834 if (V1->size() > V2->size())
835 std::swap(V1, V2);
836
837 if (V1->empty())
838 return false;
839 if (V1->size() == 1) {
840 // Just scan V2.
841 ConstantInt *TheVal = (*V1)[0].Value;
842 for (const ValueEqualityComparisonCase &VECC : *V2)
843 if (TheVal == VECC.Value)
844 return true;
845 }
846
847 // Otherwise, just sort both lists and compare element by element.
848 array_pod_sort(V1->begin(), V1->end());
849 array_pod_sort(V2->begin(), V2->end());
850 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
851 while (i1 != e1 && i2 != e2) {
852 if ((*V1)[i1].Value == (*V2)[i2].Value)
853 return true;
854 if ((*V1)[i1].Value < (*V2)[i2].Value)
855 ++i1;
856 else
857 ++i2;
858 }
859 return false;
860}
861
862// Set branch weights on SwitchInst. This sets the metadata if there is at
863// least one non-zero weight.
865 // Check that there is at least one non-zero weight. Otherwise, pass
866 // nullptr to setMetadata which will erase the existing metadata.
867 MDNode *N = nullptr;
868 if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; }))
869 N = MDBuilder(SI->getParent()->getContext()).createBranchWeights(Weights);
870 SI->setMetadata(LLVMContext::MD_prof, N);
871}
872
873// Similar to the above, but for branch and select instructions that take
874// exactly 2 weights.
875static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
876 uint32_t FalseWeight) {
877 assert(isa<BranchInst>(I) || isa<SelectInst>(I));
878 // Check that there is at least one non-zero weight. Otherwise, pass
879 // nullptr to setMetadata which will erase the existing metadata.
880 MDNode *N = nullptr;
881 if (TrueWeight || FalseWeight)
882 N = MDBuilder(I->getParent()->getContext())
883 .createBranchWeights(TrueWeight, FalseWeight);
884 I->setMetadata(LLVMContext::MD_prof, N);
885}
886
887/// If TI is known to be a terminator instruction and its block is known to
888/// only have a single predecessor block, check to see if that predecessor is
889/// also a value comparison with the same value, and if that comparison
890/// determines the outcome of this comparison. If so, simplify TI. This does a
891/// very limited form of jump threading.
892bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
893 Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
894 Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
895 if (!PredVal)
896 return false; // Not a value comparison in predecessor.
897
898 Value *ThisVal = isValueEqualityComparison(TI);
899 assert(ThisVal && "This isn't a value comparison!!");
900 if (ThisVal != PredVal)
901 return false; // Different predicates.
902
903 // TODO: Preserve branch weight metadata, similarly to how
904 // FoldValueComparisonIntoPredecessors preserves it.
905
906 // Find out information about when control will move from Pred to TI's block.
907 std::vector<ValueEqualityComparisonCase> PredCases;
908 BasicBlock *PredDef =
909 GetValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
910 EliminateBlockCases(PredDef, PredCases); // Remove default from cases.
911
912 // Find information about how control leaves this block.
913 std::vector<ValueEqualityComparisonCase> ThisCases;
914 BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases);
915 EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
916
917 // If TI's block is the default block from Pred's comparison, potentially
918 // simplify TI based on this knowledge.
919 if (PredDef == TI->getParent()) {
920 // If we are here, we know that the value is none of those cases listed in
921 // PredCases. If there are any cases in ThisCases that are in PredCases, we
922 // can simplify TI.
923 if (!ValuesOverlap(PredCases, ThisCases))
924 return false;
925
926 if (isa<BranchInst>(TI)) {
927 // Okay, one of the successors of this condbr is dead. Convert it to a
928 // uncond br.
929 assert(ThisCases.size() == 1 && "Branch can only have one case!");
930 // Insert the new branch.
931 Instruction *NI = Builder.CreateBr(ThisDef);
932 (void)NI;
933
934 // Remove PHI node entries for the dead edge.
935 ThisCases[0].Dest->removePredecessor(PredDef);
936
937 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
938 << "Through successor TI: " << *TI << "Leaving: " << *NI
939 << "\n");
940
942
943 if (DTU)
944 DTU->applyUpdates(
945 {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
946
947 return true;
948 }
949
950 SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
951 // Okay, TI has cases that are statically dead, prune them away.
953 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
954 DeadCases.insert(PredCases[i].Value);
955
956 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
957 << "Through successor TI: " << *TI);
958
959 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
960 for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
961 --i;
962 auto *Successor = i->getCaseSuccessor();
963 if (DTU)
964 ++NumPerSuccessorCases[Successor];
965 if (DeadCases.count(i->getCaseValue())) {
966 Successor->removePredecessor(PredDef);
967 SI.removeCase(i);
968 if (DTU)
969 --NumPerSuccessorCases[Successor];
970 }
971 }
972
973 if (DTU) {
974 std::vector<DominatorTree::UpdateType> Updates;
975 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
976 if (I.second == 0)
977 Updates.push_back({DominatorTree::Delete, PredDef, I.first});
978 DTU->applyUpdates(Updates);
979 }
980
981 LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
982 return true;
983 }
984
985 // Otherwise, TI's block must correspond to some matched value. Find out
986 // which value (or set of values) this is.
987 ConstantInt *TIV = nullptr;
988 BasicBlock *TIBB = TI->getParent();
989 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
990 if (PredCases[i].Dest == TIBB) {
991 if (TIV)
992 return false; // Cannot handle multiple values coming to this block.
993 TIV = PredCases[i].Value;
994 }
995 assert(TIV && "No edge from pred to succ?");
996
997 // Okay, we found the one constant that our value can be if we get into TI's
998 // BB. Find out which successor will unconditionally be branched to.
999 BasicBlock *TheRealDest = nullptr;
1000 for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
1001 if (ThisCases[i].Value == TIV) {
1002 TheRealDest = ThisCases[i].Dest;
1003 break;
1004 }
1005
1006 // If not handled by any explicit cases, it is handled by the default case.
1007 if (!TheRealDest)
1008 TheRealDest = ThisDef;
1009
1010 SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
1011
1012 // Remove PHI node entries for dead edges.
1013 BasicBlock *CheckEdge = TheRealDest;
1014 for (BasicBlock *Succ : successors(TIBB))
1015 if (Succ != CheckEdge) {
1016 if (Succ != TheRealDest)
1017 RemovedSuccs.insert(Succ);
1018 Succ->removePredecessor(TIBB);
1019 } else
1020 CheckEdge = nullptr;
1021
1022 // Insert the new branch.
1023 Instruction *NI = Builder.CreateBr(TheRealDest);
1024 (void)NI;
1025
1026 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1027 << "Through successor TI: " << *TI << "Leaving: " << *NI
1028 << "\n");
1029
1031 if (DTU) {
1033 Updates.reserve(RemovedSuccs.size());
1034 for (auto *RemovedSucc : RemovedSuccs)
1035 Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
1036 DTU->applyUpdates(Updates);
1037 }
1038 return true;
1039}
1040
1041namespace {
1042
1043/// This class implements a stable ordering of constant
1044/// integers that does not depend on their address. This is important for
1045/// applications that sort ConstantInt's to ensure uniqueness.
1046struct ConstantIntOrdering {
1047 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1048 return LHS->getValue().ult(RHS->getValue());
1049 }
1050};
1051
1052} // end anonymous namespace
1053
1055 ConstantInt *const *P2) {
1056 const ConstantInt *LHS = *P1;
1057 const ConstantInt *RHS = *P2;
1058 if (LHS == RHS)
1059 return 0;
1060 return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
1061}
1062
1063/// Get Weights of a given terminator, the default weight is at the front
1064/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1065/// metadata.
1067 SmallVectorImpl<uint64_t> &Weights) {
1068 MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
1069 assert(MD && "Invalid branch-weight metadata");
1070 extractFromBranchWeightMD64(MD, Weights);
1071
1072 // If TI is a conditional eq, the default case is the false case,
1073 // and the corresponding branch-weight data is at index 2. We swap the
1074 // default weight to be the first entry.
1075 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1076 assert(Weights.size() == 2);
1077 ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
1078 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1079 std::swap(Weights.front(), Weights.back());
1080 }
1081}
1082
1083/// Keep halving the weights until all can fit in uint32_t.
1085 uint64_t Max = *llvm::max_element(Weights);
1086 if (Max > UINT_MAX) {
1087 unsigned Offset = 32 - llvm::countl_zero(Max);
1088 for (uint64_t &I : Weights)
1089 I >>= Offset;
1090 }
1091}
1092
1094 BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
1095 Instruction *PTI = PredBlock->getTerminator();
1096
1097 // If we have bonus instructions, clone them into the predecessor block.
1098 // Note that there may be multiple predecessor blocks, so we cannot move
1099 // bonus instructions to a predecessor block.
1100 for (Instruction &BonusInst : *BB) {
1101 if (BonusInst.isTerminator())
1102 continue;
1103
1104 Instruction *NewBonusInst = BonusInst.clone();
1105
1106 if (!isa<DbgInfoIntrinsic>(BonusInst) &&
1107 PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) {
1108 // Unless the instruction has the same !dbg location as the original
1109 // branch, drop it. When we fold the bonus instructions we want to make
1110 // sure we reset their debug locations in order to avoid stepping on
1111 // dead code caused by folding dead branches.
1112 NewBonusInst->setDebugLoc(DebugLoc());
1113 }
1114
1115 RemapInstruction(NewBonusInst, VMap,
1117
1118 // If we speculated an instruction, we need to drop any metadata that may
1119 // result in undefined behavior, as the metadata might have been valid
1120 // only given the branch precondition.
1121 // Similarly strip attributes on call parameters that may cause UB in
1122 // location the call is moved to.
1123 NewBonusInst->dropUBImplyingAttrsAndMetadata();
1124
1125 NewBonusInst->insertInto(PredBlock, PTI->getIterator());
1126 auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
1127 RemapDbgRecordRange(NewBonusInst->getModule(), Range, VMap,
1129
1130 if (isa<DbgInfoIntrinsic>(BonusInst))
1131 continue;
1132
1133 NewBonusInst->takeName(&BonusInst);
1134 BonusInst.setName(NewBonusInst->getName() + ".old");
1135 VMap[&BonusInst] = NewBonusInst;
1136
1137 // Update (liveout) uses of bonus instructions,
1138 // now that the bonus instruction has been cloned into predecessor.
1139 // Note that we expect to be in a block-closed SSA form for this to work!
1140 for (Use &U : make_early_inc_range(BonusInst.uses())) {
1141 auto *UI = cast<Instruction>(U.getUser());
1142 auto *PN = dyn_cast<PHINode>(UI);
1143 if (!PN) {
1144 assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1145 "If the user is not a PHI node, then it should be in the same "
1146 "block as, and come after, the original bonus instruction.");
1147 continue; // Keep using the original bonus instruction.
1148 }
1149 // Is this the block-closed SSA form PHI node?
1150 if (PN->getIncomingBlock(U) == BB)
1151 continue; // Great, keep using the original bonus instruction.
1152 // The only other alternative is an "use" when coming from
1153 // the predecessor block - here we should refer to the cloned bonus instr.
1154 assert(PN->getIncomingBlock(U) == PredBlock &&
1155 "Not in block-closed SSA form?");
1156 U.set(NewBonusInst);
1157 }
1158 }
1159}
1160
1161bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding(
1162 Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
1163 BasicBlock *BB = TI->getParent();
1164 BasicBlock *Pred = PTI->getParent();
1165
1167
1168 // Figure out which 'cases' to copy from SI to PSI.
1169 std::vector<ValueEqualityComparisonCase> BBCases;
1170 BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);
1171
1172 std::vector<ValueEqualityComparisonCase> PredCases;
1173 BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);
1174
1175 // Based on whether the default edge from PTI goes to BB or not, fill in
1176 // PredCases and PredDefault with the new switch cases we would like to
1177 // build.
1179
1180 // Update the branch weight metadata along the way
1182 bool PredHasWeights = hasBranchWeightMD(*PTI);
1183 bool SuccHasWeights = hasBranchWeightMD(*TI);
1184
1185 if (PredHasWeights) {
1186 GetBranchWeights(PTI, Weights);
1187 // branch-weight metadata is inconsistent here.
1188 if (Weights.size() != 1 + PredCases.size())
1189 PredHasWeights = SuccHasWeights = false;
1190 } else if (SuccHasWeights)
1191 // If there are no predecessor weights but there are successor weights,
1192 // populate Weights with 1, which will later be scaled to the sum of
1193 // successor's weights
1194 Weights.assign(1 + PredCases.size(), 1);
1195
1196 SmallVector<uint64_t, 8> SuccWeights;
1197 if (SuccHasWeights) {
1198 GetBranchWeights(TI, SuccWeights);
1199 // branch-weight metadata is inconsistent here.
1200 if (SuccWeights.size() != 1 + BBCases.size())
1201 PredHasWeights = SuccHasWeights = false;
1202 } else if (PredHasWeights)
1203 SuccWeights.assign(1 + BBCases.size(), 1);
1204
1205 if (PredDefault == BB) {
1206 // If this is the default destination from PTI, only the edges in TI
1207 // that don't occur in PTI, or that branch to BB will be activated.
1208 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1209 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1210 if (PredCases[i].Dest != BB)
1211 PTIHandled.insert(PredCases[i].Value);
1212 else {
1213 // The default destination is BB, we don't need explicit targets.
1214 std::swap(PredCases[i], PredCases.back());
1215
1216 if (PredHasWeights || SuccHasWeights) {
1217 // Increase weight for the default case.
1218 Weights[0] += Weights[i + 1];
1219 std::swap(Weights[i + 1], Weights.back());
1220 Weights.pop_back();
1221 }
1222
1223 PredCases.pop_back();
1224 --i;
1225 --e;
1226 }
1227
1228 // Reconstruct the new switch statement we will be building.
1229 if (PredDefault != BBDefault) {
1230 PredDefault->removePredecessor(Pred);
1231 if (DTU && PredDefault != BB)
1232 Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
1233 PredDefault = BBDefault;
1234 ++NewSuccessors[BBDefault];
1235 }
1236
1237 unsigned CasesFromPred = Weights.size();
1238 uint64_t ValidTotalSuccWeight = 0;
1239 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1240 if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1241 PredCases.push_back(BBCases[i]);
1242 ++NewSuccessors[BBCases[i].Dest];
1243 if (SuccHasWeights || PredHasWeights) {
1244 // The default weight is at index 0, so weight for the ith case
1245 // should be at index i+1. Scale the cases from successor by
1246 // PredDefaultWeight (Weights[0]).
1247 Weights.push_back(Weights[0] * SuccWeights[i + 1]);
1248 ValidTotalSuccWeight += SuccWeights[i + 1];
1249 }
1250 }
1251
1252 if (SuccHasWeights || PredHasWeights) {
1253 ValidTotalSuccWeight += SuccWeights[0];
1254 // Scale the cases from predecessor by ValidTotalSuccWeight.
1255 for (unsigned i = 1; i < CasesFromPred; ++i)
1256 Weights[i] *= ValidTotalSuccWeight;
1257 // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1258 Weights[0] *= SuccWeights[0];
1259 }
1260 } else {
1261 // If this is not the default destination from PSI, only the edges
1262 // in SI that occur in PSI with a destination of BB will be
1263 // activated.
1264 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1265 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1266 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1267 if (PredCases[i].Dest == BB) {
1268 PTIHandled.insert(PredCases[i].Value);
1269
1270 if (PredHasWeights || SuccHasWeights) {
1271 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1272 std::swap(Weights[i + 1], Weights.back());
1273 Weights.pop_back();
1274 }
1275
1276 std::swap(PredCases[i], PredCases.back());
1277 PredCases.pop_back();
1278 --i;
1279 --e;
1280 }
1281
1282 // Okay, now we know which constants were sent to BB from the
1283 // predecessor. Figure out where they will all go now.
1284 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1285 if (PTIHandled.count(BBCases[i].Value)) {
1286 // If this is one we are capable of getting...
1287 if (PredHasWeights || SuccHasWeights)
1288 Weights.push_back(WeightsForHandled[BBCases[i].Value]);
1289 PredCases.push_back(BBCases[i]);
1290 ++NewSuccessors[BBCases[i].Dest];
1291 PTIHandled.erase(BBCases[i].Value); // This constant is taken care of
1292 }
1293
1294 // If there are any constants vectored to BB that TI doesn't handle,
1295 // they must go to the default destination of TI.
1296 for (ConstantInt *I : PTIHandled) {
1297 if (PredHasWeights || SuccHasWeights)
1298 Weights.push_back(WeightsForHandled[I]);
1299 PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
1300 ++NewSuccessors[BBDefault];
1301 }
1302 }
1303
1304 // Okay, at this point, we know which new successor Pred will get. Make
1305 // sure we update the number of entries in the PHI nodes for these
1306 // successors.
1307 SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
1308 if (DTU) {
1309 SuccsOfPred = {succ_begin(Pred), succ_end(Pred)};
1310 Updates.reserve(Updates.size() + NewSuccessors.size());
1311 }
1312 for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
1313 NewSuccessors) {
1314 for (auto I : seq(NewSuccessor.second)) {
1315 (void)I;
1316 AddPredecessorToBlock(NewSuccessor.first, Pred, BB);
1317 }
1318 if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
1319 Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
1320 }
1321
1322 Builder.SetInsertPoint(PTI);
1323 // Convert pointer to int before we switch.
1324 if (CV->getType()->isPointerTy()) {
1325 CV =
1326 Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
1327 }
1328
1329 // Now that the successors are updated, create the new Switch instruction.
1330 SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
1331 NewSI->setDebugLoc(PTI->getDebugLoc());
1332 for (ValueEqualityComparisonCase &V : PredCases)
1333 NewSI->addCase(V.Value, V.Dest);
1334
1335 if (PredHasWeights || SuccHasWeights) {
1336 // Halve the weights if any of them cannot fit in an uint32_t
1337 FitWeights(Weights);
1338
1339 SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
1340
1341 setBranchWeights(NewSI, MDWeights);
1342 }
1343
1345
1346 // Okay, last check. If BB is still a successor of PSI, then we must
1347 // have an infinite loop case. If so, add an infinitely looping block
1348 // to handle the case to preserve the behavior of the code.
1349 BasicBlock *InfLoopBlock = nullptr;
1350 for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
1351 if (NewSI->getSuccessor(i) == BB) {
1352 if (!InfLoopBlock) {
1353 // Insert it at the end of the function, because it's either code,
1354 // or it won't matter if it's hot. :)
1355 InfLoopBlock =
1356 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
1357 BranchInst::Create(InfLoopBlock, InfLoopBlock);
1358 if (DTU)
1359 Updates.push_back(
1360 {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1361 }
1362 NewSI->setSuccessor(i, InfLoopBlock);
1363 }
1364
1365 if (DTU) {
1366 if (InfLoopBlock)
1367 Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
1368
1369 Updates.push_back({DominatorTree::Delete, Pred, BB});
1370
1371 DTU->applyUpdates(Updates);
1372 }
1373
1374 ++NumFoldValueComparisonIntoPredecessors;
1375 return true;
1376}
1377
1378/// The specified terminator is a value equality comparison instruction
1379/// (either a switch or a branch on "X == c").
1380/// See if any of the predecessors of the terminator block are value comparisons
1381/// on the same value. If so, and if safe to do so, fold them together.
1382bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI,
1383 IRBuilder<> &Builder) {
1384 BasicBlock *BB = TI->getParent();
1385 Value *CV = isValueEqualityComparison(TI); // CondVal
1386 assert(CV && "Not a comparison?");
1387
1388 bool Changed = false;
1389
1391 while (!Preds.empty()) {
1392 BasicBlock *Pred = Preds.pop_back_val();
1393 Instruction *PTI = Pred->getTerminator();
1394
1395 // Don't try to fold into itself.
1396 if (Pred == BB)
1397 continue;
1398
1399 // See if the predecessor is a comparison with the same value.
1400 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1401 if (PCV != CV)
1402 continue;
1403
1405 if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) {
1406 for (auto *Succ : FailBlocks) {
1407 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1408 return false;
1409 }
1410 }
1411
1412 PerformValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1413 Changed = true;
1414 }
1415 return Changed;
1416}
1417
1418// If we would need to insert a select that uses the value of this invoke
1419// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1420// need to do this), we can't hoist the invoke, as there is nowhere to put the
1421// select in this case.
1423 Instruction *I1, Instruction *I2) {
1424 for (BasicBlock *Succ : successors(BB1)) {
1425 for (const PHINode &PN : Succ->phis()) {
1426 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1427 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1428 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1429 return false;
1430 }
1431 }
1432 }
1433 return true;
1434}
1435
1436// Get interesting characteristics of instructions that
1437// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
1438// instructions can be reordered across.
1444
1446 unsigned Flags = 0;
1447 if (I->mayReadFromMemory())
1448 Flags |= SkipReadMem;
1449 // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1450 // inalloca) across stacksave/stackrestore boundaries.
1451 if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
1452 Flags |= SkipSideEffect;
1454 Flags |= SkipImplicitControlFlow;
1455 return Flags;
1456}
1457
1458// Returns true if it is safe to reorder an instruction across preceding
1459// instructions in a basic block.
1460static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
1461 // Don't reorder a store over a load.
1462 if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1463 return false;
1464
1465 // If we have seen an instruction with side effects, it's unsafe to reorder an
1466 // instruction which reads memory or itself has side effects.
1467 if ((Flags & SkipSideEffect) &&
1468 (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
1469 return false;
1470
1471 // Reordering across an instruction which does not necessarily transfer
1472 // control to the next instruction is speculation.
1474 return false;
1475
1476 // Hoisting of llvm.deoptimize is only legal together with the next return
1477 // instruction, which this pass is not always able to do.
1478 if (auto *CB = dyn_cast<CallBase>(I))
1479 if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1480 return false;
1481
1482 // It's also unsafe/illegal to hoist an instruction above its instruction
1483 // operands
1484 BasicBlock *BB = I->getParent();
1485 for (Value *Op : I->operands()) {
1486 if (auto *J = dyn_cast<Instruction>(Op))
1487 if (J->getParent() == BB)
1488 return false;
1489 }
1490
1491 return true;
1492}
1493
1494static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1495
1496/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1497/// instructions \p I1 and \p I2 can and should be hoisted.
1499 const TargetTransformInfo &TTI) {
1500 // If we're going to hoist a call, make sure that the two instructions
1501 // we're commoning/hoisting are both marked with musttail, or neither of
1502 // them is marked as such. Otherwise, we might end up in a situation where
1503 // we hoist from a block where the terminator is a `ret` to a block where
1504 // the terminator is a `br`, and `musttail` calls expect to be followed by
1505 // a return.
1506 auto *C1 = dyn_cast<CallInst>(I1);
1507 auto *C2 = dyn_cast<CallInst>(I2);
1508 if (C1 && C2)
1509 if (C1->isMustTailCall() != C2->isMustTailCall())
1510 return false;
1511
1513 return false;
1514
1515 // If any of the two call sites has nomerge or convergent attribute, stop
1516 // hoisting.
1517 if (const auto *CB1 = dyn_cast<CallBase>(I1))
1518 if (CB1->cannotMerge() || CB1->isConvergent())
1519 return false;
1520 if (const auto *CB2 = dyn_cast<CallBase>(I2))
1521 if (CB2->cannotMerge() || CB2->isConvergent())
1522 return false;
1523
1524 return true;
1525}
1526
1527/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
1528/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
1529/// hoistCommonCodeFromSuccessors. e.g. The input:
1530/// I1 DVRs: { x, z },
1531/// OtherInsts: { I2 DVRs: { x, y, z } }
1532/// would result in hoisting only DbgVariableRecord x.
1534 Instruction *TI, Instruction *I1,
1535 SmallVectorImpl<Instruction *> &OtherInsts) {
1536 if (!I1->hasDbgRecords())
1537 return;
1538 using CurrentAndEndIt =
1539 std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
1540 // Vector of {Current, End} iterators.
1542 Itrs.reserve(OtherInsts.size() + 1);
1543 // Helper lambdas for lock-step checks:
1544 // Return true if this Current == End.
1545 auto atEnd = [](const CurrentAndEndIt &Pair) {
1546 return Pair.first == Pair.second;
1547 };
1548 // Return true if all Current are identical.
1549 auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
1550 return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
1552 return Itrs[0].first->isIdenticalToWhenDefined(*I);
1553 });
1554 };
1555
1556 // Collect the iterators.
1557 Itrs.push_back(
1558 {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
1559 for (Instruction *Other : OtherInsts) {
1560 if (!Other->hasDbgRecords())
1561 return;
1562 Itrs.push_back(
1563 {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
1564 }
1565
1566 // Iterate in lock-step until any of the DbgRecord lists are exausted. If
1567 // the lock-step DbgRecord are identical, hoist all of them to TI.
1568 // This replicates the dbg.* intrinsic behaviour in
1569 // hoistCommonCodeFromSuccessors.
1570 while (none_of(Itrs, atEnd)) {
1571 bool HoistDVRs = allIdentical(Itrs);
1572 for (CurrentAndEndIt &Pair : Itrs) {
1573 // Increment Current iterator now as we may be about to move the
1574 // DbgRecord.
1575 DbgRecord &DR = *Pair.first++;
1576 if (HoistDVRs) {
1577 DR.removeFromParent();
1578 TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
1579 }
1580 }
1581 }
1582}
1583
1584/// Hoist any common code in the successor blocks up into the block. This
1585/// function guarantees that BB dominates all successors. If EqTermsOnly is
1586/// given, only perform hoisting in case both blocks only contain a terminator.
1587/// In that case, only the original BI will be replaced and selects for PHIs are
1588/// added.
1589bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
1590 bool EqTermsOnly) {
1591 // This does very trivial matching, with limited scanning, to find identical
1592 // instructions in the two blocks. In particular, we don't want to get into
1593 // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
1594 // such, we currently just scan for obviously identical instructions in an
1595 // identical order, possibly separated by the same number of non-identical
1596 // instructions.
1597 unsigned int SuccSize = succ_size(BB);
1598 if (SuccSize < 2)
1599 return false;
1600
1601 // If either of the blocks has it's address taken, then we can't do this fold,
1602 // because the code we'd hoist would no longer run when we jump into the block
1603 // by it's address.
1604 for (auto *Succ : successors(BB))
1605 if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
1606 return false;
1607
1608 auto *TI = BB->getTerminator();
1609
1610 // The second of pair is a SkipFlags bitmask.
1611 using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
1612 SmallVector<SuccIterPair, 8> SuccIterPairs;
1613 for (auto *Succ : successors(BB)) {
1614 BasicBlock::iterator SuccItr = Succ->begin();
1615 if (isa<PHINode>(*SuccItr))
1616 return false;
1617 SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
1618 }
1619
1620 // Check if only hoisting terminators is allowed. This does not add new
1621 // instructions to the hoist location.
1622 if (EqTermsOnly) {
1623 // Skip any debug intrinsics, as they are free to hoist.
1624 for (auto &SuccIter : make_first_range(SuccIterPairs)) {
1625 auto *INonDbg = &*skipDebugIntrinsics(SuccIter);
1626 if (!INonDbg->isTerminator())
1627 return false;
1628 }
1629 // Now we know that we only need to hoist debug intrinsics and the
1630 // terminator. Let the loop below handle those 2 cases.
1631 }
1632
1633 // Count how many instructions were not hoisted so far. There's a limit on how
1634 // many instructions we skip, serving as a compilation time control as well as
1635 // preventing excessive increase of life ranges.
1636 unsigned NumSkipped = 0;
1637 // If we find an unreachable instruction at the beginning of a basic block, we
1638 // can still hoist instructions from the rest of the basic blocks.
1639 if (SuccIterPairs.size() > 2) {
1640 erase_if(SuccIterPairs,
1641 [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
1642 if (SuccIterPairs.size() < 2)
1643 return false;
1644 }
1645
1646 bool Changed = false;
1647
1648 for (;;) {
1649 auto *SuccIterPairBegin = SuccIterPairs.begin();
1650 auto &BB1ItrPair = *SuccIterPairBegin++;
1651 auto OtherSuccIterPairRange =
1652 iterator_range(SuccIterPairBegin, SuccIterPairs.end());
1653 auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
1654
1655 Instruction *I1 = &*BB1ItrPair.first;
1656
1657 // Skip debug info if it is not identical.
1658 bool AllDbgInstsAreIdentical = all_of(OtherSuccIterRange, [I1](auto &Iter) {
1659 Instruction *I2 = &*Iter;
1660 return I1->isIdenticalToWhenDefined(I2);
1661 });
1662 if (!AllDbgInstsAreIdentical) {
1663 while (isa<DbgInfoIntrinsic>(I1))
1664 I1 = &*++BB1ItrPair.first;
1665 for (auto &SuccIter : OtherSuccIterRange) {
1666 Instruction *I2 = &*SuccIter;
1667 while (isa<DbgInfoIntrinsic>(I2))
1668 I2 = &*++SuccIter;
1669 }
1670 }
1671
1672 bool AllInstsAreIdentical = true;
1673 bool HasTerminator = I1->isTerminator();
1674 for (auto &SuccIter : OtherSuccIterRange) {
1675 Instruction *I2 = &*SuccIter;
1676 HasTerminator |= I2->isTerminator();
1677 if (AllInstsAreIdentical && (!I1->isIdenticalToWhenDefined(I2) ||
1678 MMRAMetadata(*I1) != MMRAMetadata(*I2)))
1679 AllInstsAreIdentical = false;
1680 }
1681
1683 for (auto &SuccIter : OtherSuccIterRange)
1684 OtherInsts.push_back(&*SuccIter);
1685
1686 // If we are hoisting the terminator instruction, don't move one (making a
1687 // broken BB), instead clone it, and remove BI.
1688 if (HasTerminator) {
1689 // Even if BB, which contains only one unreachable instruction, is ignored
1690 // at the beginning of the loop, we can hoist the terminator instruction.
1691 // If any instructions remain in the block, we cannot hoist terminators.
1692 if (NumSkipped || !AllInstsAreIdentical) {
1693 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1694 return Changed;
1695 }
1696
1697 return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, OtherInsts) ||
1698 Changed;
1699 }
1700
1701 if (AllInstsAreIdentical) {
1702 unsigned SkipFlagsBB1 = BB1ItrPair.second;
1703 AllInstsAreIdentical =
1704 isSafeToHoistInstr(I1, SkipFlagsBB1) &&
1705 all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
1706 Instruction *I2 = &*Pair.first;
1707 unsigned SkipFlagsBB2 = Pair.second;
1708 // Even if the instructions are identical, it may not
1709 // be safe to hoist them if we have skipped over
1710 // instructions with side effects or their operands
1711 // weren't hoisted.
1712 return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
1714 });
1715 }
1716
1717 if (AllInstsAreIdentical) {
1718 BB1ItrPair.first++;
1719 if (isa<DbgInfoIntrinsic>(I1)) {
1720 // The debug location is an integral part of a debug info intrinsic
1721 // and can't be separated from it or replaced. Instead of attempting
1722 // to merge locations, simply hoist both copies of the intrinsic.
1723 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1724 // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
1725 // and leave any that were not hoisted behind (by calling moveBefore
1726 // rather than moveBeforePreserving).
1727 I1->moveBefore(TI);
1728 for (auto &SuccIter : OtherSuccIterRange) {
1729 auto *I2 = &*SuccIter++;
1730 assert(isa<DbgInfoIntrinsic>(I2));
1731 I2->moveBefore(TI);
1732 }
1733 } else {
1734 // For a normal instruction, we just move one to right before the
1735 // branch, then replace all uses of the other with the first. Finally,
1736 // we remove the now redundant second instruction.
1737 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1738 // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
1739 // and leave any that were not hoisted behind (by calling moveBefore
1740 // rather than moveBeforePreserving).
1741 I1->moveBefore(TI);
1742 for (auto &SuccIter : OtherSuccIterRange) {
1743 Instruction *I2 = &*SuccIter++;
1744 assert(I2 != I1);
1745 if (!I2->use_empty())
1746 I2->replaceAllUsesWith(I1);
1747 I1->andIRFlags(I2);
1748 combineMetadataForCSE(I1, I2, true);
1749 // I1 and I2 are being combined into a single instruction. Its debug
1750 // location is the merged locations of the original instructions.
1751 I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
1752 I2->eraseFromParent();
1753 }
1754 }
1755 if (!Changed)
1756 NumHoistCommonCode += SuccIterPairs.size();
1757 Changed = true;
1758 NumHoistCommonInstrs += SuccIterPairs.size();
1759 } else {
1760 if (NumSkipped >= HoistCommonSkipLimit) {
1761 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1762 return Changed;
1763 }
1764 // We are about to skip over a pair of non-identical instructions. Record
1765 // if any have characteristics that would prevent reordering instructions
1766 // across them.
1767 for (auto &SuccIterPair : SuccIterPairs) {
1768 Instruction *I = &*SuccIterPair.first++;
1769 SuccIterPair.second |= skippedInstrFlags(I);
1770 }
1771 ++NumSkipped;
1772 }
1773 }
1774}
1775
1776bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
1777 Instruction *TI, Instruction *I1,
1778 SmallVectorImpl<Instruction *> &OtherSuccTIs) {
1779
1780 auto *BI = dyn_cast<BranchInst>(TI);
1781
1782 bool Changed = false;
1783 BasicBlock *TIParent = TI->getParent();
1784 BasicBlock *BB1 = I1->getParent();
1785
1786 // Use only for an if statement.
1787 auto *I2 = *OtherSuccTIs.begin();
1788 auto *BB2 = I2->getParent();
1789 if (BI) {
1790 assert(OtherSuccTIs.size() == 1);
1791 assert(BI->getSuccessor(0) == I1->getParent());
1792 assert(BI->getSuccessor(1) == I2->getParent());
1793 }
1794
1795 // In the case of an if statement, we try to hoist an invoke.
1796 // FIXME: Can we define a safety predicate for CallBr?
1797 // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
1798 // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
1799 if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
1800 return false;
1801
1802 // TODO: callbr hoisting currently disabled pending further study.
1803 if (isa<CallBrInst>(I1))
1804 return false;
1805
1806 for (BasicBlock *Succ : successors(BB1)) {
1807 for (PHINode &PN : Succ->phis()) {
1808 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1809 for (Instruction *OtherSuccTI : OtherSuccTIs) {
1810 Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
1811 if (BB1V == BB2V)
1812 continue;
1813
1814 // In the case of an if statement, check for
1815 // passingValueIsAlwaysUndefined here because we would rather eliminate
1816 // undefined control flow then converting it to a select.
1817 if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
1819 return false;
1820 }
1821 }
1822 }
1823
1824 // Hoist DbgVariableRecords attached to the terminator to match dbg.*
1825 // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
1826 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
1827 // Clone the terminator and hoist it into the pred, without any debug info.
1828 Instruction *NT = I1->clone();
1829 NT->insertInto(TIParent, TI->getIterator());
1830 if (!NT->getType()->isVoidTy()) {
1831 I1->replaceAllUsesWith(NT);
1832 for (Instruction *OtherSuccTI : OtherSuccTIs)
1833 OtherSuccTI->replaceAllUsesWith(NT);
1834 NT->takeName(I1);
1835 }
1836 Changed = true;
1837 NumHoistCommonInstrs += OtherSuccTIs.size() + 1;
1838
1839 // Ensure terminator gets a debug location, even an unknown one, in case
1840 // it involves inlinable calls.
1842 Locs.push_back(I1->getDebugLoc());
1843 for (auto *OtherSuccTI : OtherSuccTIs)
1844 Locs.push_back(OtherSuccTI->getDebugLoc());
1845 NT->setDebugLoc(DILocation::getMergedLocations(Locs));
1846
1847 // PHIs created below will adopt NT's merged DebugLoc.
1848 IRBuilder<NoFolder> Builder(NT);
1849
1850 // In the case of an if statement, hoisting one of the terminators from our
1851 // successor is a great thing. Unfortunately, the successors of the if/else
1852 // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
1853 // must agree for all PHI nodes, so we insert select instruction to compute
1854 // the final result.
1855 if (BI) {
1856 std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
1857 for (BasicBlock *Succ : successors(BB1)) {
1858 for (PHINode &PN : Succ->phis()) {
1859 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1860 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1861 if (BB1V == BB2V)
1862 continue;
1863
1864 // These values do not agree. Insert a select instruction before NT
1865 // that determines the right value.
1866 SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
1867 if (!SI) {
1868 // Propagate fast-math-flags from phi node to its replacement select.
1869 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
1870 if (isa<FPMathOperator>(PN))
1871 Builder.setFastMathFlags(PN.getFastMathFlags());
1872
1873 SI = cast<SelectInst>(Builder.CreateSelect(
1874 BI->getCondition(), BB1V, BB2V,
1875 BB1V->getName() + "." + BB2V->getName(), BI));
1876 }
1877
1878 // Make the PHI node use the select for all incoming values for BB1/BB2
1879 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
1880 if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
1881 PN.setIncomingValue(i, SI);
1882 }
1883 }
1884 }
1885
1887
1888 // Update any PHI nodes in our new successors.
1889 for (BasicBlock *Succ : successors(BB1)) {
1890 AddPredecessorToBlock(Succ, TIParent, BB1);
1891 if (DTU)
1892 Updates.push_back({DominatorTree::Insert, TIParent, Succ});
1893 }
1894
1895 if (DTU)
1896 for (BasicBlock *Succ : successors(TI))
1897 Updates.push_back({DominatorTree::Delete, TIParent, Succ});
1898
1900 if (DTU)
1901 DTU->applyUpdates(Updates);
1902 return Changed;
1903}
1904
1905// Check lifetime markers.
1906static bool isLifeTimeMarker(const Instruction *I) {
1907 if (auto II = dyn_cast<IntrinsicInst>(I)) {
1908 switch (II->getIntrinsicID()) {
1909 default:
1910 break;
1911 case Intrinsic::lifetime_start:
1912 case Intrinsic::lifetime_end:
1913 return true;
1914 }
1915 }
1916 return false;
1917}
1918
1919// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
1920// into variables.
1922 int OpIdx) {
1923 return !isa<IntrinsicInst>(I);
1924}
1925
1926// All instructions in Insts belong to different blocks that all unconditionally
1927// branch to a common successor. Analyze each instruction and return true if it
1928// would be possible to sink them into their successor, creating one common
1929// instruction instead. For every value that would be required to be provided by
1930// PHI node (because an operand varies in each input block), add to PHIOperands.
1933 DenseMap<Instruction *, SmallVector<Value *, 4>> &PHIOperands) {
1934 // Prune out obviously bad instructions to move. Each instruction must have
1935 // exactly zero or one use, and we check later that use is by a single, common
1936 // PHI instruction in the successor.
1937 bool HasUse = !Insts.front()->user_empty();
1938 for (auto *I : Insts) {
1939 // These instructions may change or break semantics if moved.
1940 if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
1941 I->getType()->isTokenTy())
1942 return false;
1943
1944 // Do not try to sink an instruction in an infinite loop - it can cause
1945 // this algorithm to infinite loop.
1946 if (I->getParent()->getSingleSuccessor() == I->getParent())
1947 return false;
1948
1949 // Conservatively return false if I is an inline-asm instruction. Sinking
1950 // and merging inline-asm instructions can potentially create arguments
1951 // that cannot satisfy the inline-asm constraints.
1952 // If the instruction has nomerge or convergent attribute, return false.
1953 if (const auto *C = dyn_cast<CallBase>(I))
1954 if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
1955 return false;
1956
1957 // Each instruction must have zero or one use.
1958 if (HasUse && !I->hasOneUse())
1959 return false;
1960 if (!HasUse && !I->user_empty())
1961 return false;
1962 }
1963
1964 const Instruction *I0 = Insts.front();
1965 const auto I0MMRA = MMRAMetadata(*I0);
1966 for (auto *I : Insts) {
1967 if (!I->isSameOperationAs(I0))
1968 return false;
1969
1970 // swifterror pointers can only be used by a load or store; sinking a load
1971 // or store would require introducing a select for the pointer operand,
1972 // which isn't allowed for swifterror pointers.
1973 if (isa<StoreInst>(I) && I->getOperand(1)->isSwiftError())
1974 return false;
1975 if (isa<LoadInst>(I) && I->getOperand(0)->isSwiftError())
1976 return false;
1977
1978 // Treat MMRAs conservatively. This pass can be quite aggressive and
1979 // could drop a lot of MMRAs otherwise.
1980 if (MMRAMetadata(*I) != I0MMRA)
1981 return false;
1982 }
1983
1984 // All instructions in Insts are known to be the same opcode. If they have a
1985 // use, check that the only user is a PHI or in the same block as the
1986 // instruction, because if a user is in the same block as an instruction we're
1987 // contemplating sinking, it must already be determined to be sinkable.
1988 if (HasUse) {
1989 auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
1990 auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0);
1991 if (!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool {
1992 auto *U = cast<Instruction>(*I->user_begin());
1993 return (PNUse &&
1994 PNUse->getParent() == Succ &&
1995 PNUse->getIncomingValueForBlock(I->getParent()) == I) ||
1996 U->getParent() == I->getParent();
1997 }))
1998 return false;
1999 }
2000
2001 // Because SROA can't handle speculating stores of selects, try not to sink
2002 // loads, stores or lifetime markers of allocas when we'd have to create a
2003 // PHI for the address operand. Also, because it is likely that loads or
2004 // stores of allocas will disappear when Mem2Reg/SROA is run, don't sink
2005 // them.
2006 // This can cause code churn which can have unintended consequences down
2007 // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244.
2008 // FIXME: This is a workaround for a deficiency in SROA - see
2009 // https://llvm.org/bugs/show_bug.cgi?id=30188
2010 if (isa<StoreInst>(I0) && any_of(Insts, [](const Instruction *I) {
2011 return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
2012 }))
2013 return false;
2014 if (isa<LoadInst>(I0) && any_of(Insts, [](const Instruction *I) {
2015 return isa<AllocaInst>(I->getOperand(0)->stripPointerCasts());
2016 }))
2017 return false;
2018 if (isLifeTimeMarker(I0) && any_of(Insts, [](const Instruction *I) {
2019 return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
2020 }))
2021 return false;
2022
2023 // For calls to be sinkable, they must all be indirect, or have same callee.
2024 // I.e. if we have two direct calls to different callees, we don't want to
2025 // turn that into an indirect call. Likewise, if we have an indirect call,
2026 // and a direct call, we don't actually want to have a single indirect call.
2027 if (isa<CallBase>(I0)) {
2028 auto IsIndirectCall = [](const Instruction *I) {
2029 return cast<CallBase>(I)->isIndirectCall();
2030 };
2031 bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
2032 bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
2033 if (HaveIndirectCalls) {
2034 if (!AllCallsAreIndirect)
2035 return false;
2036 } else {
2037 // All callees must be identical.
2038 Value *Callee = nullptr;
2039 for (const Instruction *I : Insts) {
2040 Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
2041 if (!Callee)
2042 Callee = CurrCallee;
2043 else if (Callee != CurrCallee)
2044 return false;
2045 }
2046 }
2047 }
2048
2049 for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
2050 Value *Op = I0->getOperand(OI);
2051 if (Op->getType()->isTokenTy())
2052 // Don't touch any operand of token type.
2053 return false;
2054
2055 auto SameAsI0 = [&I0, OI](const Instruction *I) {
2056 assert(I->getNumOperands() == I0->getNumOperands());
2057 return I->getOperand(OI) == I0->getOperand(OI);
2058 };
2059 if (!all_of(Insts, SameAsI0)) {
2060 if ((isa<Constant>(Op) && !replacingOperandWithVariableIsCheap(I0, OI)) ||
2062 // We can't create a PHI from this GEP.
2063 return false;
2064 for (auto *I : Insts)
2065 PHIOperands[I].push_back(I->getOperand(OI));
2066 }
2067 }
2068 return true;
2069}
2070
2071// Assuming canSinkInstructions(Blocks) has returned true, sink the last
2072// instruction of every block in Blocks to their common successor, commoning
2073// into one instruction.
2075 auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
2076
2077 // canSinkInstructions returning true guarantees that every block has at
2078 // least one non-terminator instruction.
2080 for (auto *BB : Blocks) {
2081 Instruction *I = BB->getTerminator();
2082 do {
2083 I = I->getPrevNode();
2084 } while (isa<DbgInfoIntrinsic>(I) && I != &BB->front());
2085 if (!isa<DbgInfoIntrinsic>(I))
2086 Insts.push_back(I);
2087 }
2088
2089 // The only checking we need to do now is that all users of all instructions
2090 // are the same PHI node. canSinkInstructions should have checked this but
2091 // it is slightly over-aggressive - it gets confused by commutative
2092 // instructions so double-check it here.
2093 Instruction *I0 = Insts.front();
2094 if (!I0->user_empty()) {
2095 auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
2096 if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool {
2097 auto *U = cast<Instruction>(*I->user_begin());
2098 return U == PNUse;
2099 }))
2100 return false;
2101 }
2102
2103 // We don't need to do any more checking here; canSinkInstructions should
2104 // have done it all for us.
2105 SmallVector<Value*, 4> NewOperands;
2106 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
2107 // This check is different to that in canSinkInstructions. There, we
2108 // cared about the global view once simplifycfg (and instcombine) have
2109 // completed - it takes into account PHIs that become trivially
2110 // simplifiable. However here we need a more local view; if an operand
2111 // differs we create a PHI and rely on instcombine to clean up the very
2112 // small mess we may make.
2113 bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
2114 return I->getOperand(O) != I0->getOperand(O);
2115 });
2116 if (!NeedPHI) {
2117 NewOperands.push_back(I0->getOperand(O));
2118 continue;
2119 }
2120
2121 // Create a new PHI in the successor block and populate it.
2122 auto *Op = I0->getOperand(O);
2123 assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
2124 auto *PN =
2125 PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
2126 PN->insertBefore(BBEnd->begin());
2127 for (auto *I : Insts)
2128 PN->addIncoming(I->getOperand(O), I->getParent());
2129 NewOperands.push_back(PN);
2130 }
2131
2132 // Arbitrarily use I0 as the new "common" instruction; remap its operands
2133 // and move it to the start of the successor block.
2134 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
2135 I0->getOperandUse(O).set(NewOperands[O]);
2136
2137 I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());
2138
2139 // Update metadata and IR flags, and merge debug locations.
2140 for (auto *I : Insts)
2141 if (I != I0) {
2142 // The debug location for the "common" instruction is the merged locations
2143 // of all the commoned instructions. We start with the original location
2144 // of the "common" instruction and iteratively merge each location in the
2145 // loop below.
2146 // This is an N-way merge, which will be inefficient if I0 is a CallInst.
2147 // However, as N-way merge for CallInst is rare, so we use simplified API
2148 // instead of using complex API for N-way merge.
2149 I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
2150 combineMetadataForCSE(I0, I, true);
2151 I0->andIRFlags(I);
2152 }
2153
2154 if (!I0->user_empty()) {
2155 // canSinkLastInstruction checked that all instructions were used by
2156 // one and only one PHI node. Find that now, RAUW it to our common
2157 // instruction and nuke it.
2158 auto *PN = cast<PHINode>(*I0->user_begin());
2159 PN->replaceAllUsesWith(I0);
2160 PN->eraseFromParent();
2161 }
2162
2163 // Finally nuke all instructions apart from the common instruction.
2164 for (auto *I : Insts) {
2165 if (I == I0)
2166 continue;
2167 // The remaining uses are debug users, replace those with the common inst.
2168 // In most (all?) cases this just introduces a use-before-def.
2169 assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
2170 I->replaceAllUsesWith(I0);
2171 I->eraseFromParent();
2172 }
2173
2174 return true;
2175}
2176
2177namespace {
2178
2179 // LockstepReverseIterator - Iterates through instructions
2180 // in a set of blocks in reverse order from the first non-terminator.
2181 // For example (assume all blocks have size n):
2182 // LockstepReverseIterator I([B1, B2, B3]);
2183 // *I-- = [B1[n], B2[n], B3[n]];
2184 // *I-- = [B1[n-1], B2[n-1], B3[n-1]];
2185 // *I-- = [B1[n-2], B2[n-2], B3[n-2]];
2186 // ...
2187 class LockstepReverseIterator {
2190 bool Fail;
2191
2192 public:
2193 LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : Blocks(Blocks) {
2194 reset();
2195 }
2196
2197 void reset() {
2198 Fail = false;
2199 Insts.clear();
2200 for (auto *BB : Blocks) {
2201 Instruction *Inst = BB->getTerminator();
2202 for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
2203 Inst = Inst->getPrevNode();
2204 if (!Inst) {
2205 // Block wasn't big enough.
2206 Fail = true;
2207 return;
2208 }
2209 Insts.push_back(Inst);
2210 }
2211 }
2212
2213 bool isValid() const {
2214 return !Fail;
2215 }
2216
2217 void operator--() {
2218 if (Fail)
2219 return;
2220 for (auto *&Inst : Insts) {
2221 for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
2222 Inst = Inst->getPrevNode();
2223 // Already at beginning of block.
2224 if (!Inst) {
2225 Fail = true;
2226 return;
2227 }
2228 }
2229 }
2230
2231 void operator++() {
2232 if (Fail)
2233 return;
2234 for (auto *&Inst : Insts) {
2235 for (Inst = Inst->getNextNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
2236 Inst = Inst->getNextNode();
2237 // Already at end of block.
2238 if (!Inst) {
2239 Fail = true;
2240 return;
2241 }
2242 }
2243 }
2244
2246 return Insts;
2247 }
2248 };
2249
2250} // end anonymous namespace
2251
2252/// Check whether BB's predecessors end with unconditional branches. If it is
2253/// true, sink any common code from the predecessors to BB.
2255 DomTreeUpdater *DTU) {
2256 // We support two situations:
2257 // (1) all incoming arcs are unconditional
2258 // (2) there are non-unconditional incoming arcs
2259 //
2260 // (2) is very common in switch defaults and
2261 // else-if patterns;
2262 //
2263 // if (a) f(1);
2264 // else if (b) f(2);
2265 //
2266 // produces:
2267 //
2268 // [if]
2269 // / \
2270 // [f(1)] [if]
2271 // | | \
2272 // | | |
2273 // | [f(2)]|
2274 // \ | /
2275 // [ end ]
2276 //
2277 // [end] has two unconditional predecessor arcs and one conditional. The
2278 // conditional refers to the implicit empty 'else' arc. This conditional
2279 // arc can also be caused by an empty default block in a switch.
2280 //
2281 // In this case, we attempt to sink code from all *unconditional* arcs.
2282 // If we can sink instructions from these arcs (determined during the scan
2283 // phase below) we insert a common successor for all unconditional arcs and
2284 // connect that to [end], to enable sinking:
2285 //
2286 // [if]
2287 // / \
2288 // [x(1)] [if]
2289 // | | \
2290 // | | \
2291 // | [x(2)] |
2292 // \ / |
2293 // [sink.split] |
2294 // \ /
2295 // [ end ]
2296 //
2297 SmallVector<BasicBlock*,4> UnconditionalPreds;
2298 bool HaveNonUnconditionalPredecessors = false;
2299 for (auto *PredBB : predecessors(BB)) {
2300 auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
2301 if (PredBr && PredBr->isUnconditional())
2302 UnconditionalPreds.push_back(PredBB);
2303 else
2304 HaveNonUnconditionalPredecessors = true;
2305 }
2306 if (UnconditionalPreds.size() < 2)
2307 return false;
2308
2309 // We take a two-step approach to tail sinking. First we scan from the end of
2310 // each block upwards in lockstep. If the n'th instruction from the end of each
2311 // block can be sunk, those instructions are added to ValuesToSink and we
2312 // carry on. If we can sink an instruction but need to PHI-merge some operands
2313 // (because they're not identical in each instruction) we add these to
2314 // PHIOperands.
2315 int ScanIdx = 0;
2316 SmallPtrSet<Value*,4> InstructionsToSink;
2318 LockstepReverseIterator LRI(UnconditionalPreds);
2319 while (LRI.isValid() &&
2320 canSinkInstructions(*LRI, PHIOperands)) {
2321 LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2322 << "\n");
2323 InstructionsToSink.insert((*LRI).begin(), (*LRI).end());
2324 ++ScanIdx;
2325 --LRI;
2326 }
2327
2328 // If no instructions can be sunk, early-return.
2329 if (ScanIdx == 0)
2330 return false;
2331
2332 bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2333
2334 if (!followedByDeoptOrUnreachable) {
2335 // Okay, we *could* sink last ScanIdx instructions. But how many can we
2336 // actually sink before encountering instruction that is unprofitable to
2337 // sink?
2338 auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
2339 unsigned NumPHIdValues = 0;
2340 for (auto *I : *LRI)
2341 for (auto *V : PHIOperands[I]) {
2342 if (!InstructionsToSink.contains(V))
2343 ++NumPHIdValues;
2344 // FIXME: this check is overly optimistic. We may end up not sinking
2345 // said instruction, due to the very same profitability check.
2346 // See @creating_too_many_phis in sink-common-code.ll.
2347 }
2348 LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
2349 unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();
2350 if ((NumPHIdValues % UnconditionalPreds.size()) != 0)
2351 NumPHIInsts++;
2352
2353 return NumPHIInsts <= 1;
2354 };
2355
2356 // We've determined that we are going to sink last ScanIdx instructions,
2357 // and recorded them in InstructionsToSink. Now, some instructions may be
2358 // unprofitable to sink. But that determination depends on the instructions
2359 // that we are going to sink.
2360
2361 // First, forward scan: find the first instruction unprofitable to sink,
2362 // recording all the ones that are profitable to sink.
2363 // FIXME: would it be better, after we detect that not all are profitable.
2364 // to either record the profitable ones, or erase the unprofitable ones?
2365 // Maybe we need to choose (at runtime) the one that will touch least
2366 // instrs?
2367 LRI.reset();
2368 int Idx = 0;
2369 SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2370 while (Idx < ScanIdx) {
2371 if (!ProfitableToSinkInstruction(LRI)) {
2372 // Too many PHIs would be created.
2373 LLVM_DEBUG(
2374 dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2375 break;
2376 }
2377 InstructionsProfitableToSink.insert((*LRI).begin(), (*LRI).end());
2378 --LRI;
2379 ++Idx;
2380 }
2381
2382 // If no instructions can be sunk, early-return.
2383 if (Idx == 0)
2384 return false;
2385
2386 // Did we determine that (only) some instructions are unprofitable to sink?
2387 if (Idx < ScanIdx) {
2388 // Okay, some instructions are unprofitable.
2389 ScanIdx = Idx;
2390 InstructionsToSink = InstructionsProfitableToSink;
2391
2392 // But, that may make other instructions unprofitable, too.
2393 // So, do a backward scan, do any earlier instructions become
2394 // unprofitable?
2395 assert(
2396 !ProfitableToSinkInstruction(LRI) &&
2397 "We already know that the last instruction is unprofitable to sink");
2398 ++LRI;
2399 --Idx;
2400 while (Idx >= 0) {
2401 // If we detect that an instruction becomes unprofitable to sink,
2402 // all earlier instructions won't be sunk either,
2403 // so preemptively keep InstructionsProfitableToSink in sync.
2404 // FIXME: is this the most performant approach?
2405 for (auto *I : *LRI)
2406 InstructionsProfitableToSink.erase(I);
2407 if (!ProfitableToSinkInstruction(LRI)) {
2408 // Everything starting with this instruction won't be sunk.
2409 ScanIdx = Idx;
2410 InstructionsToSink = InstructionsProfitableToSink;
2411 }
2412 ++LRI;
2413 --Idx;
2414 }
2415 }
2416
2417 // If no instructions can be sunk, early-return.
2418 if (ScanIdx == 0)
2419 return false;
2420 }
2421
2422 bool Changed = false;
2423
2424 if (HaveNonUnconditionalPredecessors) {
2425 if (!followedByDeoptOrUnreachable) {
2426 // It is always legal to sink common instructions from unconditional
2427 // predecessors. However, if not all predecessors are unconditional,
2428 // this transformation might be pessimizing. So as a rule of thumb,
2429 // don't do it unless we'd sink at least one non-speculatable instruction.
2430 // See https://bugs.llvm.org/show_bug.cgi?id=30244
2431 LRI.reset();
2432 int Idx = 0;
2433 bool Profitable = false;
2434 while (Idx < ScanIdx) {
2435 if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2436 Profitable = true;
2437 break;
2438 }
2439 --LRI;
2440 ++Idx;
2441 }
2442 if (!Profitable)
2443 return false;
2444 }
2445
2446 LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2447 // We have a conditional edge and we're going to sink some instructions.
2448 // Insert a new block postdominating all blocks we're going to sink from.
2449 if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2450 // Edges couldn't be split.
2451 return false;
2452 Changed = true;
2453 }
2454
2455 // Now that we've analyzed all potential sinking candidates, perform the
2456 // actual sink. We iteratively sink the last non-terminator of the source
2457 // blocks into their common successor unless doing so would require too
2458 // many PHI instructions to be generated (currently only one PHI is allowed
2459 // per sunk instruction).
2460 //
2461 // We can use InstructionsToSink to discount values needing PHI-merging that will
2462 // actually be sunk in a later iteration. This allows us to be more
2463 // aggressive in what we sink. This does allow a false positive where we
2464 // sink presuming a later value will also be sunk, but stop half way through
2465 // and never actually sink it which means we produce more PHIs than intended.
2466 // This is unlikely in practice though.
2467 int SinkIdx = 0;
2468 for (; SinkIdx != ScanIdx; ++SinkIdx) {
2469 LLVM_DEBUG(dbgs() << "SINK: Sink: "
2470 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2471 << "\n");
2472
2473 // Because we've sunk every instruction in turn, the current instruction to
2474 // sink is always at index 0.
2475 LRI.reset();
2476
2477 if (!sinkLastInstruction(UnconditionalPreds)) {
2478 LLVM_DEBUG(
2479 dbgs()
2480 << "SINK: stopping here, failed to actually sink instruction!\n");
2481 break;
2482 }
2483
2484 NumSinkCommonInstrs++;
2485 Changed = true;
2486 }
2487 if (SinkIdx != 0)
2488 ++NumSinkCommonCode;
2489 return Changed;
2490}
2491
2492namespace {
2493
2494struct CompatibleSets {
2495 using SetTy = SmallVector<InvokeInst *, 2>;
2496
2498
2499 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2500
2501 SetTy &getCompatibleSet(InvokeInst *II);
2502
2503 void insert(InvokeInst *II);
2504};
2505
2506CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2507 // Perform a linear scan over all the existing sets, see if the new `invoke`
2508 // is compatible with any particular set. Since we know that all the `invokes`
2509 // within a set are compatible, only check the first `invoke` in each set.
2510 // WARNING: at worst, this has quadratic complexity.
2511 for (CompatibleSets::SetTy &Set : Sets) {
2512 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2513 return Set;
2514 }
2515
2516 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2517 return Sets.emplace_back();
2518}
2519
2520void CompatibleSets::insert(InvokeInst *II) {
2521 getCompatibleSet(II).emplace_back(II);
2522}
2523
2524bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2525 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2526
2527 // Can we theoretically merge these `invoke`s?
2528 auto IsIllegalToMerge = [](InvokeInst *II) {
2529 return II->cannotMerge() || II->isInlineAsm();
2530 };
2531 if (any_of(Invokes, IsIllegalToMerge))
2532 return false;
2533
2534 // Either both `invoke`s must be direct,
2535 // or both `invoke`s must be indirect.
2536 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2537 bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
2538 bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
2539 if (HaveIndirectCalls) {
2540 if (!AllCallsAreIndirect)
2541 return false;
2542 } else {
2543 // All callees must be identical.
2544 Value *Callee = nullptr;
2545 for (InvokeInst *II : Invokes) {
2546 Value *CurrCallee = II->getCalledOperand();
2547 assert(CurrCallee && "There is always a called operand.");
2548 if (!Callee)
2549 Callee = CurrCallee;
2550 else if (Callee != CurrCallee)
2551 return false;
2552 }
2553 }
2554
2555 // Either both `invoke`s must not have a normal destination,
2556 // or both `invoke`s must have a normal destination,
2557 auto HasNormalDest = [](InvokeInst *II) {
2558 return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
2559 };
2560 if (any_of(Invokes, HasNormalDest)) {
2561 // Do not merge `invoke` that does not have a normal destination with one
2562 // that does have a normal destination, even though doing so would be legal.
2563 if (!all_of(Invokes, HasNormalDest))
2564 return false;
2565
2566 // All normal destinations must be identical.
2567 BasicBlock *NormalBB = nullptr;
2568 for (InvokeInst *II : Invokes) {
2569 BasicBlock *CurrNormalBB = II->getNormalDest();
2570 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2571 if (!NormalBB)
2572 NormalBB = CurrNormalBB;
2573 else if (NormalBB != CurrNormalBB)
2574 return false;
2575 }
2576
2577 // In the normal destination, the incoming values for these two `invoke`s
2578 // must be compatible.
2579 SmallPtrSet<Value *, 16> EquivalenceSet(Invokes.begin(), Invokes.end());
2581 NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
2582 &EquivalenceSet))
2583 return false;
2584 }
2585
2586#ifndef NDEBUG
2587 // All unwind destinations must be identical.
2588 // We know that because we have started from said unwind destination.
2589 BasicBlock *UnwindBB = nullptr;
2590 for (InvokeInst *II : Invokes) {
2591 BasicBlock *CurrUnwindBB = II->getUnwindDest();
2592 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2593 if (!UnwindBB)
2594 UnwindBB = CurrUnwindBB;
2595 else
2596 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2597 }
2598#endif
2599
2600 // In the unwind destination, the incoming values for these two `invoke`s
2601 // must be compatible.
2603 Invokes.front()->getUnwindDest(),
2604 {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2605 return false;
2606
2607 // Ignoring arguments, these `invoke`s must be identical,
2608 // including operand bundles.
2609 const InvokeInst *II0 = Invokes.front();
2610 for (auto *II : Invokes.drop_front())
2611 if (!II->isSameOperationAs(II0))
2612 return false;
2613
2614 // Can we theoretically form the data operands for the merged `invoke`?
2615 auto IsIllegalToMergeArguments = [](auto Ops) {
2616 Use &U0 = std::get<0>(Ops);
2617 Use &U1 = std::get<1>(Ops);
2618 if (U0 == U1)
2619 return false;
2620 return U0->getType()->isTokenTy() ||
2621 !canReplaceOperandWithVariable(cast<Instruction>(U0.getUser()),
2622 U0.getOperandNo());
2623 };
2624 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2625 if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2626 IsIllegalToMergeArguments))
2627 return false;
2628
2629 return true;
2630}
2631
2632} // namespace
2633
2634// Merge all invokes in the provided set, all of which are compatible
2635// as per the `CompatibleSets::shouldBelongToSameSet()`.
2637 DomTreeUpdater *DTU) {
2638 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2639
2641 if (DTU)
2642 Updates.reserve(2 + 3 * Invokes.size());
2643
2644 bool HasNormalDest =
2645 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2646
2647 // Clone one of the invokes into a new basic block.
2648 // Since they are all compatible, it doesn't matter which invoke is cloned.
2649 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2650 InvokeInst *II0 = Invokes.front();
2651 BasicBlock *II0BB = II0->getParent();
2652 BasicBlock *InsertBeforeBlock =
2653 II0->getParent()->getIterator()->getNextNode();
2654 Function *Func = II0BB->getParent();
2655 LLVMContext &Ctx = II0->getContext();
2656
2657 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2658 Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2659
2660 auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2661 // NOTE: all invokes have the same attributes, so no handling needed.
2662 MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2663
2664 if (!HasNormalDest) {
2665 // This set does not have a normal destination,
2666 // so just form a new block with unreachable terminator.
2667 BasicBlock *MergedNormalDest = BasicBlock::Create(
2668 Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2669 new UnreachableInst(Ctx, MergedNormalDest);
2670 MergedInvoke->setNormalDest(MergedNormalDest);
2671 }
2672
2673 // The unwind destination, however, remainds identical for all invokes here.
2674
2675 return MergedInvoke;
2676 }();
2677
2678 if (DTU) {
2679 // Predecessor blocks that contained these invokes will now branch to
2680 // the new block that contains the merged invoke, ...
2681 for (InvokeInst *II : Invokes)
2682 Updates.push_back(
2683 {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2684
2685 // ... which has the new `unreachable` block as normal destination,
2686 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2687 for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2688 Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2689 SuccBBOfMergedInvoke});
2690
2691 // Since predecessor blocks now unconditionally branch to a new block,
2692 // they no longer branch to their original successors.
2693 for (InvokeInst *II : Invokes)
2694 for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2695 Updates.push_back(
2696 {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2697 }
2698
2699 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2700
2701 // Form the merged operands for the merged invoke.
2702 for (Use &U : MergedInvoke->operands()) {
2703 // Only PHI together the indirect callees and data operands.
2704 if (MergedInvoke->isCallee(&U)) {
2705 if (!IsIndirectCall)
2706 continue;
2707 } else if (!MergedInvoke->isDataOperand(&U))
2708 continue;
2709
2710 // Don't create trivial PHI's with all-identical incoming values.
2711 bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2712 return II->getOperand(U.getOperandNo()) != U.get();
2713 });
2714 if (!NeedPHI)
2715 continue;
2716
2717 // Form a PHI out of all the data ops under this index.
2719 U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
2720 for (InvokeInst *II : Invokes)
2721 PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2722
2723 U.set(PN);
2724 }
2725
2726 // We've ensured that each PHI node has compatible (identical) incoming values
2727 // when coming from each of the `invoke`s in the current merge set,
2728 // so update the PHI nodes accordingly.
2729 for (BasicBlock *Succ : successors(MergedInvoke))
2730 AddPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2731 /*ExistPred=*/Invokes.front()->getParent());
2732
2733 // And finally, replace the original `invoke`s with an unconditional branch
2734 // to the block with the merged `invoke`. Also, give that merged `invoke`
2735 // the merged debugloc of all the original `invoke`s.
2736 DILocation *MergedDebugLoc = nullptr;
2737 for (InvokeInst *II : Invokes) {
2738 // Compute the debug location common to all the original `invoke`s.
2739 if (!MergedDebugLoc)
2740 MergedDebugLoc = II->getDebugLoc();
2741 else
2742 MergedDebugLoc =
2743 DILocation::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2744
2745 // And replace the old `invoke` with an unconditionally branch
2746 // to the block with the merged `invoke`.
2747 for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2748 OrigSuccBB->removePredecessor(II->getParent());
2749 BranchInst::Create(MergedInvoke->getParent(), II->getParent());
2750 II->replaceAllUsesWith(MergedInvoke);
2751 II->eraseFromParent();
2752 ++NumInvokesMerged;
2753 }
2754 MergedInvoke->setDebugLoc(MergedDebugLoc);
2755 ++NumInvokeSetsFormed;
2756
2757 if (DTU)
2758 DTU->applyUpdates(Updates);
2759}
2760
2761/// If this block is a `landingpad` exception handling block, categorize all
2762/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2763/// being "mergeable" together, and then merge invokes in each set together.
2764///
2765/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2766/// [...] [...]
2767/// | |
2768/// [invoke0] [invoke1]
2769/// / \ / \
2770/// [cont0] [landingpad] [cont1]
2771/// to:
2772/// [...] [...]
2773/// \ /
2774/// [invoke]
2775/// / \
2776/// [cont] [landingpad]
2777///
2778/// But of course we can only do that if the invokes share the `landingpad`,
2779/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2780/// and the invoked functions are "compatible".
2783 return false;
2784
2785 bool Changed = false;
2786
2787 // FIXME: generalize to all exception handling blocks?
2788 if (!BB->isLandingPad())
2789 return Changed;
2790
2791 CompatibleSets Grouper;
2792
2793 // Record all the predecessors of this `landingpad`. As per verifier,
2794 // the only allowed predecessor is the unwind edge of an `invoke`.
2795 // We want to group "compatible" `invokes` into the same set to be merged.
2796 for (BasicBlock *PredBB : predecessors(BB))
2797 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
2798
2799 // And now, merge `invoke`s that were grouped togeter.
2800 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2801 if (Invokes.size() < 2)
2802 continue;
2803 Changed = true;
2804 MergeCompatibleInvokesImpl(Invokes, DTU);
2805 }
2806
2807 return Changed;
2808}
2809
2810namespace {
2811/// Track ephemeral values, which should be ignored for cost-modelling
2812/// purposes. Requires walking instructions in reverse order.
2813class EphemeralValueTracker {
2815
2816 bool isEphemeral(const Instruction *I) {
2817 if (isa<AssumeInst>(I))
2818 return true;
2819 return !I->mayHaveSideEffects() && !I->isTerminator() &&
2820 all_of(I->users(), [&](const User *U) {
2821 return EphValues.count(cast<Instruction>(U));
2822 });
2823 }
2824
2825public:
2826 bool track(const Instruction *I) {
2827 if (isEphemeral(I)) {
2828 EphValues.insert(I);
2829 return true;
2830 }
2831 return false;
2832 }
2833
2834 bool contains(const Instruction *I) const { return EphValues.contains(I); }
2835};
2836} // namespace
2837
2838/// Determine if we can hoist sink a sole store instruction out of a
2839/// conditional block.
2840///
2841/// We are looking for code like the following:
2842/// BrBB:
2843/// store i32 %add, i32* %arrayidx2
2844/// ... // No other stores or function calls (we could be calling a memory
2845/// ... // function).
2846/// %cmp = icmp ult %x, %y
2847/// br i1 %cmp, label %EndBB, label %ThenBB
2848/// ThenBB:
2849/// store i32 %add5, i32* %arrayidx2
2850/// br label EndBB
2851/// EndBB:
2852/// ...
2853/// We are going to transform this into:
2854/// BrBB:
2855/// store i32 %add, i32* %arrayidx2
2856/// ... //
2857/// %cmp = icmp ult %x, %y
2858/// %add.add5 = select i1 %cmp, i32 %add, %add5
2859/// store i32 %add.add5, i32* %arrayidx2
2860/// ...
2861///
2862/// \return The pointer to the value of the previous store if the store can be
2863/// hoisted into the predecessor block. 0 otherwise.
2865 BasicBlock *StoreBB, BasicBlock *EndBB) {
2866 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
2867 if (!StoreToHoist)
2868 return nullptr;
2869
2870 // Volatile or atomic.
2871 if (!StoreToHoist->isSimple())
2872 return nullptr;
2873
2874 Value *StorePtr = StoreToHoist->getPointerOperand();
2875 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
2876
2877 // Look for a store to the same pointer in BrBB.
2878 unsigned MaxNumInstToLookAt = 9;
2879 // Skip pseudo probe intrinsic calls which are not really killing any memory
2880 // accesses.
2881 for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
2882 if (!MaxNumInstToLookAt)
2883 break;
2884 --MaxNumInstToLookAt;
2885
2886 // Could be calling an instruction that affects memory like free().
2887 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
2888 return nullptr;
2889
2890 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
2891 // Found the previous store to same location and type. Make sure it is
2892 // simple, to avoid introducing a spurious non-atomic write after an
2893 // atomic write.
2894 if (SI->getPointerOperand() == StorePtr &&
2895 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
2896 SI->getAlign() >= StoreToHoist->getAlign())
2897 // Found the previous store, return its value operand.
2898 return SI->getValueOperand();
2899 return nullptr; // Unknown store.
2900 }
2901
2902 if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
2903 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
2904 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
2905 // Local objects (created by an `alloca` instruction) are always
2906 // writable, so once we are past a read from a location it is valid to
2907 // also write to that same location.
2908 // If the address of the local object never escapes the function, that
2909 // means it's never concurrently read or written, hence moving the store
2910 // from under the condition will not introduce a data race.
2911 auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(StorePtr));
2912 if (AI && !PointerMayBeCaptured(AI, false, true))
2913 // Found a previous load, return it.
2914 return LI;
2915 }
2916 // The load didn't work out, but we may still find a store.
2917 }
2918 }
2919
2920 return nullptr;
2921}
2922
2923/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
2924/// converted to selects.
2926 BasicBlock *EndBB,
2927 unsigned &SpeculatedInstructions,
2929 const TargetTransformInfo &TTI) {
2931 BB->getParent()->hasMinSize()
2934
2935 bool HaveRewritablePHIs = false;
2936 for (PHINode &PN : EndBB->phis()) {
2937 Value *OrigV = PN.getIncomingValueForBlock(BB);
2938 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
2939
2940 // FIXME: Try to remove some of the duplication with
2941 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
2942 if (ThenV == OrigV)
2943 continue;
2944
2945 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr,
2947
2948 // Don't convert to selects if we could remove undefined behavior instead.
2949 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
2951 return false;
2952
2953 HaveRewritablePHIs = true;
2954 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
2955 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
2956 if (!OrigCE && !ThenCE)
2957 continue; // Known cheap (FIXME: Maybe not true for aggregates).
2958
2959 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
2960 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
2961 InstructionCost MaxCost =
2963 if (OrigCost + ThenCost > MaxCost)
2964 return false;
2965
2966 // Account for the cost of an unfolded ConstantExpr which could end up
2967 // getting expanded into Instructions.
2968 // FIXME: This doesn't account for how many operations are combined in the
2969 // constant expression.
2970 ++SpeculatedInstructions;
2971 if (SpeculatedInstructions > 1)
2972 return false;
2973 }
2974
2975 return HaveRewritablePHIs;
2976}
2977
2978/// Speculate a conditional basic block flattening the CFG.
2979///
2980/// Note that this is a very risky transform currently. Speculating
2981/// instructions like this is most often not desirable. Instead, there is an MI
2982/// pass which can do it with full awareness of the resource constraints.
2983/// However, some cases are "obvious" and we should do directly. An example of
2984/// this is speculating a single, reasonably cheap instruction.
2985///
2986/// There is only one distinct advantage to flattening the CFG at the IR level:
2987/// it makes very common but simplistic optimizations such as are common in
2988/// instcombine and the DAG combiner more powerful by removing CFG edges and
2989/// modeling their effects with easier to reason about SSA value graphs.
2990///
2991///
2992/// An illustration of this transform is turning this IR:
2993/// \code
2994/// BB:
2995/// %cmp = icmp ult %x, %y
2996/// br i1 %cmp, label %EndBB, label %ThenBB
2997/// ThenBB:
2998/// %sub = sub %x, %y
2999/// br label BB2
3000/// EndBB:
3001/// %phi = phi [ %sub, %ThenBB ], [ 0, %EndBB ]
3002/// ...
3003/// \endcode
3004///
3005/// Into this IR:
3006/// \code
3007/// BB:
3008/// %cmp = icmp ult %x, %y
3009/// %sub = sub %x, %y
3010/// %cond = select i1 %cmp, 0, %sub
3011/// ...
3012/// \endcode
3013///
3014/// \returns true if the conditional block is removed.
3015bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI,
3016 BasicBlock *ThenBB) {
3017 if (!Options.SpeculateBlocks)
3018 return false;
3019
3020 // Be conservative for now. FP select instruction can often be expensive.
3021 Value *BrCond = BI->getCondition();
3022 if (isa<FCmpInst>(BrCond))
3023 return false;
3024
3025 BasicBlock *BB = BI->getParent();
3026 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
3027 InstructionCost Budget =
3029
3030 // If ThenBB is actually on the false edge of the conditional branch, remember
3031 // to swap the select operands later.
3032 bool Invert = false;
3033 if (ThenBB != BI->getSuccessor(0)) {
3034 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3035 Invert = true;
3036 }
3037 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3038
3039 // If the branch is non-unpredictable, and is predicted to *not* branch to
3040 // the `then` block, then avoid speculating it.
3041 if (!BI->getMetadata(LLVMContext::MD_unpredictable)) {
3042 uint64_t TWeight, FWeight;
3043 if (extractBranchWeights(*BI, TWeight, FWeight) &&
3044 (TWeight + FWeight) != 0) {
3045 uint64_t EndWeight = Invert ? TWeight : FWeight;
3046 BranchProbability BIEndProb =
3047 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
3049 if (BIEndProb >= Likely)
3050 return false;
3051 }
3052 }
3053
3054 // Keep a count of how many times instructions are used within ThenBB when
3055 // they are candidates for sinking into ThenBB. Specifically:
3056 // - They are defined in BB, and
3057 // - They have no side effects, and
3058 // - All of their uses are in ThenBB.
3059 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3060
3061 SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
3062
3063 unsigned SpeculatedInstructions = 0;
3064 Value *SpeculatedStoreValue = nullptr;
3065 StoreInst *SpeculatedStore = nullptr;
3066 EphemeralValueTracker EphTracker;
3067 for (Instruction &I : reverse(drop_end(*ThenBB))) {
3068 // Skip debug info.
3069 if (isa<DbgInfoIntrinsic>(I)) {
3070 SpeculatedDbgIntrinsics.push_back(&I);
3071 continue;
3072 }
3073
3074 // Skip pseudo probes. The consequence is we lose track of the branch
3075 // probability for ThenBB, which is fine since the optimization here takes
3076 // place regardless of the branch probability.
3077 if (isa<PseudoProbeInst>(I)) {
3078 // The probe should be deleted so that it will not be over-counted when
3079 // the samples collected on the non-conditional path are counted towards
3080 // the conditional path. We leave it for the counts inference algorithm to
3081 // figure out a proper count for an unknown probe.
3082 SpeculatedDbgIntrinsics.push_back(&I);
3083 continue;
3084 }
3085
3086 // Ignore ephemeral values, they will be dropped by the transform.
3087 if (EphTracker.track(&I))
3088 continue;
3089
3090 // Only speculatively execute a single instruction (not counting the
3091 // terminator) for now.
3092 ++SpeculatedInstructions;
3093 if (SpeculatedInstructions > 1)
3094 return false;
3095
3096 // Don't hoist the instruction if it's unsafe or expensive.
3098 !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(
3099 &I, BB, ThenBB, EndBB))))
3100 return false;
3101 if (!SpeculatedStoreValue &&
3104 return false;
3105
3106 // Store the store speculation candidate.
3107 if (SpeculatedStoreValue)
3108 SpeculatedStore = cast<StoreInst>(&I);
3109
3110 // Do not hoist the instruction if any of its operands are defined but not
3111 // used in BB. The transformation will prevent the operand from
3112 // being sunk into the use block.
3113 for (Use &Op : I.operands()) {
3114 Instruction *OpI = dyn_cast<Instruction>(Op);
3115 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3116 continue; // Not a candidate for sinking.
3117
3118 ++SinkCandidateUseCounts[OpI];
3119 }
3120 }
3121
3122 // Consider any sink candidates which are only used in ThenBB as costs for
3123 // speculation. Note, while we iterate over a DenseMap here, we are summing
3124 // and so iteration order isn't significant.
3125 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3126 if (Inst->hasNUses(Count)) {
3127 ++SpeculatedInstructions;
3128 if (SpeculatedInstructions > 1)
3129 return false;
3130 }
3131
3132 // Check that we can insert the selects and that it's not too expensive to do
3133 // so.
3134 bool Convert = SpeculatedStore != nullptr;
3136 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3137 SpeculatedInstructions,
3138 Cost, TTI);
3139 if (!Convert || Cost > Budget)
3140 return false;
3141
3142 // If we get here, we can hoist the instruction and if-convert.
3143 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3144
3145 // Insert a select of the value of the speculated store.
3146 if (SpeculatedStoreValue) {
3147 IRBuilder<NoFolder> Builder(BI);
3148 Value *OrigV = SpeculatedStore->getValueOperand();
3149 Value *TrueV = SpeculatedStore->getValueOperand();
3150 Value *FalseV = SpeculatedStoreValue;
3151 if (Invert)
3152 std::swap(TrueV, FalseV);
3153 Value *S = Builder.CreateSelect(
3154 BrCond, TrueV, FalseV, "spec.store.select", BI);
3155 SpeculatedStore->setOperand(0, S);
3156 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3157 SpeculatedStore->getDebugLoc());
3158 // The value stored is still conditional, but the store itself is now
3159 // unconditonally executed, so we must be sure that any linked dbg.assign
3160 // intrinsics are tracking the new stored value (the result of the
3161 // select). If we don't, and the store were to be removed by another pass
3162 // (e.g. DSE), then we'd eventually end up emitting a location describing
3163 // the conditional value, unconditionally.
3164 //
3165 // === Before this transformation ===
3166 // pred:
3167 // store %one, %x.dest, !DIAssignID !1
3168 // dbg.assign %one, "x", ..., !1, ...
3169 // br %cond if.then
3170 //
3171 // if.then:
3172 // store %two, %x.dest, !DIAssignID !2
3173 // dbg.assign %two, "x", ..., !2, ...
3174 //
3175 // === After this transformation ===
3176 // pred:
3177 // store %one, %x.dest, !DIAssignID !1
3178 // dbg.assign %one, "x", ..., !1
3179 /// ...
3180 // %merge = select %cond, %two, %one
3181 // store %merge, %x.dest, !DIAssignID !2
3182 // dbg.assign %merge, "x", ..., !2
3183 auto replaceVariable = [OrigV, S](auto *DbgAssign) {
3184 if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3185 DbgAssign->replaceVariableLocationOp(OrigV, S);
3186 };
3187 for_each(at::getAssignmentMarkers(SpeculatedStore), replaceVariable);
3188 for_each(at::getDVRAssignmentMarkers(SpeculatedStore), replaceVariable);
3189 }
3190
3191 // Metadata can be dependent on the condition we are hoisting above.
3192 // Strip all UB-implying metadata on the instruction. Drop the debug loc
3193 // to avoid making it appear as if the condition is a constant, which would
3194 // be misleading while debugging.
3195 // Similarly strip attributes that maybe dependent on condition we are
3196 // hoisting above.
3197 for (auto &I : make_early_inc_range(*ThenBB)) {
3198 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3199 // Don't update the DILocation of dbg.assign intrinsics.
3200 if (!isa<DbgAssignIntrinsic>(&I))
3201 I.setDebugLoc(DebugLoc());
3202 }
3203 I.dropUBImplyingAttrsAndMetadata();
3204
3205 // Drop ephemeral values.
3206 if (EphTracker.contains(&I)) {
3207 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3208 I.eraseFromParent();
3209 }
3210 }
3211
3212 // Hoist the instructions.
3213 // In "RemoveDIs" non-instr debug-info mode, drop DbgVariableRecords attached
3214 // to these instructions, in the same way that dbg.value intrinsics are
3215 // dropped at the end of this block.
3216 for (auto &It : make_range(ThenBB->begin(), ThenBB->end()))
3217 for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
3218 // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3219 // equivalent).
3220 if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
3221 !DVR || !DVR->isDbgAssign())
3222 It.dropOneDbgRecord(&DR);
3223 BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3224 std::prev(ThenBB->end()));
3225
3226 // Insert selects and rewrite the PHI operands.
3227 IRBuilder<NoFolder> Builder(BI);
3228 for (PHINode &PN : EndBB->phis()) {
3229 unsigned OrigI = PN.getBasicBlockIndex(BB);
3230 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3231 Value *OrigV = PN.getIncomingValue(OrigI);
3232 Value *ThenV = PN.getIncomingValue(ThenI);
3233
3234 // Skip PHIs which are trivial.
3235 if (OrigV == ThenV)
3236 continue;
3237
3238 // Create a select whose true value is the speculatively executed value and
3239 // false value is the pre-existing value. Swap them if the branch
3240 // destinations were inverted.
3241 Value *TrueV = ThenV, *FalseV = OrigV;
3242 if (Invert)
3243 std::swap(TrueV, FalseV);
3244 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3245 PN.setIncomingValue(OrigI, V);
3246 PN.setIncomingValue(ThenI, V);
3247 }
3248
3249 // Remove speculated dbg intrinsics.
3250 // FIXME: Is it possible to do this in a more elegant way? Moving/merging the
3251 // dbg value for the different flows and inserting it after the select.
3252 for (Instruction *I : SpeculatedDbgIntrinsics) {
3253 // We still want to know that an assignment took place so don't remove
3254 // dbg.assign intrinsics.
3255 if (!isa<DbgAssignIntrinsic>(I))
3256 I->eraseFromParent();
3257 }
3258
3259 ++NumSpeculations;
3260 return true;
3261}
3262
3263/// Return true if we can thread a branch across this block.
3265 int Size = 0;
3266 EphemeralValueTracker EphTracker;
3267
3268 // Walk the loop in reverse so that we can identify ephemeral values properly
3269 // (values only feeding assumes).
3270 for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
3271 // Can't fold blocks that contain noduplicate or convergent calls.
3272 if (CallInst *CI = dyn_cast<CallInst>(&I))
3273 if (CI->cannotDuplicate() || CI->isConvergent())
3274 return false;
3275
3276 // Ignore ephemeral values which are deleted during codegen.
3277 // We will delete Phis while threading, so Phis should not be accounted in
3278 // block's size.
3279 if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
3280 if (Size++ > MaxSmallBlockSize)
3281 return false; // Don't clone large BB's.
3282 }
3283
3284 // We can only support instructions that do not define values that are
3285 // live outside of the current basic block.
3286 for (User *U : I.users()) {
3287 Instruction *UI = cast<Instruction>(U);
3288 if (UI->getParent() != BB || isa<PHINode>(UI))
3289 return false;
3290 }
3291
3292 // Looks ok, continue checking.
3293 }
3294
3295 return true;
3296}
3297
3299 BasicBlock *To) {
3300 // Don't look past the block defining the value, we might get the value from
3301 // a previous loop iteration.
3302 auto *I = dyn_cast<Instruction>(V);
3303 if (I && I->getParent() == To)
3304 return nullptr;
3305
3306 // We know the value if the From block branches on it.
3307 auto *BI = dyn_cast<BranchInst>(From->getTerminator());
3308 if (BI && BI->isConditional() && BI->getCondition() == V &&
3309 BI->getSuccessor(0) != BI->getSuccessor(1))
3310 return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
3312
3313 return nullptr;
3314}
3315
3316/// If we have a conditional branch on something for which we know the constant
3317/// value in predecessors (e.g. a phi node in the current block), thread edges
3318/// from the predecessor to their ultimate destination.
3319static std::optional<bool>
3321 const DataLayout &DL,
3322 AssumptionCache *AC) {
3324 BasicBlock *BB = BI->getParent();
3325 Value *Cond = BI->getCondition();
3326 PHINode *PN = dyn_cast<PHINode>(Cond);
3327 if (PN && PN->getParent() == BB) {
3328 // Degenerate case of a single entry PHI.
3329 if (PN->getNumIncomingValues() == 1) {
3331 return true;
3332 }
3333
3334 for (Use &U : PN->incoming_values())
3335 if (auto *CB = dyn_cast<ConstantInt>(U))
3336 KnownValues[CB].insert(PN->getIncomingBlock(U));
3337 } else {
3338 for (BasicBlock *Pred : predecessors(BB)) {
3339 if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
3340 KnownValues[CB].insert(Pred);
3341 }
3342 }
3343
3344 if (KnownValues.empty())
3345 return false;
3346
3347 // Now we know that this block has multiple preds and two succs.
3348 // Check that the block is small enough and values defined in the block are
3349 // not used outside of it.
3351 return false;
3352
3353 for (const auto &Pair : KnownValues) {
3354 // Okay, we now know that all edges from PredBB should be revectored to
3355 // branch to RealDest.
3356 ConstantInt *CB = Pair.first;
3357 ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
3358 BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
3359
3360 if (RealDest == BB)
3361 continue; // Skip self loops.
3362
3363 // Skip if the predecessor's terminator is an indirect branch.
3364 if (any_of(PredBBs, [](BasicBlock *PredBB) {
3365 return isa<IndirectBrInst>(PredBB->getTerminator());
3366 }))
3367 continue;
3368
3369 LLVM_DEBUG({
3370 dbgs() << "Condition " << *Cond << " in " << BB->getName()
3371 << " has value " << *Pair.first << " in predecessors:\n";
3372 for (const BasicBlock *PredBB : Pair.second)
3373 dbgs() << " " << PredBB->getName() << "\n";
3374 dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3375 });
3376
3377 // Split the predecessors we are threading into a new edge block. We'll
3378 // clone the instructions into this block, and then redirect it to RealDest.
3379 BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
3380
3381 // TODO: These just exist to reduce test diff, we can drop them if we like.
3382 EdgeBB->setName(RealDest->getName() + ".critedge");
3383 EdgeBB->moveBefore(RealDest);
3384
3385 // Update PHI nodes.
3386 AddPredecessorToBlock(RealDest, EdgeBB, BB);
3387
3388 // BB may have instructions that are being threaded over. Clone these
3389 // instructions into EdgeBB. We know that there will be no uses of the
3390 // cloned instructions outside of EdgeBB.
3391 BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3392 DenseMap<Value *, Value *> TranslateMap; // Track translated values.
3393 TranslateMap[Cond] = CB;
3394
3395 // RemoveDIs: track instructions that we optimise away while folding, so
3396 // that we can copy DbgVariableRecords from them later.
3397 BasicBlock::iterator SrcDbgCursor = BB->begin();
3398 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
3399 if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
3400 TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
3401 continue;
3402 }
3403 // Clone the instruction.
3404 Instruction *N = BBI->clone();
3405 // Insert the new instruction into its new home.
3406 N->insertInto(EdgeBB, InsertPt);
3407
3408 if (BBI->hasName())
3409 N->setName(BBI->getName() + ".c");
3410
3411 // Update operands due to translation.
3412 for (Use &Op : N->operands()) {
3413 DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(Op);
3414 if (PI != TranslateMap.end())
3415 Op = PI->second;
3416 }
3417
3418 // Check for trivial simplification.
3419 if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
3420 if (!BBI->use_empty())
3421 TranslateMap[&*BBI] = V;
3422 if (!N->mayHaveSideEffects()) {
3423 N->eraseFromParent(); // Instruction folded away, don't need actual
3424 // inst
3425 N = nullptr;
3426 }
3427 } else {
3428 if (!BBI->use_empty())
3429 TranslateMap[&*BBI] = N;
3430 }
3431 if (N) {
3432 // Copy all debug-info attached to instructions from the last we
3433 // successfully clone, up to this instruction (they might have been
3434 // folded away).
3435 for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3436 N->cloneDebugInfoFrom(&*SrcDbgCursor);
3437 SrcDbgCursor = std::next(BBI);
3438 // Clone debug-info on this instruction too.
3439 N->cloneDebugInfoFrom(&*BBI);
3440
3441 // Register the new instruction with the assumption cache if necessary.
3442 if (auto *Assume = dyn_cast<AssumeInst>(N))
3443 if (AC)
3444 AC->registerAssumption(Assume);
3445 }
3446 }
3447
3448 for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3449 InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
3450 InsertPt->cloneDebugInfoFrom(BI);
3451
3452 BB->removePredecessor(EdgeBB);
3453 BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
3454 EdgeBI->setSuccessor(0, RealDest);
3455 EdgeBI->setDebugLoc(BI->getDebugLoc());
3456
3457 if (DTU) {
3459 Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
3460 Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
3461 DTU->applyUpdates(Updates);
3462 }
3463
3464 // For simplicity, we created a separate basic block for the edge. Merge
3465 // it back into the predecessor if possible. This not only avoids
3466 // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3467 // bypass the check for trivial cycles above.
3468 MergeBlockIntoPredecessor(EdgeBB, DTU);
3469
3470 // Signal repeat, simplifying any other constants.
3471 return std::nullopt;
3472 }
3473
3474 return false;
3475}
3476
3478 DomTreeUpdater *DTU,
3479 const DataLayout &DL,
3480 AssumptionCache *AC) {
3481 std::optional<bool> Result;
3482 bool EverChanged = false;
3483 do {
3484 // Note that None means "we changed things, but recurse further."
3485 Result = FoldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC);
3486 EverChanged |= Result == std::nullopt || *Result;
3487 } while (Result == std::nullopt);
3488 return EverChanged;
3489}
3490
3491/// Given a BB that starts with the specified two-entry PHI node,
3492/// see if we can eliminate it.
// NOTE(review): source line 3493 (the FoldTwoEntryPHINode signature head —
// the PHINode/TTI parameters) is elided by this rendering; only the trailing
// parameters below are visible. Verify against upstream SimplifyCFG.cpp.
3494 DomTreeUpdater *DTU, const DataLayout &DL) {
3495 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
3496 // statement", which has a very simple dominance structure. Basically, we
3497 // are trying to find the condition that is being branched on, which
3498 // subsequently causes this merge to happen. We really want control
3499 // dependence information for this check, but simplifycfg can't keep it up
3500 // to date, and this catches most of the cases we care about anyway.
3501 BasicBlock *BB = PN->getParent();
3502
3503 BasicBlock *IfTrue, *IfFalse;
3504 BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3505 if (!DomBI)
3506 return false;
3507 Value *IfCond = DomBI->getCondition();
3508 // Don't bother if the branch will be constant folded trivially.
3509 if (isa<ConstantInt>(IfCond))
3510 return false;
3511
3512 BasicBlock *DomBlock = DomBI->getParent();
// NOTE(review): source lines 3513-3514 (the IfBlocks container declaration
// and the copy_if call head that this lambda belongs to) are elided here.
3515 PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
3516 return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
3517 });
3518 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
3519 "Will have either one or two blocks to speculate.");
3520
3521 // If the branch is non-unpredictable, see if we either predictably jump to
3522 // the merge bb (if we have only a single 'then' block), or if we predictably
3523 // jump to one specific 'then' block (if we have two of them).
3524 // It isn't beneficial to speculatively execute the code
3525 // from the block that we know is predictably not entered.
3526 if (!DomBI->getMetadata(LLVMContext::MD_unpredictable)) {
3527 uint64_t TWeight, FWeight;
3528 if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
3529 (TWeight + FWeight) != 0) {
3530 BranchProbability BITrueProb =
3531 BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
// NOTE(review): source line 3532 (presumably the definition of `Likely` used
// below) is elided by this rendering — confirm upstream.
3533 BranchProbability BIFalseProb = BITrueProb.getCompl();
3534 if (IfBlocks.size() == 1) {
3535 BranchProbability BIBBProb =
3536 DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
3537 if (BIBBProb >= Likely)
3538 return false;
3539 } else {
3540 if (BITrueProb >= Likely || BIFalseProb >= Likely)
3541 return false;
3542 }
3543 }
3544 }
3545
3546 // Don't try to fold an unreachable block. For example, the phi node itself
3547 // can't be the candidate if-condition for a select that we want to form.
3548 if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
3549 if (IfCondPhiInst->getParent() == BB)
3550 return false;
3551
3552 // Okay, we found that we can merge this two-entry phi node into a select.
3553 // Doing so would require us to fold *all* two entry phi nodes in this block.
3554 // At some point this becomes non-profitable (particularly if the target
3555 // doesn't support cmov's). Only do this transformation if there are two or
3556 // fewer PHI nodes in this block.
3557 unsigned NumPhis = 0;
3558 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
3559 if (NumPhis > 2)
3560 return false;
3561
3562 // Loop over the PHI's seeing if we can promote them all to select
3563 // instructions. While we are at it, keep track of the instructions
3564 // that need to be moved to the dominating block.
3565 SmallPtrSet<Instruction *, 4> AggressiveInsts;
// NOTE(review): source lines 3566 and 3568 (the `Cost` declaration and the
// Budget initializer expression) are elided by this rendering.
3567 InstructionCost Budget =
3569
3570 bool Changed = false;
3571 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
3572 PHINode *PN = cast<PHINode>(II++);
// Try constant-folding/simplifying the PHI away entirely first.
3573 if (Value *V = simplifyInstruction(PN, {DL, PN})) {
3574 PN->replaceAllUsesWith(V);
3575 PN->eraseFromParent();
3576 Changed = true;
3577 continue;
3578 }
3579
// Both incoming values must be computable above the merge point within the
// speculation budget, otherwise give up (reporting any simplification done).
3580 if (!dominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts,
3581 Cost, Budget, TTI) ||
3582 !dominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts,
3583 Cost, Budget, TTI))
3584 return Changed;
3585 }
3586
3587 // If we folded the first phi, PN dangles at this point. Refresh it. If
3588 // we ran out of PHIs then we simplified them all.
3589 PN = dyn_cast<PHINode>(BB->begin());
3590 if (!PN)
3591 return true;
3592
3593 // Return true if at least one of these is a 'not', and another is either
3594 // a 'not' too, or a constant.
3595 auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
3596 if (!match(V0, m_Not(m_Value())))
3597 std::swap(V0, V1);
3598 auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
3599 return match(V0, m_Not(m_Value())) && match(V1, Invertible);
3600 };
3601
3602 // Don't fold i1 branches on PHIs which contain binary operators or
3603 // (possibly inverted) select form of or/ands, unless one of
3604 // the incoming values is an 'not' and another one is freely invertible.
3605 // These can often be turned into switches and other things.
3606 auto IsBinOpOrAnd = [](Value *V) {
3607 return match(
3608 V, m_CombineOr(
3609 m_BinOp(),
// NOTE(review): source lines 3610-3611 (the select-form or/and matchers in
// this m_CombineOr) are elided by this rendering.
3612 };
3613 if (PN->getType()->isIntegerTy(1) &&
3614 (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
3615 IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
3616 !CanHoistNotFromBothValues(PN->getIncomingValue(0),
3617 PN->getIncomingValue(1)))
3618 return Changed;
3619
3620 // If all PHI nodes are promotable, check to make sure that all instructions
3621 // in the predecessor blocks can be promoted as well. If not, we won't be able
3622 // to get rid of the control flow, so it's not worth promoting to select
3623 // instructions.
3624 for (BasicBlock *IfBlock : IfBlocks)
3625 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
3626 if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
3627 // This is not an aggressive instruction that we can promote.
3628 // Because of this, we won't be able to get rid of the control flow, so
3629 // the xform is not worth it.
3630 return Changed;
3631 }
3632
3633 // If either of the blocks has it's address taken, we can't do this fold.
3634 if (any_of(IfBlocks,
3635 [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
3636 return Changed;
3637
3638 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond
3639 << " T: " << IfTrue->getName()
3640 << " F: " << IfFalse->getName() << "\n");
3641
3642 // If we can still promote the PHI nodes after this gauntlet of tests,
3643 // do all of the PHI's now.
3644
3645 // Move all 'aggressive' instructions, which are defined in the
3646 // conditional parts of the if's up to the dominating block.
3647 for (BasicBlock *IfBlock : IfBlocks)
3648 hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
3649
3650 IRBuilder<NoFolder> Builder(DomBI);
3651 // Propagate fast-math-flags from phi nodes to replacement selects.
3652 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
3653 while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
3654 if (isa<FPMathOperator>(PN))
3655 Builder.setFastMathFlags(PN->getFastMathFlags());
3656
3657 // Change the PHI node into a select instruction.
3658 Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
3659 Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
3660
3661 Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", DomBI);
3662 PN->replaceAllUsesWith(Sel);
3663 Sel->takeName(PN);
3664 PN->eraseFromParent();
3665 }
3666
3667 // At this point, all IfBlocks are empty, so our if statement
3668 // has been flattened. Change DomBlock to jump directly to our new block to
3669 // avoid other simplifycfg's kicking in on the diamond.
3670 Builder.CreateBr(BB);
3671
// NOTE(review): source line 3672 (the Updates container declaration used
// below) is elided by this rendering.
3673 if (DTU) {
3674 Updates.push_back({DominatorTree::Insert, DomBlock, BB});
3675 for (auto *Successor : successors(DomBlock))
3676 Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
3677 }
3678
3679 DomBI->eraseFromParent();
3680 if (DTU)
3681 DTU->applyUpdates(Updates);
3682
3683 return true;
3684}
3685
// Build either a plain binary and/or, or its poison-safe "logical" (select)
// form, for the given opcode.
// NOTE(review): the signature head (source line 3686 — function name and the
// IRBuilder parameter) is elided by this rendering; verify upstream.
3687 Instruction::BinaryOps Opc, Value *LHS,
3688 Value *RHS, const Twine &Name = "") {
3689 // Try to relax logical op to binary op.
// If RHS being poison implies LHS is poison, the plain binary form cannot
// introduce new poison, so it is safe to use.
3690 if (impliesPoison(RHS, LHS))
3691 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3692 if (Opc == Instruction::And)
3693 return Builder.CreateLogicalAnd(LHS, RHS, Name);
3694 if (Opc == Instruction::Or)
3695 return Builder.CreateLogicalOr(LHS, RHS, Name);
3696 llvm_unreachable("Invalid logical opcode");
3697}
3698
3699/// Return true if either PBI or BI has branch weight available, and store
3700/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3701/// not have branch weight, use 1:1 as its weight.
// NOTE(review): the signature head (source line 3702 — function name and the
// BranchInst *PBI / *BI parameters) is elided by this rendering.
3703 uint64_t &PredTrueWeight,
3704 uint64_t &PredFalseWeight,
3705 uint64_t &SuccTrueWeight,
3706 uint64_t &SuccFalseWeight) {
3707 bool PredHasWeights =
3708 extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
3709 bool SuccHasWeights =
3710 extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
3711 if (PredHasWeights || SuccHasWeights) {
// Default the side without metadata to an even 1:1 split.
3712 if (!PredHasWeights)
3713 PredTrueWeight = PredFalseWeight = 1;
3714 if (!SuccHasWeights)
3715 SuccTrueWeight = SuccFalseWeight = 1;
3716 return true;
3717 } else {
3718 return false;
3719 }
3720}
3721
3722/// Determine if the two branches share a common destination and deduce a glue
3723/// that joins the branches' conditions to arrive at the common destination if
3724/// that would be profitable.
// Returns {common successor, And/Or glue opcode, whether PBI's condition must
// be inverted}, or nullopt when the fold is unprofitable/impossible.
3725static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
// NOTE(review): source lines 3726 (signature head with BI/PBI parameters) and
// 3730 (first half of the predecessor assert) are elided by this rendering.
3727 const TargetTransformInfo *TTI) {
3728 assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
3729 "Both blocks must end with a conditional branches.");
3731 "PredBB must be a predecessor of BB.");
3732
3733 // We have the potential to fold the conditions together, but if the
3734 // predecessor branch is predictable, we may not want to merge them.
3735 uint64_t PTWeight, PFWeight;
3736 BranchProbability PBITrueProb, Likely;
3737 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
3738 extractBranchWeights(*PBI, PTWeight, PFWeight) &&
3739 (PTWeight + PFWeight) != 0) {
3740 PBITrueProb =
3741 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
// NOTE(review): source line 3742 (presumably the initializer for `Likely`
// from TTI) is elided by this rendering — confirm upstream.
3743 }
3744
// Four cases, by which successor pair coincides; the glue opcode and the
// need to invert PBI's condition follow from the orientation.
3745 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3746 // Speculate the 2nd condition unless the 1st is probably true.
3747 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3748 return {{BI->getSuccessor(0), Instruction::Or, false}};
3749 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3750 // Speculate the 2nd condition unless the 1st is probably false.
3751 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3752 return {{BI->getSuccessor(1), Instruction::And, false}};
3753 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
3754 // Speculate the 2nd condition unless the 1st is probably true.
3755 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3756 return {{BI->getSuccessor(1), Instruction::And, true}};
3757 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
3758 // Speculate the 2nd condition unless the 1st is probably false.
3759 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3760 return {{BI->getSuccessor(0), Instruction::Or, true}};
3761 }
3762 return std::nullopt;
3763}
3764
// Perform the actual fold of BI's block into predecessor PBI: clone the bonus
// instructions, glue the two conditions with createLogicalOp, and rewire
// PBI straight to the unique successor.
// NOTE(review): the signature head (source line 3765 — function name and the
// BranchInst *BI / *PBI parameters) is elided by this rendering.
3766 DomTreeUpdater *DTU,
3767 MemorySSAUpdater *MSSAU,
3768 const TargetTransformInfo *TTI) {
3769 BasicBlock *BB = BI->getParent();
3770 BasicBlock *PredBlock = PBI->getParent();
3771
3772 // Determine if the two branches share a common destination.
3773 BasicBlock *CommonSucc;
// NOTE(review): source lines 3774 (Opc declaration), 3777 (the call to
// shouldFoldCondBranchesToCommonDestination feeding std::tie) and 3785 (the
// metadata-copy call head) are elided by this rendering.
3775 bool InvertPredCond;
3776 std::tie(CommonSucc, Opc, InvertPredCond) =
3778
3779 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
3780
3781 IRBuilder<> Builder(PBI);
3782 // The builder is used to create instructions to eliminate the branch in BB.
3783 // If BB's terminator has !annotation metadata, add it to the new
3784 // instructions.
3786 {LLVMContext::MD_annotation});
3787
3788 // If we need to invert the condition in the pred block to match, do so now.
3789 if (InvertPredCond) {
3790 InvertBranch(PBI, Builder);
3791 }
3792
3793 BasicBlock *UniqueSucc =
3794 PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
3795
3796 // Before cloning instructions, notify the successor basic block that it
3797 // is about to have a new predecessor. This will update PHI nodes,
3798 // which will allow us to update live-out uses of bonus instructions.
3799 AddPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
3800
3801 // Try to update branch weights.
3802 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
3803 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
3804 SuccTrueWeight, SuccFalseWeight)) {
3805 SmallVector<uint64_t, 8> NewWeights;
3806
3807 if (PBI->getSuccessor(0) == BB) {
3808 // PBI: br i1 %x, BB, FalseDest
3809 // BI: br i1 %y, UniqueSucc, FalseDest
3810 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
3811 NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
3812 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
3813 // TrueWeight for PBI * FalseWeight for BI.
3814 // We assume that total weights of a BranchInst can fit into 32 bits.
3815 // Therefore, we will not have overflow using 64-bit arithmetic.
3816 NewWeights.push_back(PredFalseWeight *
3817 (SuccFalseWeight + SuccTrueWeight) +
3818 PredTrueWeight * SuccFalseWeight);
3819 } else {
3820 // PBI: br i1 %x, TrueDest, BB
3821 // BI: br i1 %y, TrueDest, UniqueSucc
3822 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
3823 // FalseWeight for PBI * TrueWeight for BI.
3824 NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
3825 PredFalseWeight * SuccTrueWeight);
3826 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
3827 NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
3828 }
3829
3830 // Halve the weights if any of them cannot fit in an uint32_t
3831 FitWeights(NewWeights);
3832
3833 SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end());
3834 setBranchWeights(PBI, MDWeights[0], MDWeights[1]);
3835
3836 // TODO: If BB is reachable from all paths through PredBlock, then we
3837 // could replace PBI's branch probabilities with BI's.
3838 } else
// Neither branch had weights: drop any stale profile metadata on PBI.
3839 PBI->setMetadata(LLVMContext::MD_prof, nullptr);
3840
3841 // Now, update the CFG.
3842 PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
3843
3844 if (DTU)
3845 DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
3846 {DominatorTree::Delete, PredBlock, BB}});
3847
3848 // If BI was a loop latch, it may have had associated loop metadata.
3849 // We need to copy it to the new latch, that is, PBI.
3850 if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
3851 PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
3852
3853 ValueToValueMapTy VMap; // maps original values to cloned values
// NOTE(review): source line 3854 (the bonus-instruction cloning call that
// populates VMap) is elided by this rendering — VMap is consumed below.
3855
3856 Module *M = BB->getModule();
3857
3858 if (PredBlock->IsNewDbgInfoFormat) {
3859 PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
3860 for (DbgVariableRecord &DVR :
// NOTE(review): source lines 3861 and 3863 (the range expression for this
// loop and the remap-flags argument) are elided by this rendering.
3862 RemapDbgRecord(M, &DVR, VMap,
3864 }
3865 }
3866
3867 // Now that the Cond was cloned into the predecessor basic block,
3868 // or/and the two conditions together.
3869 Value *BICond = VMap[BI->getCondition()];
3870 PBI->setCondition(
3871 createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
3872
3873 ++NumFoldBranchToCommonDest;
3874 return true;
3875}
3876
3877/// Return if an instruction's type or any of its operands' types are a vector
3878/// type.
3879static bool isVectorOp(Instruction &I) {
3880 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
3881 return U->getType()->isVectorTy();
3882 });
3883}
3884
3885/// If this basic block is simple enough, and if a predecessor branches to us
3886/// and one of our successors, fold the block into the predecessor and use
3887/// logical operations to pick the right destination.
// NOTE(review): the signature head (source line 3888 — function name and the
// BranchInst *BI / DomTreeUpdater parameters) is elided by this rendering.
3889 MemorySSAUpdater *MSSAU,
3890 const TargetTransformInfo *TTI,
3891 unsigned BonusInstThreshold) {
3892 // If this block ends with an unconditional branch,
3893 // let SpeculativelyExecuteBB() deal with it.
3894 if (!BI->isConditional())
3895 return false;
3896
3897 BasicBlock *BB = BI->getParent();
// NOTE(review): source lines 3898-3900 (apparently the CostKind setup between
// here and the Cond extraction) are elided by this rendering.
3901
3902 Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
3903
// Only fold when the condition is a single-use cmp/binop/select defined in
// this very block.
3904 if (!Cond ||
3905 (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond) &&
3906 !isa<SelectInst>(Cond)) ||
3907 Cond->getParent() != BB || !Cond->hasOneUse())
3908 return false;
3909
3910 // Finally, don't infinitely unroll conditional loops.
3911 if (is_contained(successors(BB), BB))
3912 return false;
3913
3914 // With which predecessors will we want to deal with?
// NOTE(review): source line 3915 (declaration of the `Preds` container used
// below) is elided by this rendering.
3916 for (BasicBlock *PredBlock : predecessors(BB)) {
3917 BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
3918
3919 // Check that we have two conditional branches. If there is a PHI node in
3920 // the common successor, verify that the same value flows in from both
3921 // blocks.
3922 if (!PBI || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI))
3923 continue;
3924
3925 // Determine if the two branches share a common destination.
3926 BasicBlock *CommonSucc;
// NOTE(review): source line 3927 (the `Opc` declaration) is elided here.
3928 bool InvertPredCond;
3929 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
3930 std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
3931 else
3932 continue;
3933
3934 // Check the cost of inserting the necessary logic before performing the
3935 // transformation.
3936 if (TTI) {
3937 Type *Ty = BI->getCondition()->getType();
// NOTE(review): source lines 3938 (Cost initialization) and 3943 (the budget
// comparison guarding this `continue`) are elided by this rendering.
3939 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
3940 !isa<CmpInst>(PBI->getCondition())))
3941 Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
3942
3944 continue;
3945 }
3946
3947 // Ok, we do want to deal with this predecessor. Record it.
3948 Preds.emplace_back(PredBlock);
3949 }
3950
3951 // If there aren't any predecessors into which we can fold,
3952 // don't bother checking the cost.
3953 if (Preds.empty())
3954 return false;
3955
3956 // Only allow this transformation if computing the condition doesn't involve
3957 // too many instructions and these involved instructions can be executed
3958 // unconditionally. We denote all involved instructions except the condition
3959 // as "bonus instructions", and only allow this transformation when the
3960 // number of the bonus instructions we'll need to create when cloning into
3961 // each predecessor does not exceed a certain threshold.
3962 unsigned NumBonusInsts = 0;
3963 bool SawVectorOp = false;
3964 const unsigned PredCount = Preds.size();
3965 for (Instruction &I : *BB) {
3966 // Don't check the branch condition comparison itself.
3967 if (&I == Cond)
3968 continue;
3969 // Ignore dbg intrinsics, and the terminator.
3970 if (isa<DbgInfoIntrinsic>(I) || isa<BranchInst>(I))
3971 continue;
3972 // I must be safe to execute unconditionally.
// NOTE(review): source line 3973 (the isSafeToSpeculativelyExecute-style
// check guarding this early return) is elided by this rendering.
3974 return false;
3975 SawVectorOp |= isVectorOp(I);
3976
3977 // Account for the cost of duplicating this instruction into each
3978 // predecessor. Ignore free instructions.
3979 if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
// NOTE(review): source line 3980 (the TCC_Free constant this compares
// against) is elided by this rendering.
3981 NumBonusInsts += PredCount;
3982
3983 // Early exits once we reach the limit.
3984 if (NumBonusInsts >
3985 BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
3986 return false;
3987 }
3988
3989 auto IsBCSSAUse = [BB, &I](Use &U) {
3990 auto *UI = cast<Instruction>(U.getUser());
3991 if (auto *PN = dyn_cast<PHINode>(UI))
3992 return PN->getIncomingBlock(U) == BB;
3993 return UI->getParent() == BB && I.comesBefore(UI);
3994 };
3995
3996 // Does this instruction require rewriting of uses?
3997 if (!all_of(I.uses(), IsBCSSAUse))
3998 return false;
3999 }
4000 if (NumBonusInsts >
4001 BonusInstThreshold *
4002 (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
4003 return false;
4004
4005 // Ok, we have the budget. Perform the transformation.
// Deliberately folds into only the first viable predecessor per invocation;
// the pass iterates, so remaining predecessors are handled on later runs.
4006 for (BasicBlock *PredBlock : Preds) {
4007 auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
4008 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
4009 }
4010 return false;
4011}
4012
4013// If there is only one store in BB1 and BB2, return it, otherwise return
4014// nullptr.
// NOTE(review): the signature line (source line 4015 — function name and the
// BasicBlock *BB1 / *BB2 parameters) is elided by this rendering. Either
// block pointer may be null; a null block is simply skipped.
4016 StoreInst *S = nullptr;
4017 for (auto *BB : {BB1, BB2}) {
4018 if (!BB)
4019 continue;
4020 for (auto &I : *BB)
4021 if (auto *SI = dyn_cast<StoreInst>(&I)) {
4022 if (S)
4023 // Multiple stores seen.
4024 return nullptr;
4025 else
4026 S = SI;
4027 }
4028 }
4029 return S;
4030}
4031
// Make V (defined in BB) available in BB's single successor via a PHI node,
// reusing an existing suitable PHI when possible.
// NOTE(review): the signature head (source line 4032 — function name and the
// Value *V / BasicBlock *BB parameters) is elided by this rendering.
4033 Value *AlternativeV = nullptr) {
4034 // PHI is going to be a PHI node that allows the value V that is defined in
4035 // BB to be referenced in BB's only successor.
4036 //
4037 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
4038 // doesn't matter to us what the other operand is (it'll never get used). We
4039 // could just create a new PHI with an undef incoming value, but that could
4040 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
4041 // other PHI. So here we directly look for some PHI in BB's successor with V
4042 // as an incoming operand. If we find one, we use it, else we create a new
4043 // one.
4044 //
4045 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
4046 // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
4047 // where OtherBB is the single other predecessor of BB's only successor.
4048 PHINode *PHI = nullptr;
4049 BasicBlock *Succ = BB->getSingleSuccessor();
4050
4051 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
4052 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
4053 PHI = cast<PHINode>(I);
4054 if (!AlternativeV)
4055 break;
4056
4057 assert(Succ->hasNPredecessors(2));
4058 auto PredI = pred_begin(Succ);
4059 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4060 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
4061 break;
// Candidate PHI's other incoming value didn't match; keep searching.
4062 PHI = nullptr;
4063 }
4064 if (PHI)
4065 return PHI;
4066
4067 // If V is not an instruction defined in BB, just return it.
4068 if (!AlternativeV &&
4069 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
4070 return V;
4071
4072 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
4073 PHI->insertBefore(Succ->begin());
4074 PHI->addIncoming(V, BB);
4075 for (BasicBlock *PredBB : predecessors(Succ))
4076 if (PredBB != BB)
4077 PHI->addIncoming(
4078 AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
4079 return PHI;
4080}
4081
// Merge the unique conditional store to Address found in the P blocks with
// the one found in the Q blocks into a single predicated store in PostBB.
// NOTE(review): the signature head (source line 4082 — the function name) is
// elided by this rendering; verify against upstream SimplifyCFG.cpp.
4083 BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4084 BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4085 DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
4086 // For every pointer, there must be exactly two stores, one coming from
4087 // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4088 // store (to any address) in PTB,PFB or QTB,QFB.
4089 // FIXME: We could relax this restriction with a bit more work and performance
4090 // testing.
4091 StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
4092 StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
4093 if (!PStore || !QStore)
4094 return false;
4095
4096 // Now check the stores are compatible.
4097 if (!QStore->isUnordered() || !PStore->isUnordered() ||
4098 PStore->getValueOperand()->getType() !=
4099 QStore->getValueOperand()->getType())
4100 return false;
4101
4102 // Check that sinking the store won't cause program behavior changes. Sinking
4103 // the store out of the Q blocks won't change any behavior as we're sinking
4104 // from a block to its unconditional successor. But we're moving a store from
4105 // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4106 // So we need to check that there are no aliasing loads or stores in
4107 // QBI, QTB and QFB. We also need to check there are no conflicting memory
4108 // operations between PStore and the end of its parent block.
4109 //
4110 // The ideal way to do this is to query AliasAnalysis, but we don't
4111 // preserve AA currently so that is dangerous. Be super safe and just
4112 // check there are no other memory operations at all.
4113 for (auto &I : *QFB->getSinglePredecessor())
4114 if (I.mayReadOrWriteMemory())
4115 return false;
4116 for (auto &I : *QFB)
4117 if (&I != QStore && I.mayReadOrWriteMemory())
4118 return false;
4119 if (QTB)
4120 for (auto &I : *QTB)
4121 if (&I != QStore && I.mayReadOrWriteMemory())
4122 return false;
4123 for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4124 I != E; ++I)
4125 if (&*I != PStore && I->mayReadOrWriteMemory())
4126 return false;
4127
4128 // If we're not in aggressive mode, we only optimize if we have some
4129 // confidence that by optimizing we'll allow P and/or Q to be if-converted.
4130 auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
4131 if (!BB)
4132 return true;
4133 // Heuristic: if the block can be if-converted/phi-folded and the
4134 // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4135 // thread this store.
// NOTE(review): source lines 4136 (Cost declaration) and 4138 (the Budget
// initializer expression) are elided by this rendering.
4137 InstructionCost Budget =
4139 for (auto &I : BB->instructionsWithoutDebug(false)) {
4140 // Consider terminator instruction to be free.
4141 if (I.isTerminator())
4142 continue;
4143 // If this is one the stores that we want to speculate out of this BB,
4144 // then don't count it's cost, consider it to be free.
4145 if (auto *S = dyn_cast<StoreInst>(&I))
4146 if (llvm::find(FreeStores, S))
4147 continue;
4148 // Else, we have a white-list of instructions that we are ak speculating.
4149 if (!isa<BinaryOperator>(I) && !isa<GetElementPtrInst>(I))
4150 return false; // Not in white-list - not worthwhile folding.
4151 // And finally, if this is a non-free instruction that we are okay
4152 // speculating, ensure that we consider the speculation budget.
4153 Cost +=
// NOTE(review): source line 4154 (the TTI cost-query expression added into
// Cost) is elided by this rendering.
4155 if (Cost > Budget)
4156 return false; // Eagerly refuse to fold as soon as we're out of budget.
4157 }
4158 assert(Cost <= Budget &&
4159 "When we run out of budget we will eagerly return from within the "
4160 "per-instruction loop.");
4161 return true;
4162 };
4163
4164 const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
// NOTE(review): source line 4165 (the aggressive-mode flag check that guards
// this worthwhile test) is elided by this rendering.
4166 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4167 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4168 return false;
4169
4170 // If PostBB has more than two predecessors, we need to split it so we can
4171 // sink the store.
4172 if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
4173 // We know that QFB's only successor is PostBB. And QFB has a single
4174 // predecessor. If QTB exists, then its only successor is also PostBB.
4175 // If QTB does not exist, then QFB's only predecessor has a conditional
4176 // branch to QFB and PostBB.
4177 BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4178 BasicBlock *NewBB =
4179 SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
4180 if (!NewBB)
4181 return false;
4182 PostBB = NewBB;
4183 }
4184
4185 // OK, we're going to sink the stores to PostBB. The store has to be
4186 // conditional though, so first create the predicate.
4187 Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator())
4188 ->getCondition();
4189 Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator())
4190 ->getCondition();
4191
// NOTE(review): source lines 4192 and 4194 (the PPHI/QPHI declarations
// calling ensureValueAvailableInSuccessor) are elided by this rendering.
4193 PStore->getParent());
4195 QStore->getParent(), PPHI);
4196
4197 BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4198 IRBuilder<> QB(PostBB, PostBBFirst);
4199 QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4200
4201 Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond);
4202 Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond);
4203
4204 if (InvertPCond)
4205 PPred = QB.CreateNot(PPred);
4206 if (InvertQCond)
4207 QPred = QB.CreateNot(QPred);
// The merged store executes when either original store would have.
4208 Value *CombinedPred = QB.CreateOr(PPred, QPred);
4209
4210 BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4211 auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
4212 /*Unreachable=*/false,
4213 /*BranchWeights=*/nullptr, DTU);
4214
4215 QB.SetInsertPoint(T);
4216 StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
4217 SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
4218 // Choose the minimum alignment. If we could prove both stores execute, we
4219 // could use biggest one. In this case, though, we only know that one of the
4220 // stores executes. And we don't know it's safe to take the alignment from a
4221 // store that doesn't execute.
4222 SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
4223
4224 QStore->eraseFromParent();
4225 PStore->eraseFromParent();
4226
4227 return true;
4228}
4229
// Detect back-to-back conditional diamonds/triangles (PBI then QBI) storing
// to common addresses, and merge each such store pair via
// mergeConditionalStoreToAddress.
// NOTE(review): the signature head (source line 4230 — function name and the
// BranchInst *PBI / *QBI parameters) is elided by this rendering.
4231 DomTreeUpdater *DTU, const DataLayout &DL,
4232 const TargetTransformInfo &TTI) {
4233 // The intention here is to find diamonds or triangles (see below) where each
4234 // conditional block contains a store to the same address. Both of these
4235 // stores are conditional, so they can't be unconditionally sunk. But it may
4236 // be profitable to speculatively sink the stores into one merged store at the
4237 // end, and predicate the merged store on the union of the two conditions of
4238 // PBI and QBI.
4239 //
4240 // This can reduce the number of stores executed if both of the conditions are
4241 // true, and can allow the blocks to become small enough to be if-converted.
4242 // This optimization will also chain, so that ladders of test-and-set
4243 // sequences can be if-converted away.
4244 //
4245 // We only deal with simple diamonds or triangles:
4246 //
4247 // PBI or PBI or a combination of the two
4248 // / \ | \
4249 // PTB PFB | PFB
4250 // \ / | /
4251 // QBI QBI
4252 // / \ | \
4253 // QTB QFB | QFB
4254 // \ / | /
4255 // PostBB PostBB
4256 //
4257 // We model triangles as a type of diamond with a nullptr "true" block.
4258 // Triangles are canonicalized so that the fallthrough edge is represented by
4259 // a true condition, as in the diagram above.
4260 BasicBlock *PTB = PBI->getSuccessor(0);
4261 BasicBlock *PFB = PBI->getSuccessor(1);
4262 BasicBlock *QTB = QBI->getSuccessor(0);
4263 BasicBlock *QFB = QBI->getSuccessor(1);
4264 BasicBlock *PostBB = QFB->getSingleSuccessor();
4265
4266 // Make sure we have a good guess for PostBB. If QTB's only successor is
4267 // QFB, then QFB is a better PostBB.
4268 if (QTB->getSingleSuccessor() == QFB)
4269 PostBB = QFB;
4270
4271 // If we couldn't find a good PostBB, stop.
4272 if (!PostBB)
4273 return false;
4274
4275 bool InvertPCond = false, InvertQCond = false;
4276 // Canonicalize fallthroughs to the true branches.
4277 if (PFB == QBI->getParent()) {
4278 std::swap(PFB, PTB);
4279 InvertPCond = true;
4280 }
4281 if (QFB == PostBB) {
4282 std::swap(QFB, QTB);
4283 InvertQCond = true;
4284 }
4285
4286 // From this point on we can assume PTB or QTB may be fallthroughs but PFB
4287 // and QFB may not. Model fallthroughs as a nullptr block.
4288 if (PTB == QBI->getParent())
4289 PTB = nullptr;
4290 if (QTB == PostBB)
4291 QTB = nullptr;
4292
4293 // Legality bailouts. We must have at least the non-fallthrough blocks and
4294 // the post-dominating block, and the non-fallthroughs must only have one
4295 // predecessor.
4296 auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
4297 return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4298 };
4299 if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
4300 !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
4301 return false;
4302 if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
4303 (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
4304 return false;
4305 if (!QBI->getParent()->hasNUses(2))
4306 return false;
4307
4308 // OK, this is a sequence of two diamonds or triangles.
4309 // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
4310 SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
4311 for (auto *BB : {PTB, PFB}) {
4312 if (!BB)
4313 continue;
4314 for (auto &I : *BB)
4315 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
4316 PStoreAddresses.insert(SI->getPointerOperand());
4317 }
4318 for (auto *BB : {QTB, QFB}) {
4319 if (!BB)
4320 continue;
4321 for (auto &I : *BB)
4322 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
4323 QStoreAddresses.insert(SI->getPointerOperand());
4324 }
4325
4326 set_intersect(PStoreAddresses, QStoreAddresses);
4327 // set_intersect mutates PStoreAddresses in place. Rename it here to make it
4328 // clear what it contains.
4329 auto &CommonAddresses = PStoreAddresses;
4330
4331 bool Changed = false;
4332 for (auto *Address : CommonAddresses)
4333 Changed |=
4334 mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4335 InvertPCond, InvertQCond, DTU, DL, TTI);
4336 return Changed;
4337}
4338
4339/// If the previous block ended with a widenable branch, determine if reusing
4340/// the target block is profitable and legal. This will have the effect of
4341/// "widening" PBI, but doesn't require us to reason about hoisting safety.
///
/// \p DTU may be null; when non-null it receives the edge insert/delete
/// updates performed here.
/// NOTE(review): the line declaring the remaining parameters (original line
/// 4342, presumably PBI and BI) is missing from this extraction.
4343 DomTreeUpdater *DTU) {
4344 // TODO: This can be generalized in two important ways:
4345 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4346 // values from the PBI edge.
4347 // 2) We can sink side effecting instructions into BI's fallthrough
4348 // successor provided they don't contribute to computation of
4349 // BI's condition.
4350 BasicBlock *IfTrueBB = PBI->getSuccessor(0);
4351 BasicBlock *IfFalseBB = PBI->getSuccessor(1);
 // Bail unless PBI is a widenable branch whose true edge leads into BI's
 // block. NOTE(review): the continuation of this condition (original line
 // 4353) is missing from this extraction.
4352 if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
4354 return false;
4355 if (!IfFalseBB->phis().empty())
4356 return false; // TODO
4357 // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4358 // may undo the transform done here.
4359 // TODO: There might be a more fine-grained solution to this.
4360 if (!llvm::succ_empty(IfFalseBB))
4361 return false;
4362 // Use lambda to lazily compute expensive condition after cheap ones.
4363 auto NoSideEffects = [](BasicBlock &BB) {
4364 return llvm::none_of(BB, [](const Instruction &I) {
4365 return I.mayWriteToMemory() || I.mayHaveSideEffects();
4366 });
4367 };
 // Case 1: BI's false successor is a deopt block distinct from IfFalseBB;
 // retarget that edge to IfFalseBB (widening PBI's guard).
4368 if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
4369 BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
4370 NoSideEffects(*BI->getParent())) {
4371 auto *OldSuccessor = BI->getSuccessor(1);
4372 OldSuccessor->removePredecessor(BI->getParent());
4373 BI->setSuccessor(1, IfFalseBB);
4374 if (DTU)
4375 DTU->applyUpdates(
4376 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4377 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4378 return true;
4379 }
 // Case 2: symmetric to the above, but for BI's true successor.
4380 if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
4381 BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
4382 NoSideEffects(*BI->getParent())) {
4383 auto *OldSuccessor = BI->getSuccessor(0);
4384 OldSuccessor->removePredecessor(BI->getParent());
4385 BI->setSuccessor(0, IfFalseBB);
4386 if (DTU)
4387 DTU->applyUpdates(
4388 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4389 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4390 return true;
4391 }
4392 return false;
4393}
4394
4395/// If we have a conditional branch as a predecessor of another block,
4396/// this function tries to simplify it. We know
4397/// that PBI and BI are both conditional branches, and BI is in one of the
4398/// successor blocks of PBI - PBI branches to BI.
///
/// \returns true if any transformation was performed.
/// NOTE(review): the first line of the signature (original line 4399, which
/// declares PBI and BI) is missing from this extraction.
4400 DomTreeUpdater *DTU,
4401 const DataLayout &DL,
4402 const TargetTransformInfo &TTI) {
4403 assert(PBI->isConditional() && BI->isConditional());
4404 BasicBlock *BB = BI->getParent();
4405
4406 // If this block ends with a branch instruction, and if there is a
4407 // predecessor that ends on a branch of the same condition, make
4408 // this conditional branch redundant.
4409 if (PBI->getCondition() == BI->getCondition() &&
4410 PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
4411 // Okay, the outcome of this conditional branch is statically
4412 // knowable. If this block had a single pred, handle specially, otherwise
4413 // FoldCondBranchOnValueKnownInPredecessor() will handle it.
4414 if (BB->getSinglePredecessor()) {
4415 // Turn this into a branch on constant.
4416 bool CondIsTrue = PBI->getSuccessor(0) == BB;
4417 BI->setCondition(
4418 ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
4419 return true; // Nuke the branch on constant.
4420 }
4421 }
4422
4423 // If the previous block ended with a widenable branch, determine if reusing
4424 // the target block is profitable and legal. This will have the effect of
4425 // "widening" PBI, but doesn't require us to reason about hoisting safety.
4426 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4427 return true;
4428
4429 // If both branches are conditional and both contain stores to the same
4430 // address, remove the stores from the conditionals and create a conditional
4431 // merged store at the end.
4432 if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
4433 return true;
4434
4435 // If this is a conditional branch in an empty block, and if any
4436 // predecessors are a conditional branch to one of our destinations,
4437 // fold the conditions into logical ops and one cond br.
4438
4439 // Ignore dbg intrinsics.
4440 if (&*BB->instructionsWithoutDebug(false).begin() != BI)
4441 return false;
4442
 // Identify which successor of PBI and which successor of BI coincide; that
 // shared block becomes the "common" destination of the merged branch.
4443 int PBIOp, BIOp;
4444 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
4445 PBIOp = 0;
4446 BIOp = 0;
4447 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4448 PBIOp = 0;
4449 BIOp = 1;
4450 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4451 PBIOp = 1;
4452 BIOp = 0;
4453 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
4454 PBIOp = 1;
4455 BIOp = 1;
4456 } else {
4457 return false;
4458 }
4459
4460 // Check to make sure that the other destination of this branch
4461 // isn't BB itself. If so, this is an infinite loop that will
4462 // keep getting unwound.
4463 if (PBI->getSuccessor(PBIOp) == BB)
4464 return false;
4465
4466 // If predecessor's branch probability to BB is too low don't merge branches.
4467 SmallVector<uint32_t, 2> PredWeights;
4468 if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
4469 extractBranchWeights(*PBI, PredWeights) &&
4470 (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4471
 // NOTE(review): original line 4472 (the declaration that consumes the
 // arguments below, presumably a BranchProbability construction) is
 // missing from this extraction.
4473 PredWeights[PBIOp],
4474 static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4475
 // NOTE(review): original line 4476 (defining `Likely`) is missing from
 // this extraction.
4477 if (CommonDestProb >= Likely)
4478 return false;
4479 }
4480
4481 // Do not perform this transformation if it would require
4482 // insertion of a large number of select instructions. For targets
4483 // without predication/cmovs, this is a big pessimization.
4484
4485 BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
4486 BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
4487 unsigned NumPhis = 0;
4488 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
4489 ++II, ++NumPhis) {
4490 if (NumPhis > 2) // Disable this xform.
4491 return false;
4492 }
4493
4494 // Finally, if everything is ok, fold the branches to logical ops.
4495 BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
4496
4497 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4498 << "AND: " << *BI->getParent());
4499
 // NOTE(review): original line 4500 (the declaration of `Updates`) is
 // missing from this extraction.
4501
4502 // If OtherDest *is* BB, then BB is a basic block with a single conditional
4503 // branch in it, where one edge (OtherDest) goes back to itself but the other
4504 // exits. We don't *know* that the program avoids the infinite loop
4505 // (even though that seems likely). If we do this xform naively, we'll end up
4506 // recursively unpeeling the loop. Since we know that (after the xform is
4507 // done) that the block *is* infinite if reached, we just make it an obviously
4508 // infinite loop with no cond branch.
4509 if (OtherDest == BB) {
4510 // Insert it at the end of the function, because it's either code,
4511 // or it won't matter if it's hot. :)
4512 BasicBlock *InfLoopBlock =
4513 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
4514 BranchInst::Create(InfLoopBlock, InfLoopBlock);
4515 if (DTU)
4516 Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4517 OtherDest = InfLoopBlock;
4518 }
4519
4520 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4521
4522 // BI may have other predecessors. Because of this, we leave
4523 // it alone, but modify PBI.
4524
4525 // Make sure we get to CommonDest on True&True directions.
4526 Value *PBICond = PBI->getCondition();
4527 IRBuilder<NoFolder> Builder(PBI);
4528 if (PBIOp)
4529 PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
4530
4531 Value *BICond = BI->getCondition();
4532 if (BIOp)
4533 BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
4534
4535 // Merge the conditions.
4536 Value *Cond =
4537 createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
4538
4539 // Modify PBI to branch on the new condition to the new dests.
4540 PBI->setCondition(Cond);
4541 PBI->setSuccessor(0, CommonDest);
4542 PBI->setSuccessor(1, OtherDest);
4543
4544 if (DTU) {
4545 Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
4546 Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
4547
4548 DTU->applyUpdates(Updates);
4549 }
4550
4551 // Update branch weight for PBI.
4552 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4553 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4554 bool HasWeights =
4555 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4556 SuccTrueWeight, SuccFalseWeight);
4557 if (HasWeights) {
4558 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4559 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4560 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4561 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4562 // The weight to CommonDest should be PredCommon * SuccTotal +
4563 // PredOther * SuccCommon.
4564 // The weight to OtherDest should be PredOther * SuccOther.
4565 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4566 PredOther * SuccCommon,
4567 PredOther * SuccOther};
4568 // Halve the weights if any of them cannot fit in an uint32_t
4569 FitWeights(NewWeights);
4570
4571 setBranchWeights(PBI, NewWeights[0], NewWeights[1]);
4572 }
4573
4574 // OtherDest may have phi nodes. If so, add an entry from PBI's
4575 // block that are identical to the entries for BI's block.
4576 AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);
4577
4578 // We know that the CommonDest already had an edge from PBI to
4579 // it. If it has PHIs though, the PHIs may have different
4580 // entries for BB and PBI's BB. If so, insert a select to make
4581 // them agree.
4582 for (PHINode &PN : CommonDest->phis()) {
4583 Value *BIV = PN.getIncomingValueForBlock(BB);
4584 unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
4585 Value *PBIV = PN.getIncomingValue(PBBIdx);
4586 if (BIV != PBIV) {
4587 // Insert a select in PBI to pick the right value.
4588 SelectInst *NV = cast<SelectInst>(
4589 Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
4590 PN.setIncomingValue(PBBIdx, NV);
4591 // Although the select has the same condition as PBI, the original branch
4592 // weights for PBI do not apply to the new select because the select's
4593 // 'logical' edges are incoming edges of the phi that is eliminated, not
4594 // the outgoing edges of PBI.
4595 if (HasWeights) {
4596 uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4597 uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4598 uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4599 uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4600 // The weight to PredCommonDest should be PredCommon * SuccTotal.
4601 // The weight to PredOtherDest should be PredOther * SuccCommon.
4602 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
4603 PredOther * SuccCommon};
4604
4605 FitWeights(NewWeights);
4606
4607 setBranchWeights(NV, NewWeights[0], NewWeights[1]);
4608 }
4609 }
4610 }
4611
4612 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4613 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4614
4615 // This basic block is probably dead. We know it has at least
4616 // one fewer predecessor.
4617 return true;
4618}
4619
4620// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4621// true or to FalseBB if Cond is false.
4622// Takes care of updating the successors and removing the old terminator.
4623// Also makes sure not to introduce new successors by assuming that edges to
4624// non-successor TrueBBs and FalseBBs aren't reachable.
//
// TrueWeight/FalseWeight are the branch weights to attach to the new
// conditional branch; equal weights (including 0/0) mean "no metadata".
// Always returns true (the terminator is always replaced).
4625bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm,
4626 Value *Cond, BasicBlock *TrueBB,
4627 BasicBlock *FalseBB,
4628 uint32_t TrueWeight,
4629 uint32_t FalseWeight) {
4630 auto *BB = OldTerm->getParent();
4631 // Remove any superfluous successor edges from the CFG.
4632 // First, figure out which successors to preserve.
4633 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
4634 // successor.
4635 BasicBlock *KeepEdge1 = TrueBB;
4636 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
4637
4638 SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
4639
4640 // Then remove the rest.
4641 for (BasicBlock *Succ : successors(OldTerm)) {
4642 // Make sure only to keep exactly one copy of each edge.
4643 if (Succ == KeepEdge1)
4644 KeepEdge1 = nullptr;
4645 else if (Succ == KeepEdge2)
4646 KeepEdge2 = nullptr;
4647 else {
4648 Succ->removePredecessor(BB,
4649 /*KeepOneInputPHIs=*/true);
4650
4651 if (Succ != TrueBB && Succ != FalseBB)
4652 RemovedSuccessors.insert(Succ);
4653 }
4654 }
4655
4656 IRBuilder<> Builder(OldTerm);
4657 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4658
4659 // Insert an appropriate new terminator.
 // KeepEdge1/KeepEdge2 are now null for each target that was found among the
 // old terminator's successors; the non-null ones were NOT reachable before.
4660 if (!KeepEdge1 && !KeepEdge2) {
4661 if (TrueBB == FalseBB) {
4662 // We were only looking for one successor, and it was present.
4663 // Create an unconditional branch to it.
4664 Builder.CreateBr(TrueBB);
4665 } else {
4666 // We found both of the successors we were looking for.
4667 // Create a conditional branch sharing the condition of the select.
4668 BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
4669 if (TrueWeight != FalseWeight)
4670 setBranchWeights(NewBI, TrueWeight, FalseWeight);
4671 }
4672 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4673 // Neither of the selected blocks were successors, so this
4674 // terminator must be unreachable.
4675 new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
4676 } else {
4677 // One of the selected values was a successor, but the other wasn't.
4678 // Insert an unconditional branch to the one that was found;
4679 // the edge to the one that wasn't must be unreachable.
4680 if (!KeepEdge1) {
4681 // Only TrueBB was found.
4682 Builder.CreateBr(TrueBB);
4683 } else {
4684 // Only FalseBB was found.
4685 Builder.CreateBr(FalseBB);
4686 }
4687 }
4688
 // NOTE(review): original line 4689 (which erases the old terminator) is
 // missing from this extraction.
4690
4691 if (DTU) {
 // NOTE(review): original line 4692 (the declaration of `Updates`) is
 // missing from this extraction.
4693 Updates.reserve(RemovedSuccessors.size());
4694 for (auto *RemovedSuccessor : RemovedSuccessors)
4695 Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
4696 DTU->applyUpdates(Updates);
4697 }
4698
4699 return true;
4700}
4701
4702// Replaces
4703// (switch (select cond, X, Y)) on constant X, Y
4704// with a branch - conditional if X and Y lead to distinct BBs,
4705// unconditional otherwise.
//
// Returns false when either select arm is not a ConstantInt; otherwise
// delegates the CFG rewrite to SimplifyTerminatorOnSelect.
4706bool SimplifyCFGOpt::SimplifySwitchOnSelect(SwitchInst *SI,
4707 SelectInst *Select) {
4708 // Check for constant integer values in the select.
4709 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
4710 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
4711 if (!TrueVal || !FalseVal)
4712 return false;
4713
4714 // Find the relevant condition and destinations.
 // findCaseValue falls back to the default case when the constant is not an
 // explicit case, so both lookups always yield a successor.
4715 Value *Condition = Select->getCondition();
4716 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
4717 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
4718
4719 // Get weight for TrueBB and FalseBB.
4720 uint32_t TrueWeight = 0, FalseWeight = 0;
 // NOTE(review): original line 4721 (the declaration of `Weights`) is
 // missing from this extraction.
4722 bool HasWeights = hasBranchWeightMD(*SI);
4723 if (HasWeights) {
4724 GetBranchWeights(SI, Weights);
4725 if (Weights.size() == 1 + SI->getNumCases()) {
4726 TrueWeight =
4727 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
4728 FalseWeight =
4729 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
4730 }
4731 }
4732
4733 // Perform the actual simplification.
4734 return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
4735 FalseWeight);
4736}
4737
4738// Replaces
4739// (indirectbr (select cond, blockaddress(@fn, BlockA),
4740// blockaddress(@fn, BlockB)))
4741// with
4742// (br cond, BlockA, BlockB).
4743bool SimplifyCFGOpt::SimplifyIndirectBrOnSelect(IndirectBrInst *IBI,
4744 SelectInst *SI) {
4745 // Check that both operands of the select are block addresses.
4746 BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
4747 BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
4748 if (!TBA || !FBA)
4749 return false;
4750
4751 // Extract the actual blocks.
4752 BasicBlock *TrueBB = TBA->getBasicBlock();
4753 BasicBlock *FalseBB = FBA->getBasicBlock();
4754
4755 // Perform the actual simplification.
4756 return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
4757 0);
4758}
4759
4760/// This is called when we find an icmp instruction
4761/// (a seteq/setne with a constant) as the only instruction in a
4762/// block that ends with an uncond branch. We are looking for a very specific
4763/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
4764/// this case, we merge the first two "or's of icmp" into a switch, but then the
4765/// default value goes to an uncond block with a seteq in it, we get something
4766/// like:
4767///
4768/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
4769/// DEFAULT:
4770/// %tmp = icmp eq i8 %A, 92
4771/// br label %end
4772/// end:
4773/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
4774///
4775/// We prefer to split the edge to 'end' so that there is a true/false entry to
4776/// the PHI, merging the third icmp into the switch.
4777bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
4778 ICmpInst *ICI, IRBuilder<> &Builder) {
4779 BasicBlock *BB = ICI->getParent();
4780
4781 // If the block has any PHIs in it or the icmp has multiple uses, it is too
4782 // complex.
4783 if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
4784 return false;
4785
4786 Value *V = ICI->getOperand(0);
4787 ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
4788
4789 // The pattern we're looking for is where our only predecessor is a switch on
4790 // 'V' and this block is the default case for the switch. In this case we can
4791 // fold the compared value into the switch to simplify things.
4792 BasicBlock *Pred = BB->getSinglePredecessor();
4793 if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
4794 return false;
4795
4796 SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
4797 if (SI->getCondition() != V)
4798 return false;
4799
4800 // If BB is reachable on a non-default case, then we simply know the value of
4801 // V in this block. Substitute it and constant fold the icmp instruction
4802 // away.
4803 if (SI->getDefaultDest() != BB) {
4804 ConstantInt *VVal = SI->findCaseDest(BB);
4805 assert(VVal && "Should have a unique destination value");
4806 ICI->setOperand(0, VVal);
4807
4808 if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
4809 ICI->replaceAllUsesWith(V);
4810 ICI->eraseFromParent();
4811 }
4812 // BB is now empty, so it is likely to simplify away.
4813 return requestResimplify();
4814 }
4815
4816 // Ok, the block is reachable from the default dest. If the constant we're
4817 // comparing exists in one of the other edges, then we can constant fold ICI
4818 // and zap it.
4819 if (SI->findCaseValue(Cst) != SI->case_default()) {
4820 Value *V;
4821 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
 // NOTE(review): original lines 4822 and 4824 (the two assignments to V
 // for the EQ and NE cases) are missing from this extraction.
4823 else
4825
4826 ICI->replaceAllUsesWith(V);
4827 ICI->eraseFromParent();
4828 // BB is now empty, so it is likely to simplify away.
4829 return requestResimplify();
4830 }
4831
4832 // The use of the icmp has to be in the 'end' block, by the only PHI node in
4833 // the block.
4834 BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
4835 PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
4836 if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
4837 isa<PHINode>(++BasicBlock::iterator(PHIUse)))
4838 return false;
4839
4840 // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
4841 // true in the PHI.
4842 Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
4843 Constant *NewCst = ConstantInt::getFalse(BB->getContext());
4844
4845 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
4846 std::swap(DefaultCst, NewCst);
4847
4848 // Replace ICI (which is used by the PHI for the default value) with true or
4849 // false depending on if it is EQ or NE.
4850 ICI->replaceAllUsesWith(DefaultCst);
4851 ICI->eraseFromParent();
4852
 // NOTE(review): original line 4853 (the declaration of `Updates`) is
 // missing from this extraction.
4854
4855 // Okay, the switch goes to this block on a default value. Add an edge from
4856 // the switch to the merge point on the compared value.
4857 BasicBlock *NewBB =
4858 BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
4859 {
 // NOTE(review): original lines 4860 and 4862 (the declarations of `SIW`
 // and `NewW`) are missing from this extraction. The code below splits the
 // default-edge weight in half and gives one half to the new case.
4861 auto W0 = SIW.getSuccessorWeight(0);
4863 if (W0) {
4864 NewW = ((uint64_t(*W0) + 1) >> 1);
4865 SIW.setSuccessorWeight(0, *NewW);
4866 }
4867 SIW.addCase(Cst, NewBB, NewW);
4868 if (DTU)
4869 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
4870 }
4871
4872 // NewBB branches to the phi block, add the uncond branch and the phi entry.
4873 Builder.SetInsertPoint(NewBB);
4874 Builder.SetCurrentDebugLocation(SI->getDebugLoc());
4875 Builder.CreateBr(SuccBlock);
4876 PHIUse->addIncoming(NewCst, NewBB);
4877 if (DTU) {
4878 Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
4879 DTU->applyUpdates(Updates);
4880 }
4881 return true;
4882}
4883
4884/// The specified branch is a conditional branch.
4885/// Check to see if it is branching on an or/and chain of icmp instructions, and
4886/// fold it into a switch instruction if so.
///
/// \returns true if the branch was replaced by a switch (possibly preceded by
/// an "early test" branch for a value that could not be folded into cases).
4887bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI,
4888 IRBuilder<> &Builder,
4889 const DataLayout &DL) {
4890 Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
4891 if (!Cond)
4892 return false;
4893
4894 // Change br (X == 0 | X == 1), T, F into a switch instruction.
4895 // If this is a bunch of seteq's or'd together, or if it's a bunch of
4896 // 'setne's and'ed together, collect them.
4897
4898 // Try to gather values from a chain of and/or to be turned into a switch
4899 ConstantComparesGatherer ConstantCompare(Cond, DL);
4900 // Unpack the result
4901 SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
4902 Value *CompVal = ConstantCompare.CompValue;
4903 unsigned UsedICmps = ConstantCompare.UsedICmps;
4904 Value *ExtraCase = ConstantCompare.Extra;
4905
4906 // If we didn't have a multiply compared value, fail.
4907 if (!CompVal)
4908 return false;
4909
4910 // Avoid turning single icmps into a switch.
4911 if (UsedICmps <= 1)
4912 return false;
4913
4914 bool TrueWhenEqual = match(Cond, m_LogicalOr(m_Value(), m_Value()));
4915
4916 // There might be duplicate constants in the list, which the switch
4917 // instruction can't handle, remove them now.
4918 array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
4919 Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
4920
4921 // If Extra was used, we require at least two switch values to do the
4922 // transformation. A switch with one value is just a conditional branch.
4923 if (ExtraCase && Values.size() < 2)
4924 return false;
4925
4926 // TODO: Preserve branch weight metadata, similarly to how
4927 // FoldValueComparisonIntoPredecessors preserves it.
4928
4929 // Figure out which block is which destination.
4930 BasicBlock *DefaultBB = BI->getSuccessor(1);
4931 BasicBlock *EdgeBB = BI->getSuccessor(0);
4932 if (!TrueWhenEqual)
4933 std::swap(DefaultBB, EdgeBB);
4934
4935 BasicBlock *BB = BI->getParent();
4936
4937 LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
4938 << " cases into SWITCH. BB is:\n"
4939 << *BB);
4940
 // NOTE(review): original line 4941 (the declaration of `Updates`) is
 // missing from this extraction.
4942
4943 // If there are any extra values that couldn't be folded into the switch
4944 // then we evaluate them with an explicit branch first. Split the block
4945 // right before the condbr to handle it.
4946 if (ExtraCase) {
4947 BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
4948 /*MSSAU=*/nullptr, "switch.early.test");
4949
4950 // Remove the uncond branch added to the old block.
4951 Instruction *OldTI = BB->getTerminator();
4952 Builder.SetInsertPoint(OldTI);
4953
4954 // There can be an unintended UB if extra values are Poison. Before the
4955 // transformation, extra values may not be evaluated according to the
4956 // condition, and it will not raise UB. But after transformation, we are
4957 // evaluating extra values before checking the condition, and it will raise
4958 // UB. It can be solved by adding freeze instruction to extra values.
4959 AssumptionCache *AC = Options.AC;
4960
4961 if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
4962 ExtraCase = Builder.CreateFreeze(ExtraCase);
4963
4964 if (TrueWhenEqual)
4965 Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
4966 else
4967 Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
4968
4969 OldTI->eraseFromParent();
4970
4971 if (DTU)
4972 Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
4973
4974 // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
4975 // for the edge we just added.
4976 AddPredecessorToBlock(EdgeBB, BB, NewBB);
4977
4978 LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
4979 << "\nEXTRABB = " << *BB);
4980 BB = NewBB;
4981 }
4982
4983 Builder.SetInsertPoint(BI);
4984 // Convert pointer to int before we switch.
4985 if (CompVal->getType()->isPointerTy()) {
4986 CompVal = Builder.CreatePtrToInt(
4987 CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
4988 }
4989
4990 // Create the new switch instruction now.
4991 SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
4992
4993 // Add all of the 'cases' to the switch instruction.
4994 for (unsigned i = 0, e = Values.size(); i != e; ++i)
4995 New->addCase(Values[i], EdgeBB);
4996
4997 // We added edges from PI to the EdgeBB. As such, if there were any
4998 // PHI nodes in EdgeBB, they need entries to be added corresponding to
4999 // the number of edges added.
 // One incoming entry for BB already exists; duplicate it for the remaining
 // Values.size() - 1 new case edges.
5000 for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
5001 PHINode *PN = cast<PHINode>(BBI);
5002 Value *InVal = PN->getIncomingValueForBlock(BB);
5003 for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
5004 PN->addIncoming(InVal, BB);
5005 }
5006
5007 // Erase the old branch instruction.
 // NOTE(review): original line 5008 (the statement erasing BI) is missing
 // from this extraction.
5009 if (DTU)
5010 DTU->applyUpdates(Updates);
5011
5012 LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
5013 return true;
5014}
5015
5016bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5017 if (isa<PHINode>(RI->getValue()))
5018 return simplifyCommonResume(RI);
5019 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) &&
5020 RI->getValue() == RI->getParent()->getFirstNonPHI())
5021 // The resume must unwind the exception that caused control to branch here.
5022 return simplifySingleResume(RI);
5023
5024 return false;
5025}
5026
5027// Check if cleanup block is empty
//
// "Empty" means the instruction range R contains nothing except debug
// intrinsics and lifetime.end markers; any other instruction disqualifies it.
// NOTE(review): the function signature (original line 5028, which declares the
// range parameter R) is missing from this extraction.
5029 for (Instruction &I : R) {
5030 auto *II = dyn_cast<IntrinsicInst>(&I);
5031 if (!II)
5032 return false;
5033
5034 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
5035 switch (IntrinsicID) {
5036 case Intrinsic::dbg_declare:
5037 case Intrinsic::dbg_value:
5038 case Intrinsic::dbg_label:
5039 case Intrinsic::lifetime_end:
5040 break;
5041 default:
5042 return false;
5043 }
5044 }
5045 return true;
5046}
5047
5048// Simplify resume that is shared by several landing pads (phi of landing pad).
//
// For every "trivial" incoming block (a landing pad whose only contents are
// benign intrinsics and whose unique successor is the resume block), the
// invokes unwinding into it are converted to calls and the block's edge to
// the resume block is removed. Returns true if any block was simplified.
5049bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
5050 BasicBlock *BB = RI->getParent();
5051
5052 // Check that there are no other instructions except for debug and lifetime
5053 // intrinsics between the phi's and resume instruction.
 // NOTE(review): original lines 5054-5055 (the emptiness check that guards
 // this early return) are missing from this extraction.
5056 return false;
5057
5058 SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
5059 auto *PhiLPInst = cast<PHINode>(RI->getValue());
5060
5061 // Check incoming blocks to see if any of them are trivial.
5062 for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
5063 Idx++) {
5064 auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
5065 auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
5066
5067 // If the block has other successors, we can not delete it because
5068 // it has other dependents.
5069 if (IncomingBB->getUniqueSuccessor() != BB)
5070 continue;
5071
5072 auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI());
5073 // Not the landing pad that caused the control to branch here.
5074 if (IncomingValue != LandingPad)
5075 continue;
5076
 // NOTE(review): original line 5077 (the call checking that the rest of
 // the block is empty) is missing from this extraction.
5078 make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
5079 TrivialUnwindBlocks.insert(IncomingBB);
5080 }
5081
5082 // If no trivial unwind blocks, don't do any simplifications.
5083 if (TrivialUnwindBlocks.empty())
5084 return false;
5085
5086 // Turn all invokes that unwind here into calls.
5087 for (auto *TrivialBB : TrivialUnwindBlocks) {
5088 // Blocks that will be simplified should be removed from the phi node.
5089 // Note there could be multiple edges to the resume block, and we need
5090 // to remove them all.
5091 while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
5092 BB->removePredecessor(TrivialBB, true);
5093
5094 for (BasicBlock *Pred :
 // NOTE(review): original line 5095 (the predecessor range expression of
 // this loop) is missing from this extraction.
5096 removeUnwindEdge(Pred, DTU);
5097 ++NumInvokes;
5098 }
5099
5100 // In each SimplifyCFG run, only the current processed block can be erased.
5101 // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
5102 // of erasing TrivialBB, we only remove the branch to the common resume
5103 // block so that we can later erase the resume block since it has no
5104 // predecessors.
5105 TrivialBB->getTerminator()->eraseFromParent();
5106 new UnreachableInst(RI->getContext(), TrivialBB);
5107 if (DTU)
5108 DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
5109 }
5110
5111 // Delete the resume block if all its predecessors have been removed.
5112 if (pred_empty(BB))
5113 DeleteDeadBlock(BB, DTU);
5114
5115 return !TrivialUnwindBlocks.empty();
5116}
5117
5118// Simplify resume that is only used by a single (non-phi) landing pad.
//
// If the block contains nothing but the landingpad, benign intrinsics and the
// resume, every invoke unwinding here is turned into a plain call and the
// now-unreachable landing pad block is deleted. Returns true on success.
5119bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
5120 BasicBlock *BB = RI->getParent();
5121 auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHI());
5122 assert(RI->getValue() == LPInst &&
5123 "Resume must unwind the exception that caused control to here");
5124
5125 // Check that there are no other instructions except for debug intrinsics.
 // NOTE(review): original line 5126 (the start of the emptiness check this
 // range argument belongs to) is missing from this extraction.
5127 make_range<Instruction *>(LPInst->getNextNode(), RI)))
5128 return false;
5129
5130 // Turn all invokes that unwind here into calls and delete the basic block.
 // NOTE(review): original line 5131 (the predecessor-iteration loop header)
 // is missing from this extraction.
5132 removeUnwindEdge(Pred, DTU);
5133 ++NumInvokes;
5134 }
5135
5136 // The landingpad is now unreachable. Zap it.
5137 DeleteDeadBlock(BB, DTU);
5138 return true;
5139}
5140
5142 // If this is a trivial cleanup pad that executes no instructions, it can be
5143 // eliminated. If the cleanup pad continues to the caller, any predecessor
5144 // that is an EH pad will be updated to continue to the caller and any
5145 // predecessor that terminates with an invoke instruction will have its invoke
5146 // instruction converted to a call instruction. If the cleanup pad being
5147 // simplified does not continue to the caller, each predecessor will be
5148 // updated to continue to the unwind destination of the cleanup pad being
5149 // simplified.
5150 BasicBlock *BB = RI->getParent();
5151 CleanupPadInst *CPInst = RI->getCleanupPad();
5152 if (CPInst->getParent() != BB)
5153 // This isn't an empty cleanup.
5154 return false;
5155
5156 // We cannot kill the pad if it has multiple uses. This typically arises
5157 // from unreachable basic blocks.
5158 if (!CPInst->hasOneUse())
5159 return false;
5160
5161 // Check that there are no other instructions except for benign intrinsics.
5163 make_range<Instruction *>(CPInst->getNextNode(), RI)))
5164 return false;
5165
5166 // If the cleanup return we are simplifying unwinds to the caller, this will
5167 // set UnwindDest to nullptr.
5168 BasicBlock *UnwindDest = RI->getUnwindDest();
5169 Instruction *DestEHPad = UnwindDest ? UnwindDest->getFirstNonPHI() : nullptr;
5170
5171 // We're about to remove BB from the control flow. Before we do, sink any
5172 // PHINodes into the unwind destination. Doing this before changing the
5173 // control flow avoids some potentially slow checks, since we can currently
5174 // be certain that UnwindDest and BB have no common predecessors (since they
5175 // are both EH pads).
5176 if (UnwindDest) {
5177 // First, go through the PHI nodes in UnwindDest and update any nodes that
5178 // reference the block we are removing
5179 for (PHINode &DestPN : UnwindDest->phis()) {
5180 int Idx = DestPN.getBasicBlockIndex(BB);
5181 // Since BB unwinds to UnwindDest, it has to be in the PHI node.
5182 assert(Idx != -1);
5183 // This PHI node has an incoming value that corresponds to a control
5184 // path through the cleanup pad we are removing. If the incoming
5185 // value is in the cleanup pad, it must be a PHINode (because we
5186 // verified above that the block is otherwise empty). Otherwise, the
5187 // value is either a constant or a value that dominates the cleanup
5188 // pad being removed.
5189 //
5190 // Because BB and UnwindDest are both EH pads, all of their
5191 // predecessors must unwind to these blocks, and since no instruction
5192 // can have multiple unwind destinations, there will be no overlap in
5193 // incoming blocks between SrcPN and DestPN.
5194 Value *SrcVal = DestPN.getIncomingValue(Idx);
5195 PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
5196
5197 bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
5198 for (auto *Pred : predecessors(BB)) {
5199 Value *Incoming =
5200 NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
5201 DestPN.addIncoming(Incoming, Pred);
5202 }
5203 }
5204
5205 // Sink any remaining PHI nodes directly into UnwindDest.
5206 Instruction *InsertPt = DestEHPad;
5207 for (PHINode &PN : make_early_inc_range(BB->phis())) {
5208 if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
5209 // If the PHI node has no uses or all of its uses are in this basic
5210 // block (meaning they are debug or lifetime intrinsics), just leave
5211 // it. It will be erased when we erase BB below.
5212 continue;
5213
5214 // Otherwise, sink this PHI node into UnwindDest.
5215 // Any predecessors to UnwindDest which are not already represented
5216 // must be back edges which inherit the value from the path through
5217 // BB. In this case, the PHI value must reference itself.
5218 for (auto *pred : predecessors(UnwindDest))
5219 if (pred != BB)
5220 PN.addIncoming(&PN, pred);
5221 PN.moveBefore(InsertPt);
5222 // Also, add a dummy incoming value for the original BB itself,
5223 // so that the PHI is well-formed until we drop said predecessor.
5224 PN.addIncoming(PoisonValue::get(PN.getType()), BB);
5225 }
5226 }
5227
5228 std::vector<DominatorTree::UpdateType> Updates;
5229
5230 // We use make_early_inc_range here because we will remove all predecessors.
5232 if (UnwindDest == nullptr) {
5233 if (DTU) {
5234 DTU->applyUpdates(Updates);
5235 Updates.clear();
5236 }
5237 removeUnwindEdge(PredBB, DTU);
5238 ++NumInvokes;
5239 } else {
5240 BB->removePredecessor(PredBB);
5241 Instruction *TI = PredBB->getTerminator();
5242 TI->replaceUsesOfWith(BB, UnwindDest);
5243 if (DTU) {
5244 Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
5245 Updates.push_back({DominatorTree::Delete, PredBB, BB});
5246 }
5247 }
5248 }
5249
5250 if (DTU)
5251 DTU->applyUpdates(Updates);
5252
5253 DeleteDeadBlock(BB, DTU);
5254
5255 return true;
5256}
5257
5258// Try to merge two cleanuppads together.
5260 // Skip any cleanuprets which unwind to caller, there is nothing to merge
5261 // with.
5262 BasicBlock *UnwindDest = RI->getUnwindDest();
5263 if (!UnwindDest)
5264 return false;
5265
5266 // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
5267 // be safe to merge without code duplication.
5268 if (UnwindDest->getSinglePredecessor() != RI->getParent())
5269 return false;
5270
5271 // Verify that our cleanuppad's unwind destination is another cleanuppad.
5272 auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
5273 if (!SuccessorCleanupPad)
5274 return false;
5275
5276 CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
5277 // Replace any uses of the successor cleanupad with the predecessor pad
5278 // The only cleanuppad uses should be this cleanupret, it's cleanupret and
5279 // funclet bundle operands.
5280 SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
5281 // Remove the old cleanuppad.
5282 SuccessorCleanupPad->eraseFromParent();
5283 // Now, we simply replace the cleanupret with a branch to the unwind
5284 // destination.
5285 BranchInst::Create(UnwindDest, RI->getParent());
5286 RI->eraseFromParent();
5287
5288 return true;
5289}
5290
5291bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5292 // It is possible to transiantly have an undef cleanuppad operand because we
5293 // have deleted some, but not all, dead blocks.
5294 // Eventually, this block will be deleted.
5295 if (isa<UndefValue>(RI->getOperand(0)))
5296 return false;
5297
5298 if (mergeCleanupPad(RI))
5299 return true;
5300
5301 if (removeEmptyCleanup(RI, DTU))
5302 return true;
5303
5304 return false;
5305}
5306
5307// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
5308bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
5309 BasicBlock *BB = UI->getParent();
5310
5311 bool Changed = false;
5312
5313 // Ensure that any debug-info records that used to occur after the Unreachable
5314 // are moved to in front of it -- otherwise they'll "dangle" at the end of
5315 // the block.
5317
5318 // Debug-info records on the unreachable inst itself should be deleted, as
5319 // below we delete everything past the final executable instruction.
5320 UI->dropDbgRecords();
5321
5322 // If there are any instructions immediately before the unreachable that can
5323 // be removed, do so.
5324 while (UI->getIterator() != BB->begin()) {
5326 --BBI;
5327
5329 break; // Can not drop any more instructions. We're done here.
5330 // Otherwise, this instruction can be freely erased,
5331 // even if it is not side-effect free.
5332
5333 // Note that deleting EH's here is in fact okay, although it involves a bit
5334 // of subtle reasoning. If this inst is an EH, all the predecessors of this
5335 // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
5336 // and we can therefore guarantee this block will be erased.
5337
5338 // If we're deleting this, we're deleting any subsequent debug info, so
5339 // delete DbgRecords.
5340 BBI->dropDbgRecords();
5341
5342 // Delete this instruction (any uses are guaranteed to be dead)
5343 BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
5344 BBI->eraseFromParent();
5345 Changed = true;
5346 }
5347
5348 // If the unreachable instruction is the first in the block, take a gander
5349 // at all of the predecessors of this instruction, and simplify them.
5350 if (&BB->front() != UI)
5351 return Changed;
5352
5353 std::vector<DominatorTree::UpdateType> Updates;
5354
5356 for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
5357 auto *Predecessor = Preds[i];
5358 Instruction *TI = Predecessor->getTerminator();
5359 IRBuilder<> Builder(TI);
5360 if (auto *BI = dyn_cast<BranchInst>(TI)) {
5361 // We could either have a proper unconditional branch,
5362 // or a degenerate conditional branch with matching destinations.
5363 if (all_of(BI->successors(),
5364 [BB](auto *Successor) { return Successor == BB; })) {
5365 new UnreachableInst(TI->getContext(), TI->getIterator());
5366 TI->eraseFromParent();
5367 Changed = true;
5368 } else {
5369 assert(BI->isConditional() && "Can't get here with an uncond branch.");
5370 Value* Cond = BI->getCondition();
5371 assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
5372 "The destinations are guaranteed to be different here.");
5373 CallInst *Assumption;
5374 if (BI->getSuccessor(0) == BB) {
5375 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
5376 Builder.CreateBr(BI->getSuccessor(1));
5377 } else {
5378 assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
5379 Assumption = Builder.CreateAssumption(Cond);
5380 Builder.CreateBr(BI->getSuccessor(0));
5381 }
5382 if (Options.AC)
5383 Options.AC->registerAssumption(cast<AssumeInst>(Assumption));
5384
5386 Changed = true;
5387 }
5388 if (DTU)
5389 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5390 } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
5392 for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
5393 if (i->getCaseSuccessor() != BB) {
5394 ++i;
5395 continue;
5396 }
5397 BB->removePredecessor(SU->getParent());
5398 i = SU.removeCase(i);
5399 e = SU->case_end();
5400 Changed = true;
5401 }
5402 // Note that the default destination can't be removed!
5403 if (DTU && SI->getDefaultDest() != BB)
5404 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5405 } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
5406 if (II->getUnwindDest() == BB) {
5407 if (DTU) {
5408 DTU->applyUpdates(Updates);
5409 Updates.clear();
5410 }
5411 auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
5412 if (!CI->doesNotThrow())
5413 CI->setDoesNotThrow();
5414 Changed = true;
5415 }
5416 } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
5417 if (CSI->getUnwindDest() == BB) {
5418 if (DTU) {
5419 DTU->applyUpdates(Updates);
5420 Updates.clear();
5421 }
5422 removeUnwindEdge(TI->getParent(), DTU);
5423 Changed = true;
5424 continue;
5425 }
5426
5427 for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
5428 E = CSI->handler_end();
5429 I != E; ++I) {
5430 if (*I == BB) {
5431 CSI->removeHandler(I);
5432 --I;
5433 --E;
5434 Changed = true;
5435 }
5436 }
5437 if (DTU)
5438 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5439 if (CSI->getNumHandlers() == 0) {
5440 if (CSI->hasUnwindDest()) {
5441 // Redirect all predecessors of the block containing CatchSwitchInst
5442 // to instead branch to the CatchSwitchInst's unwind destination.
5443 if (DTU) {
5444 for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
5445 Updates.push_back({DominatorTree::Insert,
5446 PredecessorOfPredecessor,
5447 CSI->getUnwindDest()});
5448 Updates.push_back({DominatorTree::Delete,
5449 PredecessorOfPredecessor, Predecessor});
5450 }
5451 }
5452 Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
5453 } else {
5454 // Rewrite all preds to unwind to caller (or from invoke to call).
5455 if (DTU) {
5456 DTU->applyUpdates(Updates);
5457 Updates.clear();
5458 }
5459 SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
5460 for (BasicBlock *EHPred : EHPreds)
5461 removeUnwindEdge(EHPred, DTU);
5462 }
5463 // The catchswitch is no longer reachable.
5464 new UnreachableInst(CSI->getContext(), CSI->getIterator());
5465 CSI->eraseFromParent();
5466 Changed = true;
5467 }
5468 } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
5469 (void)CRI;
5470 assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
5471 "Expected to always have an unwind to BB.");
5472 if (DTU)
5473 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5474 new UnreachableInst(TI->getContext(), TI->getIterator());
5475 TI->eraseFromParent();
5476 Changed = true;
5477 }
5478 }
5479
5480 if (DTU)
5481 DTU->applyUpdates(Updates);
5482
5483 // If this block is now dead, remove it.
5484 if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
5485 DeleteDeadBlock(BB, DTU);
5486 return true;
5487 }
5488
5489 return Changed;
5490}
5491
5493 assert(Cases.size() >= 1);
5494
5496 for (size_t I = 1, E = Cases.size(); I != E; ++I) {
5497 if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
5498 return false;
5499 }
5500 return true;
5501}
5502
5504 DomTreeUpdater *DTU) {
5505 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
5506 auto *BB = Switch->getParent();
5507 auto *OrigDefaultBlock = Switch->getDefaultDest();
5508 OrigDefaultBlock->removePredecessor(BB);
5509 BasicBlock *NewDefaultBlock = BasicBlock::Create(
5510 BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
5511 OrigDefaultBlock);
5512 new UnreachableInst(Switch->getContext(), NewDefaultBlock);
5513 Switch->setDefaultDest(&*NewDefaultBlock);
5514 if (DTU) {
5516 Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
5517 if (!is_contained(successors(BB), OrigDefaultBlock))
5518 Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
5519 DTU->applyUpdates(Updates);
5520 }
5521}
5522
5523/// Turn a switch into an integer range comparison and branch.
5524/// Switches with more than 2 destinations are ignored.
5525/// Switches with 1 destination are also ignored.
5526bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
5527 IRBuilder<> &Builder) {
5528 assert(SI->getNumCases() > 1 && "Degenerate switch?");
5529
5530 bool HasDefault =
5531 !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
5532
5533 auto *BB = SI->getParent();
5534
5535 // Partition the cases into two sets with different destinations.
5536 BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
5537 BasicBlock *DestB = nullptr;
5540
5541 for (auto Case : SI->cases()) {
5542 BasicBlock *Dest = Case.getCaseSuccessor();
5543 if (!DestA)
5544 DestA = Dest;
5545 if (Dest == DestA) {
5546 CasesA.push_back(Case.getCaseValue());
5547 continue;
5548 }
5549 if (!DestB)
5550 DestB = Dest;
5551 if (Dest == DestB) {
5552 CasesB.push_back(Case.getCaseValue());
5553 continue;
5554 }
5555 return false; // More than two destinations.
5556 }
5557 if (!DestB)
5558 return false; // All destinations are the same and the default is unreachable
5559
5560 assert(DestA && DestB &&
5561 "Single-destination switch should have been folded.");
5562 assert(DestA != DestB);
5563 assert(DestB != SI->getDefaultDest());
5564 assert(!CasesB.empty() && "There must be non-default cases.");
5565 assert(!CasesA.empty() || HasDefault);
5566
5567 // Figure out if one of the sets of cases form a contiguous range.
5568 SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
5569 BasicBlock *ContiguousDest = nullptr;
5570 BasicBlock *OtherDest = nullptr;
5571 if (!CasesA.empty() && CasesAreContiguous(CasesA)) {
5572 ContiguousCases = &CasesA;
5573 ContiguousDest = DestA;
5574 OtherDest = DestB;
5575 } else if (CasesAreContiguous(CasesB)) {
5576 ContiguousCases = &CasesB;
5577 ContiguousDest = DestB;
5578 OtherDest = DestA;
5579 } else
5580 return false;
5581
5582 // Start building the compare and branch.
5583
5584 Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
5585 Constant *NumCases =
5586 ConstantInt::get(Offset->getType(), ContiguousCases->size());
5587
5588 Value *Sub = SI->getCondition();
5589 if (!Offset->isNullValue())
5590 Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
5591
5592 Value *Cmp;
5593 // If NumCases overflowed, then all possible values jump to the successor.
5594 if (NumCases->isNullValue() && !ContiguousCases->empty())
5595 Cmp = ConstantInt::getTrue(SI->getContext());
5596 else
5597 Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
5598 BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);
5599
5600 // Update weight for the newly-created conditional branch.
5601 if (hasBranchWeightMD(*SI)) {
5603 GetBranchWeights(SI, Weights);
5604 if (Weights.size() == 1 + SI->getNumCases()) {
5605 uint64_t TrueWeight = 0;
5606 uint64_t FalseWeight = 0;
5607 for (size_t I = 0, E = Weights.size(); I != E; ++I) {
5608 if (SI->getSuccessor(I) == ContiguousDest)
5609 TrueWeight += Weights[I];
5610 else
5611 FalseWeight += Weights[I];
5612 }
5613 while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
5614 TrueWeight /= 2;
5615 FalseWeight /= 2;
5616 }
5617 setBranchWeights(NewBI, TrueWeight, FalseWeight);
5618 }
5619 }
5620
5621 // Prune obsolete incoming values off the successors' PHI nodes.
5622 for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
5623 unsigned PreviousEdges = ContiguousCases->size();
5624 if (ContiguousDest == SI->getDefaultDest())
5625 ++PreviousEdges;
5626 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
5627 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
5628 }
5629 for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
5630 unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
5631 if (OtherDest == SI->getDefaultDest())
5632 ++PreviousEdges;
5633 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
5634 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
5635 }
5636
5637 // Clean up the default block - it may have phis or other instructions before
5638 // the unreachable terminator.
5639 if (!HasDefault)
5641
5642 auto *UnreachableDefault = SI->getDefaultDest();
5643
5644 // Drop the switch.
5645 SI->eraseFromParent();
5646
5647 if (!HasDefault && DTU)
5648 DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});
5649
5650 return true;
5651}
5652
5653/// Compute masked bits for the condition of a switch
5654/// and use it to remove dead cases.
5656 AssumptionCache *AC,
5657 const DataLayout &DL) {
5658 Value *Cond = SI->getCondition();
5659 KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI);
5660
5661 // We can also eliminate cases by determining that their values are outside of
5662 // the limited range of the condition based on how many significant (non-sign)
5663 // bits are in the condition value.
5664 unsigned MaxSignificantBitsInCond =
5665 ComputeMaxSignificantBits(Cond, DL, 0, AC, SI);
5666
5667 // Gather dead cases.
5669 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
5670 SmallVector<BasicBlock *, 8> UniqueSuccessors;
5671 for (const auto &Case : SI->cases()) {
5672 auto *Successor = Case.getCaseSuccessor();
5673 if (DTU) {
5674 if (!NumPerSuccessorCases.count(Successor))
5675 UniqueSuccessors.push_back(Successor);
5676 ++NumPerSuccessorCases[Successor];
5677 }
5678 const APInt &CaseVal = Case.getCaseValue()->getValue();
5679 if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
5680 (CaseVal.getSignificantBits() > MaxSignificantBitsInCond)) {
5681 DeadCases.push_back(Case.getCaseValue());
5682 if (DTU)
5683 --NumPerSuccessorCases[Successor];
5684 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
5685 << " is dead.\n");
5686 }
5687 }
5688
5689 // If we can prove that the cases must cover all possible values, the
5690 // default destination becomes dead and we can remove it. If we know some
5691 // of the bits in the value, we can use that to more precisely compute the
5692 // number of possible unique case values.
5693 bool HasDefault =
5694 !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
5695 const unsigned NumUnknownBits =
5696 Known.getBitWidth() - (Known.Zero | Known.One).popcount();
5697 assert(NumUnknownBits <= Known.getBitWidth());
5698 if (HasDefault && DeadCases.empty() &&
5699 NumUnknownBits < 64 /* avoid overflow */ &&
5700 SI->getNumCases() == (1ULL << NumUnknownBits)) {
5702 return true;
5703 }
5704
5705 if (DeadCases.empty())
5706 return false;
5707
5709 for (ConstantInt *DeadCase : DeadCases) {
5710 SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
5711 assert(CaseI != SI->case_default() &&
5712 "Case was not found. Probably mistake in DeadCases forming.");
5713 // Prune unused values from PHI nodes.
5714 CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
5715 SIW.removeCase(CaseI);
5716 }
5717
5718 if (DTU) {
5719 std::vector<DominatorTree::UpdateType> Updates;
5720 for (auto *Successor : UniqueSuccessors)
5721 if (NumPerSuccessorCases[Successor] == 0)
5722 Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
5723 DTU->applyUpdates(Updates);
5724 }
5725
5726 return true;
5727}
5728
5729/// If BB would be eligible for simplification by
5730/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
5731/// by an unconditional branch), look at the phi node for BB in the successor
5732/// block and see if the incoming value is equal to CaseValue. If so, return
5733/// the phi node, and set PhiIndex to BB's index in the phi node.
5735 BasicBlock *BB, int *PhiIndex) {
5736 if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
5737 return nullptr; // BB must be empty to be a candidate for simplification.
5738 if (!BB->getSinglePredecessor())
5739 return nullptr; // BB must be dominated by the switch.
5740
5741 BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
5742 if (!Branch || !Branch->isUnconditional())
5743 return nullptr; // Terminator must be unconditional branch.
5744
5745 BasicBlock *Succ = Branch->getSuccessor(0);
5746
5747 for (PHINode &PHI : Succ->phis()) {
5748 int Idx = PHI.getBasicBlockIndex(BB);
5749 assert(Idx >= 0 && "PHI has no entry for predecessor?");
5750
5751 Value *InValue = PHI.getIncomingValue(Idx);
5752 if (InValue != CaseValue)
5753 continue;
5754
5755 *PhiIndex = Idx;
5756 return &PHI;
5757 }
5758
5759 return nullptr;
5760}
5761
5762/// Try to forward the condition of a switch instruction to a phi node
5763/// dominated by the switch, if that would mean that some of the destination
5764/// blocks of the switch can be folded away. Return true if a change is made.
5766 using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
5767
5768 ForwardingNodesMap ForwardingNodes;
5769 BasicBlock *SwitchBlock = SI->getParent();
5770 bool Changed = false;
5771 for (const auto &Case : SI->cases()) {
5772 ConstantInt *CaseValue = Case.getCaseValue();
5773 BasicBlock *CaseDest = Case.getCaseSuccessor();
5774
5775 // Replace phi operands in successor blocks that are using the constant case
5776 // value rather than the switch condition variable:
5777 // switchbb:
5778 // switch i32 %x, label %default [
5779 // i32 17, label %succ
5780 // ...
5781 // succ:
5782 // %r = phi i32 ... [ 17, %switchbb ] ...
5783 // -->
5784 // %r = phi i32 ... [ %x, %switchbb ] ...
5785
5786 for (PHINode &Phi : CaseDest->phis()) {
5787 // This only works if there is exactly 1 incoming edge from the switch to
5788 // a phi. If there is >1, that means multiple cases of the switch map to 1
5789 // value in the phi, and that phi value is not the switch condition. Thus,
5790 // this transform would not make sense (the phi would be invalid because
5791 // a phi can't have different incoming values from the same block).
5792 int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
5793 if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
5794 count(Phi.blocks(), SwitchBlock) == 1) {
5795 Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
5796 Changed = true;
5797 }
5798 }
5799
5800 // Collect phi nodes that are indirectly using this switch's case constants.
5801 int PhiIdx;
5802 if (auto *Phi = FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
5803 ForwardingNodes[Phi].push_back(PhiIdx);
5804 }
5805
5806 for (auto &ForwardingNode : ForwardingNodes) {
5807 PHINode *Phi = ForwardingNode.first;
5808 SmallVectorImpl<int> &Indexes = ForwardingNode.second;
5809 if (Indexes.size() < 2)
5810 continue;
5811
5812 for (int Index : Indexes)
5813 Phi->setIncomingValue(Index, SI->getCondition());
5814 Changed = true;
5815 }
5816
5817 return Changed;
5818}
5819
5820/// Return true if the backend will be able to handle
5821/// initializing an array of constants like C.
5823 if (C->isThreadDependent())
5824 return false;
5825 if (C->isDLLImportDependent())
5826 return false;
5827
5828 if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
5829 !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) &&
5830 !isa<UndefValue>(C) && !isa<ConstantExpr>(C))
5831 return false;
5832
5833 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
5834 // Pointer casts and in-bounds GEPs will not prohibit the backend from
5835 // materializing the array of constants.
5836 Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
5837 if (StrippedC == C || !ValidLookupTableConstant(StrippedC, TTI))
5838 return false;
5839 }
5840
5842 return false;
5843
5844 return true;
5845}
5846
5847/// If V is a Constant, return it. Otherwise, try to look up
5848/// its constant value in ConstantPool, returning 0 if it's not there.
5849static Constant *
5852 if (Constant *C = dyn_cast<Constant>(V))
5853 return C;
5854 return ConstantPool.lookup(V);
5855}
5856
5857/// Try to fold instruction I into a constant. This works for
5858/// simple instructions such as binary operations where both operands are
5859/// constant or can be replaced by constants from the ConstantPool. Returns the
5860/// resulting constant on success, 0 otherwise.
5861static Constant *
5864 if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
5865 Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
5866 if (!A)
5867 return nullptr;
5868 if (A->isAllOnesValue())
5869 return LookupConstant(Select->getTrueValue(), ConstantPool);
5870 if (A->isNullValue())
5871 return LookupConstant(Select->getFalseValue(), ConstantPool);
5872 return nullptr;
5873 }
5874
5876 for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
5877 if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool))
5878 COps.push_back(A);
5879 else
5880 return nullptr;
5881 }
5882
5883 return ConstantFoldInstOperands(I, COps, DL);
5884}
5885
5886/// Try to determine the resulting constant values in phi nodes
5887/// at the common destination basic block, *CommonDest, for one of the case
5888/// destionations CaseDest corresponding to value CaseVal (0 for the default
5889/// case), of a switch instruction SI.
5890static bool
5892 BasicBlock **CommonDest,
5893 SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
5894 const DataLayout &DL, const TargetTransformInfo &TTI) {
5895 // The block from which we enter the common destination.
5896 BasicBlock *Pred = SI->getParent();
5897
5898 // If CaseDest is empty except for some side-effect free instructions through
5899 // which we can constant-propagate the CaseVal, continue to its successor.
5901 ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
5902 for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
5903 if (I.isTerminator()) {
5904 // If the terminator is a simple branch, continue to the next block.
5905 if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
5906 return false;
5907 Pred = CaseDest;
5908 CaseDest = I.getSuccessor(0);
5909 } else if (Constant *C = ConstantFold(&I, DL, ConstantPool)) {
5910 // Instruction is side-effect free and constant.
5911
5912 // If the instruction has uses outside this block or a phi node slot for
5913 // the block, it is not safe to bypass the instruction since it would then
5914 // no longer dominate all its uses.
5915 for (auto &Use : I.uses()) {
5916 User *User = Use.getUser();
5917 if (Instruction *I = dyn_cast<Instruction>(User))
5918 if (I->getParent() == CaseDest)
5919 continue;
5920 if (PHINode *Phi = dyn_cast<PHINode>(User))
5921 if (Phi->getIncomingBlock(Use) == CaseDest)
5922 continue;
5923 return false;
5924 }
5925
5926 ConstantPool.insert(std::make_pair(&I, C));
5927 } else {
5928 break;
5929 }
5930 }
5931
5932 // If we did not have a CommonDest before, use the current one.
5933 if (!*CommonDest)
5934 *CommonDest = CaseDest;
5935 // If the destination isn't the common one, abort.
5936 if (CaseDest != *CommonDest)
5937 return false;
5938
5939 // Get the values for this case from phi nodes in the destination block.
5940 for (PHINode &PHI : (*CommonDest)->phis()) {
5941 int Idx = PHI.getBasicBlockIndex(Pred);
5942 if (Idx == -1)
5943 continue;
5944
5945 Constant *ConstVal =
5946 LookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
5947 if (!ConstVal)
5948 return false;
5949
5950 // Be conservative about which kinds of constants we support.
5951 if (!ValidLookupTableConstant(ConstVal, TTI))
5952 return false;
5953
5954 Res.push_back(std::make_pair(&PHI, ConstVal));
5955 }
5956
5957 return Res.size() > 0;
5958}
5959
5960// Helper function used to add CaseVal to the list of cases that generate
5961// Result. Returns the updated number of cases that generate this result.
5962static size_t mapCaseToResult(ConstantInt *CaseVal,
5963 SwitchCaseResultVectorTy &UniqueResults,
5964 Constant *Result) {
5965 for (auto &I : UniqueResults) {
5966 if (I.first == Result) {
5967 I.second.push_back(CaseVal);
5968 return I.second.size();
5969 }
5970 }
5971 UniqueResults.push_back(
5972 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
5973 return 1;
5974}
5975
5976// Helper function that initializes a map containing
5977// results for the PHI node of the common destination block for a switch
5978// instruction. Returns false if multiple PHI nodes have been found or if
5979// there is not a common destination block for the switch.
5981 BasicBlock *&CommonDest,
5982 SwitchCaseResultVectorTy &UniqueResults,
5983 Constant *&DefaultResult,
5984 const DataLayout &DL,
5985 const TargetTransformInfo &TTI,
5986 uintptr_t MaxUniqueResults) {
5987 for (const auto &I : SI->cases()) {
5988 ConstantInt *CaseVal = I.getCaseValue();
5989
5990 // Resulting value at phi nodes for this case value.
5991 SwitchCaseResultsTy Results;
5992 if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
5993 DL, TTI))
5994 return false;
5995
5996 // Only one value per case is permitted.
5997 if (Results.size() > 1)
5998 return false;
5999
6000 // Add the case->result mapping to UniqueResults.
6001 const size_t NumCasesForResult =
6002 mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
6003
6004 // Early out if there are too many cases for this result.
6005 if (NumCasesForResult > MaxSwitchCasesPerResult)
6006 return false;
6007
6008 // Early out if there are too many unique results.
6009 if (UniqueResults.size() > MaxUniqueResults)
6010 return false;
6011
6012 // Check the PHI consistency.
6013 if (!PHI)
6014 PHI = Results[0].first;
6015 else if (PHI != Results[0].first)
6016 return false;
6017 }
6018 // Find the default result value.
6020 BasicBlock *DefaultDest = SI->getDefaultDest();
6021 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
6022 DL, TTI);
6023 // If the default value is not found abort unless the default destination
6024 // is unreachable.
6025 DefaultResult =
6026 DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
6027 if ((!DefaultResult &&
6028 !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())))
6029 return false;
6030
6031 return true;
6032}
6033
6034// Helper function that checks if it is possible to transform a switch with only
6035// two cases (or two cases + default) that produces a result into a select.
6036// TODO: Handle switches with more than 2 cases that map to the same result.
6037static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
6038 Constant *DefaultResult, Value *Condition,
6039 IRBuilder<> &Builder) {
6040 // If we are selecting between only two cases transform into a simple
6041 // select or a two-way select if default is possible.
6042 // Example:
6043 // switch (a) { %0 = icmp eq i32 %a, 10
6044 // case 10: return 42; %1 = select i1 %0, i32 42, i32 4
6045 // case 20: return 2; ----> %2 = icmp eq i32 %a, 20
6046 // default: return 4; %3 = select i1 %2, i32 2, i32 %1
6047 // }
6048 if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
6049 ResultVector[1].second.size() == 1) {
6050 ConstantInt *FirstCase = ResultVector[0].second[0];
6051 ConstantInt *SecondCase = ResultVector[1].second[0];
6052 Value *SelectValue = ResultVector[1].first;
6053 if (DefaultResult) {
6054 Value *ValueCompare =
6055 Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
6056 SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
6057 DefaultResult, "switch.select");
6058 }
6059 Value *ValueCompare =
6060 Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
6061 return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
6062 SelectValue, "switch.select");
6063 }
6064
6065 // Handle the degenerate case where two cases have the same result value.
6066 if (ResultVector.size() == 1 && DefaultResult) {
6067 ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
6068 unsigned CaseCount = CaseValues.size();
6069 // n bits group cases map to the same result:
6070 // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
6071 // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
6072 // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
6073 if (isPowerOf2_32(CaseCount)) {
6074 ConstantInt *MinCaseVal = CaseValues[0];
6075 // Find mininal value.
6076 for (auto *Case : CaseValues)
6077 if (Case->getValue().slt(MinCaseVal->getValue()))
6078 MinCaseVal = Case;
6079
6080 // Mark the bits case number touched.
6081 APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
6082 for (auto *Case : CaseValues)
6083 BitMask |= (Case->getValue() - MinCaseVal->getValue());
6084
6085 // Check if cases with the same result can cover all number
6086 // in touched bits.
6087 if (BitMask.popcount() == Log2_32(CaseCount)) {
6088 if (!MinCaseVal->isNullValue())
6089 Condition = Builder.CreateSub(Condition, MinCaseVal);
6090 Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
6091 Value *Cmp = Builder.CreateICmpEQ(
6092 And, Constant::getNullValue(And->getType()), "switch.selectcmp");
6093 return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6094 }
6095 }
6096
6097 // Handle the degenerate case where two cases have the same value.
6098 if (CaseValues.size() == 2) {
6099 Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
6100 "switch.selectcmp.case1");
6101 Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
6102 "switch.selectcmp.case2");
6103 Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
6104 return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6105 }
6106 }
6107
6108 return nullptr;
6109}
6110
6111// Helper function to cleanup a switch instruction that has been converted into
6112// a select, fixing up PHI nodes and basic blocks.
6114 Value *SelectValue,
6115 IRBuilder<> &Builder,
6116 DomTreeUpdater *DTU) {
6117 std::vector<DominatorTree::UpdateType> Updates;
6118
6119 BasicBlock *SelectBB = SI->getParent();
6120 BasicBlock *DestBB = PHI->getParent();
6121
6122 if (DTU && !is_contained(predecessors(DestBB), SelectBB))
6123 Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6124 Builder.CreateBr(DestBB);
6125
6126 // Remove the switch.
6127
6128 PHI->removeIncomingValueIf(
6129 [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
6130 PHI->addIncoming(SelectValue, SelectBB);
6131
6132 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6133 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6134 BasicBlock *Succ = SI->getSuccessor(i);
6135
6136 if (Succ == DestBB)
6137 continue;
6138 Succ->removePredecessor(SelectBB);
6139 if (DTU && RemovedSuccessors.insert(Succ).second)
6140 Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6141 }
6142 SI->eraseFromParent();
6143 if (DTU)
6144 DTU->applyUpdates(Updates);
6145}
6146
6147/// If a switch is only used to initialize one or more phi nodes in a common
6148/// successor block with only two different constant values, try to replace the
6149/// switch with a select. Returns true if the fold was made.
6150static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
6151 DomTreeUpdater *DTU, const DataLayout &DL,
6152 const TargetTransformInfo &TTI) {
6153 Value *const Cond = SI->getCondition();
6154 PHINode *PHI = nullptr;
6155 BasicBlock *CommonDest = nullptr;
6156 Constant *DefaultResult;
6157 SwitchCaseResultVectorTy UniqueResults;
6158 // Collect all the cases that will deliver the same value from the switch.
6159 if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6160 DL, TTI, /*MaxUniqueResults*/ 2))
6161 return false;
6162
6163 assert(PHI != nullptr && "PHI for value select not found");
6164 Builder.SetInsertPoint(SI);
6165 Value *SelectValue =
6166 foldSwitchToSelect(UniqueResults, DefaultResult, Cond, Builder);
6167 if (!SelectValue)
6168 return false;
6169
6170 removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6171 return true;
6172}
6173
namespace {

/// This class represents a lookup table that can be used to replace a switch.
/// The representation (Kind) is chosen by the constructor based on the table
/// contents; BuildLookup then emits the matching retrieval code.
class SwitchLookupTable {
public:
  /// Create a lookup table to use as a switch replacement with the contents
  /// of Values, using DefaultValue to fill any holes in the table.
  SwitchLookupTable(
      Module &M, uint64_t TableSize, ConstantInt *Offset,
      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);

  /// Build instructions with Builder to retrieve the value at
  /// the position given by Index in the lookup table.
  Value *BuildLookup(Value *Index, IRBuilder<> &Builder);

  /// Return true if a table with TableSize elements of
  /// type ElementType would fit in a target-legal register.
  static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
                                 Type *ElementType);

private:
  // Depending on the contents of the table, it can be represented in
  // different ways.
  enum {
    // For tables where each element contains the same value, we just have to
    // store that single value and return it for each lookup.
    SingleValueKind,

    // For tables where there is a linear relationship between table index
    // and values. We calculate the result with a simple multiplication
    // and addition instead of a table lookup.
    LinearMapKind,

    // For small tables with integer elements, we can pack them into a bitmap
    // that fits into a target-legal register. Values are retrieved by
    // shift and mask operations.
    BitMapKind,

    // The table is stored as an array of values. Values are retrieved by load
    // instructions from the table.
    ArrayKind
  } Kind;

  // For SingleValueKind, this is the single value.
  Constant *SingleValue = nullptr;

  // For BitMapKind, this is the bitmap.
  ConstantInt *BitMap = nullptr;
  IntegerType *BitMapElementTy = nullptr;

  // For LinearMapKind, these are the constants used to derive the value
  // (result = LinearOffset + Index * LinearMultiplier); LinearMapValWrapped
  // records whether the mapping may wrap, which suppresses nsw flags.
  ConstantInt *LinearOffset = nullptr;
  ConstantInt *LinearMultiplier = nullptr;
  bool LinearMapValWrapped = false;

  // For ArrayKind, this is the array.
  GlobalVariable *Array = nullptr;
};

} // end anonymous namespace
6235
6236SwitchLookupTable::SwitchLookupTable(
6237 Module &M, uint64_t TableSize, ConstantInt *Offset,
6238 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6239 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) {
6240 assert(Values.size() && "Can't build lookup table without values!");
6241 assert(TableSize >= Values.size() && "Can't fit values in table!");
6242
6243 // If all values in the table are equal, this is that value.
6244 SingleValue = Values.begin()->second;
6245
6246 Type *ValueType = Values.begin()->second->getType();
6247
6248 // Build up the table contents.
6249 SmallVector<Constant *, 64> TableContents(TableSize);
6250 for (size_t I = 0, E = Values.size(); I != E; ++I) {
6251 ConstantInt *CaseVal = Values[I].first;
6252 Constant *CaseRes = Values[I].second;
6253 assert(CaseRes->getType() == ValueType);
6254
6255 uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
6256 TableContents[Idx] = CaseRes;
6257
6258 if (CaseRes != SingleValue)
6259 SingleValue = nullptr;
6260 }
6261
6262 // Fill in any holes in the table with the default result.
6263 if (Values.size() < TableSize) {
6264 assert(DefaultValue &&
6265 "Need a default value to fill the lookup table holes.");
6266 assert(DefaultValue->getType() == ValueType);
6267 for (uint64_t I = 0; I < TableSize; ++I) {
6268 if (!TableContents[I])
6269 TableContents[I] = DefaultValue;
6270 }
6271
6272 if (DefaultValue != SingleValue)
6273 SingleValue = nullptr;
6274 }
6275
6276 // If each element in the table contains the same value, we only need to store
6277 // that single value.
6278 if (SingleValue) {
6279 Kind = SingleValueKind;
6280 return;
6281 }
6282
6283 // Check if we can derive the value with a linear transformation from the
6284 // table index.
6285 if (isa<IntegerType>(ValueType)) {
6286 bool LinearMappingPossible = true;
6287 APInt PrevVal;
6288 APInt DistToPrev;
6289 // When linear map is monotonic and signed overflow doesn't happen on
6290 // maximum index, we can attach nsw on Add and Mul.
6291 bool NonMonotonic = false;
6292 assert(TableSize >= 2 && "Should be a SingleValue table.");
6293 // Check if there is the same distance between two consecutive values.
6294 for (uint64_t I = 0; I < TableSize; ++I) {
6295 ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
6296 if (!ConstVal) {
6297 // This is an undef. We could deal with it, but undefs in lookup tables
6298 // are very seldom. It's probably not worth the additional complexity.
6299 LinearMappingPossible = false;
6300 break;
6301 }
6302 const APInt &Val = ConstVal->getValue();
6303 if (I != 0) {
6304 APInt Dist = Val - PrevVal;
6305 if (I == 1) {
6306 DistToPrev = Dist;
6307 } else if (Dist != DistToPrev) {
6308 LinearMappingPossible = false;
6309 break;
6310 }
6311 NonMonotonic |=
6312 Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
6313 }
6314 PrevVal = Val;
6315 }
6316 if (LinearMappingPossible) {
6317 LinearOffset = cast<ConstantInt>(TableContents[0]);
6318 LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
6319 bool MayWrap = false;
6320 APInt M = LinearMultiplier->getValue();
6321 (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
6322 LinearMapValWrapped = NonMonotonic || MayWrap;
6323 Kind = LinearMapKind;
6324 ++NumLinearMaps;
6325 return;
6326 }
6327 }
6328
6329 // If the type is integer and the table fits in a register, build a bitmap.
6330 if (WouldFitInRegister(DL, TableSize, ValueType)) {
6331 IntegerType *IT = cast<IntegerType>(ValueType);
6332 APInt TableInt(TableSize * IT->getBitWidth(), 0);
6333 for (uint64_t I = TableSize; I > 0; --I) {
6334 TableInt <<= IT->getBitWidth();
6335 // Insert values into the bitmap. Undef values are set to zero.
6336 if (!isa<UndefValue>(TableContents[I - 1])) {
6337 ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
6338 TableInt |= Val->getValue().zext(TableInt.getBitWidth());
6339 }
6340 }
6341 BitMap = ConstantInt::get(M.getContext(), TableInt);
6342 BitMapElementTy = IT;
6343 Kind = BitMapKind;
6344 ++NumBitMaps;
6345 return;
6346 }
6347
6348 // Store the table in an array.
6349 ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
6350 Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
6351
6352 Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
6353 GlobalVariable::PrivateLinkage, Initializer,
6354 "switch.table." + FuncName);
6355 Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
6356 // Set the alignment to that of an array items. We will be only loading one
6357 // value out of it.
6358 Array->setAlignment(DL.getPrefTypeAlign(ValueType));
6359 Kind = ArrayKind;
6360}
6361
6362Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
6363 switch (Kind) {
6364 case SingleValueKind:
6365 return SingleValue;
6366 case LinearMapKind: {
6367 // Derive the result value from the input value.
6368 Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
6369 false, "switch.idx.cast");
6370 if (!LinearMultiplier->isOne())
6371 Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
6372 /*HasNUW = */ false,
6373 /*HasNSW = */ !LinearMapValWrapped);
6374
6375 if (!LinearOffset->isZero())
6376 Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
6377 /*HasNUW = */ false,
6378 /*HasNSW = */ !LinearMapValWrapped);
6379 return Result;
6380 }
6381 case BitMapKind: {
6382 // Type of the bitmap (e.g. i59).
6383 IntegerType *MapTy = BitMap->getIntegerType();
6384
6385 // Cast Index to the same type as the bitmap.
6386 // Note: The Index is <= the number of elements in the table, so
6387 // truncating it to the width of the bitmask is safe.
6388 Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
6389
6390 // Multiply the shift amount by the element width. NUW/NSW can always be
6391 // set, because WouldFitInRegister guarantees Index * ShiftAmt is in
6392 // BitMap's bit width.
6393 ShiftAmt = Builder.CreateMul(
6394 ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
6395 "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);
6396
6397 // Shift down.
6398 Value *DownShifted =
6399 Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
6400 // Mask off.
6401 return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
6402 }
6403 case ArrayKind: {
6404 // Make sure the table index will not overflow when treated as signed.
6405 IntegerType *IT = cast<IntegerType>(Index->getType());
6406 uint64_t TableSize =
6407 Array->getInitializer()->getType()->getArrayNumElements();
6408 if (TableSize > (1ULL << std::min(IT->getBitWidth() - 1, 63u)))
6409 Index = Builder.CreateZExt(
6410 Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1),
6411 "switch.tableidx.zext");
6412
6413 Value *GEPIndices[] = {Builder.getInt32(0), Index};
6414 Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
6415 GEPIndices, "switch.gep");
6416 return Builder.CreateLoad(
6417 cast<ArrayType>(Array->getValueType())->getElementType(), GEP,
6418 "switch.load");
6419 }
6420 }
6421 llvm_unreachable("Unknown lookup table kind!");
6422}
6423
6424bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL,
6425 uint64_t TableSize,
6426 Type *ElementType) {
6427 auto *IT = dyn_cast<IntegerType>(ElementType);
6428 if (!IT)
6429 return false;
6430 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
6431 // are <= 15, we could try to narrow the type.
6432
6433 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
6434 if (TableSize >= UINT_MAX / IT->getBitWidth())
6435 return false;
6436 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
6437}
6438
6440 const DataLayout &DL) {
6441 // Allow any legal type.
6442 if (TTI.isTypeLegal(Ty))
6443 return true;
6444
6445 auto *IT = dyn_cast<IntegerType>(Ty);
6446 if (!IT)
6447 return false;
6448
6449 // Also allow power of 2 integer types that have at least 8 bits and fit in
6450 // a register. These types are common in frontend languages and targets
6451 // usually support loads of these types.
6452 // TODO: We could relax this to any integer that fits in a register and rely
6453 // on ABI alignment and padding in the table to allow the load to be widened.
6454 // Or we could widen the constants and truncate the load.
6455 unsigned BitWidth = IT->getBitWidth();
6456 return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
6457 DL.fitsInLegalInteger(IT->getBitWidth());
6458}
6459
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // A switch is "dense" when at least 40% of the values in its case range are
  // covered. 40% is the default density for building a jump table in
  // optsize/minsize mode; see also
  // TargetLoweringBase::isSuitableForJumpTable(), which this function was
  // based on.
  if (CaseRange >= UINT64_MAX / 100)
    return false; // Avoid multiplication overflows below.

  const uint64_t MinDensity = 40; // percent
  return NumCases * 100 >= CaseRange * MinDensity;
}
6471
6473 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
6474 uint64_t Range = Diff + 1;
6475 if (Range < Diff)
6476 return false; // Overflow.
6477
6478 return isSwitchDense(Values.size(), Range);
6479}
6480
6481/// Determine whether a lookup table should be built for this switch, based on
6482/// the number of cases, size of the table, and the types of the results.
6483// TODO: We could support larger than legal types by limiting based on the
6484// number of loads required and/or table size. If the constants are small we
6485// could use smaller table entries and extend after the load.
6486static bool
6488 const TargetTransformInfo &TTI, const DataLayout &DL,
6489 const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
6490 if (SI->getNumCases() > TableSize)
6491 return false; // TableSize overflowed.
6492
6493 bool AllTablesFitInRegister = true;
6494 bool HasIllegalType = false;
6495 for (const auto &I : ResultTypes) {
6496 Type *Ty = I.second;
6497
6498 // Saturate this flag to true.
6499 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
6500
6501 // Saturate this flag to false.
6502 AllTablesFitInRegister =
6503 AllTablesFitInRegister &&
6504 SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty);
6505
6506 // If both flags saturate, we're done. NOTE: This *only* works with
6507 // saturating flags, and all flags have to saturate first due to the
6508 // non-deterministic behavior of iterating over a dense map.
6509 if (HasIllegalType && !AllTablesFitInRegister)
6510 break;
6511 }
6512
6513 // If each table would fit in a register, we should build it anyway.
6514 if (AllTablesFitInRegister)
6515 return true;
6516
6517 // Don't build a table that doesn't fit in-register if it has illegal types.
6518 if (HasIllegalType)
6519 return false;
6520
6521 return isSwitchDense(SI->getNumCases(), TableSize);
6522}
6523
6525 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
6526 bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes,
6527 const DataLayout &DL, const TargetTransformInfo &TTI) {
6528 if (MinCaseVal.isNullValue())
6529 return true;
6530 if (MinCaseVal.isNegative() ||
6531 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
6532 !HasDefaultResults)
6533 return false;
6534 return all_of(ResultTypes, [&](const auto &KV) {
6535 return SwitchLookupTable::WouldFitInRegister(
6536 DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */,
6537 KV.second /* ResultType */);
6538 });
6539}
6540
6541/// Try to reuse the switch table index compare. Following pattern:
6542/// \code
6543/// if (idx < tablesize)
6544/// r = table[idx]; // table does not contain default_value
6545/// else
6546/// r = default_value;
6547/// if (r != default_value)
6548/// ...
6549/// \endcode
6550/// Is optimized to:
6551/// \code
6552/// cond = idx < tablesize;
6553/// if (cond)
6554/// r = table[idx];
6555/// else
6556/// r = default_value;
6557/// if (cond)
6558/// ...
6559/// \endcode
6560/// Jump threading will then eliminate the second if(cond).
6562 User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
6563 Constant *DefaultValue,
6564 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
6565 ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser);
6566 if (!CmpInst)
6567 return;
6568
6569 // We require that the compare is in the same block as the phi so that jump
6570 // threading can do its work afterwards.
6571 if (CmpInst->getParent() != PhiBlock)
6572 return;
6573
6574 Constant *CmpOp1 = dyn_cast<Constant>(CmpInst->getOperand(1));
6575 if (!CmpOp1)
6576 return;
6577
6578 Value *RangeCmp = RangeCheckBranch->getCondition();
6579 Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
6580 Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
6581
6582 // Check if the compare with the default value is constant true or false.
6583 const DataLayout &DL = PhiBlock->getModule()->getDataLayout();
6585 CmpInst->getPredicate(), DefaultValue, CmpOp1, DL);
6586 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
6587 return;
6588
6589 // Check if the compare with the case values is distinct from the default
6590 // compare result.
6591 for (auto ValuePair : Values) {
6593 CmpInst->getPredicate(), ValuePair.second, CmpOp1, DL);
6594 if (!CaseConst || CaseConst == DefaultConst ||
6595 (CaseConst != TrueConst && CaseConst != FalseConst))
6596 return;
6597 }
6598
6599 // Check if the branch instruction dominates the phi node. It's a simple
6600 // dominance check, but sufficient for our needs.
6601 // Although this check is invariant in the calling loops, it's better to do it
6602 // at this late stage. Practically we do it at most once for a switch.
6603 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
6604 for (BasicBlock *Pred : predecessors(PhiBlock)) {
6605 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
6606 return;
6607 }
6608
6609 if (DefaultConst == FalseConst) {
6610 // The compare yields the same result. We can replace it.
6611 CmpInst->replaceAllUsesWith(RangeCmp);
6612 ++NumTableCmpReuses;
6613 } else {
6614 // The compare yields the same result, just inverted. We can replace it.
6615 Value *InvertedTableCmp = BinaryOperator::CreateXor(
6616 RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
6617 RangeCheckBranch->getIterator());
6618 CmpInst->replaceAllUsesWith(InvertedTableCmp);
6619 ++NumTableCmpReuses;
6620 }
6621}
6622
6623/// If the switch is only used to initialize one or more phi nodes in a common
6624/// successor block with different constant values, replace the switch with
6625/// lookup tables.
6627 DomTreeUpdater *DTU, const DataLayout &DL,
6628 const TargetTransformInfo &TTI) {
6629 assert(SI->getNumCases() > 1 && "Degenerate switch?");
6630
6631 BasicBlock *BB = SI->getParent();
6632 Function *Fn = BB->getParent();
6633 // Only build lookup table when we have a target that supports it or the
6634 // attribute is not set.
6636 (Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
6637 return false;
6638
6639 // FIXME: If the switch is too sparse for a lookup table, perhaps we could
6640 // split off a dense part and build a lookup table for that.
6641
6642 // FIXME: This creates arrays of GEPs to constant strings, which means each
6643 // GEP needs a runtime relocation in PIC code. We should just build one big
6644 // string and lookup indices into that.
6645
6646 // Ignore switches with less than three cases. Lookup tables will not make
6647 // them faster, so we don't analyze them.
6648 if (SI->getNumCases() < 3)
6649 return false;
6650
6651 // Figure out the corresponding result for each case value and phi node in the
6652 // common destination, as well as the min and max case values.
6653 assert(!SI->cases().empty());
6654 SwitchInst::CaseIt CI = SI->case_begin();
6655 ConstantInt *MinCaseVal = CI->getCaseValue();
6656 ConstantInt *MaxCaseVal = CI->getCaseValue();
6657
6658 BasicBlock *CommonDest = nullptr;
6659
6660 using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
6662
6666
6667 for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
6668 ConstantInt *CaseVal = CI->getCaseValue();
6669 if (CaseVal->getValue().slt(MinCaseVal->getValue()))
6670 MinCaseVal = CaseVal;
6671 if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
6672 MaxCaseVal = CaseVal;
6673
6674 // Resulting value at phi nodes for this case value.
6676 ResultsTy Results;
6677 if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
6678 Results, DL, TTI))
6679 return false;
6680
6681 // Append the result from this case to the list for each phi.
6682 for (const auto &I : Results) {
6683 PHINode *PHI = I.first;
6684 Constant *Value = I.second;
6685 if (!ResultLists.count(PHI))
6686 PHIs.push_back(PHI);
6687 ResultLists[PHI].push_back(std::make_pair(CaseVal, Value));
6688 }
6689 }
6690
6691 // Keep track of the result types.
6692 for (PHINode *PHI : PHIs) {
6693 ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
6694 }
6695
6696 uint64_t NumResults = ResultLists[PHIs[0]].size();
6697
6698 // If the table has holes, we need a constant result for the default case
6699 // or a bitmask that fits in a register.
6700 SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
6701 bool HasDefaultResults =
6702 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
6703 DefaultResultsList, DL, TTI);
6704
6705 for (const auto &I : DefaultResultsList) {
6706 PHINode *PHI = I.first;
6707 Constant *Result = I.second;
6708 DefaultResults[PHI] = Result;
6709 }
6710
6711 bool UseSwitchConditionAsTableIndex = ShouldUseSwitchConditionAsTableIndex(
6712 *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
6713 uint64_t TableSize;
6714 if (UseSwitchConditionAsTableIndex)
6715 TableSize = MaxCaseVal->getLimitedValue() + 1;
6716 else
6717 TableSize =
6718 (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
6719
6720 bool TableHasHoles = (NumResults < TableSize);
6721 bool NeedMask = (TableHasHoles && !HasDefaultResults);
6722 if (NeedMask) {
6723 // As an extra penalty for the validity test we require more cases.
6724 if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
6725 return false;
6726 if (!DL.fitsInLegalInteger(TableSize))
6727 return false;
6728 }
6729
6730 if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
6731 return false;
6732
6733 std::vector<DominatorTree::UpdateType> Updates;
6734
6735 // Compute the maximum table size representable by the integer type we are
6736 // switching upon.
6737 unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
6738 uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
6739 assert(MaxTableSize >= TableSize &&
6740 "It is impossible for a switch to have more entries than the max "
6741 "representable value of its input integer type's size.");
6742
6743 // If the default destination is unreachable, or if the lookup table covers
6744 // all values of the conditional variable, branch directly to the lookup table
6745 // BB. Otherwise, check that the condition is within the case range.
6746 bool DefaultIsReachable =
6747 !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
6748
6749 // Create the BB that does the lookups.
6750 Module &Mod = *CommonDest->getParent()->getParent();
6751 BasicBlock *LookupBB = BasicBlock::Create(
6752 Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
6753
6754 // Compute the table index value.
6755 Builder.SetInsertPoint(SI);
6756 Value *TableIndex;
6757 ConstantInt *TableIndexOffset;
6758 if (UseSwitchConditionAsTableIndex) {
6759 TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
6760 TableIndex = SI->getCondition();
6761 } else {
6762 TableIndexOffset = MinCaseVal;
6763 // If the default is unreachable, all case values are s>= MinCaseVal. Then
6764 // we can try to attach nsw.
6765 bool MayWrap = true;
6766 if (!DefaultIsReachable) {
6767 APInt Res = MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
6768 (void)Res;
6769 }
6770
6771 TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
6772 "switch.tableidx", /*HasNUW =*/false,
6773 /*HasNSW =*/!MayWrap);
6774 }
6775
6776 BranchInst *RangeCheckBranch = nullptr;
6777
6778 // Grow the table to cover all possible index values to avoid the range check.
6779 // It will use the default result to fill in the table hole later, so make
6780 // sure it exist.
6781 if (UseSwitchConditionAsTableIndex && HasDefaultResults) {
6782 ConstantRange CR = computeConstantRange(TableIndex, /* ForSigned */ false);
6783 // Grow the table shouldn't have any size impact by checking
6784 // WouldFitInRegister.
6785 // TODO: Consider growing the table also when it doesn't fit in a register
6786 // if no optsize is specified.
6787 const uint64_t UpperBound = CR.getUpper().getLimitedValue();
6788 if (!CR.isUpperWrapped() && all_of(ResultTypes, [&](const auto &KV) {
6789 return SwitchLookupTable::WouldFitInRegister(
6790 DL, UpperBound, KV.second /* ResultType */);
6791 })) {
6792 // There may be some case index larger than the UpperBound (unreachable
6793 // case), so make sure the table size does not get smaller.
6794 TableSize = std::max(UpperBound, TableSize);
6795 // The default branch is unreachable after we enlarge the lookup table.
6796 // Adjust DefaultIsReachable to reuse code path.
6797 DefaultIsReachable = false;
6798 }
6799 }
6800
6801 const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
6802 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
6803 Builder.CreateBr(LookupBB);
6804 if (DTU)
6805 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
6806 // Note: We call removeProdecessor later since we need to be able to get the
6807 // PHI value for the default case in case we're using a bit mask.
6808 } else {
6809 Value *Cmp = Builder.CreateICmpULT(
6810 TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
6811 RangeCheckBranch =
6812 Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
6813 if (DTU)
6814 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
6815 }
6816
6817 // Populate the BB that does the lookups.
6818 Builder.SetInsertPoint(LookupBB);
6819
6820 if (NeedMask) {
6821 // Before doing the lookup, we do the hole check. The LookupBB is therefore
6822 // re-purposed to do the hole check, and we create a new LookupBB.
6823 BasicBlock *MaskBB = LookupBB;
6824 MaskBB->setName("switch.hole_check");
6825 LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
6826 CommonDest->getParent(), CommonDest);
6827
6828 // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
6829 // unnecessary illegal types.
6830 uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
6831 APInt MaskInt(TableSizePowOf2, 0);
6832 APInt One(TableSizePowOf2, 1);
6833 // Build bitmask; fill in a 1 bit for every case.
6834 const ResultListTy &ResultList = ResultLists[PHIs[0]];
6835 for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
6836 uint64_t Idx = (ResultList[I].first->getValue() - TableIndexOffset->getValue())
6837 .getLimitedValue();
6838 MaskInt |= One << Idx;
6839 }
6840 ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
6841
6842 // Get the TableIndex'th bit of the bitmask.
6843 // If this bit is 0 (meaning hole) jump to the default destination,
6844 // else continue with table lookup.
6845 IntegerType *MapTy = TableMask->getIntegerType();
6846 Value *MaskIndex =
6847 Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
6848 Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
6849 Value *LoBit = Builder.CreateTrunc(
6850 Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
6851 Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
6852 if (DTU) {
6853 Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
6854 Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
6855 }
6856 Builder.SetInsertPoint(LookupBB);
6857 AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
6858 }
6859
6860 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
6861 // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
6862 // do not delete PHINodes here.
6863 SI->getDefaultDest()->removePredecessor(BB,
6864 /*KeepOneInputPHIs=*/true);
6865 if (DTU)
6866 Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
6867 }
6868
6869 for (PHINode *PHI : PHIs) {
6870 const ResultListTy &ResultList = ResultLists[PHI];
6871
6872 // If using a bitmask, use any value to fill the lookup table holes.
6873 Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
6874 StringRef FuncName = Fn->getName();
6875 SwitchLookupTable Table(Mod, TableSize, TableIndexOffset, ResultList, DV,
6876 DL, FuncName);
6877
6878 Value *Result = Table.BuildLookup(TableIndex, Builder);
6879
6880 // Do a small peephole optimization: re-use the switch table compare if
6881 // possible.
6882 if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
6883 BasicBlock *PhiBlock = PHI->getParent();
6884 // Search for compare instructions which use the phi.
6885 for (auto *User : PHI->users()) {
6886 reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList);
6887 }
6888 }
6889
6890 PHI->addIncoming(Result, LookupBB);
6891 }
6892
6893 Builder.CreateBr(CommonDest);
6894 if (DTU)
6895 Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
6896
6897 // Remove the switch.
6898 SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
6899 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6900 BasicBlock *Succ = SI->getSuccessor(i);
6901
6902 if (Succ == SI->getDefaultDest())
6903 continue;
6904 Succ->removePredecessor(BB);
6905 if (DTU && RemovedSuccessors.insert(Succ).second)
6906 Updates.push_back({DominatorTree::Delete, BB, Succ});
6907 }
6908 SI->eraseFromParent();
6909
6910 if (DTU)
6911 DTU->applyUpdates(Updates);
6912
6913 ++NumLookupTables;
6914 if (NeedMask)
6915 ++NumLookupTablesHoles;
6916 return true;
6917}
6918
6919/// Try to transform a switch that has "holes" in it to a contiguous sequence
6920/// of cases.
6921///
6922/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
6923/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
6924///
6925/// This converts a sparse switch into a dense switch which allows better
6926/// lowering and could also allow transforming into a lookup table.
6927static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
6928 const DataLayout &DL,
6929 const TargetTransformInfo &TTI) {
6930 auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
6931 if (CondTy->getIntegerBitWidth() > 64 ||
6932 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
6933 return false;
6934 // Only bother with this optimization if there are more than 3 switch cases;
6935 // SDAG will only bother creating jump tables for 4 or more cases.
6936 if (SI->getNumCases() < 4)
6937 return false;
6938
6939 // This transform is agnostic to the signedness of the input or case values. We
6940 // can treat the case values as signed or unsigned. We can optimize more common
6941 // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
6942 // as signed.
6944 for (const auto &C : SI->cases())
6945 Values.push_back(C.getCaseValue()->getValue().getSExtValue());
6946 llvm::sort(Values);
6947
6948 // If the switch is already dense, there's nothing useful to do here.
6949 if (isSwitchDense(Values))
6950 return false;
6951
6952 // First, transform the values such that they start at zero and ascend.
6953 int64_t Base = Values[0];
6954 for (auto &V : Values)
6955 V -= (uint64_t)(Base);
6956
6957 // Now we have signed numbers that have been shifted so that, given enough
6958 // precision, there are no negative values. Since the rest of the transform
6959 // is bitwise only, we switch now to an unsigned representation.
6960
6961 // This transform can be done speculatively because it is so cheap - it
6962 // results in a single rotate operation being inserted.
6963
6964 // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
6965 // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
6966 // less than 64.
6967 unsigned Shift = 64;
6968 for (auto &V : Values)
6969 Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
6970 assert(Shift < 64);
6971 if (Shift > 0)
6972 for (auto &V : Values)
6973 V = (int64_t)((uint64_t)V >> Shift);
6974
6975 if (!isSwitchDense(Values))
6976 // Transform didn't create a dense switch.
6977 return false;
6978
6979 // The obvious transform is to shift the switch condition right and emit a
6980 // check that the condition actually cleanly divided by GCD, i.e.
6981 // C & (1 << Shift - 1) == 0
6982 // inserting a new CFG edge to handle the case where it didn't divide cleanly.
6983 //
6984 // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
6985 // shift and puts the shifted-off bits in the uppermost bits. If any of these
6986 // are nonzero then the switch condition will be very large and will hit the
6987 // default case.
6988
6989 auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
6990 Builder.SetInsertPoint(SI);
6991 Value *Sub =
6992 Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
6993 Value *Rot = Builder.CreateIntrinsic(
6994 Ty, Intrinsic::fshl,
6995 {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
6996 SI->replaceUsesOfWith(SI->getCondition(), Rot);
6997
6998 for (auto Case : SI->cases()) {
6999 auto *Orig = Case.getCaseValue();
7000 auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
7001 Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
7002 }
7003 return true;
7004}
7005
7006/// Tries to transform switch of powers of two to reduce switch range.
7007/// For example, switch like:
7008/// switch (C) { case 1: case 2: case 64: case 128: }
7009/// will be transformed to:
7010/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7011///
7012/// This transformation allows better lowering and could allow transforming into
7013/// a lookup table.
7015 const DataLayout &DL,
7016 const TargetTransformInfo &TTI) {
7017 Value *Condition = SI->getCondition();
7018 LLVMContext &Context = SI->getContext();
7019 auto *CondTy = cast<IntegerType>(Condition->getType());
7020
7021 if (CondTy->getIntegerBitWidth() > 64 ||
7022 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7023 return false;
7024
7025 const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost(
7026 IntrinsicCostAttributes(Intrinsic::cttz, CondTy,
7027 {Condition, ConstantInt::getTrue(Context)}),
7029
7030 if (CttzIntrinsicCost > TTI::TCC_Basic)
7031 // Inserting intrinsic is too expensive.
7032 return false;
7033
7034 // Only bother with this optimization if there are more than 3 switch cases.
7035 // SDAG will only bother creating jump tables for 4 or more cases.
7036 if (SI->getNumCases() < 4)
7037 return false;
7038
7039 // We perform this optimization only for switches with
7040 // unreachable default case.
7041 // This assumtion will save us from checking if `Condition` is a power of two.
7042 if (!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()))
7043 return false;
7044
7045 // Check that switch cases are powers of two.
7047 for (const auto &Case : SI->cases()) {
7048 uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7049 if (llvm::has_single_bit(CaseValue))
7050 Values.push_back(CaseValue);
7051 else
7052 return false;
7053 }
7054
7055 // isSwichDense requires case values to be sorted.
7056 llvm::sort(Values);
7057 if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
7058 llvm::countr_zero(Values.front()) + 1))
7059 // Transform is unable to generate dense switch.
7060 return false;
7061
7062 Builder.SetInsertPoint(SI);
7063
7064 // Replace each case with its trailing zeros number.
7065 for (auto &Case : SI->cases()) {
7066 auto *OrigValue = Case.getCaseValue();
7067 Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
7068 OrigValue->getValue().countr_zero()));
7069 }
7070
7071 // Replace condition with its trailing zeros number.
7072 auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7073 Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
7074
7075 SI->setCondition(ConditionTrailingZeros);
7076
7077 return true;
7078}
7079
7080bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
7081 BasicBlock *BB = SI->getParent();
7082
7083 if (isValueEqualityComparison(SI)) {
7084 // If we only have one predecessor, and if it is a branch on this value,
7085 // see if that predecessor totally determines the outcome of this switch.
7086 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
7087 if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
7088 return requestResimplify();
7089
7090 Value *Cond = SI->getCondition();
7091 if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
7092 if (SimplifySwitchOnSelect(SI, Select))
7093 return requestResimplify();
7094
7095 // If the block only contains the switch, see if we can fold the block
7096 // away into any preds.
7097 if (SI == &*BB->instructionsWithoutDebug(false).begin())
7098 if (FoldValueComparisonIntoPredecessors(SI, Builder))
7099 return requestResimplify();
7100 }
7101
7102 // Try to transform the switch into an icmp and a branch.
7103 // The conversion from switch to comparison may lose information on
7104 // impossible switch values, so disable it early in the pipeline.
7105 if (Options.ConvertSwitchRangeToICmp && TurnSwitchRangeIntoICmp(SI, Builder))
7106 return requestResimplify();
7107
7108 // Remove unreachable cases.
7109 if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
7110 return requestResimplify();
7111
7112 if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
7113 return requestResimplify();
7114
7115 if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
7116 return requestResimplify();
7117
7118 // The conversion from switch to lookup tables results in difficult-to-analyze
7119 // code and makes pruning branches much harder. This is a problem if the
7120 // switch expression itself can still be restricted as a result of inlining or
7121 // CVP. Therefore, only apply this transformation during late stages of the
7122 // optimisation pipeline.
7123 if (Options.ConvertSwitchToLookupTable &&
7124 SwitchToLookupTable(SI, Builder, DTU, DL, TTI))
7125 return requestResimplify();
7126
7127 if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
7128 return requestResimplify();
7129
7130 if (ReduceSwitchRange(SI, Builder, DL, TTI))
7131 return requestResimplify();
7132
7133 if (HoistCommon &&
7134 hoistCommonCodeFromSuccessors(SI->getParent(), !Options.HoistCommonInsts))
7135 return requestResimplify();
7136
7137 return false;
7138}
7139
7140bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
7141 BasicBlock *BB = IBI->getParent();
7142 bool Changed = false;
7143
7144 // Eliminate redundant destinations.
7147 for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
7148 BasicBlock *Dest = IBI->getDestination(i);
7149 if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
7150 if (!Dest->hasAddressTaken())
7151 RemovedSuccs.insert(Dest);
7152 Dest->removePredecessor(BB);
7153 IBI->removeDestination(i);
7154 --i;
7155 --e;
7156 Changed = true;
7157 }
7158 }
7159
7160 if (DTU) {
7161 std::vector<DominatorTree::UpdateType> Updates;
7162 Updates.reserve(RemovedSuccs.size());
7163 for (auto *RemovedSucc : RemovedSuccs)
7164 Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
7165 DTU->applyUpdates(Updates);
7166 }
7167
7168 if (IBI->getNumDestinations() == 0) {
7169 // If the indirectbr has no successors, change it to unreachable.
7170 new UnreachableInst(IBI->getContext(), IBI->getIterator());
7172 return true;
7173 }
7174
7175 if (IBI->getNumDestinations() == 1) {
7176 // If the indirectbr has one successor, change it to a direct branch.
7179 return true;
7180 }
7181
7182 if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
7183 if (SimplifyIndirectBrOnSelect(IBI, SI))
7184 return requestResimplify();
7185 }
7186 return Changed;
7187}
7188
7189/// Given an block with only a single landing pad and a unconditional branch
7190/// try to find another basic block which this one can be merged with. This
7191/// handles cases where we have multiple invokes with unique landing pads, but
7192/// a shared handler.
7193///
7194/// We specifically choose to not worry about merging non-empty blocks
7195/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
7196/// practice, the optimizer produces empty landing pad blocks quite frequently
7197/// when dealing with exception dense code. (see: instcombine, gvn, if-else
7198/// sinking in this file)
7199///
7200/// This is primarily a code size optimization. We need to avoid performing
7201/// any transform which might inhibit optimization (such as our ability to
7202/// specialize a particular handler via tail commoning). We do this by not
7203/// merging any blocks which require us to introduce a phi. Since the same
7204/// values are flowing through both blocks, we don't lose any ability to
7205/// specialize. If anything, we make such specialization more likely.
7206///
7207/// TODO - This transformation could remove entries from a phi in the target
7208/// block when the inputs in the phi are the same for the two blocks being
7209/// merged. In some cases, this could result in removal of the PHI entirely.
7211 BasicBlock *BB, DomTreeUpdater *DTU) {
7212 auto Succ = BB->getUniqueSuccessor();
7213 assert(Succ);
7214 // If there's a phi in the successor block, we'd likely have to introduce
7215 // a phi into the merged landing pad block.
7216 if (isa<PHINode>(*Succ->begin()))
7217 return false;
7218
7219 for (BasicBlock *OtherPred : predecessors(Succ)) {
7220 if (BB == OtherPred)
7221 continue;
7222 BasicBlock::iterator I = OtherPred->begin();
7223 LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
7224 if (!LPad2 || !LPad2->isIdenticalTo(LPad))
7225 continue;
7226 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7227 ;
7228 BranchInst *BI2 = dyn_cast<BranchInst>(I);
7229 if (!BI2 || !BI2->isIdenticalTo(BI))
7230 continue;
7231
7232 std::vector<DominatorTree::UpdateType> Updates;
7233
7234 // We've found an identical block. Update our predecessors to take that
7235 // path instead and make ourselves dead.
7237 for (BasicBlock *Pred : UniquePreds) {
7238 InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
7239 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
7240 "unexpected successor");
7241 II->setUnwindDest(OtherPred);
7242 if (DTU) {
7243 Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
7244 Updates.push_back({DominatorTree::Delete, Pred, BB});
7245 }
7246 }
7247
7248 // The debug info in OtherPred doesn't cover the merged control flow that
7249 // used to go through BB. We need to delete it or update it.
7250 for (Instruction &Inst : llvm::make_early_inc_range(*OtherPred))
7251 if (isa<DbgInfoIntrinsic>(Inst))
7252 Inst.eraseFromParent();
7253
7255 for (BasicBlock *Succ : UniqueSuccs) {
7256 Succ->removePredecessor(BB);
7257 if (DTU)
7258 Updates.push_back({DominatorTree::Delete, BB, Succ});
7259 }
7260
7261 IRBuilder<> Builder(BI);
7262 Builder.CreateUnreachable();
7263 BI->eraseFromParent();
7264 if (DTU)
7265 DTU->applyUpdates(Updates);
7266 return true;
7267 }
7268 return false;
7269}
7270
7271bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
7272 return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
7273 : simplifyCondBranch(Branch, Builder);
7274}
7275
7276bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
7277 IRBuilder<> &Builder) {
7278 BasicBlock *BB = BI->getParent();
7279 BasicBlock *Succ = BI->getSuccessor(0);
7280
7281 // If the Terminator is the only non-phi instruction, simplify the block.
7282 // If LoopHeader is provided, check if the block or its successor is a loop
7283 // header. (This is for early invocations before loop simplify and
7284 // vectorization to keep canonical loop forms for nested loops. These blocks
7285 // can be eliminated when the pass is invoked later in the back-end.)
7286 // Note that if BB has only one predecessor then we do not introduce new
7287 // backedge, so we can eliminate BB.
7288 bool NeedCanonicalLoop =
7289 Options.NeedCanonicalLoop &&
7290 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
7291 (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
7293 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
7294 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
7295 return true;
7296
7297 // If the only instruction in the block is a seteq/setne comparison against a
7298 // constant, try to simplify the block.
7299 if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
7300 if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
7301 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7302 ;
7303 if (I->isTerminator() &&
7304 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
7305 return true;
7306 }
7307
7308 // See if we can merge an empty landing pad block with another which is
7309 // equivalent.
7310 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
7311 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7312 ;
7313 if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB, DTU))
7314 return true;
7315 }
7316
7317 // If this basic block is ONLY a compare and a branch, and if a predecessor
7318 // branches to us and our successor, fold the comparison into the
7319 // predecessor and use logical operations to update the incoming value
7320 // for PHI nodes in common successor.
7321 if (Options.SpeculateBlocks &&
7322 FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
7323 Options.BonusInstThreshold))
7324 return requestResimplify();
7325 return false;
7326}
7327
7329 BasicBlock *PredPred = nullptr;
7330 for (auto *P : predecessors(BB)) {
7331 BasicBlock *PPred = P->getSinglePredecessor();
7332 if (!PPred || (PredPred && PredPred != PPred))
7333 return nullptr;
7334 PredPred = PPred;
7335 }
7336 return PredPred;
7337}
7338
7339bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
7340 assert(
7341 !isa<ConstantInt>(BI->getCondition()) &&
7342 BI->getSuccessor(0) != BI->getSuccessor(1) &&
7343 "Tautological conditional branch should have been eliminated already.");
7344
7345 BasicBlock *BB = BI->getParent();
7346 if (!Options.SimplifyCondBranch ||
7347 BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
7348 return false;
7349
7350 // Conditional branch
7351 if (isValueEqualityComparison(BI)) {
7352 // If we only have one predecessor, and if it is a branch on this value,
7353 // see if that predecessor totally determines the outcome of this
7354 // switch.
7355 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
7356 if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
7357 return requestResimplify();
7358
7359 // This block must be empty, except for the setcond inst, if it exists.
7360 // Ignore dbg and pseudo intrinsics.
7361 auto I = BB->instructionsWithoutDebug(true).begin();
7362 if (&*I == BI) {
7363 if (FoldValueComparisonIntoPredecessors(BI, Builder))
7364 return requestResimplify();
7365 } else if (&*I == cast<Instruction>(BI->getCondition())) {
7366 ++I;
7367 if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
7368 return requestResimplify();
7369 }
7370 }
7371
7372 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
7373 if (SimplifyBranchOnICmpChain(BI, Builder, DL))
7374 return true;
7375
7376 // If this basic block has dominating predecessor blocks and the dominating
7377 // blocks' conditions imply BI's condition, we know the direction of BI.
7378 std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
7379 if (Imp) {
7380 // Turn this into a branch on constant.
7381 auto *OldCond = BI->getCondition();
7382 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
7383 : ConstantInt::getFalse(BB->getContext());
7384 BI->setCondition(TorF);
7386 return requestResimplify();
7387 }
7388
7389 // If this basic block is ONLY a compare and a branch, and if a predecessor
7390 // branches to us and one of our successors, fold the comparison into the
7391 // predecessor and use logical operations to pick the right destination.
7392 if (Options.SpeculateBlocks &&
7393 FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
7394 Options.BonusInstThreshold))
7395 return requestResimplify();
7396
7397 // We have a conditional branch to two blocks that are only reachable
7398 // from BI. We know that the condbr dominates the two blocks, so see if
7399 // there is any identical code in the "then" and "else" blocks. If so, we
7400 // can hoist it up to the branching block.
7401 if (BI->getSuccessor(0)->getSinglePredecessor()) {
7402 if (BI->getSuccessor(1)->getSinglePredecessor()) {
7403 if (HoistCommon && hoistCommonCodeFromSuccessors(
7404 BI->getParent(), !Options.HoistCommonInsts))
7405 return requestResimplify();
7406 } else {
7407 // If Successor #1 has multiple preds, we may be able to conditionally
7408 // execute Successor #0 if it branches to Successor #1.
7409 Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
7410 if (Succ0TI->getNumSuccessors() == 1 &&
7411 Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
7412 if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0)))
7413 return requestResimplify();
7414 }
7415 } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
7416 // If Successor #0 has multiple preds, we may be able to conditionally
7417 // execute Successor #1 if it branches to Successor #0.
7418 Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
7419 if (Succ1TI->getNumSuccessors() == 1 &&
7420 Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
7421 if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1)))
7422 return requestResimplify();
7423 }
7424
7425 // If this is a branch on something for which we know the constant value in
7426 // predecessors (e.g. a phi node in the current block), thread control
7427 // through this block.
7429 return requestResimplify();
7430
7431 // Scan predecessor blocks for conditional branches.
7432 for (BasicBlock *Pred : predecessors(BB))
7433 if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
7434 if (PBI != BI && PBI->isConditional())
7435 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
7436 return requestResimplify();
7437
7438 // Look for diamond patterns.
7439 if (MergeCondStores)
7441 if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
7442 if (PBI != BI && PBI->isConditional())
7443 if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
7444 return requestResimplify();
7445
7446 return false;
7447}
7448
7449/// Check if passing a value to an instruction will cause undefined behavior.
7450static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
7451 Constant *C = dyn_cast<Constant>(V);
7452 if (!C)
7453 return false;
7454
7455 if (I->use_empty())
7456 return false;
7457
7458 if (C->isNullValue() || isa<UndefValue>(C)) {
7459 // Only look at the first use, avoid hurting compile time with long uselists
7460 auto *Use = cast<Instruction>(*I->user_begin());
7461 // Bail out if Use is not in the same BB as I or Use == I or Use comes
7462 // before I in the block. The latter two can be the case if Use is a PHI
7463 // node.
7464 if (Use->getParent() != I->getParent() || Use == I || Use->comesBefore(I))
7465 return false;
7466
7467 // Now make sure that there are no instructions in between that can alter
7468 // control flow (eg. calls)
7469 auto InstrRange =
7470 make_range(std::next(I->getIterator()), Use->getIterator());
7471 if (any_of(InstrRange, [](Instruction &I) {
7473 }))
7474 return false;
7475
7476 // Look through GEPs. A load from a GEP derived from NULL is still undefined
7477 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
7478 if (GEP->getPointerOperand() == I) {
7479 // The current base address is null, there are four cases to consider:
7480 // getelementptr (TY, null, 0) -> null
7481 // getelementptr (TY, null, not zero) -> may be modified
7482 // getelementptr inbounds (TY, null, 0) -> null
7483 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
7484 // undefined?
7485 if (!GEP->hasAllZeroIndices() &&
7486 (!GEP->isInBounds() ||
7487 NullPointerIsDefined(GEP->getFunction(),
7488 GEP->getPointerAddressSpace())))
7489 PtrValueMayBeModified = true;
7490 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
7491 }
7492
7493 // Look through return.
7494 if (ReturnInst *Ret = dyn_cast<ReturnInst>(Use)) {
7495 bool HasNoUndefAttr =
7496 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
7497 // Return undefined to a noundef return value is undefined.
7498 if (isa<UndefValue>(C) && HasNoUndefAttr)
7499 return true;
7500 // Return null to a nonnull+noundef return value is undefined.
7501 if (C->isNullValue() && HasNoUndefAttr &&
7502 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
7503 return !PtrValueMayBeModified;
7504 }
7505 }
7506
7507 // Look through bitcasts.
7508 if (BitCastInst *BC = dyn_cast<BitCastInst>(Use))
7509 return passingValueIsAlwaysUndefined(V, BC, PtrValueMayBeModified);
7510
7511 // Load from null is undefined.
7512 if (LoadInst *LI = dyn_cast<LoadInst>(Use))
7513 if (!LI->isVolatile())
7514 return !NullPointerIsDefined(LI->getFunction(),
7515 LI->getPointerAddressSpace());
7516
7517 // Store to null is undefined.
7518 if (StoreInst *SI = dyn_cast<StoreInst>(Use))
7519 if (!SI->isVolatile())
7520 return (!NullPointerIsDefined(SI->getFunction(),
7521 SI->getPointerAddressSpace())) &&
7522 SI->getPointerOperand() == I;
7523
7524 // llvm.assume(false/undef) always triggers immediate UB.
7525 if (auto *Assume = dyn_cast<AssumeInst>(Use)) {
7526 // Ignore assume operand bundles.
7527 if (I == Assume->getArgOperand(0))
7528 return true;
7529 }
7530
7531 if (auto *CB = dyn_cast<CallBase>(Use)) {
7532 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
7533 return false;
7534 // A call to null is undefined.
7535 if (CB->getCalledOperand() == I)
7536 return true;
7537
7538 if (C->isNullValue()) {
7539 for (const llvm::Use &Arg : CB->args())
7540 if (Arg == I) {
7541 unsigned ArgIdx = CB->getArgOperandNo(&Arg);
7542 if (CB->isPassingUndefUB(ArgIdx) &&
7543 CB->paramHasAttr(ArgIdx, Attribute::NonNull)) {
7544 // Passing null to a nonnnull+noundef argument is undefined.
7545 return !PtrValueMayBeModified;
7546 }
7547 }
7548 } else if (isa<UndefValue>(C)) {
7549 // Passing undef to a noundef argument is undefined.
7550 for (const llvm::Use &Arg : CB->args())
7551 if (Arg == I) {
7552 unsigned ArgIdx = CB->getArgOperandNo(&Arg);
7553 if (CB->isPassingUndefUB(ArgIdx)) {
7554 // Passing undef to a noundef argument is undefined.
7555 return true;
7556 }
7557 }
7558 }
7559 }
7560 }
7561 return false;
7562}
7563
7564/// If BB has an incoming value that will always trigger undefined behavior
7565/// (eg. null pointer dereference), remove the branch leading here.
7567 DomTreeUpdater *DTU,
7568 AssumptionCache *AC) {
7569 for (PHINode &PHI : BB->phis())
7570 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
7571 if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
7572 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
7573 Instruction *T = Predecessor->getTerminator();
7574 IRBuilder<> Builder(T);
7575 if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
7576 BB->removePredecessor(Predecessor);
7577 // Turn unconditional branches into unreachables and remove the dead
7578 // destination from conditional branches.
7579 if (BI->isUnconditional())
7580 Builder.CreateUnreachable();
7581 else {
7582 // Preserve guarding condition in assume, because it might not be
7583 // inferrable from any dominating condition.
7584 Value *Cond = BI->getCondition();
7585 CallInst *Assumption;
7586 if (BI->getSuccessor(0) == BB)
7587 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
7588 else
7589 Assumption = Builder.CreateAssumption(Cond);
7590 if (AC)
7591 AC->registerAssumption(cast<AssumeInst>(Assumption));
7592 Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
7593 : BI->getSuccessor(0));
7594 }
7595 BI->eraseFromParent();
7596 if (DTU)
7597 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
7598 return true;
7599 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
7600 // Redirect all branches leading to UB into
7601 // a newly created unreachable block.
7602 BasicBlock *Unreachable = BasicBlock::Create(
7603 Predecessor->getContext(), "unreachable", BB->getParent(), BB);
7604 Builder.SetInsertPoint(Unreachable);
7605 // The new block contains only one instruction: Unreachable
7606 Builder.CreateUnreachable();
7607 for (const auto &Case : SI->cases())
7608 if (Case.getCaseSuccessor() == BB) {
7609 BB->removePredecessor(Predecessor);
7610 Case.setSuccessor(Unreachable);
7611 }
7612 if (SI->getDefaultDest() == BB) {
7613 BB->removePredecessor(Predecessor);
7614 SI->setDefaultDest(Unreachable);
7615 }
7616
7617 if (DTU)
7618 DTU->applyUpdates(
7619 { { DominatorTree::Insert, Predecessor, Unreachable },
7620 { DominatorTree::Delete, Predecessor, BB } });
7621 return true;
7622 }
7623 }
7624
7625 return false;
7626}
7627
7628bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
7629 bool Changed = false;
7630
7631 assert(BB && BB->getParent() && "Block not embedded in function!");
7632 assert(BB->getTerminator() && "Degenerate basic block encountered!");
7633
7634 // Remove basic blocks that have no predecessors (except the entry block)...
7635 // or that just have themself as a predecessor. These are unreachable.
7636 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
7637 BB->getSinglePredecessor() == BB) {
7638 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
7639 DeleteDeadBlock(BB, DTU);
7640 return true;
7641 }
7642
7643 // Check to see if we can constant propagate this terminator instruction
7644 // away...
7645 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
7646 /*TLI=*/nullptr, DTU);
7647
7648 // Check for and eliminate duplicate PHI nodes in this block.
7649 Changed |= EliminateDuplicatePHINodes(BB);
7650
7651 // Check for and remove branches that will always cause undefined behavior.
7653 return requestResimplify();
7654
7655 // Merge basic blocks into their predecessor if there is only one distinct
7656 // pred, and if there is only one distinct successor of the predecessor, and
7657 // if there are no PHI nodes.
7658 if (MergeBlockIntoPredecessor(BB, DTU))
7659 return true;
7660
7661 if (SinkCommon && Options.SinkCommonInsts)
7662 if (SinkCommonCodeFromPredecessors(BB, DTU) ||
7663 MergeCompatibleInvokes(BB, DTU)) {
7664 // SinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
7665 // so we may now how duplicate PHI's.
7666 // Let's rerun EliminateDuplicatePHINodes() first,
7667 // before FoldTwoEntryPHINode() potentially converts them into select's,
7668 // after which we'd need a whole EarlyCSE pass run to cleanup them.
7669 return true;
7670 }
7671
7672 IRBuilder<> Builder(BB);
7673
7674 if (Options.SpeculateBlocks &&
7675 !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
7676 // If there is a trivial two-entry PHI node in this basic block, and we can
7677 // eliminate it, do so now.
7678 if (auto *PN = dyn_cast<PHINode>(BB->begin()))
7679 if (PN->getNumIncomingValues() == 2)
7680 if (FoldTwoEntryPHINode(PN, TTI, DTU, DL))
7681 return true;
7682 }
7683
7685 Builder.SetInsertPoint(Terminator);
7686 switch (Terminator->getOpcode()) {
7687 case Instruction::Br:
7688 Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
7689 break;
7690 case Instruction::Resume:
7691 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
7692 break;
7693 case Instruction::CleanupRet:
7694 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
7695 break;
7696 case Instruction::Switch:
7697 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
7698 break;
7699 case Instruction::Unreachable:
7700 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
7701 break;
7702 case Instruction::IndirectBr:
7703 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
7704 break;
7705 }
7706
7707 return Changed;
7708}
7709
7710bool SimplifyCFGOpt::run(BasicBlock *BB) {
7711 bool Changed = false;
7712
7713 // Repeated simplify BB as long as resimplification is requested.
7714 do {
7715 Resimplify = false;
7716
7717 // Perform one round of simplifcation. Resimplify flag will be set if
7718 // another iteration is requested.
7719 Changed |= simplifyOnce(BB);
7720 } while (Resimplify);
7721
7722 return Changed;
7723}
7724
7727 ArrayRef<WeakVH> LoopHeaders) {
7728 return SimplifyCFGOpt(TTI, DTU, BB->getModule()->getDataLayout(), LoopHeaders,
7729 Options)
7730 .run(BB);
7731}
#define Fail
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
std::string Name
uint64_t Size
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1291
bool End
Definition: ELF_riscv.cpp:480
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
Hexagon Common GEP
hexagon gen pred
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
static LVOptions Options
Definition: LVOptions.cpp:25
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
Module.h This file contains the declarations for the Module class.
LLVMContext & Context
#define P(N)
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Module * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static bool ValidLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB)
Return true if we can thread a branch across this block.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static Constant * ConstantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static Constant * LookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool SafeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool mergeCleanupPad(CleanupReturnInst *RI)
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static void GetBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static ConstantInt * GetConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static void EliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static std::optional< bool > FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
static PHINode * FindPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}{Tru...
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static void setBranchWeights(SwitchInst *SI, ArrayRef< uint32_t > Weights)
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder)
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static bool ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallDenseMap< PHINode *, Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool IncomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
static bool FoldCondBranchOnValueKnownInPredecessor(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool ForwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static int ConstantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist or sink a sole store instruction out of a conditional block.
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU)
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static void FitWeights(MutableArrayRef< uint64_t > Weights)
Keep halving the weights until all can fit in uint32_t.
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
static void EraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static unsigned skippedInstrFlags(Instruction *I)
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static bool ValuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< Instruction *, SmallVector< Value *, 4 > > &PHIOperands)
static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static bool sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static void MergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static bool ShouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallDenseMap< PHINode *, Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, const DataLayout &DL)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool CasesAreContiguous(SmallVectorImpl< ConstantInt * > &Cases)
static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given a block with only a single landing pad and an unconditional branch, try to find another basic bl...
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static bool isLifeTimeMarker(const Instruction *I)
static bool MergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static bool dominatesMergePoint(Value *V, BasicBlock *BB, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This pass exposes codegen information to IR-level passes.
This defines the Use class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1620
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1179
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1227
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition: APInt.h:1144
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1482
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:334
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:453
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1108
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1911
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & back() const
back - Get the last element.
Definition: ArrayRef.h:174
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:168
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
bool getValueAsBool() const
Return the attribute's value as a boolean.
Definition: Attributes.cpp:335
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator end()
Definition: BasicBlock.h:443
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:430
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:499
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:409
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:247
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:640
void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:360
const Instruction & front() const
Definition: BasicBlock.h:453
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:199
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:474
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:490
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:452
const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
Definition: BasicBlock.cpp:324
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:460
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:482
void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
Definition: BasicBlock.cpp:712
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:379
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:165
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:168
bool IsNewDbgInfoFormat
Flag recording whether or not this block stores debug-info in the form of intrinsic instructions (fal...
Definition: BasicBlock.h:65
bool isLandingPad() const
Return true if this basic block is a landing pad.
Definition: BasicBlock.cpp:672
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221
bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
Definition: BasicBlock.cpp:478
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition: BasicBlock.h:613
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:289
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:509
This class represents a no-op cast from one type to another.
The address of a basic block.
Definition: Constants.h:889
BasicBlock * getBasicBlock() const
Definition: Constants.h:918
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
static BranchInst * Create(BasicBlock *IfTrue, BasicBlock::iterator InsertBefore)
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
bool isInlineAsm() const
Check if this call is an inline asm statement.
Definition: InstrTypes.h:1809
bool cannotMerge() const
Determine if the call cannot be tail merged.
Definition: InstrTypes.h:2291
bool isIndirectCall() const
Return true if the callsite is an indirect call.
Value * getCalledOperand() const
Definition: InstrTypes.h:1735
Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
This class represents a function call, abstracting a target machine's calling convention.
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:983
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:1105
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1291
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1017
static Constant * getNeg(Constant *C, bool HasNSW=false)
Definition: Constants.cpp:2523
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
bool isNegative() const
Definition: Constants.h:200
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:255
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition: Constants.h:184
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:849
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:856
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition: Constants.h:148
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:154
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:145
This class represents a range of values.
Definition: ConstantRange.h:47
ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
bool isEmptySet() const
Return true if this set contains no members.
bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
ConstantRange inverse() const
Return a new range that is the logical not of the current set.
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
Debug location.
static DILocation * getMergedLocations(ArrayRef< DILocation * > Locs)
Try to combine the vector of locations passed as input in a single one.
static DILocation * getMergedLocation(DILocation *LocA, DILocation *LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
Base class for non-instruction debug metadata records that have positions within IR.
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
unsigned size() const
Definition: DenseMap.h:99
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
bool hasPostDomTree() const
Returns true if it holds a PostDominatorTree.
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
const BasicBlock & getEntryBlock() const
Definition: Function.h:787
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:703
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:682
iterator begin()
Definition: Function.h:803
size_t size() const
Definition: Function.h:808
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:677
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:973
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
This instruction compares its operands according to the predicate given to the constructor.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2257
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2039
UnreachableInst * CreateUnreachable()
Definition: IRBuilder.h:1263
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:932
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1091
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:175
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2535
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1437
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:311
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:220
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition: IRBuilder.h:1876
void CollectMetadataToCopy(Instruction *Src, ArrayRef< unsigned > MetadataKinds)
Collect metadata with IDs MetadataKinds from Src which should be added to all created instructions.
Definition: IRBuilder.h:233
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:486
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1749
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition: IRBuilder.h:1143
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2241
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1344
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1120
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1790
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2021
CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles=std::nullopt)
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
Definition: IRBuilder.cpp:551
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1475
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition: IRBuilder.h:1803
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1327
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2117
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2007
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1497
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1666
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition: IRBuilder.h:1114
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1676
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2196
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
Value * CreateLogicalOr(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1682
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1361
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
bool isSameOperationAs(const Instruction *I, unsigned flags=0) const LLVM_READONLY
This function determines if the specified instruction executes the same operation as the current one.
iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
Definition: Instruction.h:84
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:454
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:83
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
const BasicBlock * getParent() const
Definition: Instruction.h:152
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:149
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:87
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:359
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
Definition: Instruction.h:255
void dropUBImplyingAttrsAndMetadata()
Drop any attributes or metadata that can cause immediate undefined behavior.
bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1635
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1706
bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void applyMergedLocation(DILocation *LocA, DILocation *LocB)
Merge 2 debug locations and apply it to the Instruction.
Definition: DebugInfo.cpp:932
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:451
void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
Definition: DerivedTypes.h:40
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
Invoke instruction.
BasicBlock * getUnwindDest() const
void setNormalDest(BasicBlock *B)
void setUnwindDest(BasicBlock *B)
BasicBlock * getNormalDest() const
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
Definition: Instructions.h:184
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:1067
Helper class to manipulate !mmra metadata nodes.
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:193
bool empty() const
Definition: MapVector.h:79
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: MapVector.h:141
size_type size() const
Definition: MapVector.h:60
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:301
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1827
This class represents a cast from a pointer to an integer.
Resume the propagation of an exception.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:356
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:717
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
Align getAlign() const
Definition: Instructions.h:369
bool isSimple() const
Definition: Instructions.h:406
Value * getValueOperand()
Definition: Instructions.h:414
bool isUnordered() const
Definition: Instructions.h:408
Value * getPointerOperand()
Definition: Instructions.h:417
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
std::optional< uint32_t > CaseWeightOpt
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Multiway switch.
BasicBlock * getSuccessor(unsigned idx) const
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist instruction in the then/else to before if.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
static IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isTokenTy() const
Return true if this is 'token'.
Definition: Type.h:225
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void set(Value *Val)
Definition: Value.h:882
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition: User.h:182
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
user_iterator user_begin()
Definition: Value.h:397
Value(Type *Ty, unsigned scid)
Definition: Value.cpp:53
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition: Value.cpp:149
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
bool user_empty() const
Definition: Value.h:385
self_iterator getIterator()
Definition: ilist_node.h:109
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:316
A range adaptor for a pair of iterators.
#define UINT64_MAX
Definition: DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
ArchKind & operator--(ArchKind &Kind)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cst_pred_ty< is_any_apint > m_AnyIntegralConstant()
Match an integer or vector with any integral constant.
Definition: PatternMatch.h:507
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:854
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
AssignmentMarkerRange getAssignmentMarkers(DIAssignID *ID)
Return a range of dbg.assign intrinsics which use \ID as an operand.
Definition: DebugInfo.cpp:1895
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Definition: DebugInfo.h:238
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
Definition: MathExtras.h:31
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
pred_iterator pred_end(BasicBlock *BB)
Definition: CFG.h:114
@ Offset
Definition: DWP.cpp:456
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:853
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1715
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:540
bool succ_empty(const Instruction *I)
Definition: CFG.h:255
bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:130
BranchInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Remap the Values used in the DbgRecord DR using the value map VM.
Definition: ValueMapper.h:272
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
Definition: SetOperations.h:40
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2165
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1768
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2059
constexpr bool has_single_bit(T Value) noexcept
Definition: bit.h:146
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
pred_iterator pred_begin(BasicBlock *BB)
Definition: CFG.h:110
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
BasicBlock::iterator skipDebugIntrinsics(BasicBlock::iterator It)
Advance It while it points to a debug instruction and return the result.
Definition: BasicBlock.cpp:693
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
bool FoldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition: Local.cpp:1120
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
Constant * ConstantFoldInstOperands(Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:94
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:76
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:2060
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1736
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition: STLExtras.h:1422
Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition: Local.cpp:3160
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:263
cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:3341
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:336
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
Definition: GuardUtils.cpp:26
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition: Local.cpp:3607
@ And
Bitwise or logical AND of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1914
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition: Local.cpp:4109
auto max_element(R &&Range)
Definition: STLExtras.h:1986
bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent to: C.erase(remove_if(C, pred), C.end());
Definition: STLExtras.h:2051
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the old basic block, and the rest of the instructions in the BB are moved to a new block.
std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on dominating conditions.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition: Sequence.h:305
unsigned succ_size(const MachineBasicBlock *BB)
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:1607
bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrower than C's type.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Get the upper bound on bit size for this Value Op as a signed integer.
bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition: Local.cpp:1487
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:360
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
Definition: ValueMapper.h:281
void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weights" metadata.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes that cover both.
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are taken from machine instruction.
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:254