LLVM 19.0.0git
InstructionCombining.cpp
1//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// InstructionCombining - Combine instructions to form fewer, simple
10// instructions. This pass does not modify the CFG. This pass is where
11// algebraic simplification happens.
12//
13// This pass combines things like:
14// %Y = add i32 %X, 1
15// %Z = add i32 %Y, 1
16// into:
17// %Z = add i32 %X, 2
18//
19// This is a simple worklist driven algorithm.
20//
21// This pass guarantees that the following canonicalizations are performed on
22// the program:
23// 1. If a binary operator has a constant operand, it is moved to the RHS
24// 2. Bitwise operators with constant operands are always grouped so that
25// shifts are performed first, then or's, then and's, then xor's.
26// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
27// 4. All cmp instructions on boolean values are replaced with logical ops
28// 5. add X, X is represented as (X*2) => (X << 1)
29// 6. Multiplies with a power-of-two constant argument are transformed into
30// shifts.
31// ... etc.
32//
33//===----------------------------------------------------------------------===//
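// As a rough illustration of canonicalizations 1 and 6 above (hypothetical IR,
// not taken from the pass's test suite):
//    %a = add i32 7, %X        ; rule 1: the constant operand moves to the RHS
//    %b = mul i32 %X, 8        ; rule 6: a power-of-two multiply becomes a shift
// become:
//    %a = add i32 %X, 7
//    %b = shl i32 %X, 3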
34
35#include "InstCombineInternal.h"
36#include "llvm/ADT/APInt.h"
37#include "llvm/ADT/ArrayRef.h"
38#include "llvm/ADT/DenseMap.h"
41#include "llvm/ADT/Statistic.h"
46#include "llvm/Analysis/CFG.h"
61#include "llvm/IR/BasicBlock.h"
62#include "llvm/IR/CFG.h"
63#include "llvm/IR/Constant.h"
64#include "llvm/IR/Constants.h"
65#include "llvm/IR/DIBuilder.h"
66#include "llvm/IR/DataLayout.h"
67#include "llvm/IR/DebugInfo.h"
69#include "llvm/IR/Dominators.h"
71#include "llvm/IR/Function.h"
73#include "llvm/IR/IRBuilder.h"
74#include "llvm/IR/InstrTypes.h"
75#include "llvm/IR/Instruction.h"
78#include "llvm/IR/Intrinsics.h"
79#include "llvm/IR/Metadata.h"
80#include "llvm/IR/Operator.h"
81#include "llvm/IR/PassManager.h"
83#include "llvm/IR/Type.h"
84#include "llvm/IR/Use.h"
85#include "llvm/IR/User.h"
86#include "llvm/IR/Value.h"
87#include "llvm/IR/ValueHandle.h"
92#include "llvm/Support/Debug.h"
100#include <algorithm>
101#include <cassert>
102#include <cstdint>
103#include <memory>
104#include <optional>
105#include <string>
106#include <utility>
107
108#define DEBUG_TYPE "instcombine"
110#include <optional>
111
112using namespace llvm;
113using namespace llvm::PatternMatch;
114
115STATISTIC(NumWorklistIterations,
116 "Number of instruction combining iterations performed");
117STATISTIC(NumOneIteration, "Number of functions with one iteration");
118STATISTIC(NumTwoIterations, "Number of functions with two iterations");
119STATISTIC(NumThreeIterations, "Number of functions with three iterations");
120STATISTIC(NumFourOrMoreIterations,
121 "Number of functions with four or more iterations");
122
123STATISTIC(NumCombined , "Number of insts combined");
124STATISTIC(NumConstProp, "Number of constant folds");
125STATISTIC(NumDeadInst , "Number of dead inst eliminated");
126STATISTIC(NumSunkInst , "Number of instructions sunk");
127STATISTIC(NumExpand, "Number of expansions");
128STATISTIC(NumFactor , "Number of factorizations");
129STATISTIC(NumReassoc , "Number of reassociations");
130DEBUG_COUNTER(VisitCounter, "instcombine-visit",
131 "Controls which instructions are visited");
132
133static cl::opt<bool>
134EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
135 cl::init(true));
136
137static cl::opt<unsigned> MaxSinkNumUsers(
138    "instcombine-max-sink-users", cl::init(32),
139 cl::desc("Maximum number of undroppable users for instruction sinking"));
140
141static cl::opt<unsigned>
142MaxArraySize("instcombine-maxarray-size", cl::init(1024),
143 cl::desc("Maximum array size considered when doing a combine"));
144
145// FIXME: Remove this flag when it is no longer necessary to convert
146// llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
147// increases variable availability at the cost of accuracy. Variables that
148// cannot be promoted by mem2reg or SROA will be described as living in memory
149// for their entire lifetime. However, passes like DSE and instcombine can
150// delete stores to the alloca, leading to misleading and inaccurate debug
151// information. This flag can be removed when those passes are fixed.
152static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
153 cl::Hidden, cl::init(true));
154
155std::optional<Instruction *>
156InstCombinerImpl::targetInstCombineIntrinsic(IntrinsicInst &II) {
157  // Handle target specific intrinsics
158  if (II.getCalledFunction()->isTargetIntrinsic()) {
159    return TTI.instCombineIntrinsic(*this, II);
160  }
161 return std::nullopt;
162}
163
164std::optional<Value *> InstCombinerImpl::targetSimplifyDemandedUseBitsIntrinsic(
165    IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
166 bool &KnownBitsComputed) {
167  // Handle target specific intrinsics
168  if (II.getCalledFunction()->isTargetIntrinsic()) {
169    return TTI.simplifyDemandedUseBitsIntrinsic(*this, II, DemandedMask, Known,
170 KnownBitsComputed);
171 }
172 return std::nullopt;
173}
174
175std::optional<Value *> InstCombinerImpl::targetSimplifyDemandedVectorEltsIntrinsic(
176    IntrinsicInst &II, APInt DemandedElts, APInt &PoisonElts,
177 APInt &PoisonElts2, APInt &PoisonElts3,
178 std::function<void(Instruction *, unsigned, APInt, APInt &)>
179 SimplifyAndSetOp) {
180 // Handle target specific intrinsics
181  if (II.getCalledFunction()->isTargetIntrinsic()) {
182    return TTI.simplifyDemandedVectorEltsIntrinsic(
183        *this, II, DemandedElts, PoisonElts, PoisonElts2, PoisonElts3,
184 SimplifyAndSetOp);
185 }
186 return std::nullopt;
187}
188
189bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
190 return TTI.isValidAddrSpaceCast(FromAS, ToAS);
191}
192
193Value *InstCombinerImpl::EmitGEPOffset(GEPOperator *GEP, bool RewriteGEP) {
194  if (!RewriteGEP)
195    return llvm::emitGEPOffset(&Builder, DL, GEP);
196
197  IRBuilderBase::InsertPointGuard Guard(Builder);
198  auto *Inst = dyn_cast<Instruction>(GEP);
199  if (Inst)
200    Builder.SetInsertPoint(Inst);
201
202  Value *Offset = EmitGEPOffset(GEP);
203  // If a non-trivial GEP has other uses, rewrite it to avoid duplicating
204  // the offset arithmetic.
205  if (Inst && !GEP->hasOneUse() && !GEP->hasAllConstantIndices() &&
206      !GEP->getSourceElementType()->isIntegerTy(8)) {
207    replaceInstUsesWith(
208        *Inst, Builder.CreateGEP(Builder.getInt8Ty(), GEP->getPointerOperand(),
209                                 Offset, "", GEP->isInBounds()));
210    eraseInstFromFunction(*Inst);
211  }
212 return Offset;
213}
214
215/// Legal integers and common types are considered desirable. This is used to
216/// avoid creating instructions with types that may not be supported well by
217/// the backend.
218/// NOTE: This treats i8, i16 and i32 specially because they are common
219/// types in frontend languages.
220bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
221 switch (BitWidth) {
222 case 8:
223 case 16:
224 case 32:
225 return true;
226 default:
227 return DL.isLegalInteger(BitWidth);
228 }
229}
230
231/// Return true if it is desirable to convert an integer computation from a
232/// given bit width to a new bit width.
233/// We don't want to convert from a legal or desirable type (like i8) to an
234/// illegal type or from a smaller to a larger illegal type. A width of '1'
235/// is always treated as a desirable type because i1 is a fundamental type in
236/// IR, and there are many specialized optimizations for i1 types.
237/// Common/desirable widths are equally treated as legal to convert to, in
238/// order to open up more combining opportunities.
239bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
240 unsigned ToWidth) const {
241 bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
242 bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);
243
244 // Convert to desirable widths even if they are not legal types.
245 // Only shrink types, to prevent infinite loops.
246 if (ToWidth < FromWidth && isDesirableIntType(ToWidth))
247 return true;
248
249  // If this is a legal or desirable integer source type, and the result would
250  // be an illegal type, don't do the transformation.
251 if ((FromLegal || isDesirableIntType(FromWidth)) && !ToLegal)
252 return false;
253
254 // Otherwise, if both are illegal, do not increase the size of the result. We
255 // do allow things like i160 -> i64, but not i64 -> i160.
256 if (!FromLegal && !ToLegal && ToWidth > FromWidth)
257 return false;
258
259 return true;
260}
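// Illustrative behaviour of shouldChangeType, assuming a typical 64-bit
// DataLayout where i8/i16/i32/i64 are legal (a sketch, not exhaustive):
//   shouldChangeType(33, 32)  -> true   (shrink to a desirable width)
//   shouldChangeType(32, 33)  -> false  (legal source, illegal destination)
//   shouldChangeType(160, 64) -> true   (illegal type shrunk to a legal one)
//   shouldChangeType(64, 160) -> false  (would grow into an illegal type)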
261
262/// Return true if it is desirable to convert a computation from 'From' to 'To'.
263/// We don't want to convert from a legal to an illegal type or from a smaller
264/// to a larger illegal type. i1 is always treated as a legal type because it is
265/// a fundamental type in IR, and there are many specialized optimizations for
266/// i1 types.
267bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
268 // TODO: This could be extended to allow vectors. Datalayout changes might be
269 // needed to properly support that.
270 if (!From->isIntegerTy() || !To->isIntegerTy())
271 return false;
272
273 unsigned FromWidth = From->getPrimitiveSizeInBits();
274 unsigned ToWidth = To->getPrimitiveSizeInBits();
275 return shouldChangeType(FromWidth, ToWidth);
276}
277
278// Return true if No Signed Wrap should be maintained for I.
279// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
280// where both B and C should be ConstantInts, results in a constant that does
281// not overflow. This function only handles the Add and Sub opcodes. For
282// all other opcodes, the function conservatively returns false.
283static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
284  auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
285 if (!OBO || !OBO->hasNoSignedWrap())
286 return false;
287
288 // We reason about Add and Sub Only.
289 Instruction::BinaryOps Opcode = I.getOpcode();
290 if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
291 return false;
292
293 const APInt *BVal, *CVal;
294 if (!match(B, m_APInt(BVal)) || !match(C, m_APInt(CVal)))
295 return false;
296
297 bool Overflow = false;
298 if (Opcode == Instruction::Add)
299 (void)BVal->sadd_ov(*CVal, Overflow);
300 else
301 (void)BVal->ssub_ov(*CVal, Overflow);
302
303 return !Overflow;
304}
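// Example (a sketch): when reassociating "add nsw i8 %x, 100" with C = 20,
// 100 + 20 = 120 still fits in i8, so the nsw flag may be kept; with C = 50,
// 100 + 50 overflows i8 and this returns false, forcing nsw to be dropped.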
305
306static bool hasNoUnsignedWrap(BinaryOperator &I) {
307  auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
308 return OBO && OBO->hasNoUnsignedWrap();
309}
310
311static bool hasNoSignedWrap(BinaryOperator &I) {
312  auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
313 return OBO && OBO->hasNoSignedWrap();
314}
315
316/// Conservatively clears subclassOptionalData after a reassociation or
317/// commutation. We preserve fast-math flags when applicable.
319static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
320  FPMathOperator *FPMO = dyn_cast<FPMathOperator>(&I);
321 if (!FPMO) {
322 I.clearSubclassOptionalData();
323 return;
324 }
325
326 FastMathFlags FMF = I.getFastMathFlags();
327 I.clearSubclassOptionalData();
328 I.setFastMathFlags(FMF);
329}
330
331/// Combine constant operands of associative operations either before or after a
332/// cast to eliminate one of the associative operations:
333/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
334/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
335static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1,
336                                   InstCombinerImpl &IC) {
337 auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0));
338 if (!Cast || !Cast->hasOneUse())
339 return false;
340
341 // TODO: Enhance logic for other casts and remove this check.
342 auto CastOpcode = Cast->getOpcode();
343 if (CastOpcode != Instruction::ZExt)
344 return false;
345
346 // TODO: Enhance logic for other BinOps and remove this check.
347 if (!BinOp1->isBitwiseLogicOp())
348 return false;
349
350 auto AssocOpcode = BinOp1->getOpcode();
351 auto *BinOp2 = dyn_cast<BinaryOperator>(Cast->getOperand(0));
352 if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
353 return false;
354
355 Constant *C1, *C2;
356 if (!match(BinOp1->getOperand(1), m_Constant(C1)) ||
357 !match(BinOp2->getOperand(1), m_Constant(C2)))
358 return false;
359
360 // TODO: This assumes a zext cast.
361 // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
362 // to the destination type might lose bits.
363
364 // Fold the constants together in the destination type:
365 // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
366 const DataLayout &DL = IC.getDataLayout();
367 Type *DestTy = C1->getType();
368 Constant *CastC2 = ConstantFoldCastOperand(CastOpcode, C2, DestTy, DL);
369 if (!CastC2)
370 return false;
371 Constant *FoldedC = ConstantFoldBinaryOpOperands(AssocOpcode, C1, CastC2, DL);
372 if (!FoldedC)
373 return false;
374
375 IC.replaceOperand(*Cast, 0, BinOp2->getOperand(0));
376 IC.replaceOperand(*BinOp1, 1, FoldedC);
378 Cast->dropPoisonGeneratingFlags();
379 return true;
380}
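// A concrete instance of the fold above (hypothetical IR, zext plus bitwise and):
//    %t = and i8 %x, 6
//    %z = zext i8 %t to i32
//    %r = and i32 %z, 12
// becomes, after folding the constants in the destination type (12 & zext(6) == 4):
//    %z = zext i8 %x to i32
//    %r = and i32 %z, 4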
381
382// Simplifies IntToPtr/PtrToInt RoundTrip Cast.
383// inttoptr ( ptrtoint (x) ) --> x
384Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
385 auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
386 if (IntToPtr && DL.getTypeSizeInBits(IntToPtr->getDestTy()) ==
387 DL.getTypeSizeInBits(IntToPtr->getSrcTy())) {
388 auto *PtrToInt = dyn_cast<PtrToIntInst>(IntToPtr->getOperand(0));
389 Type *CastTy = IntToPtr->getDestTy();
390 if (PtrToInt &&
391 CastTy->getPointerAddressSpace() ==
392 PtrToInt->getSrcTy()->getPointerAddressSpace() &&
393 DL.getTypeSizeInBits(PtrToInt->getSrcTy()) ==
394 DL.getTypeSizeInBits(PtrToInt->getDestTy()))
395 return PtrToInt->getOperand(0);
396 }
397 return nullptr;
398}
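// For example (a sketch, assuming the pointer and integer types are both 64 bits
// wide and the address spaces match):
//    %i = ptrtoint ptr %p to i64
//    %q = inttoptr i64 %i to ptr
// simplifies to just %p. If either cast changes the bit width or the address
// space, the round trip is left alone.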
399
400/// This performs a few simplifications for operators that are associative or
401/// commutative:
402///
403/// Commutative operators:
404///
405/// 1. Order operands such that they are listed from right (least complex) to
406/// left (most complex). This puts constants before unary operators before
407/// binary operators.
408///
409/// Associative operators:
410///
411/// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
412/// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
413///
414/// Associative and commutative operators:
415///
416/// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
417/// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
418/// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
419/// if C1 and C2 are constants.
420bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
421  Instruction::BinaryOps Opcode = I.getOpcode();
422 bool Changed = false;
423
424 do {
425 // Order operands such that they are listed from right (least complex) to
426 // left (most complex). This puts constants before unary operators before
427 // binary operators.
428 if (I.isCommutative() && getComplexity(I.getOperand(0)) <
429 getComplexity(I.getOperand(1)))
430 Changed = !I.swapOperands();
431
432 if (I.isCommutative()) {
433 if (auto Pair = matchSymmetricPair(I.getOperand(0), I.getOperand(1))) {
434 replaceOperand(I, 0, Pair->first);
435 replaceOperand(I, 1, Pair->second);
436 Changed = true;
437 }
438 }
439
440 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
441 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
442
443 if (I.isAssociative()) {
444 // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
445 if (Op0 && Op0->getOpcode() == Opcode) {
446 Value *A = Op0->getOperand(0);
447 Value *B = Op0->getOperand(1);
448 Value *C = I.getOperand(1);
449
450 // Does "B op C" simplify?
451 if (Value *V = simplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
452 // It simplifies to V. Form "A op V".
453 replaceOperand(I, 0, A);
454 replaceOperand(I, 1, V);
455 bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
456 bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(*Op0);
457
458 // Conservatively clear all optional flags since they may not be
459 // preserved by the reassociation. Reset nsw/nuw based on the above
460 // analysis.
461        ClearSubclassDataAfterReassociation(I);
462
463 // Note: this is only valid because SimplifyBinOp doesn't look at
464 // the operands to Op0.
465 if (IsNUW)
466 I.setHasNoUnsignedWrap(true);
467
468 if (IsNSW)
469 I.setHasNoSignedWrap(true);
470
471 Changed = true;
472 ++NumReassoc;
473 continue;
474 }
475 }
476
477 // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
478 if (Op1 && Op1->getOpcode() == Opcode) {
479 Value *A = I.getOperand(0);
480 Value *B = Op1->getOperand(0);
481 Value *C = Op1->getOperand(1);
482
483 // Does "A op B" simplify?
484 if (Value *V = simplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
485 // It simplifies to V. Form "V op C".
486 replaceOperand(I, 0, V);
487 replaceOperand(I, 1, C);
488 // Conservatively clear the optional flags, since they may not be
489 // preserved by the reassociation.
490        ClearSubclassDataAfterReassociation(I);
491        Changed = true;
492 ++NumReassoc;
493 continue;
494 }
495 }
496 }
497
498 if (I.isAssociative() && I.isCommutative()) {
499 if (simplifyAssocCastAssoc(&I, *this)) {
500 Changed = true;
501 ++NumReassoc;
502 continue;
503 }
504
505 // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
506 if (Op0 && Op0->getOpcode() == Opcode) {
507 Value *A = Op0->getOperand(0);
508 Value *B = Op0->getOperand(1);
509 Value *C = I.getOperand(1);
510
511 // Does "C op A" simplify?
512 if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
513 // It simplifies to V. Form "V op B".
514 replaceOperand(I, 0, V);
515 replaceOperand(I, 1, B);
516 // Conservatively clear the optional flags, since they may not be
517 // preserved by the reassociation.
518        ClearSubclassDataAfterReassociation(I);
519        Changed = true;
520 ++NumReassoc;
521 continue;
522 }
523 }
524
525 // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
526 if (Op1 && Op1->getOpcode() == Opcode) {
527 Value *A = I.getOperand(0);
528 Value *B = Op1->getOperand(0);
529 Value *C = Op1->getOperand(1);
530
531 // Does "C op A" simplify?
532 if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
533 // It simplifies to V. Form "B op V".
534 replaceOperand(I, 0, B);
535 replaceOperand(I, 1, V);
536 // Conservatively clear the optional flags, since they may not be
537 // preserved by the reassociation.
538        ClearSubclassDataAfterReassociation(I);
539        Changed = true;
540 ++NumReassoc;
541 continue;
542 }
543 }
544
545 // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
546 // if C1 and C2 are constants.
547 Value *A, *B;
548 Constant *C1, *C2, *CRes;
549 if (Op0 && Op1 &&
550 Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
551 match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
552 match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2)))) &&
553 (CRes = ConstantFoldBinaryOpOperands(Opcode, C1, C2, DL))) {
554 bool IsNUW = hasNoUnsignedWrap(I) &&
555 hasNoUnsignedWrap(*Op0) &&
556 hasNoUnsignedWrap(*Op1);
557 BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
558 BinaryOperator::CreateNUW(Opcode, A, B) :
559 BinaryOperator::Create(Opcode, A, B);
560
561 if (isa<FPMathOperator>(NewBO)) {
562 FastMathFlags Flags = I.getFastMathFlags() &
563 Op0->getFastMathFlags() &
564 Op1->getFastMathFlags();
565 NewBO->setFastMathFlags(Flags);
566 }
567 InsertNewInstWith(NewBO, I.getIterator());
568 NewBO->takeName(Op1);
569 replaceOperand(I, 0, NewBO);
570 replaceOperand(I, 1, CRes);
571 // Conservatively clear the optional flags, since they may not be
572 // preserved by the reassociation.
573      ClearSubclassDataAfterReassociation(I);
574      if (IsNUW)
575 I.setHasNoUnsignedWrap(true);
576
577 Changed = true;
578 continue;
579 }
580 }
581
582 // No further simplifications.
583 return Changed;
584 } while (true);
585}
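// A small example of transform 2 above (hypothetical IR): for
//    %t = add i32 %x, 40
//    %r = add i32 %t, 2
// "B op C" is 40 + 2, which simplifies to 42, so the outer add is rewritten as
//    %r = add i32 %x, 42
// (nuw/nsw are only re-applied when the checks above show they still hold).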
586
587/// Return whether "X LOp (Y ROp Z)" is always equal to
588/// "(X LOp Y) ROp (X LOp Z)".
589static bool leftDistributesOverRight(Instruction::BinaryOps LOp,
590                                     Instruction::BinaryOps ROp) {
591  // X & (Y | Z) <--> (X & Y) | (X & Z)
592 // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
593 if (LOp == Instruction::And)
594 return ROp == Instruction::Or || ROp == Instruction::Xor;
595
596 // X | (Y & Z) <--> (X | Y) & (X | Z)
597 if (LOp == Instruction::Or)
598 return ROp == Instruction::And;
599
600 // X * (Y + Z) <--> (X * Y) + (X * Z)
601 // X * (Y - Z) <--> (X * Y) - (X * Z)
602 if (LOp == Instruction::Mul)
603 return ROp == Instruction::Add || ROp == Instruction::Sub;
604
605 return false;
606}
607
608/// Return whether "(X LOp Y) ROp Z" is always equal to
609/// "(X ROp Z) LOp (Y ROp Z)".
610static bool rightDistributesOverLeft(Instruction::BinaryOps LOp,
611                                     Instruction::BinaryOps ROp) {
612  if (Instruction::isCommutative(ROp))
613    return leftDistributesOverRight(ROp, LOp);
614
615 // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
616  return Instruction::isBitwiseLogicOp(LOp) && Instruction::isShift(ROp);
617
618 // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
619 // but this requires knowing that the addition does not overflow and other
620 // such subtleties.
621}
622
623/// This function returns the identity value for the given opcode, which can be
624/// used to factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
625static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) {
626  if (isa<Constant>(V))
627 return nullptr;
628
629 return ConstantExpr::getBinOpIdentity(Opcode, V->getType());
630}
631
632/// This function predicates factorization using distributive laws. By default,
633/// it just returns the 'Op' inputs. But for special-cases like
634/// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
635/// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
636/// allow more factorization opportunities.
637static Instruction::BinaryOps
638getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op,
639                          Value *&LHS, Value *&RHS, BinaryOperator *OtherOp) {
640 assert(Op && "Expected a binary operator");
641 LHS = Op->getOperand(0);
642 RHS = Op->getOperand(1);
643 if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
644 Constant *C;
645 if (match(Op, m_Shl(m_Value(), m_Constant(C)))) {
646 // X << C --> X * (1 << C)
647 RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), C);
648 return Instruction::Mul;
649 }
650 // TODO: We can add other conversions e.g. shr => div etc.
651 }
652 if (Instruction::isBitwiseLogicOp(TopOpcode)) {
653 if (OtherOp && OtherOp->getOpcode() == Instruction::AShr &&
655 // lshr nneg C, X --> ashr nneg C, X
656 return Instruction::AShr;
657 }
658 }
659 return Op->getOpcode();
660}
661
662/// This tries to simplify binary operations by factorizing out common terms
663/// (e.g. "(A*B)+(A*C)" -> "A*(B+C)").
664static Value *tryFactorization(BinaryOperator &I, SimplifyQuery SQ,
665                               InstCombiner::BuilderTy &Builder,
666                               Instruction::BinaryOps InnerOpcode, Value *A,
667 Value *B, Value *C, Value *D) {
668 assert(A && B && C && D && "All values must be provided");
669
670 Value *V = nullptr;
671 Value *RetVal = nullptr;
672 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
673 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
674
675 // Does "X op' Y" always equal "Y op' X"?
676 bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
677
678 // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
679 if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode)) {
680 // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
681 // commutative case, "(A op' B) op (C op' A)"?
682 if (A == C || (InnerCommutative && A == D)) {
683 if (A != C)
684 std::swap(C, D);
685 // Consider forming "A op' (B op D)".
686 // If "B op D" simplifies then it can be formed with no cost.
687 V = simplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
688
689 // If "B op D" doesn't simplify then only go on if one of the existing
690 // operations "A op' B" and "C op' D" will be zapped as no longer used.
691 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
692 V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
693 if (V)
694 RetVal = Builder.CreateBinOp(InnerOpcode, A, V);
695 }
696 }
697
698 // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
699 if (!RetVal && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) {
700 // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
701 // commutative case, "(A op' B) op (B op' D)"?
702 if (B == D || (InnerCommutative && B == C)) {
703 if (B != D)
704 std::swap(C, D);
705 // Consider forming "(A op C) op' B".
706 // If "A op C" simplifies then it can be formed with no cost.
707 V = simplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
708
709 // If "A op C" doesn't simplify then only go on if one of the existing
710 // operations "A op' B" and "C op' D" will be zapped as no longer used.
711 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
712 V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
713 if (V)
714 RetVal = Builder.CreateBinOp(InnerOpcode, V, B);
715 }
716 }
717
718 if (!RetVal)
719 return nullptr;
720
721 ++NumFactor;
722 RetVal->takeName(&I);
723
724 // Try to add no-overflow flags to the final value.
725 if (isa<OverflowingBinaryOperator>(RetVal)) {
726 bool HasNSW = false;
727 bool HasNUW = false;
728 if (isa<OverflowingBinaryOperator>(&I)) {
729 HasNSW = I.hasNoSignedWrap();
730 HasNUW = I.hasNoUnsignedWrap();
731 }
732 if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
733 HasNSW &= LOBO->hasNoSignedWrap();
734 HasNUW &= LOBO->hasNoUnsignedWrap();
735 }
736
737 if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
738 HasNSW &= ROBO->hasNoSignedWrap();
739 HasNUW &= ROBO->hasNoUnsignedWrap();
740 }
741
742 if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) {
743 // We can propagate 'nsw' if we know that
744 // %Y = mul nsw i16 %X, C
745 // %Z = add nsw i16 %Y, %X
746 // =>
747 // %Z = mul nsw i16 %X, C+1
748 //
749 // iff C+1 isn't INT_MIN
750 const APInt *CInt;
751 if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
752 cast<Instruction>(RetVal)->setHasNoSignedWrap(HasNSW);
753
754 // nuw can be propagated with any constant or nuw value.
755 cast<Instruction>(RetVal)->setHasNoUnsignedWrap(HasNUW);
756 }
757 }
758 return RetVal;
759}
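// For example (a sketch of the factorization above):
//    %p = mul i32 %a, %b
//    %q = mul i32 %a, %c
//    %r = add i32 %p, %q
// can be rewritten as
//    %s = add i32 %b, %c
//    %r = mul i32 %a, %s
// provided "%b + %c" simplifies or at least one of the inner multiplies has no
// other uses.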
760
761// If `I` has one Const operand and the other matches `(ctpop (not x))`,
762// replace `(ctpop (not x))` with `(sub nuw nsw BitWidth(x), (ctpop x))`.
763// This is only useful if the new subtract can fold, so we only handle the
764// following cases:
765//    1) (add/sub/disjoint_or C, (ctpop (not x))
766//        -> (add/sub/disjoint_or C', (ctpop x))
767//    2) (cmp pred C, (ctpop (not x))
768//        -> (cmp pred C', (ctpop x))
769Instruction *InstCombinerImpl::tryFoldInstWithCtpopWithNot(Instruction *I) {
770  unsigned Opc = I->getOpcode();
771 unsigned ConstIdx = 1;
772 switch (Opc) {
773 default:
774 return nullptr;
775 // (ctpop (not x)) <-> (sub nuw nsw BitWidth(x) - (ctpop x))
776  // We can fold the BitWidth(x) with add/sub/icmp as long as the other operand
777 // is constant.
778 case Instruction::Sub:
779 ConstIdx = 0;
780 break;
781 case Instruction::ICmp:
782    // Signed predicates aren't correct in some edge cases (e.g. for i2 types).
783    // Also, since (ctpop x) is known to be in [0, log2(BitWidth(x))], almost
784    // all signed comparisons against it are simplified to unsigned ones anyway.
785 if (cast<ICmpInst>(I)->isSigned())
786 return nullptr;
787 break;
788 case Instruction::Or:
789 if (!match(I, m_DisjointOr(m_Value(), m_Value())))
790 return nullptr;
791 [[fallthrough]];
792 case Instruction::Add:
793 break;
794 }
795
796 Value *Op;
797 // Find ctpop.
798 if (!match(I->getOperand(1 - ConstIdx),
799 m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(Op)))))
800 return nullptr;
801
802 Constant *C;
803 // Check other operand is ImmConstant.
804 if (!match(I->getOperand(ConstIdx), m_ImmConstant(C)))
805 return nullptr;
806
807 Type *Ty = Op->getType();
808 Constant *BitWidthC = ConstantInt::get(Ty, Ty->getScalarSizeInBits());
809 // Need extra check for icmp. Note if this check is true, it generally means
810 // the icmp will simplify to true/false.
811 if (Opc == Instruction::ICmp && !cast<ICmpInst>(I)->isEquality()) {
812 Constant *Cmp =
814 if (!Cmp || !Cmp->isZeroValue())
815 return nullptr;
816 }
817
818 // Check we can invert `(not x)` for free.
819 bool Consumes = false;
820 if (!isFreeToInvert(Op, Op->hasOneUse(), Consumes) || !Consumes)
821 return nullptr;
822 Value *NotOp = getFreelyInverted(Op, Op->hasOneUse(), &Builder);
823 assert(NotOp != nullptr &&
824 "Desync between isFreeToInvert and getFreelyInverted");
825
826 Value *CtpopOfNotOp = Builder.CreateIntrinsic(Ty, Intrinsic::ctpop, NotOp);
827
828 Value *R = nullptr;
829
830 // Do the transformation here to avoid potentially introducing an infinite
831 // loop.
832 switch (Opc) {
833 case Instruction::Sub:
834 R = Builder.CreateAdd(CtpopOfNotOp, ConstantExpr::getSub(C, BitWidthC));
835 break;
836 case Instruction::Or:
837 case Instruction::Add:
838 R = Builder.CreateSub(ConstantExpr::getAdd(C, BitWidthC), CtpopOfNotOp);
839 break;
840 case Instruction::ICmp:
841 R = Builder.CreateICmp(cast<ICmpInst>(I)->getSwappedPredicate(),
842 CtpopOfNotOp, ConstantExpr::getSub(BitWidthC, C));
843 break;
844 default:
845 llvm_unreachable("Unhandled Opcode");
846 }
847 assert(R != nullptr);
848 return replaceInstUsesWith(*I, R);
849}
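// Worked example of case 1 above (a sketch, using i8 so BitWidth(x) == 8):
//    %n = xor i8 %x, -1                    ; not x
//    %p = call i8 @llvm.ctpop.i8(i8 %n)
//    %r = add i8 %p, 10
// Using ctpop(~x) == 8 - ctpop(x), this becomes:
//    %p2 = call i8 @llvm.ctpop.i8(i8 %x)
//    %r  = sub i8 18, %p2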
850
851// (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
852// IFF
853// 1) the logic_shifts match
854// 2) either both binops are binops and one is `and` or
855// BinOp1 is `and`
856// (logic_shift (inv_logic_shift C1, C), C) == C1 or
857//
858// -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
859//
860// (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
861// IFF
862// 1) the logic_shifts match
863// 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`).
864//
865// -> (BinOp (logic_shift (BinOp X, Y)), Mask)
866//
867// (Binop1 (Binop2 (arithmetic_shift X, Amt), Mask), (arithmetic_shift Y, Amt))
868// IFF
869// 1) Binop1 is bitwise logical operator `and`, `or` or `xor`
870// 2) Binop2 is `not`
871//
872// -> (arithmetic_shift Binop1((not X), Y), Amt)
873
874Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) {
875  const DataLayout &DL = I.getModule()->getDataLayout();
876 auto IsValidBinOpc = [](unsigned Opc) {
877 switch (Opc) {
878 default:
879 return false;
880 case Instruction::And:
881 case Instruction::Or:
882 case Instruction::Xor:
883 case Instruction::Add:
884 // Skip Sub as we only match constant masks which will canonicalize to use
885 // add.
886 return true;
887 }
888 };
889
890 // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra
891 // constraints.
892 auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2,
893 unsigned ShOpc) {
894 assert(ShOpc != Instruction::AShr);
895 return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) ||
896 ShOpc == Instruction::Shl;
897 };
898
899 auto GetInvShift = [](unsigned ShOpc) {
900 assert(ShOpc != Instruction::AShr);
901 return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr;
902 };
903
904 auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2,
905 unsigned ShOpc, Constant *CMask,
906 Constant *CShift) {
907 // If the BinOp1 is `and` we don't need to check the mask.
908 if (BinOpc1 == Instruction::And)
909 return true;
910
911    // For all other possible transforms we need a completely distributable
912    // binop/shift pair (anything but `add` + `lshr`).
913 if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc))
914 return false;
915
916 // If BinOp2 is `and`, any mask works (this only really helps for non-splat
917 // vecs, otherwise the mask will be simplified and the following check will
918 // handle it).
919 if (BinOpc2 == Instruction::And)
920 return true;
921
922 // Otherwise, need mask that meets the below requirement.
923 // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask
924 Constant *MaskInvShift =
925 ConstantFoldBinaryOpOperands(GetInvShift(ShOpc), CMask, CShift, DL);
926 return ConstantFoldBinaryOpOperands(ShOpc, MaskInvShift, CShift, DL) ==
927 CMask;
928 };
929
930 auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * {
931 Constant *CMask, *CShift;
932 Value *X, *Y, *ShiftedX, *Mask, *Shift;
933 if (!match(I.getOperand(ShOpnum),
934 m_OneUse(m_Shift(m_Value(Y), m_Value(Shift)))))
935 return nullptr;
936 if (!match(I.getOperand(1 - ShOpnum),
937 m_BinOp(m_Value(ShiftedX), m_Value(Mask))))
938 return nullptr;
939
940 if (!match(ShiftedX, m_OneUse(m_Shift(m_Value(X), m_Specific(Shift)))))
941 return nullptr;
942
943 // Make sure we are matching instruction shifts and not ConstantExpr
944 auto *IY = dyn_cast<Instruction>(I.getOperand(ShOpnum));
945 auto *IX = dyn_cast<Instruction>(ShiftedX);
946 if (!IY || !IX)
947 return nullptr;
948
949 // LHS and RHS need same shift opcode
950 unsigned ShOpc = IY->getOpcode();
951 if (ShOpc != IX->getOpcode())
952 return nullptr;
953
954 // Make sure binop is real instruction and not ConstantExpr
955 auto *BO2 = dyn_cast<Instruction>(I.getOperand(1 - ShOpnum));
956 if (!BO2)
957 return nullptr;
958
959 unsigned BinOpc = BO2->getOpcode();
960 // Make sure we have valid binops.
961 if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc))
962 return nullptr;
963
964 if (ShOpc == Instruction::AShr) {
965 if (Instruction::isBitwiseLogicOp(I.getOpcode()) &&
966 BinOpc == Instruction::Xor && match(Mask, m_AllOnes())) {
967 Value *NotX = Builder.CreateNot(X);
968 Value *NewBinOp = Builder.CreateBinOp(I.getOpcode(), Y, NotX);
969      return BinaryOperator::Create(
970          static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp, Shift);
971 }
972
973 return nullptr;
974 }
975
976 // If BinOp1 == BinOp2 and it's bitwise or shl with add, then just
977    // distribute to drop the shift, regardless of the constants.
978 if (BinOpc == I.getOpcode() &&
979 IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) {
980 Value *NewBinOp2 = Builder.CreateBinOp(I.getOpcode(), X, Y);
981 Value *NewBinOp1 = Builder.CreateBinOp(
982 static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp2, Shift);
983 return BinaryOperator::Create(I.getOpcode(), NewBinOp1, Mask);
984 }
985
986 // Otherwise we can only distribute by constant shifting the mask, so
987 // ensure we have constants.
988 if (!match(Shift, m_ImmConstant(CShift)))
989 return nullptr;
990 if (!match(Mask, m_ImmConstant(CMask)))
991 return nullptr;
992
993 // Check if we can distribute the binops.
994 if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift))
995 return nullptr;
996
997 Constant *NewCMask =
998 ConstantFoldBinaryOpOperands(GetInvShift(ShOpc), CMask, CShift, DL);
999 Value *NewBinOp2 = Builder.CreateBinOp(
1000 static_cast<Instruction::BinaryOps>(BinOpc), X, NewCMask);
1001 Value *NewBinOp1 = Builder.CreateBinOp(I.getOpcode(), Y, NewBinOp2);
1002 return BinaryOperator::Create(static_cast<Instruction::BinaryOps>(ShOpc),
1003 NewBinOp1, CShift);
1004 };
1005
1006 if (Instruction *R = MatchBinOp(0))
1007 return R;
1008 return MatchBinOp(1);
1009}
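// A concrete instance of the second pattern above (a sketch), where
// Binop1 == Binop2 == xor and both shifts match:
//    %sx = lshr i32 %x, 3
//    %t  = xor i32 %sx, %mask
//    %sy = lshr i32 %y, 3
//    %r  = xor i32 %t, %sy
// distributes to:
//    %xy = xor i32 %x, %y
//    %sh = lshr i32 %xy, 3
//    %r  = xor i32 %sh, %mask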
1010
1011// (Binop (zext C), (select C, T, F))
1012// -> (select C, (binop 1, T), (binop 0, F))
1013//
1014// (Binop (sext C), (select C, T, F))
1015// -> (select C, (binop -1, T), (binop 0, F))
1016//
1017// Attempt to simplify binary operations into a select with folded args, when
1018// one operand of the binop is a select instruction and the other operand is a
1019// zext/sext extension, whose value is the select condition.
1020Instruction *
1021InstCombinerImpl::foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I) {
1022  // TODO: this simplification may be extended to any speculatable instruction,
1023 // not just binops, and would possibly be handled better in FoldOpIntoSelect.
1024 Instruction::BinaryOps Opc = I.getOpcode();
1025 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1026 Value *A, *CondVal, *TrueVal, *FalseVal;
1027 Value *CastOp;
1028
1029 auto MatchSelectAndCast = [&](Value *CastOp, Value *SelectOp) {
1030 return match(CastOp, m_ZExtOrSExt(m_Value(A))) &&
1031 A->getType()->getScalarSizeInBits() == 1 &&
1032 match(SelectOp, m_Select(m_Value(CondVal), m_Value(TrueVal),
1033 m_Value(FalseVal)));
1034 };
1035
1036  // Make sure one side of the binop is a select instruction, and the other is
1037  // a zero/sign extension operating on an i1.
1038 if (MatchSelectAndCast(LHS, RHS))
1039 CastOp = LHS;
1040 else if (MatchSelectAndCast(RHS, LHS))
1041 CastOp = RHS;
1042 else
1043 return nullptr;
1044
1045 auto NewFoldedConst = [&](bool IsTrueArm, Value *V) {
1046 bool IsCastOpRHS = (CastOp == RHS);
1047 bool IsZExt = isa<ZExtInst>(CastOp);
1048 Constant *C;
1049
1050 if (IsTrueArm) {
1051 C = Constant::getNullValue(V->getType());
1052 } else if (IsZExt) {
1053 unsigned BitWidth = V->getType()->getScalarSizeInBits();
1054 C = Constant::getIntegerValue(V->getType(), APInt(BitWidth, 1));
1055 } else {
1056 C = Constant::getAllOnesValue(V->getType());
1057 }
1058
1059 return IsCastOpRHS ? Builder.CreateBinOp(Opc, V, C)
1060 : Builder.CreateBinOp(Opc, C, V);
1061 };
1062
1063 // If the value used in the zext/sext is the select condition, or the negated
1064 // of the select condition, the binop can be simplified.
1065 if (CondVal == A) {
1066 Value *NewTrueVal = NewFoldedConst(false, TrueVal);
1067 return SelectInst::Create(CondVal, NewTrueVal,
1068 NewFoldedConst(true, FalseVal));
1069 }
1070
1071 if (match(A, m_Not(m_Specific(CondVal)))) {
1072 Value *NewTrueVal = NewFoldedConst(true, TrueVal);
1073 return SelectInst::Create(CondVal, NewTrueVal,
1074 NewFoldedConst(false, FalseVal));
1075 }
1076
1077 return nullptr;
1078}
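// For instance (a sketch of the zext case above):
//    %z = zext i1 %c to i32
//    %s = select i1 %c, i32 %t, i32 %f
//    %r = add i32 %z, %s
// becomes a select between the two folded arms:
//    %t1 = add i32 %t, 1
//    %f1 = add i32 %f, 0                   ; folds away to %f
//    %r  = select i1 %c, i32 %t1, i32 %f1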
1079
1080Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) {
1081  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1082 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
1083 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
1084 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1085 Value *A, *B, *C, *D;
1086 Instruction::BinaryOps LHSOpcode, RHSOpcode;
1087
1088 if (Op0)
1089 LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B, Op1);
1090 if (Op1)
1091 RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D, Op0);
1092
1093 // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
1094 // a common term.
1095 if (Op0 && Op1 && LHSOpcode == RHSOpcode)
1096 if (Value *V = tryFactorization(I, SQ, Builder, LHSOpcode, A, B, C, D))
1097 return V;
1098
1099 // The instruction has the form "(A op' B) op (C)". Try to factorize common
1100 // term.
1101 if (Op0)
1102 if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
1103 if (Value *V =
1104 tryFactorization(I, SQ, Builder, LHSOpcode, A, B, RHS, Ident))
1105 return V;
1106
1107 // The instruction has the form "(B) op (C op' D)". Try to factorize common
1108 // term.
1109 if (Op1)
1110 if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
1111 if (Value *V =
1112 tryFactorization(I, SQ, Builder, RHSOpcode, LHS, Ident, C, D))
1113 return V;
1114
1115 return nullptr;
1116}
1117
1118/// This tries to simplify binary operations which some other binary operation
1119/// distributes over, either by factorizing out common terms
1120/// (e.g. "(A*B)+(A*C)" -> "A*(B+C)") or by expanding out if this results in
1121/// simplifications (e.g. "A & (B | C)" -> "(A&B) | (A&C)" if that is a win).
1122/// Returns the simplified value, or null if it didn't simplify.
1123Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) {
1124  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1125 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
1126 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
1127 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1128
1129 // Factorization.
1130 if (Value *R = tryFactorizationFolds(I))
1131 return R;
1132
1133 // Expansion.
1134 if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
1135 // The instruction has the form "(A op' B) op C". See if expanding it out
1136 // to "(A op C) op' (B op C)" results in simplifications.
1137 Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
1138 Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
1139
1140 // Disable the use of undef because it's not safe to distribute undef.
1141 auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
1142 Value *L = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
1143 Value *R = simplifyBinOp(TopLevelOpcode, B, C, SQDistributive);
1144
1145 // Do "A op C" and "B op C" both simplify?
1146 if (L && R) {
1147 // They do! Return "L op' R".
1148 ++NumExpand;
1149 C = Builder.CreateBinOp(InnerOpcode, L, R);
1150 C->takeName(&I);
1151 return C;
1152 }
1153
1154 // Does "A op C" simplify to the identity value for the inner opcode?
1155 if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
1156 // They do! Return "B op C".
1157 ++NumExpand;
1158 C = Builder.CreateBinOp(TopLevelOpcode, B, C);
1159 C->takeName(&I);
1160 return C;
1161 }
1162
1163 // Does "B op C" simplify to the identity value for the inner opcode?
1164 if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
1165 // They do! Return "A op C".
1166 ++NumExpand;
1167 C = Builder.CreateBinOp(TopLevelOpcode, A, C);
1168 C->takeName(&I);
1169 return C;
1170 }
1171 }
1172
1173 if (Op1 && leftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
1174 // The instruction has the form "A op (B op' C)". See if expanding it out
1175 // to "(A op B) op' (A op C)" results in simplifications.
1176 Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
1177 Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
1178
1179 // Disable the use of undef because it's not safe to distribute undef.
1180 auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
1181 Value *L = simplifyBinOp(TopLevelOpcode, A, B, SQDistributive);
1182 Value *R = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
1183
1184 // Do "A op B" and "A op C" both simplify?
1185 if (L && R) {
1186 // They do! Return "L op' R".
1187 ++NumExpand;
1188 A = Builder.CreateBinOp(InnerOpcode, L, R);
1189 A->takeName(&I);
1190 return A;
1191 }
1192
1193 // Does "A op B" simplify to the identity value for the inner opcode?
1194 if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
1195 // They do! Return "A op C".
1196 ++NumExpand;
1197 A = Builder.CreateBinOp(TopLevelOpcode, A, C);
1198 A->takeName(&I);
1199 return A;
1200 }
1201
1202 // Does "A op C" simplify to the identity value for the inner opcode?
1203 if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
1204 // They do! Return "A op B".
1205 ++NumExpand;
1206 A = Builder.CreateBinOp(TopLevelOpcode, A, B);
1207 A->takeName(&I);
1208 return A;
1209 }
1210 }
1211
1212  return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
1213}
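// Expansion example (a sketch): for "(X | 7) & 8", expanding gives
// "(X & 8) | (7 & 8)"; the second term folds to 0, the identity of 'or', so the
// whole expression simplifies to "X & 8".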
1214
1215static std::optional<std::pair<Value *, Value *>>
1216matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS) {
1217  if (LHS->getParent() != RHS->getParent())
1218 return std::nullopt;
1219
1220 if (LHS->getNumIncomingValues() < 2)
1221 return std::nullopt;
1222
1223 if (!equal(LHS->blocks(), RHS->blocks()))
1224 return std::nullopt;
1225
1226 Value *L0 = LHS->getIncomingValue(0);
1227 Value *R0 = RHS->getIncomingValue(0);
1228
1229 for (unsigned I = 1, E = LHS->getNumIncomingValues(); I != E; ++I) {
1230 Value *L1 = LHS->getIncomingValue(I);
1231 Value *R1 = RHS->getIncomingValue(I);
1232
1233 if ((L0 == L1 && R0 == R1) || (L0 == R1 && R0 == L1))
1234 continue;
1235
1236 return std::nullopt;
1237 }
1238
1239 return std::optional(std::pair(L0, R0));
1240}
1241
1242std::optional<std::pair<Value *, Value *>>
1243InstCombinerImpl::matchSymmetricPair(Value *LHS, Value *RHS) {
1244 Instruction *LHSInst = dyn_cast<Instruction>(LHS);
1245 Instruction *RHSInst = dyn_cast<Instruction>(RHS);
1246 if (!LHSInst || !RHSInst || LHSInst->getOpcode() != RHSInst->getOpcode())
1247 return std::nullopt;
1248 switch (LHSInst->getOpcode()) {
1249 case Instruction::PHI:
1250 return matchSymmetricPhiNodesPair(cast<PHINode>(LHS), cast<PHINode>(RHS));
1251 case Instruction::Select: {
1252 Value *Cond = LHSInst->getOperand(0);
1253 Value *TrueVal = LHSInst->getOperand(1);
1254 Value *FalseVal = LHSInst->getOperand(2);
1255 if (Cond == RHSInst->getOperand(0) && TrueVal == RHSInst->getOperand(2) &&
1256 FalseVal == RHSInst->getOperand(1))
1257 return std::pair(TrueVal, FalseVal);
1258 return std::nullopt;
1259 }
1260 case Instruction::Call: {
1261 // Match min(a, b) and max(a, b)
1262 MinMaxIntrinsic *LHSMinMax = dyn_cast<MinMaxIntrinsic>(LHSInst);
1263 MinMaxIntrinsic *RHSMinMax = dyn_cast<MinMaxIntrinsic>(RHSInst);
1264 if (LHSMinMax && RHSMinMax &&
1265 LHSMinMax->getPredicate() ==
1266            ICmpInst::getSwappedPredicate(RHSMinMax->getPredicate()) &&
1267        ((LHSMinMax->getLHS() == RHSMinMax->getLHS() &&
1268 LHSMinMax->getRHS() == RHSMinMax->getRHS()) ||
1269 (LHSMinMax->getLHS() == RHSMinMax->getRHS() &&
1270 LHSMinMax->getRHS() == RHSMinMax->getLHS())))
1271 return std::pair(LHSMinMax->getLHS(), LHSMinMax->getRHS());
1272 return std::nullopt;
1273 }
1274 default:
1275 return std::nullopt;
1276 }
1277}
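// Example (a sketch): the operands of
//    or (select i1 %c, i8 %a, i8 %b), (select i1 %c, i8 %b, i8 %a)
// form a symmetric pair, so a commutative binop over them can be rewritten to
// operate directly on (%a, %b). The same idea applies to mirrored PHIs and to
// min/max pairs such as smin(%a, %b) paired with smax(%a, %b).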
1278
1279Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(Instruction &I,
1280                                                        Value *LHS,
1281 Value *RHS) {
1282 Value *A, *B, *C, *D, *E, *F;
1283 bool LHSIsSelect = match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C)));
1284 bool RHSIsSelect = match(RHS, m_Select(m_Value(D), m_Value(E), m_Value(F)));
1285 if (!LHSIsSelect && !RHSIsSelect)
1286 return nullptr;
1287
1288 FastMathFlags FMF;
1289  BuilderTy::FastMathFlagGuard Guard(Builder);
1290  if (isa<FPMathOperator>(&I)) {
1291 FMF = I.getFastMathFlags();
1292    Builder.setFastMathFlags(FMF);
1293  }
1294
1295 Instruction::BinaryOps Opcode = I.getOpcode();
1296  SimplifyQuery Q = SQ.getWithInstruction(&I);
1297
1298 Value *Cond, *True = nullptr, *False = nullptr;
1299
1300 // Special-case for add/negate combination. Replace the zero in the negation
1301 // with the trailing add operand:
1302 // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N)
1303 // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False
1304 auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * {
1305 // We need an 'add' and exactly 1 arm of the select to have been simplified.
1306 if (Opcode != Instruction::Add || (!True && !False) || (True && False))
1307 return nullptr;
1308
1309 Value *N;
1310 if (True && match(FVal, m_Neg(m_Value(N)))) {
1311 Value *Sub = Builder.CreateSub(Z, N);
1312 return Builder.CreateSelect(Cond, True, Sub, I.getName());
1313 }
1314 if (False && match(TVal, m_Neg(m_Value(N)))) {
1315 Value *Sub = Builder.CreateSub(Z, N);
1316 return Builder.CreateSelect(Cond, Sub, False, I.getName());
1317 }
1318 return nullptr;
1319 };
1320
1321 if (LHSIsSelect && RHSIsSelect && A == D) {
1322 // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
1323 Cond = A;
1324 True = simplifyBinOp(Opcode, B, E, FMF, Q);
1325 False = simplifyBinOp(Opcode, C, F, FMF, Q);
1326
1327 if (LHS->hasOneUse() && RHS->hasOneUse()) {
1328 if (False && !True)
1329 True = Builder.CreateBinOp(Opcode, B, E);
1330 else if (True && !False)
1331 False = Builder.CreateBinOp(Opcode, C, F);
1332 }
1333 } else if (LHSIsSelect && LHS->hasOneUse()) {
1334 // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
1335 Cond = A;
1336 True = simplifyBinOp(Opcode, B, RHS, FMF, Q);
1337 False = simplifyBinOp(Opcode, C, RHS, FMF, Q);
1338 if (Value *NewSel = foldAddNegate(B, C, RHS))
1339 return NewSel;
1340 } else if (RHSIsSelect && RHS->hasOneUse()) {
1341 // X op (D ? E : F) -> D ? (X op E) : (X op F)
1342 Cond = D;
1343 True = simplifyBinOp(Opcode, LHS, E, FMF, Q);
1344 False = simplifyBinOp(Opcode, LHS, F, FMF, Q);
1345 if (Value *NewSel = foldAddNegate(E, F, LHS))
1346 return NewSel;
1347 }
1348
1349 if (!True || !False)
1350 return nullptr;
1351
1352 Value *SI = Builder.CreateSelect(Cond, True, False);
1353 SI->takeName(&I);
1354 return SI;
1355}
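// For instance (a sketch of the shared-condition case):
//    %s1 = select i1 %c, i32 10, i32 %x
//    %s2 = select i1 %c, i32 32, i32 %y
//    %r  = add i32 %s1, %s2
// Here 10 + 32 folds to 42, and when both selects have no other uses the other
// arm is materialized, giving "select i1 %c, i32 42, i32 (%x + %y)".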
1356
1357/// Freely adapt every user of V as if V was changed to !V.
1358/// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
1359void InstCombinerImpl::freelyInvertAllUsersOf(Value *I, Value *IgnoredUser) {
1360  assert(!isa<Constant>(I) && "Shouldn't invert users of constant");
1361 for (User *U : make_early_inc_range(I->users())) {
1362 if (U == IgnoredUser)
1363 continue; // Don't consider this user.
1364 switch (cast<Instruction>(U)->getOpcode()) {
1365 case Instruction::Select: {
1366 auto *SI = cast<SelectInst>(U);
1367 SI->swapValues();
1368 SI->swapProfMetadata();
1369 break;
1370 }
1371 case Instruction::Br: {
1372 BranchInst *BI = cast<BranchInst>(U);
1373 BI->swapSuccessors(); // swaps prof metadata too
1374 if (BPI)
1376 break;
1377 }
1378 case Instruction::Xor:
1379 replaceInstUsesWith(cast<Instruction>(*U), I);
1380 // Add to worklist for DCE.
1381 addToWorklist(cast<Instruction>(U));
1382 break;
1383 default:
1384 llvm_unreachable("Got unexpected user - out of sync with "
1385 "canFreelyInvertAllUsersOf() ?");
1386 }
1387 }
1388}
1389
1390/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
1391/// constant zero (which is the 'negate' form).
1392Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
1393 Value *NegV;
1394 if (match(V, m_Neg(m_Value(NegV))))
1395 return NegV;
1396
1397 // Constants can be considered to be negated values if they can be folded.
1398 if (ConstantInt *C = dyn_cast<ConstantInt>(V))
1399 return ConstantExpr::getNeg(C);
1400
1401 if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
1402 if (C->getType()->getElementType()->isIntegerTy())
1403 return ConstantExpr::getNeg(C);
1404
1405 if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
1406 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1407 Constant *Elt = CV->getAggregateElement(i);
1408 if (!Elt)
1409 return nullptr;
1410
1411 if (isa<UndefValue>(Elt))
1412 continue;
1413
1414 if (!isa<ConstantInt>(Elt))
1415 return nullptr;
1416 }
1417 return ConstantExpr::getNeg(CV);
1418 }
1419
1420 // Negate integer vector splats.
1421 if (auto *CV = dyn_cast<Constant>(V))
1422 if (CV->getType()->isVectorTy() &&
1423 CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
1424 return ConstantExpr::getNeg(CV);
1425
1426 return nullptr;
1427}
1428
1429// Try to fold:
1430// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1431// -> ({s|u}itofp (int_binop x, y))
1432// 2) (fp_binop ({s|u}itofp x), FpC)
1433// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1434//
1435// Assuming the sign of the cast for x/y is `OpsFromSigned`.
1436Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign(
1437 BinaryOperator &BO, bool OpsFromSigned, std::array<Value *, 2> IntOps,
1439
1440 Type *FPTy = BO.getType();
1441 Type *IntTy = IntOps[0]->getType();
1442
1443 unsigned IntSz = IntTy->getScalarSizeInBits();
1444  // This is the maximum number of bits in use in the integer for which the
1445  // int -> fp casts are exact.
1446  unsigned MaxRepresentableBits =
1447      APFloat::semanticsPrecision(FPTy->getScalarType()->getFltSemantics());
1448
1449  // Preserve known number of leading bits. This can allow us to trivially pass
1450  // the nsw/nuw checks later on.
1451 unsigned NumUsedLeadingBits[2] = {IntSz, IntSz};
1452
1453  // NB: This only comes up if OpsFromSigned is true, so there is no need to
1454  // cache it between calls to `foldFBinOpOfIntCastsFromSign`.
1455 auto IsNonZero = [&](unsigned OpNo) -> bool {
1456 if (OpsKnown[OpNo].hasKnownBits() &&
1457 OpsKnown[OpNo].getKnownBits(SQ).isNonZero())
1458 return true;
1459 return isKnownNonZero(IntOps[OpNo], SQ);
1460 };
1461
1462 auto IsNonNeg = [&](unsigned OpNo) -> bool {
1463 // NB: This matches the impl in ValueTracking, we just try to use cached
1464 // knownbits here. If we ever start supporting WithCache for
1465 // `isKnownNonNegative`, change this to an explicit call.
1466 return OpsKnown[OpNo].getKnownBits(SQ).isNonNegative();
1467 };
1468
1469 // Check if we know for certain that ({s|u}itofp op) is exact.
1470 auto IsValidPromotion = [&](unsigned OpNo) -> bool {
1471 // Can we treat this operand as the desired sign?
1472 if (OpsFromSigned != isa<SIToFPInst>(BO.getOperand(OpNo)) &&
1473 !IsNonNeg(OpNo))
1474 return false;
1475
1476    // If fp precision >= bitwidth(op) then it's exact.
1477 // NB: This is slightly conservative for `sitofp`. For signed conversion, we
1478 // can handle `MaxRepresentableBits == IntSz - 1` as the sign bit will be
1479 // handled specially. We can't, however, increase the bound arbitrarily for
1480 // `sitofp` as for larger sizes, it won't sign extend.
1481 if (MaxRepresentableBits < IntSz) {
1482 // Otherwise if its signed cast check that fp precisions >= bitwidth(op) -
1483 // numSignBits(op).
1484 // TODO: If we add support for `WithCache` in `ComputeNumSignBits`, change
1485 // `IntOps[OpNo]` arguments to `KnownOps[OpNo]`.
1486 if (OpsFromSigned)
1487 NumUsedLeadingBits[OpNo] = IntSz - ComputeNumSignBits(IntOps[OpNo]);
1488 // Finally for unsigned check that fp precision >= bitwidth(op) -
1489 // numLeadingZeros(op).
1490 else {
1491 NumUsedLeadingBits[OpNo] =
1492 IntSz - OpsKnown[OpNo].getKnownBits(SQ).countMinLeadingZeros();
1493 }
1494 }
1495    // NB: We could also check if op is known to be a power of 2 or zero (which
1496    // will always be representable). It's unlikely, however, that if we are
1497    // unable to bound op in any way, we will be able to pass the overflow
1498    // checks later on.
1499
1500 if (MaxRepresentableBits < NumUsedLeadingBits[OpNo])
1501 return false;
1502 // Signed + Mul also requires that op is non-zero to avoid -0 cases.
1503 return !OpsFromSigned || BO.getOpcode() != Instruction::FMul ||
1504 IsNonZero(OpNo);
1505 };
1506
1507 // If we have a constant rhs, see if we can losslessly convert it to an int.
1508 if (Op1FpC != nullptr) {
1509 // Signed + Mul req non-zero
1510 if (OpsFromSigned && BO.getOpcode() == Instruction::FMul &&
1511 !match(Op1FpC, m_NonZeroFP()))
1512 return nullptr;
1513
1515 OpsFromSigned ? Instruction::FPToSI : Instruction::FPToUI, Op1FpC,
1516 IntTy, DL);
1517 if (Op1IntC == nullptr)
1518 return nullptr;
1519 if (ConstantFoldCastOperand(OpsFromSigned ? Instruction::SIToFP
1520 : Instruction::UIToFP,
1521 Op1IntC, FPTy, DL) != Op1FpC)
1522 return nullptr;
1523
1524 // First try to keep sign of cast the same.
1525 IntOps[1] = Op1IntC;
1526 }
1527
1528 // Ensure lhs/rhs integer types match.
1529 if (IntTy != IntOps[1]->getType())
1530 return nullptr;
1531
1532 if (Op1FpC == nullptr) {
1533 if (!IsValidPromotion(1))
1534 return nullptr;
1535 }
1536 if (!IsValidPromotion(0))
1537 return nullptr;
1538
1539  // Finally, we check that the integer version of the binop will not overflow.
1540  Instruction::BinaryOps IntOpc;
1541  // Because of the precision check, we can often rule out overflows.
1542 bool NeedsOverflowCheck = true;
1543 // Try to conservatively rule out overflow based on the already done precision
1544 // checks.
1545 unsigned OverflowMaxOutputBits = OpsFromSigned ? 2 : 1;
1546 unsigned OverflowMaxCurBits =
1547 std::max(NumUsedLeadingBits[0], NumUsedLeadingBits[1]);
1548 bool OutputSigned = OpsFromSigned;
1549 switch (BO.getOpcode()) {
1550 case Instruction::FAdd:
1551 IntOpc = Instruction::Add;
1552 OverflowMaxOutputBits += OverflowMaxCurBits;
1553 break;
1554 case Instruction::FSub:
1555 IntOpc = Instruction::Sub;
1556 OverflowMaxOutputBits += OverflowMaxCurBits;
1557 break;
1558 case Instruction::FMul:
1559 IntOpc = Instruction::Mul;
1560 OverflowMaxOutputBits += OverflowMaxCurBits * 2;
1561 break;
1562 default:
1563 llvm_unreachable("Unsupported binop");
1564 }
1565 // The precision check may have already ruled out overflow.
1566 if (OverflowMaxOutputBits < IntSz) {
1567 NeedsOverflowCheck = false;
1568    // For sub, we can bound the unsigned result to an in-range signed value
1569    // (this is what allows us to avoid the overflow check for sub).
1570 if (IntOpc == Instruction::Sub)
1571 OutputSigned = true;
1572 }
1573
1574 // Precision check did not rule out overflow, so need to check.
1575 // TODO: If we add support for `WithCache` in `willNotOverflow`, change
1576 // `IntOps[...]` arguments to `KnownOps[...]`.
1577 if (NeedsOverflowCheck &&
1578 !willNotOverflow(IntOpc, IntOps[0], IntOps[1], BO, OutputSigned))
1579 return nullptr;
1580
1581 Value *IntBinOp = Builder.CreateBinOp(IntOpc, IntOps[0], IntOps[1]);
1582 if (auto *IntBO = dyn_cast<BinaryOperator>(IntBinOp)) {
1583 IntBO->setHasNoSignedWrap(OutputSigned);
1584 IntBO->setHasNoUnsignedWrap(!OutputSigned);
1585 }
1586 if (OutputSigned)
1587 return new SIToFPInst(IntBinOp, FPTy);
1588 return new UIToFPInst(IntBinOp, FPTy);
1589}
1590
1591// Try to fold:
1592// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1593// -> ({s|u}itofp (int_binop x, y))
1594// 2) (fp_binop ({s|u}itofp x), FpC)
1595// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1596Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
1597 std::array<Value *, 2> IntOps = {nullptr, nullptr};
1598 Constant *Op1FpC = nullptr;
1599 // Check for:
1600 // 1) (binop ({s|u}itofp x), ({s|u}itofp y))
1601 // 2) (binop ({s|u}itofp x), FpC)
1602 if (!match(BO.getOperand(0), m_SIToFP(m_Value(IntOps[0]))) &&
1603 !match(BO.getOperand(0), m_UIToFP(m_Value(IntOps[0]))))
1604 return nullptr;
1605
1606 if (!match(BO.getOperand(1), m_Constant(Op1FpC)) &&
1607 !match(BO.getOperand(1), m_SIToFP(m_Value(IntOps[1]))) &&
1608 !match(BO.getOperand(1), m_UIToFP(m_Value(IntOps[1]))))
1609 return nullptr;
1610
1611 // Cache KnownBits a bit to potentially save some analysis.
1612 SmallVector<WithCache<const Value *>, 2> OpsKnown = {IntOps[0], IntOps[1]};
1613
1614 // Try treating x/y as coming from both `uitofp` and `sitofp`. There are
1615 // different constraints depending on the sign of the cast.
1616 // NB: `(uitofp nneg X)` == `(sitofp nneg X)`.
1617 if (Instruction *R = foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/false,
1618 IntOps, Op1FpC, OpsKnown))
1619 return R;
1620 return foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/true, IntOps,
1621 Op1FpC, OpsKnown);
1622}
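// Example of fold 2 above (a sketch): "fadd float (sitofp i8 %x to float), 1.0"
// can become "sitofp (add nsw i8 %x, 1) to float", but only when the analysis
// above proves the integer add cannot overflow and the constant converts to the
// integer type losslessly; otherwise the fold is rejected.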
1623
1624/// A binop with a constant operand and a sign-extended boolean operand may be
1625/// converted into a select of constants by applying the binary operation to
1626/// the constant with the two possible values of the extended boolean (0 or -1).
1627Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
1628 // TODO: Handle non-commutative binop (constant is operand 0).
1629 // TODO: Handle zext.
1630 // TODO: Peek through 'not' of cast.
1631 Value *BO0 = BO.getOperand(0);
1632 Value *BO1 = BO.getOperand(1);
1633 Value *X;
1634 Constant *C;
1635 if (!match(BO0, m_SExt(m_Value(X))) || !match(BO1, m_ImmConstant(C)) ||
1636 !X->getType()->isIntOrIntVectorTy(1))
1637 return nullptr;
1638
1639 // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
1640 Constant *Ones = ConstantInt::getAllOnesValue(BO.getType());
1641 Constant *Zero = ConstantInt::getNullValue(BO.getType());
1642 Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C);
1643 Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C);
1644 return SelectInst::Create(X, TVal, FVal);
1645}
1646
1647 static Value *constantFoldOperationIntoSelectOperand(Instruction &I,
1648 SelectInst *SI,
1649 bool IsTrueArm) {
1650 SmallVector<Constant *> ConstOps;
1651 for (Value *Op : I.operands()) {
1652 CmpInst::Predicate Pred;
1653 Constant *C = nullptr;
1654 if (Op == SI) {
1655 C = dyn_cast<Constant>(IsTrueArm ? SI->getTrueValue()
1656 : SI->getFalseValue());
1657 } else if (match(SI->getCondition(),
1658 m_ICmp(Pred, m_Specific(Op), m_Constant(C))) &&
1659 Pred == (IsTrueArm ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) &&
1660 isGuaranteedNotToBeUndefOrPoison(C)) {
1661 // Pass
1662 } else {
1663 C = dyn_cast<Constant>(Op);
1664 }
1665 if (C == nullptr)
1666 return nullptr;
1667
1668 ConstOps.push_back(C);
1669 }
1670
1671 return ConstantFoldInstOperands(&I, ConstOps, I.getModule()->getDataLayout());
1672}
1673
1674 static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
1675 Value *NewOp, InstCombiner &IC) {
1676 Instruction *Clone = I.clone();
1677 Clone->replaceUsesOfWith(SI, NewOp);
1678 Clone->dropUBImplyingAttrsAndMetadata();
1679 IC.InsertNewInstBefore(Clone, SI->getIterator());
1680 return Clone;
1681}
1682
1683 Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
1684 bool FoldWithMultiUse) {
1685 // Don't modify shared select instructions unless FoldWithMultiUse is set.
1686 if (!SI->hasOneUse() && !FoldWithMultiUse)
1687 return nullptr;
1688
1689 Value *TV = SI->getTrueValue();
1690 Value *FV = SI->getFalseValue();
1691 if (!(isa<Constant>(TV) || isa<Constant>(FV)))
1692 return nullptr;
1693
1694 // Bool selects with constant operands can be folded to logical ops.
1695 if (SI->getType()->isIntOrIntVectorTy(1))
1696 return nullptr;
1697
1698 // Test if a FCmpInst instruction is used exclusively by a select as
1699 // part of a minimum or maximum operation. If so, refrain from doing
1700 // any other folding. This helps out other analyses which understand
1701 // non-obfuscated minimum and maximum idioms. And in this case, at
1702 // least one of the comparison operands has at least one user besides
1703 // the compare (the select), which would often largely negate the
1704 // benefit of folding anyway.
1705 if (auto *CI = dyn_cast<FCmpInst>(SI->getCondition())) {
1706 if (CI->hasOneUse()) {
1707 Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
1708 if ((TV == Op0 && FV == Op1) || (FV == Op0 && TV == Op1))
1709 return nullptr;
1710 }
1711 }
1712
1713 // Make sure that one of the select arms constant folds successfully.
1714 Value *NewTV = constantFoldOperationIntoSelectOperand(Op, SI, /*IsTrueArm*/ true);
1715 Value *NewFV = constantFoldOperationIntoSelectOperand(Op, SI, /*IsTrueArm*/ false);
1716 if (!NewTV && !NewFV)
1717 return nullptr;
1718
1719 // Create an instruction for the arm that did not fold.
1720 if (!NewTV)
1721 NewTV = foldOperationIntoSelectOperand(Op, SI, TV, *this);
1722 if (!NewFV)
1723 NewFV = foldOperationIntoSelectOperand(Op, SI, FV, *this);
1724 return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
1725}
1726
1727 static Value *simplifyInstructionWithPHI(Instruction &I, PHINode *PN,
1728 Value *InValue, BasicBlock *InBB,
1729 const DataLayout &DL,
1730 const SimplifyQuery SQ) {
1731 // NB: It is a precondition of this transform that the operands be
1732 // phi translatable! This is usually trivially satisfied by limiting it
1733 // to constant ops, and for selects we do a more sophisticated check.
1734 SmallVector<Value *> Ops;
1735 for (Value *Op : I.operands()) {
1736 if (Op == PN)
1737 Ops.push_back(InValue);
1738 else
1739 Ops.push_back(Op->DoPHITranslation(PN->getParent(), InBB));
1740 }
1741
1742 // Don't consider the simplification successful if we get back a constant
1743 // expression. That's just an instruction in hiding.
1744 // Also reject the case where we simplify back to the phi node. We wouldn't
1745 // be able to remove it in that case.
1746 Value *NewVal = simplifyInstructionWithOperands(
1747 &I, Ops, SQ.getWithInstruction(InBB->getTerminator()));
1748 if (NewVal && NewVal != PN && !match(NewVal, m_ConstantExpr()))
1749 return NewVal;
1750
1751 // Check if incoming PHI value can be replaced with constant
1752 // based on implied condition.
1753 BranchInst *TerminatorBI = dyn_cast<BranchInst>(InBB->getTerminator());
1754 const ICmpInst *ICmp = dyn_cast<ICmpInst>(&I);
1755 if (TerminatorBI && TerminatorBI->isConditional() &&
1756 TerminatorBI->getSuccessor(0) != TerminatorBI->getSuccessor(1) && ICmp) {
1757 bool LHSIsTrue = TerminatorBI->getSuccessor(0) == PN->getParent();
1758 std::optional<bool> ImpliedCond =
1759 isImpliedCondition(TerminatorBI->getCondition(), ICmp->getPredicate(),
1760 Ops[0], Ops[1], DL, LHSIsTrue);
1761 if (ImpliedCond)
1762 return ConstantInt::getBool(I.getType(), ImpliedCond.value());
1763 }
1764
1765 return nullptr;
1766}
1767
1768 Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
1769 unsigned NumPHIValues = PN->getNumIncomingValues();
1770 if (NumPHIValues == 0)
1771 return nullptr;
1772
1773 // We normally only transform phis with a single use. However, if a PHI has
1774 // multiple uses and they are all the same operation, we can fold *all* of the
1775 // uses into the PHI.
1776 if (!PN->hasOneUse()) {
1777 // Walk the use list for the instruction, comparing them to I.
1778 for (User *U : PN->users()) {
1779 Instruction *UI = cast<Instruction>(U);
1780 if (UI != &I && !I.isIdenticalTo(UI))
1781 return nullptr;
1782 }
1783 // Otherwise, we can replace *all* users with the new PHI we form.
1784 }
1785
1786 // Check to see whether the instruction can be folded into each phi operand.
1787 // If there is one operand that does not fold, remember the BB it is in.
1788 // If there is more than one or if *it* is a PHI, bail out.
1789 SmallVector<Value *> NewPhiValues;
1790 BasicBlock *NonSimplifiedBB = nullptr;
1791 Value *NonSimplifiedInVal = nullptr;
1792 for (unsigned i = 0; i != NumPHIValues; ++i) {
1793 Value *InVal = PN->getIncomingValue(i);
1794 BasicBlock *InBB = PN->getIncomingBlock(i);
1795
1796 if (auto *NewVal = simplifyInstructionWithPHI(I, PN, InVal, InBB, DL, SQ)) {
1797 NewPhiValues.push_back(NewVal);
1798 continue;
1799 }
1800
1801 if (NonSimplifiedBB) return nullptr; // More than one non-simplified value.
1802
1803 NonSimplifiedBB = InBB;
1804 NonSimplifiedInVal = InVal;
1805 NewPhiValues.push_back(nullptr);
1806
1807 // If the InVal is an invoke at the end of the pred block, then we can't
1808 // insert a computation after it without breaking the edge.
1809 if (isa<InvokeInst>(InVal))
1810 if (cast<Instruction>(InVal)->getParent() == NonSimplifiedBB)
1811 return nullptr;
1812
1813 // If the incoming non-constant value is reachable from the phis block,
1814 // we'll push the operation across a loop backedge. This could result in
1815 // an infinite combine loop, and is generally non-profitable (especially
1816 // if the operation was originally outside the loop).
1817 if (isPotentiallyReachable(PN->getParent(), NonSimplifiedBB, nullptr, &DT,
1818 LI))
1819 return nullptr;
1820 }
1821
1822 // If there is exactly one non-simplified value, we can insert a copy of the
1823 // operation in that block. However, if this is a critical edge, we would be
1824 // inserting the computation on some other paths (e.g. inside a loop). Only
1825 // do this if the pred block is unconditionally branching into the phi block.
1826 // Also, make sure that the pred block is not dead code.
1827 if (NonSimplifiedBB != nullptr) {
1828 BranchInst *BI = dyn_cast<BranchInst>(NonSimplifiedBB->getTerminator());
1829 if (!BI || !BI->isUnconditional() ||
1830 !DT.isReachableFromEntry(NonSimplifiedBB))
1831 return nullptr;
1832 }
1833
1834 // Okay, we can do the transformation: create the new PHI node.
1835 PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
1836 InsertNewInstBefore(NewPN, PN->getIterator());
1837 NewPN->takeName(PN);
1838 NewPN->setDebugLoc(PN->getDebugLoc());
1839
1840 // If we are going to have to insert a new computation, do so right before the
1841 // predecessor's terminator.
1842 Instruction *Clone = nullptr;
1843 if (NonSimplifiedBB) {
1844 Clone = I.clone();
1845 for (Use &U : Clone->operands()) {
1846 if (U == PN)
1847 U = NonSimplifiedInVal;
1848 else
1849 U = U->DoPHITranslation(PN->getParent(), NonSimplifiedBB);
1850 }
1851 InsertNewInstBefore(Clone, NonSimplifiedBB->getTerminator()->getIterator());
1852 }
1853
1854 for (unsigned i = 0; i != NumPHIValues; ++i) {
1855 if (NewPhiValues[i])
1856 NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
1857 else
1858 NewPN->addIncoming(Clone, PN->getIncomingBlock(i));
1859 }
1860
1861 for (User *U : make_early_inc_range(PN->users())) {
1862 Instruction *User = cast<Instruction>(U);
1863 if (User == &I) continue;
1864 replaceInstUsesWith(*User, NewPN);
1865 eraseInstFromFunction(*User);
1866 }
1867
1868 replaceAllDbgUsesWith(const_cast<PHINode &>(*PN),
1869 const_cast<PHINode &>(*NewPN),
1870 const_cast<PHINode &>(*PN), DT);
1871 return replaceInstUsesWith(I, NewPN);
1872}
1873
1874 Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
1875 // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
1876 // we are guarding against replicating the binop in >1 predecessor.
1877 // This could miss matching a phi with 2 constant incoming values.
1878 auto *Phi0 = dyn_cast<PHINode>(BO.getOperand(0));
1879 auto *Phi1 = dyn_cast<PHINode>(BO.getOperand(1));
1880 if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
1881 Phi0->getNumOperands() != Phi1->getNumOperands())
1882 return nullptr;
1883
1884 // TODO: Remove the restriction for binop being in the same block as the phis.
1885 if (BO.getParent() != Phi0->getParent() ||
1886 BO.getParent() != Phi1->getParent())
1887 return nullptr;
1888
1889 // Fold if phi0 or phi1 has an incoming value that is the identity constant of
1890 // the binary operator; each pair of incoming values from the same block then
1891 // reduces to the non-identity value of that pair.
1892 // For example:
1893 // %phi0 = phi i32 [0, %bb0], [%i, %bb1]
1894 // %phi1 = phi i32 [%j, %bb0], [0, %bb1]
1895 // %add = add i32 %phi0, %phi1
1896 // ==>
1897 // %add = phi i32 [%j, %bb0], [%i, %bb1]
1898 Constant *C = ConstantExpr::getBinOpIdentity(BO.getOpcode(), BO.getType(),
1899 /*AllowRHSConstant*/ false);
1900 if (C) {
1901 SmallVector<Value *, 4> NewIncomingValues;
1902 auto CanFoldIncomingValuePair = [&](std::tuple<Use &, Use &> T) {
1903 auto &Phi0Use = std::get<0>(T);
1904 auto &Phi1Use = std::get<1>(T);
1905 if (Phi0->getIncomingBlock(Phi0Use) != Phi1->getIncomingBlock(Phi1Use))
1906 return false;
1907 Value *Phi0UseV = Phi0Use.get();
1908 Value *Phi1UseV = Phi1Use.get();
1909 if (Phi0UseV == C)
1910 NewIncomingValues.push_back(Phi1UseV);
1911 else if (Phi1UseV == C)
1912 NewIncomingValues.push_back(Phi0UseV);
1913 else
1914 return false;
1915 return true;
1916 };
1917
1918 if (all_of(zip(Phi0->operands(), Phi1->operands()),
1919 CanFoldIncomingValuePair)) {
1920 PHINode *NewPhi =
1921 PHINode::Create(Phi0->getType(), Phi0->getNumOperands());
1922 assert(NewIncomingValues.size() == Phi0->getNumOperands() &&
1923 "The number of collected incoming values should equal the number "
1924 "of the original PHINode operands!");
1925 for (unsigned I = 0; I < Phi0->getNumOperands(); I++)
1926 NewPhi->addIncoming(NewIncomingValues[I], Phi0->getIncomingBlock(I));
1927 return NewPhi;
1928 }
1929 }
1930
1931 if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
1932 return nullptr;
1933
1934 // Match a pair of incoming constants for one of the predecessor blocks.
1935 BasicBlock *ConstBB, *OtherBB;
1936 Constant *C0, *C1;
1937 if (match(Phi0->getIncomingValue(0), m_ImmConstant(C0))) {
1938 ConstBB = Phi0->getIncomingBlock(0);
1939 OtherBB = Phi0->getIncomingBlock(1);
1940 } else if (match(Phi0->getIncomingValue(1), m_ImmConstant(C0))) {
1941 ConstBB = Phi0->getIncomingBlock(1);
1942 OtherBB = Phi0->getIncomingBlock(0);
1943 } else {
1944 return nullptr;
1945 }
1946 if (!match(Phi1->getIncomingValueForBlock(ConstBB), m_ImmConstant(C1)))
1947 return nullptr;
1948
1949 // The block that we are hoisting to must reach here unconditionally.
1950 // Otherwise, we could be speculatively executing an expensive or
1951 // non-speculative op.
1952 auto *PredBlockBranch = dyn_cast<BranchInst>(OtherBB->getTerminator());
1953 if (!PredBlockBranch || PredBlockBranch->isConditional() ||
1954 !DT.isReachableFromEntry(OtherBB))
1955 return nullptr;
1956
1957 // TODO: This check could be tightened to only apply to binops (div/rem) that
1958 // are not safe to speculatively execute. But that could allow hoisting
1959 // potentially expensive instructions (fdiv for example).
1960 for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
1961 if (BBIter->mayHaveSideEffects())
1962 return nullptr;
1963
1964 // Fold constants for the predecessor block with constant incoming values.
1965 Constant *NewC = ConstantFoldBinaryOpOperands(BO.getOpcode(), C0, C1, DL);
1966 if (!NewC)
1967 return nullptr;
1968
1969 // Make a new binop in the predecessor block with the non-constant incoming
1970 // values.
1971 Builder.SetInsertPoint(PredBlockBranch);
1972 Value *NewBO = Builder.CreateBinOp(BO.getOpcode(),
1973 Phi0->getIncomingValueForBlock(OtherBB),
1974 Phi1->getIncomingValueForBlock(OtherBB));
1975 if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(NewBO))
1976 NotFoldedNewBO->copyIRFlags(&BO);
1977
1978 // Replace the binop with a phi of the new values. The old phis are dead.
1979 PHINode *NewPhi = PHINode::Create(BO.getType(), 2);
1980 NewPhi->addIncoming(NewBO, OtherBB);
1981 NewPhi->addIncoming(NewC, ConstBB);
1982 return NewPhi;
1983}
1984
1985 Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
1986 if (!isa<Constant>(I.getOperand(1)))
1987 return nullptr;
1988
1989 if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(0))) {
1990 if (Instruction *NewSel = FoldOpIntoSelect(I, Sel))
1991 return NewSel;
1992 } else if (auto *PN = dyn_cast<PHINode>(I.getOperand(0))) {
1993 if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
1994 return NewPhi;
1995 }
1996 return nullptr;
1997}
1998
1999 static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
2000 // If this GEP has only 0 indices, it is the same pointer as
2001 // Src. If Src is not a trivial GEP too, don't combine
2002 // the indices.
2003 if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
2004 !Src.hasOneUse())
2005 return false;
2006 return true;
2007}
2008
2009 Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
2010 if (!isa<VectorType>(Inst.getType()))
2011 return nullptr;
2012
2013 BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
2014 Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
2015 assert(cast<VectorType>(LHS->getType())->getElementCount() ==
2016 cast<VectorType>(Inst.getType())->getElementCount());
2017 assert(cast<VectorType>(RHS->getType())->getElementCount() ==
2018 cast<VectorType>(Inst.getType())->getElementCount());
2019
2020 // If both operands of the binop are vector concatenations, then perform the
2021 // narrow binop on each pair of the source operands followed by concatenation
2022 // of the results.
2023 Value *L0, *L1, *R0, *R1;
2024 ArrayRef<int> Mask;
2025 if (match(LHS, m_Shuffle(m_Value(L0), m_Value(L1), m_Mask(Mask))) &&
2026 match(RHS, m_Shuffle(m_Value(R0), m_Value(R1), m_SpecificMask(Mask))) &&
2027 LHS->hasOneUse() && RHS->hasOneUse() &&
2028 cast<ShuffleVectorInst>(LHS)->isConcat() &&
2029 cast<ShuffleVectorInst>(RHS)->isConcat()) {
2030 // This transform does not have the speculative execution constraint as
2031 // below because the shuffle is a concatenation. The new binops are
2032 // operating on exactly the same elements as the existing binop.
2033 // TODO: We could ease the mask requirement to allow different undef lanes,
2034 // but that requires an analysis of the binop-with-undef output value.
2035 Value *NewBO0 = Builder.CreateBinOp(Opcode, L0, R0);
2036 if (auto *BO = dyn_cast<BinaryOperator>(NewBO0))
2037 BO->copyIRFlags(&Inst);
2038 Value *NewBO1 = Builder.CreateBinOp(Opcode, L1, R1);
2039 if (auto *BO = dyn_cast<BinaryOperator>(NewBO1))
2040 BO->copyIRFlags(&Inst);
2041 return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
2042 }
2043
2044 auto createBinOpReverse = [&](Value *X, Value *Y) {
2045 Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName());
2046 if (auto *BO = dyn_cast<BinaryOperator>(V))
2047 BO->copyIRFlags(&Inst);
2048 Module *M = Inst.getModule();
2049 Function *F =
2050 Intrinsic::getDeclaration(M, Intrinsic::vector_reverse, V->getType());
2051 return CallInst::Create(F, V);
2052 };
2053
2054 // NOTE: Reverse shuffles don't require the speculative execution protection
2055 // below because they don't affect which lanes take part in the computation.
2056
2057 Value *V1, *V2;
2058 if (match(LHS, m_VecReverse(m_Value(V1)))) {
2059 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
2060 if (match(RHS, m_VecReverse(m_Value(V2))) &&
2061 (LHS->hasOneUse() || RHS->hasOneUse() ||
2062 (LHS == RHS && LHS->hasNUses(2))))
2063 return createBinOpReverse(V1, V2);
2064
2065 // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat))
2066 if (LHS->hasOneUse() && isSplatValue(RHS))
2067 return createBinOpReverse(V1, RHS);
2068 }
2069 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
2070 else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2)))))
2071 return createBinOpReverse(LHS, V2);
2072
2073 // It may not be safe to reorder shuffles and things like div, urem, etc.
2074 // because we may trap when executing those ops on unknown vector elements.
2075 // See PR20059.
2076 if (!isSafeToSpeculativelyExecute(&Inst))
2077 return nullptr;
2078
2079 auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
2080 Value *XY = Builder.CreateBinOp(Opcode, X, Y);
2081 if (auto *BO = dyn_cast<BinaryOperator>(XY))
2082 BO->copyIRFlags(&Inst);
2083 return new ShuffleVectorInst(XY, M);
2084 };
2085
2086 // If both arguments of the binary operation are shuffles that use the same
2087 // mask and shuffle within a single vector, move the shuffle after the binop.
2088 if (match(LHS, m_Shuffle(m_Value(V1), m_Poison(), m_Mask(Mask))) &&
2089 match(RHS, m_Shuffle(m_Value(V2), m_Poison(), m_SpecificMask(Mask))) &&
2090 V1->getType() == V2->getType() &&
2091 (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
2092 // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
2093 return createBinOpShuffle(V1, V2, Mask);
2094 }
2095
2096 // If both arguments of a commutative binop are select-shuffles that use the
2097 // same mask with commuted operands, the shuffles are unnecessary.
2098 if (Inst.isCommutative() &&
2099 match(LHS, m_Shuffle(m_Value(V1), m_Value(V2), m_Mask(Mask))) &&
2100 match(RHS,
2101 m_Shuffle(m_Specific(V2), m_Specific(V1), m_SpecificMask(Mask)))) {
2102 auto *LShuf = cast<ShuffleVectorInst>(LHS);
2103 auto *RShuf = cast<ShuffleVectorInst>(RHS);
2104 // TODO: Allow shuffles that contain undefs in the mask?
2105 // That is legal, but it reduces undef knowledge.
2106 // TODO: Allow arbitrary shuffles by shuffling after binop?
2107 // That might be legal, but we have to deal with poison.
2108 if (LShuf->isSelect() &&
2109 !is_contained(LShuf->getShuffleMask(), PoisonMaskElem) &&
2110 RShuf->isSelect() &&
2111 !is_contained(RShuf->getShuffleMask(), PoisonMaskElem)) {
2112 // Example:
2113 // LHS = shuffle V1, V2, <0, 5, 6, 3>
2114 // RHS = shuffle V2, V1, <0, 5, 6, 3>
2115 // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
2116 Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
2117 NewBO->copyIRFlags(&Inst);
2118 return NewBO;
2119 }
2120 }
2121
2122 // If one argument is a shuffle within one vector and the other is a constant,
2123 // try moving the shuffle after the binary operation. This canonicalization
2124 // intends to move shuffles closer to other shuffles and binops closer to
2125 // other binops, so they can be folded. It may also enable demanded elements
2126 // transforms.
2127 Constant *C;
2128 auto *InstVTy = dyn_cast<FixedVectorType>(Inst.getType());
2129 if (InstVTy &&
2130 match(&Inst, m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Poison(),
2131 m_Mask(Mask))),
2132 m_ImmConstant(C))) &&
2133 cast<FixedVectorType>(V1->getType())->getNumElements() <=
2134 InstVTy->getNumElements()) {
2135 assert(InstVTy->getScalarType() == V1->getType()->getScalarType() &&
2136 "Shuffle should not change scalar type");
2137
2138 // Find constant NewC that has property:
2139 // shuffle(NewC, ShMask) = C
2140 // If such constant does not exist (example: ShMask=<0,0> and C=<1,2>)
2141 // reorder is not possible. A 1-to-1 mapping is not required. Example:
2142 // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef>
2143 bool ConstOp1 = isa<Constant>(RHS);
2144 ArrayRef<int> ShMask = Mask;
2145 unsigned SrcVecNumElts =
2146 cast<FixedVectorType>(V1->getType())->getNumElements();
2147 PoisonValue *PoisonScalar = PoisonValue::get(C->getType()->getScalarType());
2148 SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, PoisonScalar);
2149 bool MayChange = true;
2150 unsigned NumElts = InstVTy->getNumElements();
2151 for (unsigned I = 0; I < NumElts; ++I) {
2152 Constant *CElt = C->getAggregateElement(I);
2153 if (ShMask[I] >= 0) {
2154 assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
2155 Constant *NewCElt = NewVecC[ShMask[I]];
2156 // Bail out if:
2157 // 1. The constant vector contains a constant expression.
2158 // 2. The shuffle needs an element of the constant vector that can't
2159 // be mapped to a new constant vector.
2160 // 3. This is a widening shuffle that copies elements of V1 into the
2161 // extended elements (extending with poison is allowed).
2162 if (!CElt || (!isa<PoisonValue>(NewCElt) && NewCElt != CElt) ||
2163 I >= SrcVecNumElts) {
2164 MayChange = false;
2165 break;
2166 }
2167 NewVecC[ShMask[I]] = CElt;
2168 }
2169 // If this is a widening shuffle, we must be able to extend with poison
2170 // elements. If the original binop does not produce a poison in the high
2171 // lanes, then this transform is not safe.
2172 // Similarly for poison lanes due to the shuffle mask, we can only
2173 // transform binops that preserve poison.
2174 // TODO: We could shuffle those non-poison constant values into the
2175 // result by using a constant vector (rather than a poison vector)
2176 // as operand 1 of the new binop, but that might be too aggressive
2177 // for target-independent shuffle creation.
2178 if (I >= SrcVecNumElts || ShMask[I] < 0) {
2179 Constant *MaybePoison =
2180 ConstOp1
2181 ? ConstantFoldBinaryOpOperands(Opcode, PoisonScalar, CElt, DL)
2182 : ConstantFoldBinaryOpOperands(Opcode, CElt, PoisonScalar, DL);
2183 if (!MaybePoison || !isa<PoisonValue>(MaybePoison)) {
2184 MayChange = false;
2185 break;
2186 }
2187 }
2188 }
2189 if (MayChange) {
2190 Constant *NewC = ConstantVector::get(NewVecC);
2191 // It may not be safe to execute a binop on a vector with poison elements
2192 // because the entire instruction can be folded to undef or create poison
2193 // that did not exist in the original code.
2194 // TODO: The shift case should not be necessary.
2195 if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
2196 NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1);
2197
2198 // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
2199 // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
2200 Value *NewLHS = ConstOp1 ? V1 : NewC;
2201 Value *NewRHS = ConstOp1 ? NewC : V1;
2202 return createBinOpShuffle(NewLHS, NewRHS, Mask);
2203 }
2204 }
2205
2206 // Try to reassociate to sink a splat shuffle after a binary operation.
2207 if (Inst.isAssociative() && Inst.isCommutative()) {
2208 // Canonicalize shuffle operand as LHS.
2209 if (isa<ShuffleVectorInst>(RHS))
2210 std::swap(LHS, RHS);
2211
2212 Value *X;
2213 ArrayRef<int> MaskC;
2214 int SplatIndex;
2215 Value *Y, *OtherOp;
2216 if (!match(LHS,
2217 m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(MaskC)))) ||
2218 !match(MaskC, m_SplatOrPoisonMask(SplatIndex)) ||
2219 X->getType() != Inst.getType() ||
2220 !match(RHS, m_OneUse(m_BinOp(Opcode, m_Value(Y), m_Value(OtherOp)))))
2221 return nullptr;
2222
2223 // FIXME: This may not be safe if the analysis allows undef elements. By
2224 // moving 'Y' before the splat shuffle, we are implicitly assuming
2225 // that it is not undef/poison at the splat index.
2226 if (isSplatValue(OtherOp, SplatIndex)) {
2227 std::swap(Y, OtherOp);
2228 } else if (!isSplatValue(Y, SplatIndex)) {
2229 return nullptr;
2230 }
2231
2232 // X and Y are splatted values, so perform the binary operation on those
2233 // values followed by a splat followed by the 2nd binary operation:
2234 // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
2235 Value *NewBO = Builder.CreateBinOp(Opcode, X, Y);
2236 SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
2237 Value *NewSplat = Builder.CreateShuffleVector(NewBO, NewMask);
2238 Instruction *R = BinaryOperator::Create(Opcode, NewSplat, OtherOp);
2239
2240 // Intersect FMF on both new binops. Other (poison-generating) flags are
2241 // dropped to be safe.
2242 if (isa<FPMathOperator>(R)) {
2243 R->copyFastMathFlags(&Inst);
2244 R->andIRFlags(RHS);
2245 }
2246 if (auto *NewInstBO = dyn_cast<BinaryOperator>(NewBO))
2247 NewInstBO->copyIRFlags(R);
2248 return R;
2249 }
2250
2251 return nullptr;
2252}
2253
2254 /// Try to narrow the width of a binop if at least 1 operand is an extend of
2255 /// a value. This requires a potentially expensive known bits check to make
2256/// sure the narrow op does not overflow.
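// Illustrative example (hypothetical i8 values %a and %b):
//   %za = zext i8 %a to i32
//   %zb = zext i8 %b to i32
//   %r  = add i32 %za, %zb
// can become, when known bits prove the narrow add cannot wrap:
//   %n = add nuw i8 %a, %b
//   %r = zext i8 %n to i32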
2257Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
2258 // We need at least one extended operand.
2259 Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);
2260
2261 // If this is a sub, we swap the operands since we always want an extension
2262 // on the RHS. The LHS can be an extension or a constant.
2263 if (BO.getOpcode() == Instruction::Sub)
2264 std::swap(Op0, Op1);
2265
2266 Value *X;
2267 bool IsSext = match(Op0, m_SExt(m_Value(X)));
2268 if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
2269 return nullptr;
2270
2271 // If both operands are the same extension from the same source type and we
2272 // can eliminate at least one (hasOneUse), this might work.
2273 CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
2274 Value *Y;
2275 if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
2276 cast<Operator>(Op1)->getOpcode() == CastOpc &&
2277 (Op0->hasOneUse() || Op1->hasOneUse()))) {
2278 // If that did not match, see if we have a suitable constant operand.
2279 // Truncating and extending must produce the same constant.
2280 Constant *WideC;
2281 if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
2282 return nullptr;
2283 Constant *NarrowC = getLosslessTrunc(WideC, X->getType(), CastOpc);
2284 if (!NarrowC)
2285 return nullptr;
2286 Y = NarrowC;
2287 }
2288
2289 // Swap back now that we found our operands.
2290 if (BO.getOpcode() == Instruction::Sub)
2291 std::swap(X, Y);
2292
2293 // Both operands have narrow versions. Last step: the math must not overflow
2294 // in the narrow width.
2295 if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
2296 return nullptr;
2297
2298 // bo (ext X), (ext Y) --> ext (bo X, Y)
2299 // bo (ext X), C --> ext (bo X, C')
2300 Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow");
2301 if (auto *NewBinOp = dyn_cast<BinaryOperator>(NarrowBO)) {
2302 if (IsSext)
2303 NewBinOp->setHasNoSignedWrap();
2304 else
2305 NewBinOp->setHasNoUnsignedWrap();
2306 }
2307 return CastInst::Create(CastOpc, NarrowBO, BO.getType());
2308}
2309
2310 static bool isMergedGEPInBounds(GEPOperator &GEP1, GEPOperator &GEP2) {
2311 // At least one GEP must be inbounds.
2312 if (!GEP1.isInBounds() && !GEP2.isInBounds())
2313 return false;
2314
2315 return (GEP1.isInBounds() || GEP1.hasAllZeroIndices()) &&
2316 (GEP2.isInBounds() || GEP2.hasAllZeroIndices());
2317}
2318
2319/// Thread a GEP operation with constant indices through the constant true/false
2320/// arms of a select.
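// Illustrative example (hypothetical globals @a and @b):
//   %p = select i1 %c, ptr @a, ptr @b
//   %g = getelementptr inbounds i32, ptr %p, i64 1
// becomes
//   %g = select i1 %c, ptr getelementptr inbounds (i32, ptr @a, i64 1),
//                      ptr getelementptr inbounds (i32, ptr @b, i64 1)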
2321 static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
2322 InstCombiner::BuilderTy &Builder) {
2323 if (!GEP.hasAllConstantIndices())
2324 return nullptr;
2325
2326 Instruction *Sel;
2327 Value *Cond;
2328 Constant *TrueC, *FalseC;
2329 if (!match(GEP.getPointerOperand(), m_Instruction(Sel)) ||
2330 !match(Sel,
2331 m_Select(m_Value(Cond), m_Constant(TrueC), m_Constant(FalseC))))
2332 return nullptr;
2333
2334 // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
2335 // Propagate 'inbounds' and metadata from existing instructions.
2336 // Note: using IRBuilder to create the constants for efficiency.
2337 SmallVector<Value *, 4> IndexC(GEP.indices());
2338 bool IsInBounds = GEP.isInBounds();
2339 Type *Ty = GEP.getSourceElementType();
2340 Value *NewTrueC = Builder.CreateGEP(Ty, TrueC, IndexC, "", IsInBounds);
2341 Value *NewFalseC = Builder.CreateGEP(Ty, FalseC, IndexC, "", IsInBounds);
2342 return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
2343}
2344
2345// Canonicalization:
2346// gep T, (gep i8, base, C1), (Index + C2) into
2347// gep T, (gep i8, base, C1 + C2 * sizeof(T)), Index
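// Illustrative example with T = i32 (hypothetical values %base and %idx):
//   %p = getelementptr i8, ptr %base, i64 4
//   %i = add i64 %idx, 2
//   %g = getelementptr i32, ptr %p, i64 %i
// becomes
//   %p2 = getelementptr i8, ptr %base, i64 12   ; 4 + 2 * sizeof(i32)
//   %g  = getelementptr i32, ptr %p2, i64 %idx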
2348 static Instruction *canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP,
2349 GEPOperator *Src,
2350 InstCombinerImpl &IC) {
2351 if (GEP.getNumIndices() != 1)
2352 return nullptr;
2353 auto &DL = IC.getDataLayout();
2354 Value *Base;
2355 const APInt *C1;
2356 if (!match(Src, m_PtrAdd(m_Value(Base), m_APInt(C1))))
2357 return nullptr;
2358 Value *VarIndex;
2359 const APInt *C2;
2360 Type *PtrTy = Src->getType()->getScalarType();
2361 unsigned IndexSizeInBits = DL.getIndexTypeSizeInBits(PtrTy);
2362 if (!match(GEP.getOperand(1), m_AddLike(m_Value(VarIndex), m_APInt(C2))))
2363 return nullptr;
2364 if (C1->getBitWidth() != IndexSizeInBits ||
2365 C2->getBitWidth() != IndexSizeInBits)
2366 return nullptr;
2367 Type *BaseType = GEP.getSourceElementType();
2368 if (isa<ScalableVectorType>(BaseType))
2369 return nullptr;
2370 APInt TypeSize(IndexSizeInBits, DL.getTypeAllocSize(BaseType));
2371 APInt NewOffset = TypeSize * *C2 + *C1;
2372 if (NewOffset.isZero() ||
2373 (Src->hasOneUse() && GEP.getOperand(1)->hasOneUse())) {
2374 Value *GEPConst =
2375 IC.Builder.CreatePtrAdd(Base, IC.Builder.getInt(NewOffset));
2376 return GetElementPtrInst::Create(BaseType, GEPConst, VarIndex);
2377 }
2378
2379 return nullptr;
2380}
2381
2382 Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
2383 GEPOperator *Src) {
2384 // Combine Indices - If the source pointer to this getelementptr instruction
2385 // is a getelementptr instruction with matching element type, combine the
2386 // indices of the two getelementptr instructions into a single instruction.
2387 if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
2388 return nullptr;
2389
2390 if (auto *I = canonicalizeGEPOfConstGEPI8(GEP, Src, *this))
2391 return I;
2392
2393 // For constant GEPs, use a more general offset-based folding approach.
2394 Type *PtrTy = Src->getType()->getScalarType();
2395 if (GEP.hasAllConstantIndices() &&
2396 (Src->hasOneUse() || Src->hasAllConstantIndices())) {
2397 // Split Src into a variable part and a constant suffix.
2398 gep_type_iterator GTI = gep_type_begin(*Src);
2399 Type *BaseType = GTI.getIndexedType();
2400 bool IsFirstType = true;
2401 unsigned NumVarIndices = 0;
2402 for (auto Pair : enumerate(Src->indices())) {
2403 if (!isa<ConstantInt>(Pair.value())) {
2404 BaseType = GTI.getIndexedType();
2405 IsFirstType = false;
2406 NumVarIndices = Pair.index() + 1;
2407 }
2408 ++GTI;
2409 }
2410
2411 // Determine the offset for the constant suffix of Src.
2412 APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
2413 if (NumVarIndices != Src->getNumIndices()) {
2414 // FIXME: getIndexedOffsetInType() does not handle scalable vectors.
2415 if (BaseType->isScalableTy())
2416 return nullptr;
2417
2418 SmallVector<Value *> ConstantIndices;
2419 if (!IsFirstType)
2420 ConstantIndices.push_back(
2421 Constant::getNullValue(Type::getInt32Ty(GEP.getContext())));
2422 append_range(ConstantIndices, drop_begin(Src->indices(), NumVarIndices));
2423 Offset += DL.getIndexedOffsetInType(BaseType, ConstantIndices);
2424 }
2425
2426 // Add the offset for GEP (which is fully constant).
2427 if (!GEP.accumulateConstantOffset(DL, Offset))
2428 return nullptr;
2429
2430 APInt OffsetOld = Offset;
2431 // Convert the total offset back into indices.
2432 SmallVector<APInt> ConstIndices =
2433 DL.getGEPIndicesForOffset(BaseType, Offset);
2434 if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) {
2435 // If both GEP are constant-indexed, and cannot be merged in either way,
2436 // convert them to a GEP of i8.
2437 if (Src->hasAllConstantIndices())
2438 return replaceInstUsesWith(
2439 GEP, Builder.CreateGEP(
2440 Builder.getInt8Ty(), Src->getOperand(0),
2441 Builder.getInt(OffsetOld), "",
2442 isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))));
2443 return nullptr;
2444 }
2445
2446 bool IsInBounds = isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP));
2447 SmallVector<Value *> Indices;
2448 append_range(Indices, drop_end(Src->indices(),
2449 Src->getNumIndices() - NumVarIndices));
2450 for (const APInt &Idx : drop_begin(ConstIndices, !IsFirstType)) {
2451 Indices.push_back(ConstantInt::get(GEP.getContext(), Idx));
2452 // Even if the total offset is inbounds, we may end up representing it
2453 // by first performing a larger negative offset, and then a smaller
2454 // positive one. The large negative offset might go out of bounds. Only
2455 // preserve inbounds if all signs are the same.
2456 IsInBounds &= Idx.isNonNegative() == ConstIndices[0].isNonNegative();
2457 }
2458
2459 return replaceInstUsesWith(
2460 GEP, Builder.CreateGEP(Src->getSourceElementType(), Src->getOperand(0),
2461 Indices, "", IsInBounds));
2462 }
2463
2464 if (Src->getResultElementType() != GEP.getSourceElementType())
2465 return nullptr;
2466
2467 SmallVector<Value*, 8> Indices;
2468
2469 // Find out whether the last index in the source GEP is a sequential idx.
2470 bool EndsWithSequential = false;
2471 for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
2472 I != E; ++I)
2473 EndsWithSequential = I.isSequential();
2474
2475 // Can we combine the two pointer arithmetics offsets?
2476 if (EndsWithSequential) {
2477 // Replace: gep (gep %P, long B), long A, ...
2478 // With: T = long A+B; gep %P, T, ...
2479 Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
2480 Value *GO1 = GEP.getOperand(1);
2481
2482 // If they aren't the same type, then the input hasn't been processed
2483 // by the loop above yet (which canonicalizes sequential index types to
2484 // intptr_t). Just avoid transforming this until the input has been
2485 // normalized.
2486 if (SO1->getType() != GO1->getType())
2487 return nullptr;
2488
2489 Value *Sum =
2490 simplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
2491 // Only do the combine when we are sure the cost after the
2492 // merge is never more than that before the merge.
2493 if (Sum == nullptr)
2494 return nullptr;
2495
2496 // Update the GEP in place if possible.
2497 if (Src->getNumOperands() == 2) {
2498 GEP.setIsInBounds(isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP)));
2499 replaceOperand(GEP, 0, Src->getOperand(0));
2500 replaceOperand(GEP, 1, Sum);
2501 return &GEP;
2502 }
2503 Indices.append(Src->op_begin()+1, Src->op_end()-1);
2504 Indices.push_back(Sum);
2505 Indices.append(GEP.op_begin()+2, GEP.op_end());
2506 } else if (isa<Constant>(*GEP.idx_begin()) &&
2507 cast<Constant>(*GEP.idx_begin())->isNullValue() &&
2508 Src->getNumOperands() != 1) {
2509 // Otherwise we can do the fold if the first index of the GEP is a zero
2510 Indices.append(Src->op_begin()+1, Src->op_end());
2511 Indices.append(GEP.idx_begin()+1, GEP.idx_end());
2512 }
2513
2514 if (!Indices.empty())
2515 return replaceInstUsesWith(
2516 GEP, Builder.CreateGEP(
2517 Src->getSourceElementType(), Src->getOperand(0), Indices, "",
2518 isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))));
2519
2520 return nullptr;
2521}
2522
2523 Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
2524 BuilderTy *Builder,
2525 bool &DoesConsume, unsigned Depth) {
2526 static Value *const NonNull = reinterpret_cast<Value *>(uintptr_t(1));
2527 // ~(~(X)) -> X.
2528 Value *A, *B;
2529 if (match(V, m_Not(m_Value(A)))) {
2530 DoesConsume = true;
2531 return A;
2532 }
2533
2534 Constant *C;
2535 // Constants can be considered to be not'ed values.
2536 if (match(V, m_ImmConstant(C)))
2537 return ConstantExpr::getNot(C);
2538
2539 if (Depth++ >= MaxAnalysisRecursionDepth)
2540 return nullptr;
2541
2542 // The rest of the cases require that we invert all uses so don't bother
2543 // doing the analysis if we know we can't use the result.
2544 if (!WillInvertAllUses)
2545 return nullptr;
2546
2547 // Compares can be inverted if all of their uses are being modified to use
2548 // the ~V.
2549 if (auto *I = dyn_cast<CmpInst>(V)) {
2550 if (Builder != nullptr)
2551 return Builder->CreateCmp(I->getInversePredicate(), I->getOperand(0),
2552 I->getOperand(1));
2553 return NonNull;
2554 }
2555
2556 // If `V` is of the form `A + B` then `-1 - V` can be folded into
2557 // `(-1 - B) - A` if we are willing to invert all of the uses.
2558 if (match(V, m_Add(m_Value(A), m_Value(B)))) {
2559 if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
2560 DoesConsume, Depth))
2561 return Builder ? Builder->CreateSub(BV, A) : NonNull;
2562 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2563 DoesConsume, Depth))
2564 return Builder ? Builder->CreateSub(AV, B) : NonNull;
2565 return nullptr;
2566 }
2567
2568 // If `V` is of the form `A ^ ~B` then `~(A ^ ~B)` can be folded
2569 // into `A ^ B` if we are willing to invert all of the uses.
2570 if (match(V, m_Xor(m_Value(A), m_Value(B)))) {
2571 if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
2572 DoesConsume, Depth))
2573 return Builder ? Builder->CreateXor(A, BV) : NonNull;
2574 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2575 DoesConsume, Depth))
2576 return Builder ? Builder->CreateXor(AV, B) : NonNull;
2577 return nullptr;
2578 }
2579
2580 // If `V` is of the form `B - A` then `-1 - V` can be folded into
2581 // `A + (-1 - B)` if we are willing to invert all of the uses.
2582 if (match(V, m_Sub(m_Value(A), m_Value(B)))) {
2583 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2584 DoesConsume, Depth))
2585 return Builder ? Builder->CreateAdd(AV, B) : NonNull;
2586 return nullptr;
2587 }
2588
2589 // If `V` is of the form `(~A) s>> B` then `~((~A) s>> B)` can be folded
2590 // into `A s>> B` if we are willing to invert all of the uses.
2591 if (match(V, m_AShr(m_Value(A), m_Value(B)))) {
2592 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2593 DoesConsume, Depth))
2594 return Builder ? Builder->CreateAShr(AV, B) : NonNull;
2595 return nullptr;
2596 }
2597
2598 Value *Cond;
2599 // LogicOps are special in that we canonicalize them at the cost of an
2600 // instruction.
2601 bool IsSelect = match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))) &&
2602 !shouldAvoidAbsorbingNotIntoSelect(*cast<SelectInst>(V));
2603 // Selects/min/max with invertible operands are freely invertible
2604 if (IsSelect || match(V, m_MaxOrMin(m_Value(A), m_Value(B)))) {
2605 bool LocalDoesConsume = DoesConsume;
2606 if (!getFreelyInvertedImpl(B, B->hasOneUse(), /*Builder*/ nullptr,
2607 LocalDoesConsume, Depth))
2608 return nullptr;
2609 if (Value *NotA = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2610 LocalDoesConsume, Depth)) {
2611 DoesConsume = LocalDoesConsume;
2612 if (Builder != nullptr) {
2613 Value *NotB = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
2614 DoesConsume, Depth);
2615 assert(NotB != nullptr &&
2616 "Unable to build inverted value for known freely invertable op");
2617 if (auto *II = dyn_cast<IntrinsicInst>(V))
2618 return Builder->CreateBinaryIntrinsic(
2619 getInverseMinMaxIntrinsic(II->getIntrinsicID()), NotA, NotB);
2620 return Builder->CreateSelect(Cond, NotA, NotB);
2621 }
2622 return NonNull;
2623 }
2624 }
2625
2626 if (PHINode *PN = dyn_cast<PHINode>(V)) {
2627 bool LocalDoesConsume = DoesConsume;
2628 SmallVector<std::pair<Value *, BasicBlock *>, 8> IncomingValues;
2629 for (Use &U : PN->operands()) {
2630 BasicBlock *IncomingBlock = PN->getIncomingBlock(U);
2631 Value *NewIncomingVal = getFreelyInvertedImpl(
2632 U.get(), /*WillInvertAllUses=*/false,
2633 /*Builder=*/nullptr, LocalDoesConsume, MaxAnalysisRecursionDepth - 1);
2634 if (NewIncomingVal == nullptr)
2635 return nullptr;
2636 // Make sure that we can safely erase the original PHI node.
2637 if (NewIncomingVal == V)
2638 return nullptr;
2639 if (Builder != nullptr)
2640 IncomingValues.emplace_back(NewIncomingVal, IncomingBlock);
2641 }
2642
2643 DoesConsume = LocalDoesConsume;
2644 if (Builder != nullptr) {
2645 IRBuilderBase::InsertPointGuard Guard(*Builder);
2646 Builder->SetInsertPoint(PN);
2647 PHINode *NewPN =
2648 Builder->CreatePHI(PN->getType(), PN->getNumIncomingValues());
2649 for (auto [Val, Pred] : IncomingValues)
2650 NewPN->addIncoming(Val, Pred);
2651 return NewPN;
2652 }
2653 return NonNull;
2654 }
2655
2656 if (match(V, m_SExtLike(m_Value(A)))) {
2657 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2658 DoesConsume, Depth))
2659 return Builder ? Builder->CreateSExt(AV, V->getType()) : NonNull;
2660 return nullptr;
2661 }
2662
2663 if (match(V, m_Trunc(m_Value(A)))) {
2664 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2665 DoesConsume, Depth))
2666 return Builder ? Builder->CreateTrunc(AV, V->getType()) : NonNull;
2667 return nullptr;
2668 }
2669
2670 // De Morgan's Laws:
2671 // (~(A | B)) -> (~A & ~B)
2672 // (~(A & B)) -> (~A | ~B)
2673 auto TryInvertAndOrUsingDeMorgan = [&](Instruction::BinaryOps Opcode,
2674 bool IsLogical, Value *A,
2675 Value *B) -> Value * {
2676 bool LocalDoesConsume = DoesConsume;
2677 if (!getFreelyInvertedImpl(B, B->hasOneUse(), /*Builder=*/nullptr,
2678 LocalDoesConsume, Depth))
2679 return nullptr;
2680 if (auto *NotA = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2681 LocalDoesConsume, Depth)) {
2682 auto *NotB = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
2683 LocalDoesConsume, Depth);
2684 DoesConsume = LocalDoesConsume;
2685 if (IsLogical)
2686 return Builder ? Builder->CreateLogicalOp(Opcode, NotA, NotB) : NonNull;
2687 return Builder ? Builder->CreateBinOp(Opcode, NotA, NotB) : NonNull;
2688 }
2689
2690 return nullptr;
2691 };
2692
2693 if (match(V, m_Or(m_Value(A), m_Value(B))))
2694 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/false, A,
2695 B);
2696
2697 if (match(V, m_And(m_Value(A), m_Value(B))))
2698 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/false, A,
2699 B);
2700
2701 if (match(V, m_LogicalOr(m_Value(A), m_Value(B))))
2702 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/true, A,
2703 B);
2704
2705 if (match(V, m_LogicalAnd(m_Value(A), m_Value(B))))
2706 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/true, A,
2707 B);
2708
2709 return nullptr;
2710}
2711
2712 Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
2713 Value *PtrOp = GEP.getOperand(0);
2714 SmallVector<Value *, 8> Indices(GEP.indices());
2715 Type *GEPType = GEP.getType();
2716 Type *GEPEltType = GEP.getSourceElementType();
2717 if (Value *V = simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(),
2718 SQ.getWithInstruction(&GEP)))
2719 return replaceInstUsesWith(GEP, V);
2720
2721 // For vector geps, use the generic demanded vector support.
2722 // Skip if GEP return type is scalable. The number of elements is unknown at
2723 // compile-time.
2724 if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) {
2725 auto VWidth = GEPFVTy->getNumElements();
2726 APInt PoisonElts(VWidth, 0);
2727 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2728 if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
2729 PoisonElts)) {
2730 if (V != &GEP)
2731 return replaceInstUsesWith(GEP, V);
2732 return &GEP;
2733 }
2734
2735 // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
2736 // possible (decide on canonical form for pointer broadcast), 3) exploit
2737 // undef elements to decrease demanded bits
2738 }
2739
2740 // Eliminate unneeded casts for indices, and replace indices which displace
2741 // by multiples of a zero size type with zero.
2742 bool MadeChange = false;
2743
2744 // Index width may not be the same width as pointer width.
2745 // Data layout chooses the right type based on supported integer types.
2746 Type *NewScalarIndexTy =
2747 DL.getIndexType(GEP.getPointerOperandType()->getScalarType());
2748
2749 gep_type_iterator GTI = gep_type_begin(GEP);
2750 for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
2751 ++I, ++GTI) {
2752 // Skip indices into struct types.
2753 if (GTI.isStruct())
2754 continue;
2755
2756 Type *IndexTy = (*I)->getType();
2757 Type *NewIndexType =
2758 IndexTy->isVectorTy()
2759 ? VectorType::get(NewScalarIndexTy,
2760 cast<VectorType>(IndexTy)->getElementCount())
2761 : NewScalarIndexTy;
2762
2763 // If the element type has zero size then any index over it is equivalent
2764 // to an index of zero, so replace it with zero if it is not zero already.
2765 Type *EltTy = GTI.getIndexedType();
2766 if (EltTy->isSized() && DL.getTypeAllocSize(EltTy).isZero())
2767 if (!isa<Constant>(*I) || !match(I->get(), m_Zero())) {
2768 *I = Constant::getNullValue(NewIndexType);
2769 MadeChange = true;
2770 }
2771
2772 if (IndexTy != NewIndexType) {
2773 // If we are using a wider index than needed for this platform, shrink
2774 // it to what we need. If narrower, sign-extend it to what we need.
2775 // This explicit cast can make subsequent optimizations more obvious.
2776 *I = Builder.CreateIntCast(*I, NewIndexType, true);
2777 MadeChange = true;
2778 }
2779 }
2780 if (MadeChange)
2781 return &GEP;
2782
2783 // Canonicalize constant GEPs to i8 type.
2784 if (!GEPEltType->isIntegerTy(8) && GEP.hasAllConstantIndices()) {
2785 APInt Offset(DL.getIndexTypeSizeInBits(GEPType), 0);
2786 if (GEP.accumulateConstantOffset(DL, Offset))
2787 return replaceInstUsesWith(
2788 GEP, Builder.CreatePtrAdd(PtrOp, Builder.getInt(Offset), "",
2789 GEP.isInBounds()));
2790 }
2791
2792 // Canonicalize scalable GEPs to an explicit offset using the llvm.vscale
2793 // intrinsic. This has better support in BasicAA.
2794 if (GEPEltType->isScalableTy()) {
2795 Value *Offset = EmitGEPOffset(cast<GEPOperator>(&GEP));
2796 return replaceInstUsesWith(
2797 GEP, Builder.CreatePtrAdd(PtrOp, Offset, "", GEP.isInBounds()));
2798 }
2799
2800 // Check to see if the inputs to the PHI node are getelementptr instructions.
2801 if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
2802 auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
2803 if (!Op1)
2804 return nullptr;
2805
2806 // Don't fold a GEP into itself through a PHI node. This can only happen
2807 // through the back-edge of a loop. Folding a GEP into itself means that
2808 // the value of the previous iteration needs to be stored in the meantime,
2809 // thus requiring an additional register variable to be live, but not
2810 // actually achieving anything (the GEP still needs to be executed once per
2811 // loop iteration).
2812 if (Op1 == &GEP)
2813 return nullptr;
2814
2815 int DI = -1;
2816
2817 for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
2818 auto *Op2 = dyn_cast<GetElementPtrInst>(*I);
2819 if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() ||
2820 Op1->getSourceElementType() != Op2->getSourceElementType())
2821 return nullptr;
2822
2823 // As for Op1 above, don't try to fold a GEP into itself.
2824 if (Op2 == &GEP)
2825 return nullptr;
2826
2827 // Keep track of the type as we walk the GEP.
2828 Type *CurTy = nullptr;
2829
2830 for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
2831 if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType())
2832 return nullptr;
2833
2834 if (Op1->getOperand(J) != Op2->getOperand(J)) {
2835 if (DI == -1) {
2836 // We have not seen any differences yet in the GEPs feeding the
2837 // PHI yet, so we record this one if it is allowed to be a
2838 // variable.
2839
2840 // The first two arguments can vary for any GEP, the rest have to be
2841 // static for struct slots
2842 if (J > 1) {
2843 assert(CurTy && "No current type?");
2844 if (CurTy->isStructTy())
2845 return nullptr;
2846 }
2847
2848 DI = J;
2849 } else {
2850 // The GEP is different by more than one input. While this could be
2851 // extended to support GEPs that vary by more than one variable it
2852 // doesn't make sense since it greatly increases the complexity and
2853 // would result in an R+R+R addressing mode which no backend
2854 // directly supports and would need to be broken into several
2855 // simpler instructions anyway.
2856 return nullptr;
2857 }
2858 }
2859
2860 // Sink down a layer of the type for the next iteration.
2861 if (J > 0) {
2862 if (J == 1) {
2863 CurTy = Op1->getSourceElementType();
2864 } else {
2865 CurTy =
2866 GetElementPtrInst::getTypeAtIndex(CurTy, Op1->getOperand(J));
2867 }
2868 }
2869 }
2870 }
2871
2872 // If not all GEPs are identical we'll have to create a new PHI node.
2873 // Check that the old PHI node has only one use so that it will get
2874 // removed.
2875 if (DI != -1 && !PN->hasOneUse())
2876 return nullptr;
2877
2878 auto *NewGEP = cast<GetElementPtrInst>(Op1->clone());
2879 if (DI == -1) {
2880 // All the GEPs feeding the PHI are identical. Clone one down into our
2881 // BB so that it can be merged with the current GEP.
2882 } else {
2883 // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
2884 // into the current block so it can be merged, and create a new PHI to
2885 // set that index.
2886 PHINode *NewPN;
2887 {
2888 IRBuilderBase::InsertPointGuard Guard(Builder);
2889 Builder.SetInsertPoint(PN);
2890 NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(),
2891 PN->getNumOperands());
2892 }
2893
2894 for (auto &I : PN->operands())
2895 NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
2896 PN->getIncomingBlock(I));
2897
2898 NewGEP->setOperand(DI, NewPN);
2899 }
2900
2901 NewGEP->insertBefore(*GEP.getParent(), GEP.getParent()->getFirstInsertionPt());
2902 return replaceOperand(GEP, 0, NewGEP);
2903 }
2904
2905 if (auto *Src = dyn_cast<GEPOperator>(PtrOp))
2906 if (Instruction *I = visitGEPOfGEP(GEP, Src))
2907 return I;
2908
2909 if (GEP.getNumIndices() == 1) {
2910 unsigned AS = GEP.getPointerAddressSpace();
2911 if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
2912 DL.getIndexSizeInBits(AS)) {
2913 uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedValue();
2914
2915 if (TyAllocSize == 1) {
2916 // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y),
2917 // but only if the result pointer is only used as if it were an integer,
2918 // or both point to the same underlying object (otherwise provenance is
2919 // not necessarily retained).
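// Illustrative example (hypothetical pointers %x and %y):
//   %xi = ptrtoint ptr %x to i64
//   %yi = ptrtoint ptr %y to i64
//   %d  = sub i64 %yi, %xi
//   %g  = getelementptr i8, ptr %x, i64 %d
// %g computes the same address as %y, so address-only uses (icmp, ptrtoint)
// can use %y directly.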
2920 Value *X = GEP.getPointerOperand();
2921 Value *Y;
2922 if (match(GEP.getOperand(1),
2923 m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
2924 GEPType == Y->getType()) {
2925 bool HasSameUnderlyingObject =
2926 getUnderlyingObject(X) == getUnderlyingObject(Y);
2927 bool Changed = false;
2928 GEP.replaceUsesWithIf(Y, [&](Use &U) {
2929 bool ShouldReplace = HasSameUnderlyingObject ||
2930 isa<ICmpInst>(U.getUser()) ||
2931 isa<PtrToIntInst>(U.getUser());
2932 Changed |= ShouldReplace;
2933 return ShouldReplace;
2934 });
2935 return Changed ? &GEP : nullptr;
2936 }
2937 } else {
2938 // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V)
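// Illustrative example with T = i32 (hypothetical values %x and %v):
//   %i = ashr exact i64 %v, 2
//   %g = getelementptr i32, ptr %x, i64 %i
// becomes
//   %g = getelementptr i8, ptr %x, i64 %v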
2939 Value *V;
2940 if ((has_single_bit(TyAllocSize) &&
2941 match(GEP.getOperand(1),
2942 m_Exact(m_Shr(m_Value(V),
2943 m_SpecificInt(countr_zero(TyAllocSize)))))) ||
2944 match(GEP.getOperand(1),
2945 m_Exact(m_IDiv(m_Value(V), m_SpecificInt(TyAllocSize))))) {
2946 GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
2947 Builder.getInt8Ty(), GEP.getPointerOperand(), V);
2948 NewGEP->setIsInBounds(GEP.isInBounds());
2949 return NewGEP;
2950 }
2951 }
2952 }
2953 }
2954 // We do not handle pointer-vector geps here.
2955 if (GEPType->isVectorTy())
2956 return nullptr;
2957
2958 if (GEP.getNumIndices() == 1) {
2959 // We can only preserve inbounds if the original gep is inbounds, the add
2960 // is nsw, and the add operands are non-negative.
2961 auto CanPreserveInBounds = [&](bool AddIsNSW, Value *Idx1, Value *Idx2) {
2962 SimplifyQuery Q = SQ.getWithInstruction(&GEP);
2963 return GEP.isInBounds() && AddIsNSW && isKnownNonNegative(Idx1, Q) &&
2964 isKnownNonNegative(Idx2, Q);
2965 };
2966
2967 // Try to replace ADD + GEP with GEP + GEP.
2968 Value *Idx1, *Idx2;
2969 if (match(GEP.getOperand(1),
2970 m_OneUse(m_Add(m_Value(Idx1), m_Value(Idx2))))) {
2971 // %idx = add i64 %idx1, %idx2
2972 // %gep = getelementptr i32, ptr %ptr, i64 %idx
2973 // as:
2974 // %newptr = getelementptr i32, ptr %ptr, i64 %idx1
2975 // %newgep = getelementptr i32, ptr %newptr, i64 %idx2
2976 bool IsInBounds = CanPreserveInBounds(
2977 cast<OverflowingBinaryOperator>(GEP.getOperand(1))->hasNoSignedWrap(),
2978 Idx1, Idx2);
2979 auto *NewPtr =
2980 Builder.CreateGEP(GEP.getResultElementType(), GEP.getPointerOperand(),
2981 Idx1, "", IsInBounds);
2982 return replaceInstUsesWith(
2983 GEP, Builder.CreateGEP(GEP.getResultElementType(), NewPtr, Idx2, "",
2984 IsInBounds));
2985 }
2986 ConstantInt *C;
2987 if (match(GEP.getOperand(1), m_OneUse(m_SExtLike(m_OneUse(m_NSWAdd(
2988 m_Value(Idx1), m_ConstantInt(C))))))) {
2989 // %add = add nsw i32 %idx1, idx2
2990 // %sidx = sext i32 %add to i64
2991 // %gep = getelementptr i32, ptr %ptr, i64 %sidx
2992 // as:
2993 // %newptr = getelementptr i32, ptr %ptr, i32 %idx1
2994 // %newgep = getelementptr i32, ptr %newptr, i32 idx2
2995 bool IsInBounds = CanPreserveInBounds(
2996 /*IsNSW=*/true, Idx1, C);
2997 auto *NewPtr = Builder.CreateGEP(
2998 GEP.getResultElementType(), GEP.getPointerOperand(),
2999 Builder.CreateSExt(Idx1, GEP.getOperand(1)->getType()), "",
3000 IsInBounds);
3001 return replaceInstUsesWith(
3002 GEP,
3003 Builder.CreateGEP(GEP.getResultElementType(), NewPtr,
3004 Builder.CreateSExt(C, GEP.getOperand(1)->getType()),
3005 "", IsInBounds));
3006 }
3007 }
3008
3009 if (!GEP.isInBounds()) {
3010 unsigned IdxWidth =
3011 DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace());
3012 APInt BasePtrOffset(IdxWidth, 0);
3013 Value *UnderlyingPtrOp =
3014 PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL,
3015 BasePtrOffset);
3016 bool CanBeNull, CanBeFreed;
3017 uint64_t DerefBytes = UnderlyingPtrOp->getPointerDereferenceableBytes(
3018 DL, CanBeNull, CanBeFreed);
3019 if (!CanBeNull && !CanBeFreed && DerefBytes != 0) {
3020 if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
3021 BasePtrOffset.isNonNegative()) {
3022 APInt AllocSize(IdxWidth, DerefBytes);
3023 if (BasePtrOffset.ule(AllocSize)) {
3024 return GetElementPtrInst::CreateInBounds(
3025 GEP.getSourceElementType(), PtrOp, Indices, GEP.getName());
3026 }
3027 }
3028 }
3029 }
3030
3031 if (Instruction *R = foldSelectGEP(GEP, Builder))
3032 return R;
3033
3034 return nullptr;
3035}
3036
3037 static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI,
3038 Instruction *AI) {
3039 if (isa<ConstantPointerNull>(V))
3040 return true;
3041 if (auto *LI = dyn_cast<LoadInst>(V))
3042 return isa<GlobalVariable>(LI->getPointerOperand());
3043 // Two distinct allocations will never be equal.
3044 return isAllocLikeFn(V, &TLI) && V != AI;
3045}
3046
3047/// Given a call CB which uses an address UsedV, return true if we can prove the
3048 /// call's only possible effect is storing to UsedV.
3049static bool isRemovableWrite(CallBase &CB, Value *UsedV,
3050 const TargetLibraryInfo &TLI) {
3051 if (!CB.use_empty())
3052 // TODO: add recursion if returned attribute is present
3053 return false;
3054
3055 if (CB.isTerminator())
3056 // TODO: remove implementation restriction
3057 return false;
3058
3059 if (!CB.willReturn() || !CB.doesNotThrow())
3060 return false;
3061
3062 // If the only possible side effect of the call is writing to the alloca,
3063 // and the result isn't used, we can safely remove any reads implied by the
3064 // call including those which might read the alloca itself.
3065 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(&CB, TLI);
3066 return Dest && Dest->Ptr == UsedV;
3067}
3068
3069 static bool isAllocSiteRemovable(Instruction *AI,
3070 SmallVectorImpl<WeakTrackingVH> &Users,
3071 const TargetLibraryInfo &TLI) {
3072 SmallVector<Instruction*, 4> Worklist;
3073 const std::optional<StringRef> Family = getAllocationFamily(AI, &TLI);
3074 Worklist.push_back(AI);
3075
3076 do {
3077 Instruction *PI = Worklist.pop_back_val();
3078 for (User *U : PI->users()) {
3079 Instruction *I = cast<Instruction>(U);
3080 switch (I->getOpcode()) {
3081 default:
3082 // Give up the moment we see something we can't handle.
3083 return false;
3084
3085 case Instruction::AddrSpaceCast:
3086 case Instruction::BitCast:
3087 case Instruction::GetElementPtr:
3088 Users.emplace_back(I);
3089 Worklist.push_back(I);
3090 continue;
3091
3092 case Instruction::ICmp: {
3093 ICmpInst *ICI = cast<ICmpInst>(I);
3094 // We can fold eq/ne comparisons with null to false/true, respectively.
3095 // We also fold comparisons in some conditions provided the alloc has
3096 // not escaped (see isNeverEqualToUnescapedAlloc).
3097 if (!ICI->isEquality())
3098 return false;
3099 unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
3100 if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
3101 return false;
3102
3103 // Do not fold compares to aligned_alloc calls, as they may have to
3104 // return null in case the required alignment cannot be satisfied,
3105 // unless we can prove that both alignment and size are valid.
3106 auto AlignmentAndSizeKnownValid = [](CallBase *CB) {
3107 // Check if alignment and size of a call to aligned_alloc is valid,
3108 // that is alignment is a power-of-2 and the size is a multiple of the
3109 // alignment.
3110 const APInt *Alignment;
3111 const APInt *Size;
3112 return match(CB->getArgOperand(0), m_APInt(Alignment)) &&
3113 match(CB->getArgOperand(1), m_APInt(Size)) &&
3114 Alignment->isPowerOf2() && Size->urem(*Alignment).isZero();
3115 };
3116 auto *CB = dyn_cast<CallBase>(AI);
3117 LibFunc TheLibFunc;
3118 if (CB && TLI.getLibFunc(*CB->getCalledFunction(), TheLibFunc) &&
3119 TLI.has(TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc &&
3120 !AlignmentAndSizeKnownValid(CB))
3121 return false;
3122 Users.emplace_back(I);
3123 continue;
3124 }
3125
3126 case Instruction::Call:
3127 // Ignore no-op and store intrinsics.
3128 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
3129 switch (II->getIntrinsicID()) {
3130 default:
3131 return false;
3132
3133 case Intrinsic::memmove:
3134 case Intrinsic::memcpy:
3135 case Intrinsic::memset: {
3136 MemIntrinsic *MI = cast<MemIntrinsic>(II);
3137 if (MI->isVolatile() || MI->getRawDest() != PI)
3138 return false;
3139 [[fallthrough]];
3140 }
3141 case Intrinsic::assume:
3142 case Intrinsic::invariant_start:
3143 case Intrinsic::invariant_end:
3144 case Intrinsic::lifetime_start:
3145 case Intrinsic::lifetime_end:
3146 case Intrinsic::objectsize:
3147 Users.emplace_back(I);
3148 continue;
3149 case Intrinsic::launder_invariant_group:
3150 case Intrinsic::strip_invariant_group:
3151 Users.emplace_back(I);
3152 Worklist.push_back(I);
3153 continue;
3154 }
3155 }
3156
3157 if (isRemovableWrite(*cast<CallBase>(I), PI, TLI)) {
3158 Users.emplace_back(I);
3159 continue;
3160 }
3161
3162 if (getFreedOperand(cast<CallBase>(I), &TLI) == PI &&
3163 getAllocationFamily(I, &TLI) == Family) {
3164 assert(Family);
3165 Users.emplace_back(I);
3166 continue;
3167 }
3168
3169 if (getReallocatedOperand(cast<CallBase>(I)) == PI &&
3170 getAllocationFamily(I, &TLI) == Family) {
3171 assert(Family);
3172 Users.emplace_back(I);
3173 Worklist.push_back(I);
3174 continue;
3175 }
3176
3177 return false;
3178
3179 case Instruction::Store: {
3180 StoreInst *SI = cast<StoreInst>(I);
3181 if (SI->isVolatile() || SI->getPointerOperand() != PI)
3182 return false;
3183 Users.emplace_back(I);
3184 continue;
3185 }
3186 }
3187 llvm_unreachable("missing a return?");
3188 }
3189 } while (!Worklist.empty());
3190 return true;
3191}
3192
3193Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
3194 assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI));
3195
3196 // If we have a malloc call whose only uses are any number of comparisons to
3197 // null and free calls, delete the calls and replace the comparisons with true
3198 // or false as appropriate.
3199
3200 // This is based on the principle that we can substitute our own allocation
3201 // function (which will never return null) rather than relying on knowledge
3202 // of the specific function being called. In some sense this can change the
3203 // permitted outputs of a program (when we convert a malloc to an alloca, the
3204 // fact that the allocation is now on the stack is potentially visible, for
3205 // example), but we believe it does so in a permissible manner.
3207
3208 // If we are removing an alloca with a dbg.declare, insert dbg.value calls
3209 // before each store.
3212 std::unique_ptr<DIBuilder> DIB;
3213 if (isa<AllocaInst>(MI)) {
3214 findDbgUsers(DVIs, &MI, &DVRs);
3215 DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
3216 }
3217
3218 if (isAllocSiteRemovable(&MI, Users, TLI)) {
3219 for (unsigned i = 0, e = Users.size(); i != e; ++i) {
3220 // Lower all @llvm.objectsize calls first, because they may
3221 // use a bitcast/GEP of the alloca we are removing.
3222 if (!Users[i])
3223 continue;
3224
3225 Instruction *I = cast<Instruction>(&*Users[i]);
3226
3227 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
3228 if (II->getIntrinsicID() == Intrinsic::objectsize) {
3229 SmallVector<Instruction *> InsertedInstructions;
3230 Value *Result = lowerObjectSizeCall(
3231 II, DL, &TLI, AA, /*MustSucceed=*/true, &InsertedInstructions);
3232 for (Instruction *Inserted : InsertedInstructions)
3233 Worklist.add(Inserted);
3234 replaceInstUsesWith(*I, Result);
3236 Users[i] = nullptr; // Skip examining in the next loop.
3237 }
3238 }
3239 }
3240 for (unsigned i = 0, e = Users.size(); i != e; ++i) {
3241 if (!Users[i])
3242 continue;
3243
3244 Instruction *I = cast<Instruction>(&*Users[i]);
3245
3246 if (ICmpInst *C = dyn_cast<ICmpInst>(I)) {
3248 ConstantInt::get(Type::getInt1Ty(C->getContext()),
3249 C->isFalseWhenEqual()));
3250 } else if (auto *SI = dyn_cast<StoreInst>(I)) {
3251 for (auto *DVI : DVIs)
3252 if (DVI->isAddressOfVariable())
3253 ConvertDebugDeclareToDebugValue(DVI, SI, *DIB);
3254 for (auto *DVR : DVRs)
3255 if (DVR->isAddressOfVariable())
3256 ConvertDebugDeclareToDebugValue(DVR, SI, *DIB);
3257 } else {
3258 // Casts, GEP, or anything else: we're about to delete this instruction,
3259 // so it cannot have any valid uses.
3260 replaceInstUsesWith(*I, PoisonValue::get(I->getType()));
3261 }
3263 }
3264
3265 if (InvokeInst *II = dyn_cast<InvokeInst>(&MI)) {
3266 // Replace invoke with a NOP intrinsic to maintain the original CFG
3267 Module *M = II->getModule();
3268 Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
3269 InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
3270 std::nullopt, "", II->getParent());
3271 }
3272
3273 // Remove debug intrinsics which describe the value contained within the
3274 // alloca. In addition to removing dbg.{declare,addr} which simply point to
3275 // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
3276 //
3277 // ```
3278 // define void @foo(i32 %0) {
3279 // %a = alloca i32 ; Deleted.
3280 // store i32 %0, i32* %a
3281 // dbg.value(i32 %0, "arg0") ; Not deleted.
3282 // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted.
3283 // call void @trivially_inlinable_no_op(i32* %a)
3284 // ret void
3285 // }
3286 // ```
3287 //
3288 // This may not be required if we stop describing the contents of allocas
3289 // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
3290 // the LowerDbgDeclare utility.
3291 //
3292 // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
3293 // "arg0" dbg.value may be stale after the call. However, failing to remove
3294 // the DW_OP_deref dbg.value causes large gaps in location coverage.
3295 //
3296 // FIXME: the Assignment Tracking project has now likely made this
3297 // redundant (and it's sometimes harmful).
3298 for (auto *DVI : DVIs)
3299 if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref())
3300 DVI->eraseFromParent();
3301 for (auto *DVR : DVRs)
3302 if (DVR->isAddressOfVariable() || DVR->getExpression()->startsWithDeref())
3303 DVR->eraseFromParent();
3304
3305 return eraseInstFromFunction(MI);
3306 }
3307 return nullptr;
3308}
3309
3310/// Move the call to free before a NULL test.
3311///
3312/// Check whether this free is reached only after its argument has been tested
3313/// against NULL (property 0).
3314/// If so, it is legal to move this call into its predecessor block.
3315///
3316/// The move is performed only if the block containing the call to free
3317/// will be removed, i.e.:
3318/// 1. it has only one predecessor P, and P has two successors
3319/// 2. it contains the call, noops, and an unconditional branch
3320/// 3. its successor is the same as its predecessor's successor
3321///
3322/// Profitability is not a concern here; this function should be called only
3323/// if the caller already knows the transformation would be profitable
3324/// (e.g., for code size).
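/// Editorial sketch of the CFG shape this targets (not from the source):
///   pred:
///     %c = icmp eq ptr %p, null
///     br i1 %c, label %cont, label %free_bb
///   free_bb:
///     call void @free(ptr %p)
///     br label %cont
/// Moving the free into %pred lets SimplifyCFG remove %free_bb and the
/// branch entirely.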
3325static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
3326 const DataLayout &DL) {
3327 Value *Op = FI.getArgOperand(0);
3328 BasicBlock *FreeInstrBB = FI.getParent();
3329 BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
3330
3331 // Validate part of constraint #1: Only one predecessor
3332 // FIXME: We could allow more than one predecessor, but in that case we
3333 // would duplicate the call to free in each predecessor and it may
3334 // not be profitable even for code size.
3335 if (!PredBB)
3336 return nullptr;
3337
3338 // Validate constraint #2: Does this block contain only the call to
3339 // free, noops, and an unconditional branch?
3340 BasicBlock *SuccBB;
3341 Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
3342 if (!match(FreeInstrBBTerminator, m_UnconditionalBr(SuccBB)))
3343 return nullptr;
3344
3345 // If there are only 2 instructions in the block, at this point,
3346 // they are the call to free and the unconditional branch.
3347 // If there are more than 2 instructions, check that the extras are noops,
3348 // i.e., they won't hurt the performance of the generated code.
3349 if (FreeInstrBB->size() != 2) {
3350 for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) {
3351 if (&Inst == &FI || &Inst == FreeInstrBBTerminator)
3352 continue;
3353 auto *Cast = dyn_cast<CastInst>(&Inst);
3354 if (!Cast || !Cast->isNoopCast(DL))
3355 return nullptr;
3356 }
3357 }
3358 // Validate the rest of constraint #1 by matching on the pred branch.
3359 Instruction *TI = PredBB->getTerminator();
3360 BasicBlock *TrueBB, *FalseBB;
3362 if (!match(TI, m_Br(m_ICmp(Pred,
3364 m_Specific(Op->stripPointerCasts())),
3365 m_Zero()),
3366 TrueBB, FalseBB)))
3367 return nullptr;
3368 if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
3369 return nullptr;
3370
3371 // Validate constraint #3: Ensure the null case just falls through.
3372 if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
3373 return nullptr;
3374 assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
3375 "Broken CFG: missing edge from predecessor to successor");
3376
3377 // At this point, we know that everything in FreeInstrBB can be moved
3378 // before TI.
3379 for (Instruction &Instr : llvm::make_early_inc_range(*FreeInstrBB)) {
3380 if (&Instr == FreeInstrBBTerminator)
3381 break;
3382 Instr.moveBeforePreserving(TI);
3383 }
3384 assert(FreeInstrBB->size() == 1 &&
3385 "Only the branch instruction should remain");
3386
3387 // Now that we've moved the call to free before the NULL check, we have to
3388 // remove any attributes on its parameter that imply it's non-null, because
3389 // those attributes might have only been valid because of the NULL check, and
3390 // we can get miscompiles if we keep them. This is conservative if non-null is
3391 // also implied by something other than the NULL check, but it's guaranteed to
3392 // be correct, and the conservativeness won't matter in practice, since the
3393 // attributes are irrelevant for the call to free itself and the pointer
3394 // shouldn't be used after the call.
3395 AttributeList Attrs = FI.getAttributes();
3396 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
3397 Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
3398 if (Dereferenceable.isValid()) {
3399 uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
3400 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
3401 Attribute::Dereferenceable);
3402 Attrs = Attrs.addDereferenceableOrNullParamAttr(FI.getContext(), 0, Bytes);
3403 }
3404 FI.setAttributes(Attrs);
3405
3406 return &FI;
3407}
3408
3409Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) {
3410 // free undef -> unreachable.
3411 if (isa<UndefValue>(Op)) {
3412 // Leave a marker since we can't modify the CFG here.
3414 return eraseInstFromFunction(FI);
3415 }
3416
3417 // If we have 'free null', delete the instruction. This can happen in STL
3418 // code when lots of inlining happens.
3419 if (isa<ConstantPointerNull>(Op))
3420 return eraseInstFromFunction(FI);
3421
3422 // If we had free(realloc(...)) with no intervening uses, then eliminate the
3423 // realloc() entirely.
3424 CallInst *CI = dyn_cast<CallInst>(Op);
3425 if (CI && CI->hasOneUse())
3426 if (Value *ReallocatedOp = getReallocatedOperand(CI))
3427 return eraseInstFromFunction(*replaceInstUsesWith(*CI, ReallocatedOp));
3428
3429 // If we optimize for code size, try to move the call to free before the null
3430 // test so that SimplifyCFG can remove the empty block and dead code
3431 // elimination can remove the branch. I.e., this helps to turn something like:
3432 // if (foo) free(foo);
3433 // into
3434 // free(foo);
3435 //
3436 // Note that we can only do this for 'free' and not for any flavor of
3437 // 'operator delete'; there is no 'operator delete' symbol for which we are
3438 // permitted to invent a call, even if we're passing in a null pointer.
3439 if (MinimizeSize) {
3440 LibFunc Func;
3441 if (TLI.getLibFunc(FI, Func) && TLI.has(Func) && Func == LibFunc_free)
3442 if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
3443 return I;
3444 }
3445
3446 return nullptr;
3447}
3448
3449Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) {
3450 Value *RetVal = RI.getReturnValue();
3451 if (!RetVal || !AttributeFuncs::isNoFPClassCompatibleType(RetVal->getType()))
3452 return nullptr;
3453
3454 Function *F = RI.getFunction();
3455 FPClassTest ReturnClass = F->getAttributes().getRetNoFPClass();
3456 if (ReturnClass == fcNone)
3457 return nullptr;
3458
3459 KnownFPClass KnownClass;
3460 Value *Simplified =
3461 SimplifyDemandedUseFPClass(RetVal, ~ReturnClass, KnownClass, 0, &RI);
3462 if (!Simplified)
3463 return nullptr;
3464
3465 return ReturnInst::Create(RI.getContext(), Simplified);
3466}
3467
3468// WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
3469bool InstCombinerImpl::removeInstructionsBeforeUnreachable(Instruction &I) {
3470 // Try to remove the previous instruction if it must lead to unreachable.
3471 // This includes instructions like stores and "llvm.assume" that may not get
3472 // removed by simple dead code elimination.
3473 bool Changed = false;
3474 while (Instruction *Prev = I.getPrevNonDebugInstruction()) {
3475 // While we theoretically can erase EH, that would result in a block that
3476 // used to start with an EH no longer starting with EH, which is invalid.
3477 // To make it valid, we'd need to fixup predecessors to no longer refer to
3478 // this block, but that changes CFG, which is not allowed in InstCombine.
3479 if (Prev->isEHPad())
3480 break; // Cannot drop any more instructions. We're done here.
3481
3482 if (!isGuaranteedToTransferExecutionToSuccessor(Prev))
3483 break; // Cannot drop any more instructions. We're done here.
3484 // Otherwise, this instruction can be freely erased,
3485 // even if it is not side-effect free.
3486
3487 // A value may still have uses before we process it here (for example, in
3488 // another unreachable block), so convert those to poison.
3489 replaceInstUsesWith(*Prev, PoisonValue::get(Prev->getType()));
3490 eraseInstFromFunction(*Prev);
3491 Changed = true;
3492 }
3493 return Changed;
3494}
3495
3498 return nullptr;
3499}
3500
3501Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) {
3502 assert(BI.isUnconditional() && "Only for unconditional branches.");
3503
3504 // If this store is the second-to-last instruction in the basic block
3505 // (excluding debug info and bitcasts of pointers) and if the block ends with
3506 // an unconditional branch, try to move the store to the successor block.
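 // Editorial sketch (hypothetical IR, not from the source): in a diamond like
 //   then:  store i32 1, ptr %p
 //          br label %join
 //   else:  store i32 2, ptr %p
 //          br label %join
 // the two stores can be merged into %join as a single store of a phi of the
 // stored values; that is what mergeStoreIntoSuccessor attempts below.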
3507
3508 auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
3509 auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) {
3510 return BBI->isDebugOrPseudoInst() ||
3511 (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy());
3512 };
3513
3514 BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
3515 do {
3516 if (BBI != FirstInstr)
3517 --BBI;
3518 } while (BBI != FirstInstr && IsNoopInstrForStoreMerging(BBI));
3519
3520 return dyn_cast<StoreInst>(BBI);
3521 };
3522
3523 if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
3524 if (mergeStoreIntoSuccessor(*SI))
3525 return &BI;
3526
3527 return nullptr;
3528}
3529
3530void InstCombinerImpl::addDeadEdge(BasicBlock *From, BasicBlock *To,
3531 SmallVectorImpl<BasicBlock *> &Worklist) {
3532 if (!DeadEdges.insert({From, To}).second)
3533 return;
3534
3535 // Replace phi node operands in successor with poison.
3536 for (PHINode &PN : To->phis())
3537 for (Use &U : PN.incoming_values())
3538 if (PN.getIncomingBlock(U) == From && !isa<PoisonValue>(U)) {
3539 replaceUse(U, PoisonValue::get(PN.getType()));
3540 addToWorklist(&PN);
3541 MadeIRChange = true;
3542 }
3543
3544 Worklist.push_back(To);
3545}
3546
3547// Under the assumption that I is unreachable, remove it and the following
3548// instructions. Changes are reported directly to MadeIRChange.
3549void InstCombinerImpl::handleUnreachableFrom(
3550 Instruction *I, SmallVectorImpl<BasicBlock *> &Worklist) {
3551 BasicBlock *BB = I->getParent();
3552 for (Instruction &Inst : make_early_inc_range(
3553 make_range(std::next(BB->getTerminator()->getReverseIterator()),
3554 std::next(I->getReverseIterator())))) {
3555 if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) {
3556 replaceInstUsesWith(Inst, PoisonValue::get(Inst.getType()));
3557 MadeIRChange = true;
3558 }
3559 if (Inst.isEHPad() || Inst.getType()->isTokenTy())
3560 continue;
3561 // RemoveDIs: erase debug-info on this instruction manually.
3562 Inst.dropDbgRecords();
3564 MadeIRChange = true;
3565 }
3566
3567 SmallVector<Value *> Changed;
3568 if (handleUnreachableTerminator(BB->getTerminator(), Changed)) {
3569 MadeIRChange = true;
3570 for (Value *V : Changed)
3571 addToWorklist(cast<Instruction>(V));
3572 }
3573
3574 // Handle potentially dead successors.
3575 for (BasicBlock *Succ : successors(BB))
3576 addDeadEdge(BB, Succ, Worklist);
3577}
3578
3579void InstCombinerImpl::handlePotentiallyDeadBlocks(
3580 SmallVectorImpl<BasicBlock *> &Worklist) {
3581 while (!Worklist.empty()) {
3582 BasicBlock *BB = Worklist.pop_back_val();
3583 if (!all_of(predecessors(BB), [&](BasicBlock *Pred) {
3584 return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
3585 }))
3586 continue;
3587
3589 }
3590}
3591
3592void InstCombinerImpl::handlePotentiallyDeadSuccessors(BasicBlock *BB,
3593 BasicBlock *LiveSucc) {
3595 for (BasicBlock *Succ : successors(BB)) {
3596 // The live successor isn't dead.
3597 if (Succ == LiveSucc)
3598 continue;
3599
3600 addDeadEdge(BB, Succ, Worklist);
3601 }
3602
3604}
3605
3606Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
3607 if (BI.isUnconditional())
3608 return visitUnconditionalBranchInst(BI);
3609
3610 // Change br (not X), label True, label False to: br X, label False, True
3611 Value *Cond = BI.getCondition();
3612 Value *X;
3613 if (match(Cond, m_Not(m_Value(X))) && !isa<Constant>(X)) {
3614 // Swap Destinations and condition...
3615 BI.swapSuccessors();
3616 if (BPI)
3618 return replaceOperand(BI, 0, X);
3619 }
3620
3621 // Canonicalize logical-and-with-invert as logical-or-with-invert.
3622 // This is done by inverting the condition and swapping successors:
3623 // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T
3624 Value *Y;
3625 if (isa<SelectInst>(Cond) &&
3626 match(Cond,
3628 Value *NotX = Builder.CreateNot(X, "not." + X->getName());
3629 Value *Or = Builder.CreateLogicalOr(NotX, Y);
3630 BI.swapSuccessors();
3631 if (BPI)
3633 return replaceOperand(BI, 0, Or);
3634 }
3635
3636 // If the condition is irrelevant, remove the use so that other
3637 // transforms on the condition become more effective.
3638 if (!isa<ConstantInt>(Cond) && BI.getSuccessor(0) == BI.getSuccessor(1))
3639 return replaceOperand(BI, 0, ConstantInt::getFalse(Cond->getType()));
3640
3641 // Canonicalize, for example, fcmp_one -> fcmp_oeq.
3642 CmpInst::Predicate Pred;
3643 if (match(Cond, m_OneUse(m_FCmp(Pred, m_Value(), m_Value()))) &&
3644 !isCanonicalPredicate(Pred)) {
3645 // Swap destinations and condition.
3646 auto *Cmp = cast<CmpInst>(Cond);
3647 Cmp->setPredicate(CmpInst::getInversePredicate(Pred));
3648 BI.swapSuccessors();
3649 if (BPI)
3651 Worklist.push(Cmp);
3652 return &BI;
3653 }
3654
3655 if (isa<UndefValue>(Cond)) {
3656 handlePotentiallyDeadSuccessors(BI.getParent(), /*LiveSucc*/ nullptr);
3657 return nullptr;
3658 }
3659 if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
3661 BI.getSuccessor(!CI->getZExtValue()));
3662 return nullptr;
3663 }
3664
3665 DC.registerBranch(&BI);
3666 return nullptr;
3667}
3668
3669// Replaces (switch (select cond, X, C)/(select cond, C, X)) with (switch X) if
3670// we can prove that both (switch C) and (switch X) go to the default when cond
3671// is false/true.
3672static Value *simplifySwitchOnSelectUsingRanges(SwitchInst &SI,
3673 SelectInst *Select,
3674 bool IsTrueArm) {
3675 unsigned CstOpIdx = IsTrueArm ? 1 : 2;
3676 auto *C = dyn_cast<ConstantInt>(Select->getOperand(CstOpIdx));
3677 if (!C)
3678 return nullptr;
3679
3680 BasicBlock *CstBB = SI.findCaseValue(C)->getCaseSuccessor();
3681 if (CstBB != SI.getDefaultDest())
3682 return nullptr;
3683 Value *X = Select->getOperand(3 - CstOpIdx);
3685 const APInt *RHSC;
3686 if (!match(Select->getCondition(),
3687 m_ICmp(Pred, m_Specific(X), m_APInt(RHSC))))
3688 return nullptr;
3689 if (IsTrueArm)
3690 Pred = ICmpInst::getInversePredicate(Pred);
3691
3692 // See whether we can replace the select with X
3694 for (auto Case : SI.cases())
3695 if (!CR.contains(Case.getCaseValue()->getValue()))
3696 return nullptr;
3697
3698 return X;
3699}
3700
3701Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
3702 Value *Cond = SI.getCondition();
3703 Value *Op0;
3704 ConstantInt *AddRHS;
3705 if (match(Cond, m_Add(m_Value(Op0), m_ConstantInt(AddRHS)))) {
3706 // Change 'switch (X+4) case 1:' into 'switch (X) case -3'.
3707 for (auto Case : SI.cases()) {
3708 Constant *NewCase = ConstantExpr::getSub(Case.getCaseValue(), AddRHS);
3709 assert(isa<ConstantInt>(NewCase) &&
3710 "Result of expression should be constant");
3711 Case.setValue(cast<ConstantInt>(NewCase));
3712 }
3713 return replaceOperand(SI, 0, Op0);
3714 }
3715
3716 ConstantInt *SubLHS;
3717 if (match(Cond, m_Sub(m_ConstantInt(SubLHS), m_Value(Op0)))) {
3718 // Change 'switch (1-X) case 1:' into 'switch (X) case 0'.
3719 for (auto Case : SI.cases()) {
3720 Constant *NewCase = ConstantExpr::getSub(SubLHS, Case.getCaseValue());
3721 assert(isa<ConstantInt>(NewCase) &&
3722 "Result of expression should be constant");
3723 Case.setValue(cast<ConstantInt>(NewCase));
3724 }
3725 return replaceOperand(SI, 0, Op0);
3726 }
3727
3728 uint64_t ShiftAmt;
3729 if (match(Cond, m_Shl(m_Value(Op0), m_ConstantInt(ShiftAmt))) &&
3730 ShiftAmt < Op0->getType()->getScalarSizeInBits() &&
3731 all_of(SI.cases(), [&](const auto &Case) {
3732 return Case.getCaseValue()->getValue().countr_zero() >= ShiftAmt;
3733 })) {
3734 // Change 'switch (X << 2) case 4:' into 'switch (X) case 1:'.
3735 OverflowingBinaryOperator *Shl = cast<OverflowingBinaryOperator>(Cond);
3736 if (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap() ||
3737 Shl->hasOneUse()) {
3738 Value *NewCond = Op0;
3739 if (!Shl->hasNoUnsignedWrap() && !Shl->hasNoSignedWrap()) {
3740 // If the shift may wrap, we need to mask off the bits that are shifted out.
3741 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
3742 NewCond = Builder.CreateAnd(
3743 Op0, APInt::getLowBitsSet(BitWidth, BitWidth - ShiftAmt));
3744 }
3745 for (auto Case : SI.cases()) {
3746 const APInt &CaseVal = Case.getCaseValue()->getValue();
3747 APInt ShiftedCase = Shl->hasNoSignedWrap() ? CaseVal.ashr(ShiftAmt)
3748 : CaseVal.lshr(ShiftAmt);
3749 Case.setValue(ConstantInt::get(SI.getContext(), ShiftedCase));
3750 }
3751 return replaceOperand(SI, 0, NewCond);
3752 }
3753 }
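 // Editorial worked example (not from the source): for an i8 condition
 // 'switch (shl i8 %x, 2)' without nsw/nuw, the condition becomes
 // 'and i8 %x, 63' (keeping the 6 bits that survive the shift) and a
 // 'case 4' becomes 'case 1'.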
3754
3755 // Fold switch(zext/sext(X)) into switch(X) if possible.
3756 if (match(Cond, m_ZExtOrSExt(m_Value(Op0)))) {
3757 bool IsZExt = isa<ZExtInst>(Cond);
3758 Type *SrcTy = Op0->getType();
3759 unsigned NewWidth = SrcTy->getScalarSizeInBits();
3760
3761 if (all_of(SI.cases(), [&](const auto &Case) {
3762 const APInt &CaseVal = Case.getCaseValue()->getValue();
3763 return IsZExt ? CaseVal.isIntN(NewWidth)
3764 : CaseVal.isSignedIntN(NewWidth);
3765 })) {
3766 for (auto &Case : SI.cases()) {
3767 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
3768 Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
3769 }
3770 return replaceOperand(SI, 0, Op0);
3771 }
3772 }
3773
3774 // Fold switch(select cond, X, Y) into switch(X/Y) if possible
3775 if (auto *Select = dyn_cast<SelectInst>(Cond)) {
3776 if (Value *V =
3777 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/true))
3778 return replaceOperand(SI, 0, V);
3779 if (Value *V =
3780 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/false))
3781 return replaceOperand(SI, 0, V);
3782 }
3783
3784 KnownBits Known = computeKnownBits(Cond, 0, &SI);
3785 unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
3786 unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
3787
3788 // Compute the number of leading bits we can ignore.
3789 // TODO: A better way to determine this would use ComputeNumSignBits().
3790 for (const auto &C : SI.cases()) {
3791 LeadingKnownZeros =
3792 std::min(LeadingKnownZeros, C.getCaseValue()->getValue().countl_zero());
3793 LeadingKnownOnes =
3794 std::min(LeadingKnownOnes, C.getCaseValue()->getValue().countl_one());
3795 }
3796
3797 unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes);
3798
3799 // Shrink the condition operand if the new type is smaller than the old type.
3800 // But do not shrink to a non-standard type, because the backend can't
3801 // generate good code for that yet.
3802 // TODO: We can make it aggressive again after fixing PR39569.
3803 if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
3804 shouldChangeType(Known.getBitWidth(), NewWidth)) {
3805 IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
3807 Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");
3808
3809 for (auto Case : SI.cases()) {
3810 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
3811 Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
3812 }
3813 return replaceOperand(SI, 0, NewCond);
3814 }
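 // Editorial example (not from the source): if the i32 condition and every
 // case value are known to have at least 24 leading zero bits, the switch can
 // instead be performed on 'trunc i32 %cond to i8' with truncated case values.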
3815
3816 if (isa<UndefValue>(Cond)) {
3817 handlePotentiallyDeadSuccessors(SI.getParent(), /*LiveSucc*/ nullptr);
3818 return nullptr;
3819 }
3820 if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
3821 handlePotentiallyDeadSuccessors(SI.getParent(),
3822 SI.findCaseValue(CI)->getCaseSuccessor());
3823 return nullptr;
3824 }
3825
3826 return nullptr;
3827}
3828
3829Instruction *
3830InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
3831 auto *WO = dyn_cast<WithOverflowInst>(EV.getAggregateOperand());
3832 if (!WO)
3833 return nullptr;
3834
3835 Intrinsic::ID OvID = WO->getIntrinsicID();
3836 const APInt *C = nullptr;
3837 if (match(WO->getRHS(), m_APIntAllowPoison(C))) {
3838 if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
3839 OvID == Intrinsic::umul_with_overflow)) {
3840 // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
3841 if (C->isAllOnes())
3842 return BinaryOperator::CreateNeg(WO->getLHS());
3843 // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
3844 if (C->isPowerOf2()) {
3845 return BinaryOperator::CreateShl(
3846 WO->getLHS(),
3847 ConstantInt::get(WO->getLHS()->getType(), C->logBase2()));
3848 }
3849 }
3850 }
3851
3852 // We're extracting from an overflow intrinsic. See if we're the only user.
3853 // That allows us to simplify multiple-result intrinsics to simpler things
3854 // that produce just one value.
3855 if (!WO->hasOneUse())
3856 return nullptr;
3857
3858 // Check if we're grabbing only the result of a 'with overflow' intrinsic
3859 // and replace it with a traditional binary instruction.
3860 if (*EV.idx_begin() == 0) {
3861 Instruction::BinaryOps BinOp = WO->getBinaryOp();
3862 Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
3863 // Replace the old instruction's uses with poison.
3864 replaceInstUsesWith(*WO, PoisonValue::get(WO->getType()));
3866 return BinaryOperator::Create(BinOp, LHS, RHS);
3867 }
3868
3869 assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst");
3870
3871 // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS.
3872 if (OvID == Intrinsic::usub_with_overflow)
3873 return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS());
3874
3875 // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but
3876 // +1 is not possible because we assume signed values.
3877 if (OvID == Intrinsic::smul_with_overflow &&
3878 WO->getLHS()->getType()->isIntOrIntVectorTy(1))
3879 return BinaryOperator::CreateAnd(WO->getLHS(), WO->getRHS());
3880
3881 // extractvalue (umul_with_overflow X, X), 1 -> X u> 2^(N/2)-1
3882 if (OvID == Intrinsic::umul_with_overflow && WO->getLHS() == WO->getRHS()) {
3883 unsigned BitWidth = WO->getLHS()->getType()->getScalarSizeInBits();
3884 // Only handle even bitwidths for performance reasons.
3885 if (BitWidth % 2 == 0)
3886 return new ICmpInst(
3887 ICmpInst::ICMP_UGT, WO->getLHS(),
3888 ConstantInt::get(WO->getLHS()->getType(),
3890 }
3891
3892 // If only the overflow result is used, and the right hand side is a
3893 // constant (or constant splat), we can remove the intrinsic by directly
3894 // checking for overflow.
3895 if (C) {
3896 // Compute the no-wrap range for LHS given RHS=C, then construct an
3897 // equivalent icmp, potentially using an offset.
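 // Editorial worked example (not from the source): for
 //   %v = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %x, i8 100)
 //   %ov = extractvalue { i8, i1 } %v, 1
 // the no-wrap range for %x is [-128, 28), so %ov is equivalent to
 // 'icmp sgt i8 %x, 27' and the intrinsic can be dropped.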
3898 ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
3899 WO->getBinaryOp(), *C, WO->getNoWrapKind());
3900
3901 CmpInst::Predicate Pred;
3902 APInt NewRHSC, Offset;
3903 NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
3904 auto *OpTy = WO->getRHS()->getType();
3905 auto *NewLHS = WO->getLHS();
3906 if (Offset != 0)
3907 NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset));
3908 return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
3909 ConstantInt::get(OpTy, NewRHSC));
3910 }
3911
3912 return nullptr;
3913}
3914
3915Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
3916 Value *Agg = EV.getAggregateOperand();
3917
3918 if (!EV.hasIndices())
3919 return replaceInstUsesWith(EV, Agg);
3920
3921 if (Value *V = simplifyExtractValueInst(Agg, EV.getIndices(),
3922 SQ.getWithInstruction(&EV)))
3923 return replaceInstUsesWith(EV, V);
3924
3925 if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
3926 // We're extracting from an insertvalue instruction, compare the indices
3927 const unsigned *exti, *exte, *insi, *inse;
3928 for (exti = EV.idx_begin(), insi = IV->idx_begin(),
3929 exte = EV.idx_end(), inse = IV->idx_end();
3930 exti != exte && insi != inse;
3931 ++exti, ++insi) {
3932 if (*insi != *exti)
3933 // The insert and extract both reference distinctly different elements.
3934 // This means the extract is not influenced by the insert, and we can
3935 // replace the aggregate operand of the extract with the aggregate
3936 // operand of the insert. i.e., replace
3937 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
3938 // %E = extractvalue { i32, { i32 } } %I, 0
3939 // with
3940 // %E = extractvalue { i32, { i32 } } %A, 0
3941 return ExtractValueInst::Create(IV->getAggregateOperand(),
3942 EV.getIndices());
3943 }
3944 if (exti == exte && insi == inse)
3945 // Both iterators are at the end: Index lists are identical. Replace
3946 // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
3947 // %C = extractvalue { i32, { i32 } } %B, 1, 0
3948 // with "i32 42"
3949 return replaceInstUsesWith(EV, IV->getInsertedValueOperand());
3950 if (exti == exte) {
3951 // The extract list is a prefix of the insert list. i.e. replace
3952 // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
3953 // %E = extractvalue { i32, { i32 } } %I, 1
3954 // with
3955 // %X = extractvalue { i32, { i32 } } %A, 1
3956 // %E = insertvalue { i32 } %X, i32 42, 0
3957 // by switching the order of the insert and extract (though the
3958 // insertvalue should be left in, since it may have other uses).
3959 Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(),
3960 EV.getIndices());
3961 return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
3962 ArrayRef(insi, inse));
3963 }
3964 if (insi == inse)
3965 // The insert list is a prefix of the extract list
3966 // We can simply remove the common indices from the extract and make it
3967 // operate on the inserted value instead of the insertvalue result.
3968 // i.e., replace
3969 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
3970 // %E = extractvalue { i32, { i32 } } %I, 1, 0
3971 // with
3972 // %E extractvalue { i32 } { i32 42 }, 0
3973 return ExtractValueInst::Create(IV->getInsertedValueOperand(),
3974 ArrayRef(exti, exte));
3975 }
3976
3977 if (Instruction *R = foldExtractOfOverflowIntrinsic(EV))
3978 return R;
3979
3980 if (LoadInst *L = dyn_cast<LoadInst>(Agg)) {
3981 // Bail out if the aggregate contains a scalable vector type.
3982 if (auto *STy = dyn_cast<StructType>(Agg->getType());
3983 STy && STy->containsScalableVectorType())
3984 return nullptr;
3985
3986 // If the (non-volatile) load only has one use, we can rewrite this to a
3987 // load from a GEP. This reduces the size of the load. If a load is used
3988 // only by extractvalue instructions then this either must have been
3989 // optimized before, or it is a struct with padding, in which case we
3990 // don't want to do the transformation as it loses padding knowledge.
3991 if (L->isSimple() && L->hasOneUse()) {
3992 // extractvalue has integer indices, getelementptr has Value*s. Convert.
3993 SmallVector<Value*, 4> Indices;
3994 // Prefix an i32 0 since we need the first element.
3995 Indices.push_back(Builder.getInt32(0));
3996 for (unsigned Idx : EV.indices())
3997 Indices.push_back(Builder.getInt32(Idx));
3998
3999 // We need to insert these at the location of the old load, not at that of
4000 // the extractvalue.
4002 Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
4003 L->getPointerOperand(), Indices);
4005 // Whatever aliasing information we had for the original load must also
4006 // hold for the smaller load, so propagate the annotations.
4007 NL->setAAMetadata(L->getAAMetadata());
4008 // Returning the load directly will cause the main loop to insert it in
4009 // the wrong spot, so use replaceInstUsesWith().
4010 return replaceInstUsesWith(EV, NL);
4011 }
4012 }
4013
4014 if (auto *PN = dyn_cast<PHINode>(Agg))
4015 if (Instruction *Res = foldOpIntoPhi(EV, PN))
4016 return Res;
4017
4018 // Canonicalize extract (select Cond, TV, FV)
4019 // -> select cond, (extract TV), (extract FV)
4020 if (auto *SI = dyn_cast<SelectInst>(Agg))
4021 if (Instruction *R = FoldOpIntoSelect(EV, SI, /*FoldWithMultiUse=*/true))
4022 return R;
4023
4024 // We could simplify extracts from other values. Note that nested extracts may
4025 // already be simplified implicitly by the above: extract (extract (insert) )
4026 // will be translated into extract ( insert ( extract ) ) first and then just
4027 // the value inserted, if appropriate. Similarly for extracts from single-use
4028 // loads: extract (extract (load)) will be translated to extract (load (gep))
4029 // and if again single-use then via load (gep (gep)) to load (gep).
4030 // However, double extracts from e.g. function arguments or return values
4031 // aren't handled yet.
4032 return nullptr;
4033}
4034
4035/// Return 'true' if the given typeinfo will match anything.
4036static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
4037 switch (Personality) {
4041 // The GCC C EH and Rust personality only exists to support cleanups, so
4042 // it's not clear what the semantics of catch clauses are.
4043 return false;
4045 return false;
4047 // While __gnat_all_others_value will match any Ada exception, it doesn't
4048 // match foreign exceptions (or didn't, before gcc-4.7).
4049 return false;
4060 return TypeInfo->isNullValue();
4061 }
4062 llvm_unreachable("invalid enum");
4063}
4064
4065static bool shorter_filter(const Value *LHS, const Value *RHS) {
4066 return
4067 cast<ArrayType>(LHS->getType())->getNumElements()
4068 <
4069 cast<ArrayType>(RHS->getType())->getNumElements();
4070}
4071
4072Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) {
4073 // The logic here should be correct for any real-world personality function.
4074 // However if that turns out not to be true, the offending logic can always
4075 // be conditioned on the personality function, like the catch-all logic is.
4076 EHPersonality Personality =
4077 classifyEHPersonality(LI.getParent()->getParent()->getPersonalityFn());
4078
4079 // Simplify the list of clauses, e.g. by removing repeated catch clauses
4080 // (these are often created by inlining).
4081 bool MakeNewInstruction = false; // If true, recreate using the following:
4082 SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
4083 bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
4084
4085 SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
4086 for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
4087 bool isLastClause = i + 1 == e;
4088 if (LI.isCatch(i)) {
4089 // A catch clause.
4090 Constant *CatchClause = LI.getClause(i);
4091 Constant *TypeInfo = CatchClause->stripPointerCasts();
4092
4093 // If we already saw this clause, there is no point in having a second
4094 // copy of it.
4095 if (AlreadyCaught.insert(TypeInfo).second) {
4096 // This catch clause was not already seen.
4097 NewClauses.push_back(CatchClause);
4098 } else {
4099 // Repeated catch clause - drop the redundant copy.
4100 MakeNewInstruction = true;
4101 }
4102
4103 // If this is a catch-all then there is no point in keeping any following
4104 // clauses or marking the landingpad as having a cleanup.
4105 if (isCatchAll(Personality, TypeInfo)) {
4106 if (!isLastClause)
4107 MakeNewInstruction = true;
4108 CleanupFlag = false;
4109 break;
4110 }
4111 } else {
4112 // A filter clause. If any of the filter elements were already caught
4113 // then they can be dropped from the filter. It is tempting to try to
4114 // exploit the filter further by saying that any typeinfo that does not
4115 // occur in the filter can't be caught later (and thus can be dropped).
4116 // However this would be wrong, since typeinfos can match without being
4117 // equal (for example if one represents a C++ class, and the other some
4118 // class derived from it).
4119 assert(LI.isFilter(i) && "Unsupported landingpad clause!");
4120 Constant *FilterClause = LI.getClause(i);
4121 ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
4122 unsigned NumTypeInfos = FilterType->getNumElements();
4123
4124 // An empty filter catches everything, so there is no point in keeping any
4125 // following clauses or marking the landingpad as having a cleanup. By
4126 // dealing with this case here the following code is made a bit simpler.
4127 if (!NumTypeInfos) {
4128 NewClauses.push_back(FilterClause);
4129 if (!isLastClause)
4130 MakeNewInstruction = true;
4131 CleanupFlag = false;
4132 break;
4133 }
4134
4135 bool MakeNewFilter = false; // If true, make a new filter.
4136 SmallVector<Constant *, 16> NewFilterElts; // New elements.
4137 if (isa<ConstantAggregateZero>(FilterClause)) {
4138 // Not an empty filter - it contains at least one null typeinfo.
4139 assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
4140 Constant *TypeInfo =
4142 // If this typeinfo is a catch-all then the filter can never match.
4143 if (isCatchAll(Personality, TypeInfo)) {
4144 // Throw the filter away.
4145 MakeNewInstruction = true;
4146 continue;
4147 }
4148
4149 // There is no point in having multiple copies of this typeinfo, so
4150 // discard all but the first copy if there is more than one.
4151 NewFilterElts.push_back(TypeInfo);
4152 if (NumTypeInfos > 1)
4153 MakeNewFilter = true;
4154 } else {
4155 ConstantArray *Filter = cast<ConstantArray>(FilterClause);
4156 SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
4157 NewFilterElts.reserve(NumTypeInfos);
4158
4159 // Remove any filter elements that were already caught or that already
4160 // occurred in the filter. While there, see if any of the elements are
4161 // catch-alls. If so, the filter can be discarded.
4162 bool SawCatchAll = false;
4163 for (unsigned j = 0; j != NumTypeInfos; ++j) {
4164 Constant *Elt = Filter->getOperand(j);
4165 Constant *TypeInfo = Elt->stripPointerCasts();
4166 if (isCatchAll(Personality, TypeInfo)) {
4167 // This element is a catch-all. Bail out, noting this fact.
4168 SawCatchAll = true;
4169 break;
4170 }
4171
4172 // Even if we've seen a type in a catch clause, we don't want to
4173 // remove it from the filter. An unexpected type handler may be
4174 // set up for a call site which throws an exception of the same
4175 // type caught. In order for the exception thrown by the unexpected
4176 // handler to propagate correctly, the filter must be correctly
4177 // described for the call site.
4178 //
4179 // Example:
4180 //
4181 // void unexpected() { throw 1;}
4182 // void foo() throw (int) {
4183 // std::set_unexpected(unexpected);
4184 // try {
4185 // throw 2.0;
4186 // } catch (int i) {}
4187 // }
4188
4189 // There is no point in having multiple copies of the same typeinfo in
4190 // a filter, so only add it if we didn't already.
4191 if (SeenInFilter.insert(TypeInfo).second)
4192 NewFilterElts.push_back(cast<Constant>(Elt));
4193 }
4194 // A filter containing a catch-all cannot match anything by definition.
4195 if (SawCatchAll) {
4196 // Throw the filter away.
4197 MakeNewInstruction = true;
4198 continue;
4199 }
4200
4201 // If we dropped something from the filter, make a new one.
4202 if (NewFilterElts.size() < NumTypeInfos)
4203 MakeNewFilter = true;
4204 }
4205 if (MakeNewFilter) {
4206 FilterType = ArrayType::get(FilterType->getElementType(),
4207 NewFilterElts.size());
4208 FilterClause = ConstantArray::get(FilterType, NewFilterElts);
4209 MakeNewInstruction = true;
4210 }
4211
4212 NewClauses.push_back(FilterClause);
4213
4214 // If the new filter is empty then it will catch everything so there is
4215 // no point in keeping any following clauses or marking the landingpad
4216 // as having a cleanup. The case of the original filter being empty was
4217 // already handled above.
4218 if (MakeNewFilter && !NewFilterElts.size()) {
4219 assert(MakeNewInstruction && "New filter but not a new instruction!");
4220 CleanupFlag = false;
4221 break;
4222 }
4223 }
4224 }
4225
4226 // If several filters occur in a row then reorder them so that the shortest
4227 // filters come first (those with the smallest number of elements). This is
4228 // advantageous because shorter filters are more likely to match, speeding up
4229 // unwinding, but mostly because it increases the effectiveness of the other
4230 // filter optimizations below.
4231 for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
4232 unsigned j;
4233 // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
4234 for (j = i; j != e; ++j)
4235 if (!isa<ArrayType>(NewClauses[j]->getType()))
4236 break;
4237
4238 // Check whether the filters are already sorted by length. We need to know
4239 // if sorting them is actually going to do anything so that we only make a
4240 // new landingpad instruction if it does.
4241 for (unsigned k = i; k + 1 < j; ++k)
4242 if (shorter_filter(NewClauses[k+1], NewClauses[k])) {
4243 // Not sorted, so sort the filters now. Doing an unstable sort would be
4244 // correct too but reordering filters pointlessly might confuse users.
4245 std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j,
4247 MakeNewInstruction = true;
4248 break;
4249 }
4250
4251 // Look for the next batch of filters.
4252 i = j + 1;
4253 }
4254
4255 // If typeinfos matched if and only if equal, then the elements of a filter L
4256 // that occurs later than a filter F could be replaced by the intersection of
4257 // the elements of F and L. In reality two typeinfos can match without being
4258 // equal (for example if one represents a C++ class, and the other some class
4259 // derived from it) so it would be wrong to perform this transform in general.
4260 // However the transform is correct and useful if F is a subset of L. In that
4261 // case L can be replaced by F, and thus removed altogether since repeating a
4262 // filter is pointless. So here we look at all pairs of filters F and L where
4263 // L follows F in the list of clauses, and remove L if every element of F is
4264 // an element of L. This can occur when inlining C++ functions with exception
4265 // specifications.
4266 for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
4267 // Examine each filter in turn.
4268 Value *Filter = NewClauses[i];
4269 ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType());
4270 if (!FTy)
4271 // Not a filter - skip it.
4272 continue;
4273 unsigned FElts = FTy->getNumElements();
4274 // Examine each filter following this one. Doing this backwards means that
4275 // we don't have to worry about filters disappearing under us when removed.
4276 for (unsigned j = NewClauses.size() - 1; j != i; --j) {
4277 Value *LFilter = NewClauses[j];
4278 ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType());
4279 if (!LTy)
4280 // Not a filter - skip it.
4281 continue;
4282 // If Filter is a subset of LFilter, i.e. every element of Filter is also
4283 // an element of LFilter, then discard LFilter.
4284 SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
4285 // If Filter is empty then it is a subset of LFilter.
4286 if (!FElts) {
4287 // Discard LFilter.
4288 NewClauses.erase(J);
4289 MakeNewInstruction = true;
4290 // Move on to the next filter.
4291 continue;
4292 }
4293 unsigned LElts = LTy->getNumElements();
4294 // If Filter is longer than LFilter then it cannot be a subset of it.
4295 if (FElts > LElts)
4296 // Move on to the next filter.
4297 continue;
4298 // At this point we know that LFilter has at least one element.
4299 if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros.
4300 // Filter is a subset of LFilter iff Filter contains only zeros (as we
4301 // already know that Filter is not longer than LFilter).
4302 if (isa<ConstantAggregateZero>(Filter)) {
4303 assert(FElts <= LElts && "Should have handled this case earlier!");
4304 // Discard LFilter.
4305 NewClauses.erase(J);
4306 MakeNewInstruction = true;
4307 }
4308 // Move on to the next filter.
4309 continue;
4310 }
4311 ConstantArray *LArray = cast<ConstantArray>(LFilter);
4312 if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros.
4313 // Since Filter is non-empty and contains only zeros, it is a subset of
4314 // LFilter iff LFilter contains a zero.
4315 assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
4316 for (unsigned l = 0; l != LElts; ++l)
4317 if (LArray->getOperand(l)->isNullValue()) {
4318 // LFilter contains a zero - discard it.
4319 NewClauses.erase(J);
4320 MakeNewInstruction = true;
4321 break;
4322 }
4323 // Move on to the next filter.
4324 continue;
4325 }
4326 // At this point we know that both filters are ConstantArrays. Loop over
4327 // operands to see whether every element of Filter is also an element of
4328 // LFilter. Since filters tend to be short this is probably faster than
4329 // using a method that scales nicely.
4330 ConstantArray *FArray = cast<ConstantArray>(Filter);
4331 bool AllFound = true;
4332 for (unsigned f = 0; f != FElts; ++f) {
4333 Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts();
4334 AllFound = false;
4335 for (unsigned l = 0; l != LElts; ++l) {
4336 Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts();
4337 if (LTypeInfo == FTypeInfo) {
4338 AllFound = true;
4339 break;
4340 }
4341 }
4342 if (!AllFound)
4343 break;
4344 }
4345 if (AllFound) {
4346 // Discard LFilter.
4347 NewClauses.erase(J);
4348 MakeNewInstruction = true;
4349 }
4350 // Move on to the next filter.
4351 }
4352 }
4353
4354 // If we changed any of the clauses, replace the old landingpad instruction
4355 // with a new one.
4356 if (MakeNewInstruction) {
4357 LandingPadInst *NLI = LandingPadInst::Create(LI.getType(),
4358 NewClauses.size());
4359 for (unsigned i = 0, e = NewClauses.size(); i != e; ++i)
4360 NLI->addClause(NewClauses[i]);
4361 // A landing pad with no clauses must have the cleanup flag set. It is
4362 // theoretically possible, though highly unlikely, that we eliminated all
4363 // clauses. If so, force the cleanup flag to true.
4364 if (NewClauses.empty())
4365 CleanupFlag = true;
4366 NLI->setCleanup(CleanupFlag);
4367 return NLI;
4368 }
4369
4370 // Even if none of the clauses changed, we may nonetheless have understood
4371 // that the cleanup flag is pointless. Clear it if so.
4372 if (LI.isCleanup() != CleanupFlag) {
4373 assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
4374 LI.setCleanup(CleanupFlag);
4375 return &LI;
4376 }
4377
4378 return nullptr;
4379}
4380
4381Value *
4382InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
4383 // Try to push freeze through instructions that propagate but don't produce
4384 // poison as far as possible. If the operand of a freeze satisfies three
4385 // conditions: 1) it has one use, 2) it does not itself produce poison, and
4386 // 3) all but one of its operands are guaranteed non-poison, then push the
4387 // freeze through to the one operand that is not guaranteed non-poison. The
4388 // actual transform is as follows:
4389 // Op1 = ... ; Op1 can be poison
4390 // Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and only one
4391 // ; operand that may be poison
4392 // ... = Freeze(Op0)
4393 // =>
4394 // Op1 = ...
4395 // Op1.fr = Freeze(Op1)
4396 // ... = Inst(Op1.fr, NonPoisonOps...)
4397 auto *OrigOp = OrigFI.getOperand(0);
4398 auto *OrigOpInst = dyn_cast<Instruction>(OrigOp);
4399
4400 // While we could change the other users of OrigOp to use freeze(OrigOp), that
4401 // potentially reduces their optimization potential, so let's only do this if
4402 // OrigOp is only used by the freeze.
4403 if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp))
4404 return nullptr;
4405
4406 // We can't push the freeze through an instruction which can itself create
4407 // poison. If the only source of new poison is flags, we can simply
4408 // strip them (since we know the only use is the freeze and nothing can
4409 // benefit from them.)
4410 if (canCreateUndefOrPoison(cast<Operator>(OrigOp),
4411 /*ConsiderFlagsAndMetadata*/ false))
4412 return nullptr;
4413
4414 // If an operand is guaranteed not to be poison, there is no need to add a
4415 // freeze to it. So we first find the single operand that is not guaranteed
4416 // to be non-poison.
4417 Use *MaybePoisonOperand = nullptr;
4418 for (Use &U : OrigOpInst->operands()) {
4419 if (isa<MetadataAsValue>(U.get()) ||
4421 continue;
4422 if (!MaybePoisonOperand)
4423 MaybePoisonOperand = &U;
4424 else
4425 return nullptr;
4426 }
4427
4428 OrigOpInst->dropPoisonGeneratingAnnotations();
4429
4430 // If all operands are guaranteed to be non-poison, we can drop freeze.
4431 if (!MaybePoisonOperand)
4432 return OrigOp;
4433
4434 Builder.SetInsertPoint(OrigOpInst);
4435 auto *FrozenMaybePoisonOperand = Builder.CreateFreeze(
4436 MaybePoisonOperand->get(), MaybePoisonOperand->get()->getName() + ".fr");
4437
4438 replaceUse(*MaybePoisonOperand, FrozenMaybePoisonOperand);
4439 return OrigOp;
4440}
4441
4442Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI,
4443 PHINode *PN) {
4444 // Detect whether this is a recurrence with a start value and some number of
4445 // backedge values. We'll check whether we can push the freeze through the
4446 // backedge values (possibly dropping poison flags along the way) until we
4447 // reach the phi again. In that case, we can move the freeze to the start
4448 // value.
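 // Editorial sketch (hypothetical IR, not from the source): for
 //   %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
 //   %iv.next = add nuw i32 %iv, 1
 //   %fr = freeze i32 %iv
 // the freeze can instead be applied to %start (dropping the nuw flag on the
 // backedge add), after which the phi itself is never poison.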
4449 Use *StartU = nullptr;
4451 for (Use &U : PN->incoming_values()) {
4452 if (DT.dominates(PN->getParent(), PN->getIncomingBlock(U))) {
4453 // Add backedge value to worklist.
4454 Worklist.push_back(U.get());
4455 continue;
4456 }
4457
4458 // Don't bother handling multiple start values.
4459 if (StartU)
4460 return nullptr;
4461 StartU = &U;
4462 }
4463
4464 if (!StartU || Worklist.empty())
4465 return nullptr; // Not a recurrence.
4466
4467 Value *StartV = StartU->get();
4468 BasicBlock *StartBB = PN->getIncomingBlock(*StartU);
4469 bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(StartV);
4470 // We can't insert freeze if the start value is the result of the
4471 // terminator (e.g. an invoke).
4472 if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
4473 return nullptr;
4474
4477 while (!Worklist.empty()) {
4478 Value *V = Worklist.pop_back_val();
4479 if (!Visited.insert(V).second)
4480 continue;
4481
4482 if (Visited.size() > 32)
4483 return nullptr; // Limit the total number of values we inspect.
4484
4485 // Assume that PN is non-poison, because it will be after the transform.
4486 if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
4487 continue;
4488
4489 Instruction *I = dyn_cast<Instruction>(V);
4490 if (!I || canCreateUndefOrPoison(cast<Operator>(I),
4491 /*ConsiderFlagsAndMetadata*/ false))
4492 return nullptr;
4493
4494 DropFlags.push_back(I);
4495 append_range(Worklist, I->operands());
4496 }
4497
4498 for (Instruction *I : DropFlags)
4499 I->dropPoisonGeneratingAnnotations();
4500
4501 if (StartNeedsFreeze) {
4503 Value *FrozenStartV = Builder.CreateFreeze(StartV,
4504 StartV->getName() + ".fr");
4505 replaceUse(*StartU, FrozenStartV);
4506 }
4507 return replaceInstUsesWith(FI, PN);
4508}
4509
4510bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) {
4511 Value *Op = FI.getOperand(0);
4512
4513 if (isa<Constant>(Op) || Op->hasOneUse())
4514 return false;
4515
4516 // Move the freeze directly after the definition of its operand, so that
4517 // it dominates the maximum number of uses. Note that it may not dominate
4518 // *all* uses if the operand is an invoke/callbr and the use is in a phi on
4519 // the normal/default destination. This is why the domination check in the
4520 // replacement below is still necessary.
4521 BasicBlock::iterator MoveBefore;
4522 if (isa<Argument>(Op)) {
4523 MoveBefore =
4525 } else {
4526 auto MoveBeforeOpt = cast<Instruction>(Op)->getInsertionPointAfterDef();
4527 if (!MoveBeforeOpt)
4528 return false;
4529 MoveBefore = *MoveBeforeOpt;
4530 }
4531
4532 // Don't move to the position of a debug intrinsic.
4533 if (isa<DbgInfoIntrinsic>(MoveBefore))
4534 MoveBefore = MoveBefore->getNextNonDebugInstruction()->getIterator();
4535 // Re-point iterator to come after any debug-info records, if we're
4536 // running in "RemoveDIs" mode
4537 MoveBefore.setHeadBit(false);
4538
4539 bool Changed = false;
4540 if (&FI != &*MoveBefore) {
4541 FI.moveBefore(*MoveBefore->getParent(), MoveBefore);
4542 Changed = true;
4543 }
4544
4545 Op->replaceUsesWithIf(&FI, [&](Use &U) -> bool {
4546 bool Dominates = DT.dominates(&FI, U);
4547 Changed |= Dominates;
4548 return Dominates;
4549 });
4550
4551 return Changed;
4552}
4553
4554// Check if any direct or bitcast user of this value is a shuffle instruction.
4555static bool isUsedWithinShuffleVector(Value *V) {
4556 for (auto *U : V->users()) {
4557 if (isa<ShuffleVectorInst>(U))
4558 return true;
4559 else if (match(U, m_BitCast(m_Specific(V))) && isUsedWithinShuffleVector(U))
4560 return true;
4561 }
4562 return false;
4563}
4564
4565Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
4566 Value *Op0 = I.getOperand(0);
4567
4569 return replaceInstUsesWith(I, V);
4570
4571 // freeze (phi const, x) --> phi const, (freeze x)
4572 if (auto *PN = dyn_cast<PHINode>(Op0)) {
4573 if (Instruction *NV = foldOpIntoPhi(I, PN))
4574 return NV;
4575 if (Instruction *NV = foldFreezeIntoRecurrence(I, PN))
4576 return NV;
4577 }
4578
4580 return replaceInstUsesWith(I, NI);
4581
4582 // If I is freeze(undef), check its uses and fold it to a fixed constant.
4583 // - or: pick -1
4584 // - select's condition: if the true value is constant, choose it by making
4585 // the condition true.
4586 // - default: pick 0
4587 //
4588 // Note that this transform is intentionally done here rather than
4589 // via an analysis in InstSimplify or at individual user sites. That is
4590 // because we must produce the same value for all uses of the freeze -
4591 // it's the reason "freeze" exists!
4592 //
4593 // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
4594 // duplicating logic for binops at least.
4595 auto getUndefReplacement = [&I](Type *Ty) {
4596 Constant *BestValue = nullptr;
4597 Constant *NullValue = Constant::getNullValue(Ty);
4598 for (const auto *U : I.users()) {
4599 Constant *C = NullValue;
4600 if (match(U, m_Or(m_Value(), m_Value())))
4602 else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value())))
4603 C = ConstantInt::getTrue(Ty);
4604
4605 if (!BestValue)
4606 BestValue = C;
4607 else if (BestValue != C)
4608 BestValue = NullValue;
4609 }
4610 assert(BestValue && "Must have at least one use");
4611 return BestValue;
4612 };
4613
4614 if (match(Op0, m_Undef())) {
4615 // Don't fold freeze(undef/poison) if it's used as a vector operand in
4616 // a shuffle. This may improve codegen for shuffles that allow
4617 // unspecified inputs.
4619 return nullptr;
4620 return replaceInstUsesWith(I, getUndefReplacement(I.getType()));
4621 }
4622
4623 Constant *C;
4624 if (match(Op0, m_Constant(C)) && C->containsUndefOrPoisonElement()) {
4625 Constant *ReplaceC = getUndefReplacement(I.getType()->getScalarType());
4627 }
4628
4629 // Replace uses of Op with freeze(Op).
4630 if (freezeOtherUses(I))
4631 return &I;
4632
4633 return nullptr;
4634}
4635
4636/// Check for the case where the call writes to an otherwise dead alloca. This
4637/// shows up for unused out-params in idiomatic C/C++ code. Note that this
4638/// helper *only* analyzes the write; it doesn't check any other legality aspect.
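/// Editorial example (hypothetical C, not from the source): code such as
///   int unused;
///   get_value(&unused);   // out-param is never read afterwards
/// produces a call whose only write target is a dead alloca; this helper
/// recognizes that the write itself is unobservable.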
4639static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) {
4640 auto *CB = dyn_cast<CallBase>(I);
4641 if (!CB)
4642 // TODO: handle e.g. store to alloca here - only worth doing if we extend
4643 // to allow reload along used path as described below. Otherwise, this
4644 // is simply a store to a dead allocation which will be removed.
4645 return false;
4646 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CB, TLI);
4647 if (!Dest)
4648 return false;
4649 auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(Dest->Ptr));
4650 if (!AI)
4651 // TODO: allow malloc?
4652 return false;
4653 // TODO: allow memory access dominated by move point? Note that since AI
4654 // could have a reference to itself captured by the call, we would need to
4655 // account for cycles in doing so.
4656 SmallVector<const User *> AllocaUsers;
4658 auto pushUsers = [&](const Instruction &I) {
4659 for (const User *U : I.users()) {
4660 if (Visited.insert(U).second)
4661 AllocaUsers.push_back(U);
4662 }
4663 };
4664 pushUsers(*AI);
4665 while (!AllocaUsers.empty()) {
4666 auto *UserI = cast<Instruction>(AllocaUsers.pop_back_val());
4667 if (isa<BitCastInst>(UserI) || isa<GetElementPtrInst>(UserI) ||
4668 isa<AddrSpaceCastInst>(UserI)) {
4669 pushUsers(*UserI);
4670 continue;
4671 }
4672 if (UserI == CB)
4673 continue;
4674 // TODO: support lifetime.start/end here
4675 return false;
4676 }
4677 return true;
4678}
4679
4680/// Try to move the specified instruction from its current block into the
4681/// beginning of DestBlock, which can only happen if it's safe to move the
4682/// instruction past all of the instructions between it and the end of its
4683/// block.
4684bool InstCombinerImpl::tryToSinkInstruction(Instruction *I,
4685                                            BasicBlock *DestBlock) {
4686 BasicBlock *SrcBlock = I->getParent();
4687
4688  // Cannot move control-flow-involving instructions, volatile loads, vaarg, etc.
4689 if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
4690 I->isTerminator())
4691 return false;
4692
4693 // Do not sink static or dynamic alloca instructions. Static allocas must
4694 // remain in the entry block, and dynamic allocas must not be sunk in between
4695  // a stacksave / stackrestore pair, which would incorrectly shorten their
4696 // lifetime.
4697 if (isa<AllocaInst>(I))
4698 return false;
4699
4700 // Do not sink into catchswitch blocks.
4701 if (isa<CatchSwitchInst>(DestBlock->getTerminator()))
4702 return false;
4703
4704 // Do not sink convergent call instructions.
4705 if (auto *CI = dyn_cast<CallInst>(I)) {
4706 if (CI->isConvergent())
4707 return false;
4708 }
4709
4710  // Unless we can prove that the memory write isn't visible except on the
4711 // path we're sinking to, we must bail.
4712 if (I->mayWriteToMemory()) {
4713 if (!SoleWriteToDeadLocal(I, TLI))
4714 return false;
4715 }
4716
4717 // We can only sink load instructions if there is nothing between the load and
4718  // the end of the block that could change the value.
4719 if (I->mayReadFromMemory()) {
4720 // We don't want to do any sophisticated alias analysis, so we only check
4721 // the instructions after I in I's parent block if we try to sink to its
4722 // successor block.
4723 if (DestBlock->getUniquePredecessor() != I->getParent())
4724 return false;
4725 for (BasicBlock::iterator Scan = std::next(I->getIterator()),
4726 E = I->getParent()->end();
4727 Scan != E; ++Scan)
4728 if (Scan->mayWriteToMemory())
4729 return false;
4730 }
4731
4732 I->dropDroppableUses([&](const Use *U) {
4733 auto *I = dyn_cast<Instruction>(U->getUser());
4734 if (I && I->getParent() != DestBlock) {
4735 Worklist.add(I);
4736 return true;
4737 }
4738 return false;
4739 });
4740 /// FIXME: We could remove droppable uses that are not dominated by
4741 /// the new position.
4742
4743 BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
4744 I->moveBefore(*DestBlock, InsertPos);
4745 ++NumSunkInst;
4746
4747 // Also sink all related debug uses from the source basic block. Otherwise we
4748  // get a debug use before the def. Attempt to salvage debug uses first, to
4749  // maximise the range over which variables have a location. If we cannot salvage, then
4750 // mark the location undef: we know it was supposed to receive a new location
4751 // here, but that computation has been sunk.
4753 SmallVector<DbgVariableRecord *, 2> DbgVariableRecords;
4754 findDbgUsers(DbgUsers, I, &DbgVariableRecords);
4755 if (!DbgUsers.empty())
4756 tryToSinkInstructionDbgValues(I, InsertPos, SrcBlock, DestBlock, DbgUsers);
4757 if (!DbgVariableRecords.empty())
4758 tryToSinkInstructionDbgVariableRecords(I, InsertPos, SrcBlock, DestBlock,
4759 DbgVariableRecords);
4760
4761 // PS: there are numerous flaws with this behaviour, not least that right now
4762 // assignments can be re-ordered past other assignments to the same variable
4763  // if they use different Values. Creating more undef assignments can never be
4764  // undone. And salvaging all users outside of this block can unnecessarily
4765 // alter the lifetime of the live-value that the variable refers to.
4766 // Some of these things can be resolved by tolerating debug use-before-defs in
4767  // LLVM-IR; however, it depends on the instruction-referencing CodeGen backend
4768 // being used for more architectures.
4769
4770 return true;
4771}
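// A minimal sketch (assumed IR) of a load this routine is willing to sink:
//   bb:
//     %v = load i32, ptr %p          ; only user is in %then
//     br i1 %c, label %then, label %else
//   then:                             ; unique predecessor is %bb
//     %u = add i32 %v, 1
// Nothing after the load in %bb writes memory and %then has %bb as its unique
// predecessor, so the load may be moved to the start of %then; any debug users
// left behind in %bb are salvaged or cloned by the helpers that follow.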
4772
4774 Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
4776 // For all debug values in the destination block, the sunk instruction
4777 // will still be available, so they do not need to be dropped.
4779 for (auto &DbgUser : DbgUsers)
4780 if (DbgUser->getParent() != DestBlock)
4781 DbgUsersToSalvage.push_back(DbgUser);
4782
4783 // Process the sinking DbgUsersToSalvage in reverse order, as we only want
4784 // to clone the last appearing debug intrinsic for each given variable.
4786 for (DbgVariableIntrinsic *DVI : DbgUsersToSalvage)
4787 if (DVI->getParent() == SrcBlock)
4788 DbgUsersToSink.push_back(DVI);
4789 llvm::sort(DbgUsersToSink,
4790 [](auto *A, auto *B) { return B->comesBefore(A); });
4791
4793 SmallSet<DebugVariable, 4> SunkVariables;
4794 for (auto *User : DbgUsersToSink) {
4795 // A dbg.declare instruction should not be cloned, since there can only be
4796 // one per variable fragment. It should be left in the original place
4797 // because the sunk instruction is not an alloca (otherwise we could not be
4798 // here).
4799 if (isa<DbgDeclareInst>(User))
4800 continue;
4801
4802 DebugVariable DbgUserVariable =
4803 DebugVariable(User->getVariable(), User->getExpression(),
4804 User->getDebugLoc()->getInlinedAt());
4805
4806 if (!SunkVariables.insert(DbgUserVariable).second)
4807 continue;
4808
4809 // Leave dbg.assign intrinsics in their original positions and there should
4810 // be no need to insert a clone.
4811 if (isa<DbgAssignIntrinsic>(User))
4812 continue;
4813
4814 DIIClones.emplace_back(cast<DbgVariableIntrinsic>(User->clone()));
4815 if (isa<DbgDeclareInst>(User) && isa<CastInst>(I))
4816 DIIClones.back()->replaceVariableLocationOp(I, I->getOperand(0));
4817 LLVM_DEBUG(dbgs() << "CLONE: " << *DIIClones.back() << '\n');
4818 }
4819
4820 // Perform salvaging without the clones, then sink the clones.
4821 if (!DIIClones.empty()) {
4822 salvageDebugInfoForDbgValues(*I, DbgUsersToSalvage, {});
4823 // The clones are in reverse order of original appearance, reverse again to
4824 // maintain the original order.
4825 for (auto &DIIClone : llvm::reverse(DIIClones)) {
4826 DIIClone->insertBefore(&*InsertPos);
4827 LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n');
4828 }
4829 }
4830}
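// Schematic example (metadata and variable names are invented): if the source
// block carries two dbg.values for the same variable,
//   call void @llvm.dbg.value(metadata i32 %a, metadata !var, metadata !expr)
//   call void @llvm.dbg.value(metadata i32 %b, metadata !var, metadata !expr)
// the reverse walk above clones only the later one (%b); SunkVariables keeps
// one clone per variable, and the final llvm::reverse() restores the original
// relative order of the clones at the insertion point.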
4831
4833 Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
4834 BasicBlock *DestBlock,
4835 SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords) {
4836 // Implementation of tryToSinkInstructionDbgValues, but for the
4837 // DbgVariableRecord of variable assignments rather than dbg.values.
4838
4839 // Fetch all DbgVariableRecords not already in the destination.
4840 SmallVector<DbgVariableRecord *, 2> DbgVariableRecordsToSalvage;
4841 for (auto &DVR : DbgVariableRecords)
4842 if (DVR->getParent() != DestBlock)
4843 DbgVariableRecordsToSalvage.push_back(DVR);
4844
4845 // Fetch a second collection, of DbgVariableRecords in the source block that
4846 // we're going to sink.
4847 SmallVector<DbgVariableRecord *> DbgVariableRecordsToSink;
4848 for (DbgVariableRecord *DVR : DbgVariableRecordsToSalvage)
4849 if (DVR->getParent() == SrcBlock)
4850 DbgVariableRecordsToSink.push_back(DVR);
4851
4852 // Sort DbgVariableRecords according to their position in the block. This is a
4853 // partial order: DbgVariableRecords attached to different instructions will
4854 // be ordered by the instruction order, but DbgVariableRecords attached to the
4855 // same instruction won't have an order.
4856 auto Order = [](DbgVariableRecord *A, DbgVariableRecord *B) -> bool {
4857 return B->getInstruction()->comesBefore(A->getInstruction());
4858 };
4859 llvm::stable_sort(DbgVariableRecordsToSink, Order);
4860
4861 // If there are two assignments to the same variable attached to the same
4862 // instruction, the ordering between the two assignments is important. Scan
4863 // for this (rare) case and establish which is the last assignment.
4864 using InstVarPair = std::pair<const Instruction *, DebugVariable>;
4866 if (DbgVariableRecordsToSink.size() > 1) {
4868 // Count how many assignments to each variable there is per instruction.
4869 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
4870 DebugVariable DbgUserVariable =
4871 DebugVariable(DVR->getVariable(), DVR->getExpression(),
4872 DVR->getDebugLoc()->getInlinedAt());
4873 CountMap[std::make_pair(DVR->getInstruction(), DbgUserVariable)] += 1;
4874 }
4875
4876 // If there are any instructions with two assignments, add them to the
4877 // FilterOutMap to record that they need extra filtering.
4879 for (auto It : CountMap) {
4880 if (It.second > 1) {
4881 FilterOutMap[It.first] = nullptr;
4882 DupSet.insert(It.first.first);
4883 }
4884 }
4885
4886 // For all instruction/variable pairs needing extra filtering, find the
4887 // latest assignment.
4888 for (const Instruction *Inst : DupSet) {
4889 for (DbgVariableRecord &DVR :
4890 llvm::reverse(filterDbgVars(Inst->getDbgRecordRange()))) {
4891 DebugVariable DbgUserVariable =
4892 DebugVariable(DVR.getVariable(), DVR.getExpression(),
4893 DVR.getDebugLoc()->getInlinedAt());
4894 auto FilterIt =
4895 FilterOutMap.find(std::make_pair(Inst, DbgUserVariable));
4896 if (FilterIt == FilterOutMap.end())
4897 continue;
4898 if (FilterIt->second != nullptr)
4899 continue;
4900 FilterIt->second = &DVR;
4901 }
4902 }
4903 }
4904
4905  // Perform cloning of the DbgVariableRecords that we plan on sinking, filtering
4906 // out any duplicate assignments identified above.
4908 SmallSet<DebugVariable, 4> SunkVariables;
4909 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
4911 continue;
4912
4913 DebugVariable DbgUserVariable =
4914 DebugVariable(DVR->getVariable(), DVR->getExpression(),
4915 DVR->getDebugLoc()->getInlinedAt());
4916
4917 // For any variable where there were multiple assignments in the same place,
4918 // ignore all but the last assignment.
4919 if (!FilterOutMap.empty()) {
4920 InstVarPair IVP = std::make_pair(DVR->getInstruction(), DbgUserVariable);
4921 auto It = FilterOutMap.find(IVP);
4922
4923 // Filter out.
4924 if (It != FilterOutMap.end() && It->second != DVR)
4925 continue;
4926 }
4927
4928 if (!SunkVariables.insert(DbgUserVariable).second)
4929 continue;
4930
4931 if (DVR->isDbgAssign())
4932 continue;
4933
4934 DVRClones.emplace_back(DVR->clone());
4935 LLVM_DEBUG(dbgs() << "CLONE: " << *DVRClones.back() << '\n');
4936 }
4937
4938 // Perform salvaging without the clones, then sink the clones.
4939 if (DVRClones.empty())
4940 return;
4941
4942 salvageDebugInfoForDbgValues(*I, {}, DbgVariableRecordsToSalvage);
4943
4944 // The clones are in reverse order of original appearance. Assert that the
4945 // head bit is set on the iterator as we _should_ have received it via
4946 // getFirstInsertionPt. Inserting like this will reverse the clone order as
4947 // we'll repeatedly insert at the head, such as:
4948 // DVR-3 (third insertion goes here)
4949 // DVR-2 (second insertion goes here)
4950 // DVR-1 (first insertion goes here)
4951 // Any-Prior-DVRs
4952 // InsertPtInst
4953 assert(InsertPos.getHeadBit());
4954 for (DbgVariableRecord *DVRClone : DVRClones) {
4955 InsertPos->getParent()->insertDbgRecordBefore(DVRClone, InsertPos);
4956 LLVM_DEBUG(dbgs() << "SINK: " << *DVRClone << '\n');
4957 }
4958}
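// Worked example of the duplicate filtering above (schematic, names invented):
// if one instruction carries two debug records for the same variable,
//   #dbg_value(%a, !var, ...)   ; both attached to the same instruction
//   #dbg_value(%b, !var, ...)
// CountMap records a count of 2 for that (instruction, variable) pair,
// FilterOutMap is pointed at the later record found by the reverse scan, and
// the cloning loop then drops the earlier record so only the final assignment
// is sunk.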
4959
4961 while (!Worklist.isEmpty()) {
4962 // Walk deferred instructions in reverse order, and push them to the
4963 // worklist, which means they'll end up popped from the worklist in-order.
4964 while (Instruction *I = Worklist.popDeferred()) {
4965 // Check to see if we can DCE the instruction. We do this already here to
4966 // reduce the number of uses and thus allow other folds to trigger.
4967 // Note that eraseInstFromFunction() may push additional instructions on
4968 // the deferred worklist, so this will DCE whole instruction chains.
4971 ++NumDeadInst;
4972 continue;
4973 }
4974
4975 Worklist.push(I);
4976 }
4977
4979 if (I == nullptr) continue; // skip null values.
4980
4981 // Check to see if we can DCE the instruction.
4984 ++NumDeadInst;
4985 continue;
4986 }
4987
4988 if (!DebugCounter::shouldExecute(VisitCounter))
4989 continue;
4990
4991 // See if we can trivially sink this instruction to its user if we can
4992 // prove that the successor is not executed more frequently than our block.
4993 // Return the UserBlock if successful.
4994 auto getOptionalSinkBlockForInst =
4995 [this](Instruction *I) -> std::optional<BasicBlock *> {
4996 if (!EnableCodeSinking)
4997 return std::nullopt;
4998
4999 BasicBlock *BB = I->getParent();
5000 BasicBlock *UserParent = nullptr;
5001 unsigned NumUsers = 0;
5002
5003 for (auto *U : I->users()) {
5004 if (U->isDroppable())
5005 continue;
5006 if (NumUsers > MaxSinkNumUsers)
5007 return std::nullopt;
5008
5009 Instruction *UserInst = cast<Instruction>(U);
5010 // Special handling for Phi nodes - get the block the use occurs in.
5011 if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
5012 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
5013 if (PN->getIncomingValue(i) == I) {
5014 // Bail out if we have uses in different blocks. We don't do any
5015              // sophisticated analysis (i.e., finding NearestCommonDominator of
5016 // these use blocks).
5017 if (UserParent && UserParent != PN->getIncomingBlock(i))
5018 return std::nullopt;
5019 UserParent = PN->getIncomingBlock(i);
5020 }
5021 }
5022 assert(UserParent && "expected to find user block!");
5023 } else {
5024 if (UserParent && UserParent != UserInst->getParent())
5025 return std::nullopt;
5026 UserParent = UserInst->getParent();
5027 }
5028
5029          // Make sure these checks are done only once; naturally we do them the
5030          // first time we get the user parent, which saves compile time.
5031 if (NumUsers == 0) {
5032 // Try sinking to another block. If that block is unreachable, then do
5033 // not bother. SimplifyCFG should handle it.
5034 if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
5035 return std::nullopt;
5036
5037 auto *Term = UserParent->getTerminator();
5038 // See if the user is one of our successors that has only one
5039 // predecessor, so that we don't have to split the critical edge.
5040 // Another option where we can sink is a block that ends with a
5041          //   terminator that does not pass control to another block (such as
5042 // return or unreachable or resume). In this case:
5043 // - I dominates the User (by SSA form);
5044 // - the User will be executed at most once.
5045 // So sinking I down to User is always profitable or neutral.
5046 if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term))
5047 return std::nullopt;
5048
5049 assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
5050 }
5051
5052 NumUsers++;
5053 }
5054
5055        // No users, or only droppable users.
5056 if (!UserParent)
5057 return std::nullopt;
5058
5059 return UserParent;
5060 };
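      // Two CFG shapes the lambda above accepts (illustrative only): either the
      // user block has the current block as its unique predecessor, e.g.
      //   bb:   ...; br i1 %c, label %user, label %other
      //   user: ; preds = %bb
      // so no critical edge must be split, or the user block ends in a
      // terminator with no successors (ret/unreachable/resume), in which case
      // the user executes at most once and sinking is profitable or neutral.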
5061
5062 auto OptBB = getOptionalSinkBlockForInst(I);
5063 if (OptBB) {
5064 auto *UserParent = *OptBB;
5065 // Okay, the CFG is simple enough, try to sink this instruction.
5066 if (tryToSinkInstruction(I, UserParent)) {
5067 LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
5068 MadeIRChange = true;
5069 // We'll add uses of the sunk instruction below, but since
5070          // sinking can expose opportunities for its *operands*, add
5071          // them to the worklist.
5072 for (Use &U : I->operands())
5073 if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
5074 Worklist.push(OpI);
5075 }
5076 }
5077
5078 // Now that we have an instruction, try combining it to simplify it.
5081 I, {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
5082
5083#ifndef NDEBUG
5084 std::string OrigI;
5085#endif
5086 LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
5087 LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
5088
5089 if (Instruction *Result = visit(*I)) {
5090 ++NumCombined;
5091 // Should we replace the old instruction with a new one?
5092 if (Result != I) {
5093 LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
5094 << " New = " << *Result << '\n');
5095
5096 Result->copyMetadata(*I,
5097 {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
5098 // Everything uses the new instruction now.
5099 I->replaceAllUsesWith(Result);
5100
5101 // Move the name to the new instruction first.
5102 Result->takeName(I);
5103
5104 // Insert the new instruction into the basic block...
5105 BasicBlock *InstParent = I->getParent();
5106 BasicBlock::iterator InsertPos = I->getIterator();
5107
5108        // Are we replacing a PHI with something that isn't a PHI, or vice versa?
5109 if (isa<PHINode>(Result) != isa<PHINode>(I)) {
5110 // We need to fix up the insertion point.
5111 if (isa<PHINode>(I)) // PHI -> Non-PHI
5112 InsertPos = InstParent->getFirstInsertionPt();
5113 else // Non-PHI -> PHI
5114 InsertPos = InstParent->getFirstNonPHIIt();
5115 }
5116
5117 Result->insertInto(InstParent, InsertPos);
5118
5119 // Push the new instruction and any users onto the worklist.
5121 Worklist.push(Result);
5122
5124 } else {
5125 LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
5126 << " New = " << *I << '\n');
5127
5128 // If the instruction was modified, it's possible that it is now dead.
5129          // If so, remove it.
5132 } else {
5134 Worklist.push(I);
5135 }
5136 }
5137 MadeIRChange = true;
5138 }
5139 }
5140
5141 Worklist.zap();
5142 return MadeIRChange;
5143}
5144
5145// Track the scopes used by !alias.scope and !noalias. In a function, a
5146// @llvm.experimental.noalias.scope.decl is only useful if that scope is used
5147// by both sets. If not, the declaration of the scope can be safely omitted.
5148// The MDNode of the scope can be omitted as well for the instructions that are
5149// part of this function. We do not do that at this point, as this might become
5150// too time consuming to do.
5152 SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
5153 SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
5154
5155public:
5157 // This seems to be faster than checking 'mayReadOrWriteMemory()'.
5158 if (!I->hasMetadataOtherThanDebugLoc())
5159 return;
5160
5161 auto Track = [](Metadata *ScopeList, auto &Container) {
5162 const auto *MDScopeList = dyn_cast_or_null<MDNode>(ScopeList);
5163 if (!MDScopeList || !Container.insert(MDScopeList).second)
5164 return;
5165 for (const auto &MDOperand : MDScopeList->operands())
5166 if (auto *MDScope = dyn_cast<MDNode>(MDOperand))
5167 Container.insert(MDScope);
5168 };
5169
5170 Track(I->getMetadata(LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
5171 Track(I->getMetadata(LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
5172 }
5173
5175 NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Inst);
5176 if (!Decl)
5177 return false;
5178
5179 assert(Decl->use_empty() &&
5180 "llvm.experimental.noalias.scope.decl in use ?");
5181 const MDNode *MDSL = Decl->getScopeList();
5182 assert(MDSL->getNumOperands() == 1 &&
5183 "llvm.experimental.noalias.scope should refer to a single scope");
5184 auto &MDOperand = MDSL->getOperand(0);
5185 if (auto *MD = dyn_cast<MDNode>(MDOperand))
5186 return !UsedAliasScopesAndLists.contains(MD) ||
5187 !UsedNoAliasScopesAndLists.contains(MD);
5188
5189    // Not an MDNode? Throw it away.
5190 return true;
5191 }
5192};
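// Schematic example of the tracking above (metadata numbering invented):
//   call void @llvm.experimental.noalias.scope.decl(metadata !2)
//   %v = load i32, ptr %p, !alias.scope !2
//   store i32 %v, ptr %q, !noalias !2
// Here the declared scope appears in both an !alias.scope and a !noalias
// list, so isNoAliasScopeDeclDead() keeps the declaration; if either kind of
// use were missing, the declaration would be reported as removable.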
5193
5194/// Populate the IC worklist from a function, by walking it in reverse
5195/// post-order and adding all reachable code to the worklist.
5196///
5197/// This has a couple of tricks to make the code faster and more powerful. In
5198/// particular, we constant fold and DCE instructions as we go, to avoid adding
5199/// them to the worklist (this significantly speeds up instcombine on code where
5200/// many instructions are dead or constant). Additionally, if we find a branch
5201/// whose condition is a known constant, we only visit the reachable successors.
5204 bool MadeIRChange = false;
5206 SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
5207 DenseMap<Constant *, Constant *> FoldedConstants;
5208 AliasScopeTracker SeenAliasScopes;
5209
5210 auto HandleOnlyLiveSuccessor = [&](BasicBlock *BB, BasicBlock *LiveSucc) {
5211 for (BasicBlock *Succ : successors(BB))
5212 if (Succ != LiveSucc && DeadEdges.insert({BB, Succ}).second)
5213 for (PHINode &PN : Succ->phis())
5214 for (Use &U : PN.incoming_values())
5215 if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(U)) {
5216 U.set(PoisonValue::get(PN.getType()));
5217 MadeIRChange = true;
5218 }
5219 };
5220
5221 for (BasicBlock *BB : RPOT) {
5222 if (!BB->isEntryBlock() && all_of(predecessors(BB), [&](BasicBlock *Pred) {
5223 return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
5224 })) {
5225 HandleOnlyLiveSuccessor(BB, nullptr);
5226 continue;
5227 }
5228 LiveBlocks.insert(BB);
5229
5230 for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
5231 // ConstantProp instruction if trivially constant.
5232 if (!Inst.use_empty() &&
5233 (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
5234 if (Constant *C = ConstantFoldInstruction(&Inst, DL, &TLI)) {
5235 LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
5236 << '\n');
5237 Inst.replaceAllUsesWith(C);
5238 ++NumConstProp;
5239 if (isInstructionTriviallyDead(&Inst, &TLI))
5240 Inst.eraseFromParent();
5241 MadeIRChange = true;
5242 continue;
5243 }
5244
5245 // See if we can constant fold its operands.
5246 for (Use &U : Inst.operands()) {
5247 if (!isa<ConstantVector>(U) && !isa<ConstantExpr>(U))
5248 continue;
5249
5250 auto *C = cast<Constant>(U);
5251 Constant *&FoldRes = FoldedConstants[C];
5252 if (!FoldRes)
5253 FoldRes = ConstantFoldConstant(C, DL, &TLI);
5254
5255 if (FoldRes != C) {
5256 LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
5257 << "\n Old = " << *C
5258 << "\n New = " << *FoldRes << '\n');
5259 U = FoldRes;
5260 MadeIRChange = true;
5261 }
5262 }
5263
5264 // Skip processing debug and pseudo intrinsics in InstCombine. Processing
5265      // these call instructions consumes a non-trivial amount of time and
5266 // provides no value for the optimization.
5267 if (!Inst.isDebugOrPseudoInst()) {
5268 InstrsForInstructionWorklist.push_back(&Inst);
5269 SeenAliasScopes.analyse(&Inst);
5270 }
5271 }
5272
5273 // If this is a branch or switch on a constant, mark only the single
5274 // live successor. Otherwise assume all successors are live.
5275 Instruction *TI = BB->getTerminator();
5276 if (BranchInst *BI = dyn_cast<BranchInst>(TI); BI && BI->isConditional()) {
5277 if (isa<UndefValue>(BI->getCondition())) {
5278 // Branch on undef is UB.
5279 HandleOnlyLiveSuccessor(BB, nullptr);
5280 continue;
5281 }
5282 if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
5283 bool CondVal = Cond->getZExtValue();
5284 HandleOnlyLiveSuccessor(BB, BI->getSuccessor(!CondVal));
5285 continue;
5286 }
5287 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
5288 if (isa<UndefValue>(SI->getCondition())) {
5289 // Switch on undef is UB.
5290 HandleOnlyLiveSuccessor(BB, nullptr);
5291 continue;
5292 }
5293 if (auto *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
5294 HandleOnlyLiveSuccessor(BB,
5295 SI->findCaseValue(Cond)->getCaseSuccessor());
5296 continue;
5297 }
5298 }
5299 }
5300
5301 // Remove instructions inside unreachable blocks. This prevents the
5302 // instcombine code from having to deal with some bad special cases, and
5303 // reduces use counts of instructions.
5304 for (BasicBlock &BB : F) {
5305 if (LiveBlocks.count(&BB))
5306 continue;
5307
5308 unsigned NumDeadInstInBB;
5309 unsigned NumDeadDbgInstInBB;
5310 std::tie(NumDeadInstInBB, NumDeadDbgInstInBB) =
5312
5313 MadeIRChange |= NumDeadInstInBB + NumDeadDbgInstInBB > 0;
5314 NumDeadInst += NumDeadInstInBB;
5315 }
5316
5317 // Once we've found all of the instructions to add to instcombine's worklist,
5318 // add them in reverse order. This way instcombine will visit from the top
5319  // of the function down. This jibes well with the way that it adds all uses
5320 // of instructions to the worklist after doing a transformation, thus avoiding
5321 // some N^2 behavior in pathological cases.
5322 Worklist.reserve(InstrsForInstructionWorklist.size());
5323 for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
5324 // DCE instruction if trivially dead. As we iterate in reverse program
5325 // order here, we will clean up whole chains of dead instructions.
5326 if (isInstructionTriviallyDead(Inst, &TLI) ||
5327 SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
5328 ++NumDeadInst;
5329 LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
5330 salvageDebugInfo(*Inst);
5331 Inst->eraseFromParent();
5332 MadeIRChange = true;
5333 continue;
5334 }
5335
5336 Worklist.push(Inst);
5337 }
5338
5339 return MadeIRChange;
5340}
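// Sketch of the successor pruning performed above (assumed IR): for
//   br i1 true, label %live, label %dead
// only %live is treated as a live successor; the edge to %dead is recorded in
// DeadEdges, phi operands in %dead that flow in over that edge are rewritten
// to poison, and blocks reachable only through dead edges are emptied by the
// clean-up loop over unreachable blocks.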
5341
5342static bool combineInstructionsOverFunction(
5343    Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA,
5344    AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
5345    DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
5346    BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI, LoopInfo *LI,
5347    const InstCombineOptions &Opts) {
5348 auto &DL = F.getParent()->getDataLayout();
5349
5350 /// Builder - This is an IRBuilder that automatically inserts new
5351 /// instructions into the worklist when they are created.
5353 F.getContext(), TargetFolder(DL),
5354 IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
5355 Worklist.add(I);
5356 if (auto *Assume = dyn_cast<AssumeInst>(I))
5357 AC.registerAssumption(Assume);
5358 }));
5359
5361
5362  // Lower dbg.declare intrinsics, otherwise their value may be clobbered
5363 // by instcombiner.
5364 bool MadeIRChange = false;
5366 MadeIRChange = LowerDbgDeclare(F);
5367
5368 // Iterate while there is work to do.
5369 unsigned Iteration = 0;
5370 while (true) {
5371 ++Iteration;
5372
5373 if (Iteration > Opts.MaxIterations && !Opts.VerifyFixpoint) {
5374 LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations
5375 << " on " << F.getName()
5376 << " reached; stopping without verifying fixpoint\n");
5377 break;
5378 }
5379
5380 ++NumWorklistIterations;
5381 LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
5382 << F.getName() << "\n");
5383
5384 InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
5385 ORE, BFI, BPI, PSI, DL, LI);
5387 bool MadeChangeInThisIteration = IC.prepareWorklist(F, RPOT);
5388 MadeChangeInThisIteration |= IC.run();
5389 if (!MadeChangeInThisIteration)
5390 break;
5391
5392 MadeIRChange = true;
5393 if (Iteration > Opts.MaxIterations) {
5395 "Instruction Combining did not reach a fixpoint after " +
5396 Twine(Opts.MaxIterations) + " iterations",
5397 /*GenCrashDiag=*/false);
5398 }
5399 }
5400
5401 if (Iteration == 1)
5402 ++NumOneIteration;
5403 else if (Iteration == 2)
5404 ++NumTwoIterations;
5405 else if (Iteration == 3)
5406 ++NumThreeIterations;
5407 else
5408 ++NumFourOrMoreIterations;
5409
5410 return MadeIRChange;
5411}
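// Illustrative behaviour of the loop above (not a measured result): a function
// whose first pass over the worklist already reaches a fixpoint runs the body
// twice - one iteration that makes changes and a second that makes none - so
// NumTwoIterations is bumped; with VerifyFixpoint enabled, exceeding
// Opts.MaxIterations while still making changes instead triggers the fatal
// "did not reach a fixpoint" error emitted above.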
5412
5414
5416 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
5417 static_cast<PassInfoMixin<InstCombinePass> *>(this)->printPipeline(
5418 OS, MapClassName2PassName);
5419 OS << '<';
5420 OS << "max-iterations=" << Options.MaxIterations << ";";
5421 OS << (Options.UseLoopInfo ? "" : "no-") << "use-loop-info;";
5422 OS << (Options.VerifyFixpoint ? "" : "no-") << "verify-fixpoint";
5423 OS << '>';
5424}
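// For reference (a hedged example based on the printing above): the options
// serialize into a pipeline string of the form
//   instcombine<max-iterations=N;[no-]use-loop-info;[no-]verify-fixpoint>
// which is the same textual form used in -passes pipeline descriptions
// (e.g. opt -passes='instcombine<max-iterations=1>'), assuming the parsing
// side accepts matching option names.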
5425
5428 auto &AC = AM.getResult<AssumptionAnalysis>(F);
5429 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
5430 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
5432 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
5433
5434 // TODO: Only use LoopInfo when the option is set. This requires that the
5435 // callers in the pass pipeline explicitly set the option.
5436 auto *LI = AM.getCachedResult<LoopAnalysis>(F);
5437 if (!LI && Options.UseLoopInfo)
5438 LI = &AM.getResult<LoopAnalysis>(F);
5439
5440 auto *AA = &AM.getResult<AAManager>(F);
5441 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
5442 ProfileSummaryInfo *PSI =
5443 MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
5444 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
5445 &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
5447
5448 if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
5449 BFI, BPI, PSI, LI, Options))
5450 // No changes, all analyses are preserved.
5451 return PreservedAnalyses::all();
5452
5453 // Mark all the analyses that instcombine updates as preserved.
5456 return PA;
5457}
5458
5460 AU.setPreservesCFG();
5473}
5474
5476 if (skipFunction(F))
5477 return false;
5478
5479 // Required analyses.
5480 auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
5481 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
5482 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
5483 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
5484 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
5485 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
5486
5487 // Optional analyses.
5488 auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
5489 auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
5490 ProfileSummaryInfo *PSI =
5491 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
5492 BlockFrequencyInfo *BFI =
5493 (PSI && PSI->hasProfileSummary()) ?
5494 &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
5495 nullptr;
5496 BranchProbabilityInfo *BPI = nullptr;
5497 if (auto *WrapperPass =
5498 getAnalysisIfAvailable<BranchProbabilityInfoWrapperPass>())
5499 BPI = &WrapperPass->getBPI();
5500
5501 return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
5502 BFI, BPI, PSI, LI,
5504}
5505
5507
5510}
5511
5513 "Combine redundant instructions", false, false)
5525
5526// Initialization Routines
5529}
5530
5532 return new InstructionCombiningPass();
5533}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
Expand Atomic instructions
static const Function * getParent(const Value *V)
This is the interface for LLVM's primary stateless and local alias analysis.
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
Definition: DebugCounter.h:182
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
#define NL
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
Hexagon Vector Combine
IRTranslator LLVM IR MI
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
iv Induction Variable Users
Definition: IVUsers.cpp:48
This file provides internal interfaces used to implement the InstCombine.
This file provides the primary interface to the instcombine pass.
static Value * simplifySwitchOnSelectUsingRanges(SwitchInst &SI, SelectInst *Select, bool IsTrueArm)
static bool isUsedWithinShuffleVector(Value *V)
static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI, Instruction *AI)
static bool combineInstructionsOverFunction(Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI, LoopInfo *LI, const InstCombineOptions &Opts)
static bool shorter_filter(const Value *LHS, const Value *RHS)
static Instruction * foldSelectGEP(GetElementPtrInst &GEP, InstCombiner::BuilderTy &Builder)
Thread a GEP operation with constant indices through the constant true/false arms of a select.
static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src)
static cl::opt< unsigned > MaxArraySize("instcombine-maxarray-size", cl::init(1024), cl::desc("Maximum array size considered when doing a combine"))
static cl::opt< unsigned > ShouldLowerDbgDeclare("instcombine-lower-dbg-declare", cl::Hidden, cl::init(true))
static bool hasNoSignedWrap(BinaryOperator &I)
static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1, InstCombinerImpl &IC)
Combine constant operands of associative operations either before or after a cast to eliminate one of...
static Value * simplifyInstructionWithPHI(Instruction &I, PHINode *PN, Value *InValue, BasicBlock *InBB, const DataLayout &DL, const SimplifyQuery SQ)
static void ClearSubclassDataAfterReassociation(BinaryOperator &I)
Conservatively clears subclassOptionalData after a reassociation or commutation.
static bool isAllocSiteRemovable(Instruction *AI, SmallVectorImpl< WeakTrackingVH > &Users, const TargetLibraryInfo &TLI)
static Value * getIdentityValue(Instruction::BinaryOps Opcode, Value *V)
This function returns identity value for given opcode, which can be used to factor patterns like (X *...
static bool leftDistributesOverRight(Instruction::BinaryOps LOp, Instruction::BinaryOps ROp)
Return whether "X LOp (Y ROp Z)" is always equal to "(X LOp Y) ROp (X LOp Z)".
static std::optional< std::pair< Value *, Value * > > matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS)
static Value * foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI, Value *NewOp, InstCombiner &IC)
static Instruction * canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP, GEPOperator *Src, InstCombinerImpl &IC)
static Instruction * tryToMoveFreeBeforeNullTest(CallInst &FI, const DataLayout &DL)
Move the call to free before a NULL test.
static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, Instruction::BinaryOps ROp)
Return whether "(X LOp Y) ROp Z" is always equal to "(X ROp Z) LOp (Y ROp Z)".
static Value * tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ, InstCombiner::BuilderTy &Builder, Instruction::BinaryOps InnerOpcode, Value *A, Value *B, Value *C, Value *D)
This tries to simplify binary operations by factorizing out common terms (e.
static bool isRemovableWrite(CallBase &CB, Value *UsedV, const TargetLibraryInfo &TLI)
Given a call CB which uses an address UsedV, return true if we can prove the call's only possible eff...
static Instruction::BinaryOps getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op, Value *&LHS, Value *&RHS, BinaryOperator *OtherOp)
This function predicates factorization using distributive laws.
static bool hasNoUnsignedWrap(BinaryOperator &I)
static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI)
Check for case where the call writes to an otherwise dead alloca.
static cl::opt< unsigned > MaxSinkNumUsers("instcombine-max-sink-users", cl::init(32), cl::desc("Maximum number of undroppable users for instruction sinking"))
static Constant * constantFoldOperationIntoSelectOperand(Instruction &I, SelectInst *SI, bool IsTrueArm)
static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo)
Return 'true' if the given typeinfo will match anything.
static bool isMergedGEPInBounds(GEPOperator &GEP1, GEPOperator &GEP2)
static cl::opt< bool > EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"), cl::init(true))
static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C)
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file contains the declarations for metadata subclasses.
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static bool IsSelect(MachineInstr &MI)
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static unsigned getScalarSizeInBits(Type *Ty)
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
This pass exposes codegen information to IR-level passes.
This defines the Use class.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
bool isNoAliasScopeDeclDead(Instruction *Inst)
void analyse(Instruction *I)
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition: APInt.h:76
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition: APInt.h:401
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1898
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition: APInt.h:805
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:312
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1128
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1911
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:321
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:492
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:473
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
Class to represent array types.
Definition: DerivedTypes.h:371
uint64_t getNumElements() const
Definition: DerivedTypes.h:383
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:647
Type * getElementType() const
Definition: DerivedTypes.h:384
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
uint64_t getDereferenceableBytes() const
Returns the number of dereferenceable bytes from the dereferenceable attribute.
Definition: Attributes.cpp:390
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition: Attributes.h:193
Legacy wrapper pass to provide the BasicAAResult object.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:430
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:499
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:409
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:247
InstListType::const_iterator getFirstNonPHIIt() const
Iterator returning form of getFirstNonPHI.
Definition: BasicBlock.cpp:367
const Instruction & front() const
Definition: BasicBlock.h:453
bool isEntryBlock() const
Return true if this is the entry block of the containing function.
Definition: BasicBlock.cpp:564
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:452
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:460
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:165
const_iterator getFirstNonPHIOrDbgOrAlloca() const
Returns an iterator to the first instruction in this block that is not a PHINode, a debug intrinsic,...
Definition: BasicBlock.cpp:423
size_t size() const
Definition: BasicBlock.h:451
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore)
Construct a binary instruction, given the opcode and the two operands.
BinaryOps getOpcode() const
Definition: InstrTypes.h:513
static BinaryOperator * CreateNeg(Value *Op, const Twine &Name, BasicBlock::iterator InsertBefore)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
static BinaryOperator * CreateNUW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition: InstrTypes.h:392
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
void swapSuccEdgesProbabilities(const BasicBlock *Src)
Swap outgoing edges probabilities for Src with branch terminator.
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:70
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1494
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1742
void setAttributes(AttributeList A)
Set the parameter attributes for this call.
Definition: InstrTypes.h:1823
bool doesNotThrow() const
Determine if the call cannot unwind.
Definition: InstrTypes.h:2283
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1687
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1819
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr, BasicBlock::iterator InsertBefore)
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name, BasicBlock::iterator InsertBefore)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:1016
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:1018
@ ICMP_EQ
equal
Definition: InstrTypes.h:1014
@ ICMP_NE
not equal
Definition: InstrTypes.h:1015
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:1167
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:1129
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:1105
ConstantArray - Constant Array Declarations.
Definition: Constants.h:423
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1291
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
Definition: Constants.h:766
static Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2542
static Constant * getNot(Constant *C)
Definition: Constants.cpp:2529
static Constant * getShl(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2560
static Constant * getAdd(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2535
static Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
Definition: Constants.cpp:2596
static Constant * getNeg(Constant *C, bool HasNSW=false)
Definition: Constants.cpp:2523
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:849
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:856
static ConstantInt * getBool(LLVMContext &Context, bool V)
Definition: Constants.cpp:863
This class represents a range of values.
Definition: ConstantRange.h:47
bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const
Set up Pred and RHS such that ConstantRange::makeExactICmpRegion(Pred, RHS) == *this.
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static ConstantRange makeExactNoWrapRegion(Instruction::BinaryOps BinOp, const APInt &Other, unsigned NoWrapKind)
Produce the range that contains X if and only if "X BinOp Other" does not wrap.
Constant Vector Declarations.
Definition: Constants.h:507
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1398
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:400
static Constant * replaceUndefsWith(Constant *C, Constant *Replacement)
Try to replace undefined constant C or undefined elements in C with Replacement.
Definition: Constants.cpp:767
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
const Constant * stripPointerCasts() const
Definition: Constant.h:213
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:432
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
SmallVector< APInt > getGEPIndicesForOffset(Type *&ElemTy, APInt &Offset) const
Get GEP indices to access Offset inside ElemTy.
Definition: DataLayout.cpp:998
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition: DataLayout.h:260
unsigned getIndexTypeSizeInBits(Type *Ty) const
Layout size of the index used in GEP calculation.
Definition: DataLayout.cpp:774
IntegerType * getIndexType(LLVMContext &C, unsigned AddressSpace) const
Returns the type of a GEP index in AddressSpace.
Definition: DataLayout.cpp:905
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
Definition: DataLayout.h:420
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:672
int64_t getIndexedOffsetInType(Type *ElemTy, ArrayRef< Value * > Indices) const
Returns the offset from the beginning of the type for the specified indices.
Definition: DataLayout.cpp:920
This is the common base class for debug info intrinsics for variables.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:72
Identifies a unique instance of a variable.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool empty() const
Definition: DenseMap.h:98
iterator end()
Definition: DenseMap.h:84
void registerBranch(BranchInst *BI)
Add a branch condition to the cache.
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:317
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
This instruction extracts a struct member or array element value from an aggregate value.
ArrayRef< unsigned > getIndices() const
iterator_range< idx_iterator > indices() const
idx_iterator idx_end() const
static ExtractValueInst * Create(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &NameStr, BasicBlock::iterator InsertBefore)
idx_iterator idx_begin() const
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:201
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
This class represents a freeze function that returns random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition: Pass.cpp:178
const BasicBlock & getEntryBlock() const
Definition: Function.h:787
static bool isTargetIntrinsic(Intrinsic::ID IID)
isTargetIntrinsic - Returns true if IID is an intrinsic specific to a certain target.
Definition: Function.cpp:885
bool isInBounds() const
Test whether this is an inbounds GEP, as defined by LangRef.html.
Definition: Operator.h:420
bool hasAllZeroIndices() const
Return true if all of the indices of this GEP are zeros.
Definition: Operator.h:475
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:973
static Type * getTypeAtIndex(Type *Ty, Value *Idx)
Return the type of the element at the given index of an indexable type.
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr, BasicBlock::iterator InsertBefore)
static GetElementPtrInst * CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Create an "inbounds" getelementptr.
void setIsInBounds(bool b=true)
Set or clear the inbounds flag on this GEP instruction.
Legacy wrapper pass to provide the GlobalsAAResult object.
This instruction compares its operands according to the predicate given to the constructor.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Value * CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with 2 operands which is mangled on the first type.
Definition: IRBuilder.cpp:921
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1978
Value * CreateLogicalOp(Instruction::BinaryOps Opc, Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1688
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2516
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:932
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1091
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2033
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2535
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:311
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition: IRBuilder.h:1876
void CollectMetadataToCopy(Instruction *Src, ArrayRef< unsigned > MetadataKinds)
Collect metadata with IDs MetadataKinds from Src which should be added to all created instructions.
Definition: IRBuilder.h:233
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:486
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2366
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2397
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1749
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1344
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1790
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2494
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1475
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1327
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2007
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1666
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2196
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1456
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1519
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1866
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2351
Value * CreateLogicalOr(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1682
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:516
ConstantInt * getInt(const APInt &AI)
Get a constant integer value.
Definition: IRBuilder.h:502
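A minimal usage sketch of the IRBuilder helpers listed above (the block BB and the values X and Y are hypothetical, and X/Y are assumed to be i32; this is not code from this file):

#include "llvm/IR/IRBuilder.h"

// Hypothetical helper: at the end of BB, emit "X == 0 ? X : Y".
static llvm::Value *emitSelectOnZero(llvm::BasicBlock *BB, llvm::Value *X,
                                     llvm::Value *Y) {
  llvm::IRBuilder<> B(BB);                     // append new instructions to BB
  llvm::Value *IsZero =
      B.CreateICmp(llvm::CmpInst::ICMP_EQ, X, B.getInt32(0)); // assumes X is i32
  return B.CreateSelect(IsZero, X, Y, "sel");  // select i1 %IsZero, X, Y
}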
Provides an 'InsertHelper' that calls a user-provided callback after performing the default insertion...
Definition: IRBuilder.h:76
This instruction inserts a struct field or array element value into an aggregate value.
static InsertValueInst * Create(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &NameStr, BasicBlock::iterator InsertBefore)
InstCombinePass(InstCombineOptions Opts={})
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I)
Tries to simplify binops of select and cast of the select condition.
Instruction * foldBinOpIntoSelectOrPhi(BinaryOperator &I)
This is a convenience wrapper function for the above two functions.
bool SimplifyAssociativeOrCommutative(BinaryOperator &I)
Performs a few simplifications for operators which are associative or commutative.
Instruction * visitGEPOfGEP(GetElementPtrInst &GEP, GEPOperator *Src)
Value * foldUsingDistributiveLaws(BinaryOperator &I)
Tries to simplify binary operations which some other binary operation distributes over.
Instruction * foldBinOpShiftWithShift(BinaryOperator &I)
Instruction * visitUnreachableInst(UnreachableInst &I)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
void handleUnreachableFrom(Instruction *I, SmallVectorImpl< BasicBlock * > &Worklist)
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
Instruction * visitFreeze(FreezeInst &I)
void handlePotentiallyDeadBlocks(SmallVectorImpl< BasicBlock * > &Worklist)
Instruction * visitFree(CallInst &FI, Value *FreedOp)
Instruction * visitExtractValueInst(ExtractValueInst &EV)
void handlePotentiallyDeadSuccessors(BasicBlock *BB, BasicBlock *LiveSucc)
Instruction * visitUnconditionalBranchInst(BranchInst &BI)
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Instruction * visitLandingPadInst(LandingPadInst &LI)
bool prepareWorklist(Function &F, ReversePostOrderTraversal< BasicBlock * > &RPOT)
Perform early cleanup and prepare the InstCombine worklist.
Instruction * visitReturnInst(ReturnInst &RI)
Instruction * visitSwitchInst(SwitchInst &SI)
Instruction * foldBinopWithPhiOperands(BinaryOperator &BO)
For a binary operator with 2 phi operands, try to hoist the binary operation before the phi.
Constant * getLosslessTrunc(Constant *C, Type *TruncTy, unsigned ExtOp)
Value * SimplifyDemandedUseFPClass(Value *V, FPClassTest DemandedMask, KnownFPClass &Known, unsigned Depth, Instruction *CxtI)
Attempts to replace V with a simpler value based on the demanded floating-point classes.
bool mergeStoreIntoSuccessor(StoreInst &SI)
Try to transform: if () { *P = v1; } else { *P = v2 } or: *P = v1; if () { *P = v2; } into a phi node...
Instruction * tryFoldInstWithCtpopWithNot(Instruction *I)
void tryToSinkInstructionDbgValues(Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock, BasicBlock *DestBlock, SmallVectorImpl< DbgVariableIntrinsic * > &DbgUsers)
void CreateNonTerminatorUnreachable(Instruction *InsertAt)
Create and insert the idiom we use to indicate a block is unreachable without having to rewrite the C...
Value * pushFreezeToPreventPoisonFromPropagating(FreezeInst &FI)
bool run()
Run the combiner over the entire worklist until it is empty.
Instruction * foldVectorBinop(BinaryOperator &Inst)
Canonicalize the position of binops relative to shufflevector.
bool removeInstructionsBeforeUnreachable(Instruction &I)
Value * SimplifySelectsFeedingBinaryOp(BinaryOperator &I, Value *LHS, Value *RHS)
void tryToSinkInstructionDbgVariableRecords(Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock, BasicBlock *DestBlock, SmallVectorImpl< DbgVariableRecord * > &DPUsers)
void addDeadEdge(BasicBlock *From, BasicBlock *To, SmallVectorImpl< BasicBlock * > &Worklist)
Instruction * visitAllocSite(Instruction &FI)
Instruction * visitGetElementPtrInst(GetElementPtrInst &GEP)
Instruction * visitBranchInst(BranchInst &BI)
Value * tryFactorizationFolds(BinaryOperator &I)
This tries to simplify binary operations by factorizing out common terms (e.
Instruction * foldFreezeIntoRecurrence(FreezeInst &I, PHINode *PN)
bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock)
Try to move the specified instruction from its current block into the beginning of DestBlock,...
bool freezeOtherUses(FreezeInst &FI)
void freelyInvertAllUsersOf(Value *V, Value *IgnoredUser=nullptr)
Freely adapt every user of V as-if V was changed to !V.
The core instruction combiner logic.
Definition: InstCombiner.h:47
SimplifyQuery SQ
Definition: InstCombiner.h:76
const DataLayout & getDataLayout() const
Definition: InstCombiner.h:341
static bool isCanonicalPredicate(CmpInst::Predicate Pred)
Predicate canonicalization reduces the number of patterns that need to be matched by other transforms...
Definition: InstCombiner.h:157
bool isFreeToInvert(Value *V, bool WillInvertAllUses, bool &DoesConsume)
Return true if the specified value is free to invert (apply ~ to).
Definition: InstCombiner.h:232
static unsigned getComplexity(Value *V)
Assign a complexity or rank value to LLVM Values.
Definition: InstCombiner.h:139
TargetLibraryInfo & TLI
Definition: InstCombiner.h:73
Instruction * InsertNewInstBefore(Instruction *New, BasicBlock::iterator Old)
Inserts an instruction New before instruction Old.
Definition: InstCombiner.h:366
AAResults * AA
Definition: InstCombiner.h:69
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Definition: InstCombiner.h:386
uint64_t MaxArraySizeForCombine
Maximum size of array considered when transforming.
Definition: InstCombiner.h:55
static bool shouldAvoidAbsorbingNotIntoSelect(const SelectInst &SI)
Definition: InstCombiner.h:191
void replaceUse(Use &U, Value *NewValue)
Replace use and add the previously used value to the worklist.
Definition: InstCombiner.h:418
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Definition: InstCombiner.h:64
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
Definition: InstCombiner.h:375
BranchProbabilityInfo * BPI
Definition: InstCombiner.h:79
const DataLayout & DL
Definition: InstCombiner.h:75
unsigned ComputeNumSignBits(const Value *Op, unsigned Depth=0, const Instruction *CxtI=nullptr) const
Definition: InstCombiner.h:452
DomConditionCache DC
Definition: InstCombiner.h:81
const bool MinimizeSize
Definition: InstCombiner.h:67
std::optional< Instruction * > targetInstCombineIntrinsic(IntrinsicInst &II)
void addToWorklist(Instruction *I)
Definition: InstCombiner.h:336
Value * getFreelyInvertedImpl(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume, unsigned Depth)
Return a nonnull value if V is free to invert under the condition of WillInvertAllUses.
std::optional< Value * > targetSimplifyDemandedVectorEltsIntrinsic(IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
Definition: InstCombiner.h:410
DominatorTree & DT
Definition: InstCombiner.h:74
static Constant * getSafeVectorConstantForBinop(BinaryOperator::BinaryOps Opcode, Constant *In, bool IsRHSConstant)
Some binary operators require special handling to avoid poison and undefined behavior.
Definition: InstCombiner.h:284
SmallDenseSet< std::pair< BasicBlock *, BasicBlock * >, 8 > DeadEdges
Edges that are known to never be taken.
Definition: InstCombiner.h:90
std::optional< Value * > targetSimplifyDemandedUseBitsIntrinsic(IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)
void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, const Instruction *CxtI) const
Definition: InstCombiner.h:431
BuilderTy & Builder
Definition: InstCombiner.h:60
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Value * getFreelyInverted(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume)
Definition: InstCombiner.h:213
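A schematic sketch of how the InstCombiner members listed above are typically used together in a fold; the helper and the fold itself are hypothetical, not a transform from this file:

#include "InstCombineInternal.h"
#include "llvm/IR/PatternMatch.h"

// Fold "xor X, 0" -> X using the combiner-aware replacement routine.
static llvm::Instruction *foldTrivialXor(llvm::InstCombinerImpl &IC,
                                         llvm::BinaryOperator &I) {
  using namespace llvm::PatternMatch;
  llvm::Value *X;
  if (match(&I, m_Xor(m_Value(X), m_Zero())))
    return IC.replaceInstUsesWith(I, X); // RAUW plus worklist bookkeeping
  return nullptr;                        // nullptr means "no change made"
}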
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:87
The legacy pass manager's instcombine pass.
Definition: InstCombine.h:71
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
InstructionWorklist - This is the worklist management logic for InstCombine and other simplification ...
void pushUsersToWorkList(Instruction &I)
When an instruction is simplified, add all users of the instruction to the work lists because they mi...
void add(Instruction *I)
Add instruction to the worklist.
void push(Instruction *I)
Push the instruction onto the worklist stack.
void zap()
Check that the worklist is empty and nuke the backing store for the map.
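A sketch of the worklist protocol described by these entries; the driver shown is hypothetical, not the real InstCombine loop:

#include "llvm/Transforms/Utils/InstructionWorklist.h"

// After rewriting I in place, make sure both I and its users get revisited.
static void requeueAfterChange(llvm::InstructionWorklist &WL,
                               llvm::Instruction &I) {
  WL.push(&I);               // revisit the changed instruction itself
  WL.pushUsersToWorkList(I); // its users may now simplify as well
}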
static bool isBitwiseLogicOp(unsigned Opcode)
Determine if the Opcode is and/or/xor.
Definition: Instruction.h:301
void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:454
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:83
bool isAssociative() const LLVM_READONLY
Return true if the instruction is associative:
bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
const BasicBlock * getParent() const
Definition: Instruction.h:152
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:87
bool isTerminator() const
Definition: Instruction.h:255
void dropUBImplyingAttrsAndMetadata()
Drop any attributes or metadata that can cause immediate undefined behavior.
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
bool willReturn() const LLVM_READONLY
Return true if the instruction will return (unwinding is considered as a form of returning control fl...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:252
bool isBitwiseLogicOp() const
Return true if this is and/or/xor.
Definition: Instruction.h:306
bool isShift() const
Definition: Instruction.h:259
void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:451
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool isIntDivRem() const
Definition: Instruction.h:258
Class to represent integer types.
Definition: DerivedTypes.h:40
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:54
Invoke instruction.
static InvokeInst * Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value * > Args, const Twine &NameStr, BasicBlock::iterator InsertBefore)
The landingpad instruction holds all of the information necessary to generate correct exception handl...
void addClause(Constant *ClauseVal)
Add a catch or filter clause to the landing pad.
void setCleanup(bool V)
Indicate that this landingpad instruction is a cleanup.
static LandingPadInst * Create(Type *RetTy, unsigned NumReservedClauses, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedClauses is a hint for the number of incoming clauses that this landingpad w...
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
An instruction for reading from memory.
Definition: Instructions.h:184
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
Metadata node.
Definition: Metadata.h:1067
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1434
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:889
This is the common base class for memset/memcpy/memmove.
static MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
Root of the metadata hierarchy.
Definition: Metadata.h:62
This class represents min/max intrinsics.
Value * getLHS() const
Value * getRHS() const
static ICmpInst::Predicate getPredicate(Intrinsic::ID ID)
Returns the comparison predicate underlying the intrinsic.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MDNode * getScopeList() const
OptimizationRemarkEmitter legacy analysis pass.
The optimization diagnostic interface.
An analysis over an "inner" IR unit that provides access to an analysis manager over an "outer" IR uni...
Definition: PassManager.h:756
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
Definition: Operator.h:76
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition: Operator.h:109
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition: Operator.h:103
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
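A small sketch of the PHINode interface listed above; the block and value names are hypothetical:

#include "llvm/IR/Instructions.h"

// Merge a value from ThenBB and a value from ElseBB at the top of MergeBB.
static llvm::PHINode *mergeValues(llvm::BasicBlock *MergeBB,
                                  llvm::Value *FromThen, llvm::BasicBlock *ThenBB,
                                  llvm::Value *FromElse, llvm::BasicBlock *ElseBB) {
  llvm::PHINode *PN = llvm::PHINode::Create(FromThen->getType(),
                                            /*NumReservedValues=*/2, "merge",
                                            MergeBB->begin());
  PN->addIncoming(FromThen, ThenBB); // one entry per predecessor edge
  PN->addIncoming(FromElse, ElseBB);
  return PN;
}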
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:37
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
In order to facilitate speculative execution, many instructions do not invoke immediate undefined beh...
Definition: Constants.h:1396
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1827
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:144
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
A global registry used in conjunction with static constructors to make pluggable components (like tar...
Definition: Registry.h:44
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
static ReturnInst * Create(LLVMContext &C, Value *retVal, BasicBlock::iterator InsertBefore)
This class represents a cast from signed integer to floating point.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr, BasicBlock::iterator InsertBefore, Instruction *MDFrom=nullptr)
This instruction constructs a fixed permutation of two input vectors.
size_type size() const
Definition: SmallPtrSet.h:94
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
typename SuperClass::iterator iterator
Definition: SmallVector.h:590
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Multiway switch.
TargetFolder - Create constants with target dependent folding.
Definition: TargetFolder.h:34
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Targets can implement their own combinations for target-specific intrinsics.
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Can be used to implement target-specific instruction combining.
std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
Can be used to implement target-specific instruction combining.
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Query the target whether the specified address space cast from FromAS to ToAS is valid.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
const fltSemantics & getFltSemantics() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
static IntegerType * getInt1Ty(LLVMContext &C)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:249
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:302
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
This class represents a cast unsigned integer to floating point.
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:242
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
Definition: Value.h:736
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
iterator_range< user_iterator > users()
Definition: Value.h:421
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition: Value.cpp:149
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:693
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
uint64_t getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull, bool &CanBeFreed) const
Returns the number of bytes known to be dereferenceable for the pointer value.
Definition: Value.cpp:851
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Definition: Type.cpp:676
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:199
constexpr bool isZero() const
Definition: TypeSize.h:156
An efficient, type-erasing, non-owning reference to a callable.
reverse_self_iterator getReverseIterator()
Definition: ilist_node.h:112
self_iterator getIterator()
Definition: ilist_node.h:109
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:660
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isNoFPClassCompatibleType(Type *Ty)
Returns true if this is a type legal for the 'nofpclass' attribute.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1471
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
Definition: PatternMatch.h:524
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
Definition: PatternMatch.h:160
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
PtrAdd_match< PointerOpTy, OffsetOpTy > m_PtrAdd(const PointerOpTy &PointerOp, const OffsetOpTy &OffsetOp)
Matches GEP with i8 source element type.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:165
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
br_match m_UnconditionalBr(BasicBlock *&Succ)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:972
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
BinOpPred_match< LHS, RHS, is_idiv_op > m_IDiv(const LHS &L, const RHS &R)
Matches integer division operations.
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:816
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:875
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
constantexpr_match m_ConstantExpr()
Match a constant expression or a constant that contains a constant expression.
Definition: PatternMatch.h:186
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches right-shift operations (lshr or ashr).
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
Definition: PatternMatch.h:560
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:168
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
CmpClass_match< LHS, RHS, FCmpInst, FCmpInst::Predicate > m_FCmp(FCmpInst::Predicate &Pred, const LHS &L, const RHS &R)
CastOperator_match< OpTy, Instruction::Trunc > m_Trunc(const OpTy &Op)
Matches Trunc.
apint_match m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
Definition: PatternMatch.h:305
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:854
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
match_combine_or< BinaryOp_match< LHS, RHS, Instruction::Add >, DisjointOr_match< LHS, RHS > > m_AddLike(const LHS &L, const RHS &R)
Match either "add" or "or disjoint".
CastInst_match< OpTy, UIToFPInst > m_UIToFP(const OpTy &Op)
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
CastInst_match< OpTy, SIToFPInst > m_SIToFP(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
Exact_match< T > m_Exact(const T &SubPattern)
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
cstfp_pred_ty< is_non_zero_fp > m_NonZeroFP()
Match a floating-point non-zero.
Definition: PatternMatch.h:791
m_Intrinsic_Ty< Opnd0 >::Ty m_VecReverse(const Opnd0 &Op0)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > >, match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > > > m_MaxOrMin(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:612
CastOperator_match< OpTy, Instruction::PtrToInt > m_PtrToInt(const OpTy &Op)
Matches PtrToInt.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
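A sketch of the matcher idiom behind these m_* helpers; the value V and the bound names are hypothetical, not code from this file:

#include "llvm/IR/PatternMatch.h"

// Recognize a single-use "add X, C" and bind both the variable operand and
// the constant; returns false if V has a different shape.
static bool matchSingleUseAddOfConstant(llvm::Value *V, llvm::Value *&X,
                                        const llvm::APInt *&C) {
  using namespace llvm::PatternMatch;
  return match(V, m_OneUse(m_Add(m_Value(X), m_APInt(C))));
}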
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
Intrinsic::ID getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID)
@ Offset
Definition: DWP.cpp:456
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition: STLExtras.h:853
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
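A short sketch of these range helpers; the predicate is hypothetical, not from this file:

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"

// Check that every incoming value of a phi is a constant.
static bool allIncomingConstant(const llvm::PHINode &PN) {
  return llvm::all_of(PN.incoming_values(), [](const llvm::Value *V) {
    return llvm::isa<llvm::Constant>(V);
  });
}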
bool succ_empty(const Instruction *I)
Definition: CFG.h:255
Value * simplifyFreezeInst(Value *Op, const SimplifyQuery &Q)
Given an operand for a Freeze, see if we can fold the result.
FunctionPass * createInstructionCombiningPass()
std::pair< unsigned, unsigned > removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB)
Remove all instructions from a basic block other than its terminator and any present EH pad instructi...
Definition: Local.cpp:2801
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2406
void salvageDebugInfoForDbgValues(Instruction &I, ArrayRef< DbgVariableIntrinsic * > Insns, ArrayRef< DbgVariableRecord * > DPInsns)
Implementation of salvageDebugInfo, applying only to instructions in Insns, rather than all debug use...
Definition: Local.cpp:2242
void findDbgUsers(SmallVectorImpl< DbgVariableIntrinsic * > &DbgInsts, Value *V, SmallVectorImpl< DbgVariableRecord * > *DbgVariableRecords=nullptr)
Finds the debug info intrinsics describing a value.
Definition: DebugInfo.cpp:145
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1652
auto successors(const MachineBasicBlock *BB)
bool isRemovableAlloc(const CallBase *V, const TargetLibraryInfo *TLI)
Return true if this is a call to an allocation function that does not have side effects that we are r...
std::optional< StringRef > getAllocationFamily(const Value *I, const TargetLibraryInfo *TLI)
If a function is part of an allocation family (e.g.
Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Value * simplifyInstructionWithOperands(Instruction *I, ArrayRef< Value * > NewOps, const SimplifyQuery &Q)
Like simplifyInstruction but the operands of I are replaced with NewOps.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2073
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
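A sketch of the early-increment idiom listed above, in the assumed context of cleaning up a block; not code from this file:

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/Transforms/Utils/Local.h"

// Erase trivially dead instructions from BB; the early-inc range keeps the
// loop iterator valid while the current instruction is removed.
static void dropDeadInsts(llvm::BasicBlock &BB) {
  for (llvm::Instruction &I : llvm::make_early_inc_range(BB))
    if (llvm::isInstructionTriviallyDead(&I))
      I.eraseFromParent();
}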
gep_type_iterator gep_type_end(const User *GEP)
Value * getReallocatedOperand(const CallBase *CB)
If this is a call to a realloc function, return the reallocated operand.
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI)
Tests if a value is a call or invoke to a library function that allocates memory (either malloc,...
bool handleUnreachableTerminator(Instruction *I, SmallVectorImpl< Value * > &PoisonedValues)
If a terminator in an unreachable basic block has an operand of type Instruction, transform it into p...
Definition: Local.cpp:2783
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
Value * simplifyAddInst(Value *LHS, Value *RHS, bool IsNSW, bool IsNUW, const SimplifyQuery &Q)
Given operands for an Add, fold the result or return null.
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
constexpr bool has_single_bit(T Value) noexcept
Definition: bit.h:146
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:400
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
Value * emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL, User *GEP, bool NoAssumptions=false)
Given a getelementptr instruction/constantexpr, emit the code necessary to compute the offset from th...
Definition: Local.cpp:22
constexpr unsigned MaxAnalysisRecursionDepth
Definition: ValueTracking.h:48
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
Constant * ConstantFoldInstOperands(Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool LowerDbgDeclare(Function &F)
Lowers llvm.dbg.declare intrinsics into an appropriate set of llvm.dbg.value intrinsics.
Definition: Local.cpp:1916
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
void ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, StoreInst *SI, DIBuilder &Builder)
Dbg Intrinsic utilities.
Definition: Local.cpp:1691
Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
Value * simplifyExtractValueInst(Value *Agg, ArrayRef< unsigned > Idxs, const SimplifyQuery &Q)
Given operands for an ExtractValueInst, fold the result or return null.
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition: Local.cpp:2711
bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:336
Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
@ Or
Bitwise or logical OR of integers.
DWARFExpression::Operation Op
Constant * ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this if a call to a free function, return the freed operand.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
gep_type_iterator gep_type_begin(const User *GEP)
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition: STLExtras.h:2025
Value * simplifyGEPInst(Type *SrcTy, Value *Ptr, ArrayRef< Value * > Indices, bool InBounds, const SimplifyQuery &Q)
Given operands for a GetElementPtrInst, fold the result or return null.
bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the given value is known to be non-negative.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
void initializeInstCombine(PassRegistry &)
Initialize all passes linked into the InstCombine library.
void initializeInstructionCombiningPassPass(PassRegistry &)
std::optional< bool > isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, bool LHSIsTrue=true, unsigned Depth=0)
Return true if RHS is known to be implied true by LHS.
bool isPotentiallyReachable(const Instruction *From, const Instruction *To, const SmallPtrSetImpl< BasicBlock * > *ExclusionSet=nullptr, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr)
Determine whether instruction 'To' is reachable from 'From', without passing through any blocks in Ex...
Definition: CFG.cpp:231
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:292
unsigned countMinLeadingOnes() const
Returns the minimum number of leading one bits.
Definition: KnownBits.h:247
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:244
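A sketch tying the KnownBits queries above to a typical client; the overload of llvm::computeKnownBits taking a DataLayout comes from ValueTracking.h, and the helper name is hypothetical:

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/KnownBits.h"

// V (assumed to have integer type) is provably non-negative if at least one
// leading zero bit -- which covers the sign bit -- is known.
static bool knownNonNegative(const llvm::Value *V, const llvm::DataLayout &DL) {
  llvm::KnownBits Known = llvm::computeKnownBits(V, DL);
  return Known.countMinLeadingZeros() > 0;
}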
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:74
SimplifyQuery getWithInstruction(const Instruction *I) const
Definition: SimplifyQuery.h:96
SimplifyQuery getWithoutUndef() const