#ifndef LLVM_CODEGEN_BASICTTIIMPL_H
#define LLVM_CODEGEN_BASICTTIIMPL_H

  /// Helper function to access this as a T.
  T *thisT() { return static_cast<T *>(this); }
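// --- Illustrative sketch (not part of this header) -------------------------
// thisT() is the CRTP hook that lets this base class call into the
// most-derived TTI implementation without virtual dispatch. A minimal
// standalone example of the same pattern (all names below are invented):
#include <iostream>

template <typename T> struct CostModelBase {
  T *thisT() { return static_cast<T *>(this); }
  // Generic formula that defers one term to the derived class.
  unsigned vectorOpCost(unsigned NumElts) {
    return NumElts * thisT()->perElementCost();
  }
  unsigned perElementCost() { return 1; } // default, shadowed by Derived
};

struct MyTargetTTI : CostModelBase<MyTargetTTI> {
  unsigned perElementCost() { return 3; } // target-specific override
};

int main() {
  MyTargetTTI TTI;
  std::cout << TTI.vectorOpCost(4) << "\n"; // 12, uses the derived override
}
// ---------------------------------------------------------------------------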
    // Broadcast: extract element 0, then insert it into every result lane.
    Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
                                        CostKind, 0, nullptr, nullptr);
    Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
                                        CostKind, i, nullptr, nullptr);
    // ...
    // Permute: assume every result lane is inserted and extracted once.
    Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
                                        CostKind, i, nullptr, nullptr);
    Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
                                        CostKind, i, nullptr, nullptr);
134 "Can only extract subvectors from vectors");
136 assert((!isa<FixedVectorType>(VTy) ||
137 (
Index + NumSubElts) <=
138 (
int)cast<FixedVectorType>(VTy)->getNumElements()) &&
139 "SK_ExtractSubvector index out of range");
145 for (
int i = 0; i != NumSubElts; ++i) {
147 thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
149 Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, SubVTy,
162 "Can only insert subvectors into vectors");
164 assert((!isa<FixedVectorType>(VTy) ||
165 (
Index + NumSubElts) <=
166 (
int)cast<FixedVectorType>(VTy)->getNumElements()) &&
167 "SK_InsertSubvector index out of range");
173 for (
int i = 0; i != NumSubElts; ++i) {
174 Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVTy,
177 thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
CostKind,
178 i +
Index,
nullptr,
nullptr);
  /// Local query method delegates up to T which *must* implement this!
  const TargetSubtargetInfo *getST() const {
    return static_cast<const T *>(this)->getST();
  }

  /// Local query method delegates up to T which *must* implement this!
  const TargetLoweringBase *getTLI() const {
    return static_cast<const T *>(this)->getTLI();
  }
  InstructionCost getCommonMaskedMemoryOpCost(unsigned Opcode, Type *DataTy,
                                              Align Alignment,
                                              bool VariableMask,
                                              bool IsGatherScatter,
                                              TTI::TargetCostKind CostKind) {
    // We cannot scalarize scalable vectors, so return an invalid cost.
    if (isa<ScalableVectorType>(DataTy))
      return InstructionCost::getInvalid();

    auto *VT = cast<FixedVectorType>(DataTy);
    unsigned VF = VT->getNumElements();
    // ...
    InstructionCost MemoryOpCost =
        VF * thisT()->getMemoryOpCost(Opcode, VT->getElementType(), Alignment,
                                      AddressSpace, CostKind);
    // Next, the cost of packing/unpacking the scalarized elements.
    InstructionCost PackingCost = getScalarizationOverhead(
        VT, Opcode != Instruction::Store, Opcode == Instruction::Store,
        CostKind);
    // ...
    // A variable mask turns every lane into a branch around the scalar op.
    ConditionalCost =
        VF * (thisT()->getCFInstrCost(Instruction::Br, CostKind) +
              thisT()->getCFInstrCost(Instruction::PHI, CostKind));
    // ...
    return AddrExtractCost + MemoryOpCost + PackingCost + ConditionalCost;
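// --- Illustrative sketch (not part of this header) -------------------------
// With no native masked load/store, the cost above is just a sum of four
// scalarized components. Standalone arithmetic mirror (plain integers stand
// in for InstructionCost; the per-op constants are made up for illustration):
#include <cstdint>
#include <iostream>

uint64_t maskedOpCost(uint64_t VF, uint64_t ScalarMemCost, uint64_t InsExtCost,
                      uint64_t BrCost, uint64_t PhiCost, bool VariableMask,
                      uint64_t AddrExtractCost) {
  uint64_t MemoryOpCost = VF * ScalarMemCost;     // per-lane loads/stores
  uint64_t PackingCost = VF * InsExtCost;         // build/decompose the vector
  uint64_t ConditionalCost =
      VariableMask ? VF * (BrCost + PhiCost) : 0; // branch + phi per lane
  return AddrExtractCost + MemoryOpCost + PackingCost + ConditionalCost;
}

int main() {
  // 8 lanes, unit costs, variable mask, no gather/scatter address
  // extraction: 8*1 + 8*1 + 8*(1+1) + 0 = 32.
  std::cout << maskedOpCost(8, 1, 1, 1, 1, true, 0) << "\n";
}
// ---------------------------------------------------------------------------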
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, Align Alignment,
                                      unsigned *Fast) const {
  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const {
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace, Instruction *I = nullptr,
                             int64_t ScalableOffset = 0) {
  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                             Type *ScalarValTy) const {
    auto &&IsSupportedByTarget = [this, ScalarMemTy, ScalarValTy](unsigned VF) {
      auto *SrcTy = FixedVectorType::get(ScalarMemTy, VF / 2);
      EVT VT = getTLI()->getValueType(DL, SrcTy);
      if (getTLI()->isOperationLegal(ISD::STORE, VT) ||
          getTLI()->isOperationCustom(ISD::STORE, VT))
        return true;
      // ...
    };
    // Keep halving VF while a store of half that width is still supported.
    while (VF > 2 && IsSupportedByTarget(VF))
      VF /= 2;
    return VF;
  }
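// --- Illustrative sketch (not part of this header) -------------------------
// The loop above is a simple downward search: halve the vectorization factor
// while the target still supports a store at half that width. Standalone
// mirror with a stubbed-out legality predicate (illustrative only):
#include <functional>
#include <iostream>

unsigned storeMinimumVF(unsigned VF,
                        const std::function<bool(unsigned)> &HalfIsLegal) {
  while (VF > 2 && HalfIsLegal(VF))
    VF /= 2;
  return VF;
}

int main() {
  // Pretend the target supports vector stores down to 4 elements, so a
  // probe at VF is "legal" whenever VF / 2 >= 4.
  auto HalfIsLegal = [](unsigned VF) { return VF / 2 >= 4; };
  std::cout << storeMinimumVF(32, HalfIsLegal) << "\n"; // prints 4
}
// ---------------------------------------------------------------------------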
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       StackOffset BaseOffset, bool HasBaseReg,
                                       int64_t Scale, unsigned AddrSpace) {
447 unsigned &JumpTableSize,
457 unsigned N = SI.getNumCases();
462 bool IsJTAllowed = TLI->
areJTsAllowed(SI.getParent()->getParent());
468 APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
469 APInt MinCaseVal = MaxCaseVal;
470 for (
auto CI : SI.cases()) {
471 const APInt &CaseVal = CI.getCaseValue()->getValue();
472 if (CaseVal.
sgt(MaxCaseVal))
473 MaxCaseVal = CaseVal;
474 if (CaseVal.
slt(MinCaseVal))
475 MinCaseVal = CaseVal;
481 for (
auto I : SI.cases())
482 Dests.
insert(
I.getCaseSuccessor());
491 if (
N < 2 || N < TLI->getMinimumJumpTableEntries())
494 (MaxCaseVal - MinCaseVal)
495 .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
498 JumpTableSize = Range;
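// --- Illustrative sketch (not part of this header) -------------------------
// The jump-table decision hinges on the span of the case values:
// Range = (max - min) + 1, with the difference clamped so the +1 can't
// overflow. Standalone mirror with plain uint64_t instead of APInt:
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <limits>
#include <vector>

uint64_t jumpTableRange(const std::vector<int64_t> &CaseVals) {
  auto [MinIt, MaxIt] = std::minmax_element(CaseVals.begin(), CaseVals.end());
  // Clamp to max() - 1 so adding 1 never wraps.
  uint64_t Diff = static_cast<uint64_t>(*MaxIt) - static_cast<uint64_t>(*MinIt);
  Diff = std::min(Diff, std::numeric_limits<uint64_t>::max() - 1);
  return Diff + 1;
}

int main() {
  // Cases {3, 7, 100}: a table would need 100 - 3 + 1 = 98 entries for only
  // three cases -- too sparse for most targets.
  std::cout << jumpTableRange({3, 7, 100}) << "\n"; // 98
}
// ---------------------------------------------------------------------------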
    // If non-PIC mode, do not generate a relative lookup table.
    if (!TM.isPositionIndependent())
      return false;
    // ...
    Triple TargetTriple = TM.getTargetTriple();
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem: {
    else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
      MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
    // ...
    // Scan the loop: don't unroll loops that contain calls.
    if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
      // ...
          << "advising against unrolling the loop because it "
             "contains a "
          << ore::NV("Call", &I);
  std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) {
    // ...
  }
  // ...
    return BaseT::simplifyDemandedVectorEltsIntrinsic(
        IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
        SimplifyAndSetOp);
  virtual std::optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    return std::optional<unsigned>(
        getST()->getCacheSize(static_cast<unsigned>(Level)));
  }

  virtual std::optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    std::optional<unsigned> TargetResult =
        getST()->getCacheAssociativity(static_cast<unsigned>(Level));
    // ...
  }

  virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                        unsigned NumStridedMemAccesses,
                                        unsigned NumPrefetches,
                                        bool HasCall) const {
    return getST()->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
                                         NumPrefetches, HasCall);
  }
  InstructionCost getScalarizationOverhead(VectorType *InTy,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
                                           TTI::TargetCostKind CostKind) {
    // Scalable vectors cannot be scalarized, so return an invalid cost.
    if (isa<ScalableVectorType>(InTy))
      return InstructionCost::getInvalid();
    auto *Ty = cast<FixedVectorType>(InTy);
    assert(DemandedElts.getBitWidth() == Ty->getNumElements() &&
           "Vector size mismatch");
    InstructionCost Cost = 0;
    for (int i = 0, e = Ty->getNumElements(); i < e; ++i) {
      if (!DemandedElts[i])
        continue;
      if (Insert)
        Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty,
                                            CostKind, i, nullptr, nullptr);
      if (Extract)
        Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty,
                                            CostKind, i, nullptr, nullptr);
    }
    return Cost;
  }
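// --- Illustrative sketch (not part of this header) -------------------------
// Only the demanded lanes contribute, so the overhead is effectively
// popcount(DemandedElts) scaled by the insert and/or extract cost.
// Standalone mirror with a plain bitmask (unit per-lane costs assumed):
#include <bit>
#include <cstdint>
#include <iostream>

uint64_t scalarizationOverhead(uint64_t DemandedElts, bool Insert,
                               bool Extract) {
  // Each demanded lane pays for an insert and/or an extract.
  uint64_t PerLane = (Insert ? 1 : 0) + (Extract ? 1 : 0);
  return std::popcount(DemandedElts) * PerLane;
}

int main() {
  // 8-lane vector, lanes {0, 2, 3, 7} demanded, insert + extract both paid:
  // 4 lanes * 2 = 8.
  std::cout << scalarizationOverhead(0b10001101, true, true) << "\n";
}
// ---------------------------------------------------------------------------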
    if (isa<ScalableVectorType>(InTy))
      return InstructionCost::getInvalid();
    auto *Ty = cast<FixedVectorType>(InTy);
    // Helper wrapper: all lanes demanded.
    APInt DemandedElts = APInt::getAllOnes(Ty->getNumElements());
    return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
                                             CostKind);
    assert(Args.size() == Tys.size() && "Expected matching Args and Tys");

    InstructionCost Cost = 0;
    SmallPtrSet<const Value *, 4> UniqueOperands;
    for (int I = 0, E = Args.size(); I != E; I++) {
      // Disregard things like metadata arguments.
      const Value *A = Args[I];
      Type *Ty = Tys[I];
      // ...
      // Only count each unique non-constant operand once.
      if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
        if (auto *VecTy = dyn_cast<VectorType>(Ty))
          Cost += getScalarizationOverhead(VecTy, /*Insert*/ false,
                                           /*Extract*/ true, CostKind);
      }
    }
      // Do not loop with f128 type.
      if (MTy == LK.second)
  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Opd1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Opd2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = std::nullopt,
      const Instruction *CxtI = nullptr) {
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");
    // ...
    // Assume that floating point arithmetic operations cost twice as much
    // as integer operations.
    InstructionCost OpCost = (IsFloat ? 2 : 1);

    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
      // The operation is legal. Assume it costs 1.
      return LT.first * OpCost;
    }

    if (!TLI->isOperationExpand(ISD, LT.second)) {
      // If the operation is custom lowered, then assume that the code is
      // twice as expensive.
      return LT.first * 2 * OpCost;
    }

    // An 'Expand' of URem/SRem may default to a sequence of div, mul, sub:
    //   rem = sub(x, mul(div(x, y), y))
    if (ISD == ISD::UREM || ISD == ISD::SREM) {
      bool IsSigned = ISD == ISD::SREM;
      // ...
      unsigned DivOpc = IsSigned ? Instruction::SDiv : Instruction::UDiv;
      InstructionCost DivCost = thisT()->getArithmeticInstrCost(
          DivOpc, Ty, CostKind, Opd1Info, Opd2Info);
      InstructionCost MulCost =
          thisT()->getArithmeticInstrCost(Instruction::Mul, Ty, CostKind);
      InstructionCost SubCost =
          thisT()->getArithmeticInstrCost(Instruction::Sub, Ty, CostKind);
      return DivCost + MulCost + SubCost;
    }
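// --- Illustrative sketch (not part of this header) -------------------------
// A remainder on a target that only has division is priced as exactly three
// component instructions. Standalone mirror of both the expansion and its
// cost sum (per-op costs here are made up):
#include <cstdint>
#include <iostream>

// The expansion the cost above models: rem = x - (x / y) * y.
int64_t expandedRem(int64_t X, int64_t Y) { return X - (X / Y) * Y; }

// And its price: one div, one mul, one sub.
uint64_t remCost(uint64_t DivCost, uint64_t MulCost, uint64_t SubCost) {
  return DivCost + MulCost + SubCost;
}

int main() {
  std::cout << expandedRem(17, 5) << "\n"; // 2, same as 17 % 5
  std::cout << remCost(20, 3, 1) << "\n";  // 24
}
// ---------------------------------------------------------------------------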
    // We cannot scalarize scalable vectors, so return an invalid cost.
    if (isa<ScalableVectorType>(Ty))
      return InstructionCost::getInvalid();

    // Else, assume that we need to scalarize this op.
    if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
      InstructionCost Cost = thisT()->getArithmeticInstrCost(
          Opcode, VTy->getScalarType(), CostKind, Opd1Info, Opd2Info, Args,
          CxtI);
      // Return the cost of multiple scalar invocations plus the cost of
      // inserting and extracting the values.
      SmallVector<Type *> Tys(Args.size(), Ty);
        (Index + Mask.size()) <= (size_t)NumSrcElts) {
      // ...
    }
    int NumSubElts;
    if (ShuffleVectorInst::isInsertSubvectorMask(Mask, NumSrcElts, NumSubElts,
                                                 Index)) {
      if (Index + NumSubElts > NumSrcElts)
        return Kind;
    case TTI::SK_Broadcast:
      if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
        return getBroadcastShuffleOverhead(FVT, CostKind);
      return InstructionCost::getInvalid();
    // ...
      if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
        return getPermuteShuffleOverhead(FVT, CostKind);
      return InstructionCost::getInvalid();
    case TTI::SK_ExtractSubvector:
      return getExtractSubvectorOverhead(Tp, CostKind, Index,
                                         cast<FixedVectorType>(SubTp));
    case TTI::SK_InsertSubvector:
      return getInsertSubvectorOverhead(Tp, CostKind, Index,
                                        cast<FixedVectorType>(SubTp));
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");
    std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(Src);
    std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(Dst);

    TypeSize SrcSize = SrcLT.second.getSizeInBits();
    TypeSize DstSize = DstLT.second.getSizeInBits();
    bool IntOrPtrSrc = Src->isIntegerTy() || Src->isPointerTy();
    bool IntOrPtrDst = Dst->isIntegerTy() || Dst->isPointerTy();

    switch (Opcode) {
    default:
      break;
    case Instruction::Trunc:
      // Check for NOOP conversions.
      if (TLI->isTruncateFree(SrcLT.second, DstLT.second))
        return 0;
      [[fallthrough]];
    case Instruction::BitCast:
      // Bitcast between types that legalize to the same type is free, and
      // int to/from ptr of the same size is assumed free as well.
      if (SrcLT.first == DstLT.first && IntOrPtrSrc == IntOrPtrDst &&
          SrcSize == DstSize)
        return 0;
      break;
    case Instruction::FPExt:
      if (I && getTLI()->isExtFree(I))
        return 0;
      break;
    case Instruction::ZExt:
      if (TLI->isZExtFree(SrcLT.second, DstLT.second))
        return 0;
      [[fallthrough]];
    case Instruction::SExt:
      if (I && getTLI()->isExtFree(I))
        return 0;
      // A zext/sext of a load is free if the target supports the
      // corresponding extending load for a legal value type.
      if (DstLT.first == SrcLT.first &&
          TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
        return 0;
      break;
    case Instruction::AddrSpaceCast:
      if (TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
                                   Dst->getPointerAddressSpace()))
        return 0;
      break;
    }

    auto *SrcVTy = dyn_cast<VectorType>(Src);
    auto *DstVTy = dyn_cast<VectorType>(Dst);

    // If the cast is marked as legal (or promote) then assume low cost.
    if (SrcLT.first == DstLT.first &&
        TLI->isOperationLegalOrPromote(ISD, DstLT.second))
      return SrcLT.first;

    // Handle scalar conversions.
    if (!SrcVTy && !DstVTy) {
      // ...
    }
    // Check vector-to-vector casts.
    if (DstVTy && SrcVTy) {
      // If the cast is between same-sized registers, the check is simple.
      if (SrcLT.first == DstLT.first && SrcSize == DstSize) {
        // Assume that Zext is done using AND.
        if (Opcode == Instruction::ZExt)
          return SrcLT.first;
        // Assume that sext is done using SHL and SRA.
        if (Opcode == Instruction::SExt)
          return SrcLT.first * 2;
        // Otherwise, if the op is legal, it costs 1 times the
        // type-legalization overhead.
        if (!TLI->isOperationExpand(ISD, DstLT.second))
          return SrcLT.first * 1;
      }
      // If we are legalizing by splitting, query the concrete TTI for the
      // cost of casting the original vector twice, plus the split itself.
      if ((SplitSrc || SplitDst) && SrcVTy->getElementCount().isVector() &&
          DstVTy->getElementCount().isVector()) {
        T *TTI = static_cast<T *>(this);
        // If both types need to be split then the split is free.
        InstructionCost SplitCost =
            (!SplitSrc || !SplitDst) ? TTI->getVectorSplitCost() : 0;
        // ...
      }
      // Scalarization cost is invalid; we can't assume any number of lanes.
      if (isa<ScalableVectorType>(DstVTy))
        return InstructionCost::getInvalid();
      // In other cases where the source or destination are illegal, assume
      // the operation will get scalarized.
      unsigned Num = cast<FixedVectorType>(DstVTy)->getNumElements();
      InstructionCost Cost = thisT()->getCastInstrCost(
          Opcode, Dst->getScalarType(), Src->getScalarType(), CCH, CostKind, I);
    if (Opcode == Instruction::BitCast) {
      // Illegal bitcasts are done by storing and loading from a stack slot.
      // ...
    }
    // ...
    return thisT()->getVectorInstrCost(Instruction::ExtractElement, VecTy,
                                       TTI::TCK_RecipThroughput, Index,
                                       nullptr, nullptr) +
           thisT()->getCastInstrCost(Opcode, Dst, VecTy->getElementType(),
                                     TTI::CastContextHint::None,
                                     TTI::TCK_RecipThroughput);
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");
    // ...
    assert(CondTy && "CondTy must exist");
    // ...
    if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
        !TLI->isOperationExpand(ISD, LT.second)) {
      // The operation is legal. Assume it costs 1. Multiply by the
      // type-legalization overhead.
      return LT.first * 1;
    }

    // Otherwise, assume that the compare/select is scalarized.
    if (auto *ValVTy = dyn_cast<VectorType>(ValTy)) {
      if (isa<ScalableVectorType>(ValTy))
        return InstructionCost::getInvalid();

      unsigned Num = cast<FixedVectorType>(ValVTy)->getNumElements();
      InstructionCost Cost = thisT()->getCmpSelInstrCost(
          Opcode, ValVTy->getScalarType(), CondTy, VecPred, CostKind, I);
    Value *Op0 = nullptr;
    Value *Op1 = nullptr;
    if (auto *IE = dyn_cast<InsertElementInst>(&I)) {
      Op0 = IE->getOperand(0);
      Op1 = IE->getOperand(1);
    }
    return thisT()->getVectorInstrCost(I.getOpcode(), Val, CostKind, Index,
                                       Op0, Op1);
  InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
                                            int VF,
                                            const APInt &DemandedDstElts,
                                            TTI::TargetCostKind CostKind) {
    assert(DemandedDstElts.getBitWidth() == (unsigned)VF * ReplicationFactor &&
           "Unexpected size of DemandedDstElts.");
    // ...
    // Model the replication shuffle as extracting every demanded source lane
    // and inserting every demanded destination lane.
    Cost += thisT()->getScalarizationOverhead(SrcVT, DemandedSrcElts,
                                              /*Insert*/ false,
                                              /*Extract*/ true, CostKind);
    Cost += thisT()->getScalarizationOverhead(ReplicatedVT, DemandedDstElts,
                                              /*Insert*/ true,
                                              /*Extract*/ false, CostKind);
    assert(!Src->isVoidTy() && "Invalid type");
    // Assume types, such as structs, are expensive.
    if (getTLI()->getValueType(DL, Src, true) == MVT::Other)
      return 4;
    // ...
    // A vector load/store whose legalized type is narrower than the vector
    // itself will scalarize unless the corresponding extending load or
    // truncating store is legal.
    if (Opcode == Instruction::Store)
      LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
    // ...
    // Account for the cost of building or decomposing the vector.
    Cost += getScalarizationOverhead(
        cast<VectorType>(Src), Opcode != Instruction::Store,
        Opcode == Instruction::Store, CostKind);
    return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment,
                                       /*VariableMask*/ true,
                                       /*IsGatherScatter*/ false, CostKind);
  }

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) {
    return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, VariableMask,
                                       /*IsGatherScatter*/ true, CostKind);
  }

  InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I) {
    // Without native strided memory operations, assume we lower to a
    // gather/scatter (which may in turn be scalarized).
    return thisT()->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                           Alignment, CostKind, I);
  }
  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) {
    // We cannot scalarize scalable vectors, so return an invalid cost.
    if (isa<ScalableVectorType>(VecTy))
      return InstructionCost::getInvalid();

    auto *VT = cast<FixedVectorType>(VecTy);
    unsigned NumElts = VT->getNumElements();
    assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
    unsigned NumSubElts = NumElts / Factor;

    // Firstly, the cost of the load/store operation.
    InstructionCost Cost;
    if (UseMaskForCond || UseMaskForGaps)
      Cost = thisT()->getMaskedMemoryOpCost(Opcode, VecTy, Alignment,
                                            AddressSpace, CostKind);
    else
      Cost = thisT()->getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace,
                                      CostKind);

    // Legalize the vector type, and get the legalized and unlegalized sizes.
    unsigned VecTySize = thisT()->getDataLayout().getTypeStoreSize(VecTy);
    // ...
    // The number of loads of a legal type needed for the unlegalized vector.
    unsigned NumLegalInsts = divideCeil(VecTySize, VecTyLTSize);
    // The number of unlegalized-type elements per legal instruction.
    unsigned NumEltsPerLegalInst = divideCeil(NumElts, NumLegalInsts);
    // Determine which legal instructions will be used.
    BitVector UsedInsts(NumLegalInsts, false);
    for (unsigned Index : Indices)
      for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
        UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
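// --- Illustrative sketch (not part of this header) -------------------------
// The BitVector bookkeeping scales the memory cost by the fraction of legal
// instructions actually touched by the requested interleave members.
// Standalone mirror using std::vector<bool> (sizes are illustrative):
#include <cstdint>
#include <iostream>
#include <vector>

uint64_t usedLegalInsts(const std::vector<unsigned> &Indices, unsigned Factor,
                        unsigned NumSubElts, unsigned NumLegalInsts,
                        unsigned NumEltsPerLegalInst) {
  std::vector<bool> Used(NumLegalInsts, false);
  // Mark which legal-width load covers each demanded element.
  for (unsigned Index : Indices)
    for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
      Used[(Index + Elt * Factor) / NumEltsPerLegalInst] = true;
  uint64_t Count = 0;
  for (bool B : Used)
    Count += B;
  return Count;
}

int main() {
  // 16 elements, factor 4, only member 0 demanded, legalized into 8
  // two-element loads: member 0 touches elements 0, 4, 8, 12, i.e. every
  // other load -- 4 of 8 legal loads are used.
  std::cout << usedLegalInsts({0}, 4, 4, 8, 2) << "\n"; // 4
}
// ---------------------------------------------------------------------------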
1469 "Interleaved memory op has too many members");
1475 for (
unsigned Index : Indices) {
1476 assert(
Index < Factor &&
"Invalid index for interleaved memory op");
1477 for (
unsigned Elm = 0; Elm < NumSubElts; Elm++)
1478 DemandedLoadStoreElts.
setBit(
Index + Elm * Factor);
1481 if (Opcode == Instruction::Load) {
1491 SubVT, DemandedAllSubElts,
1493 Cost += Indices.
size() * InsSubCost;
1494 Cost += thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,
1512 SubVT, DemandedAllSubElts,
1514 Cost += ExtSubCost * Indices.
size();
1515 Cost += thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,
1520 if (!UseMaskForCond)
1525 Cost += thisT()->getReplicationShuffleCost(
1526 I8Type, Factor, NumSubElts,
1527 UseMaskForGaps ? DemandedLoadStoreElts : DemandedAllResultElts,
1535 if (UseMaskForGaps) {
1537 Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, MaskVT,
1562 (
RetTy->isVectorTy() ? cast<VectorType>(
RetTy)->getElementCount()
    case Intrinsic::powi:
      if (auto *RHSC = dyn_cast<ConstantInt>(Args[1])) {
        bool ShouldOptForSize = I->getParent()->getParent()->hasOptSize();
        if (getTLI()->isBeneficialToExpandPowI(RHSC->getSExtValue(),
                                               ShouldOptForSize)) {
          // Expansion is square-and-multiply: the number of multiplies
          // follows from the exponent's active bits and popcount.
          unsigned ActiveBits = Exponent.getActiveBits();
          unsigned PopCount = Exponent.popcount();
          InstructionCost Cost = (ActiveBits + PopCount - 2) *
                                 thisT()->getArithmeticInstrCost(
                                     Instruction::FMul, RetTy, CostKind);
          if (RHSC->isNegative())
            Cost += thisT()->getArithmeticInstrCost(Instruction::FDiv, RetTy,
                                                    CostKind);
          return Cost;
        }
      }
      break;
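// --- Illustrative sketch (not part of this header) -------------------------
// Square-and-multiply expansion of powi costs (ActiveBits - 1) squarings plus
// (PopCount - 1) odd-bit multiplies, i.e. ActiveBits + PopCount - 2 total,
// plus one division for a negative exponent. Standalone mirror (N > 0):
#include <bit>
#include <cstdint>
#include <iostream>

// Square-and-multiply expansion of pow(x, n).
double powi(double X, uint32_t N) {
  double Result = 1.0;
  for (; N; N >>= 1, X *= X) // one squaring per exponent bit
    if (N & 1)
      Result *= X;           // one extra multiply per set bit
  return Result;
}

// Multiplication count the cost model charges for the same expansion.
unsigned powiMulCount(uint32_t N) {
  unsigned ActiveBits = 32 - std::countl_zero(N); // highest set bit position
  unsigned PopCount = std::popcount(N);
  return ActiveBits + PopCount - 2;
}

int main() {
  std::cout << powi(2.0, 13) << "\n";    // 8192
  std::cout << powiMulCount(13) << "\n"; // 13 = 0b1101: 4 + 3 - 2 = 5
}
// ---------------------------------------------------------------------------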
    case Intrinsic::cttz:
      // FIXME: If necessary, this should go in target-specific overrides.
      if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCttz(RetTy))
        return TargetTransformInfo::TCC_Basic;
      break;
    case Intrinsic::ctlz:
      // FIXME: If necessary, this should go in target-specific overrides.
      if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCtlz(RetTy))
        return TargetTransformInfo::TCC_Basic;
      break;
    case Intrinsic::memcpy:
      return thisT()->getMemcpyCost(ICA.getInst());
    case Intrinsic::masked_scatter: {
      const Value *Mask = Args[3];
      bool VarMask = !isa<Constant>(Mask);
      Align Alignment = cast<ConstantInt>(Args[2])->getAlignValue();
      return thisT()->getGatherScatterOpCost(Instruction::Store,
                                             ICA.getArgTypes()[0], Args[1],
                                             VarMask, Alignment, CostKind, I);
    }
    case Intrinsic::masked_gather: {
      const Value *Mask = Args[2];
      bool VarMask = !isa<Constant>(Mask);
      Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue();
      return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
                                             VarMask, Alignment, CostKind, I);
    }
    case Intrinsic::experimental_vp_strided_store: {
      const Value *Data = Args[0];
      const Value *Ptr = Args[1];
      const Value *Mask = Args[3];
      const Value *EVL = Args[4];
      bool VarMask = !isa<Constant>(Mask) || !isa<Constant>(EVL);
      Align Alignment = I->getParamAlign(1).valueOrOne();
      return thisT()->getStridedMemoryOpCost(Instruction::Store,
                                             Data->getType(), Ptr, VarMask,
                                             Alignment, CostKind, I);
    }
    case Intrinsic::experimental_vp_strided_load: {
      const Value *Ptr = Args[0];
      const Value *Mask = Args[2];
      const Value *EVL = Args[3];
      bool VarMask = !isa<Constant>(Mask) || !isa<Constant>(EVL);
      Align Alignment = I->getParamAlign(0).valueOrOne();
      return thisT()->getStridedMemoryOpCost(Instruction::Load, RetTy, Ptr,
                                             VarMask, Alignment, CostKind, I);
    }
    case Intrinsic::experimental_stepvector: {
      if (isa<ScalableVectorType>(RetTy))
        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
      // The cost of materialising a constant series vector.
      return TargetTransformInfo::TCC_Basic;
    }
    case Intrinsic::vector_extract: {
      // FIXME: Handle the case where a scalable vector is extracted from a
      // scalable vector.
      if (isa<ScalableVectorType>(RetTy))
        return InstructionCost::getInvalid();
      unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue();
      return thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
                                     cast<VectorType>(Args[0]->getType()),
                                     std::nullopt, CostKind, Index,
                                     cast<VectorType>(RetTy));
    }
    case Intrinsic::vector_insert: {
      // FIXME: Handle the case where a scalable vector is inserted into a
      // scalable vector.
      if (isa<ScalableVectorType>(Args[1]->getType()))
        return InstructionCost::getInvalid();
      unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
      return thisT()->getShuffleCost(
          TTI::SK_InsertSubvector, cast<VectorType>(Args[0]->getType()),
          std::nullopt, CostKind, Index, cast<VectorType>(Args[1]->getType()));
    }
    case Intrinsic::vector_reverse: {
      return thisT()->getShuffleCost(TTI::SK_Reverse,
                                     cast<VectorType>(Args[0]->getType()),
                                     std::nullopt, CostKind, 0, nullptr);
    }
    case Intrinsic::vector_splice: {
      unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
      return thisT()->getShuffleCost(TTI::SK_Splice,
                                     cast<VectorType>(Args[0]->getType()),
                                     std::nullopt, CostKind, Index, nullptr);
    }
    case Intrinsic::vector_reduce_add:
    case Intrinsic::vector_reduce_mul:
    case Intrinsic::vector_reduce_and:
    case Intrinsic::vector_reduce_or:
    case Intrinsic::vector_reduce_xor:
    case Intrinsic::vector_reduce_smax:
    case Intrinsic::vector_reduce_smin:
    case Intrinsic::vector_reduce_fmax:
    case Intrinsic::vector_reduce_fmin:
    case Intrinsic::vector_reduce_fmaximum:
    case Intrinsic::vector_reduce_fminimum:
    case Intrinsic::vector_reduce_umax:
    case Intrinsic::vector_reduce_umin: {
      IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, I, 1);
      return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
    }
    case Intrinsic::vector_reduce_fadd:
    case Intrinsic::vector_reduce_fmul: {
      IntrinsicCostAttributes Attrs(
          IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, I, 1);
      return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
    }
    case Intrinsic::fshl:
    case Intrinsic::fshr: {
      const Value *X = Args[0];
      const Value *Y = Args[1];
      const Value *Z = Args[2];
      // ...
      // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
      // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
      InstructionCost Cost = 0;
      Cost +=
          thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
      Cost +=
          thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, CostKind);
      Cost += thisT()->getArithmeticInstrCost(
          BinaryOperator::Shl, RetTy, CostKind, OpInfoX,
          {OpInfoZ.Kind, TTI::OP_None});
      Cost += thisT()->getArithmeticInstrCost(
          BinaryOperator::LShr, RetTy, CostKind, OpInfoY,
          {OpInfoZ.Kind, TTI::OP_None});
      // Non-constant shift amounts require a modulo.
      if (!OpInfoZ.isConstant())
        Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
                                                CostKind, OpInfoZ, OpInfoBW);
      // For non-rotates (X != Y) we must add shift-by-zero handling costs.
      if (X != Y) {
        Type *CondTy = RetTy->getWithNewBitWidth(1);
        Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
                                            CmpInst::ICMP_EQ, CostKind);
        Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
                                            CondTy, CmpInst::ICMP_EQ, CostKind);
      }
      return Cost;
    }
    case Intrinsic::get_active_lane_mask: {
      EVT ResVT = getTLI()->getValueType(DL, RetTy, true);
      EVT ArgType = getTLI()->getValueType(DL, ICA.getArgTypes()[0], true);

      // If we're not expanding the intrinsic then we assume this is cheap to
      // implement.
      if (!getTLI()->shouldExpandGetActiveLaneMask(ResVT, ArgType))
        return getTypeLegalizationCost(RetTy).first;

      // Otherwise the expansion is a saturating op plus a compare.
      // ...
      InstructionCost Cost =
          thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, ExpRetTy, RetTy,
                                          CmpInst::ICMP_ULT, CostKind);
      return Cost;
    }
    case Intrinsic::experimental_cttz_elts: {
      EVT ArgType = getTLI()->getValueType(DL, ICA.getArgTypes()[0], true);

      // If we're not expanding the intrinsic then we assume this is cheap to
      // implement.
      if (!getTLI()->shouldExpandCttzElements(ArgType))
        return getTypeLegalizationCost(RetTy).first;

      // Find the smallest "sensible" element type to use for the expansion.
      bool ZeroIsPoison = !cast<ConstantInt>(Args[1])->isZero();
      // ...
      if (isa<ScalableVectorType>(ICA.getArgTypes()[0]) && I && I->getCaller())
        VScaleRange = getVScaleRange(I->getCaller(), 64);

      unsigned EltWidth = getTLI()->getBitWidthForCttzElements(
          RetTy, cast<VectorType>(Args[0]->getType())->getElementCount(),
          ZeroIsPoison, &VScaleRange);
      Type *NewEltTy = IntegerType::getIntNTy(RetTy->getContext(), EltWidth);

      // Create the new vector type & get the vector length.
      Type *NewVecTy = VectorType::get(
          NewEltTy, cast<VectorType>(Args[0]->getType())->getElementCount());

      IntrinsicCostAttributes StepVecAttrs(Intrinsic::experimental_stepvector,
                                           NewVecTy, {}, FMF);
      InstructionCost Cost =
          thisT()->getIntrinsicInstrCost(StepVecAttrs, CostKind);

      Cost +=
          thisT()->getArithmeticInstrCost(Instruction::Sub, NewVecTy, CostKind);
      Cost += thisT()->getCastInstrCost(Instruction::SExt, NewVecTy,
                                        Args[0]->getType(),
                                        TTI::CastContextHint::None, CostKind);
      Cost +=
          thisT()->getArithmeticInstrCost(Instruction::And, NewVecTy, CostKind);

      IntrinsicCostAttributes ReducAttrs(Intrinsic::vector_reduce_umax,
                                         NewEltTy, NewVecTy, FMF, I, 1);
      Cost += thisT()->getTypeBasedIntrinsicInstrCost(ReducAttrs, CostKind);
      Cost +=
          thisT()->getArithmeticInstrCost(Instruction::Sub, NewEltTy, CostKind);

      return Cost;
    }
    // VP intrinsics should have the same cost as their non-vp counterpart.
    std::optional<unsigned> FOp =
        VPIntrinsic::getFunctionalOpcodeForVP(ICA.getID());
    if (FOp) {
      if (ICA.getID() == Intrinsic::vp_load) {
        Align Alignment;
        if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
          Alignment = VPI->getPointerAlignment().valueOrOne();
        unsigned AS = 0;
        if (auto *PtrTy =
                dyn_cast<PointerType>(ICA.getArgs()[0]->getType()))
          AS = PtrTy->getAddressSpace();
        return thisT()->getMemoryOpCost(*FOp, ICA.getReturnType(), Alignment,
                                        AS, CostKind);
      }
      if (ICA.getID() == Intrinsic::vp_store) {
        Align Alignment;
        if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
          Alignment = VPI->getPointerAlignment().valueOrOne();
        unsigned AS = 0;
        if (auto *PtrTy =
                dyn_cast<PointerType>(ICA.getArgs()[1]->getType()))
          AS = PtrTy->getAddressSpace();
        return thisT()->getMemoryOpCost(*FOp, Args[0]->getType(), Alignment,
                                        AS, CostKind);
      }
      if (VPBinOpIntrinsic::isVPBinOp(ICA.getID())) {
        return thisT()->getArithmeticInstrCost(*FOp, ICA.getReturnType(),
                                               CostKind);
      }
    }

    std::optional<Intrinsic::ID> FID =
        VPIntrinsic::getFunctionalIntrinsicIDForVP(ICA.getID());
    if (FID) {
      // Non-vp version will have same arg types except mask and vector
      // length.
      assert(ICA.getArgTypes().size() >= 2 &&
             "Expected VPIntrinsic to have Mask and Vector Length args and "
             "types");
      ArrayRef<Type *> NewTys = ArrayRef(ICA.getArgTypes()).drop_back(2);

      // VP reductions have a start value; the fadd/fmul reductions keep it
      // in the non-vp form as well, every other reduction drops it.
      if (*FID != Intrinsic::vector_reduce_fadd &&
          *FID != Intrinsic::vector_reduce_fmul)
        NewTys = NewTys.drop_front();

      IntrinsicCostAttributes NewICA(*FID, ICA.getReturnType(), NewTys,
                                     ICA.getFlags());
      return thisT()->getIntrinsicInstrCost(NewICA, CostKind);
    }
    // Assume that we need to scalarize this intrinsic; compute the
    // scalarization overhead based on types.
    InstructionCost ScalarizationCost = InstructionCost::getInvalid();
    if (!SkipScalarizationCost) {
      ScalarizationCost = 0;
      if (!RetTy->isVoidTy())
        ScalarizationCost += getScalarizationOverhead(
            cast<VectorType>(RetTy),
            /*Insert*/ true, /*Extract*/ false, CostKind);
      ScalarizationCost +=
          getOperandsScalarizationOverhead(Args, ICA.getArgTypes(), CostKind);
    }

    IntrinsicCostAttributes Attrs(IID, RetTy, ICA.getArgTypes(), FMF, I,
                                  ScalarizationCost);
    return thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
      unsigned VecTyIndex = 0;
      if (IID == Intrinsic::vector_reduce_fadd ||
          IID == Intrinsic::vector_reduce_fmul)
        VecTyIndex = 1;
      assert(Tys.size() > VecTyIndex && "Unexpected IntrinsicCostAttributes");
      VecOpTy = dyn_cast<VectorType>(Tys[VecTyIndex]);
    }
    // ...
    // Scalable vectors cannot be scalarized, so bail out early.
    if (isa<ScalableVectorType>(RetTy) ||
        any_of(Tys, [](const Type *Ty) { return isa<ScalableVectorType>(Ty); }))
      return InstructionCost::getInvalid();

    InstructionCost ScalarizationCost =
        SkipScalarizationCost ? ScalarizationCostPassed : 0;
    unsigned ScalarCalls = 1;
    Type *ScalarRetTy = RetTy;
    if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
      if (!SkipScalarizationCost)
        ScalarizationCost = getScalarizationOverhead(
            RetVTy, /*Insert*/ true, /*Extract*/ false, CostKind);
      ScalarCalls = std::max(ScalarCalls,
                             cast<FixedVectorType>(RetVTy)->getNumElements());
      ScalarRetTy = RetTy->getScalarType();
    }
    SmallVector<Type *, 4> ScalarTys;
    for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
      Type *Ty = Tys[i];
      if (auto *VTy = dyn_cast<VectorType>(Ty)) {
        if (!SkipScalarizationCost)
          ScalarizationCost += getScalarizationOverhead(
              VTy, /*Insert*/ false, /*Extract*/ true, CostKind);
        ScalarCalls = std::max(ScalarCalls,
                               cast<FixedVectorType>(VTy)->getNumElements());
        Ty = Ty->getScalarType();
      }
      ScalarTys.push_back(Ty);
    }
    if (ScalarCalls == 1)
      return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.

    IntrinsicCostAttributes ScalarAttrs(IID, ScalarRetTy, ScalarTys, FMF);
    InstructionCost ScalarCost =
        thisT()->getIntrinsicInstrCost(ScalarAttrs, CostKind);

    return ScalarCalls * ScalarCost + ScalarizationCost;
    case Intrinsic::sqrt:      ISD = ISD::FSQRT;         break;
    case Intrinsic::sin:       ISD = ISD::FSIN;          break;
    case Intrinsic::cos:       ISD = ISD::FCOS;          break;
    case Intrinsic::exp:       ISD = ISD::FEXP;          break;
    case Intrinsic::exp2:      ISD = ISD::FEXP2;         break;
    case Intrinsic::exp10:     ISD = ISD::FEXP10;        break;
    case Intrinsic::log:       ISD = ISD::FLOG;          break;
    case Intrinsic::log10:     ISD = ISD::FLOG10;        break;
    case Intrinsic::log2:      ISD = ISD::FLOG2;         break;
    case Intrinsic::fabs:      ISD = ISD::FABS;          break;
    case Intrinsic::canonicalize: ISD = ISD::FCANONICALIZE; break;
    case Intrinsic::minnum:    ISD = ISD::FMINNUM;       break;
    case Intrinsic::maxnum:    ISD = ISD::FMAXNUM;       break;
    case Intrinsic::minimum:   ISD = ISD::FMINIMUM;      break;
    case Intrinsic::maximum:   ISD = ISD::FMAXIMUM;      break;
    case Intrinsic::copysign:  ISD = ISD::FCOPYSIGN;     break;
    case Intrinsic::floor:     ISD = ISD::FFLOOR;        break;
    case Intrinsic::ceil:      ISD = ISD::FCEIL;         break;
    case Intrinsic::trunc:     ISD = ISD::FTRUNC;        break;
    case Intrinsic::nearbyint: ISD = ISD::FNEARBYINT;    break;
    case Intrinsic::rint:      ISD = ISD::FRINT;         break;
    case Intrinsic::lrint:     ISD = ISD::LRINT;         break;
    case Intrinsic::llrint:    ISD = ISD::LLRINT;        break;
    case Intrinsic::round:     ISD = ISD::FROUND;        break;
    case Intrinsic::roundeven: ISD = ISD::FROUNDEVEN;    break;
    case Intrinsic::pow:       ISD = ISD::FPOW;          break;
    case Intrinsic::fma:       ISD = ISD::FMA;           break;
    case Intrinsic::fmuladd:   ISD = ISD::FMA;           break;
    case Intrinsic::experimental_constrained_fmuladd:
      ISD = ISD::STRICT_FMA;
      break;
    // These intrinsics are free.
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
    case Intrinsic::arithmetic_fence:
      return 0;
    case Intrinsic::masked_store: {
      Type *Ty = Tys[0];
      Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
      return thisT()->getMaskedMemoryOpCost(Instruction::Store, Ty, TyAlign, 0,
                                            CostKind);
    }
    case Intrinsic::masked_load: {
      Type *Ty = RetTy;
      Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
      return thisT()->getMaskedMemoryOpCost(Instruction::Load, Ty, TyAlign, 0,
                                            CostKind);
    }
    case Intrinsic::vector_reduce_add:
    case Intrinsic::vector_reduce_mul:
    case Intrinsic::vector_reduce_and:
    case Intrinsic::vector_reduce_or:
    case Intrinsic::vector_reduce_xor:
      return thisT()->getArithmeticReductionCost(
          getArithmeticReductionInstruction(IID), VecOpTy, std::nullopt,
          CostKind);
    case Intrinsic::vector_reduce_fadd:
    case Intrinsic::vector_reduce_fmul:
      return thisT()->getArithmeticReductionCost(
          getArithmeticReductionInstruction(IID), VecOpTy, FMF, CostKind);
    case Intrinsic::vector_reduce_smax:
    case Intrinsic::vector_reduce_smin:
    case Intrinsic::vector_reduce_umax:
    case Intrinsic::vector_reduce_umin:
    case Intrinsic::vector_reduce_fmax:
    case Intrinsic::vector_reduce_fmin:
    case Intrinsic::vector_reduce_fmaximum:
    case Intrinsic::vector_reduce_fminimum:
      return thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID),
                                             VecOpTy, ICA.getFlags(), CostKind);
    case Intrinsic::abs: {
      // abs(X) = select(icmp(X,0),X,sub(0,X))
      Type *CondTy = RetTy->getWithNewBitWidth(1);
      CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
      InstructionCost Cost = 0;
      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
                                          Pred, CostKind);
      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
                                          Pred, CostKind);
      Cost += thisT()->getArithmeticInstrCost(
          BinaryOperator::Sub, RetTy, CostKind,
          {TTI::OK_UniformConstantValue, TTI::OP_None});
      return Cost;
    }
    case Intrinsic::smax:
    case Intrinsic::smin:
    case Intrinsic::umax:
    case Intrinsic::umin: {
      // minmax(X,Y) = select(icmp(X,Y),X,Y)
      Type *CondTy = RetTy->getWithNewBitWidth(1);
      bool IsUnsigned = IID == Intrinsic::umax || IID == Intrinsic::umin;
      CmpInst::Predicate Pred =
          IsUnsigned ? CmpInst::ICMP_UGT : CmpInst::ICMP_SGT;
      InstructionCost Cost = 0;
      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
                                          Pred, CostKind);
      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
                                          Pred, CostKind);
      return Cost;
    }
    case Intrinsic::sadd_sat:
    case Intrinsic::ssub_sat: {
      Type *CondTy = RetTy->getWithNewBitWidth(1);

      Type *OpTy = StructType::create({RetTy, CondTy});
      Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
                                     ? Intrinsic::sadd_with_overflow
                                     : Intrinsic::ssub_with_overflow;
      CmpInst::Predicate Pred = CmpInst::ICMP_SGT;

      // SatMax -> Overflow && SumDiff < 0
      // SatMin -> Overflow && SumDiff >= 0
      InstructionCost Cost = 0;
      IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
                                    nullptr, ScalarizationCostPassed);
      Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
                                          Pred, CostKind);
      Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
                                              CondTy, Pred, CostKind);
      return Cost;
    }
    case Intrinsic::uadd_sat:
    case Intrinsic::usub_sat: {
      Type *CondTy = RetTy->getWithNewBitWidth(1);

      Type *OpTy = StructType::create({RetTy, CondTy});
      Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
                                     ? Intrinsic::uadd_with_overflow
                                     : Intrinsic::usub_with_overflow;

      InstructionCost Cost = 0;
      IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
                                    nullptr, ScalarizationCostPassed);
      Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
                                          CmpInst::BAD_ICMP_PREDICATE,
                                          CostKind);
      return Cost;
    }
    case Intrinsic::smul_fix:
    case Intrinsic::umul_fix: {
      unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
      Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);

      unsigned ExtOp =
          IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
      TTI::CastContextHint CCH = TTI::CastContextHint::None;

      InstructionCost Cost = 0;
      Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind);
      Cost +=
          thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
      Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
                                            CCH, CostKind);
      Cost += thisT()->getArithmeticInstrCost(
          Instruction::LShr, RetTy, CostKind,
          {TTI::OK_AnyValue, TTI::OP_None},
          {TTI::OK_UniformConstantValue, TTI::OP_None});
      Cost += thisT()->getArithmeticInstrCost(
          Instruction::Shl, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
          {TTI::OK_UniformConstantValue, TTI::OP_None});
      Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind);
      return Cost;
    }
    case Intrinsic::sadd_with_overflow:
    case Intrinsic::ssub_with_overflow: {
      Type *SumTy = RetTy->getContainedType(0);
      Type *OverflowTy = RetTy->getContainedType(1);
      unsigned Opcode = IID == Intrinsic::sadd_with_overflow
                            ? BinaryOperator::Add
                            : BinaryOperator::Sub;

      // Add: Overflow -> (Result < LHS) ^ (RHS < 0)
      // Sub: Overflow -> (Result < LHS) ^ (RHS > 0)
      InstructionCost Cost = 0;
      Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
      Cost += 2 * thisT()->getCmpSelInstrCost(
                      Instruction::ICmp, SumTy, OverflowTy,
                      CmpInst::ICMP_SGT, CostKind);
      Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Xor, OverflowTy,
                                              CostKind);
      return Cost;
    }
    case Intrinsic::uadd_with_overflow:
    case Intrinsic::usub_with_overflow: {
      Type *SumTy = RetTy->getContainedType(0);
      Type *OverflowTy = RetTy->getContainedType(1);
      unsigned Opcode = IID == Intrinsic::uadd_with_overflow
                            ? BinaryOperator::Add
                            : BinaryOperator::Sub;
      CmpInst::Predicate Pred = IID == Intrinsic::uadd_with_overflow
                                    ? CmpInst::ICMP_ULT
                                    : CmpInst::ICMP_UGT;

      InstructionCost Cost = 0;
      Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
      Cost +=
          thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, OverflowTy,
                                      Pred, CostKind);
      return Cost;
    }
    case Intrinsic::smul_with_overflow:
    case Intrinsic::umul_with_overflow: {
      Type *MulTy = RetTy->getContainedType(0);
      Type *OverflowTy = RetTy->getContainedType(1);
      unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
      Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
      bool IsSigned = IID == Intrinsic::smul_with_overflow;

      unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
      TTI::CastContextHint CCH = TTI::CastContextHint::None;

      InstructionCost Cost = 0;
      Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind);
      Cost +=
          thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
      Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
                                            CCH, CostKind);
      Cost += thisT()->getArithmeticInstrCost(
          Instruction::LShr, ExtTy, CostKind,
          {TTI::OK_AnyValue, TTI::OP_None},
          {TTI::OK_UniformConstantValue, TTI::OP_None});

      if (IsSigned)
        Cost += thisT()->getArithmeticInstrCost(
            Instruction::AShr, MulTy, CostKind,
            {TTI::OK_AnyValue, TTI::OP_None},
            {TTI::OK_UniformConstantValue, TTI::OP_None});

      Cost += thisT()->getCmpSelInstrCost(
          BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
      return Cost;
    }
    case Intrinsic::fptosi_sat:
    case Intrinsic::fptoui_sat: {
      if (Tys.empty())
        break;
      Type *FromTy = Tys[0];
      bool IsSigned = IID == Intrinsic::fptosi_sat;

      // Clamp into range with minnum/maxnum, convert, and (for the signed
      // case) select away NaN inputs.
      InstructionCost Cost = 0;
      IntrinsicCostAttributes Attrs1(Intrinsic::minnum, FromTy,
                                     {FromTy, FromTy});
      Cost += thisT()->getIntrinsicInstrCost(Attrs1, CostKind);
      IntrinsicCostAttributes Attrs2(Intrinsic::maxnum, FromTy,
                                     {FromTy, FromTy});
      Cost += thisT()->getIntrinsicInstrCost(Attrs2, CostKind);
      Cost += thisT()->getCastInstrCost(
          IsSigned ? Instruction::FPToSI : Instruction::FPToUI, RetTy, FromTy,
          TTI::CastContextHint::None, CostKind);
      if (IsSigned) {
        Type *CondTy = RetTy->getWithNewBitWidth(1);
        Cost += thisT()->getCmpSelInstrCost(
            BinaryOperator::FCmp, FromTy, CondTy, CmpInst::FCMP_UNO, CostKind);
        Cost += thisT()->getCmpSelInstrCost(
            BinaryOperator::Select, RetTy, CondTy, CmpInst::FCMP_UNO, CostKind);
      }
      return Cost;
    }
    case Intrinsic::ctpop:
      ISD = ISD::CTPOP;
      // In case of legalization use TCC_Expensive: cheaper than a library
      // call, but still not a cheap instruction.
      SingleCallCost = TargetTransformInfo::TCC_Expensive;
      break;
    case Intrinsic::ctlz:       ISD = ISD::CTLZ;       break;
    case Intrinsic::cttz:       ISD = ISD::CTTZ;       break;
    case Intrinsic::bswap:      ISD = ISD::BSWAP;      break;
    case Intrinsic::bitreverse: ISD = ISD::BITREVERSE; break;
    }
    // ...
    if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
        TLI->isFAbsFree(LT.second))
      return 0;
    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
      // Legal; assume it costs 1, with some overhead if the type is split.
      if (LT.first > 1)
        return (LT.first * 2);
      return (LT.first * 1);
    }
    if (!TLI->isOperationExpand(ISD, LT.second))
      // Custom lowered; assume the code is twice as expensive.
      return (LT.first * 2);
    // If we can't lower fmuladd into an FMA, estimate the cost of an fmul
    // followed by an fadd.
    if (IID == Intrinsic::fmuladd)
      return thisT()->getArithmeticInstrCost(BinaryOperator::FMul, RetTy,
                                             CostKind) +
             thisT()->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy,
                                             CostKind);
    if (IID == Intrinsic::experimental_constrained_fmuladd) {
      IntrinsicCostAttributes FMulAttrs(
          Intrinsic::experimental_constrained_fmul, RetTy, Tys);
      IntrinsicCostAttributes FAddAttrs(
          Intrinsic::experimental_constrained_fadd, RetTy, Tys);
      return thisT()->getIntrinsicInstrCost(FMulAttrs, CostKind) +
             thisT()->getIntrinsicInstrCost(FAddAttrs, CostKind);
    }
    // Else, assume that we need to scalarize this intrinsic. For math
    // builtins this will emit a costly libcall, adding call overhead and
    // spills. Make it very expensive.
    if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
      // Scalable vectors cannot be scalarized, so return an invalid cost.
      if (isa<ScalableVectorType>(RetTy) ||
          any_of(Tys, [](const Type *Ty) {
            return isa<ScalableVectorType>(Ty);
          }))
        return InstructionCost::getInvalid();

      InstructionCost ScalarizationCost =
          SkipScalarizationCost
              ? ScalarizationCostPassed
              : getScalarizationOverhead(RetVTy, /*Insert*/ true,
                                         /*Extract*/ false, CostKind);

      unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)->getNumElements();
      SmallVector<Type *, 4> ScalarTys;
      for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
        Type *Ty = Tys[i];
        if (Ty->isVectorTy())
          Ty = Ty->getScalarType();
        ScalarTys.push_back(Ty);
      }
      IntrinsicCostAttributes Attrs(IID, RetTy->getScalarType(), ScalarTys,
                                    FMF);
      InstructionCost ScalarCost =
          thisT()->getIntrinsicInstrCost(Attrs, CostKind);
      for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
        if (auto *VTy = dyn_cast<VectorType>(Tys[i])) {
          if (!ICA.skipScalarizationCost())
            ScalarizationCost += getScalarizationOverhead(
                VTy, /*Insert*/ false, /*Extract*/ true, CostKind);
          ScalarCalls = std::max(ScalarCalls,
                                 cast<FixedVectorType>(VTy)->getNumElements());
        }
      }
      return ScalarCalls * ScalarCost + ScalarizationCost;
    }
    // This is going to be turned into a library call; make it expensive.
    return SingleCallCost;
  }

  unsigned getNumberOfParts(Type *Tp) {
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
    return LT.first.isValid() ? *LT.first.getValue() : 0;
  }
    // Targets must implement a default value for the scalable case, since
    // we don't know how many lanes the vector has.
    if (isa<ScalableVectorType>(Ty))
      return InstructionCost::getInvalid();

    Type *ScalarTy = Ty->getElementType();
    unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
    if ((Opcode == Instruction::Or || Opcode == Instruction::And) &&
        ScalarTy == IntegerType::getInt1Ty(Ty->getContext()) &&
        NumVecElts >= 2) {
      // Or/And reductions of i1 are lowered as a bitcast to iN plus a single
      // compare against zero (or against all-ones).
      Type *ValTy = IntegerType::get(Ty->getContext(), NumVecElts);
      return thisT()->getCastInstrCost(Instruction::BitCast, ValTy, Ty,
                                       TTI::CastContextHint::None, CostKind) +
             thisT()->getCmpSelInstrCost(Instruction::ICmp, ValTy,
                                         CmpInst::makeCmpResultType(ValTy),
                                         CmpInst::BAD_ICMP_PREDICATE, CostKind);
    }
    unsigned NumReduxLevels = Log2_32(NumVecElts);
    // ...
    std::pair<InstructionCost, MVT> LT = thisT()->getTypeLegalizationCost(Ty);
    unsigned LongVectorCount = 0;
    unsigned MVTLen =
        LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
    while (NumVecElts > MVTLen) {
      // Halve the vector: one extract-subvector shuffle plus one arith op.
      NumVecElts /= 2;
      // ...
      ArithCost += thisT()->getArithmeticInstrCost(Opcode, SubTy, CostKind);
      ++LongVectorCount;
    }

    NumReduxLevels -= LongVectorCount;
    // The remaining levels are done with in-register shuffles.
    ArithCost +=
        NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty, CostKind);
    return ShuffleCost + ArithCost +
           thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty,
                                       CostKind, 0, nullptr, nullptr);
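// --- Illustrative sketch (not part of this header) -------------------------
// A tree reduction does log2(N) levels of shuffle+op, which is why the
// generic price is Log2_32(NumVecElts) of each once the vector fits in a
// register. Standalone mirror (power-of-two element count assumed, as the
// cost model assumes via Log2_32; unit costs):
#include <cstdint>
#include <iostream>
#include <vector>

// Pairwise tree reduction: each level halves the element count with one
// "shuffle" (the reindexing) and one vector add -- log2(N) levels total.
int64_t treeReduceAdd(std::vector<int64_t> V) {
  unsigned Levels = 0;
  for (size_t N = V.size(); N > 1; N /= 2, ++Levels)
    for (size_t i = 0; i < N / 2; ++i)
      V[i] += V[i + N / 2]; // combine the upper half into the lower half
  std::cout << "levels: " << Levels << "\n";
  return V[0];
}

int main() {
  // 8 elements -> 3 levels; 1 + 2 + ... + 8 = 36.
  std::cout << treeReduceAdd({1, 2, 3, 4, 5, 6, 7, 8}) << "\n";
}
// ---------------------------------------------------------------------------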
    if (isa<ScalableVectorType>(Ty))
      return InstructionCost::getInvalid();

    auto *VTy = cast<FixedVectorType>(Ty);
    // An in-order (strict) reduction extracts every lane and chains the
    // scalar ops: N extracts plus N scalar arithmetic ops.
    InstructionCost ExtractCost = getScalarizationOverhead(
        VTy, /*Insert=*/false, /*Extract=*/true, CostKind);
    InstructionCost ArithCost = thisT()->getArithmeticInstrCost(
        Opcode, VTy->getElementType(), CostKind);
    ArithCost *= VTy->getNumElements();

    return ExtractCost + ArithCost;
  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                             std::optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind CostKind) {
    assert(Ty && "Unknown reduction vector type");
    // ...
  }

  /// Try to calculate op costs for min/max reduction operations.
  InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
                                         FastMathFlags FMF,
                                         TTI::TargetCostKind CostKind) {
    if (isa<ScalableVectorType>(Ty))
      return InstructionCost::getInvalid();

    unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
    unsigned NumReduxLevels = Log2_32(NumVecElts);
    // ...
    std::pair<InstructionCost, MVT> LT = thisT()->getTypeLegalizationCost(Ty);
    unsigned LongVectorCount = 0;
    unsigned MVTLen =
        LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
    while (NumVecElts > MVTLen) {
      // Halve the vector with an extract shuffle plus a min/max op.
      // ...
    }

    NumReduxLevels -= LongVectorCount;
    // ...
    return ShuffleCost + MinMaxCost +
           thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty,
                                       CostKind, 0, nullptr, nullptr);
  }

  // Extended reduction (reduce over a zext/sext'ed vector): extend + reduce.
    InstructionCost RedCost =
        thisT()->getArithmeticReductionCost(Opcode, ExtTy, FMF, CostKind);
    InstructionCost ExtCost = thisT()->getCastInstrCost(
        IsUnsigned ? Instruction::ZExt : Instruction::SExt, ExtTy, Ty,
        TTI::CastContextHint::None, CostKind);

    return RedCost + ExtCost;
  InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
                                         VectorType *Ty,
                                         TTI::TargetCostKind CostKind) {
    // Without native support this is equivalent to the cost of
    // vecreduce.add(mul(ext(Ty A), ext(Ty B))).
    VectorType *ExtTy = VectorType::get(ResTy, Ty);
    InstructionCost RedCost = thisT()->getArithmeticReductionCost(
        Instruction::Add, ExtTy, std::nullopt, CostKind);
    InstructionCost ExtCost = thisT()->getCastInstrCost(
        IsUnsigned ? Instruction::ZExt : Instruction::SExt, ExtTy, Ty,
        TTI::CastContextHint::None, CostKind);

    InstructionCost MulCost =
        thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);

    return RedCost + MulCost + 2 * ExtCost;
  }