#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace MIPatternMatch;
using namespace AArch64GISelUtils;

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET
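// The GET_GLOBALISEL_* includes pull in code that TableGen generates from the
// AArch64 target description: the predicate bitset here, and the predicate /
// temporary declarations plus the imported matcher (selectImpl) further down.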
  InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);

  ProduceNonFlagSettingCondBr =
      !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
  bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
                                  MachineBasicBlock *DstMBB,
                                  MachineIRBuilder &MIB);
  bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
                                 MachineInstr &I);
  bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
                                     MachineInstr &I);
  void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
                                  unsigned Opc);
  bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
                                      unsigned Opc);
  unsigned emitConstantPoolEntry(const Constant *CPVal,
                                 MachineFunction &MF) const;
  MachineInstr *emitFPCompare(Register LHS, Register RHS,
                              MachineIRBuilder &MIRBuilder,
                              std::optional<CmpInst::Predicate> = std::nullopt) const;
  MachineInstr *
  emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
            std::initializer_list<llvm::SrcOp> SrcOps,
            MachineIRBuilder &MIRBuilder,
            const ComplexRendererFns &RenderFns = std::nullopt) const;
  MachineInstr *emitAddSub(
      const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
      Register Dst, MachineOperand &LHS, MachineOperand &RHS,
      MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     Register VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;
  std::pair<MachineInstr *, AArch64CC::CondCode>
  emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
                 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;

  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;
  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }
  ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
                                          MachineRegisterInfo &MRI) const;

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }

  ComplexRendererFns selectAddrModeShiftedExtendXReg(MachineOperand &Root,
                                                     unsigned SizeInBytes) const;
  bool selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
                         MachineOperand &Offset, unsigned SizeInBytes,
                         bool WantsExt) const;
  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
    return selectAddrModeXRO(Root, Width / 8);
  }

  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
    return selectAddrModeWRO(Root, Width / 8);
  }
  ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
                                           bool AllowROR = false) const;

  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root);
  }
  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root, true);
  }

  AArch64_AM::ShiftExtendType
  getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
                       bool IsLoadStore = false) const;

  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
                      int OpIdx = -1) const;
  void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;
  void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;
  void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
                     int OpIdx = -1) const;
  void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
                     int OpIdx = -1) const;
  void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
                     int OpIdx = -1) const;
  void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
                                    const MachineInstr &MI,
                                    int OpIdx = -1) const;
  bool tryOptSelect(GSelect &Sel);
  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;

  bool ProduceNonFlagSettingCondBr = false;
#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
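// The constructor below stitches the TableGen-generated predicate and
// temporary initializers into the selector, which is why the generated .inc
// file is included twice more inside the initializer list.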
AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
      RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return &AArch64::XSeqPairsClassRegClass;
    return nullptr;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    switch (Ty.getSizeInBits()) {
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }
  // ...
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    if (SizeInBits == 128)
      return &AArch64::XSeqPairsClassRegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }
  // ...
  switch (TRI.getRegSizeInBits(*RC)) {
  // ...
    if (RC != &AArch64::FPR32RegClass)
  // ...
  LLVM_DEBUG(
      dbgs() << "Couldn't find appropriate subregister for register class.");
  // ...

  switch (RB.getID()) {
  case AArch64::GPRRegBankID:
  // ...
  case AArch64::FPRRegBankID:
  // ...
static Register createTuple(ArrayRef<Register> Regs,
                            const unsigned RegClassIDs[],
                            const unsigned SubRegs[], MachineIRBuilder &MIB) {
  unsigned NumRegs = Regs.size();
  if (NumRegs == 1)
    return Regs[0];
  assert(NumRegs >= 2 && NumRegs <= 4 &&
         "Only support between two and 4 registers in a tuple!");
  const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
  auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
  auto RegSequence =
      MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
  for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
    RegSequence.addUse(Regs[I]);
    RegSequence.addImm(SubRegs[I]);
  }
  return RegSequence.getReg(0);
}
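// Illustrative example (not from the original file): packing two D registers
// for a structured load/store produces a single REG_SEQUENCE such as
//   %tuple:dd = REG_SEQUENCE %v0:fpr64, %subreg.dsub0, %v1:fpr64, %subreg.dsub1
// The D/Q helpers below request exactly this by passing the matching
// class-ID and subregister-index tables.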
static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};
  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
}

static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};
  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
}
  auto &MBB = *MI.getParent();
  // ...
  auto &MRI = MF.getRegInfo();
  // ...
  else if (Root.isReg()) {
    // ...
    Immed = ValAndVReg->Value.getSExtValue();
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  // ...
  for (auto &MO : I.operands()) {
    // ...
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
    // ...
    if (!MO.getReg().isVirtual()) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
    // ...
    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
  case AArch64::GPRRegBankID:
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      default:
        return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_PTR_ADD:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      default:
        return GenericOpc;
      }
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
      default:
        return GenericOpc;
      }
    }
    break;
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    case 16:
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    case 32:
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
    case 64:
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
    case 16:
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
    case 32:
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
    case 64:
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
    case 128:
      return isStore ? AArch64::STRQui : AArch64::LDRQui;
    }
    break;
  }
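// For example, a 32-bit scalar load selects LDRWui on the GPR bank but
// LDRSui (an S-register load) on the FPR bank: the register bank, not the
// LLT, decides the opcode here.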
  assert(SrcReg.isValid() && "Expected a valid source register?");
  assert(To && "Destination register class cannot be null");
  // ...
    RegOp.setReg(SubRegCopy.getReg(0));
  // ...
  if (!I.getOperand(0).getReg().isPhysical())
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
// ...
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  // ...
  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
    SrcSize = DstSize = 32;
  if (Reg.isPhysical())
  // ...
  LLT Ty = MRI.getType(Reg);
  // ...
  RC = getRegClassForTypeOnBank(Ty, RB);
  // ...
    dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  // ...
    LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
  // ...
  unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
  unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
  // ...
    auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
    // ...
  } else if (SrcSize > DstSize) {
    // ...
  } else if (DstSize > SrcSize) {
    // ...
    Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
    BuildMI(*I.getParent(), I, I.getDebugLoc(),
            TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
    // ...
    RegOp.setReg(PromoteReg);
  if (I.getOpcode() == TargetOpcode::G_ZEXT) {
    I.setDesc(TII.get(AArch64::COPY));
    assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
    // ...
  }
  I.setDesc(TII.get(AArch64::COPY));
  switch (GenericOpc) {
  case TargetOpcode::G_SITOFP:
    return AArch64::SCVTFUWSri;
  case TargetOpcode::G_UITOFP:
    return AArch64::UCVTFUWSri;
  case TargetOpcode::G_FPTOSI:
    return AArch64::FCVTZSUWSr;
  case TargetOpcode::G_FPTOUI:
    return AArch64::FCVTZUUWSr;
  // ...
  }
  switch (GenericOpc) {
  case TargetOpcode::G_SITOFP:
    return AArch64::SCVTFUXSri;
  case TargetOpcode::G_UITOFP:
    return AArch64::UCVTFUXSri;
  case TargetOpcode::G_FPTOSI:
    return AArch64::FCVTZSUWDr;
  case TargetOpcode::G_FPTOUI:
    return AArch64::FCVTZUUWDr;
  // ...
  }
  switch (GenericOpc) {
  case TargetOpcode::G_SITOFP:
    return AArch64::SCVTFUWDri;
  case TargetOpcode::G_UITOFP:
    return AArch64::UCVTFUWDri;
  case TargetOpcode::G_FPTOSI:
    return AArch64::FCVTZSUXSr;
  case TargetOpcode::G_FPTOUI:
    return AArch64::FCVTZUUXSr;
  // ...
  }
  switch (GenericOpc) {
  case TargetOpcode::G_SITOFP:
    return AArch64::SCVTFUXDri;
  case TargetOpcode::G_UITOFP:
    return AArch64::UCVTFUXDri;
  case TargetOpcode::G_FPTOSI:
    return AArch64::FCVTZSUXDr;
  case TargetOpcode::G_FPTOUI:
    return AArch64::FCVTZUUXDr;
  // ...
  }
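// Reading the opcode names: SCVTF/UCVTF are signed/unsigned int-to-FP and
// FCVTZS/FCVTZU are FP-to-int; the trailing letters encode the register
// sizes, so for instance SCVTFUWDri converts a W (32-bit GPR) source into a
// D (64-bit FP) destination.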
  assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
             RBI.getRegBank(True, MRI, TRI)->getID() &&
         "Expected both select operands to have the same regbank?");
  LLT Ty = MRI.getType(True);
  // ...
         "Expected 32 bit or 64 bit select only?");
  const bool Is32Bit = Size == 32;
  if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
    unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
    auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
    // ...
  }

  unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;

  auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
                                 /* ... */
    // ...
      Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
    // ...
      Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
    // ...
      Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
    // ...

  auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
                          /* ... */
    if (!TrueCst && !FalseCst)
      return false;

    Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
    if (TrueCst && FalseCst) {
      int64_t T = TrueCst->Value.getSExtValue();
      int64_t F = FalseCst->Value.getSExtValue();

      if (T == 0 && F == 1) {
        // ...
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        // ...
      }

      if (T == 0 && F == -1) {
        // ...
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        // ...
      }
    }
    // ...
      int64_t T = TrueCst->Value.getSExtValue();
      // ...
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
      // ...
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
    // ...
      int64_t F = FalseCst->Value.getSExtValue();
      // ...
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
      // ...
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
    // ...

  Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert=*/false);
  Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert=*/true);
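// The constant cases rewrite the select into the CSxxx family: for example
// select(cc, 0, 1) becomes CSINC dst, wzr, wzr with a suitably adjusted
// condition, and select(cc, 0, -1) becomes CSINV likewise, so neither
// constant has to be materialized into a register.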
  assert(Reg.isValid() && "Expected valid register!");
  bool HasZext = false;
  // ...
    unsigned Opc = MI->getOpcode();
    if (!MI->getOperand(0).isReg() ||
        !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
    // ...
    if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
        Opc == TargetOpcode::G_TRUNC) {
      if (Opc == TargetOpcode::G_ZEXT)
      // ...
      Register NextReg = MI->getOperand(1).getReg();
      // ...
      if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
    // ...
    std::optional<uint64_t> C;
    // ...
    case TargetOpcode::G_AND:
    case TargetOpcode::G_XOR: {
      TestReg = MI->getOperand(1).getReg();
      Register ConstantReg = MI->getOperand(2).getReg();
      // ...
        C = VRegAndVal->Value.getZExtValue();
      // ...
        C = VRegAndVal->Value.getSExtValue();
      // ...
    case TargetOpcode::G_ASHR:
    case TargetOpcode::G_LSHR:
    case TargetOpcode::G_SHL: {
      TestReg = MI->getOperand(1).getReg();
      // ...
        C = VRegAndVal->Value.getSExtValue();
    // ...
    unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
    // ...
    case TargetOpcode::G_AND:
      // ...
      if ((*C >> Bit) & 1)
      // ...
    case TargetOpcode::G_SHL:
      // ...
      if (*C <= Bit && (Bit - *C) < TestRegSize) {
      // ...
    case TargetOpcode::G_ASHR:
      // ...
      if (Bit >= TestRegSize)
        Bit = TestRegSize - 1;
      // ...
    case TargetOpcode::G_LSHR:
      // ...
      if ((Bit + *C) < TestRegSize) {
      // ...
    case TargetOpcode::G_XOR:
      // ...
      if ((*C >> Bit) & 1)
  assert(ProduceNonFlagSettingCondBr &&
         "Cannot emit TB(N)Z with speculation tracking!");
  // ...
  LLT Ty = MRI.getType(TestReg);
  // ...
  assert(Bit < 64 && "Bit is too large!");

  bool UseWReg = Bit < 32;
  unsigned NecessarySize = UseWReg ? 32 : 64;
  if (Size != NecessarySize)
    TestReg = moveScalarRegClass(
        TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
        MIB);

  static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
                                          {AArch64::TBZW, AArch64::TBNZW}};
  unsigned Opc = OpcTable[UseWReg][IsNegative];
  // ...
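// Example: emitting a test of bit 3 with IsNegative set produces
// TBNZ w-reg, #3, <MBB>; bit numbers of 32 or above force the X-register
// forms in the first row of OpcTable.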
bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
    MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
    MachineIRBuilder &MIB) {
  assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
  // ...
  int32_t Bit = MaybeBit->Value.exactLogBase2();
  // ...
  emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
  assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
  assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
             AArch64::GPRRegBankID &&
         "Expected GPRs only?");
  auto Ty = MRI.getType(CompareReg);
  // ...
  assert(Width <= 64 && "Expected width to be at most 64?");
  static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
                                          {AArch64::CBNZW, AArch64::CBNZX}};
  unsigned Opc = OpcTable[IsNegative][Width == 64];
  auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
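// Width selects between the W and X forms here, so a 64-bit
// compare-against-zero branch becomes CBZX/CBNZX while anything narrower
// uses CBZW/CBNZW.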
bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
    MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  // ...
  I.eraseFromParent();
  return true;
}
bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
    MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  // ...
  if (!ProduceNonFlagSettingCondBr)
    return false;
  // ...
  if (VRegAndVal && !AndInst) {
    int64_t C = VRegAndVal->Value.getSExtValue();
    // ...
      emitTestBit(LHS, Bit, /*IsNegative=*/false, DestMBB, MIB);
      I.eraseFromParent();
      return true;
    // ...
      emitTestBit(LHS, Bit, /*IsNegative=*/true, DestMBB, MIB);
      I.eraseFromParent();
      return true;
    // ...
      emitTestBit(LHS, Bit, /*IsNegative=*/false, DestMBB, MIB);
      I.eraseFromParent();
      return true;
  }
  // ...
  if (VRegAndVal && VRegAndVal->Value == 0) {
    // ...
    if (tryOptAndIntoCompareBranch(
            // ...
      I.eraseFromParent();
      return true;
    }
  }
  // ...
  auto LHSTy = MRI.getType(LHS);
  if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
    // ...
    I.eraseFromParent();
    return true;
  }
  // ...
bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
    MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
    return true;
  // ...
  I.eraseFromParent();
bool AArch64InstructionSelector::selectCompareBranch(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
  Register CondReg = I.getOperand(0).getReg();
  // ...
  if (CCMIOpc == TargetOpcode::G_FCMP)
    return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
  if (CCMIOpc == TargetOpcode::G_ICMP)
    return selectCompareBranchFedByICmp(I, *CCMI, MIB);
  // ...
  if (ProduceNonFlagSettingCondBr) {
    emitTestBit(CondReg, /*Bit=*/0, /*IsNegative=*/true,
                I.getOperand(1).getMBB(), MIB);
    I.eraseFromParent();
    return true;
  }
  // ...
      .addMBB(I.getOperand(1).getMBB());
  I.eraseFromParent();
  assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
  // ...
    return std::nullopt;

  int64_t Imm = *ShiftImm;
  // ...
    return std::nullopt;
  // ... (per-element-size bound checks follow; each bails out the same way)
    return std::nullopt;
  // ...
    return std::nullopt;
  // ...
    return std::nullopt;
  // ...
    return std::nullopt;
  // ...
    return std::nullopt;
bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
                                                 MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();
  // ...
  if (Ty == LLT::fixed_vector(2, 64))
    Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
  else if (Ty == LLT::fixed_vector(4, 32))
    Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
  else if (Ty == LLT::fixed_vector(2, 32))
    Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
  else if (Ty == LLT::fixed_vector(4, 16))
    Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
  else if (Ty == LLT::fixed_vector(8, 16))
    Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
  else if (Ty == LLT::fixed_vector(16, 8))
    Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
  else if (Ty == LLT::fixed_vector(8, 8))
    Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
  // ...

  auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
  // ...
  I.eraseFromParent();
  return true;
}
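// Design note: when the shift amount is a constant splat (ImmVal), the
// immediate form SHL <vec>, #imm is chosen; otherwise the shift-amount
// vector is fed to the register form USHL.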
bool AArch64InstructionSelector::selectVectorAshrLshr(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_ASHR ||
         I.getOpcode() == TargetOpcode::G_LSHR);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();
  // ...
  bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
  // ...
  unsigned NegOpc = 0;
  const TargetRegisterClass *RC =
      getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
  if (Ty == LLT::fixed_vector(2, 64)) {
    Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
    NegOpc = AArch64::NEGv2i64;
  } else if (Ty == LLT::fixed_vector(4, 32)) {
    Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
    NegOpc = AArch64::NEGv4i32;
  } else if (Ty == LLT::fixed_vector(2, 32)) {
    Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
    NegOpc = AArch64::NEGv2i32;
  } else if (Ty == LLT::fixed_vector(4, 16)) {
    Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
    NegOpc = AArch64::NEGv4i16;
  } else if (Ty == LLT::fixed_vector(8, 16)) {
    Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
    NegOpc = AArch64::NEGv8i16;
  } else if (Ty == LLT::fixed_vector(16, 8)) {
    Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
    NegOpc = AArch64::NEGv16i8;
  } else if (Ty == LLT::fixed_vector(8, 8)) {
    Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
    NegOpc = AArch64::NEGv8i8;
  }
  // ...

  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
  // ...
  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
  // ...
  I.eraseFromParent();
  return true;
}
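// AArch64 has no right-shift-by-register vector instruction, hence the
// negate trick above: SSHL/USHL shift right when given a negative shift
// amount, so the amount vector is negated with NEG first.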
bool AArch64InstructionSelector::selectVaStartAAPCS(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  return false;
}

bool AArch64InstructionSelector::selectVaStartDarwin(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  Register ListReg = I.getOperand(0).getReg();

  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  // ...
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
  // ...
  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
  // ...
  I.eraseFromParent();
  return true;
}
void AArch64InstructionSelector::materializeLargeCMVal(
    MachineInstr &I, const Value *V, unsigned OpFlags) {
  // ...
  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
  // ...
        : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    // ...
    if (auto *GV = dyn_cast<GlobalValue>(V)) {
      // ...
          GV, MovZ->getOperand(1).getOffset(), Flags));
    // ...
          MovZ->getOperand(1).getOffset(), Flags));
    // ...
  Register DstReg = BuildMovK(MovZ.getReg(0),
                              // ...
bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
  // ...
  switch (I.getOpcode()) {
  case TargetOpcode::G_STORE: {
    bool Changed = contractCrossBankCopyIntoStore(I, MRI);
    // ...
      SrcOp.setReg(NewSrc);
      RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
    // ...
  }
  case TargetOpcode::G_PTR_ADD:
    return convertPtrAddToAdd(I, MRI);
  case TargetOpcode::G_LOAD: {
    // ...
    Register DstReg = I.getOperand(0).getReg();
    const LLT DstTy = MRI.getType(DstReg);
    // ...
  }
  case AArch64::G_DUP: {
    // ...
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    // ...
    MRI.setType(I.getOperand(0).getReg(),
                // ...
    MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
    I.getOperand(1).setReg(NewSrc.getReg(0));
    // ...
  }
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_SITOFP: {
    // ...
    Register SrcReg = I.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    // ...
    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
      if (I.getOpcode() == TargetOpcode::G_SITOFP)
        I.setDesc(TII.get(AArch64::G_SITOF));
      else
        I.setDesc(TII.get(AArch64::G_UITOF));
      // ...
bool AArch64InstructionSelector::convertPtrAddToAdd(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
  Register DstReg = I.getOperand(0).getReg();
  Register AddOp1Reg = I.getOperand(1).getReg();
  const LLT PtrTy = MRI.getType(DstReg);
  // ...
  const LLT CastPtrTy =
      // ...
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
  else
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));

  // ...
  I.setDesc(TII.get(TargetOpcode::G_ADD));
  MRI.setType(DstReg, CastPtrTy);
  I.getOperand(1).setReg(PtrToInt.getReg(0));
  if (!select(*PtrToInt)) {
    LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
    // ...
  }
  // ...
    I.getOperand(2).setReg(NegatedReg);
    I.setDesc(TII.get(TargetOpcode::G_SUB));
  // ...
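// e.g. %p:gpr(p0) = G_PTR_ADD %base, %off is rewritten roughly as
//   %i:gpr(s64) = G_PTRTOINT %base
//   %p:gpr(s64) = G_ADD %i, %off
// so that the imported G_ADD patterns (immediates, shifted registers, ...)
// apply; a negated constant offset is further turned into G_SUB above.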
bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
                                                MachineRegisterInfo &MRI) {
  // ...
  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
  const auto &MO = I.getOperand(2);
  // ...
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  // ...
  auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
  auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);

  if (!Imm1Fn || !Imm2Fn)
    return false;

  auto NewI =
      MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
                     // ...
  for (auto &RenderFn : *Imm1Fn)
    // ...
  for (auto &RenderFn : *Imm2Fn)
    // ...
  I.eraseFromParent();
  // ...
bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
  // ...
  LLT DefDstTy = MRI.getType(DefDstReg);
  Register StoreSrcReg = I.getOperand(0).getReg();
  LLT StoreSrcTy = MRI.getType(StoreSrcReg);
  // ...
  if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
      RBI.getRegBank(DefDstReg, MRI, TRI))
    return false;
  // ...
  I.getOperand(0).setReg(DefDstReg);
bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");
  // ...
  switch (I.getOpcode()) {
  case AArch64::G_DUP: {
    // ...
    Register Src = I.getOperand(1).getReg();
    // ...
    Register Dst = I.getOperand(0).getReg();
    // ...
        MRI.getType(Dst).getNumElements(),
        // ...
        ValAndVReg->Value));
    if (!emitConstantVector(Dst, CV, MIB, MRI))
      return false;
    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_SEXT:
    // ...
    if (selectUSMovFromExtend(I, MRI))
      return true;
    return false;
  case TargetOpcode::G_BR:
    return false;
  case TargetOpcode::G_SHL:
    return earlySelectSHL(I, MRI);
  case TargetOpcode::G_CONSTANT: {
    bool IsZero = false;
    if (I.getOperand(1).isCImm())
      IsZero = I.getOperand(1).getCImm()->isZero();
    else if (I.getOperand(1).isImm())
      IsZero = I.getOperand(1).getImm() == 0;
    // ...
    Register DefReg = I.getOperand(0).getReg();
    LLT Ty = MRI.getType(DefReg);
    if (Ty == LLT::scalar(64)) {
      I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
      RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
    } else if (Ty == LLT::scalar(32)) {
      I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
      RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
    }
    // ...
    I.setDesc(TII.get(TargetOpcode::COPY));
    return true;
  }
  case TargetOpcode::G_ADD: {
    // ...
    Register AddDst = I.getOperand(0).getReg();
    Register AddLHS = I.getOperand(1).getReg();
    Register AddRHS = I.getOperand(2).getReg();
    // ...
    LLT Ty = MRI.getType(AddLHS);
    // ...
      if (!MRI.hasOneNonDBGUse(Reg))
    // ...
          MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
    // ...
      Cmp = MatchCmp(AddRHS);
    // ...
    auto &PredOp = Cmp->getOperand(1);
    // ...
    emitIntegerCompare(Cmp->getOperand(2), Cmp->getOperand(3), PredOp, MIB);
    emitCSINC(AddDst, AddLHS, AddLHS, InvCC, MIB);
    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_OR: {
    // ...
    Register Dst = I.getOperand(0).getReg();
    LLT Ty = MRI.getType(Dst);
    // ...
    if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
      return false;

    int64_t Immr = Size - ShiftImm;
    int64_t Imms = Size - ShiftImm - 1;
    unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
    emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_FENCE: {
    if (I.getOperand(1).getImm() == 0)
      MIB.buildInstr(TargetOpcode::MEMBARRIER);
    else
      MIB.buildInstr(AArch64::DMB, {}, {})
          .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
    I.eraseFromParent();
    return true;
  }
  // ...
  }
}
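// In the G_FENCE case above, the DMB operand encodings are 0x9 (ISHLD) and
// 0xb (ISH): an acquire-only fence (ordering value 4) gets the cheaper load
// barrier, while stronger fences get a full inner-shareable barrier.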
bool AArch64InstructionSelector::select(MachineInstr &I) {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");
  // ...
  if (Subtarget->requiresStrictAlign()) {
    // ...
    LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
    return false;
  }
  // ...
  unsigned Opcode = I.getOpcode();
  if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
    // ...
    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

    if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
      const Register DefReg = I.getOperand(0).getReg();
      const LLT DefTy = MRI.getType(DefReg);
      // ...
          MRI.getRegClassOrRegBank(DefReg);
      // ...
        DefRC = getRegClassForTypeOnBank(DefTy, RB);
      // ...
      I.setDesc(TII.get(TargetOpcode::PHI));
      return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
    }

    if (I.isDebugInstr())
      // ...
  }
  // ...
  if (I.getNumOperands() != I.getNumExplicitOperands()) {
    LLVM_DEBUG(
        dbgs() << "Generic instruction has unexpected implicit operands\n");
    return false;
  }
  // ...
  if (preISelLower(I)) {
    Opcode = I.getOpcode();
  }
  // ...
  if (selectImpl(I, *CoverageInfo))
    return true;

  LLT Ty =
      I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
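// selectImpl is the matcher imported from TableGen (GET_GLOBALISEL_IMPL
// above); the switch that follows handles only the cases the imported
// patterns cannot.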
  switch (Opcode) {
  case TargetOpcode::G_SBFX:
  case TargetOpcode::G_UBFX: {
    static const unsigned OpcTable[2][2] = {
        {AArch64::UBFMWri, AArch64::UBFMXri},
        {AArch64::SBFMWri, AArch64::SBFMXri}};
    bool IsSigned = Opcode == TargetOpcode::G_SBFX;
    unsigned Opc = OpcTable[IsSigned][Size == 64];
    // ...
    assert(Cst1 && "Should have gotten a constant for src 1?");
    // ...
    assert(Cst2 && "Should have gotten a constant for src 2?");
    auto LSB = Cst1->Value.getZExtValue();
    auto Width = Cst2->Value.getZExtValue();
    auto BitfieldInst =
        MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
            .addImm(LSB)
            .addImm(LSB + Width - 1);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
  }
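  // e.g. G_UBFX %dst, %src, 8, 4 on s32 becomes UBFMWri %dst, %src, 8, 11:
  // immr is the LSB and imms is LSB + Width - 1.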
  case TargetOpcode::G_BRCOND:
    return selectCompareBranch(I, MF, MRI);

  case TargetOpcode::G_BRINDIRECT: {
    I.setDesc(TII.get(AArch64::BR));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_BRJT:
    return selectBrJT(I, MRI);
  case AArch64::G_ADD_LOW: {
    // ...
    if (BaseMI->getOpcode() != AArch64::ADRP) {
      I.setDesc(TII.get(AArch64::ADDXri));
      // ...
    }
    assert(TM.getCodeModel() == CodeModel::Small &&
           "Expected small code model");
    // ...
    auto Op2 = I.getOperand(2);
    auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
                       .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
                                         Op1.getTargetFlags())
                       .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
                                         Op2.getTargetFlags());
    I.eraseFromParent();
    // ...
  }
  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_CONSTANT: {
    const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
    // ...
    const Register DefReg = I.getOperand(0).getReg();
    const LLT DefTy = MRI.getType(DefReg);
    // ...
      if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
                          << " constant, expected: " << s16 << " or " << s32
                          << " or " << s64 << " or " << s128 << '\n');
        return false;
      }

      if (RB.getID() != AArch64::FPRRegBankID) {
        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
                          << " constant on bank: " << RB
                          << ", expected: FPR\n");
        return false;
      }
      // ...
      if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
        return false;
    // ...
      if (Ty != p0 && Ty != s8 && Ty != s16) {
        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
                          << " constant, expected: " << s32 << ", " << s64
                          << ", or " << p0 << '\n');
        return false;
      }

      if (RB.getID() != AArch64::GPRRegBankID) {
        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
                          << " constant on bank: " << RB
                          << ", expected: GPR\n");
        return false;
      }
    // ...
      if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
                            // ...
      auto *FPImm = I.getOperand(1).getFPImm();
      // ...
        LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
        return false;
      // ...
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
    // ...
      assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
      // ...
      const Register DefGPRReg = MRI.createVirtualRegister(
          DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
      // ...
      if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
        return false;
      }
      // ...
    } else if (I.getOperand(1).isCImm()) {
      uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
      I.getOperand(1).ChangeToImmediate(Val);
    } else if (I.getOperand(1).isImm()) {
      uint64_t Val = I.getOperand(1).getImm();
      I.getOperand(1).ChangeToImmediate(Val);
    }

    const unsigned MovOpc =
        DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
    I.setDesc(TII.get(MovOpc));
    // ...
  }
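  // MOVi32imm/MOVi64imm are pseudos expanded after selection into the usual
  // MOVZ/MOVK-style sequences, which is why arbitrary integer constants are
  // acceptable here.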
  case TargetOpcode::G_EXTRACT: {
    Register DstReg = I.getOperand(0).getReg();
    Register SrcReg = I.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    LLT DstTy = MRI.getType(DstReg);
    // ...
    unsigned Offset = I.getOperand(2).getImm();
    // ...
      if (SrcRB.getID() == AArch64::GPRRegBankID) {
        auto NewI =
            MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
                // ...
                        Offset == 0 ? AArch64::sube64 : AArch64::subo64);
        // ...
                                 AArch64::GPR64RegClass, NewI->getOperand(0));
        I.eraseFromParent();
        return true;
      }
      // ...
      unsigned LaneIdx = Offset / 64;
      MachineInstr *Extract = emitExtractVectorElt(
          DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
      // ...
      I.eraseFromParent();
      return true;
    // ...
    I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
    // ...
           "unexpected G_EXTRACT types");
    // ...
        .addReg(DstReg, 0, AArch64::sub_32);
    RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                                 AArch64::GPR32RegClass, MRI);
    I.getOperand(0).setReg(DstReg);
    // ...
  }
  case TargetOpcode::G_INSERT: {
    LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    // ...
    I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
    unsigned LSB = I.getOperand(3).getImm();
    unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
    I.getOperand(3).setImm((DstSize - LSB) % DstSize);
    // ...
           "unexpected G_INSERT types");
    // ...
            TII.get(AArch64::SUBREG_TO_REG))
        // ...
        .addUse(I.getOperand(2).getReg())
        .addImm(AArch64::sub_32);
    RBI.constrainGenericRegister(I.getOperand(2).getReg(),
                                 AArch64::GPR32RegClass, MRI);
    I.getOperand(2).setReg(SrcReg);
    // ...
  }
  case TargetOpcode::G_FRAME_INDEX: {
    // ...
    I.setDesc(TII.get(AArch64::ADDXri));
    // ...
  }
  case TargetOpcode::G_GLOBAL_VALUE: {
    // ...
    if (I.getOperand(1).isSymbol()) {
      OpFlags = I.getOperand(1).getTargetFlags();
      // ...
    } else {
      GV = I.getOperand(1).getGlobal();
      if (GV->isThreadLocal())
        return selectTLSGlobalValue(I, MRI);
      OpFlags = STI.ClassifyGlobalReference(GV, TM);
    }

    if (OpFlags & AArch64II::MO_GOT) {
      I.setDesc(TII.get(AArch64::LOADgot));
      I.getOperand(1).setTargetFlags(OpFlags);
    } else if (TM.getCodeModel() == CodeModel::Large &&
               !TM.isPositionIndependent()) {
      // ...
      materializeLargeCMVal(I, GV, OpFlags);
      I.eraseFromParent();
      return true;
    } else if (TM.getCodeModel() == CodeModel::Tiny) {
      I.setDesc(TII.get(AArch64::ADR));
      I.getOperand(1).setTargetFlags(OpFlags);
    } else {
      I.setDesc(TII.get(AArch64::MOVaddr));
      // ...
      MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
                           // ...
    }
    // ...
  }
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE: {
    // ...
    bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
    // ...
    if (Order != AtomicOrdering::NotAtomic &&
        Order != AtomicOrdering::Unordered &&
        Order != AtomicOrdering::Monotonic) {
      assert(!isa<GZExtLoad>(LdSt));
      assert(MemSizeInBytes <= 8 &&
             "128-bit atomics should already be custom-legalized");

      if (isa<GLoad>(LdSt)) {
        static constexpr unsigned LDAPROpcodes[] = {
            AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
        static constexpr unsigned LDAROpcodes[] = {
            AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
        // ...
            STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
        // ...
        I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
      } else {
        static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
                                               AArch64::STLRW, AArch64::STLRX};
        // ...
        if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
          // ...
          Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
          MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
              .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
          I.getOperand(0).setReg(NewVal);
        }
        I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
      }
      // ...
    }
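    // LDAPR (from the RCPC extension) implements the weaker acquire-PC
    // ordering, which suffices for anything below seq_cst; that is exactly
    // the condition checked above before preferring it over LDAR.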
    // ...
           "Load/Store pointer operand isn't a GPR");
    assert(MRI.getType(PtrReg).isPointer() &&
           "Load/Store pointer operand isn't a pointer");
    // ...
    const LLT ValTy = MRI.getType(ValReg);
    // ...
    if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
      // ...
      auto *RC = getRegClassForTypeOnBank(MemTy, RB);
      // ...
          .addReg(ValReg, 0, SubReg)
          // ...
      RBI.constrainGenericRegister(Copy, *RC, MRI);
      // ...
    } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
      // ...
      if (RB.getID() == AArch64::FPRRegBankID) {
        // ...
        auto *RC = getRegClassForTypeOnBank(MemTy, RB);
        // ...
        MRI.setRegBank(NewDst, RB);
        // ...
        MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
            // ...
        auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
        RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
        // ...
      }
    }

    auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
      bool IsStore = isa<GStore>(I);
      const unsigned NewOpc =
          selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
      if (NewOpc == I.getOpcode())
        return nullptr;
      // ...
      auto AddrModeFns =
          selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
      if (!AddrModeFns) {
        // ...
        I.setDesc(TII.get(NewOpc));
        // ...
        return &I;
      }
      // ...
      auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
      Register CurValReg = I.getOperand(0).getReg();
      IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
      NewInst.cloneMemRefs(I);
      for (auto &Fn : *AddrModeFns)
        Fn(NewInst);
      I.eraseFromParent();
      return &*NewInst;
    };
    // ...
    if (Opcode == TargetOpcode::G_STORE) {
      // ...
      if (CVal && CVal->Value == 0) {
        switch (LoadStore->getOpcode()) {
        case AArch64::STRWui:
        case AArch64::STRHHui:
        case AArch64::STRBBui:
          LoadStore->getOperand(0).setReg(AArch64::WZR);
          break;
        case AArch64::STRXui:
          LoadStore->getOperand(0).setReg(AArch64::XZR);
          break;
        }
      }
    }

    if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
                       ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {
      // ...
      if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
        return false;
      // ...
      Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
      // ...
      MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
          // ...
          .addImm(AArch64::sub_32);
      // ...
      return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
                                          MRI);
    }
    // ...
  }
  case TargetOpcode::G_INDEXED_ZEXTLOAD:
  case TargetOpcode::G_INDEXED_SEXTLOAD:
    return selectIndexedExtLoad(I, MRI);
  case TargetOpcode::G_INDEXED_LOAD:
    return selectIndexedLoad(I, MRI);
  case TargetOpcode::G_INDEXED_STORE:
    return selectIndexedStore(cast<GIndexedStore>(I), MRI);

  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    if (MRI.getType(I.getOperand(0).getReg()).isVector())
      return selectVectorAshrLshr(I, MRI);
    [[fallthrough]];
  case TargetOpcode::G_SHL:
    if (Opcode == TargetOpcode::G_SHL &&
        MRI.getType(I.getOperand(0).getReg()).isVector())
      return selectVectorSHL(I, MRI);
    Register SrcReg = I.getOperand(1).getReg();
    Register ShiftReg = I.getOperand(2).getReg();
    const LLT ShiftTy = MRI.getType(ShiftReg);
    const LLT SrcTy = MRI.getType(SrcReg);
    // ...
      auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
                       .addReg(ShiftReg, 0, AArch64::sub_32);
      MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
      I.getOperand(2).setReg(Trunc.getReg(0));
    // ...
  case TargetOpcode::G_OR: {
    // ...
    const Register DefReg = I.getOperand(0).getReg();
    // ...
    if (NewOpc == I.getOpcode())
      return false;

    I.setDesc(TII.get(NewOpc));
    // ...
  }

  case TargetOpcode::G_PTR_ADD: {
    emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_USUBO:
    return selectOverflowOp(I, MRI);

  case TargetOpcode::G_PTRMASK: {
    Register MaskReg = I.getOperand(2).getReg();
    // ...
    I.setDesc(TII.get(AArch64::ANDXri));
    I.getOperand(2).ChangeToImmediate(
        // ...
  }
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_TRUNC: {
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());

    const Register DstReg = I.getOperand(0).getReg();
    const Register SrcReg = I.getOperand(1).getReg();
    // ...
      LLVM_DEBUG(
          dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
    // ...
    if (DstRB.getID() == AArch64::GPRRegBankID) {
      // ...
      if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
          !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
        return false;
      }

      if (DstRC == SrcRC) {
        // ...
      } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
                 // ...
      } else if (DstRC == &AArch64::GPR32RegClass &&
                 SrcRC == &AArch64::GPR64RegClass) {
        I.getOperand(1).setSubReg(AArch64::sub_32);
      } else {
        LLVM_DEBUG(
            dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
        return false;
      }

      I.setDesc(TII.get(TargetOpcode::COPY));
      // ...
    } else if (DstRB.getID() == AArch64::FPRRegBankID) {
      // ...
        I.setDesc(TII.get(AArch64::XTNv4i16));
        // ...
        I.eraseFromParent();
        return true;
      // ...
      if (Opcode == TargetOpcode::G_PTRTOINT) {
        assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
        I.setDesc(TII.get(TargetOpcode::COPY));
        // ...
      }
    }
  }
  case TargetOpcode::G_ANYEXT: {
    if (selectUSMovFromExtend(I, MRI))
      return true;

    const Register DstReg = I.getOperand(0).getReg();
    const Register SrcReg = I.getOperand(1).getReg();
    // ...
    if (RBDst.getID() != AArch64::GPRRegBankID) {
      LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
                        << ", expected: GPR\n");
      return false;
    }
    // ...
    if (RBSrc.getID() != AArch64::GPRRegBankID) {
      LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
                        << ", expected: GPR\n");
      return false;
    }

    const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
    // ...
      LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
      return false;
    // ...
    if (DstSize != 64 && DstSize > 32) {
      LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
                        << ", expected: 32 or 64\n");
      return false;
    }
    // ...
    Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
    // ...
        .addImm(AArch64::sub_32);
    I.getOperand(1).setReg(ExtSrc);
    // ...
  }
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_SEXT_INREG:
  case TargetOpcode::G_SEXT: {
    if (selectUSMovFromExtend(I, MRI))
      return true;

    unsigned Opcode = I.getOpcode();
    const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
    const Register DefReg = I.getOperand(0).getReg();
    Register SrcReg = I.getOperand(1).getReg();
    const LLT DstTy = MRI.getType(DefReg);
    const LLT SrcTy = MRI.getType(SrcReg);
    // ...
    if (Opcode == TargetOpcode::G_SEXT_INREG)
      SrcSize = I.getOperand(2).getImm();
    // ...
               AArch64::GPRRegBankID &&
           "Unexpected ext regbank");
    // ...
        RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
    if (LoadMI && IsGPR) {
      // ...
      unsigned BytesLoaded = MemOp->getSize().getValue();
      // ...
    }

    if (IsGPR && SrcSize == 32 && DstSize == 64) {
      Register SubregToRegSrc =
          MRI.createVirtualRegister(&AArch64::GPR32RegClass);
      const Register ZReg = AArch64::WZR;
      MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
          // ...
      MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
          // ...
          .addImm(AArch64::sub_32);

      if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
                                        MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
        return false;
      }

      if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
                                        MRI))
        return false;
      // ...
      I.eraseFromParent();
      return true;
    }

    if (DstSize == 64) {
      if (Opcode != TargetOpcode::G_SEXT_INREG) {
        // ...
        if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
                                          MRI))
          return false;
        SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
                                {&AArch64::GPR64RegClass}, {})
                     // ...
      }

      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
                            // ...
    } else if (DstSize <= 32) {
      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
                            // ...
    }
    // ...
    I.eraseFromParent();
    return true;
  }
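  // Both paths use the bitfield-move family: for example a G_SEXT from s32
  // to s64 ends up as SBFMXri dst, src, 0, 31, i.e. the SXTW alias.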
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
              SrcTy = MRI.getType(I.getOperand(1).getReg());
    // ...
    if (NewOpc == Opcode)
      return false;

    I.setDesc(TII.get(NewOpc));
    // ...
  }
  case TargetOpcode::G_FREEZE:
    return selectCopy(I, TII, MRI, TRI, RBI);

  case TargetOpcode::G_INTTOPTR:
    // ...
    return selectCopy(I, TII, MRI, TRI, RBI);

  case TargetOpcode::G_BITCAST:
    // ...
    return selectCopy(I, TII, MRI, TRI, RBI);
  case TargetOpcode::G_SELECT: {
    auto &Sel = cast<GSelect>(I);
    const Register CondReg = Sel.getCondReg();
    const Register TReg = Sel.getTrueReg();
    const Register FReg = Sel.getFalseReg();

    if (tryOptSelect(Sel))
      return true;
    // ...
    Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
    auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
                     // ...
    if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
      return false;
    Sel.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_ICMP: {
    // ...
    emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
    emitCSINC(I.getOperand(0).getReg(), AArch64::WZR, AArch64::WZR, InvCC,
              MIB);
    I.eraseFromParent();
    return true;
  }

  case TargetOpcode::G_FCMP: {
    // ...
    if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
                       Pred) ||
        !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
      return false;
    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_VASTART:
    return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
                                : selectVaStartAAPCS(I, MF, MRI);
  case TargetOpcode::G_INTRINSIC:
    return selectIntrinsic(I, MRI);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectIntrinsicWithSideEffects(I, MRI);
  case TargetOpcode::G_IMPLICIT_DEF: {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    const Register DstReg = I.getOperand(0).getReg();
    // ...
    RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
    return true;
  }
  case TargetOpcode::G_BLOCK_ADDR: {
    if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
      materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
      I.eraseFromParent();
      return true;
    } else {
      I.setDesc(TII.get(AArch64::MOVaddrBA));
      auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
                           I.getOperand(0).getReg())
                       // ...
                           I.getOperand(1).getBlockAddress(), 0,
                           // ...
      I.eraseFromParent();
      // ...
    }
  }
  case AArch64::G_DUP: {
    // ...
    if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
        AArch64::GPRRegBankID)
      return false;
    LLT VecTy = MRI.getType(I.getOperand(0).getReg());
    if (VecTy == LLT::fixed_vector(8, 8))
      I.setDesc(TII.get(AArch64::DUPv8i8gpr));
    else if (VecTy == LLT::fixed_vector(16, 8))
      I.setDesc(TII.get(AArch64::DUPv16i8gpr));
    else if (VecTy == LLT::fixed_vector(4, 16))
      I.setDesc(TII.get(AArch64::DUPv4i16gpr));
    else if (VecTy == LLT::fixed_vector(8, 16))
      I.setDesc(TII.get(AArch64::DUPv8i16gpr));
    else
      return false;
    // ...
  }
  case TargetOpcode::G_BUILD_VECTOR:
    return selectBuildVector(I, MRI);
  case TargetOpcode::G_MERGE_VALUES:
    return selectMergeValues(I, MRI);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectUnmergeValues(I, MRI);
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return selectShuffleVector(I, MRI);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectExtractElt(I, MRI);
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectConcatVectors(I, MRI);
  case TargetOpcode::G_JUMP_TABLE:
    return selectJumpTable(I, MRI);
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMCPY_INLINE:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET:
    assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
    return selectMOPS(I, MRI);
  }
  // ...
bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
  // ...
}
bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
                                            MachineRegisterInfo &MRI) {
  // ...
  switch (GI.getOpcode()) {
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMCPY_INLINE:
    Mopcode = AArch64::MOPSMemoryCopyPseudo;
    break;
  case TargetOpcode::G_MEMMOVE:
    Mopcode = AArch64::MOPSMemoryMovePseudo;
    break;
  case TargetOpcode::G_MEMSET:
    // ...
    Mopcode = AArch64::MOPSMemorySetPseudo;
    break;
  }
  // ...
  const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
  const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
  // ...
  const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
  const auto &SrcValRegClass =
      IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
  // ...
  RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
  RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
  RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
  // ...
    Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
    Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    // ...
    MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
                   {DstPtrCopy, SizeCopy, SrcValCopy});
    // ...
    Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
    MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
                   {DstPtrCopy, SrcValCopy, SizeCopy});
  // ...
bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
                                            MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
  Register JTAddr = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();
  // ...
  Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);

  auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
                                      {TargetReg, ScratchReg}, {JTAddr, Index})
                           .addJumpTableIndex(JTI);
  // ...
  MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
                 {static_cast<int64_t>(JTI)});
  // ...
  MIB.buildInstr(AArch64::BR, {}, {TargetReg});
  I.eraseFromParent();
  // ...
}
bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
                                                 MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");

  Register DstReg = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();
  // ...
  auto MovMI = MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
                   // ...
  I.eraseFromParent();
  // ...
}
bool AArch64InstructionSelector::selectTLSGlobalValue(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  if (!STI.isTargetMachO())
    return false;
  // ...
  const auto &GlobalOp = I.getOperand(1);
  assert(GlobalOp.getOffset() == 0 &&
         "Shouldn't have an offset on TLS globals!");
  // ...
  auto LoadGOT =
      MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
          // ...
  auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
                             {LoadGOT.getReg(0)})
  // ...
  RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
                               MRI);
  I.eraseFromParent();
  return true;
}
MachineInstr *AArch64InstructionSelector::emitScalarToVector(
    unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
    MachineIRBuilder &MIRBuilder) const {
  auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});

  auto BuildFn = [&](unsigned SubregIndex) {
    // ...
        .addImm(SubregIndex);
    // ...
  };

  switch (EltSize) {
  case 8:
    return BuildFn(AArch64::bsub);
  case 16:
    return BuildFn(AArch64::hsub);
  case 32:
    return BuildFn(AArch64::ssub);
  case 64:
    return BuildFn(AArch64::dsub);
  // ...
  }
}
MachineInstr *
AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
                                             MachineIRBuilder &MIB,
                                             MachineRegisterInfo &MRI) const {
  LLT DstTy = MRI.getType(DstReg);
  const TargetRegisterClass *RC =
      getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
  if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
    // ...
  }
  // ...
  if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
    // ...
  }
  // ...
      .addReg(SrcReg, 0, SubReg);
  RBI.constrainGenericRegister(DstReg, *RC, MRI);
  // ...
}
bool AArch64InstructionSelector::selectMergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
  // ...
  if (I.getNumOperands() != 3)
    return false;
  // ...
    Register DstReg = I.getOperand(0).getReg();
    Register Src1Reg = I.getOperand(1).getReg();
    Register Src2Reg = I.getOperand(2).getReg();
    auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
    MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
                                         /*LaneIdx=*/0, RB, MIB);
    // ...
                                          Src2Reg, 1, RB, MIB);
    // ...
    I.eraseFromParent();
    return true;
  // ...
  if (RB.getID() != AArch64::GPRRegBankID)
    return false;
  // ...
  auto *DstRC = &AArch64::GPR64RegClass;
  Register SubToRegDef = MRI.createVirtualRegister(DstRC);
  MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                    TII.get(TargetOpcode::SUBREG_TO_REG))
                                // ...
                                .addUse(I.getOperand(1).getReg())
                                .addImm(AArch64::sub_32);
  Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
  // ...
                                     TII.get(TargetOpcode::SUBREG_TO_REG))
                                 // ...
                                 .addUse(I.getOperand(2).getReg())
                                 .addImm(AArch64::sub_32);
  MachineInstr &BFM =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
           .addDef(I.getOperand(0).getReg())
           // ...
  I.eraseFromParent();
  return true;
}
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
                              const unsigned EltSize) {
  switch (EltSize) {
  case 8:
    CopyOpc = AArch64::DUPi8;
    ExtractSubReg = AArch64::bsub;
    break;
  case 16:
    CopyOpc = AArch64::DUPi16;
    ExtractSubReg = AArch64::hsub;
    break;
  case 32:
    CopyOpc = AArch64::DUPi32;
    ExtractSubReg = AArch64::ssub;
    break;
  case 64:
    CopyOpc = AArch64::DUPi64;
    ExtractSubReg = AArch64::dsub;
    break;
  default:
    LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
    return false;
  }
  return true;
}
MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
    std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
    Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
  // ...
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
  // ...
    LLVM_DEBUG(
        dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
  // ...
  const TargetRegisterClass *DstRC =
      getRegClassForTypeOnBank(ScalarTy, DstRB, true);
  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
    return nullptr;
  }
  // ...
  const LLT &VecTy = MRI.getType(VecReg);
  const TargetRegisterClass *VecRC =
      getRegClassForTypeOnBank(VecTy, VecRB, true);
  if (!VecRC) {
    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
    return nullptr;
  }
  // ...
  if (!DstReg)
    DstReg = MRI.createVirtualRegister(DstRC);
  // ...
    auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
                    .addReg(VecReg, 0, ExtractSubReg);
    RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
  // ...
    MachineInstr *ScalarToVector = emitScalarToVector(
        VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
    if (!ScalarToVector)
      return nullptr;
  // ...
  MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
  // ...
  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
  // ...
}
bool AArch64InstructionSelector::selectExtractElt(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
         "unexpected opcode!");
  Register DstReg = I.getOperand(0).getReg();
  const LLT NarrowTy = MRI.getType(DstReg);
  const Register SrcReg = I.getOperand(1).getReg();
  const LLT WideTy = MRI.getType(SrcReg);
  // ...
         "source register size too small!");
  assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
  // ...
  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");

  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
    // ...
  }
  // ...
  unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
  // ...
  MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
                                               LaneIdx, MIB);
  // ...
  I.eraseFromParent();
  return true;
}
bool AArch64InstructionSelector::selectSplitVectorUnmerge(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  unsigned NumElts = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(NumElts).getReg();
  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
  const LLT SrcTy = MRI.getType(SrcReg);

  assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
  // ...
    LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
    return false;
  // ...
  const RegisterBank &DstRB =
      *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
  for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
    Register Dst = I.getOperand(OpIdx).getReg();
    // ...
        emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
    // ...
  }
  I.eraseFromParent();
  return true;
}
bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
                                                     MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "unexpected opcode");
  // ...
  if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID ||
      RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID) {
    LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
                         "currently unsupported.\n");
    return false;
  }
  // ...
  unsigned NumElts = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(NumElts).getReg();
  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
  const LLT WideTy = MRI.getType(SrcReg);
  // ...
         "can only unmerge from vector or s128 types!");
  // ...
         "source register size too small!");
  // ...
    return selectSplitVectorUnmerge(I, MRI);
  // ...
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
  // ...
  unsigned NumInsertRegs = NumElts - 1;
  // ...
      *RBI.getRegBank(SrcReg, MRI, TRI));
  // ...
  assert(Found && "expected to find last operand's subreg idx");
  for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
    Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
    MachineInstr &ImpDefMI =
        *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
                 ImpDefReg);
    // ...
    Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
    MachineInstr &InsMI =
        *BuildMI(MBB, I, I.getDebugLoc(),
                 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
    // ...
  }
  // ...
  Register CopyTo = I.getOperand(0).getReg();
  auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
                       .addReg(InsertRegs[0], 0, ExtractSubReg);
  // ...
  unsigned LaneIdx = 1;
  for (Register InsReg : InsertRegs) {
    Register CopyTo = I.getOperand(LaneIdx).getReg();
    // ...
  }
  // ...
      MRI.getRegClassOrNull(I.getOperand(1).getReg());
  // ...
  RBI.constrainGenericRegister(CopyTo, *RC, MRI);
  I.eraseFromParent();
  return true;
}
bool AArch64InstructionSelector::selectConcatVectors(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Unexpected opcode");
  Register Dst = I.getOperand(0).getReg();
  Register Op1 = I.getOperand(1).getReg();
  Register Op2 = I.getOperand(2).getReg();
  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
  if (!ConcatMI)
    return false;
  I.eraseFromParent();
  return true;
}
unsigned
AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
                                                  MachineFunction &MF) const {
  // ...
}

MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
    const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
  // ...
  switch (Size) {
  case 16:
    RC = &AArch64::FPR128RegClass;
    Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
    break;
  case 8:
    RC = &AArch64::FPR64RegClass;
    Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
    break;
  case 4:
    RC = &AArch64::FPR32RegClass;
    Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
    break;
  case 2:
    RC = &AArch64::FPR16RegClass;
    Opc = AArch64::LDRHui;
    break;
  default:
    LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
                      // ...
  }

  // ...
  auto &MF = MIRBuilder.getMF();
  unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
  if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
    // ...
    LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
  } else {
    // ...
    MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
        // ...
    LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
                 .addConstantPoolIndex(
                     // ...
  }
static std::pair<unsigned, unsigned>
getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
  unsigned Opc, SubregIdx;
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (EltSize == 8) {
      Opc = AArch64::INSvi8gpr;
      SubregIdx = AArch64::bsub;
    } else if (EltSize == 16) {
      Opc = AArch64::INSvi16gpr;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 32) {
      Opc = AArch64::INSvi32gpr;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64gpr;
      SubregIdx = AArch64::dsub;
    }
    // ...
  } else {
    if (EltSize == 8) {
      Opc = AArch64::INSvi8lane;
      SubregIdx = AArch64::bsub;
    } else if (EltSize == 16) {
      Opc = AArch64::INSvi16lane;
      SubregIdx = AArch64::hsub;
    } else if (EltSize == 32) {
      Opc = AArch64::INSvi32lane;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64lane;
      SubregIdx = AArch64::dsub;
    }
    // ...
  }
  return std::make_pair(Opc, SubregIdx);
}
MachineInstr *AArch64InstructionSelector::emitInstr(
    unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
    std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
    const ComplexRendererFns &RenderFns) const {
  assert(Opcode && "Expected an opcode?");
  assert(!isPreISelGenericOpcode(Opcode) &&
         "Function should only be used to produce selected instructions!");
  auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
  if (RenderFns)
    for (auto &Fn : *RenderFns)
      Fn(MI);
  constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
  return &*MI;
}
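// emitAddSub chooses the cheapest encoding for "Dst = LHS <op> RHS" from the
// caller-supplied 5 x {64-bit, 32-bit} opcode table:
//   [0] immediate (ri), [1] shifted register (rs), [2] plain register (rr),
//   [3] immediate of the negated value (ri of the inverse op),
//   [4] extended register (rx).
// For example, an ADD whose RHS constant only fits selectNegArithImmed is
// emitted as the row-[3] SUB-immediate instead.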
MachineInstr *AArch64InstructionSelector::emitAddSub(
    const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
    Register Dst, MachineOperand &LHS, MachineOperand &RHS,
    MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  auto Ty = MRI.getType(LHS.getReg());
  // ...
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
  bool Is32Bit = Size == 32;

  // INSTRri form with positive arithmetic immediate.
  if (auto Fns = selectArithImmed(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  // INSTRri form with negative arithmetic immediate.
  if (auto Fns = selectNegArithImmed(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  // INSTRrx form.
  if (auto Fns = selectArithExtendedRegister(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  // INSTRrs form.
  if (auto Fns = selectShiftedRegister(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);
  return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
                   MIRBuilder);
}
MachineInstr *
AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
                                    MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::ADDXri, AArch64::ADDWri},
       {AArch64::ADDXrs, AArch64::ADDWrs},
       {AArch64::ADDXrr, AArch64::ADDWrr},
       {AArch64::SUBXri, AArch64::SUBWri},
       {AArch64::ADDXrx, AArch64::ADDWrx}}};
  return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
}

MachineInstr *
AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::ADDSXri, AArch64::ADDSWri},
       {AArch64::ADDSXrs, AArch64::ADDSWrs},
       {AArch64::ADDSXrr, AArch64::ADDSWrr},
       {AArch64::SUBSXri, AArch64::SUBSWri},
       {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
}

MachineInstr *
AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::SUBSXri, AArch64::SUBSWri},
       {AArch64::SUBSXrs, AArch64::SUBSWrs},
       {AArch64::SUBSXrr, AArch64::SUBSWrr},
       {AArch64::ADDSXri, AArch64::ADDSWri},
       {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
}
MachineInstr *
AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  MachineRegisterInfo *MRI = MIRBuilder.getMRI();
  bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
  static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
  return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
}

MachineInstr *
AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  MachineRegisterInfo *MRI = MIRBuilder.getMRI();
  bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
  static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
  return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
}

MachineInstr *
AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
  auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
  return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
}

MachineInstr *
AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  LLT Ty = MRI.getType(LHS.getReg());
  unsigned RegSize = Ty.getSizeInBits();
  bool Is32Bit = (RegSize == 32);
  const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
                                   {AArch64::ANDSXrs, AArch64::ANDSWrs},
                                   {AArch64::ANDSXrr, AArch64::ANDSWrr}};

  // ANDS needs a logical immediate for its immediate form.
  if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
    int64_t Imm = ValAndVReg->Value.getSExtValue();
    if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
      auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
      TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
      constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
      return &*TstMI;
    }
  }

  if (auto Fns = selectLogicalShiftedRegister(RHS))
    return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
  return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
}
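// An integer compare on AArch64 is a flag-setting subtract whose result is
// never read: emitIntegerCompare clones a virtual register of the LHS type to
// receive the dead difference and emits SUBS into it, after first trying to
// fold the compare into a CMN or TST.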
MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  // ...
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
  // Fold the compare into a flag-setting operation if possible.
  if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
    return FoldCmp;
  auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
  return emitSUBS(Dst, LHS, RHS, MIRBuilder);
}

MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
    Register Dst, CmpInst::Predicate Pred,
    MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  LLT Ty = MRI.getType(Dst);
  assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
         "Expected a 32-bit scalar register?");
  const Register ZReg = AArch64::WZR;
  // ...
    return emitCSINC(Dst, ZReg, ZReg, InvCC1, MIRBuilder);
  // ...
  emitCSINC(Def1Reg, ZReg, ZReg, InvCC1, MIRBuilder);
  emitCSINC(Def2Reg, ZReg, ZReg, InvCC2, MIRBuilder);
  auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
  constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
  return &*OrMI;
}
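// FCMP has both a two-register form and an immediate form that can only
// encode +0.0; CmpOpcTbl's first index (ShouldUseImm) switches to the Fri
// variants so a compare against zero needs no materialized FP constant.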
MachineInstr *AArch64InstructionSelector::emitFPCompare(
    Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
    std::optional<CmpInst::Predicate> Pred) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  LLT Ty = MRI.getType(LHS);
  // ...
  unsigned OpSize = Ty.getSizeInBits();
  assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
  // ...
  if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
    // ...
    ShouldUseImm = true;
  }
  // ...
  unsigned CmpOpcTbl[2][3] = {
      {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
      {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
  unsigned CmpOpc =
      CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
  // ...
}
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
    std::optional<Register> Dst, Register Op1, Register Op2,
    MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  const LLT Op1Ty = MRI.getType(Op1);
  const LLT Op2Ty = MRI.getType(Op2);

  if (Op1Ty != Op2Ty) {
    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
    return nullptr;
  }
  assert(Op1Ty.isVector() && "Expected a vector for vector concat");

  if (Op1Ty.getSizeInBits() >= 128) {
    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
    return nullptr;
  }
  // ...
  MachineInstr *WidenedOp1 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
  MachineInstr *WidenedOp2 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
  if (!WidenedOp1 || !WidenedOp2) {
    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
    return nullptr;
  }

  // Now do the insert of the upper element.
  unsigned InsertOpc, InsSubRegIdx;
  std::tie(InsertOpc, InsSubRegIdx) =
      getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());

  if (!Dst)
    Dst = MRI.createVirtualRegister(DstRC);
  // ...
}
MachineInstr *
AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
                                      Register Src2, AArch64CC::CondCode Pred,
                                      MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  unsigned Size = 0;
  // If Dst was given a register class rather than an LLT, compute the size
  // from the class; otherwise use the type.
  if (const auto *RC = MRI.getRegClassOrNull(Dst))
    Size = TRI.getRegSizeInBits(*RC);
  else
    Size = MRI.getType(Dst).getSizeInBits();
  assert(Size <= 64 && "Expected 64 bits or less only!");
  static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
  unsigned Opc = OpcTable[Size == 64];
  auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
  constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
  return &*CSINC;
}
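// Carry-in materialization: NZCV.C must reflect the incoming carry before an
// ADCS/SBCS is emitted. For subtracts the flag carries the inverted (borrow)
// sense, so "SUBS wzr - carry" is used (C = 1 iff carry == 0); for adds,
// "SUBS carry - 1" sets C exactly when carry is nonzero. The 32-bit result
// register is dead either way, and the SUBS is skipped entirely when the
// immediately preceding instruction already left the right carry in NZCV.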
MachineInstr *
AArch64InstructionSelector::emitCarryIn(MachineInstr &I, Register CarryReg) {
  MachineRegisterInfo *MRI = MIB.getMRI();
  unsigned Opcode = I.getOpcode();

  // If the instruction is a SUB, we need the negated carry, because borrowing
  // is indicated by carry-flag == 0.
  bool NeedsNegatedCarry =
      (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
  // ...
  if (SrcMI == I.getPrevNode()) {
    if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
      bool ProducesNegatedCarry = CarrySrcMI->isSub();
      if (NeedsNegatedCarry == ProducesNegatedCarry &&
          CarrySrcMI->isUnsigned() &&
          CarrySrcMI->getCarryOutReg() == CarryReg &&
          selectAndRestoreState(*SrcMI))
        return nullptr;
    }
  }

  Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);

  if (NeedsNegatedCarry) {
    // (0 - Carry) sets !C in NZCV when Carry == 1.
    Register ZReg = AArch64::WZR;
    return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
  }

  // (Carry - 1) sets !C in NZCV when Carry == 0.
  auto Fns = select12BitValueWithLeftShift(1);
  return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
}
bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
                                                  MachineRegisterInfo &MRI) {
  auto &CarryMI = cast<GAddSubCarryOut>(I);

  if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
    // Set NZCV carry according to carry-in VReg.
    emitCarryIn(I, CarryInMI->getCarryInReg());
  }

  // Emit the operation and get the correct condition code.
  auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
                                CarryMI.getLHS(), CarryMI.getRHS(), MIB);

  Register CarryOutReg = CarryMI.getCarryOutReg();

  // Don't convert carry-out to VReg if it is never used.
  if (!MRI.use_nodbg_empty(CarryOutReg)) {
    // CSINC increments when the predicate is false, so use the inverted
    // condition code to get the increment when it's true.
    Register ZReg = AArch64::WZR;
    emitCSINC(CarryOutReg, ZReg, ZReg,
              getInvertedCondCode(OpAndCC.second), MIB);
  }

  I.eraseFromParent();
  return true;
}
std::pair<MachineInstr *, AArch64CC::CondCode>
AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
                                           MachineOperand &LHS,
                                           MachineOperand &RHS,
                                           MachineIRBuilder &MIRBuilder) const {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case TargetOpcode::G_SADDO:
    return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
  case TargetOpcode::G_UADDO:
    return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
  case TargetOpcode::G_SSUBO:
    return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
  case TargetOpcode::G_USUBO:
    return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
  case TargetOpcode::G_SADDE:
    return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
  case TargetOpcode::G_UADDE:
    return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
  case TargetOpcode::G_SSUBE:
    return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
  case TargetOpcode::G_USUBE:
    return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
  }
}
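// canEmitConjunction / emitConjunctionRec mirror the SelectionDAG scheme for
// lowering a tree of G_AND/G_OR over single-use compares into one CMP/FCMP
// followed by a chain of CCMP/FCCMP. CanNegate records whether a subtree's
// result can be produced in negated form for free, and MustBeFirst marks
// subtrees that have to supply the leading (unconditional) compare.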
static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
                               bool WillNegate, MachineRegisterInfo &MRI,
                               unsigned Depth = 0) {
  if (!MRI.hasOneNonDBGUse(Val))
    return false;
  MachineInstr *ValDef = MRI.getVRegDef(Val);
  unsigned Opcode = ValDef->getOpcode();
  if (isa<GAnyCmp>(ValDef)) {
    CanNegate = true;
    MustBeFirst = false;
    return true;
  }
  // ...
  if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
    bool IsOR = Opcode == TargetOpcode::G_OR;
    // ...
    if (MustBeFirstL && MustBeFirstR)
      return false;
    if (IsOR) {
      // For an OR expression we need to be able to naturally negate at least
      // one side, or we cannot do the transformation at all.
      if (!CanNegateL && !CanNegateR)
        return false;
      CanNegate = WillNegate && CanNegateL && CanNegateR;
      // If we cannot naturally negate the whole sub-tree, then it must come
      // first.
      MustBeFirst = !CanNegate;
    } else {
      assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
      CanNegate = false;
      MustBeFirst = MustBeFirstL || MustBeFirstR;
    }
    return true;
  }
  return false;
}
MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
    Register LHS, Register RHS, CmpInst::Predicate CC,
    AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
    MachineIRBuilder &MIB) const {
  auto &MRI = *MIB.getMRI();
  LLT OpTy = MRI.getType(LHS);
  unsigned CCmpOpc;
  std::optional<ValueAndVReg> C;
  if (CmpInst::isIntPredicate(CC)) {
    // ...
    if (C && C->Value.ult(32))
      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
    else
      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
  } else {
    switch (OpTy.getSizeInBits()) {
    case 16:
      assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
      CCmpOpc = AArch64::FCCMPHrr;
      break;
    case 32:
      CCmpOpc = AArch64::FCCMPSrr;
      break;
    case 64:
      CCmpOpc = AArch64::FCCMPDrr;
      break;
    default:
      return nullptr;
    }
  }
  // ...
  if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
    CCmp.addImm(C->Value.getZExtValue());
  else
    CCmp.addReg(RHS);
  // ...
  return &*CCmp;
}
MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
    Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
    AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
  MachineInstr *ValDef = MIB.getMRI()->getVRegDef(Val);
  // Handle a tree leaf: emit the compare (possibly chained onto CCOp).
  if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
    // ...
    if (isa<GICmp>(Cmp)) {
      // ...
    }
    // ...
      ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
    // ...
    auto Dst = MRI.cloneVirtualRegister(LHS);
    if (isa<GICmp>(Cmp))
      return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
    return emitFPCompare(Cmp->getOperand(2).getReg(),
                         Cmp->getOperand(3).getReg(), MIB);
  }
  // ...
  assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
  unsigned Opcode = ValDef->getOpcode();
  bool IsOR = Opcode == TargetOpcode::G_OR;
  // ...
  assert(ValidL && "Valid conjunction/disjunction tree");
  // ...
  assert(ValidR && "Valid conjunction/disjunction tree");
  // Swap a sub-tree that must come first to the right side.
  if (MustBeFirstL) {
    assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
    std::swap(LHS, RHS);
    std::swap(CanNegateL, CanNegateR);
    std::swap(MustBeFirstL, MustBeFirstR);
  }

  bool NegateR;
  bool NegateAfterR;
  bool NegateL;
  bool NegateAfterAll;
  if (Opcode == TargetOpcode::G_OR) {
    // Swap the sub-tree that we can negate naturally to the left.
    if (!CanNegateL) {
      assert(CanNegateR && "at least one side must be negatable");
      assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
      std::swap(LHS, RHS);
      NegateR = false;
      NegateAfterR = true;
    } else {
      // Negate the left sub-tree if possible, otherwise negate the result.
      NegateR = CanNegateR;
      NegateAfterR = !CanNegateR;
    }
    NegateL = true;
    NegateAfterAll = !Negate;
  } else {
    assert(Opcode == TargetOpcode::G_AND &&
           "Valid conjunction/disjunction tree");
    assert(!Negate && "Valid conjunction/disjunction tree");
    NegateL = false;
    NegateR = false;
    NegateAfterR = false;
    NegateAfterAll = false;
  }
  // ...
}
MachineInstr *AArch64InstructionSelector::emitConjunction(
    Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
  bool DummyCanNegate;
  bool DummyMustBeFirst;
  if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
                          *MIB.getMRI()))
    return nullptr;
  // ...
}

bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
                                                         MachineInstr &CondMI) {
  // ...
}
bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  // ...
  // Only fold the condition when every non-debug user is also a G_SELECT;
  // otherwise materializing it once is cheaper overall.
  if (!MRI.hasOneNonDBGUse(CondDefReg)) {
    for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
      // ...
      if (UI.getOpcode() != TargetOpcode::G_SELECT)
        return false;
    }
  }
  // ...
  unsigned CondOpc = CondDef->getOpcode();
  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
    if (tryOptSelectConjunction(I, *CondDef))
      return true;
    return false;
  }
  // ...
  if (CondOpc == TargetOpcode::G_ICMP) {
    // ...
  }
  // ...
  emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
             I.getOperand(3).getReg(), CondCode, MIB);
  I.eraseFromParent();
  return true;
}
MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
         "Unexpected MachineOperand");
  // ...
    return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
  // ...
  if (/* compare against 0 and */
      LHSDef->getOpcode() == TargetOpcode::G_AND) {
    // ...
    if (!ValAndVReg || ValAndVReg->Value != 0)
      return nullptr;
    // ...
  }
  return nullptr;
}
bool AArch64InstructionSelector::selectShuffleVector(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  Register Src1Reg = I.getOperand(1).getReg();
  const LLT Src1Ty = MRI.getType(Src1Reg);
  Register Src2Reg = I.getOperand(2).getReg();
  const LLT Src2Ty = MRI.getType(Src2Reg);
  // ...
    LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
    return false;
  // ...
  for (int Val : Mask) {
    // For now, any undef indexes we'll just assume to be 0. This should be
    // optimized in future, e.g. to select DUP etc.
    Val = Val < 0 ? 0 : Val;
    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
      // ...
    }
  }
  // ...
  if (DstTy.getSizeInBits() != 128) {
    // ...
        emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
    // ...
    IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
                                   IndexLoad->getOperand(0).getReg(), MIB);
    auto TBL1 = MIB.buildInstr(
        AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
        {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
    // ...
    auto Copy = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0)}, {})
                    .addReg(TBL1.getReg(0), 0, AArch64::dsub);
    RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
    I.eraseFromParent();
    return true;
  }
  // ...
  auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
                             {RegSeq, IndexLoad->getOperand(0).getReg()});
  constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
MachineInstr *AArch64InstructionSelector::emitLaneInsert(
    std::optional<Register> DstReg, Register SrcReg, Register EltReg,
    unsigned LaneIdx, const RegisterBank &RB,
    MachineIRBuilder &MIRBuilder) const {
  MachineInstr *InsElt = nullptr;
  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();

  // Create a register to define with the insert if one wasn't passed in.
  if (!DstReg)
    DstReg = MRI.createVirtualRegister(DstRC);

  unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
  unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;

  if (RB.getID() == AArch64::FPRRegBankID) {
    auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
                 .addImm(LaneIdx)
                 .addUse(InsSub->getOperand(0).getReg())
                 .addImm(0);
  } else {
    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
                 .addImm(LaneIdx)
                 .addUse(EltReg);
  }
  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
  return InsElt;
}
bool AArch64InstructionSelector::selectUSMovFromExtend(
    MachineInstr &MI, MachineRegisterInfo &MRI) {
  if (MI.getOpcode() != TargetOpcode::G_SEXT &&
      MI.getOpcode() != TargetOpcode::G_ZEXT &&
      MI.getOpcode() != TargetOpcode::G_ANYEXT)
    return false;
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
  const Register DefReg = MI.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DefReg);
  unsigned DstSize = DstTy.getSizeInBits();

  if (DstSize != 32 && DstSize != 64)
    return false;

  MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
                                       MI.getOperand(1).getReg(), MRI);
  // ...
  Register Src0 = Extract->getOperand(1).getReg();
  const LLT &VecTy = MRI.getType(Src0);
  // ...
  if (VecTy.getSizeInBits() != 128) {
    const MachineInstr *ScalarToVector = emitScalarToVector(
        VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
    assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
    Src0 = ScalarToVector->getOperand(0).getReg();
  }

  unsigned Opcode;
  if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
    Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
  else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
    Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
  else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
    Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
  else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
    Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
  else
    Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;

  // ...
  MachineInstr *ExtI = nullptr;
  if (DstSize == 64 && !IsSigned) {
    Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
    MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
    ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
               .addImm(0)
               .addUse(NewReg)
               .addImm(AArch64::sub_32);
    RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
  } else {
    ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
  }
  constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  MI.eraseFromParent();
  return true;
}
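// The tryAdvSIMDModImm* helpers try to build a vector constant with a single
// MOVI/MVNI (or FMOV) using the AdvSIMD modified-immediate encodings. A
// 128-bit request is only legal when the pattern is one 64-bit value repeated
// (Bits.getHiBits(64) == Bits.getLoBits(64)); otherwise each helper picks the
// 64- or 128-bit opcode variant and encodes the splat immediate.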
MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
    Register Dst, unsigned DstSize, APInt Bits,
    MachineIRBuilder &Builder) {
  unsigned int Op;
  if (DstSize == 128) {
    if (Bits.getHiBits(64) != Bits.getLoBits(64))
      return nullptr;
    Op = AArch64::MOVIv16b_ns;
  } else {
    Op = AArch64::MOVIv8b_ns;
  }
  // ...
  auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
  constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
  return &*Mov;
  // ...
}

MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
    bool Inv) {
  unsigned int Op;
  if (DstSize == 128) {
    if (Bits.getHiBits(64) != Bits.getLoBits(64))
      return nullptr;
    Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
  } else {
    Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
  }
  // ...
}

MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
    bool Inv) {
  unsigned int Op;
  if (DstSize == 128) {
    if (Bits.getHiBits(64) != Bits.getLoBits(64))
      return nullptr;
    Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
  } else {
    Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
  }
  // ...
}

MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
    Register Dst, unsigned DstSize, APInt Bits,
    MachineIRBuilder &Builder) {
  unsigned int Op;
  if (DstSize == 128) {
    if (Bits.getHiBits(64) != Bits.getLoBits(64))
      return nullptr;
    Op = AArch64::MOVIv2d_ns;
  } else {
    Op = AArch64::MOVID;
  }
  // ...
  auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
  constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
  return &*Mov;
  // ...
}

MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
    bool Inv) {
  unsigned int Op;
  if (DstSize == 128) {
    if (Bits.getHiBits(64) != Bits.getLoBits(64))
      return nullptr;
    Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
  } else {
    Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
  }
  // ...
}

MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
    Register Dst, unsigned DstSize, APInt Bits,
    MachineIRBuilder &Builder) {
  unsigned int Op;
  bool IsWide = false;
  if (DstSize == 128) {
    if (Bits.getHiBits(64) != Bits.getLoBits(64))
      return nullptr;
    Op = AArch64::FMOVv4f32_ns;
    IsWide = true;
  } else {
    Op = AArch64::FMOVv2f32_ns;
  }
  // ...
    if (IsWide)
      Op = AArch64::FMOVv2f64_ns;
  // ...
  auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
  constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
  return &*Mov;
}
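// Pre/post-indexed loads produce two values: the loaded data and the updated
// base pointer (the writeback). The *pre opcodes apply the immediate offset
// before the access and the *post opcodes after it, folding the pointer
// update into the memory instruction itself.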
bool AArch64InstructionSelector::selectIndexedExtLoad(
    MachineInstr &MI, MachineRegisterInfo &MRI) {
  auto &ExtLd = cast<GIndexedAnyExtLoad>(MI);
  Register Dst = ExtLd.getDstReg();
  Register WriteBack = ExtLd.getWritebackReg();
  Register Base = ExtLd.getBaseReg();
  Register Offset = ExtLd.getOffsetReg();
  LLT Ty = MRI.getType(Dst);
  // ...
  unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
  bool IsPre = ExtLd.isPre();
  bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
  bool InsertIntoXReg = false;
  bool IsDst64 = Ty.getSizeInBits() == 64;

  unsigned Opc = 0;
  LLT NewLdDstTy;
  LLT s32 = LLT::scalar(32);
  LLT s64 = LLT::scalar(64);

  if (MemSizeBits == 8) {
    if (IsSExt) {
      if (IsDst64)
        Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
      NewLdDstTy = IsDst64 ? s64 : s32;
    } else {
      Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertIntoXReg = IsDst64;
      NewLdDstTy = s32;
    }
  } else if (MemSizeBits == 16) {
    if (IsSExt) {
      if (IsDst64)
        Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
      NewLdDstTy = IsDst64 ? s64 : s32;
    } else {
      Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertIntoXReg = IsDst64;
      NewLdDstTy = s32;
    }
  } else if (MemSizeBits == 32) {
    if (IsSExt) {
      Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
      NewLdDstTy = s64;
    } else {
      Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      InsertIntoXReg = IsDst64;
      NewLdDstTy = s32;
    }
  } else {
    llvm_unreachable("Unexpected size for indexed load");
  }

  if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
    return false; // We should be on gpr.

  // ...
  auto LdMI = MIB.buildInstr(Opc, {WriteBack, NewLdDstTy}, {Base})
                  .addImm(Cst->getSExtValue());
  LdMI.cloneMemRefs(ExtLd);
  constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);

  // Make sure to select the load with the MemTy as the dest type, and then
  // insert into an X reg if needed.
  if (InsertIntoXReg) {
    auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
                        .addImm(0)
                        .addUse(LdMI.getReg(1))
                        .addImm(AArch64::sub_32);
    RBI.constrainGenericRegister(SubToReg.getReg(0), AArch64::GPR64RegClass,
                                 MRI);
  } else {
    // ...
  }
  MI.eraseFromParent();
  return true;
}
bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
                                                   MachineRegisterInfo &MRI) {
  auto &Ld = cast<GIndexedLoad>(MI);
  Register Dst = Ld.getDstReg();
  Register WriteBack = Ld.getWritebackReg();
  Register Base = Ld.getBaseReg();
  Register Offset = Ld.getOffsetReg();
  assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
         "Unexpected type for indexed load");
  unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();

  if (MemSize < MRI.getType(Dst).getSizeInBytes())
    return selectIndexedExtLoad(MI, MRI);

  unsigned Opc = 0;
  if (Ld.isPre()) {
    static constexpr unsigned GPROpcodes[] = {
        AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
        AArch64::LDRXpre};
    static constexpr unsigned FPROpcodes[] = {
        AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
        AArch64::LDRQpre};
    if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
      Opc = FPROpcodes[Log2_32(MemSize)];
    else
      Opc = GPROpcodes[Log2_32(MemSize)];
  } else {
    static constexpr unsigned GPROpcodes[] = {
        AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
        AArch64::LDRXpost};
    static constexpr unsigned FPROpcodes[] = {
        AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
        AArch64::LDRDpost, AArch64::LDRQpost};
    if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
      Opc = FPROpcodes[Log2_32(MemSize)];
    else
      Opc = GPROpcodes[Log2_32(MemSize)];
  }
  // ...
  auto LdMI =
      MIB.buildInstr(Opc, {WriteBack, Dst}, {Base}).addImm(Cst->getSExtValue());
  LdMI.cloneMemRefs(Ld);
  constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
  MI.eraseFromParent();
  return true;
}
bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
                                                    MachineRegisterInfo &MRI) {
  Register Dst = I.getWritebackReg();
  Register Val = I.getValueReg();
  Register Base = I.getBaseReg();
  Register Offset = I.getOffsetReg();
  LLT ValTy = MRI.getType(Val);
  // ...
  unsigned Opc = 0;
  if (I.isPre()) {
    static constexpr unsigned GPROpcodes[] = {
        AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
        AArch64::STRXpre};
    static constexpr unsigned FPROpcodes[] = {
        AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
        AArch64::STRQpre};
    if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
      Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
    else
      Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
  } else {
    static constexpr unsigned GPROpcodes[] = {
        AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
        AArch64::STRXpost};
    static constexpr unsigned FPROpcodes[] = {
        AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
        AArch64::STRDpost, AArch64::STRQpost};
    if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
      Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
    else
      Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
  }
  // ...
  auto Str =
      MIB.buildInstr(Opc, {Dst}, {Val, Base}).addImm(Cst->getSExtValue());
  Str.cloneMemRefs(I);
  constrainSelectedInstRegOperands(*Str, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
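// Vector constant materialization strategy, in order of preference: all-zero
// constants become "MOVI Vd.2d, #0"; otherwise try the modified-immediate
// encodings on the raw bits, then on a sign-bit-flipped pattern that is
// undone afterwards with a vector FNEG, and finally fall back to a constant
// pool load.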
MachineInstr *
AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
                                               MachineIRBuilder &MIRBuilder,
                                               MachineRegisterInfo &MRI) {
  LLT DstTy = MRI.getType(Dst);
  unsigned DstSize = DstTy.getSizeInBits();
  if (CV->isNullValue()) {
    if (DstSize == 128) {
      auto Mov =
          MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
      constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
      return &*Mov;
    }
    if (DstSize == 64) {
      auto Mov =
          MIRBuilder
              .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
              .addImm(0);
      auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
                      .addReg(Mov.getReg(0), 0, AArch64::dsub);
      RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
      return &*Copy;
    }
  }
  // ...
  if (auto *NewOp = TryMOVIWithBits(DefBits))
    return NewOp;

  // See if an fneg of the constant can be materialized with a MOVI instead.
  auto TryWithFNeg = [&](APInt DefBits, int NumBits,
                         unsigned NegOpc) -> MachineInstr * {
    // FNegate each sub-element of the constant.
    APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize);
    APInt NegBits(DstSize, 0);
    unsigned NumElts = DstSize / NumBits;
    for (unsigned i = 0; i < NumElts; i++)
      NegBits |= Neg << (NumBits * i);
    NegBits = DefBits ^ NegBits;
    // If the negated constant has a MOVI encoding, emit it plus an fneg.
    if (auto *NewOp = TryMOVIWithBits(NegBits)) {
      Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
      NewOp->getOperand(0).setReg(NewDst);
      return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
    }
    return nullptr;
  };
  MachineInstr *R;
  if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
      (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
      (STI.hasFullFP16() &&
       (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
    return R;

  auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
  if (!CPLoad) {
    LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
    return nullptr;
  }
  auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
  RBI.constrainGenericRegister(
      Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
  return &*Copy;
}
bool AArch64InstructionSelector::tryOptConstantBuildVec(
    MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  unsigned DstSize = DstTy.getSizeInBits();
  assert(DstSize <= 128 && "Unexpected build_vec type!");
  // ...
  SmallVector<Constant *, 16> Csts;
  for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
    // Try to find G_CONSTANT or G_FCONSTANT.
    auto *OpMI =
        getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
    if (OpMI)
      Csts.emplace_back(
          const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
    else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
                                  I.getOperand(Idx).getReg(), MRI)))
      Csts.emplace_back(
          const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
    else
      return false;
  }
  Constant *CV = ConstantVector::get(Csts);
  if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
    return false;
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  // ...
  Register Dst = I.getOperand(0).getReg();
  Register EltReg = I.getOperand(1).getReg();
  LLT EltTy = MRI.getType(EltReg);
  // ...
  // Bail unless every element other than the first is undef.
  if (any_of(drop_begin(I.operands(), 2), [&MRI](const MachineOperand &Op) {
        return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);
      }))
    return false;
  // ...
  const TargetRegisterClass *DstRC =
      getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
  // ...
  auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
                         .addImm(0)
                         .addUse(EltReg)
                         .addImm(SubReg);
  I.eraseFromParent();
  constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
  return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
}
bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
                                                   MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  // Until we port more of the optimized selections, for now just use a vector
  // insert sequence.
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
  unsigned EltSize = EltTy.getSizeInBits();

  if (tryOptConstantBuildVec(I, DstTy, MRI))
    return true;
  if (tryOptBuildVecToSubregToReg(I, MRI))
    return true;

  if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
    return false; // Don't support all element types yet.
  // ...
  MachineInstr *ScalarToVec =
      emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
                         I.getOperand(1).getReg(), MIB);
  if (!ScalarToVec)
    return false;
  // ...
  MachineInstr *PrevMI = ScalarToVec;
  for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
    Register OpReg = I.getOperand(i).getReg();
    // Do not emit inserts for undefs.
    if (!getOpcodeDef<GImplicitDef>(OpReg, MRI)) {
      PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
      DstVec = PrevMI->getOperand(0).getReg();
    }
  }

  // If DstTy is narrower than 128 bits, emit a subregister copy from the
  // 128-bit vector we built up.
  if (DstSize < 128) {
    const TargetRegisterClass *RC =
        getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
    // ...
    if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
      LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
      return false;
    }
    // ...
    if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
                        << "\n");
      return false;
    }

    Register DstReg = I.getOperand(0).getReg();
    MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
    // ...
    RBI.constrainGenericRegister(DstReg, *RC, MRI);
  } else {
    // ...
    if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
      const TargetRegisterClass *RC =
          getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
      RBI.constrainGenericRegister(DstReg, *RC, MRI);
    }
  }
  I.eraseFromParent();
  return true;
}
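// The LD1x*/LD2/LD3/LD4 pseudos define one register tuple. After selecting
// the load, the tuple is split back into the intrinsic's NumVecs destination
// registers by copying the consecutive dsub0+Idx / qsub0+Idx subregisters.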
bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
                                                           unsigned NumVecs,
                                                           MachineInstr &I) {
  assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
  assert(Opc && "Expected an opcode?");
  assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
  auto &MRI = *MIB.getMRI();
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 64 || Size == 128) &&
         "Destination must be 64 bits or 128 bits?");
  unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
  auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
  assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
  // ...
  Load.cloneMemRefs(I);
  constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
  Register SelectedLoadDst = Load->getOperand(0).getReg();
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
                   .addReg(SelectedLoadDst, 0, SubReg + Idx);
    // Emit the subreg copies and immediately select them.
    selectCopy(*Vec, TII, MRI, TRI, RBI);
  }
  return true;
}

bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
    unsigned Opc, unsigned NumVecs, MachineInstr &I) {
  assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
  assert(Opc && "Expected an opcode?");
  assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
  auto &MRI = *MIB.getMRI();
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  bool Narrow = Ty.getSizeInBits() == 64;

  auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
  SmallVector<Register, 4> Regs(NumVecs);
  std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.begin(),
                 [](auto MO) { return MO.getReg(); });

  if (Narrow) {
    transform(Regs, Regs.begin(), [this](Register Reg) {
      return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
          ->getOperand(0)
          .getReg();
    });
    // ...
  }
  // ...
      .addImm(LaneNo->getZExtValue())
  // ...
  Load.cloneMemRefs(I);
  constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
  Register SelectedLoadDst = Load->getOperand(0).getReg();
  unsigned SubReg = AArch64::qsub0;
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    auto Vec = MIB.buildInstr(TargetOpcode::COPY,
                              {Narrow ? DstOp(&AArch64::FPR128RegClass)
                                      : DstOp(I.getOperand(Idx).getReg())},
                              {})
                   .addReg(SelectedLoadDst, 0, SubReg + Idx);
    Register WideReg = Vec.getReg(0);
    // Emit the subreg copies and immediately select them.
    selectCopy(*Vec, TII, MRI, TRI, RBI);
    if (Narrow &&
        !emitNarrowVector(I.getOperand(Idx).getReg(), WideReg, MIB, MRI))
      return false;
  }
  return true;
}
void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
                                                            unsigned NumVecs,
                                                            unsigned Opc) {
  MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
  LLT Ty = MRI.getType(I.getOperand(1).getReg());
  Register Ptr = I.getOperand(1 + NumVecs).getReg();

  SmallVector<Register, 2> Regs(NumVecs);
  std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
                 Regs.begin(), [](auto MO) { return MO.getReg(); });

  Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
                                             : createDTuple(Regs, MIB);
  auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
  Store.cloneMemRefs(I);
  constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
}

bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
    MachineInstr &I, unsigned NumVecs, unsigned Opc) {
  MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
  LLT Ty = MRI.getType(I.getOperand(1).getReg());
  bool Narrow = Ty.getSizeInBits() == 64;

  SmallVector<Register, 2> Regs(NumVecs);
  std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
                 Regs.begin(), [](auto MO) { return MO.getReg(); });

  if (Narrow)
    transform(Regs, Regs.begin(), [this](Register Reg) {
      return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
          ->getOperand(0)
          .getReg();
    });
  // ...
      .addImm(LaneNo->getZExtValue())
  // ...
  return true;
}
bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  // Find the intrinsic ID.
  unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();

  const LLT S8 = LLT::scalar(8);
  const LLT S16 = LLT::scalar(16);
  const LLT S32 = LLT::scalar(32);
  const LLT S64 = LLT::scalar(64);
  const LLT P0 = LLT::pointer(0, 64);
  // Select the instruction.
  switch (IntrinID) {
  default:
    return false;
  // ...
  case Intrinsic::aarch64_ldxp:
  case Intrinsic::aarch64_ldaxp: {
    auto NewI = MIB.buildInstr(
        IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
        {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
        {I.getOperand(3)});
    NewI.cloneMemRefs(I);
    constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
    break;
  }
  case Intrinsic::aarch64_neon_ld1x2: {
    LLT Ty = MRI.getType(I.getOperand(0).getReg());
    unsigned Opc = 0;
    if (Ty == LLT::fixed_vector(8, S8)) Opc = AArch64::LD1Twov8b;
    else if (Ty == LLT::fixed_vector(16, S8)) Opc = AArch64::LD1Twov16b;
    else if (Ty == LLT::fixed_vector(4, S16)) Opc = AArch64::LD1Twov4h;
    else if (Ty == LLT::fixed_vector(8, S16)) Opc = AArch64::LD1Twov8h;
    else if (Ty == LLT::fixed_vector(2, S32)) Opc = AArch64::LD1Twov2s;
    else if (Ty == LLT::fixed_vector(4, S32)) Opc = AArch64::LD1Twov4s;
    else if (Ty == LLT::fixed_vector(2, S64)) Opc = AArch64::LD1Twov2d;
    else if (Ty == S64 || Ty == P0) Opc = AArch64::LD1Twov1d;
    else llvm_unreachable("Unexpected type for ld1x2!");
    selectVectorLoadIntrinsic(Opc, 2, I);
    break;
  }
  case Intrinsic::aarch64_neon_ld1x3: {
    LLT Ty = MRI.getType(I.getOperand(0).getReg());
    unsigned Opc = 0;
    if (Ty == LLT::fixed_vector(8, S8)) Opc = AArch64::LD1Threev8b;
    else if (Ty == LLT::fixed_vector(16, S8)) Opc = AArch64::LD1Threev16b;
    else if (Ty == LLT::fixed_vector(4, S16)) Opc = AArch64::LD1Threev4h;
    else if (Ty == LLT::fixed_vector(8, S16)) Opc = AArch64::LD1Threev8h;
    else if (Ty == LLT::fixed_vector(2, S32)) Opc = AArch64::LD1Threev2s;
    else if (Ty == LLT::fixed_vector(4, S32)) Opc = AArch64::LD1Threev4s;
    else if (Ty == LLT::fixed_vector(2, S64)) Opc = AArch64::LD1Threev2d;
    else if (Ty == S64 || Ty == P0) Opc = AArch64::LD1Threev1d;
    else llvm_unreachable("Unexpected type for ld1x3!");
    selectVectorLoadIntrinsic(Opc, 3, I);
    break;
  }
  case Intrinsic::aarch64_neon_ld1x4: {
    LLT Ty = MRI.getType(I.getOperand(0).getReg());
    unsigned Opc = 0;
    if (Ty == LLT::fixed_vector(8, S8)) Opc = AArch64::LD1Fourv8b;
    else if (Ty == LLT::fixed_vector(16, S8)) Opc = AArch64::LD1Fourv16b;
    else if (Ty == LLT::fixed_vector(4, S16)) Opc = AArch64::LD1Fourv4h;
    else if (Ty == LLT::fixed_vector(8, S16)) Opc = AArch64::LD1Fourv8h;
    else if (Ty == LLT::fixed_vector(2, S32)) Opc = AArch64::LD1Fourv2s;
    else if (Ty == LLT::fixed_vector(4, S32)) Opc = AArch64::LD1Fourv4s;
    else if (Ty == LLT::fixed_vector(2, S64)) Opc = AArch64::LD1Fourv2d;
    else if (Ty == S64 || Ty == P0) Opc = AArch64::LD1Fourv1d;
    else llvm_unreachable("Unexpected type for ld1x4!");
    selectVectorLoadIntrinsic(Opc, 4, I);
    break;
  }
  case Intrinsic::aarch64_neon_ld2: {
    LLT Ty = MRI.getType(I.getOperand(0).getReg());
    unsigned Opc = 0;
    if (Ty == LLT::fixed_vector(8, S8)) Opc = AArch64::LD2Twov8b;
    else if (Ty == LLT::fixed_vector(16, S8)) Opc = AArch64::LD2Twov16b;
    else if (Ty == LLT::fixed_vector(4, S16)) Opc = AArch64::LD2Twov4h;
    else if (Ty == LLT::fixed_vector(8, S16)) Opc = AArch64::LD2Twov8h;
    else if (Ty == LLT::fixed_vector(2, S32)) Opc = AArch64::LD2Twov2s;
    else if (Ty == LLT::fixed_vector(4, S32)) Opc = AArch64::LD2Twov4s;
    else if (Ty == LLT::fixed_vector(2, S64)) Opc = AArch64::LD2Twov2d;
    else if (Ty == S64 || Ty == P0) Opc = AArch64::LD1Twov1d;
    else llvm_unreachable("Unexpected type for ld2!");
    selectVectorLoadIntrinsic(Opc, 2, I);
    break;
  }
  case Intrinsic::aarch64_neon_ld2lane: {
    LLT Ty = MRI.getType(I.getOperand(0).getReg());
    unsigned Opc;
    if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
      Opc = AArch64::LD2i8;
    else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
      Opc = AArch64::LD2i16;
    else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
      Opc = AArch64::LD2i32;
    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
      Opc = AArch64::LD2i64;
    else
      llvm_unreachable("Unexpected type for st2lane!");
    if (!selectVectorLoadLaneIntrinsic(Opc, 2, I))
      return false;
    break;
  }
  case Intrinsic::aarch64_neon_ld2r: {
    LLT Ty = MRI.getType(I.getOperand(0).getReg());
    unsigned Opc = 0;
    if (Ty == LLT::fixed_vector(8, S8)) Opc = AArch64::LD2Rv8b;
    else if (Ty == LLT::fixed_vector(16, S8)) Opc = AArch64::LD2Rv16b;
    else if (Ty == LLT::fixed_vector(4, S16)) Opc = AArch64::LD2Rv4h;
    else if (Ty == LLT::fixed_vector(8, S16)) Opc = AArch64::LD2Rv8h;
    else if (Ty == LLT::fixed_vector(2, S32)) Opc = AArch64::LD2Rv2s;
    else if (Ty == LLT::fixed_vector(4, S32)) Opc = AArch64::LD2Rv4s;
    else if (Ty == LLT::fixed_vector(2, S64)) Opc = AArch64::LD2Rv2d;
    else if (Ty == S64 || Ty == P0) Opc = AArch64::LD2Rv1d;
    else llvm_unreachable("Unexpected type for ld2r!");
    selectVectorLoadIntrinsic(Opc, 2, I);
    break;
  }
  case Intrinsic::aarch64_neon_ld3: {
    LLT Ty = MRI.getType(I.getOperand(0).getReg());
    unsigned Opc = 0;
    if (Ty == LLT::fixed_vector(8, S8)) Opc = AArch64::LD3Threev8b;
    else if (Ty == LLT::fixed_vector(16, S8)) Opc = AArch64::LD3Threev16b;
    else if (Ty == LLT::fixed_vector(4, S16)) Opc = AArch64::LD3Threev4h;
    else if (Ty == LLT::fixed_vector(8, S16)) Opc = AArch64::LD3Threev8h;
    else if (Ty == LLT::fixed_vector(2, S32)) Opc = AArch64::LD3Threev2s;
    else if (Ty == LLT::fixed_vector(4, S32)) Opc = AArch64::LD3Threev4s;
    else if (Ty == LLT::fixed_vector(2, S64)) Opc = AArch64::LD3Threev2d;
    else if (Ty == S64 || Ty == P0) Opc = AArch64::LD1Threev1d;
    else llvm_unreachable("Unexpected type for ld3!");
    selectVectorLoadIntrinsic(Opc, 3, I);
    break;
  }
  case Intrinsic::aarch64_neon_ld3lane: {
    LLT Ty = MRI.getType(I.getOperand(0).getReg());
    unsigned Opc;
    if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
      Opc = AArch64::LD3i8;
    else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
      Opc = AArch64::LD3i16;
    else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
      Opc = AArch64::LD3i32;
    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
      Opc = AArch64::LD3i64;
    else
      llvm_unreachable("Unexpected type for st3lane!");
    if (!selectVectorLoadLaneIntrinsic(Opc, 3, I))
      return false;
    break;
  }
  case Intrinsic::aarch64_neon_ld3r: {
    LLT Ty = MRI.getType(I.getOperand(0).getReg());
    unsigned Opc = 0;
    if (Ty == LLT::fixed_vector(8, S8)) Opc = AArch64::LD3Rv8b;
    else if (Ty == LLT::fixed_vector(16, S8)) Opc = AArch64::LD3Rv16b;
    else if (Ty == LLT::fixed_vector(4, S16)) Opc = AArch64::LD3Rv4h;
    else if (Ty == LLT::fixed_vector(8, S16)) Opc = AArch64::LD3Rv8h;
    else if (Ty == LLT::fixed_vector(2, S32)) Opc = AArch64::LD3Rv2s;
    else if (Ty == LLT::fixed_vector(4, S32)) Opc = AArch64::LD3Rv4s;
    else if (Ty == LLT::fixed_vector(2, S64)) Opc = AArch64::LD3Rv2d;
    else if (Ty == S64 || Ty == P0) Opc = AArch64::LD3Rv1d;
    else llvm_unreachable("Unexpected type for ld3r!");
    selectVectorLoadIntrinsic(Opc, 3, I);
    break;
  }
  case Intrinsic::aarch64_neon_ld4: {
    LLT Ty = MRI.getType(I.getOperand(0).getReg());
    unsigned Opc = 0;
    if (Ty == LLT::fixed_vector(8, S8)) Opc = AArch64::LD4Fourv8b;
    else if (Ty == LLT::fixed_vector(16, S8)) Opc = AArch64::LD4Fourv16b;
    else if (Ty == LLT::fixed_vector(4, S16)) Opc = AArch64::LD4Fourv4h;
    else if (Ty == LLT::fixed_vector(8, S16)) Opc = AArch64::LD4Fourv8h;
    else if (Ty == LLT::fixed_vector(2, S32)) Opc = AArch64::LD4Fourv2s;
    else if (Ty == LLT::fixed_vector(4, S32)) Opc = AArch64::LD4Fourv4s;
    else if (Ty == LLT::fixed_vector(2, S64)) Opc = AArch64::LD4Fourv2d;
    else if (Ty == S64 || Ty == P0) Opc = AArch64::LD1Fourv1d;
    else llvm_unreachable("Unexpected type for ld4!");
    selectVectorLoadIntrinsic(Opc, 4, I);
    break;
  }
  case Intrinsic::aarch64_neon_ld4lane: {
    LLT Ty = MRI.getType(I.getOperand(0).getReg());
    unsigned Opc;
    if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
      Opc = AArch64::LD4i8;
    else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
      Opc = AArch64::LD4i16;
    else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
      Opc = AArch64::LD4i32;
    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
      Opc = AArch64::LD4i64;
    else
      llvm_unreachable("Unexpected type for st4lane!");
    if (!selectVectorLoadLaneIntrinsic(Opc, 4, I))
      return false;
    break;
  }
  case Intrinsic::aarch64_neon_ld4r: {
    LLT Ty = MRI.getType(I.getOperand(0).getReg());
    unsigned Opc = 0;
    if (Ty == LLT::fixed_vector(8, S8)) Opc = AArch64::LD4Rv8b;
    else if (Ty == LLT::fixed_vector(16, S8)) Opc = AArch64::LD4Rv16b;
    else if (Ty == LLT::fixed_vector(4, S16)) Opc = AArch64::LD4Rv4h;
    else if (Ty == LLT::fixed_vector(8, S16)) Opc = AArch64::LD4Rv8h;
    else if (Ty == LLT::fixed_vector(2, S32)) Opc = AArch64::LD4Rv2s;
    else if (Ty == LLT::fixed_vector(4, S32)) Opc = AArch64::LD4Rv4s;
    else if (Ty == LLT::fixed_vector(2, S64)) Opc = AArch64::LD4Rv2d;
    else if (Ty == S64 || Ty == P0) Opc = AArch64::LD4Rv1d;
    else llvm_unreachable("Unexpected type for ld4r!");
    selectVectorLoadIntrinsic(Opc, 4, I);
    break;
  }
  case Intrinsic::aarch64_neon_st1x2: {
    LLT Ty = MRI.getType(I.getOperand(1).getReg());
    unsigned Opc;
    if (Ty == LLT::fixed_vector(8, S8)) Opc = AArch64::ST1Twov8b;
    else if (Ty == LLT::fixed_vector(16, S8)) Opc = AArch64::ST1Twov16b;
    else if (Ty == LLT::fixed_vector(4, S16)) Opc = AArch64::ST1Twov4h;
    else if (Ty == LLT::fixed_vector(8, S16)) Opc = AArch64::ST1Twov8h;
    else if (Ty == LLT::fixed_vector(2, S32)) Opc = AArch64::ST1Twov2s;
    else if (Ty == LLT::fixed_vector(4, S32)) Opc = AArch64::ST1Twov4s;
    else if (Ty == LLT::fixed_vector(2, S64)) Opc = AArch64::ST1Twov2d;
    else if (Ty == S64 || Ty == P0) Opc = AArch64::ST1Twov1d;
    else llvm_unreachable("Unexpected type for st1x2!");
    selectVectorStoreIntrinsic(I, 2, Opc);
    break;
  }
  case Intrinsic::aarch64_neon_st1x3: {
    LLT Ty = MRI.getType(I.getOperand(1).getReg());
    unsigned Opc;
    if (Ty == LLT::fixed_vector(8, S8)) Opc = AArch64::ST1Threev8b;
    else if (Ty == LLT::fixed_vector(16, S8)) Opc = AArch64::ST1Threev16b;
    else if (Ty == LLT::fixed_vector(4, S16)) Opc = AArch64::ST1Threev4h;
    else if (Ty == LLT::fixed_vector(8, S16)) Opc = AArch64::ST1Threev8h;
    else if (Ty == LLT::fixed_vector(2, S32)) Opc = AArch64::ST1Threev2s;
    else if (Ty == LLT::fixed_vector(4, S32)) Opc = AArch64::ST1Threev4s;
    else if (Ty == LLT::fixed_vector(2, S64)) Opc = AArch64::ST1Threev2d;
    else if (Ty == S64 || Ty == P0) Opc = AArch64::ST1Threev1d;
    else llvm_unreachable("Unexpected type for st1x3!");
    selectVectorStoreIntrinsic(I, 3, Opc);
    break;
  }
  case Intrinsic::aarch64_neon_st1x4: {
    LLT Ty = MRI.getType(I.getOperand(1).getReg());
    unsigned Opc;
    if (Ty == LLT::fixed_vector(8, S8)) Opc = AArch64::ST1Fourv8b;
    else if (Ty == LLT::fixed_vector(16, S8)) Opc = AArch64::ST1Fourv16b;
    else if (Ty == LLT::fixed_vector(4, S16)) Opc = AArch64::ST1Fourv4h;
    else if (Ty == LLT::fixed_vector(8, S16)) Opc = AArch64::ST1Fourv8h;
    else if (Ty == LLT::fixed_vector(2, S32)) Opc = AArch64::ST1Fourv2s;
    else if (Ty == LLT::fixed_vector(4, S32)) Opc = AArch64::ST1Fourv4s;
    else if (Ty == LLT::fixed_vector(2, S64)) Opc = AArch64::ST1Fourv2d;
    else if (Ty == S64 || Ty == P0) Opc = AArch64::ST1Fourv1d;
    else llvm_unreachable("Unexpected type for st1x4!");
    selectVectorStoreIntrinsic(I, 4, Opc);
    break;
  }
  case Intrinsic::aarch64_neon_st2: {
    LLT Ty = MRI.getType(I.getOperand(1).getReg());
    unsigned Opc;
    if (Ty == LLT::fixed_vector(8, S8)) Opc = AArch64::ST2Twov8b;
    else if (Ty == LLT::fixed_vector(16, S8)) Opc = AArch64::ST2Twov16b;
    else if (Ty == LLT::fixed_vector(4, S16)) Opc = AArch64::ST2Twov4h;
    else if (Ty == LLT::fixed_vector(8, S16)) Opc = AArch64::ST2Twov8h;
    else if (Ty == LLT::fixed_vector(2, S32)) Opc = AArch64::ST2Twov2s;
    else if (Ty == LLT::fixed_vector(4, S32)) Opc = AArch64::ST2Twov4s;
    else if (Ty == LLT::fixed_vector(2, S64)) Opc = AArch64::ST2Twov2d;
    else if (Ty == S64 || Ty == P0) Opc = AArch64::ST1Twov1d;
    else llvm_unreachable("Unexpected type for st2!");
    selectVectorStoreIntrinsic(I, 2, Opc);
    break;
  }
  case Intrinsic::aarch64_neon_st3: {
    LLT Ty = MRI.getType(I.getOperand(1).getReg());
    unsigned Opc;
    if (Ty == LLT::fixed_vector(8, S8)) Opc = AArch64::ST3Threev8b;
    else if (Ty == LLT::fixed_vector(16, S8)) Opc = AArch64::ST3Threev16b;
    else if (Ty == LLT::fixed_vector(4, S16)) Opc = AArch64::ST3Threev4h;
    else if (Ty == LLT::fixed_vector(8, S16)) Opc = AArch64::ST3Threev8h;
    else if (Ty == LLT::fixed_vector(2, S32)) Opc = AArch64::ST3Threev2s;
    else if (Ty == LLT::fixed_vector(4, S32)) Opc = AArch64::ST3Threev4s;
    else if (Ty == LLT::fixed_vector(2, S64)) Opc = AArch64::ST3Threev2d;
    else if (Ty == S64 || Ty == P0) Opc = AArch64::ST1Threev1d;
    else llvm_unreachable("Unexpected type for st3!");
    selectVectorStoreIntrinsic(I, 3, Opc);
    break;
  }
  case Intrinsic::aarch64_neon_st4: {
    LLT Ty = MRI.getType(I.getOperand(1).getReg());
    unsigned Opc;
    if (Ty == LLT::fixed_vector(8, S8)) Opc = AArch64::ST4Fourv8b;
    else if (Ty == LLT::fixed_vector(16, S8)) Opc = AArch64::ST4Fourv16b;
    else if (Ty == LLT::fixed_vector(4, S16)) Opc = AArch64::ST4Fourv4h;
    else if (Ty == LLT::fixed_vector(8, S16)) Opc = AArch64::ST4Fourv8h;
    else if (Ty == LLT::fixed_vector(2, S32)) Opc = AArch64::ST4Fourv2s;
    else if (Ty == LLT::fixed_vector(4, S32)) Opc = AArch64::ST4Fourv4s;
    else if (Ty == LLT::fixed_vector(2, S64)) Opc = AArch64::ST4Fourv2d;
    else if (Ty == S64 || Ty == P0) Opc = AArch64::ST1Fourv1d;
    else llvm_unreachable("Unexpected type for st4!");
    selectVectorStoreIntrinsic(I, 4, Opc);
    break;
  }
  case Intrinsic::aarch64_neon_st2lane: {
    LLT Ty = MRI.getType(I.getOperand(1).getReg());
    unsigned Opc;
    if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
      Opc = AArch64::ST2i8;
    else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
      Opc = AArch64::ST2i16;
    else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
      Opc = AArch64::ST2i32;
    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
      Opc = AArch64::ST2i64;
    else
      llvm_unreachable("Unexpected type for st2lane!");
    if (!selectVectorStoreLaneIntrinsic(I, 2, Opc))
      return false;
    break;
  }
  case Intrinsic::aarch64_neon_st3lane: {
    LLT Ty = MRI.getType(I.getOperand(1).getReg());
    unsigned Opc;
    if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
      Opc = AArch64::ST3i8;
    else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
      Opc = AArch64::ST3i16;
    else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
      Opc = AArch64::ST3i32;
    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
      Opc = AArch64::ST3i64;
    else
      llvm_unreachable("Unexpected type for st3lane!");
    if (!selectVectorStoreLaneIntrinsic(I, 3, Opc))
      return false;
    break;
  }
  case Intrinsic::aarch64_neon_st4lane: {
    LLT Ty = MRI.getType(I.getOperand(1).getReg());
    unsigned Opc;
    if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
      Opc = AArch64::ST4i8;
    else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
      Opc = AArch64::ST4i16;
    else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
      Opc = AArch64::ST4i32;
    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
      Opc = AArch64::ST4i64;
    else
      llvm_unreachable("Unexpected type for st4lane!");
    if (!selectVectorStoreLaneIntrinsic(I, 4, Opc))
      return false;
    break;
  }
  case Intrinsic::aarch64_mops_memset_tag: {
    // The pseudo takes the destination, size, and value (in that order, with
    // size and value swapped relative to the intrinsic).
    Register DstDef = I.getOperand(0).getReg();
    Register DstUse = I.getOperand(2).getReg();
    Register ValUse = I.getOperand(3).getReg();
    Register SizeUse = I.getOperand(4).getReg();

    // The pseudo also defines an updated size, which the intrinsic does not
    // expose, so define a scratch virtual register for it.
    Register SizeDef = MRI.createVirtualRegister(&AArch64::GPR64RegClass);

    auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
                                 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
    Memset.cloneMemRefs(I);
    constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI);
    break;
  }
  }

  I.eraseFromParent();
  return true;
}
bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
                                                 MachineRegisterInfo &MRI) {
  unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
  switch (IntrinID) {
  default:
    break;
  case Intrinsic::aarch64_crypto_sha1h: {
    Register DstReg = I.getOperand(0).getReg();
    Register SrcReg = I.getOperand(2).getReg();

    // FIXME: Should this be an assert?
    if (MRI.getType(DstReg).getSizeInBits() != 32 ||
        MRI.getType(SrcReg).getSizeInBits() != 32)
      return false;

    // The operation has to happen on FPRs. Set up new FPR registers for the
    // source and destination if they are on GPRs.
    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
      SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
      MIB.buildCopy({SrcReg}, {I.getOperand(2)});

      // Make sure the copy ends up getting constrained properly.
      RBI.constrainGenericRegister(I.getOperand(2).getReg(),
                                   AArch64::GPR32RegClass, MRI);
    }

    if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
      DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);

    // Actually insert the instruction.
    auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
    constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);

    // If we created a new destination register, copy the result back into the
    // original one.
    if (DstReg != I.getOperand(0).getReg()) {
      MIB.buildCopy({I.getOperand(0)}, {DstReg});
      RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                                   AArch64::GPR32RegClass, MRI);
    }

    I.eraseFromParent();
    return true;
  }
  case Intrinsic::frameaddress:
  case Intrinsic::returnaddress: {
    MachineFunction &MF = *I.getParent()->getParent();
    MachineFrameInfo &MFI = MF.getFrameInfo();

    unsigned Depth = I.getOperand(2).getImm();
    Register DstReg = I.getOperand(0).getReg();
    RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);

    if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
      if (!MFReturnAddr) {
        // Insert the copy from LR/X30 into the entry block, before it can be
        // clobbered by anything.
        MFI.setReturnAddressIsTaken(true);
        MFReturnAddr = getFunctionLiveInPhysReg(
            MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
      }

      if (STI.hasPAuth()) {
        MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
      } else {
        // ...
      }

      I.eraseFromParent();
      return true;
    }

    MFI.setFrameAddressIsTaken(true);
    Register FrameAddr(AArch64::FP);

    // When the depth is > 0, walk up the chain of frames.
    while (Depth--) {
      Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
      auto Ldr =
          MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
      constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
      FrameAddr = NextFrame;
    }

    if (IntrinID == Intrinsic::frameaddress)
      MIB.buildCopy({DstReg}, {FrameAddr});
    else {
      MFI.setReturnAddressIsTaken(true);

      if (STI.hasPAuth()) {
        Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
        MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
        MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
      } else {
        // ...
      }
    }

    I.eraseFromParent();
    return true;
  }
  case Intrinsic::swift_async_context_addr: {
    auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
                              {Register(AArch64::FP)});
    // ...
    I.eraseFromParent();
    return true;
  }
  }
  return false;
}
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
    return std::nullopt;
  uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
    return std::nullopt;
  uint64_t Enc = 31 - *MaybeImmed;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
    return std::nullopt;
  uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
    return std::nullopt;
  uint64_t Enc = 63 - *MaybeImmed;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::select12BitValueWithLeftShift(
    uint64_t Immed) const {
  unsigned ShiftAmt;
  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return std::nullopt;
  // ...
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
  // This function is called from the addsub_shifted_imm ComplexPattern; we
  // still need to check whether the operand is actually an immediate here
  // because the ComplexPattern opcode list is only used in pattern matching.
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == std::nullopt)
    return std::nullopt;
  return select12BitValueWithLeftShift(*MaybeImmed);
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
  // We need a register here, because we need to know if we have a 64 or 32
  // bit immediate.
  if (!Root.isReg())
    return std::nullopt;
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == std::nullopt)
    return std::nullopt;
  uint64_t Immed = *MaybeImmed;

  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  // have the opposite effect on the C flag, so this pattern mustn't match
  // under those circumstances.
  if (Immed == 0)
    return std::nullopt;

  // Negate the immediate in the width of the root register.
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
    Immed = ~((uint32_t)Immed) + 1;
  else
    Immed = ~Immed + 1ULL;

  if (Immed & 0xFFFFFFFFFF000000ULL)
    return std::nullopt;

  Immed &= 0xFFFFFFULL;
  return select12BitValueWithLeftShift(Immed);
}
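// Example encodings for select12BitValueWithLeftShift: 0x123 is emitted as
// (imm = 0x123, shift = 0), 0x123000 as (imm = 0x123, shift = 12), and
// 0x123456 fails because its low 12 bits are nonzero while bits [23:12] are
// also set.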
bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
    MachineInstr &MI, const MachineRegisterInfo &MRI) const {
  // Always fold if there is one use, or if we're optimizing for size.
  Register DefReg = MI.getOperand(0).getReg();
  if (MRI.hasOneNonDBGUse(DefReg) ||
      MI.getParent()->getParent()->getFunction().hasOptSize())
    return true;
  // ...
  // Otherwise only fold if every user can fold the computation itself.
  return all_of(MRI.use_nodbg_instructions(DefReg),
                [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectExtendedSHL(
    MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
    unsigned SizeInBytes, bool WantsExt) const {
  assert(Base.isReg() && "Expected base to be a register operand");
  assert(Offset.isReg() && "Expected offset to be a register operand");
  // ...
  unsigned OffsetOpc = OffsetInst->getOpcode();
  bool LookedThroughZExt = false;
  if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
    // Try to look through a ZEXT.
    if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
      return std::nullopt;
    // ...
    LookedThroughZExt = true;

    if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
      return std::nullopt;
  }
  // Make sure that the memory op is a valid size.
  int64_t LegalShiftVal = Log2_32(SizeInBytes);
  if (LegalShiftVal == 0)
    return std::nullopt;
  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
    return std::nullopt;
  // ...
    if (OffsetOpc == TargetOpcode::G_SHL)
      return std::nullopt;
  // ...
    return std::nullopt;
  // ...
  int64_t ImmVal = ValAndVReg->Value.getSExtValue();

  // For a G_MUL the immediate must be a power of two; the shift amount is its
  // log base 2.
  if (OffsetOpc == TargetOpcode::G_MUL) {
    if (!llvm::has_single_bit<uint32_t>(ImmVal))
      return std::nullopt;
    ImmVal = Log2_32(ImmVal);
  }

  if ((ImmVal & 0x7) != ImmVal)
    return std::nullopt;

  // We are only allowed to shift by LegalShiftVal, since that shift value is
  // built into the instruction.
  if (ImmVal != LegalShiftVal)
    return std::nullopt;

  unsigned SignExtend = 0;
  if (WantsExt) {
    // Check if the offset is defined by an extend, unless we looked through a
    // G_ZEXT earlier.
    if (!LookedThroughZExt) {
      auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
      if (Ext == AArch64_AM::InvalidShiftExtend)
        return std::nullopt;
      // ...
    }

    // Need a 32-bit wide register here.
    OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
  }
  // ...
             MIB.addImm(SignExtend);
  // ...
}
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
    MachineOperand &Root, unsigned SizeInBytes) const {
  if (!Root.isReg())
    return std::nullopt;
  // ...
  MachineInstr *PtrAdd =
      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
    return std::nullopt;
  // ...
  return selectExtendedSHL(Root, PtrAdd->getOperand(1),
                           PtrAdd->getOperand(2), SizeInBytes,
                           /*WantsExt=*/false);
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeRegisterOffset(
    MachineOperand &Root) const {
  // ...
  // We need a GEP.
  MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
  if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
    return std::nullopt;
  // ...
    return std::nullopt;
  // ...
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  // ...
    return std::nullopt;
  // ...
    return std::nullopt;
  // ...
  unsigned Scale = Log2_32(SizeInBytes);
  int64_t ImmOff = ValAndVReg->Value.getSExtValue();

  // Skip immediates that can be selected in the load/store addressing mode.
  if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
      ImmOff < (0x1000 << Scale))
    return std::nullopt;

  // Helper lambda to decide whether it is preferable to emit an add.
  auto isPreferredADD = [](int64_t ImmOff) {
    // Constants in [0x0, 0xfff] can be encoded in an add.
    if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
      return true;
    // Can it be encoded in an add lsl #12?
    if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
      return false;
    // It can be encoded in an add lsl #12, but we may not want to. If it is
    // possible to select this as a single movz, prefer that: a single movz is
    // faster than an add with a shift.
    return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
           (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
  };
  // If the immediate can be encoded in a single add/sub, then bail out.
  if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
    return std::nullopt;
  // ...
  // Try to fold shifts into the addressing mode.
  auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
  if (AddrModeFns)
    return AddrModeFns;

  // If that doesn't work, see if it's possible to fold in registers from
  // a GEP.
  return selectAddrModeRegisterOffset(Root);
}
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  // ...
  MachineInstr *PtrAdd =
      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
    return std::nullopt;
  // ...
  auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
                                       SizeInBytes, /*WantsExt=*/true);
  // ...
  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
    return std::nullopt;
  // ...
  AArch64_AM::ShiftExtendType Ext =
      getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
  if (Ext == AArch64_AM::InvalidShiftExtend)
    return std::nullopt;
  // ...
  Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
                                       AArch64::GPR32RegClass, MIB);
  // ...
             MIB.addImm(SignExtend);
  // ...
}
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
                                                   unsigned Size) const {
  // ...
  if (!Root.isReg())
    return std::nullopt;

  if (!isBaseWithConstantOffset(Root, MRI))
    return std::nullopt;
  // ...
  MachineOperand &OffImm = RootDef->getOperand(2);
  if (!OffImm.isReg())
    return std::nullopt;
  MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
  if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
    return std::nullopt;
  // ...
    return std::nullopt;
  // ...
  if (RHSC >= -256 && RHSC < 256) {
    // ...
  }
  return std::nullopt;
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::tryFoldAddLowIntoImm(
    MachineInstr &RootDef, unsigned Size, MachineRegisterInfo &MRI) const {
  if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
    return std::nullopt;
  // ...
    return std::nullopt;
  // ...
  if (Offset % Size != 0)
    return std::nullopt;
  // ...
  if (GV->isThreadLocal())
    return std::nullopt;
  // ...
    return std::nullopt;

  unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
  // ...
             MIB.addGlobalAddress(GV, Offset,
                                  OpFlags | AArch64II::MO_PAGEOFF |
                                      AArch64II::MO_NC);
  // ...
}
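// selectAddrModeIndexed matches the scaled unsigned-immediate form
// "[base, #uimm12 * Size]": the offset must be non-negative, a multiple of
// the access size, and below 0x1000 << log2(Size); e.g. for an 8-byte access
// the legal offsets are 0, 8, ..., 32760. Frame indices and small-code-model
// ADRP + G_ADD_LOW globals are folded into the same operand pair.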
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
                                                  unsigned Size) const {
  // ...
  if (!Root.isReg())
    return std::nullopt;

  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
    }};
  }
  // ...
  auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
  if (OpFns)
    return OpFns;
  // ...
  if (isBaseWithConstantOffset(Root, MRI)) {
    // ...
    unsigned Scale = Log2_32(Size);
    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
      if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
        return {{
            [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
        }};
      // ...
    }
  }

  // Before falling back to the general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (selectAddrModeUnscaled(Root, Size))
    return std::nullopt;
  // ...
}

static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case TargetOpcode::G_SHL:
    return AArch64_AM::LSL;
  case TargetOpcode::G_LSHR:
    return AArch64_AM::LSR;
  case TargetOpcode::G_ASHR:
    return AArch64_AM::ASR;
  case TargetOpcode::G_ROTR:
    return AArch64_AM::ROR;
  }
}
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
                                                  bool AllowROR) const {
  if (!Root.isReg())
    return std::nullopt;
  // ...
  // Check if the operand is defined by an instruction which corresponds to
  // a ShiftExtendType, e.g. a G_SHL or G_LSHR.
  AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
  if (ShType == AArch64_AM::InvalidShiftExtend)
    return std::nullopt;
  if (ShType == AArch64_AM::ROR && !AllowROR)
    return std::nullopt;
  if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
    return std::nullopt;

  // Need an immediate on the RHS.
  auto Immed = getImmedFromMO(ShiftRHS);
  if (!Immed)
    return std::nullopt;
  // ...
  unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
  unsigned Val = *Immed & (NumBits - 1);
  unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); }}};
}

AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
    MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
  unsigned Opc = MI.getOpcode();

  // Handle explicit sign extends first.
  if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
    unsigned Size;
    if (Opc == TargetOpcode::G_SEXT)
      Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    else
      Size = MI.getOperand(2).getImm();
    assert(Size != 64 && "Extend from 64 bits?");
    switch (Size) {
    case 8:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
    case 16:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
    case 32:
      return AArch64_AM::SXTW;
    default:
      return AArch64_AM::InvalidShiftExtend;
    }
  }

  if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
    unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    assert(Size != 64 && "Extend from 64 bits?");
    switch (Size) {
    case 8:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
    case 16:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
    case 32:
      return AArch64_AM::UXTW;
    default:
      return AArch64_AM::InvalidShiftExtend;
    }
  }

  // No explicit extend; try to handle a G_AND with a constant mask on the
  // RHS.
  if (Opc != TargetOpcode::G_AND)
    return AArch64_AM::InvalidShiftExtend;
  // ...
}

Register AArch64InstructionSelector::moveScalarRegClass(
    Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  auto Ty = MRI.getType(Reg);
  assert(!Ty.isVector() && "Expected scalars only!");
  if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
    return Reg;

  // Create a copy and immediately select it.
  auto Copy = MIB.buildCopy({&RC}, {Reg});
  selectCopy(*Copy, TII, MRI, TRI, RBI);
  return Copy.getReg(0);
}
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithExtendedRegister(
    MachineOperand &Root) const {
  if (!Root.isReg())
    return std::nullopt;
  // ...
  MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
  if (!RootDef)
    return std::nullopt;

  if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
    return std::nullopt;

  // Check if we can fold a shift and an extend.
  if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
    // Look for a constant on the RHS of the shift.
    auto MaybeShiftVal = getImmedFromMO(RootDef->getOperand(2));
    if (!MaybeShiftVal)
      return std::nullopt;
    ShiftVal = *MaybeShiftVal;
    if (ShiftVal > 4)
      return std::nullopt;
    // Look for a valid extend instruction on the LHS of the shift.
    MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
    if (!ExtDef)
      return std::nullopt;
    Ext = getExtendTypeForInst(*ExtDef, MRI);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return std::nullopt;
    ExtReg = ExtDef->getOperand(1).getReg();
  } else {
    // Didn't get a shift. Try just folding an extend.
    Ext = getExtendTypeForInst(*RootDef, MRI);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return std::nullopt;
    ExtReg = RootDef->getOperand(1).getReg();

    // A 32-bit instruction that zeroes out the high half of a register gives
    // an implicit zero extend for free; check if we have one.
    if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
      MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
      if (isDef32(*ExtInst))
        return std::nullopt;
    }
  }

  // We require a GPR32 here. Narrow the ExtReg with a subregister copy if
  // needed.
  ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addImm(getArithExtendImm(Ext, ShiftVal));
           }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
  if (!Root.isReg())
    return std::nullopt;
  // ...
  while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
         STI.isLittleEndian())
    // ...
  if (!Extract)
    return std::nullopt;

  if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
    // ...
  }
  if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
    // ...
    if (LaneIdx && /* 2 x s64 vector and */
        LaneIdx->Value.getSExtValue() == 1) {
      // ...
    }
  }

  return std::nullopt;
}
void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  std::optional<int64_t> CstVal =
      getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
  assert(CstVal && "Expected constant value");
  MIB.addImm(*CstVal);
}

void AArch64InstructionSelector::renderLogicalImm32(
    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
  MIB.addImm(Enc);
}

void AArch64InstructionSelector::renderLogicalImm64(
    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
  MIB.addImm(Enc);
}

void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
                                                 const MachineInstr &MI,
                                                 int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
         "Expected G_UBSANTRAP");
  MIB.addImm(MI.getOperand(0).getImm() | ('U' << 8));
}

void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
                                               const MachineInstr &MI,
                                               int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  MIB.addImm(
      AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
}

void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
                                               const MachineInstr &MI,
                                               int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  MIB.addImm(
      AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
}

void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
                                               const MachineInstr &MI,
                                               int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  MIB.addImm(
      AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
}

void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  MIB.addImm(AArch64_AM::encodeAdvSIMDModImmType4(MI.getOperand(1)
                                                      .getFPImm()
                                                      ->getValueAPF()
                                                      .bitcastToAPInt()
                                                      .getZExtValue()));
}
bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
    const MachineInstr &MI, unsigned NumBytes) const {
  if (!MI.mayLoadOrStore())
    return false;
  assert(MI.hasOneMemOperand() &&
         "Expected load/store to have only one mem op!");
  return (*MI.memoperands_begin())->getSize() == NumBytes;
}

bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
    return false;

  // Only return true if we know the operation will zero-out the high half of
  // the 64-bit register. Truncates can be subregister copies, which don't
  // zero out the high bits, and copies and other copy-like instructions can
  // be fed by truncates or lowered as subregister copies.
  switch (MI.getOpcode()) {
  default:
    return true;
  case TargetOpcode::COPY:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_TRUNC:
  case TargetOpcode::G_PHI:
    return false;
  }
}

// Perform fixups on the given PHI instruction's operands to force them all
// to be the same as the destination regbank.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
                            const AArch64RegisterBankInfo &RBI) {
  assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
  // ...
  assert(DstRB && "Expected PHI dst to have regbank assigned");
  // ...
      // Insert a cross-bank copy.
      auto *OpDef = MRI.getVRegDef(OpReg);
      const LLT &Ty = MRI.getType(OpReg);
      // ...
      // Any instruction we insert must appear after all PHIs in the block for
      // the block to be valid MIR.
      if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
        InsertPt = OpDefBB.getFirstNonPHI();
      // ...
      MRI.setRegBank(Copy.getReg(0), *DstRB);
      MO.setReg(Copy.getReg(0));
  // ...
}
void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
  // Collect the PHIs first so we don't invalidate iterators.
  SmallVector<MachineInstr *, 32> Phis;
  for (auto &BB : MF) {
    for (auto &MI : BB) {
      if (MI.getOpcode() == TargetOpcode::G_PHI)
        Phis.emplace_back(&MI);
    }
  }

  for (auto *MI : Phis) {
    // Sub-32-bit scalar PHI operands can end up split across the gpr and fpr
    // banks; if so, insert cross-bank copies to homogenize them.
    bool HasGPROp = false, HasFPROp = false;
    for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
      if (!MO.isReg())
        continue;
      const LLT &Ty = MRI.getType(MO.getReg());
      // ...
      if (RB->getID() == AArch64::GPRRegBankID)
        HasGPROp = true;
      else
        HasFPROp = true;
    }
    // We have heterogeneous regbanks; fix them up.
    if (HasGPROp && HasFPROp)
      fixupPHIOpBanks(*MI, MRI, RBI);
  }
}

namespace llvm {
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
                                 const AArch64Subtarget &Subtarget,
                                 const AArch64RegisterBankInfo &RBI) {
  return new AArch64InstructionSelector(TM, Subtarget, RBI);
}
} // namespace llvm
unsigned const MachineRegisterInfo * MRI
static bool isPreferredADD(int64_t ImmOff)
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG)
can be transformed to: not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) (and (not (setCA (cmp A))...
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops.
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static bool isCMN(SDValue Op, ISD::CondCode CC)
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC)
Emit expression as a conjunction (a series of CCMP/CFCMP ops).
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy)
#define GET_GLOBALISEL_PREDICATES_INIT
static std::pair< const TargetRegisterClass *, const TargetRegisterClass * > getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Helper function to get the source and destination register classes for a copy.
#define GET_GLOBALISEL_TEMPORARIES_INIT
static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Convert an IR fp condition code to an AArch64 CC.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, MachineRegisterInfo &MRI)
Return a register which can be used as a bit to test in a TB(N)Z.
static unsigned getMinSizeForRegBank(const RegisterBank &RB)
Returns the minimum size the given register bank can hold.
static std::optional< int64_t > getVectorShiftImm(Register Reg, MachineRegisterInfo &MRI)
Returns the element immediate value of a vector shift operand if found.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the G_LOAD or G_STORE operation GenericOpc, appropriate for the (value)...
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P)
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the basic binary operation GenericOpc (such as G_OR or G_SDIV),...
static bool getSubRegForClass(const TargetRegisterClass *RC, const TargetRegisterInfo &TRI, unsigned &SubReg)
Returns the correct subregister to use for a given register class.
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register SrcReg, const TargetRegisterClass *To, unsigned SubReg)
Helper function for selectCopy.
static const TargetRegisterClass * getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits, bool GetAllRegSet=false)
Given a register bank, and size in bits, return the smallest register class that can represent that c...
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI, const AArch64RegisterBankInfo &RBI)
static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI)
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type)
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI)
Given a shift instruction, return the correct shift type for that instruction.
static bool unsupportedBinOp(const MachineInstr &I, const AArch64RegisterBankInfo &RBI, const MachineRegisterInfo &MRI, const AArch64RegisterInfo &TRI)
Check whether I is a currently unsupported binary operation:
static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst, bool WillNegate, MachineRegisterInfo &MRI, unsigned Depth=0)
Returns true if Val is a tree of AND/OR/CMP operations that can be expressed as a conjunction.
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, const unsigned EltSize)
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static std::optional< uint64_t > getImmedFromMO(const MachineOperand &Root)
static std::pair< unsigned, unsigned > getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize)
Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given size and RB.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
static std::optional< int64_t > getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI)
Matches and returns the shift immediate value for a SHL instruction given a shift operand.
static void changeFPCCToORAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
This file declares the targeting of the RegisterBankInfo class for AArch64.
static bool isStore(int Opcode)
static bool selectMergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool selectUnmergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file contains constants used for implementing Dwarf debug support.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
const char LLVMTargetMachineRef TM
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static constexpr int Concat[]
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
int getVarArgsStackIndex() const
int getVarArgsGPRIndex() const
unsigned getVarArgsGPRSize() const
This class provides the information for the target register banks.
bool isCallingConvWin64(CallingConv::ID CC) const
APInt bitcastToAPInt() const
Class for arbitrary precision integers.
APInt zext(unsigned width) const
Zero extend to a new width.
uint64_t getZExtValue() const
Get zero extended value.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
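Taken together, these APInt helpers cover most of the immediate manipulation the selector needs. A minimal sketch using only the methods listed above (values are illustrative):

// Broadcast an 8-bit pattern over 64 bits and inspect the result.
APInt Byte(8, 0xAA);                        // 8-bit value 0xAA
APInt Splat = APInt::getSplat(64, Byte);    // 0xAAAAAAAAAAAAAAAA
APInt Wide = Byte.zext(64);                 // zero-extend to 64 bits
APInt Top = APInt::getHighBitsSet(64, 16);  // top 16 bits set
uint64_t Raw = Splat.getZExtValue();        // read back as uint64_t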
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
size_t size() const
size - Get the array size.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequencies.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
bool isIntPredicate() const
static Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
ConstantFP - Floating Point Values [float, double].
const APFloat & getValueAPF() const
bool isNegative() const
Return true if the sign bit is set.
bool isZero() const
Return true if the value is positive or negative zero.
This is the shared class of boolean and integer constants.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the type of this constant.
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate for the type of this constant.
static Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
const APInt & getUniqueInteger() const
If C is a constant integer then return its value, otherwise C must be a vector of constant integers, all equal, and the common value is returned.
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
This class represents an Operation in the Expression.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
std::optional< SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > > ComplexRendererFns
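This typedef is the return shape of the selectAddrMode*/selectShifted* complex patterns: an optional list of callbacks, each appending one operand to the instruction under construction. A hedged sketch of the usual pattern (renderBaseAndImm is a hypothetical helper, not from this file):

InstructionSelector::ComplexRendererFns
renderBaseAndImm(Register Base, int64_t Imm) {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addUse(Base); }, // base register
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Imm); },  // folded offset
  }};
}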
Represents indexed stores.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
Register getCondReg() const
Register getFalseReg() const
Register getTrueReg() const
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
bool isEquality() const
Return true if this predicate is either EQ or NE.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
constexpr LLT multiplyElements(int Factor) const
Produce a vector type that is Factor times bigger, preserving the element type.
constexpr bool isPointerVector() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e. the number of whole bytes needed to represent the size, rounding up if needed.
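A short sketch of how these LLT factories and queries combine (values are illustrative):

LLT S64 = LLT::scalar(64);             // 64-bit scalar
LLT V4S32 = LLT::fixed_vector(4, 32);  // <4 x s32>
LLT P0 = LLT::pointer(0, 64);          // 64-bit pointer in address space 0
assert(V4S32.isVector() && V4S32.getElementType() == LLT::scalar(32));
TypeSize Bits = V4S32.getSizeInBits(); // 128 bits total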
This is an important class for using LLVM in a threaded context.
TypeSize getValue() const
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled to memory.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
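emitLoadFromConstantPool-style code combines these two calls: create (or reuse) a pool entry, then load from it pc-relatively. A hedged sketch, assuming MF and ConstVal are in scope:

MachineConstantPool *MCP = MF.getConstantPool();
Align Alignment = MF.getDataLayout().getPrefTypeAlign(ConstVal->getType());
unsigned CPIdx = MCP->getConstantPoolIndex(ConstVal, Alignment);
// CPIdx then feeds a constant-pool MachinePointerInfo and a
// pc-relative load sequence that brings the value into a register.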
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setAdjustsStack(bool V)
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineIRBuilderState & getState()
Getter for the State.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
void setState(const MachineIRBuilderState &NewState)
Setter for the State.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addBlockAddress(const BlockAddress *BA, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
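The selector's emit* helpers lean on exactly these builder calls: buildInstr creates the instruction, the addImm/addUse chain fills in the remaining operands, and the result is then register-constrained. A minimal sketch (emitAddImm is hypothetical; ADDXri takes a 12-bit immediate plus a shift):

static MachineInstr *emitAddImm(Register Dst, Register Src, unsigned Imm,
                                MachineIRBuilder &MIB,
                                const TargetInstrInfo &TII,
                                const TargetRegisterInfo &TRI,
                                const RegisterBankInfo &RBI) {
  auto AddMI = MIB.buildInstr(AArch64::ADDXri, {Dst}, {Src})
                   .addImm(Imm) // uimm12
                   .addImm(0);  // LSL #0
  constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI);
  return &*AddMI;
}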
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
void addMemOperand(MachineFunction &MF, MachineMemOperand *MO)
Add a MachineMemOperand to the machine instruction.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
@ MOLoad
The memory access reads data.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
const ConstantInt * getCImm() const
bool isCImm() const
isCImm - Test if this is a MO_CImmediate operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
static MachineOperand CreateBA(const BlockAddress *BA, int64_t Offset, unsigned TargetFlags=0)
const ConstantFP * getFPImm() const
unsigned getPredicate() const
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, unused registers, etc.
T get() const
Returns the value of the specified pointer type.
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwise returns null.
Analysis providing profile information.
Holds all the information related to register banks.
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
This class represents the LLVM 'select' instruction.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static IntegerType * getInt8Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
void changeFCMPPredToAArch64CC(const CmpInst::Predicate P, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Find the AArch64 condition codes necessary to represent P for a scalar floating point comparison.
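Some FP predicates have no single AArch64 condition code, so the helper can hand back two codes whose disjunction expresses the predicate. A hedged sketch of the caller-side convention (AArch64CC::AL acting as the "unused" sentinel):

AArch64CC::CondCode CC1 = AArch64CC::AL, CC2 = AArch64CC::AL;
changeFCMPPredToAArch64CC(CmpInst::FCMP_ONE, CC1, CC2);
if (CC2 != AArch64CC::AL) {
  // Two-code case (e.g. FCMP_ONE): emit a second conditional
  // instruction testing CC2 and OR the two results together.
}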
std::optional< int64_t > getAArch64VectorSplatScalar(const MachineInstr &MI, const MachineRegisterInfo &MRI)
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address, used in a MOVZ or MOVK instruction.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the symbol, rather than the address of the symbol itself.
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address, used in a MOVZ or MOVK instruction.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing the symbol.
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address, used in a MOVZ or MOVK instruction.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address, used in a MOVZ or MOVK instruction.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the given register size.
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType9(uint64_t Imm)
static bool isAdvSIMDModImmType4(uint64_t Imm)
static bool isAdvSIMDModImmType5(uint64_t Imm)
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of the given register size.
static bool isAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static bool isAdvSIMDModImmType10(uint64_t Imm)
static int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
static bool isAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
static bool isAdvSIMDModImmType11(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
static bool isAdvSIMDModImmType8(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount. imm: 6-bit shift amount. shifter: 000 ==> lsl, 001 ==> lsr, 010 ==> asr, 011 ==> ror, 100 ==> msl. Encoding: {8-6} = shifter, {5-0} = imm.
static bool isAdvSIMDModImmType6(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType1(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType1(uint64_t Imm)
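These predicates/encoders are the usual validate-then-encode pair for AArch64 bitmask immediates. A small sketch with an immediate that is known to be encodable (a repeating 0x00FF halfword pattern):

uint64_t Imm = 0x00FF00FF00FF00FFULL;
if (AArch64_AM::isLogicalImmediate(Imm, /*regSize=*/64)) {
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(Imm, 64);
  // Enc is the N:immr:imms field consumed by e.g. ANDXri/ORRXri/EORXri.
}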
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out, when considering SETFALSE (something that never exists dynamically) as 0.
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(int64_t RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
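These matchers compose into declarative pattern checks over the generic MIR. A hedged sketch of the shape getVectorSHLImm-style helpers look for, assuming Reg and MRI are in scope:

Register Src;
APInt ShiftAmt;
if (mi_match(Reg, MRI, m_GShl(m_Reg(Src), m_ICst(ShiftAmt)))) {
  // Src is the shifted value; ShiftAmt holds the constant amount.
}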
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, MCRegister PhysReg, const TargetRegisterClass &RC, const DebugLoc &DL, LLT RegTy=LLT())
Return a virtual register corresponding to the incoming argument register PhysReg.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed as an argument (RegClass).
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands to the instruction's register class.
bool isPreISelGenericOpcode(unsigned Opcode)
Check whether the given Opcode is a generic opcode that is not supposed to appear after ISel.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT that fits in int64_t, returns it.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit version).
bool shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
Returns true if the given block should be optimized for size.
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONSTANT, returns its value as an APInt and its def register.
AtomicOrdering
Atomic ordering for LLVM's memory model.
InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, AArch64Subtarget &, AArch64RegisterBankInfo &)
DWARFExpression::Operation Op
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT, returns its value as an APInt and its def register.
std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg and the underlying value register, folding away any copies.
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
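During selection these look-through helpers decide between immediate and register forms. A minimal sketch, assuming Reg and MRI are in scope (ValueAndVReg carries an APInt Value plus the defining register):

if (auto VAndR = getIConstantVRegValWithLookThrough(Reg, MRI)) {
  int64_t Imm = VAndR->Value.getSExtValue();
  // ... fold Imm into an immediate-form instruction ...
} else {
  Register Src = getSrcRegIgnoringCopies(Reg, MRI);
  // ... fall back to the register form rooted at Src ...
}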
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Class which stores all the state required in a MachineIRBuilder.
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.