#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace MIPatternMatch;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
    : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
// Note: the two generated includes below are presumably the
// GET_GLOBALISEL_PREDICATES_INIT and GET_GLOBALISEL_TEMPORARIES_INIT
// expansions; their #define/#undef guard lines were elided in this excerpt.
#include "AMDGPUGenGlobalISel.inc"
#include "AMDGPUGenGlobalISel.inc"
  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
             ? Def->getOperand(1).getReg()
bool AMDGPUInstructionSelector::isVCC(Register Reg,
  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const LLT Ty = MRI.getType(Reg);
  return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&
  return RB->getID() == AMDGPU::VCCRegBankID;
bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
                                                        unsigned NewOpc) const {
  MI.setDesc(TII.get(NewOpc));
  if (!DstRC || DstRC != SrcRC)
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  I.setDesc(TII.get(TargetOpcode::COPY));
  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
    if (!isVCC(SrcReg, *MRI)) {
      std::optional<ValueAndVReg> ConstVal =
          STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
            .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);
        Register MaskedReg = MRI->createVirtualRegister(SrcRC);
            IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
          And.setOperandDead(3);
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
    if (!MRI->getRegClassOrNull(SrcReg))
      MRI->setRegClass(SrcReg, SrcRC);
  if (MO.getReg().isPhysical())
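// Editor's sketch (not in the original file): a copy into a VCC-bank boolean
// cannot stay a plain COPY, because VCC is a per-lane mask. The selection
// above materializes the mask by clamping the source to bit 0 and comparing
// against zero, roughly:
//   %masked = V_AND_B32 1, %src
//   %vcc    = V_CMP_NE_U32_e64 0, %masked
// so each lane's bit of the result reflects the truth value of its lane.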
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);
      MRI->getRegClassOrRegBank(DefReg);
  I.setDesc(TII.get(TargetOpcode::PHI));
                                                 unsigned SubIdx) const {
  Register DstReg = MRI->createVirtualRegister(&SubRC);
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
        .addReg(Reg, 0, ComposedSubIdx);
    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
      DstRB->getID() != AMDGPU::VCCRegBankID)
  bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  LLT Ty = MRI->getType(DstReg);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
              .add(I.getOperand(1))
              .add(I.getOperand(2))
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
    const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;
            .add(I.getOperand(1))
            .add(I.getOperand(2))
  assert(!Sub && "illegal sub should not reach here");
      = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
      = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;
  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
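// Editor's sketch (not in the original file): a 64-bit scalar add has no
// single SALU instruction, so it is split into a carry chain over 32-bit
// halves, e.g. for %dst = G_ADD %a, %b with 64-bit operands:
//   %dst.lo = S_ADD_U32  %a.sub0, %b.sub0   ; defines SCC (carry out)
//   %dst.hi = S_ADDC_U32 %a.sub1, %b.sub1   ; consumes SCC (carry in)
//   %dst    = REG_SEQUENCE %dst.lo, sub0, %dst.hi, sub1
// The VALU path is analogous, using V_ADD_CO_U32/V_ADDC_U32 with a VCC-bank
// carry register in place of SCC.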
bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
                     I.getOpcode() == AMDGPU::G_UADDE;
  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
                          I.getOpcode() == AMDGPU::G_USUBE;

  if (isVCC(Dst1Reg, *MRI)) {
    unsigned NoCarryOpc =
        IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));

  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
        .addReg(I.getOperand(4).getReg());

  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  auto CarryInst =
      BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
          .add(I.getOperand(2))
          .add(I.getOperand(3));

  if (MRI->use_nodbg_empty(Dst1Reg)) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
    if (!MRI->getRegClassOrNull(Dst1Reg))
      MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
                                AMDGPU::SReg_32RegClass, *MRI))
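// Editor's note (added): on the scalar path the carry-in is modeled by
// copying the incoming boolean into SCC immediately before the
// S_ADDC_U32/S_SUBB_U32, and the carry-out is read back out of SCC
// afterwards; SCC is a single physical flag bit, so nothing may clobber it
// between that copy and the arithmetic instruction.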
bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
  const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
                     : AMDGPU::V_MAD_I64_I32_gfx11_e64;
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;
  I.setDesc(TII.get(Opc));
  I.addImplicitDefUseOperands(*MF);
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);
  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0 || DstSize > 128)
  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
                                    *SrcRC, I.getOperand(1));
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  const int NumDst = MI.getNumOperands() - 1;
  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);
  for (int I = 0, E = NumDst; I != E; ++I) {
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
        .addReg(SrcReg, 0, SubRegs[I]);
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
  assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
         MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);
  LLT SrcTy = MRI->getType(Src0);
  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
    return selectG_MERGE_VALUES(MI);
         (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&
  if (DstBank->getID() == AMDGPU::AGPRRegBankID)
  assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||
         DstBank->getID() == AMDGPU::VGPRRegBankID);
  const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;
    const int64_t K0 = ConstSrc0->Value.getSExtValue();
    const int64_t K1 = ConstSrc1->Value.getSExtValue();
      MI.eraseFromParent();
      MI.eraseFromParent();
  if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
    MI.setDesc(TII.get(AMDGPU::COPY));
        IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
    Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
    MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
    MI.eraseFromParent();

  unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
  if (Shift0 && Shift1) {
    Opc = AMDGPU::S_PACK_HH_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
    MI.getOperand(2).setReg(ShiftSrc1);
    Opc = AMDGPU::S_PACK_LH_B32_B16;
    MI.getOperand(2).setReg(ShiftSrc1);
    if (ConstSrc1 && ConstSrc1->Value == 0) {
      auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
      MI.eraseFromParent();
    Opc = AMDGPU::S_PACK_HL_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
  MI.setDesc(TII.get(Opc));
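// Editor's sketch (not in the original file): the S_PACK_* selection above
// picks the variant by which halves of the two 32-bit sources carry the
// data (first letter = src0's half, second = src1's half):
//   S_PACK_LL_B32_B16 s0, s1  ->  {lo16(s1), lo16(s0)}
//   S_PACK_LH_B32_B16 s0, s1  ->  {hi16(s1), lo16(s0)}
//   S_PACK_HL_B32_B16 s0, s1  ->  {lo16(s1), hi16(s0)}
//   S_PACK_HH_B32_B16 s0, s1  ->  {hi16(s1), hi16(s0)}
// A detected (G_LSHR x, 16) feeding an element means the useful half is the
// high half, so the shift is folded away by switching to an H variant.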
bool AMDGPUInstructionSelector::selectG_PTR_ADD(MachineInstr &I) const {
  return selectG_ADD_SUB(I);
bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
  I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI->getType(Src1Reg);
  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
  int64_t Offset = I.getOperand(3).getImm();
  if (Offset % 32 != 0 || InsSize % 32 != 0)
  if (SubReg == AMDGPU::NoSubRegister)
  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
  if (!Src0RC || !Src1RC)
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {
  Register OffsetReg = MI.getOperand(2).getReg();
  Register WidthReg = MI.getOperand(3).getReg();
         "scalar BFX instructions are expanded in regbankselect");
  assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
         "64-bit vector BFX instructions are expanded in regbankselect");

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;
  unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
  MI.eraseFromParent();
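// Editor's note (added): V_BFE_{I,U}32 takes (src, offset, width) and
// extracts width bits starting at bit offset, sign- or zero-extending the
// result; e.g. V_BFE_U32 0xABCD1234, 8, 16 yields 0x0000CD12.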
bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
  Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
  Register LaneSelect = MI.getOperand(3).getReg();
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);

  std::optional<ValueAndVReg> ConstSelect =
    MIB.addImm(ConstSelect->Value.getSExtValue() &
    std::optional<ValueAndVReg> ConstVal =
      MIB.addImm(ConstVal->Value.getSExtValue());
      BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
  LLT Ty = MRI->getType(Dst0);
    Opc = AMDGPU::V_DIV_SCALE_F32_e64;
    Opc = AMDGPU::V_DIV_SCALE_F64_e64;
  unsigned ChooseDenom = MI.getOperand(5).getImm();
  Register Src0 = ChooseDenom != 0 ? Numer : Denom;
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  unsigned IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
        .add(I.getOperand(0))
        .add(I.getOperand(2))
        .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();
    I.eraseFromParent();
    for (Register Reg : { DstReg, Src0Reg, Src1Reg })
  case Intrinsic::amdgcn_interp_p1_f16:
    return selectInterpP1F16(I);
  case Intrinsic::amdgcn_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::WQM);
  case Intrinsic::amdgcn_softwqm:
    return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
  case Intrinsic::amdgcn_strict_wwm:
  case Intrinsic::amdgcn_wwm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);
  case Intrinsic::amdgcn_strict_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);
  case Intrinsic::amdgcn_writelane:
    return selectWritelane(I);
  case Intrinsic::amdgcn_div_scale:
    return selectDivScale(I);
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp:
    return selectIntrinsicCmp(I);
  case Intrinsic::amdgcn_ballot:
    return selectBallot(I);
  case Intrinsic::amdgcn_inverse_ballot:
    return selectInverseBallot(I);
  case Intrinsic::amdgcn_reloc_constant:
    return selectRelocConstant(I);
  case Intrinsic::amdgcn_groupstaticsize:
    return selectGroupStaticSize(I);
  case Intrinsic::returnaddress:
    return selectReturnAddress(I);
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
    return selectSMFMACIntrin(I);
  if (Size == 16 && !ST.has16BitInsts())

  const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc, unsigned S32Opc,
      return ST.hasTrue16BitInsts() ? TrueS16Opc : S16Opc;

    return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
                  AMDGPU::V_CMP_NE_U32_e64, AMDGPU::V_CMP_NE_U64_e64);
    return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
                  AMDGPU::V_CMP_EQ_U32_e64, AMDGPU::V_CMP_EQ_U64_e64);
    return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
                  AMDGPU::V_CMP_GT_I32_e64, AMDGPU::V_CMP_GT_I64_e64);
    return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
                  AMDGPU::V_CMP_GE_I32_e64, AMDGPU::V_CMP_GE_I64_e64);
    return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
                  AMDGPU::V_CMP_LT_I32_e64, AMDGPU::V_CMP_LT_I64_e64);
    return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
                  AMDGPU::V_CMP_LE_I32_e64, AMDGPU::V_CMP_LE_I64_e64);
    return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
                  AMDGPU::V_CMP_GT_U32_e64, AMDGPU::V_CMP_GT_U64_e64);
    return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
                  AMDGPU::V_CMP_GE_U32_e64, AMDGPU::V_CMP_GE_U64_e64);
    return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
                  AMDGPU::V_CMP_LT_U32_e64, AMDGPU::V_CMP_LT_U64_e64);
    return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
                  AMDGPU::V_CMP_LE_U32_e64, AMDGPU::V_CMP_LE_U64_e64);

    return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
                  AMDGPU::V_CMP_EQ_F32_e64, AMDGPU::V_CMP_EQ_F64_e64);
    return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
                  AMDGPU::V_CMP_GT_F32_e64, AMDGPU::V_CMP_GT_F64_e64);
    return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
                  AMDGPU::V_CMP_GE_F32_e64, AMDGPU::V_CMP_GE_F64_e64);
    return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
                  AMDGPU::V_CMP_LT_F32_e64, AMDGPU::V_CMP_LT_F64_e64);
    return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
                  AMDGPU::V_CMP_LE_F32_e64, AMDGPU::V_CMP_LE_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
                  AMDGPU::V_CMP_O_F32_e64, AMDGPU::V_CMP_O_F64_e64);
    return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
                  AMDGPU::V_CMP_U_F32_e64, AMDGPU::V_CMP_U_F64_e64);
    return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
                  AMDGPU::V_CMP_NLG_F32_e64, AMDGPU::V_CMP_NLG_F64_e64);
    return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
                  AMDGPU::V_CMP_NLE_F32_e64, AMDGPU::V_CMP_NLE_F64_e64);
    return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
                  AMDGPU::V_CMP_NLT_F32_e64, AMDGPU::V_CMP_NLT_F64_e64);
    return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
                  AMDGPU::V_CMP_NGE_F32_e64, AMDGPU::V_CMP_NGE_F64_e64);
    return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
                  AMDGPU::V_CMP_NGT_F32_e64, AMDGPU::V_CMP_NGT_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
                  AMDGPU::V_CMP_TRU_F32_e64, AMDGPU::V_CMP_TRU_F64_e64);
    return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
                  AMDGPU::V_CMP_F_F32_e64, AMDGPU::V_CMP_F_F64_e64);
                                          unsigned Size) const {
      return AMDGPU::S_CMP_LG_U64;
      return AMDGPU::S_CMP_EQ_U64;
    return AMDGPU::S_CMP_LG_U32;
    return AMDGPU::S_CMP_EQ_U32;
    return AMDGPU::S_CMP_GT_I32;
    return AMDGPU::S_CMP_GE_I32;
    return AMDGPU::S_CMP_LT_I32;
    return AMDGPU::S_CMP_LE_I32;
    return AMDGPU::S_CMP_GT_U32;
    return AMDGPU::S_CMP_GE_U32;
    return AMDGPU::S_CMP_LT_U32;
    return AMDGPU::S_CMP_LE_U32;
    return AMDGPU::S_CMP_EQ_F32;
    return AMDGPU::S_CMP_GT_F32;
    return AMDGPU::S_CMP_GE_F32;
    return AMDGPU::S_CMP_LT_F32;
    return AMDGPU::S_CMP_LE_F32;
    return AMDGPU::S_CMP_LG_F32;
    return AMDGPU::S_CMP_O_F32;
    return AMDGPU::S_CMP_U_F32;
    return AMDGPU::S_CMP_NLG_F32;
    return AMDGPU::S_CMP_NLE_F32;
    return AMDGPU::S_CMP_NLT_F32;
    return AMDGPU::S_CMP_NGE_F32;
    return AMDGPU::S_CMP_NGT_F32;
    return AMDGPU::S_CMP_NEQ_F32;
      return AMDGPU::S_CMP_EQ_F16;
      return AMDGPU::S_CMP_GT_F16;
      return AMDGPU::S_CMP_GE_F16;
      return AMDGPU::S_CMP_LT_F16;
      return AMDGPU::S_CMP_LE_F16;
      return AMDGPU::S_CMP_LG_F16;
      return AMDGPU::S_CMP_O_F16;
      return AMDGPU::S_CMP_U_F16;
      return AMDGPU::S_CMP_NLG_F16;
      return AMDGPU::S_CMP_NLE_F16;
      return AMDGPU::S_CMP_NLT_F16;
      return AMDGPU::S_CMP_NGE_F16;
      return AMDGPU::S_CMP_NGT_F16;
      return AMDGPU::S_CMP_NEQ_F16;
bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
  Register SrcReg = I.getOperand(2).getReg();
  Register CCReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
    I.eraseFromParent();

  if (I.getOpcode() == AMDGPU::G_FCMP)
              I.getOperand(0).getReg())
          .add(I.getOperand(2))
          .add(I.getOperand(3));
  I.eraseFromParent();
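// Editor's sketch (not in the original file): a compare whose result lives
// on the SGPR bank selects to S_CMP_*, which writes the single SCC flag bit
// that is then copied into the 32-bit destination, while a compare whose
// result is a VCC-bank lane mask selects to V_CMP_*_e64, which writes one
// bit per active lane.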
bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (isVCC(Dst, *MRI))
  LLT DstTy = MRI->getType(Dst);
  Register SrcReg = I.getOperand(2).getReg();
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
    I.eraseFromParent();

  auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS);
  auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS);
      copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, true);
      copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, true);
  SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);
    SelectedMI.addImm(Src0Mods);
  SelectedMI.addReg(Src0Reg);
    SelectedMI.addImm(Src1Mods);
  SelectedMI.addReg(Src1Reg);
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const unsigned Size = MRI->getType(DstReg).getSizeInBits();
  const bool Is64 = Size == 64;

  std::optional<ValueAndVReg> Arg =
  const auto BuildCopy = [&](Register SrcReg) {
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg)
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

    const int64_t Value = Arg->Value.getSExtValue();
      unsigned Opcode = Is64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
    } else if (Value == -1)
      BuildCopy(IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC);
    BuildCopy(I.getOperand(2).getReg());
  I.eraseFromParent();
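// Editor's note (added): a ballot of a compile-time constant needs no
// compare at all. A constant false becomes an S_MOV of zero, and a constant
// true (-1) is just a copy of EXEC, since the ballot of "true" is exactly
// the set of currently active lanes.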
bool AMDGPUInstructionSelector::selectInverseBallot(MachineInstr &I) const {
  const Register DstReg = I.getOperand(0).getReg();
  const Register MaskReg = I.getOperand(2).getReg();
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;
  auto RelocSymbol = cast<GlobalVariable>(
          TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
      AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
  unsigned Depth = I.getOperand(2).getImm();
    I.eraseFromParent();
                                             AMDGPU::SReg_64RegClass, DL);
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
  BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
      .add(MI.getOperand(1));
  MI.eraseFromParent();
  if (!MRI->getRegClassOrNull(Reg))
bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
  unsigned IndexOperand = MI.getOperand(7).getImm();
  bool WaveRelease = MI.getOperand(8).getImm() != 0;
  bool WaveDone = MI.getOperand(9).getImm() != 0;

  if (WaveDone && !WaveRelease)

  unsigned OrderedCountIndex = IndexOperand & 0x3f;
  IndexOperand &= ~0x3f;
  unsigned CountDw = 0;
    CountDw = (IndexOperand >> 24) & 0xf;
    IndexOperand &= ~(0xf << 24);
    if (CountDw < 1 || CountDw > 4) {
          "ds_ordered_count: dword count must be between 1 and 4");

  unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
  unsigned Offset0 = OrderedCountIndex << 2;
  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
    Offset1 |= (CountDw - 1) << 6;
    Offset1 |= ShaderType << 2;
  unsigned Offset = Offset0 | (Offset1 << 8);
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
  MI.eraseFromParent();
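// Editor's worked example (added, assuming ShaderType == 0): with
// OrderedCountIndex = 3, WaveRelease = 1, WaveDone = 0, an "add" op
// (Instruction = 0) and CountDw = 2:
//   Offset0 = 3 << 2                  = 0x0C
//   Offset1 = 1 | (0 << 1) | (0 << 4) = 0x01, then |= (2 - 1) << 6 -> 0x41
//   Offset  = 0x0C | (0x41 << 8)      = 0x410C
// i.e. the two 8-bit offset fields of DS_ORDERED_COUNT are packed into one
// 16-bit immediate.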
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
  if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&

  const bool HasVSrc = MI.getNumOperands() == 3;
  assert(HasVSrc || MI.getNumOperands() == 2);

  Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
  if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)
  if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
    Readfirstlane = OffsetDef;
  if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {
    std::tie(BaseOffset, ImmOffset) =
    if (Readfirstlane) {
                                 AMDGPU::SReg_32RegClass, *MRI))
    Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
                                                      bool IsAppend) const {
  Register PtrBase = MI.getOperand(2).getReg();
  LLT PtrTy = MRI->getType(PtrBase);
  std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));
  if (!isDSOffsetLegal(PtrBase, Offset)) {
    PtrBase = MI.getOperand(2).getReg();
  const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const {
    MI.eraseFromParent();
  MI.eraseFromParent();
  TFE = (TexFailCtrl & 0x1) ? true : false;
  LWE = (TexFailCtrl & 0x2) ? true : false;
  return TexFailCtrl == 0;
bool AMDGPUInstructionSelector::selectImageIntrinsic(
  unsigned IntrOpcode = Intr->BaseOpcode;
  const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;

  int NumVDataDwords = -1;
  bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
               MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;
  Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;
  bool IsTexFail = false;
                    TFE, LWE, IsTexFail))

  const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();
  const bool IsA16 = (Flags & 1) != 0;
  const bool IsG16 = (Flags & 2) != 0;
  if (IsA16 && !STI.hasG16() && !IsG16)

  unsigned DMaskLanes = 0;
  if (BaseOpcode->Atomic) {
    VDataOut = MI.getOperand(0).getReg();
    VDataIn = MI.getOperand(2).getReg();
    LLT Ty = MRI->getType(VDataIn);
    const bool Is64Bit = BaseOpcode->AtomicX2 ?
      assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);
      DMask = Is64Bit ? 0xf : 0x3;
      NumVDataDwords = Is64Bit ? 4 : 2;
      DMask = Is64Bit ? 0x3 : 0x1;
      NumVDataDwords = Is64Bit ? 2 : 1;
    DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();
    if (BaseOpcode->Store) {
      VDataIn = MI.getOperand(1).getReg();
      VDataTy = MRI->getType(VDataIn);
      VDataOut = MI.getOperand(0).getReg();
      VDataTy = MRI->getType(VDataOut);
      NumVDataDwords = DMaskLanes;
        NumVDataDwords = (DMaskLanes + 1) / 2;

  if (Subtarget->hasG16() && IsG16) {
    IntrOpcode = G16MappingInfo->G16;
  assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");

  unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm();

  int NumVAddrRegs = 0;
  int NumVAddrDwords = 0;
  for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) {
    if (!AddrOp.isReg())
    NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;
      NumVAddrRegs != 1 &&
                         : NumVAddrDwords == NumVAddrRegs);
  if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX11Plus) {
                                   UseNSA ? AMDGPU::MIMGEncGfx11NSA
                                          : AMDGPU::MIMGEncGfx11Default,
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX10Plus) {
                                   UseNSA ? AMDGPU::MIMGEncGfx10NSA
                                          : AMDGPU::MIMGEncGfx10Default,
                                   NumVDataDwords, NumVAddrDwords);
                                   NumVDataDwords, NumVAddrDwords);
               << "requested image instruction is not supported on this GPU\n");
                                   NumVDataDwords, NumVAddrDwords);
                                   NumVDataDwords, NumVAddrDwords);

    const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;
        Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
    unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
    if (!MRI->use_empty(VDataOut)) {
  for (int I = 0; I != NumVAddrRegs; ++I) {
    if (SrcOp.isReg()) {
  MIB.addReg(MI.getOperand(ArgOffset + Intr->RsrcIndex).getReg());
    MIB.addReg(MI.getOperand(ArgOffset + Intr->SampIndex).getReg());
              STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
    MIB.addImm(IsA16 ? -1 : 0);
    MIB.addImm(IsD16 ? -1 : 0);
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
  unsigned Offset = MI.getOperand(6).getImm();
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_BVH_STACK_RTN_B32), Dst0)
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
  unsigned IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_end_cf:
    return selectEndCfIntrinsic(I);
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap:
    return selectDSOrderedIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return selectDSGWSIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_append:
    return selectDSAppendConsume(I, true);
  case Intrinsic::amdgcn_ds_consume:
    return selectDSAppendConsume(I, false);
  case Intrinsic::amdgcn_s_barrier:
    return selectSBarrier(I);
  case Intrinsic::amdgcn_raw_buffer_load_lds:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
  case Intrinsic::amdgcn_struct_buffer_load_lds:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
    return selectBufferLoadLds(I);
  case Intrinsic::amdgcn_global_load_lds:
    return selectGlobalLoadLds(I);
  case Intrinsic::amdgcn_exp_compr:
        F, "intrinsic not supported on subtarget", I.getDebugLoc(), DS_Error);
    F.getContext().diagnose(NoFpRet);
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
    return selectDSBvhStackIntrinsic(I);
  case Intrinsic::amdgcn_s_barrier_init:
  case Intrinsic::amdgcn_s_barrier_join:
  case Intrinsic::amdgcn_s_wakeup_barrier:
  case Intrinsic::amdgcn_s_get_barrier_state:
    return selectNamedBarrierInst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_signal_isfirst:
  case Intrinsic::amdgcn_s_barrier_signal_isfirst_var:
    return selectSBarrierSignalIsfirst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_leave:
    return selectSBarrierLeave(I);
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    if (!MRI->getRegClassOrNull(CCReg))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    I.eraseFromParent();

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
          .add(I.getOperand(3))
          .add(I.getOperand(2))
          .add(I.getOperand(1));
  I.eraseFromParent();
    return AMDGPU::sub0;
    return AMDGPU::sub0_sub1;
    return AMDGPU::sub0_sub1_sub2;
    return AMDGPU::sub0_sub1_sub2_sub3;
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
      return AMDGPU::sub0;
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
  if (!SrcRC || !DstRC)

    Register LoReg = MRI->createVirtualRegister(DstRC);
    Register HiReg = MRI->createVirtualRegister(DstRC);
        .addReg(SrcReg, 0, AMDGPU::sub0);
        .addReg(SrcReg, 0, AMDGPU::sub1);

    if (IsVALU && STI.hasSDWA()) {
      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
      Register TmpReg0 = MRI->createVirtualRegister(DstRC);
      Register TmpReg1 = MRI->createVirtualRegister(DstRC);
      Register ImmReg = MRI->createVirtualRegister(DstRC);
      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)
      unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
      unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;
        And.setOperandDead(3);
        Or.setOperandDead(3);
    I.eraseFromParent();

  if (SubRegIdx == -1)
      = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
  if (SrcWithSubRC != SrcRC) {
  I.getOperand(1).setSubReg(SubRegIdx);
  I.setDesc(TII.get(TargetOpcode::COPY));
  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
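// Editor's note (added): the -16..64 window is the range of AMDGPU inline
// constants, i.e. immediates that encode directly into the instruction for
// free. For example a 6-bit mask (Size == 6) gives 0x3f == 63, which is
// inline, while an 8-bit mask gives 0xff == 255, which would need a literal.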
const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?
  const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);

  if (I.getOpcode() == AMDGPU::G_ANYEXT) {
      return selectCOPY(I);
        TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
        TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
    Register UndefReg = MRI->createVirtualRegister(SrcRC);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
    I.eraseFromParent();

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
      I.eraseFromParent();
    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
    I.eraseFromParent();

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
        AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
          AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      I.eraseFromParent();

    if (DstSize > 32 && SrcSize == 32) {
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      I.eraseFromParent();

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
    if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
      Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      I.eraseFromParent();
    I.eraseFromParent();
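// Editor's note (added, from my reading of the ISA): unlike the VALU V_BFE,
// the scalar S_BFE_{I,U}32/64 takes a single packed source-field operand,
// with the bit offset in the low bits and the field width in bits [22:16];
// e.g. extracting 16 bits from offset 0 uses the immediate
// (16 << 16) | 0 = 0x100000.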
bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (DstRB->getID() != AMDGPU::SGPRRegBankID)
  Register Src = I.getOperand(1).getReg();
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
    I.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = MRI->getType(DstReg).getSizeInBits();
  } else if (ImmOp.isCImm()) {

  const bool IsSgpr = DstRB->getID() == AMDGPU::SGPRRegBankID;
  if (DstRB->getID() == AMDGPU::VCCRegBankID) {
    Opcode = STI.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
  } else if (Size == 64 &&
    Opcode = IsSgpr ? AMDGPU::S_MOV_B64_IMM_PSEUDO : AMDGPU::V_MOV_B64_PSEUDO;
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);

    ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg)
                  .addImm(I.getOperand(1).getImm());
        &AMDGPU::SReg_32RegClass : &AMDGPU::VGPR_32RegClass;
    ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
  I.eraseFromParent();
      TRI.getConstrainedRegClassForOperand(ResInst->getOperand(0), *MRI);
bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||

  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
  unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
  MI.eraseFromParent();
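// Editor's sketch (not in the original file): a 64-bit scalar fneg only has
// to flip bit 63, so the value is split into halves and only the high half
// is touched:
//   %lo  = COPY %src.sub0
//   %hi  = COPY %src.sub1
//   %k   = S_MOV_B32 0x80000000
//   %op  = S_XOR_B32 %hi, %k       ; S_OR_B32 for the fused fneg(fabs) case
//   %dst = REG_SEQUENCE %lo, sub0, %op, sub1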
bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||

  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
  MI.eraseFromParent();
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;

void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
  unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
      MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg());
  if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)
  for (unsigned i = 1; i != 3; ++i) {
      assert(GEPInfo.Imm == 0);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
bool AMDGPUInstructionSelector::isSGPR(Register Reg) const {
  return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;
bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
  if (MI.getOpcode() == AMDGPU::G_PREFETCH)
           AMDGPU::SGPRRegBankID;
  return I && I->getMetadata("amdgpu.uniform");
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
  if (Reg.isPhysical())
  const unsigned Opcode = MI.getOpcode();
  if (Opcode == AMDGPU::COPY)
  if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
      Opcode == AMDGPU::G_XOR)
  if (auto *GI = dyn_cast<GIntrinsic>(&MI))
    return GI->is(Intrinsic::amdgcn_class);
  return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  if (!isVCC(CondReg, *MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32RegClass;
      const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
      const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
      BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();

  if (!MRI->getRegClassOrNull(CondReg))
    MRI->setRegClass(CondReg, ConstrainRC);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
      .addMBB(I.getOperand(1).getMBB());
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
  Register DstReg = I.getOperand(0).getReg();
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
      DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  Register MaskReg = I.getOperand(2).getReg();
  LLT Ty = MRI->getType(DstReg);
  LLT MaskTy = MRI->getType(MaskReg);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;

  const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
  const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
      !CanCopyLow32 && !CanCopyHi32) {
    auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
    I.eraseFromParent();

  unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
      TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
         "ptrmask should have been narrowed during legalize");
    auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
    I.eraseFromParent();

  Register HiReg = MRI->createVirtualRegister(&RegRC);
  Register LoReg = MRI->createVirtualRegister(&RegRC);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(SrcReg, 0, AMDGPU::sub0);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(SrcReg, 0, AMDGPU::sub1);

    Register MaskLo = MRI->createVirtualRegister(&RegRC);
    MaskedLo = MRI->createVirtualRegister(&RegRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo)
        .addReg(MaskReg, 0, AMDGPU::sub0);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo)
    Register MaskHi = MRI->createVirtualRegister(&RegRC);
    MaskedHi = MRI->createVirtualRegister(&RegRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi)
        .addReg(MaskReg, 0, AMDGPU::sub1);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi)
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
  I.eraseFromParent();
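// Editor's note (added): when known-bits analysis proves one 32-bit half of
// the mask is all ones, the AND for that half is skipped and the source half
// is copied through unchanged. The common case of aligning a pointer, e.g.
// masking with ~63 (all high-half bits set), therefore only needs an AND on
// the low half plus a REG_SEQUENCE to reassemble the 64-bit result.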
static std::pair<Register, unsigned>
  std::tie(IdxBaseReg, Offset) =
  if (IdxBaseReg == AMDGPU::NoRegister) {
    IdxBaseReg = IdxReg;
  if (static_cast<unsigned>(Offset) >= SubRegs.size())
    return std::pair(IdxReg, SubRegs[0]);
  return std::pair(IdxBaseReg, SubRegs[Offset]);
bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);
  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
      TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
      TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
  if (!SrcRC || !DstRC)

  if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
    unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
    MI.eraseFromParent();

    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
    MI.eraseFromParent();
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
  LLT VecTy = MRI->getType(DstReg);
  LLT ValTy = MRI->getType(ValReg);
  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
      TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
      TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
  if (VecRB->getID() == AMDGPU::VGPRRegBankID && ValSize != 32)

  std::tie(IdxReg, SubReg) =
  const bool IndexMode = VecRB->getID() == AMDGPU::VGPRRegBankID &&
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
      VecSize, ValSize, VecRB->getID() == AMDGPU::SGPRRegBankID);
    MI.eraseFromParent();
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
  unsigned Size = MI.getOperand(3).getImm();
  const bool HasVIndex = MI.getNumOperands() == 9;
    VIndex = MI.getOperand(4).getReg();
  Register VOffset = MI.getOperand(4 + OpOffset).getReg();
  std::optional<ValueAndVReg> MaybeVOffset =
  const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();

    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
          .add(MI.getOperand(2));

  if (HasVIndex && HasVOffset) {
    Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
    BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
  } else if (HasVIndex) {
  } else if (HasVOffset) {

  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(5 + OpOffset));
  MIB.add(MI.getOperand(6 + OpOffset));
  unsigned Aux = MI.getOperand(7 + OpOffset).getImm();
  LoadPtrI.Offset = MI.getOperand(6 + OpOffset).getImm();
  StorePtrI.V = nullptr;
  MI.eraseFromParent();
  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
  assert(Def->getNumOperands() == 3 &&
  return Def->getOperand(1).getReg();
bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const {
  unsigned Size = MI.getOperand(3).getImm();
    Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
          .add(MI.getOperand(2));

  if (!isSGPR(Addr)) {
    if (isSGPR(AddrDef->Reg)) {
      Addr = AddrDef->Reg;
    } else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
      if (isSGPR(SAddr)) {
        Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
    VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  MIB.add(MI.getOperand(4))
     .add(MI.getOperand(5));
  LoadPtrI.Offset = MI.getOperand(4).getImm();
                      sizeof(int32_t), Align(4));
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectBVHIntrinsic(MachineInstr &MI) const {
  MI.setDesc(TII.get(MI.getOperand(1).getImm()));
  MI.removeOperand(1);
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
    Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
    Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;

  auto VDst_In = MI.getOperand(4);
  MI.setDesc(TII.get(Opc));
  MI.removeOperand(4);
  MI.removeOperand(1);
  MI.addOperand(VDst_In);
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
      IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
    WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  MI.eraseFromParent();
  if (!I.isPreISelOpcode()) {
      return selectCOPY(I);

  switch (I.getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
      return selectG_AND_OR_XOR(I);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
      return selectG_ADD_SUB(I);
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return selectG_UADDO_USUBO_UADDE_USUBE(I);
  case AMDGPU::G_AMDGPU_MAD_U64_U32:
  case AMDGPU::G_AMDGPU_MAD_I64_I32:
    return selectG_AMDGPU_MAD_64_32(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_PTRTOINT:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_FNEG:
    return selectG_FNEG(I);
  case TargetOpcode::G_FABS:
    return selectG_FABS(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
    return selectG_BUILD_VECTOR(I);
  case TargetOpcode::G_PTR_ADD:
    return selectG_PTR_ADD(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_FREEZE:
    return selectCOPY(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
    return selectG_INTRINSIC(I);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
  case TargetOpcode::G_ICMP:
  case TargetOpcode::G_FCMP:
    if (selectG_ICMP_or_FCMP(I))
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
  case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
  case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
  case TargetOpcode::G_ATOMICRMW_FADD:
  case AMDGPU::G_AMDGPU_ATOMIC_FMIN:
  case AMDGPU::G_AMDGPU_ATOMIC_FMAX:
    return selectG_LOAD_STORE_ATOMICRMW(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_SEXT_INREG:
    return selectG_SZA_EXT(I);
  case TargetOpcode::G_FPEXT:
    if (selectG_FPEXT(I))
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_GLOBAL_VALUE:
    return selectG_GLOBAL_VALUE(I);
  case TargetOpcode::G_PTRMASK:
    return selectG_PTRMASK(I);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectG_EXTRACT_VECTOR_ELT(I);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return selectG_INSERT_VECTOR_ELT(I);
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
    assert(Intr && "not an image intrinsic with image pseudo");
    return selectImageIntrinsic(I, Intr);
  case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY:
    return selectBVHIntrinsic(I);
  case AMDGPU::G_SBFX:
  case AMDGPU::G_UBFX:
    return selectG_SBFX_UBFX(I);
  case AMDGPU::G_SI_CALL:
    I.setDesc(TII.get(AMDGPU::SI_CALL));
  case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
    return selectWaveAddress(I);
  case AMDGPU::G_STACKRESTORE:
    return selectStackRestore(I);
  return selectPHI(I);
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3ModsImpl(MachineOperand &Root,
                                              bool IsCanonicalizing,
                                              bool AllowAbs, bool OpSel) const {
  if (MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
  } else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
    if (LHS && LHS->isZero()) {
      Src = MI->getOperand(2).getReg();

  if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();

  return std::pair(Src, Mods);
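// Editor's sketch (not in the original file): VOP3 source modifiers let the
// hardware negate and/or take the absolute value of an operand for free, so
// the matcher above strips G_FNEG / G_FABS (and the non-canonical
// "0.0 - x" spelling of fneg) off the SSA def chain and returns the bare
// source plus a modifier bitmask; e.g. fneg(fabs(x)) selects to x with the
// NEG and ABS modifier bits set rather than to separate instructions.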
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
    bool ForceVGPR) const {
  if ((Mods != 0 || ForceVGPR) &&
                  TII.get(AMDGPU::COPY), VGPRSrc)
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {

AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {

AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root, false);
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root, true,
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
  if (Def->getOpcode() == AMDGPU::G_FNEG || Def->getOpcode() == AMDGPU::G_FABS)
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3PModsImpl(
  if (MI && MI->getOpcode() == AMDGPU::G_FNEG &&
    Src = MI->getOperand(1).getReg();
    MI = MRI.getVRegDef(Src);
  return std::pair(Src, Mods);
3863AMDGPUInstructionSelector::selectVOP3PMods(
MachineOperand &Root)
const {
3869 std::tie(Src, Mods) = selectVOP3PModsImpl(Root.
getReg(), MRI);
3878AMDGPUInstructionSelector::selectVOP3PModsDOT(
MachineOperand &Root)
const {
3884 std::tie(Src, Mods) = selectVOP3PModsImpl(Root.
getReg(), MRI,
true);
3893AMDGPUInstructionSelector::selectVOP3PModsNeg(
MachineOperand &Root)
const {
3898 "expected i1 value");
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
    MachineOperand &Root) const {
  assert((Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0)) &&
         "expected i1 value");
  unsigned Mods = SISrcMods::OP_SEL_1;
  if (Root.getImm() != 0)
    Mods |= SISrcMods::OP_SEL_0;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
}

static Register buildRegSequence(SmallVectorImpl<Register> &Elts,
                                 MachineInstr *InsertPt,
                                 MachineRegisterInfo &MRI) {
  const TargetRegisterClass *DstRegClass;
  switch (Elts.size()) {
  case 8:
    DstRegClass = &AMDGPU::VReg_256RegClass;
    break;
  case 4:
    DstRegClass = &AMDGPU::VReg_128RegClass;
    break;
  case 2:
    DstRegClass = &AMDGPU::VReg_64RegClass;
    break;
  default:
    llvm_unreachable("unhandled Reg sequence size");
  }

  MachineIRBuilder B(*InsertPt);
  auto MIB = B.buildInstr(AMDGPU::REG_SEQUENCE)
                 .addDef(MRI.createVirtualRegister(DstRegClass));
  for (unsigned i = 0; i < Elts.size(); ++i) {
    MIB.addReg(Elts[i]);
    MIB.addImm(SIRegisterInfo::getSubRegFromChannel(i));
  }
  return MIB->getOperand(0).getReg();
}

static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods,
                                 SmallVectorImpl<Register> &Elts, Register &Src,
                                 MachineInstr *InsertPt,
                                 MachineRegisterInfo &MRI) {
  if (ModOpcode == TargetOpcode::G_FNEG) {
    Mods |= SISrcMods::NEG;
    // Check if all elements also have abs modifier.
    SmallVector<Register, 8> NegAbsElts;
    for (auto El : Elts) {
      Register FabsSrc;
      if (mi_match(El, MRI, m_GFabs(m_Reg(FabsSrc))))
        NegAbsElts.push_back(FabsSrc);
    }
    if (Elts.size() != NegAbsElts.size()) {
      // Neg
      Src = buildRegSequence(Elts, InsertPt, MRI);
    } else {
      // Neg and Abs
      Mods |= SISrcMods::NEG_HI;
      Src = buildRegSequence(NegAbsElts, InsertPt, MRI);
    }
  } else {
    assert(ModOpcode == TargetOpcode::G_FABS);
    // Abs
    Mods |= SISrcMods::NEG_HI;
    Src = buildRegSequence(Elts, InsertPt, MRI);
  }
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(MachineOperand &Root) const {
  Register Src = Root.getReg();
  unsigned Mods = SISrcMods::OP_SEL_1;
  SmallVector<Register, 8> EltsF32;

  if (GBuildVector *BV = dyn_cast<GBuildVector>(MRI->getVRegDef(Src))) {
    assert(BV->getNumSources() > 0);
    // Based on first element decide which mod we match, neg or abs.
    MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(0));
    unsigned ModOpcode = (ElF32->getOpcode() == AMDGPU::G_FNEG)
                             ? AMDGPU::G_FNEG
                             : AMDGPU::G_FABS;
    for (unsigned i = 0; i < BV->getNumSources(); ++i) {
      ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
      if (ElF32->getOpcode() != ModOpcode)
        break;
      EltsF32.push_back(ElF32->getOperand(1).getReg());
    }

    // All elements had ModOpcode modifier.
    if (BV->getNumSources() == EltsF32.size())
      selectWMMAModsNegAbs(ModOpcode, Mods, EltsF32, Src, Root.getParent(),
                           *MRI);
  }

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAModsF16Neg(MachineOperand &Root) const {
  Register Src = Root.getReg();
  unsigned Mods = SISrcMods::OP_SEL_1;
  SmallVector<Register, 8> EltsV2F16;

  if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef(Src))) {
    for (unsigned i = 0; i < CV->getNumSources(); ++i) {
      Register FNegSrc;
      if (!mi_match(CV->getSourceReg(i), *MRI, m_GFNeg(m_Reg(FNegSrc))))
        break;
      EltsV2F16.push_back(FNegSrc);
    }

    // All elements had the fneg modifier.
    if (CV->getNumSources() == EltsV2F16.size()) {
      Mods |= SISrcMods::NEG;
      Mods |= SISrcMods::NEG_HI;
      Src = buildRegSequence(EltsV2F16, Root.getParent(), *MRI);
    }
  }

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(MachineOperand &Root) const {
  Register Src = Root.getReg();
  unsigned Mods = SISrcMods::OP_SEL_1;
  SmallVector<Register, 8> EltsV2F16;

  if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef(Src))) {
    assert(CV->getNumSources() > 0);
    MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(0));
    // Based on first element decide which mod we match, neg or abs.
    unsigned ModOpcode = (ElV2F16->getOpcode() == AMDGPU::G_FNEG)
                             ? AMDGPU::G_FNEG
                             : AMDGPU::G_FABS;

    for (unsigned i = 0; i < CV->getNumSources(); ++i) {
      ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
      if (ElV2F16->getOpcode() != ModOpcode)
        break;
      EltsV2F16.push_back(ElV2F16->getOperand(1).getReg());
    }

    // All elements had ModOpcode modifier.
    if (CV->getNumSources() == EltsV2F16.size())
      selectWMMAModsNegAbs(ModOpcode, Mods, EltsV2F16, Src, Root.getParent(),
                           *MRI);
  }

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAVISrc(MachineOperand &Root) const {
  std::optional<FPValueAndVReg> FPValReg;
  if (mi_match(Root.getReg(), *MRI, m_GFCstOrSplat(FPValReg))) {
    if (TII.isInlineConstant(FPValReg->Value)) {
      return {{[=](MachineInstrBuilder &MIB) {
        MIB.addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
      }}};
    }
    // Non-inlineable splat floats should not fall through to the integer
    // immediate check.
    return {};
  }

  // ... (integer splat handling elided) ...

  return {};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSWMMACIndex8(MachineOperand &Root) const {
  Register Src =
      getDefIgnoringCopies(Root.getReg(), *MRI)->getOperand(0).getReg();
  unsigned Key = 0;

  Register ShiftSrc;
  std::optional<ValueAndVReg> ShiftAmt;
  if (mi_match(Src, *MRI, m_GLShr(m_Reg(ShiftSrc), m_GCst(ShiftAmt))) &&
      MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
      ShiftAmt->Value.getZExtValue() % 8 == 0) {
    Key = ShiftAmt->Value.getZExtValue() / 8;
    Src = ShiftSrc;
  }

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSWMMACIndex16(MachineOperand &Root) const {
  Register Src =
      getDefIgnoringCopies(Root.getReg(), *MRI)->getOperand(0).getReg();
  unsigned Key = 0;

  Register ShiftSrc;
  std::optional<ValueAndVReg> ShiftAmt;
  if (mi_match(Src, *MRI, m_GLShr(m_Reg(ShiftSrc), m_GCst(ShiftAmt))) &&
      MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
      ShiftAmt->Value.getZExtValue() == 16) {
    Key = 1;
    Src = ShiftSrc;
  }

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); }}};
}
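// Illustrative sketch (not part of the upstream file): both matchers above
// recover a lane key from a right shift of a 32-bit source. The shift is only
// stripped when it selects a whole lane; otherwise Key stays 0 and the
// original source is used unchanged:
//
//   // 8-bit lanes: any multiple-of-8 shift selects a byte lane (key 0..3).
//   unsigned swmmacIndex8Key(uint64_t ShiftAmt) {
//     return ShiftAmt % 8 == 0 ? unsigned(ShiftAmt / 8) : 0;
//   }
//   // 16-bit lanes: only a shift of exactly 16 selects the high half.
//   unsigned swmmacIndex16Key(uint64_t ShiftAmt) {
//     return ShiftAmt == 16 ? 1 : 0;
//   }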
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root);

  // FIXME: Handle op_sel
  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
                                           /*IsCanonicalizing=*/true,
                                           /*AllowAbs=*/false,
                                           /*OpSel=*/false);

  return {{
      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(
            copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, /* ForceVGPR */ true));
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
                                           /*IsCanonicalizing=*/true,
                                           /*AllowAbs=*/false,
                                           /*OpSel=*/true);

  return {{
      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(
            copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, /* ForceVGPR */ true));
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
  }};
}
bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
                                                 Register &Base,
                                                 Register *SOffset,
                                                 int64_t *Offset) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, *MRI, AddrInfo);

  if (AddrInfo.empty())
    return false;

  const GEPInfo &GEPI = AddrInfo[0];
  std::optional<int64_t> EncodedImm =
      AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, false);

  if (SOffset && Offset) {
    if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
        AddrInfo.size() > 1) {
      const GEPInfo &GEPI2 = AddrInfo[1];
      if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
        if (Register OffsetReg =
                matchZeroExtendFromS32(*MRI, GEPI2.SgprParts[1])) {
          Base = GEPI2.SgprParts[0];
          *SOffset = OffsetReg;
          *Offset = *EncodedImm;
          return true;
        }
      }
    }
    return false;
  }

  if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
    Base = GEPI.SgprParts[0];
    *Offset = *EncodedImm;
    return true;
  }

  // SGPR offset is unsigned.
  if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&
      GEPI.Imm != 0) {
    // If we make it this far we have a load with a 32-bit immediate offset.
    // It is OK to select this using an sgpr offset, because we have already
    // failed trying to select this load into one of the _IMM variants since
    // the _IMM Patterns are considered before the _SGPR patterns.
    Base = GEPI.SgprParts[0];
    *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
        .addImm(GEPI.Imm);
    return true;
  }

  if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
    if (Register OffsetReg = matchZeroExtendFromS32(*MRI, GEPI.SgprParts[1])) {
      Base = GEPI.SgprParts[0];
      *SOffset = OffsetReg;
      return true;
    }
  }

  return false;
}
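// Editorial note (summary, not from the upstream file): selectSmrdOffset backs
// three SMRD addressing forms, distinguished by which out-parameters the
// wrappers below pass:
//   Offset only            -> _IMM variants      (selectSmrdImm)
//   SOffset only           -> _SGPR variants     (selectSmrdSgpr)
//   SOffset and Offset     -> _SGPR_IMM variants (selectSmrdSgprImm)
// The _IMM patterns are tried first, which is why an immediate that failed to
// encode can still be materialized into an SGPR with S_MOV_B32 above.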
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  Register Base;
  int64_t Offset;
  if (!selectSmrdOffset(Root, Base, /* SOffset= */ nullptr, &Offset))
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return std::nullopt;

  const GEPInfo &GEPInfo = AddrInfo[0];
  Register PtrReg = GEPInfo.SgprParts[0];
  std::optional<int64_t> EncodedImm =
      AMDGPU::getSMRDEncodedLiteralOffset32(STI, GEPInfo.Imm);
  if (!EncodedImm)
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  Register Base, SOffset;
  if (!selectSmrdOffset(Root, Base, &SOffset, /* Offset= */ nullptr))
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
           [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const {
  Register Base, SOffset;
  int64_t Offset;
  if (!selectSmrdOffset(Root, Base, &SOffset, &Offset))
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
           [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
}
std::pair<Register, int>
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,
                                                uint64_t FlatVariant) const {
  MachineInstr *MI = Root.getParent();

  auto Default = std::pair(Root.getReg(), 0);

  if (!STI.hasFlatInstOffsets())
    return Default;

  Register PtrBase;
  int64_t ConstOffset;
  std::tie(PtrBase, ConstOffset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (ConstOffset == 0 || (FlatVariant == SIInstrFlags::FlatScratch &&
                           !isFlatScratchBaseLegal(Root.getReg())))
    return Default;

  unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
  if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, FlatVariant))
    return Default;

  return std::pair(PtrBase, ConstOffset);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
  Register PtrReg;
  int Offset;
  std::tie(PtrReg, Offset) = selectFlatOffsetImpl(Root, SIInstrFlags::FLAT);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectGlobalOffset(MachineOperand &Root) const {
  Register PtrReg;
  int Offset;
  std::tie(PtrReg, Offset) =
      selectFlatOffsetImpl(Root, SIInstrFlags::FlatGlobal);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectScratchOffset(MachineOperand &Root) const {
  Register PtrReg;
  int Offset;
  std::tie(PtrReg, Offset) =
      selectFlatOffsetImpl(Root, SIInstrFlags::FlatScratch);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
  Register Addr = Root.getReg();
  Register PtrBase;
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  // Match the immediate offset first, which canonically is moved as low as
  // possible.
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0) {
    if (TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::GLOBAL_ADDRESS,
                              SIInstrFlags::FlatGlobal)) {
      Addr = PtrBase;
      ImmOffset = ConstOffset;
    } else {
      auto PtrBaseDef = getDefSrcRegIgnoringCopies(PtrBase, *MRI);
      if (isSGPR(PtrBaseDef->Reg)) {
        if (ConstOffset > 0) {
          // Offset is too large.
          //
          // saddr + large_offset -> saddr +
          //                         (voffset = large_offset & ~MaxOffset) +
          //                         (large_offset & MaxOffset)
          int64_t SplitImmOffset, RemainderOffset;
          std::tie(SplitImmOffset, RemainderOffset) = TII.splitFlatOffset(
              ConstOffset, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);

          if (isUInt<32>(RemainderOffset)) {
            MachineInstr *MI = Root.getParent();
            MachineBasicBlock *MBB = MI->getParent();
            Register HighBits =
                MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

            BuildMI(*MBB, MI, MI->getDebugLoc(),
                    TII.get(AMDGPU::V_MOV_B32_e32), HighBits)
                .addImm(RemainderOffset);

            return {{
                [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrBase); },  // saddr
                [=](MachineInstrBuilder &MIB) { MIB.addReg(HighBits); }, // voffset
                [=](MachineInstrBuilder &MIB) { MIB.addImm(SplitImmOffset); },
            }};
          }
        }

        // We are adding a 64-bit SGPR and a constant. If the constant bus
        // limit is 1 we would need extra moves for each half of the constant,
        // so it is better to do a scalar add and then a single VALU
        // instruction to materialize zero.
        unsigned NumLiterals =
            !TII.isInlineConstant(APInt(32, ConstOffset & 0xffffffff)) +
            !TII.isInlineConstant(APInt(32, ConstOffset >> 32));
        if (STI.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
          return std::nullopt;
      }
    }
  }

  // Match the variable offset.
  auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
    // Look through the SGPR->VGPR copy.
    Register SAddr =
        getSrcRegIgnoringCopies(AddrDef->MI->getOperand(1).getReg(), *MRI);

    if (isSGPR(SAddr)) {
      Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();

      // It's possible voffset is an SGPR here, but the copy to VGPR will be
      // inserted later.
      if (Register VOffset = matchZeroExtendFromS32(*MRI, PtrBaseOffset)) {
        return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SAddr); },
                 [=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); },
                 [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }}};
      }
    }
  }

  // FIXME: We should probably have folded COPY (G_IMPLICIT_DEF) earlier, and
  // drop this.
  if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
      AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
    return std::nullopt;

  // It's cheaper to materialize a single 32-bit zero for vaddr than the two
  // moves required to copy a 64-bit SGPR to VGPR.
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
      .addImm(0);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(AddrDef->Reg); }, // saddr
      [=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); },      // voffset
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }     // offset
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
  Register Addr = Root.getReg();
  Register PtrBase;
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  // Match the immediate offset first, which canonically is moved as low as
  // possible.
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
      TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS,
                            SIInstrFlags::FlatScratch)) {
    Addr = PtrBase;
    ImmOffset = ConstOffset;
  }

  auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
  if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = AddrDef->MI->getOperand(1).getIndex();
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.addFrameIndex(FI); }, // saddr
        [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }  // offset
    }};
  }

  Register SAddr = AddrDef->Reg;

  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
    Register LHS = AddrDef->MI->getOperand(1).getReg();
    Register RHS = AddrDef->MI->getOperand(2).getReg();
    auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI);
    auto RHSDef = getDefSrcRegIgnoringCopies(RHS, *MRI);

    if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
        isSGPR(RHSDef->Reg)) {
      int FI = LHSDef->MI->getOperand(1).getIndex();
      MachineInstr &I = *Root.getParent();
      MachineBasicBlock *BB = I.getParent();
      const DebugLoc &DL = I.getDebugLoc();
      SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
          .addFrameIndex(FI)
          .addReg(RHSDef->Reg)
          .setOperandDead(3); // Dead scc
    }
  }

  if (!isSGPR(SAddr))
    return std::nullopt;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(SAddr); },    // saddr
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
  }};
}
// Check whether the flat scratch SVS swizzle bug affects this access.
bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
    Register VAddr, Register SAddr, uint64_t ImmOffset) const {
  if (!Subtarget->hasFlatScratchSVSSwizzleBug())
    return false;

  // The bug affects the swizzling of SVS accesses if there is any carry out
  // from the two low order bits (i.e. from bit 1 into bit 2) when adding
  // voffset to (soffset + inst_offset).
  auto VKnown = KB->getKnownBits(VAddr);
  auto SKnown = KnownBits::computeForAddSub(
      /*Add=*/true, /*NSW=*/false, /*NUW=*/false, KB->getKnownBits(SAddr),
      KnownBits::makeConstant(APInt(32, ImmOffset)));
  uint64_t VMax = VKnown.getMaxValue().getZExtValue();
  uint64_t SMax = SKnown.getMaxValue().getZExtValue();
  return (VMax & 3) + (SMax & 3) >= 4;
}
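// Illustrative arithmetic (standalone sketch, not part of the upstream file):
// the bug can only trigger when the low two bits of the two addends can carry
// into bit 2. Using the known-bits maxima computed above:
//
//   bool mayCarryIntoBit2(uint64_t VMax, uint64_t SMax) {
//     return (VMax & 3) + (SMax & 3) >= 4; // e.g. 3+1, 2+2, 3+3 all carry
//   }
//
// If either maximum has both low bits known zero, the sum is at most 3 and
// the access is safe.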
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
  Register Addr = Root.getReg();
  Register PtrBase;
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  // Match the immediate offset first, which canonically is moved as low as
  // possible.
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  Register OrigAddr = Addr;
  if (ConstOffset != 0 &&
      TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS, true)) {
    Addr = PtrBase;
    ImmOffset = ConstOffset;
  }

  auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
  if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
    return std::nullopt;

  Register RHS = AddrDef->MI->getOperand(2).getReg();
  if (RBI.getRegBank(RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
    return std::nullopt;

  Register LHS = AddrDef->MI->getOperand(1).getReg();
  auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI);

  if (OrigAddr != Addr) {
    if (!isFlatScratchBaseLegalSVImm(OrigAddr))
      return std::nullopt;
  } else {
    if (!isFlatScratchBaseLegalSV(OrigAddr))
      return std::nullopt;
  }

  if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
    return std::nullopt;

  if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = LHSDef->MI->getOperand(1).getIndex();
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); },       // vaddr
        [=](MachineInstrBuilder &MIB) { MIB.addFrameIndex(FI); }, // saddr
        [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }  // offset
    }};
  }

  if (!isSGPR(LHS))
    return std::nullopt;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); },      // vaddr
      [=](MachineInstrBuilder &MIB) { MIB.addReg(LHS); },      // saddr
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();

  int64_t Offset = 0;
  if (mi_match(Root.getReg(), *MRI, m_ICst(Offset)) &&
      !TII.isLegalMUBUFImmOffset(Offset)) {
    // If we don't know this private access is a local stack object, split the
    // offset and materialize the high bits in a VGPR.
    Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    // TODO: Should this be inside the render function? The iterator seems to
    // move.
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
            HighBits)
        .addImm(Offset & ~SIInstrInfo::getMaxMUBUFImmOffset(*Subtarget));

    // ... (rsrc/vaddr/soffset/offset renderers for the split constant elided) ...
  }

  // Offsets are always legal.
  std::optional<int> FI;
  Register VAddr = Root.getReg();
  if (const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg())) {
    Register PtrBase;
    int64_t ConstOffset;
    std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(VAddr, *MRI);
    if (ConstOffset != 0) {
      if (TII.isLegalMUBUFImmOffset(ConstOffset)) {
        const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
        if (PtrBaseDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
          FI = PtrBaseDef->getOperand(1).getIndex();
        // ...
      }
    } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
      FI = RootDef->getOperand(1).getIndex();
    }
  }

  // ... (final rsrc/vaddr/soffset/offset renderers elided) ...
}
bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base,
                                                int64_t Offset) const {
  if (!isUInt<16>(Offset))
    return false;

  if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands instructions with a negative base value and an offset
  // don't seem to work.
  return KB->signBitIsZero(Base);
}

bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
                                                 int64_t Offset1,
                                                 unsigned Size) const {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)
    return false;
  if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
    return false;

  if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands instructions with a negative base value and an offset
  // don't seem to work.
  return KB->signBitIsZero(Base);
}
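// Illustrative sketch (standalone, mirrors the test above): DS read2/write2
// encode each offset as an 8-bit count of Size-byte slots, so legality is
// plain integer arithmetic:
//
//   bool ds2OffsetsEncodable(int64_t Off0, int64_t Off1, unsigned Size) {
//     if (Off0 % Size != 0 || Off1 % Size != 0)
//       return false; // both offsets must be slot-aligned
//     return Off0 >= 0 && Off1 >= 0 &&
//            Off0 / Size <= 255 && Off1 / Size <= 255; // fit in 8 bits
//   }
//
// For Size == 4 (ds_read2_b32) that caps each offset at 1020 bytes; for
// Size == 8 (ds_read2_b64) at 2040 bytes.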
// Return whether the operation embedded in Addr is never subject to unsigned
// wraparound.
static bool isNoUnsignedWrap(MachineInstr *Addr) {
  return Addr->getOpcode() == TargetOpcode::G_OR ||
         (Addr->getOpcode() == TargetOpcode::G_PTR_ADD &&
          Addr->getFlag(MachineInstr::NoUWrap));
}

// Check whether a flat scratch base of the form `base + offset` is legal to
// put in SGPR/VGPR (i.e. unsigned per hardware requirement).
bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const {
  // Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative
  // values.
  if (STI.hasSignedScratchOffsets())
    return true;

  MachineInstr *AddrMI = getDefIgnoringCopies(Addr, *MRI);

  if (isNoUnsignedWrap(AddrMI))
    return true;

  Register LHS = AddrMI->getOperand(1).getReg();
  Register RHS = AddrMI->getOperand(2).getReg();

  if (AddrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
    std::optional<ValueAndVReg> RhsValReg =
        getIConstantVRegValWithLookThrough(RHS, *MRI);
    // If the immediate offset is negative and within a certain range, the
    // base address cannot also be negative, otherwise the sum would exceed
    // the range of scratch memory a thread can access.
    if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
        RhsValReg->Value.getSExtValue() > -0x40000000)
      return true;
  }

  return KB->signBitIsZero(LHS);
}

// Check that an address in SGPR/VGPR is legal for flat scratch in the form
// of: SGPR + VGPR.
bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(Register Addr) const {
  if (STI.hasSignedScratchOffsets())
    return true;

  MachineInstr *AddrMI = getDefIgnoringCopies(Addr, *MRI);
  Register LHS = AddrMI->getOperand(1).getReg();
  Register RHS = AddrMI->getOperand(2).getReg();
  return KB->signBitIsZero(RHS) && KB->signBitIsZero(LHS);
}

// Check that an address in SGPR/VGPR is legal for flat scratch in the form
// of: SGPR + VGPR + Imm.
bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
    Register Addr) const {
  if (STI.hasSignedScratchOffsets())
    return true;

  MachineInstr *AddrMI = getDefIgnoringCopies(Addr, *MRI);
  Register Base = AddrMI->getOperand(1).getReg();
  std::optional<DefinitionAndSourceRegister> BaseDef =
      getDefSrcRegIgnoringCopies(Base, *MRI);
  std::optional<ValueAndVReg> RHSOffset =
      getIConstantVRegValWithLookThrough(AddrMI->getOperand(2).getReg(), *MRI);
  assert(RHSOffset);

  // If the immediate offset is negative and within a certain range, the base
  // address cannot also be negative.
  if (isNoUnsignedWrap(BaseDef->MI) &&
      (isNoUnsignedWrap(AddrMI) ||
       (RHSOffset->Value.getSExtValue() < 0 &&
        RHSOffset->Value.getSExtValue() > -0x40000000)))
    return true;

  Register LHS = BaseDef->MI->getOperand(1).getReg();
  Register RHS = BaseDef->MI->getOperand(2).getReg();
  return KB->signBitIsZero(RHS) && KB->signBitIsZero(LHS);
}
bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
                                                    unsigned ShAmtBits) const {
  assert(MI.getOpcode() == TargetOpcode::G_AND);

  std::optional<APInt> RHS =
      getIConstantVRegVal(MI.getOperand(2).getReg(), *MRI);
  if (!RHS)
    return false;

  if (RHS->countr_one() >= ShAmtBits)
    return true;

  const APInt &LHSKnownZeros = KB->getKnownZeroes(MI.getOperand(1).getReg());
  return (LHSKnownZeros | *RHS).countr_one() >= ShAmtBits;
}
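// Illustrative sketch (standalone, not part of the upstream file): masking a
// shift amount is redundant when the mask keeps at least as many low bits as
// the hardware reads. For a 32-bit shift (5 significant amount bits):
//
//   (x & 31) << n == x << n   on the VALU, since for RHS = 31
//   countr_one(31) == 5 >= ShAmtBits(5), so the G_AND can be dropped.
//
// The known-zeros term extends this to masks that only clear bits already
// known to be zero in the shift amount.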
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffset(
    MachineOperand &Root) const {
  Register Reg = Root.getReg();

  std::optional<DefinitionAndSourceRegister> Def =
      getDefSrcRegIgnoringCopies(Reg, *MRI);
  assert(Def && "this shouldn't be an optional result");

  // ... (wave-address, frame-index, and legal-offset cases elided) ...
}
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(MachineOperand &Root) const {
  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
  if (!RootDef)
    return std::pair(Root.getReg(), 0);

  int64_t ConstAddr = 0;

  Register PtrBase;
  int64_t Offset;
  std::tie(PtrBase, Offset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (Offset) {
    if (isDSOffsetLegal(PtrBase, Offset)) {
      // (add n0, c0)
      return std::pair(PtrBase, Offset);
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
    // TODO
  } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) {
    // TODO
  }

  return std::pair(Root.getReg(), 0);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
  Register Reg;
  unsigned Offset;
  std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(
    MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 4);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(
    MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 8);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDSReadWrite2(MachineOperand &Root,
                                              unsigned Size) const {
  Register Reg;
  unsigned Offset;
  std::tie(Reg, Offset) = selectDSReadWrite2Impl(Root, Size);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset + 1); }}};
}
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDSReadWrite2Impl(MachineOperand &Root,
                                                  unsigned Size) const {
  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
  if (!RootDef)
    return std::pair(Root.getReg(), 0);

  int64_t ConstAddr = 0;

  Register PtrBase;
  int64_t Offset;
  std::tie(PtrBase, Offset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (Offset) {
    int64_t OffsetValue0 = Offset;
    int64_t OffsetValue1 = Offset + Size;
    if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1, Size)) {
      // (add n0, c0)
      return std::pair(PtrBase, OffsetValue0 / Size);
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
    // TODO
  } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) {
    // TODO
  }

  return std::pair(Root.getReg(), 0);
}

/// If \p Root is a G_PTR_ADD with a G_CONSTANT on the right-hand side, return
/// the base value with the constant offset. There may be intervening copies
/// between \p Root and the identified constant. Returns \p Root, 0 if this
/// does not match the pattern.
std::pair<Register, int64_t>
AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
  Register Root, const MachineRegisterInfo &MRI) const {
  MachineInstr *RootI = getDefIgnoringCopies(Root, MRI);
  if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
    return {Root, 0};

  MachineOperand &RHS = RootI->getOperand(2);
  std::optional<ValueAndVReg> MaybeOffset =
      getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
  if (!MaybeOffset)
    return {Root, 0};
  return {RootI->getOperand(1).getReg(), MaybeOffset->Value.getSExtValue()};
}
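// Illustrative MIR (not part of the upstream file): the decomposition above
// turns a (G_PTR_ADD base, G_CONSTANT c) chain into a (base, c) pair:
//
//   %c:_(s64) = G_CONSTANT i64 16
//   %p:_(p1) = G_PTR_ADD %base, %c
//   ; getPtrBaseWithConstantOffset(%p) == {%base, 16}
//
// Callers then decide whether the constant fits the addressing mode's
// immediate field and fold it if so.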
static void addZeroImm(MachineInstrBuilder &MIB) { MIB.addImm(0); }

/// Return a resource descriptor for use with an arbitrary 64-bit pointer. If
/// BasePtr is not valid, a null base pointer will be used.
static Register buildRSRC(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                          uint32_t FormatLo, uint32_t FormatHi,
                          Register BasePtr) {
  Register RSrc2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register RSrc3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register RSrcHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  Register RSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);

  B.buildInstr(AMDGPU::S_MOV_B32)
    .addDef(RSrc2)
    .addImm(FormatLo);
  B.buildInstr(AMDGPU::S_MOV_B32)
    .addDef(RSrc3)
    .addImm(FormatHi);

  // Build the half of the subregister with the constants before building the
  // full 128-bit register. If we are building multiple resource descriptors,
  // this will allow CSEing of the 2-component register.
  B.buildInstr(AMDGPU::REG_SEQUENCE)
    .addDef(RSrcHi)
    .addReg(RSrc2)
    .addImm(AMDGPU::sub0)
    .addReg(RSrc3)
    .addImm(AMDGPU::sub1);

  Register RSrcLo = BasePtr;
  if (!BasePtr) {
    RSrcLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
    B.buildInstr(AMDGPU::S_MOV_B64)
      .addDef(RSrcLo)
      .addImm(0);
  }

  B.buildInstr(AMDGPU::REG_SEQUENCE)
    .addDef(RSrc)
    .addReg(RSrcLo)
    .addImm(AMDGPU::sub0_sub1)
    .addReg(RSrcHi)
    .addImm(AMDGPU::sub2_sub3);

  return RSrc;
}

static Register buildAddr64RSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                                const SIInstrInfo &TII, Register BasePtr) {
  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();

  // FIXME: Why are half the "default" bits ignored based on the addressing
  // mode?
  return buildRSRC(B, MRI, 0, Hi_32(DefaultFormat), BasePtr);
}

static Register buildOffsetSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                               const SIInstrInfo &TII, Register BasePtr) {
  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();

  return buildRSRC(B, MRI, -1, Hi_32(DefaultFormat), BasePtr);
}
AMDGPUInstructionSelector::MUBUFAddressData
AMDGPUInstructionSelector::parseMUBUFAddress(Register Src) const {
  MUBUFAddressData Data;
  Data.N0 = Src;

  Register PtrBase;
  int64_t Offset;

  std::tie(PtrBase, Offset) = getPtrBaseWithConstantOffset(Src, *MRI);
  if (isUInt<32>(Offset)) {
    Data.N0 = PtrBase;
    Data.Offset = Offset;
  }

  if (MachineInstr *InputAdd
      = getOpcodeDef(TargetOpcode::G_PTR_ADD, Data.N0, *MRI)) {
    Data.N2 = InputAdd->getOperand(1).getReg();
    Data.N3 = InputAdd->getOperand(2).getReg();
    // TODO: Remove this when we have copy folding optimizations after
    // RegBankSelect.
    Data.N2 = getDefIgnoringCopies(Data.N2, *MRI)->getOperand(0).getReg();
    Data.N3 = getDefIgnoringCopies(Data.N3, *MRI)->getOperand(0).getReg();
  }

  return Data;
}

/// Return if the addr64 mubuf mode should be used for the given address.
bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
  // (ptr_add N2, N3) -> addr64, or
  // (ptr_add (ptr_add N2, N3), C1) -> addr64
  if (Addr.N2)
    return true;

  const RegisterBank *N0Bank = RBI.getRegBank(Addr.N0, *MRI, TRI);
  return N0Bank->getID() == AMDGPU::VGPRRegBankID;
}

/// Split an immediate offset \p ImmOffset depending on whether it fits in the
/// immediate field. Modifies \p ImmOffset and sets \p SOffset to the variable
/// component.
void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
  MachineIRBuilder &B, Register &SOffset, int64_t &ImmOffset) const {
  if (TII.isLegalMUBUFImmOffset(ImmOffset))
    return;

  // Illegal offset, store it in soffset.
  SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  B.buildInstr(AMDGPU::S_MOV_B32)
    .addDef(SOffset)
    .addImm(ImmOffset);
  ImmOffset = 0;
}
bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
  MachineOperand &Root, Register &VAddr, Register &RSrcReg,
  Register &SOffset, int64_t &Offset) const {
  // FIXME: Predicates should stop this from reaching here.
  // addr64 bit was removed for volcanic islands.
  if (!STI.hasAddr64() || STI.useFlatForGlobal())
    return false;

  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (!shouldUseAddr64(AddrData))
    return false;

  Register N0 = AddrData.N0;
  Register N2 = AddrData.N2;
  Register N3 = AddrData.N3;
  Offset = AddrData.Offset;

  // Base pointer for the SRD.
  Register SRDPtr;

  if (N2) {
    if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
      assert(N3);
      if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
        // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
        // addr64, and construct the default resource from a 0 address.
        VAddr = N0;
      } else {
        SRDPtr = N3;
        VAddr = N2;
      }
    } else {
      // N2 is not divergent.
      SRDPtr = N2;
      VAddr = N3;
    }
  } else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
    // Use the default null pointer in the resource.
    VAddr = N0;
  } else {
    // N0 -> offset, or
    // (N0 + C1) -> offset
    SRDPtr = N0;
  }

  MachineIRBuilder B(*Root.getParent());
  RSrcReg = buildAddr64RSrc(B, *MRI, TII, SRDPtr);
  splitIllegalMUBUFOffset(B, SOffset, Offset);
  return true;
}

bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
  MachineOperand &Root, Register &RSrcReg, Register &SOffset,
  int64_t &Offset) const {
  // FIXME: Pattern should not reach here.
  if (STI.useFlatForGlobal())
    return false;

  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (shouldUseAddr64(AddrData))
    return false;

  // N0 -> offset, or
  // (N0 + C1) -> offset
  Register SRDPtr = AddrData.N0;
  Offset = AddrData.Offset;

  // TODO: Look through extensions for 32-bit soffset.
  MachineIRBuilder B(*Root.getParent());

  RSrcReg = buildOffsetSrc(B, *MRI, TII, SRDPtr);
  splitIllegalMUBUFOffset(B, SOffset, Offset);
  return true;
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
  Register VAddr;
  Register RSrcReg;
  Register SOffset;
  int64_t Offset = 0;

  if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset, Offset))
    return {};

  // FIXME: Use defaulted operands for trailing 0s and remove from the complex
  // pattern.
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(RSrcReg); }, // rsrc
      [=](MachineInstrBuilder &MIB) { MIB.addReg(VAddr); },   // vaddr
      [=](MachineInstrBuilder &MIB) { // soffset
        if (SOffset)
          MIB.addReg(SOffset);
        else if (STI.hasRestrictedSOffset())
          MIB.addReg(AMDGPU::SGPR_NULL);
        else
          MIB.addImm(0);
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }, // offset
      addZeroImm, //  cpol
      addZeroImm, //  tfe
      addZeroImm  //  swz
    }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
  Register RSrcReg;
  Register SOffset;
  int64_t Offset = 0;

  if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset, Offset))
    return {};

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(RSrcReg); }, // rsrc
      [=](MachineInstrBuilder &MIB) { // soffset
        if (SOffset)
          MIB.addReg(SOffset);
        else if (STI.hasRestrictedSOffset())
          MIB.addReg(AMDGPU::SGPR_NULL);
        else
          MIB.addImm(0);
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }, // offset
      addZeroImm, //  cpol
      addZeroImm, //  tfe
      addZeroImm  //  swz
    }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectBUFSOffset(MachineOperand &Root) const {
  Register SOffset = Root.getReg();

  if (STI.hasRestrictedSOffset() && mi_match(SOffset, *MRI, m_ZeroInt()))
    SOffset = AMDGPU::SGPR_NULL;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}};
}

/// Get an immediate that must be 32-bits, and treated as zero extended.
static std::optional<uint64_t>
getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI) {
  // getIConstantVRegVal sexts any values, so see if that matters.
  std::optional<int64_t> OffsetVal = getIConstantVRegSExtVal(Reg, MRI);
  if (!OffsetVal || !isInt<32>(*OffsetVal))
    return std::nullopt;
  return Lo_32(*OffsetVal);
}
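// Illustrative arithmetic (standalone, not part of the upstream file): why
// the sign/zero-extension dance above matters. getIConstantVRegSExtVal
// sign-extends, so a 32-bit 0xFFFFFFFF comes back as -1; taking Lo_32 of the
// sign-extended value recovers the intended unsigned 32-bit pattern:
//
//   int64_t Sext = -1;                 // sign-extended form of 0xFFFFFFFF
//   uint32_t Zext = uint32_t(Sext);    // == 0xFFFFFFFF, the zero-extended use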
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
  std::optional<uint64_t> OffsetVal =
      getConstantZext32Val(Root.getReg(), *MRI);
  if (!OffsetVal)
    return {};

  std::optional<int64_t> EncodedImm =
      AMDGPU::getSMRDEncodedOffset(STI, *OffsetVal, true);
  if (!EncodedImm)
    return {};

  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
  std::optional<uint64_t> OffsetVal =
      getConstantZext32Val(Root.getReg(), *MRI);
  if (!OffsetVal)
    return {};

  std::optional<int64_t> EncodedImm =
      AMDGPU::getSMRDEncodedLiteralOffset32(STI, *OffsetVal);
  if (!EncodedImm)
    return {};

  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
  // Match the (soffset + offset) pair as a 32-bit register base and
  // an immediate offset.
  Register SOffset;
  unsigned Offset;
  std::tie(SOffset, Offset) = AMDGPU::getBaseWithConstantOffset(
      *MRI, Root.getReg(), KB, /*CheckNUW*/ true);
  if (!SOffset)
    return std::nullopt;

  std::optional<int64_t> EncodedOffset =
      AMDGPU::getSMRDEncodedOffset(STI, Offset, /* IsBuffer */ true);
  if (!EncodedOffset)
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedOffset); }}};
}
static MachineInstr *stripBitCast(MachineInstr *MI, MachineRegisterInfo &MRI) {
  if (MI->getOpcode() == AMDGPU::G_BITCAST)
    return getDefIgnoringCopies(MI->getOperand(1).getReg(), MRI);
  return MI;
}

// Figure out if this is really an extract of the high 16-bits of a dword,
// returns nullptr if it isn't.
static MachineInstr *isExtractHiElt(MachineInstr *Inst,
                                    MachineRegisterInfo &MRI) {
  Inst = stripBitCast(Inst, MRI);

  if (Inst->getOpcode() != AMDGPU::G_TRUNC)
    return nullptr;

  MachineInstr *TruncOp =
      getDefIgnoringCopies(Inst->getOperand(1).getReg(), MRI);
  TruncOp = stripBitCast(TruncOp, MRI);

  // G_LSHR x, (G_CONSTANT i32 16)
  if (TruncOp->getOpcode() == AMDGPU::G_LSHR) {
    auto SrlAmount = getIConstantVRegValWithLookThrough(
        TruncOp->getOperand(2).getReg(), MRI);
    if (SrlAmount && SrlAmount->Value.getZExtValue() == 16) {
      MachineInstr *SrlOp =
          getDefIgnoringCopies(TruncOp->getOperand(1).getReg(), MRI);
      return stripBitCast(SrlOp, MRI);
    }
  }

  // G_SHUFFLE_VECTOR x, y, shufflemask(1, 1|0)
  //    1, 0 swaps the low/high 16 bits.
  //    1, 1 sets the high 16 bits to be the same as the low 16.
  // In any case, it selects the high elts.
  if (TruncOp->getOpcode() == AMDGPU::G_SHUFFLE_VECTOR) {
    ArrayRef<int> Mask = TruncOp->getOperand(3).getShuffleMask();
    assert(Mask.size() == 2);

    if (Mask[0] == 1 && Mask[1] <= 1) {
      MachineInstr *LHS =
          getDefIgnoringCopies(TruncOp->getOperand(1).getReg(), MRI);
      return stripBitCast(LHS, MRI);
    }
  }

  return nullptr;
}
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
                                                     bool &Matched) const {
  Matched = false;

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root);

  MachineInstr *MI = MRI->getVRegDef(Src);
  if (MI->getOpcode() == AMDGPU::G_FPEXT) {
    MachineOperand *MO = &MI->getOperand(1);
    Src = MO->getReg();
    MI = MRI->getVRegDef(Src);

    // See through bitcasts.
    if (MI->getOpcode() == AMDGPU::G_BITCAST) {
      MO = &MI->getOperand(1);
      Src = MO->getReg();
      MI = MRI->getVRegDef(Src);
    }

    const auto CheckAbsNeg = [&]() {
      // Be careful about folding modifiers if we already have an abs. fneg is
      // applied last, so we don't want to apply an earlier fneg.
      if ((Mods & SISrcMods::ABS) == 0) {
        unsigned ModsTmp;
        std::tie(Src, ModsTmp) = selectVOP3ModsImpl(*MO);
        // ... (merge ModsTmp into Mods and refresh MI/MO elided) ...
      }
    };

    CheckAbsNeg();

    // op_sel/op_sel_hi decide the source type and source. If the source's
    // op_sel_hi is set, it indicates to do a conversion from fp16. If the
    // source's op_sel is set, it picks the high half of the source register.
    Mods |= SISrcMods::OP_SEL_1;

    if (MachineInstr *ExtractHiEltMI = isExtractHiElt(MI, *MRI)) {
      Mods |= SISrcMods::OP_SEL_0;
      MI = ExtractHiEltMI;
      MO = &MI->getOperand(0);
      Src = MO->getReg();
      CheckAbsNeg();
    }

    Matched = true;
  }

  return std::pair(Src, Mods);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
    MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  bool Matched;
  std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
  if (!Matched)
    return {};

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  bool Matched;
  std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
}
bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  Register CCReg = I.getOperand(0).getReg();

  bool HasM0 = IntrID == Intrinsic::amdgcn_s_barrier_signal_isfirst_var;

  if (HasM0) {
    auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
                       .addReg(I.getOperand(2).getReg());
    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0));
    if (!constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI))
      return false;
  } else {
    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
        .addImm(I.getOperand(2).getImm());
  }

  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), CCReg).addReg(AMDGPU::SCC);

  I.eraseFromParent();
  return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
                                      *MRI);
}
unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
  if (HasInlineConst) {
    switch (IntrID) {
    default:
      llvm_unreachable("not a named barrier op");
    case Intrinsic::amdgcn_s_barrier_init:
      return AMDGPU::S_BARRIER_INIT_IMM;
    case Intrinsic::amdgcn_s_barrier_join:
      return AMDGPU::S_BARRIER_JOIN_IMM;
    case Intrinsic::amdgcn_s_wakeup_barrier:
      return AMDGPU::S_WAKEUP_BARRIER_IMM;
    case Intrinsic::amdgcn_s_get_barrier_state:
      return AMDGPU::S_GET_BARRIER_STATE_IMM;
    };
  } else {
    switch (IntrID) {
    default:
      llvm_unreachable("not a named barrier op");
    case Intrinsic::amdgcn_s_barrier_init:
      return AMDGPU::S_BARRIER_INIT_M0;
    case Intrinsic::amdgcn_s_barrier_join:
      return AMDGPU::S_BARRIER_JOIN_M0;
    case Intrinsic::amdgcn_s_wakeup_barrier:
      return AMDGPU::S_WAKEUP_BARRIER_M0;
    case Intrinsic::amdgcn_s_get_barrier_state:
      return AMDGPU::S_GET_BARRIER_STATE_M0;
    };
  }
}
bool AMDGPUInstructionSelector::selectNamedBarrierInst(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_barrier_state
                             ? I.getOperand(2)
                             : I.getOperand(1);
  std::optional<int64_t> BarValImm =
      getIConstantVRegSExtVal(BarOp.getReg(), *MRI);
  Register TmpReg0;

  // For S_BARRIER_INIT, member count will always be read from M0[16:22].
  if (IntrID == Intrinsic::amdgcn_s_barrier_init) {
    Register MemberCount = I.getOperand(2).getReg();
    TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_LSHL_B32), TmpReg0)
        .addImm(16)
        .addReg(MemberCount);
  }

  // If the barrier id is not an inlinable constant, it must be referenced
  // via M0; for S_BARRIER_INIT, OR it with the member count.
  if (!BarValImm && IntrID == Intrinsic::amdgcn_s_barrier_init) {
    Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_OR_B32), TmpReg1)
        .addReg(BarOp.getReg())
        .addReg(TmpReg0);
  }

  // ... (copy into M0 elided) ...

  unsigned Opc = getNamedBarrierOp(BarValImm.has_value(), IntrID);
  auto MIB = BuildMI(*MBB, &I, DL, TII.get(Opc));
  if (IntrID == Intrinsic::amdgcn_s_get_barrier_state)
    MIB.addDef(I.getOperand(0).getReg());
  if (BarValImm)
    MIB.addImm(*BarValImm);

  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectSBarrierLeave(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  Register CCReg = I.getOperand(0).getReg();

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_BARRIER_LEAVE));
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg).addReg(AMDGPU::SCC);

  I.eraseFromParent();
  return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
                                      *MRI);
}
void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
                                                 const MachineInstr &MI,
                                                 int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());
}

void AMDGPUInstructionSelector::renderNegateImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());
}

void AMDGPUInstructionSelector::renderBitcastImm(MachineInstrBuilder &MIB,
                                                 const MachineInstr &MI,
                                                 int OpIdx) const {
  assert(OpIdx == -1);

  const MachineOperand &Op = MI.getOperand(1);
  if (MI.getOpcode() == TargetOpcode::G_FCONSTANT)
    MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
  else {
    assert(MI.getOpcode() == TargetOpcode::G_CONSTANT &&
           "Expected G_CONSTANT");
    MIB.addImm(Op.getCImm()->getSExtValue());
  }
}

void AMDGPUInstructionSelector::renderPopcntImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getValue().popcount());
}

/// This only really exists to satisfy DAG type checking machinery, so is a
/// no-op here.
void AMDGPUInstructionSelector::renderTruncTImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  MIB.addImm(MI.getOperand(OpIdx).getImm());
}
void AMDGPUInstructionSelector::renderOpSelTImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  MIB.addImm(MI.getOperand(OpIdx).getImm() ? (int64_t)SISrcMods::OP_SEL_0 : 0);
}

void AMDGPUInstructionSelector::renderExtractCpol(MachineInstrBuilder &MIB,
                                                  const MachineInstr &MI,
                                                  int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  MIB.addImm(MI.getOperand(OpIdx).getImm() &
             (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::ALL
                                       : AMDGPU::CPol::ALL_pregfx12));
}

void AMDGPUInstructionSelector::renderExtractSWZ(MachineInstrBuilder &MIB,
                                                 const MachineInstr &MI,
                                                 int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  const bool Swizzle = MI.getOperand(OpIdx).getImm() &
                       (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::SWZ
                                                 : AMDGPU::CPol::SWZ_pregfx12);
  MIB.addImm(Swizzle);
}

void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  const uint32_t Cpol = MI.getOperand(OpIdx).getImm() &
                        (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::ALL
                                                  : AMDGPU::CPol::ALL_pregfx12);
  MIB.addImm(Cpol | AMDGPU::CPol::GLC);
}

void AMDGPUInstructionSelector::renderFrameIndex(MachineInstrBuilder &MIB,
                                                 const MachineInstr &MI,
                                                 int OpIdx) const {
  MIB.addFrameIndex(MI.getOperand(1).getIndex());
}

void AMDGPUInstructionSelector::renderFPPow2ToExponent(MachineInstrBuilder &MIB,
                                                       const MachineInstr &MI,
                                                       int OpIdx) const {
  const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
  int ExpVal = APF.getExactLog2Abs();
  assert(ExpVal != INT_MIN);
  MIB.addImm(ExpVal);
}
bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
  return TII.isInlineConstant(Imm);
}

bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {
  return TII.isInlineConstant(Imm);
}