31#include "llvm/IR/IntrinsicsAMDGPU.h"
38#define DEBUG_TYPE "si-instr-info"
40#define GET_INSTRINFO_CTOR_DTOR
41#include "AMDGPUGenInstrInfo.inc"
45#define GET_D16ImageDimIntrinsics_IMPL
46#define GET_ImageDimIntrinsicTable_IMPL
47#define GET_RsrcIntrinsics_IMPL
48#include "AMDGPUGenSearchableTables.inc"
58 cl::desc(
"Restrict range of branch instructions (DEBUG)"));
61 "amdgpu-fix-16-bit-physreg-copies",
62 cl::desc(
"Fix copies between 32 and 16 bit registers by extending to 32 bit"),
77 unsigned N =
Node->getNumOperands();
78 while (
N &&
Node->getOperand(
N - 1).getValueType() == MVT::Glue)
92 if (Op0Idx == -1 && Op1Idx == -1)
96 if ((Op0Idx == -1 && Op1Idx != -1) ||
97 (Op1Idx == -1 && Op0Idx != -1))
118 return !
MI.memoperands_empty() &&
120 return MMO->isLoad() && MMO->isInvariant();
142 if (!
MI.hasImplicitDef() &&
143 MI.getNumImplicitOperands() ==
MI.getDesc().implicit_uses().size() &&
144 !
MI.mayRaiseFPException())
155 if (
MI.isCompare()) {
161 switch (
Use.getOpcode()) {
162 case AMDGPU::S_AND_SAVEEXEC_B32:
163 case AMDGPU::S_AND_SAVEEXEC_B64:
165 case AMDGPU::S_AND_B32:
166 case AMDGPU::S_AND_B64:
167 if (!
Use.readsRegister(AMDGPU::EXEC,
nullptr))
177 switch (
MI.getOpcode()) {
180 case AMDGPU::V_READFIRSTLANE_B32:
197 if (
MI.getOpcode() == AMDGPU::SI_IF_BREAK)
202 for (
auto Op :
MI.uses()) {
203 if (
Op.isReg() &&
Op.getReg().isVirtual() &&
209 if (FromCycle ==
nullptr)
215 while (FromCycle && !FromCycle->
contains(ToCycle)) {
235 int64_t &Offset1)
const {
243 if (!
get(Opc0).mayLoad() || !
get(Opc1).mayLoad())
247 if (!
get(Opc0).getNumDefs() || !
get(Opc1).getNumDefs())
265 if (Offset0Idx == -1 || Offset1Idx == -1)
272 Offset0Idx -=
get(Opc0).NumDefs;
273 Offset1Idx -=
get(Opc1).NumDefs;
294 assert(NumOps == 4 || NumOps == 5);
299 dyn_cast<ConstantSDNode>(Load0->
getOperand(NumOps - 3));
301 dyn_cast<ConstantSDNode>(Load1->
getOperand(NumOps - 3));
303 if (!Load0Offset || !Load1Offset)
323 if (OffIdx0 == -1 || OffIdx1 == -1)
329 OffIdx0 -=
get(Opc0).NumDefs;
330 OffIdx1 -=
get(Opc1).NumDefs;
336 if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
349 case AMDGPU::DS_READ2ST64_B32:
350 case AMDGPU::DS_READ2ST64_B64:
351 case AMDGPU::DS_WRITE2ST64_B32:
352 case AMDGPU::DS_WRITE2ST64_B64:
367 OffsetIsScalable =
false;
397 unsigned Offset0 = Offset0Op->
getImm() & 0xff;
398 unsigned Offset1 = Offset1Op->
getImm() & 0xff;
399 if (Offset0 + 1 != Offset1)
418 Offset = EltSize * Offset0;
421 if (DataOpIdx == -1) {
439 if (BaseOp && !BaseOp->
isFI())
447 if (SOffset->
isReg())
464 isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
468 if (VAddr0Idx >= 0) {
470 for (
int I = VAddr0Idx;
I < SRsrcIdx; ++
I)
526 if (BaseOps1.
front()->isIdenticalTo(*BaseOps2.
front()))
534 if (MO1->getAddrSpace() != MO2->getAddrSpace())
537 auto Base1 = MO1->getValue();
538 auto Base2 = MO2->getValue();
539 if (!Base1 || !Base2)
544 if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
547 return Base1 == Base2;
551 int64_t Offset1,
bool OffsetIsScalable1,
553 int64_t Offset2,
bool OffsetIsScalable2,
554 unsigned ClusterSize,
555 unsigned NumBytes)
const {
563 }
else if (!BaseOps1.
empty() || !BaseOps2.
empty()) {
580 const unsigned LoadSize = NumBytes / ClusterSize;
581 const unsigned NumDWORDs = ((LoadSize + 3) / 4) * ClusterSize;
582 return NumDWORDs <= 8;
596 int64_t Offset0, int64_t Offset1,
597 unsigned NumLoads)
const {
598 assert(Offset1 > Offset0 &&
599 "Second offset should be larger than first offset!");
604 return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
611 const char *Msg =
"illegal VGPR to SGPR copy") {
615 C.diagnose(IllegalCopy);
632 assert((
TII.getSubtarget().hasMAIInsts() &&
633 !
TII.getSubtarget().hasGFX90AInsts()) &&
634 "Expected GFX908 subtarget.");
637 AMDGPU::AGPR_32RegClass.
contains(SrcReg)) &&
638 "Source register of the copy should be either an SGPR or an AGPR.");
641 "Destination register of the copy should be an AGPR.");
650 for (
auto Def =
MI, E =
MBB.
begin(); Def != E; ) {
653 if (!Def->modifiesRegister(SrcReg, &RI))
656 if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
657 Def->getOperand(0).getReg() != SrcReg)
664 bool SafeToPropagate =
true;
667 for (
auto I = Def;
I !=
MI && SafeToPropagate; ++
I)
668 if (
I->modifiesRegister(DefOp.
getReg(), &RI))
669 SafeToPropagate =
false;
671 if (!SafeToPropagate)
683 if (ImpUseSuperReg) {
684 Builder.
addReg(ImpUseSuperReg,
702 unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;
706 "VGPR used for an intermediate copy should have been reserved.");
721 unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
722 if (AMDGPU::AGPR_32RegClass.
contains(SrcReg)) {
723 TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;
730 if (ImpUseSuperReg) {
731 UseBuilder.
addReg(ImpUseSuperReg,
753 int16_t SubIdx = BaseIndices[
Idx];
754 Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
755 Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
756 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
757 unsigned Opcode = AMDGPU::S_MOV_B32;
760 bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
761 bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
762 if (AlignedDest && AlignedSrc && (
Idx + 1 < BaseIndices.
size())) {
766 DestSubReg = RI.getSubReg(DestReg, SubIdx);
767 SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
768 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
769 Opcode = AMDGPU::S_MOV_B64;
784 assert(FirstMI && LastMI);
792 LastMI->addRegisterKilled(SrcReg, &RI);
800 unsigned Size = RI.getRegSizeInBits(*RC);
802 unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);
808 if (((
Size == 16) != (SrcSize == 16))) {
815 if (DestReg == SrcReg) {
821 RC = RI.getPhysRegBaseClass(DestReg);
822 Size = RI.getRegSizeInBits(*RC);
823 SrcRC = RI.getPhysRegBaseClass(SrcReg);
824 SrcSize = RI.getRegSizeInBits(*SrcRC);
828 if (RC == &AMDGPU::VGPR_32RegClass) {
830 AMDGPU::SReg_32RegClass.
contains(SrcReg) ||
831 AMDGPU::AGPR_32RegClass.
contains(SrcReg));
832 unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
833 AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;
839 if (RC == &AMDGPU::SReg_32_XM0RegClass ||
840 RC == &AMDGPU::SReg_32RegClass) {
841 if (SrcReg == AMDGPU::SCC) {
848 if (DestReg == AMDGPU::VCC_LO) {
849 if (AMDGPU::SReg_32RegClass.
contains(SrcReg)) {
863 if (!AMDGPU::SReg_32RegClass.
contains(SrcReg)) {
873 if (RC == &AMDGPU::SReg_64RegClass) {
874 if (SrcReg == AMDGPU::SCC) {
881 if (DestReg == AMDGPU::VCC) {
882 if (AMDGPU::SReg_64RegClass.
contains(SrcReg)) {
896 if (!AMDGPU::SReg_64RegClass.
contains(SrcReg)) {
906 if (DestReg == AMDGPU::SCC) {
909 if (AMDGPU::SReg_64RegClass.
contains(SrcReg)) {
927 if (RC == &AMDGPU::AGPR_32RegClass) {
928 if (AMDGPU::VGPR_32RegClass.
contains(SrcReg) ||
929 (ST.
hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
944 const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
951 AMDGPU::SReg_LO16RegClass.
contains(SrcReg) ||
952 AMDGPU::AGPR_LO16RegClass.
contains(SrcReg));
954 bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
955 bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
956 bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
957 bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
974 if (IsAGPRDst || IsAGPRSrc) {
975 if (!DstLow || !SrcLow) {
977 "Cannot use hi16 subreg with an AGPR!");
990 if (AMDGPU::VGPR_16_Lo128RegClass.
contains(DestReg) &&
991 (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.
contains(SrcReg))) {
1004 if (!DstLow || !SrcLow) {
1006 "Cannot use hi16 subreg on VI!");
1057 const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);
1063 unsigned EltSize = 4;
1064 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
1067 Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
1070 Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
1072 Opcode = AMDGPU::INSTRUCTION_LIST_END;
1074 Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
1080 Opcode = AMDGPU::V_MOV_B64_e32;
1083 Opcode = AMDGPU::V_PK_MOV_B32;
1093 std::unique_ptr<RegScavenger> RS;
1094 if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
1101 const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
1102 const bool CanKillSuperReg = KillSrc && !Overlap;
1107 SubIdx = SubIndices[
Idx];
1109 SubIdx = SubIndices[SubIndices.
size() -
Idx - 1];
1110 Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
1111 Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
1112 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
1114 bool IsFirstSubreg =
Idx == 0;
1115 bool UseKill = CanKillSuperReg &&
Idx == SubIndices.
size() - 1;
1117 if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
1121 *RS, Overlap, ImpDefSuper, ImpUseSuper);
1122 }
else if (Opcode == AMDGPU::V_PK_MOV_B32) {
1169 int64_t
Value)
const {
1172 if (RegClass == &AMDGPU::SReg_32RegClass ||
1173 RegClass == &AMDGPU::SGPR_32RegClass ||
1174 RegClass == &AMDGPU::SReg_32_XM0RegClass ||
1175 RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {
1181 if (RegClass == &AMDGPU::SReg_64RegClass ||
1182 RegClass == &AMDGPU::SGPR_64RegClass ||
1183 RegClass == &AMDGPU::SReg_64_XEXECRegClass) {
1189 if (RegClass == &AMDGPU::VGPR_32RegClass) {
1200 unsigned EltSize = 4;
1201 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
1203 if (RI.getRegSizeInBits(*RegClass) > 32) {
1204 Opcode = AMDGPU::S_MOV_B64;
1207 Opcode = AMDGPU::S_MOV_B32;
1214 int64_t IdxValue =
Idx == 0 ?
Value : 0;
1217 get(Opcode), RI.getSubReg(DestReg, SubIndices[
Idx]));
1218 Builder.
addImm(IdxValue);
1224 return &AMDGPU::VGPR_32RegClass;
1236 assert(
MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
1237 "Not a VGPR32 reg");
1239 if (
Cond.size() == 1) {
1240 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1249 }
else if (
Cond.size() == 2) {
1251 switch (
Cond[0].getImm()) {
1252 case SIInstrInfo::SCC_TRUE: {
1253 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1255 : AMDGPU::S_CSELECT_B64), SReg)
1266 case SIInstrInfo::SCC_FALSE: {
1267 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1269 : AMDGPU::S_CSELECT_B64), SReg)
1280 case SIInstrInfo::VCCNZ: {
1283 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1294 case SIInstrInfo::VCCZ: {
1297 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1308 case SIInstrInfo::EXECNZ: {
1309 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1312 : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
1315 : AMDGPU::S_CSELECT_B64), SReg)
1326 case SIInstrInfo::EXECZ: {
1327 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1330 : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
1333 : AMDGPU::S_CSELECT_B64), SReg)
1382 return AMDGPU::COPY;
1383 if (RI.getRegSizeInBits(*DstRC) == 16) {
1386 return RI.
isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
1387 }
else if (RI.getRegSizeInBits(*DstRC) == 32) {
1388 return RI.
isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1389 }
else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.
isSGPRClass(DstRC)) {
1390 return AMDGPU::S_MOV_B64;
1391 }
else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.
isSGPRClass(DstRC)) {
1392 return AMDGPU::V_MOV_B64_PSEUDO;
1394 return AMDGPU::COPY;
1399 bool IsIndirectSrc)
const {
1400 if (IsIndirectSrc) {
1402 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
1404 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
1406 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
1408 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
1410 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
1412 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
1414 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
1416 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
1418 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
1420 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
1422 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
1423 if (VecSize <= 1024)
1424 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);
1430 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
1432 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
1434 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
1436 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
1438 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
1440 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
1442 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
1444 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
1446 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
1448 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
1450 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
1451 if (VecSize <= 1024)
1452 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);
1459 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1461 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1463 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1465 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1467 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1469 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1471 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1473 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1475 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1477 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1479 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1480 if (VecSize <= 1024)
1481 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1488 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1490 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1492 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1494 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1496 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1498 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1500 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1502 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1504 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1506 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1508 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1509 if (VecSize <= 1024)
1510 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1517 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
1519 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
1521 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
1523 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
1524 if (VecSize <= 1024)
1525 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
1532 bool IsSGPR)
const {
1544 assert(EltSize == 32 &&
"invalid reg indexing elt size");
1551 return AMDGPU::SI_SPILL_S32_SAVE;
1553 return AMDGPU::SI_SPILL_S64_SAVE;
1555 return AMDGPU::SI_SPILL_S96_SAVE;
1557 return AMDGPU::SI_SPILL_S128_SAVE;
1559 return AMDGPU::SI_SPILL_S160_SAVE;
1561 return AMDGPU::SI_SPILL_S192_SAVE;
1563 return AMDGPU::SI_SPILL_S224_SAVE;
1565 return AMDGPU::SI_SPILL_S256_SAVE;
1567 return AMDGPU::SI_SPILL_S288_SAVE;
1569 return AMDGPU::SI_SPILL_S320_SAVE;
1571 return AMDGPU::SI_SPILL_S352_SAVE;
1573 return AMDGPU::SI_SPILL_S384_SAVE;
1575 return AMDGPU::SI_SPILL_S512_SAVE;
1577 return AMDGPU::SI_SPILL_S1024_SAVE;
1586 return AMDGPU::SI_SPILL_V32_SAVE;
1588 return AMDGPU::SI_SPILL_V64_SAVE;
1590 return AMDGPU::SI_SPILL_V96_SAVE;
1592 return AMDGPU::SI_SPILL_V128_SAVE;
1594 return AMDGPU::SI_SPILL_V160_SAVE;
1596 return AMDGPU::SI_SPILL_V192_SAVE;
1598 return AMDGPU::SI_SPILL_V224_SAVE;
1600 return AMDGPU::SI_SPILL_V256_SAVE;
1602 return AMDGPU::SI_SPILL_V288_SAVE;
1604 return AMDGPU::SI_SPILL_V320_SAVE;
1606 return AMDGPU::SI_SPILL_V352_SAVE;
1608 return AMDGPU::SI_SPILL_V384_SAVE;
1610 return AMDGPU::SI_SPILL_V512_SAVE;
1612 return AMDGPU::SI_SPILL_V1024_SAVE;
1621 return AMDGPU::SI_SPILL_A32_SAVE;
1623 return AMDGPU::SI_SPILL_A64_SAVE;
1625 return AMDGPU::SI_SPILL_A96_SAVE;
1627 return AMDGPU::SI_SPILL_A128_SAVE;
1629 return AMDGPU::SI_SPILL_A160_SAVE;
1631 return AMDGPU::SI_SPILL_A192_SAVE;
1633 return AMDGPU::SI_SPILL_A224_SAVE;
1635 return AMDGPU::SI_SPILL_A256_SAVE;
1637 return AMDGPU::SI_SPILL_A288_SAVE;
1639 return AMDGPU::SI_SPILL_A320_SAVE;
1641 return AMDGPU::SI_SPILL_A352_SAVE;
1643 return AMDGPU::SI_SPILL_A384_SAVE;
1645 return AMDGPU::SI_SPILL_A512_SAVE;
1647 return AMDGPU::SI_SPILL_A1024_SAVE;
1656 return AMDGPU::SI_SPILL_AV32_SAVE;
1658 return AMDGPU::SI_SPILL_AV64_SAVE;
1660 return AMDGPU::SI_SPILL_AV96_SAVE;
1662 return AMDGPU::SI_SPILL_AV128_SAVE;
1664 return AMDGPU::SI_SPILL_AV160_SAVE;
1666 return AMDGPU::SI_SPILL_AV192_SAVE;
1668 return AMDGPU::SI_SPILL_AV224_SAVE;
1670 return AMDGPU::SI_SPILL_AV256_SAVE;
1672 return AMDGPU::SI_SPILL_AV288_SAVE;
1674 return AMDGPU::SI_SPILL_AV320_SAVE;
1676 return AMDGPU::SI_SPILL_AV352_SAVE;
1678 return AMDGPU::SI_SPILL_AV384_SAVE;
1680 return AMDGPU::SI_SPILL_AV512_SAVE;
1682 return AMDGPU::SI_SPILL_AV1024_SAVE;
1689 bool IsVectorSuperClass) {
1694 if (IsVectorSuperClass)
1695 return AMDGPU::SI_SPILL_WWM_AV32_SAVE;
1697 return AMDGPU::SI_SPILL_WWM_V32_SAVE;
1705 bool IsVectorSuperClass =
TRI.isVectorSuperClass(RC);
1711 if (IsVectorSuperClass)
1731 FrameInfo.getObjectAlign(FrameIndex));
1732 unsigned SpillSize =
TRI->getSpillSize(*RC);
1737 assert(SrcReg != AMDGPU::M0 &&
"m0 should not be spilled");
1738 assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
1739 SrcReg != AMDGPU::EXEC &&
"exec should not be spilled");
1747 if (SrcReg.
isVirtual() && SpillSize == 4) {
1748 MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
1763 SpillSize, RI, *MFI);
1777 return AMDGPU::SI_SPILL_S32_RESTORE;
1779 return AMDGPU::SI_SPILL_S64_RESTORE;
1781 return AMDGPU::SI_SPILL_S96_RESTORE;
1783 return AMDGPU::SI_SPILL_S128_RESTORE;
1785 return AMDGPU::SI_SPILL_S160_RESTORE;
1787 return AMDGPU::SI_SPILL_S192_RESTORE;
1789 return AMDGPU::SI_SPILL_S224_RESTORE;
1791 return AMDGPU::SI_SPILL_S256_RESTORE;
1793 return AMDGPU::SI_SPILL_S288_RESTORE;
1795 return AMDGPU::SI_SPILL_S320_RESTORE;
1797 return AMDGPU::SI_SPILL_S352_RESTORE;
1799 return AMDGPU::SI_SPILL_S384_RESTORE;
1801 return AMDGPU::SI_SPILL_S512_RESTORE;
1803 return AMDGPU::SI_SPILL_S1024_RESTORE;
1812 return AMDGPU::SI_SPILL_V32_RESTORE;
1814 return AMDGPU::SI_SPILL_V64_RESTORE;
1816 return AMDGPU::SI_SPILL_V96_RESTORE;
1818 return AMDGPU::SI_SPILL_V128_RESTORE;
1820 return AMDGPU::SI_SPILL_V160_RESTORE;
1822 return AMDGPU::SI_SPILL_V192_RESTORE;
1824 return AMDGPU::SI_SPILL_V224_RESTORE;
1826 return AMDGPU::SI_SPILL_V256_RESTORE;
1828 return AMDGPU::SI_SPILL_V288_RESTORE;
1830 return AMDGPU::SI_SPILL_V320_RESTORE;
1832 return AMDGPU::SI_SPILL_V352_RESTORE;
1834 return AMDGPU::SI_SPILL_V384_RESTORE;
1836 return AMDGPU::SI_SPILL_V512_RESTORE;
1838 return AMDGPU::SI_SPILL_V1024_RESTORE;
1847 return AMDGPU::SI_SPILL_A32_RESTORE;
1849 return AMDGPU::SI_SPILL_A64_RESTORE;
1851 return AMDGPU::SI_SPILL_A96_RESTORE;
1853 return AMDGPU::SI_SPILL_A128_RESTORE;
1855 return AMDGPU::SI_SPILL_A160_RESTORE;
1857 return AMDGPU::SI_SPILL_A192_RESTORE;
1859 return AMDGPU::SI_SPILL_A224_RESTORE;
1861 return AMDGPU::SI_SPILL_A256_RESTORE;
1863 return AMDGPU::SI_SPILL_A288_RESTORE;
1865 return AMDGPU::SI_SPILL_A320_RESTORE;
1867 return AMDGPU::SI_SPILL_A352_RESTORE;
1869 return AMDGPU::SI_SPILL_A384_RESTORE;
1871 return AMDGPU::SI_SPILL_A512_RESTORE;
1873 return AMDGPU::SI_SPILL_A1024_RESTORE;
1882 return AMDGPU::SI_SPILL_AV32_RESTORE;
1884 return AMDGPU::SI_SPILL_AV64_RESTORE;
1886 return AMDGPU::SI_SPILL_AV96_RESTORE;
1888 return AMDGPU::SI_SPILL_AV128_RESTORE;
1890 return AMDGPU::SI_SPILL_AV160_RESTORE;
1892 return AMDGPU::SI_SPILL_AV192_RESTORE;
1894 return AMDGPU::SI_SPILL_AV224_RESTORE;
1896 return AMDGPU::SI_SPILL_AV256_RESTORE;
1898 return AMDGPU::SI_SPILL_AV288_RESTORE;
1900 return AMDGPU::SI_SPILL_AV320_RESTORE;
1902 return AMDGPU::SI_SPILL_AV352_RESTORE;
1904 return AMDGPU::SI_SPILL_AV384_RESTORE;
1906 return AMDGPU::SI_SPILL_AV512_RESTORE;
1908 return AMDGPU::SI_SPILL_AV1024_RESTORE;
1915 bool IsVectorSuperClass) {
1920 if (IsVectorSuperClass)
1921 return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
1923 return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
1930 bool IsVectorSuperClass =
TRI.isVectorSuperClass(RC);
1936 if (IsVectorSuperClass)
1953 unsigned SpillSize =
TRI->getSpillSize(*RC);
1960 FrameInfo.getObjectAlign(FrameIndex));
1964 assert(DestReg != AMDGPU::M0 &&
"m0 should not be reloaded into");
1965 assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
1966 DestReg != AMDGPU::EXEC &&
"exec should not be spilled");
1971 if (DestReg.
isVirtual() && SpillSize == 4) {
1973 MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
1987 SpillSize, RI, *MFI);
2002 unsigned Quantity)
const {
2004 while (Quantity > 0) {
2005 unsigned Arg = std::min(Quantity, 8u);
2019 if (HasNoTerminator) {
2020 if (
Info->returnsVoid()) {
2038 constexpr unsigned DoorbellIDMask = 0x3ff;
2039 constexpr unsigned ECQueueWaveAbort = 0x400;
2045 Register DoorbellReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
2051 MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
2056 MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
2058 .
addUse(DoorbellRegMasked)
2059 .
addImm(ECQueueWaveAbort);
2061 .
addUse(SetWaveAbortBit);
2072 if (SplitBB != &
MBB)
2081 switch (
MI.getOpcode()) {
2083 if (
MI.isMetaInstruction())
2088 return MI.getOperand(0).getImm() + 1;
2098 switch (
MI.getOpcode()) {
2100 case AMDGPU::S_MOV_B64_term:
2103 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2106 case AMDGPU::S_MOV_B32_term:
2109 MI.setDesc(
get(AMDGPU::S_MOV_B32));
2112 case AMDGPU::S_XOR_B64_term:
2115 MI.setDesc(
get(AMDGPU::S_XOR_B64));
2118 case AMDGPU::S_XOR_B32_term:
2121 MI.setDesc(
get(AMDGPU::S_XOR_B32));
2123 case AMDGPU::S_OR_B64_term:
2126 MI.setDesc(
get(AMDGPU::S_OR_B64));
2128 case AMDGPU::S_OR_B32_term:
2131 MI.setDesc(
get(AMDGPU::S_OR_B32));
2134 case AMDGPU::S_ANDN2_B64_term:
2137 MI.setDesc(
get(AMDGPU::S_ANDN2_B64));
2140 case AMDGPU::S_ANDN2_B32_term:
2143 MI.setDesc(
get(AMDGPU::S_ANDN2_B32));
2146 case AMDGPU::S_AND_B64_term:
2149 MI.setDesc(
get(AMDGPU::S_AND_B64));
2152 case AMDGPU::S_AND_B32_term:
2155 MI.setDesc(
get(AMDGPU::S_AND_B32));
2158 case AMDGPU::S_AND_SAVEEXEC_B64_term:
2161 MI.setDesc(
get(AMDGPU::S_AND_SAVEEXEC_B64));
2164 case AMDGPU::S_AND_SAVEEXEC_B32_term:
2167 MI.setDesc(
get(AMDGPU::S_AND_SAVEEXEC_B32));
2170 case AMDGPU::SI_SPILL_S32_TO_VGPR:
2171 MI.setDesc(
get(AMDGPU::V_WRITELANE_B32));
2174 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
2175 MI.setDesc(
get(AMDGPU::V_READLANE_B32));
2178 case AMDGPU::V_MOV_B64_PSEUDO: {
2180 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2181 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2187 MI.setDesc(
get(AMDGPU::V_MOV_B64_e32));
2192 if (
SrcOp.isImm()) {
2194 APInt Lo(32, Imm.getLoBits(32).getZExtValue());
2195 APInt Hi(32, Imm.getHiBits(32).getZExtValue());
2238 MI.eraseFromParent();
2241 case AMDGPU::V_MOV_B64_DPP_PSEUDO: {
2245 case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
2250 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2255 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2256 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2258 APInt Lo(32, Imm.getLoBits(32).getZExtValue());
2259 APInt Hi(32, Imm.getHiBits(32).getZExtValue());
2266 MI.eraseFromParent();
2269 case AMDGPU::V_SET_INACTIVE_B32: {
2270 unsigned NotOpc = ST.
isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
2271 unsigned Exec = ST.
isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
2275 .
add(
MI.getOperand(1));
2279 .
add(
MI.getOperand(2));
2282 MI.eraseFromParent();
2285 case AMDGPU::V_SET_INACTIVE_B64: {
2286 unsigned NotOpc = ST.
isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
2287 unsigned Exec = ST.
isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
2289 MI.getOperand(0).getReg())
2290 .
add(
MI.getOperand(1));
2295 MI.getOperand(0).getReg())
2296 .
add(
MI.getOperand(2));
2300 MI.eraseFromParent();
2303 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2304 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2305 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2306 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2307 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2308 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2309 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2310 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2311 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2312 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2313 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2314 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2315 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2316 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2317 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2318 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2319 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2320 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2321 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2322 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2323 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2324 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2325 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2326 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2327 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
2328 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
2329 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
2330 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
2331 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
2336 Opc = AMDGPU::V_MOVRELD_B32_e32;
2338 Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
2339 : AMDGPU::S_MOVRELD_B32;
2344 bool IsUndef =
MI.getOperand(1).isUndef();
2345 unsigned SubReg =
MI.getOperand(3).getImm();
2346 assert(VecReg ==
MI.getOperand(1).getReg());
2351 .
add(
MI.getOperand(2))
2355 const int ImpDefIdx =
2357 const int ImpUseIdx = ImpDefIdx + 1;
2359 MI.eraseFromParent();
2362 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
2363 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
2364 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
2365 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
2366 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
2367 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
2368 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
2369 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
2370 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
2371 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
2372 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
2373 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
2376 bool IsUndef =
MI.getOperand(1).isUndef();
2385 const MCInstrDesc &OpDesc =
get(AMDGPU::V_MOV_B32_indirect_write);
2389 .
add(
MI.getOperand(2))
2394 const int ImpDefIdx =
2396 const int ImpUseIdx = ImpDefIdx + 1;
2403 MI.eraseFromParent();
2406 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
2407 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
2408 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
2409 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
2410 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
2411 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
2412 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
2413 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
2414 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
2415 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
2416 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
2417 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
2421 bool IsUndef =
MI.getOperand(1).isUndef();
2439 MI.eraseFromParent();
2442 case AMDGPU::SI_PC_ADD_REL_OFFSET: {
2445 Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
2446 Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
2469 BuildMI(MF,
DL,
get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));
2476 BuildMI(MF,
DL,
get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));
2486 MI.eraseFromParent();
2489 case AMDGPU::ENTER_STRICT_WWM: {
2493 : AMDGPU::S_OR_SAVEEXEC_B64));
2496 case AMDGPU::ENTER_STRICT_WQM: {
2499 const unsigned Exec = ST.
isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
2500 const unsigned WQMOp = ST.
isWave32() ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64;
2501 const unsigned MovOp = ST.
isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
2505 MI.eraseFromParent();
2508 case AMDGPU::EXIT_STRICT_WWM:
2509 case AMDGPU::EXIT_STRICT_WQM: {
2512 MI.setDesc(
get(ST.
isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));
2515 case AMDGPU::ENTER_PSEUDO_WM:
2516 case AMDGPU::EXIT_PSEUDO_WM: {
2518 MI.eraseFromParent();
2521 case AMDGPU::SI_RETURN: {
2535 MI.eraseFromParent();
2539 case AMDGPU::S_MUL_U64_U32_PSEUDO:
2540 case AMDGPU::S_MUL_I64_I32_PSEUDO:
2541 MI.setDesc(
get(AMDGPU::S_MUL_U64));
2544 case AMDGPU::S_GETPC_B64_pseudo:
2545 MI.setDesc(
get(AMDGPU::S_GETPC_B64));
2548 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2570 case AMDGPU::S_LOAD_DWORDX16_IMM:
2571 case AMDGPU::S_LOAD_DWORDX8_IMM: {
2584 for (
auto &CandMO :
I->operands()) {
2585 if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())
2593 if (!UseMO || UseMO->
getSubReg() == AMDGPU::NoSubRegister)
2601 assert(
MRI.use_nodbg_empty(DestReg) &&
"DestReg should have no users yet.");
2603 unsigned NewOpcode = -1;
2604 if (SubregSize == 256)
2605 NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
2606 else if (SubregSize == 128)
2607 NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;
2614 MRI.setRegClass(DestReg, NewRC);
2617 UseMO->
setSubReg(AMDGPU::NoSubRegister);
2622 MI->getOperand(0).setReg(DestReg);
2623 MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);
2627 OffsetMO->
setImm(FinalOffset);
2633 MI->setMemRefs(*MF, NewMMOs);
2646std::pair<MachineInstr*, MachineInstr*>
2648 assert (
MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
2653 MI.setDesc(
get(AMDGPU::V_MOV_B64_dpp));
2654 return std::pair(&
MI,
nullptr);
2665 for (
auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
2667 if (Dst.isPhysical()) {
2668 MovDPP.addDef(RI.getSubReg(Dst, Sub));
2671 auto Tmp =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2675 for (
unsigned I = 1;
I <= 2; ++
I) {
2678 if (
SrcOp.isImm()) {
2680 Imm.ashrInPlace(Part * 32);
2681 MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
2685 if (Src.isPhysical())
2686 MovDPP.addReg(RI.getSubReg(Src, Sub));
2693 MovDPP.addImm(MO.getImm());
2695 Split[Part] = MovDPP;
2699 if (Dst.isVirtual())
2706 MI.eraseFromParent();
2707 return std::pair(Split[0], Split[1]);
2710std::optional<DestSourcePair>
2712 if (
MI.getOpcode() == AMDGPU::WWM_COPY)
2715 return std::nullopt;
2720 unsigned Src0OpName,
2722 unsigned Src1OpName)
const {
2729 "All commutable instructions have both src0 and src1 modifiers");
2731 int Src0ModsVal = Src0Mods->
getImm();
2732 int Src1ModsVal = Src1Mods->
getImm();
2734 Src1Mods->
setImm(Src0ModsVal);
2735 Src0Mods->
setImm(Src1ModsVal);
2744 bool IsKill = RegOp.
isKill();
2746 bool IsUndef = RegOp.
isUndef();
2747 bool IsDebug = RegOp.
isDebug();
2749 if (NonRegOp.
isImm())
2751 else if (NonRegOp.
isFI())
2770 unsigned Src1Idx)
const {
2771 assert(!NewMI &&
"this should never be used");
2773 unsigned Opc =
MI.getOpcode();
2775 if (CommutedOpcode == -1)
2778 if (Src0Idx > Src1Idx)
2782 static_cast<int>(Src0Idx) &&
2784 static_cast<int>(Src1Idx) &&
2785 "inconsistency with findCommutedOpIndices");
2812 Src1, AMDGPU::OpName::src1_modifiers);
2824 unsigned &SrcOpIdx0,
2825 unsigned &SrcOpIdx1)
const {
2830 unsigned &SrcOpIdx0,
2831 unsigned &SrcOpIdx1)
const {
2832 if (!
Desc.isCommutable())
2835 unsigned Opc =
Desc.getOpcode();
2844 return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
2848 int64_t BrOffset)
const {
2851 assert(BranchOp != AMDGPU::S_SETPC_B64);
2865 return MI.getOperand(0).getMBB();
2870 if (
MI.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO ||
2871 MI.getOpcode() == AMDGPU::SI_IF ||
MI.getOpcode() == AMDGPU::SI_ELSE ||
2872 MI.getOpcode() == AMDGPU::SI_LOOP)
2883 assert(RS &&
"RegScavenger required for long branching");
2885 "new block should be inserted for expanding unconditional branch");
2888 "restore block should be inserted for restoring clobbered registers");
2896 Register PCReg =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
2906 MCCtx.createTempSymbol(
"post_getpc",
true);
2910 MCCtx.createTempSymbol(
"offset_lo",
true);
2912 MCCtx.createTempSymbol(
"offset_hi",
true);
2915 .
addReg(PCReg, 0, AMDGPU::sub0)
2919 .
addReg(PCReg, 0, AMDGPU::sub1)
2961 if (LongBranchReservedReg) {
2963 Scav = LongBranchReservedReg;
2972 MRI.replaceRegWith(PCReg, Scav);
2973 MRI.clearVirtRegs();
2979 TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
2980 MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
2981 MRI.clearVirtRegs();
2996unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate
Cond) {
2998 case SIInstrInfo::SCC_TRUE:
2999 return AMDGPU::S_CBRANCH_SCC1;
3000 case SIInstrInfo::SCC_FALSE:
3001 return AMDGPU::S_CBRANCH_SCC0;
3002 case SIInstrInfo::VCCNZ:
3003 return AMDGPU::S_CBRANCH_VCCNZ;
3004 case SIInstrInfo::VCCZ:
3005 return AMDGPU::S_CBRANCH_VCCZ;
3006 case SIInstrInfo::EXECNZ:
3007 return AMDGPU::S_CBRANCH_EXECNZ;
3008 case SIInstrInfo::EXECZ:
3009 return AMDGPU::S_CBRANCH_EXECZ;
3015SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(
unsigned Opcode) {
3017 case AMDGPU::S_CBRANCH_SCC0:
3019 case AMDGPU::S_CBRANCH_SCC1:
3021 case AMDGPU::S_CBRANCH_VCCNZ:
3023 case AMDGPU::S_CBRANCH_VCCZ:
3025 case AMDGPU::S_CBRANCH_EXECNZ:
3027 case AMDGPU::S_CBRANCH_EXECZ:
3039 bool AllowModify)
const {
3040 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
3042 TBB =
I->getOperand(0).getMBB();
3048 if (
I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
3049 CondBB =
I->getOperand(1).getMBB();
3050 Cond.push_back(
I->getOperand(0));
3052 BranchPredicate Pred = getBranchPredicate(
I->getOpcode());
3053 if (Pred == INVALID_BR)
3056 CondBB =
I->getOperand(0).getMBB();
3058 Cond.push_back(
I->getOperand(1));
3068 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
3070 FBB =
I->getOperand(0).getMBB();
3080 bool AllowModify)
const {
3088 while (
I != E && !
I->isBranch() && !
I->isReturn()) {
3089 switch (
I->getOpcode()) {
3090 case AMDGPU::S_MOV_B64_term:
3091 case AMDGPU::S_XOR_B64_term:
3092 case AMDGPU::S_OR_B64_term:
3093 case AMDGPU::S_ANDN2_B64_term:
3094 case AMDGPU::S_AND_B64_term:
3095 case AMDGPU::S_AND_SAVEEXEC_B64_term:
3096 case AMDGPU::S_MOV_B32_term:
3097 case AMDGPU::S_XOR_B32_term:
3098 case AMDGPU::S_OR_B32_term:
3099 case AMDGPU::S_ANDN2_B32_term:
3100 case AMDGPU::S_AND_B32_term:
3101 case AMDGPU::S_AND_SAVEEXEC_B32_term:
3104 case AMDGPU::SI_ELSE:
3105 case AMDGPU::SI_KILL_I1_TERMINATOR:
3106 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
3123 int *BytesRemoved)
const {
3125 unsigned RemovedSize = 0;
3128 if (
MI.isBranch() ||
MI.isReturn()) {
3130 MI.eraseFromParent();
3136 *BytesRemoved = RemovedSize;
3153 int *BytesAdded)
const {
3154 if (!FBB &&
Cond.empty()) {
3162 if(
Cond.size() == 1 &&
Cond[0].isReg()) {
3172 = getBranchOpcode(
static_cast<BranchPredicate
>(
Cond[0].getImm()));
3209 if (
Cond.size() != 2) {
3224 Register FalseReg,
int &CondCycles,
3225 int &TrueCycles,
int &FalseCycles)
const {
3226 switch (
Cond[0].getImm()) {
3231 if (
MRI.getRegClass(FalseReg) != RC)
3235 CondCycles = TrueCycles = FalseCycles = NumInsts;
3238 return RI.
hasVGPRs(RC) && NumInsts <= 6;
3246 if (
MRI.getRegClass(FalseReg) != RC)
3252 if (NumInsts % 2 == 0)
3255 CondCycles = TrueCycles = FalseCycles = NumInsts;
3267 BranchPredicate Pred =
static_cast<BranchPredicate
>(
Cond[0].getImm());
3268 if (Pred == VCCZ || Pred == SCC_FALSE) {
3269 Pred =
static_cast<BranchPredicate
>(-Pred);
3275 unsigned DstSize = RI.getRegSizeInBits(*DstRC);
3277 if (DstSize == 32) {
3279 if (Pred == SCC_TRUE) {
3294 if (DstSize == 64 && Pred == SCC_TRUE) {
3304 static const int16_t Sub0_15[] = {
3305 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
3306 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
3307 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
3308 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
3311 static const int16_t Sub0_15_64[] = {
3312 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
3313 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
3314 AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
3315 AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
3318 unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
3320 const int16_t *SubIndices = Sub0_15;
3321 int NElts = DstSize / 32;
3325 if (Pred == SCC_TRUE) {
3327 SelOp = AMDGPU::S_CSELECT_B32;
3328 EltRC = &AMDGPU::SGPR_32RegClass;
3330 SelOp = AMDGPU::S_CSELECT_B64;
3331 EltRC = &AMDGPU::SGPR_64RegClass;
3332 SubIndices = Sub0_15_64;
3338 MBB,
I,
DL,
get(AMDGPU::REG_SEQUENCE), DstReg);
3343 for (
int Idx = 0;
Idx != NElts; ++
Idx) {
3344 Register DstElt =
MRI.createVirtualRegister(EltRC);
3347 unsigned SubIdx = SubIndices[
Idx];
3350 if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
3353 .
addReg(FalseReg, 0, SubIdx)
3354 .
addReg(TrueReg, 0, SubIdx);
3358 .
addReg(TrueReg, 0, SubIdx)
3359 .
addReg(FalseReg, 0, SubIdx);
3371 switch (
MI.getOpcode()) {
3372 case AMDGPU::V_MOV_B32_e32:
3373 case AMDGPU::V_MOV_B32_e64:
3374 case AMDGPU::V_MOV_B64_PSEUDO:
3375 case AMDGPU::V_MOV_B64_e32:
3376 case AMDGPU::V_MOV_B64_e64:
3377 case AMDGPU::S_MOV_B32:
3378 case AMDGPU::S_MOV_B64:
3379 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3381 case AMDGPU::WWM_COPY:
3382 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3383 case AMDGPU::V_ACCVGPR_READ_B32_e64:
3384 case AMDGPU::V_ACCVGPR_MOV_B32:
3392 AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
3393 AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
3394 AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};
3397 unsigned Opc =
MI.getOpcode();
3401 MI.removeOperand(
Idx);
3407 if (!
MRI->hasOneNonDBGUse(Reg))
3410 switch (
DefMI.getOpcode()) {
3413 case AMDGPU::V_MOV_B64_e32:
3414 case AMDGPU::S_MOV_B64:
3415 case AMDGPU::V_MOV_B64_PSEUDO:
3416 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3417 case AMDGPU::V_MOV_B32_e32:
3418 case AMDGPU::S_MOV_B32:
3419 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3426 if (!ImmOp->
isImm())
3429 auto getImmFor = [ImmOp](
const MachineOperand &UseOp) -> int64_t {
3430 int64_t Imm = ImmOp->
getImm();
3431 switch (UseOp.getSubReg()) {
3442 case AMDGPU::sub1_lo16:
3444 case AMDGPU::sub1_hi16:
3449 assert(!
DefMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3451 unsigned Opc =
UseMI.getOpcode();
3452 if (Opc == AMDGPU::COPY) {
3453 assert(!
UseMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3457 bool Is16Bit = OpSize == 2;
3458 bool Is64Bit = OpSize == 8;
3460 unsigned NewOpc =
isVGPRCopy ? Is64Bit ? AMDGPU::V_MOV_B64_PSEUDO
3461 : AMDGPU::V_MOV_B32_e32
3462 : Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO
3463 : AMDGPU::S_MOV_B32;
3464 APInt Imm(Is64Bit ? 64 : 32, getImmFor(
UseMI.getOperand(1)));
3469 NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
3476 if (DstReg.
isVirtual() &&
UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)
3479 UseMI.getOperand(0).setSubReg(0);
3482 UseMI.getOperand(0).setReg(DstReg);
3492 UseMI.setDesc(NewMCID);
3493 UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue());
3498 if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
3499 Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
3500 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
3501 Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3502 Opc == AMDGPU::V_FMAC_F16_t16_e64) {
3517 bool IsF32 = Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
3518 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64;
3520 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
3521 Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3522 Opc == AMDGPU::V_FMAC_F16_t16_e64;
3530 Src1->
isReg() && Src1->
getReg() == Reg ? Src0 : Src1;
3531 if (!RegSrc->
isReg())
3549 if (Def && Def->isMoveImmediate() &&
3554 IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32
3556 : AMDGPU::V_FMAMK_F16)
3557 : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);
3564 if (NewOpc == AMDGPU::V_FMAMK_F16_t16)
3567 const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1);
3573 unsigned SrcSubReg = RegSrc->
getSubReg();
3578 if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
3579 Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3580 Opc == AMDGPU::V_FMAC_F16_e64)
3581 UseMI.untieRegOperand(
3584 Src1->ChangeToImmediate(Imm);
3589 bool DeleteDef =
MRI->use_nodbg_empty(Reg);
3591 DefMI.eraseFromParent();
3601 bool Src0Inlined =
false;
3602 if (Src0->
isReg()) {
3607 if (Def && Def->isMoveImmediate() &&
3619 if (Src1->
isReg() && !Src0Inlined) {
3622 if (Def && Def->isMoveImmediate() &&
3633 IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32
3635 : AMDGPU::V_FMAAK_F16)
3636 : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);
3643 if (NewOpc == AMDGPU::V_FMAAK_F16_t16)
3649 if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
3650 Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3651 Opc == AMDGPU::V_FMAC_F16_e64)
3652 UseMI.untieRegOperand(
3666 bool DeleteDef =
MRI->use_nodbg_empty(Reg);
3668 DefMI.eraseFromParent();
3680 if (BaseOps1.
size() != BaseOps2.
size())
3682 for (
size_t I = 0, E = BaseOps1.
size();
I < E; ++
I) {
3683 if (!BaseOps1[
I]->isIdenticalTo(*BaseOps2[
I]))
3691 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
3692 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
3693 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
3695 LowOffset + (int)LowWidth.
getValue() <= HighOffset;
3698bool SIInstrInfo::checkInstOffsetsDoNotOverlap(
const MachineInstr &MIa,
3701 int64_t Offset0, Offset1;
3703 bool Offset0IsScalable, Offset1IsScalable;
3725 "MIa must load from or modify a memory location");
3727 "MIb must load from or modify a memory location");
3746 return checkInstOffsetsDoNotOverlap(MIa, MIb);
3753 return checkInstOffsetsDoNotOverlap(MIa, MIb);
3763 return checkInstOffsetsDoNotOverlap(MIa, MIb);
3777 return checkInstOffsetsDoNotOverlap(MIa, MIb);
3788 if (Reg.isPhysical())
3790 auto *Def =
MRI.getUniqueVRegDef(Reg);
3792 Imm = Def->getOperand(1).getImm();
3812 unsigned NumOps =
MI.getNumOperands();
3813 for (
unsigned I = 1;
I < NumOps; ++
I) {
3815 if (
Op.isReg() &&
Op.isKill())
3825 unsigned Opc =
MI.getOpcode();
3829 if (NewMFMAOpc != -1) {
3832 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I)
3833 MIB.
add(
MI.getOperand(
I));
3839 if (Def.isEarlyClobber() && Def.isReg() &&
3844 auto UpdateDefIndex = [&](
LiveRange &LR) {
3845 auto S = LR.
find(OldIndex);
3846 if (S != LR.end() && S->start == OldIndex) {
3847 assert(S->valno && S->valno->def == OldIndex);
3848 S->start = NewIndex;
3849 S->valno->def = NewIndex;
3853 for (
auto &SR : LI.subranges())
3864 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I)
3874 assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
3875 "V_FMAC_F16_t16_e32 is not supported and not expected to be present "
3879 bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
3880 Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3881 Opc == AMDGPU::V_FMAC_F16_t16_e64;
3882 bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
3883 Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
3884 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 ||
3885 Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3886 Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3887 Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
3888 bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
3889 bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
3890 Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
3891 Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
3892 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
3893 bool Src0Literal =
false;
3898 case AMDGPU::V_MAC_F16_e64:
3899 case AMDGPU::V_FMAC_F16_e64:
3900 case AMDGPU::V_FMAC_F16_t16_e64:
3901 case AMDGPU::V_MAC_F32_e64:
3902 case AMDGPU::V_MAC_LEGACY_F32_e64:
3903 case AMDGPU::V_FMAC_F32_e64:
3904 case AMDGPU::V_FMAC_LEGACY_F32_e64:
3905 case AMDGPU::V_FMAC_F64_e64:
3907 case AMDGPU::V_MAC_F16_e32:
3908 case AMDGPU::V_FMAC_F16_e32:
3909 case AMDGPU::V_MAC_F32_e32:
3910 case AMDGPU::V_MAC_LEGACY_F32_e32:
3911 case AMDGPU::V_FMAC_F32_e32:
3912 case AMDGPU::V_FMAC_LEGACY_F32_e32:
3913 case AMDGPU::V_FMAC_F64_e32: {
3915 AMDGPU::OpName::src0);
3942 if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 &&
3948 const auto killDef = [&]() ->
void {
3952 if (!
MRI.hasOneNonDBGUse(DefReg))
3966 : AMDGPU::V_FMAAK_F16)
3967 : AMDGPU::V_FMAAK_F32)
3968 : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
3984 : AMDGPU::V_FMAMK_F16)
3985 : AMDGPU::V_FMAMK_F32)
3986 : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
4030 unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
4031 : IsF64 ? AMDGPU::V_FMA_F64_e64
4033 ? AMDGPU::V_FMA_LEGACY_F32_e64
4034 : AMDGPU::V_FMA_F32_e64
4035 : IsF16 ? AMDGPU::V_MAD_F16_e64
4036 : IsLegacy ? AMDGPU::V_MAD_LEGACY_F32_e64
4037 : AMDGPU::V_MAD_F32_e64;
4052 MIB.
addImm(OpSel ? OpSel->getImm() : 0);
4063 switch (
MI.getOpcode()) {
4064 case AMDGPU::S_SET_GPR_IDX_ON:
4065 case AMDGPU::S_SET_GPR_IDX_MODE:
4066 case AMDGPU::S_SET_GPR_IDX_OFF:
4084 if (
MI.isTerminator() ||
MI.isPosition())
4088 if (
MI.getOpcode() == TargetOpcode::INLINEASM_BR)
4091 if (
MI.getOpcode() == AMDGPU::SCHED_BARRIER &&
MI.getOperand(0).getImm() == 0)
4097 return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
4098 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
4099 MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
4100 MI.getOpcode() == AMDGPU::S_SETPRIO ||
4105 return Opcode == AMDGPU::DS_ORDERED_COUNT ||
isGWS(Opcode);
4116 unsigned Opcode =
MI.getOpcode();
4131 if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
4133 Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::S_TRAP ||
4134 Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER)
4137 if (
MI.isCall() ||
MI.isInlineAsm())
4149 if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
4150 Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
4151 Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
4152 Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)
4160 if (
MI.isMetaInstruction())
4164 if (
MI.isCopyLike()) {
4169 return MI.readsRegister(AMDGPU::EXEC, &RI);
4180 return !
isSALU(
MI) ||
MI.readsRegister(AMDGPU::EXEC, &RI);
4184 switch (Imm.getBitWidth()) {
4204 APInt IntImm = Imm.bitcastToAPInt();
4223 uint8_t OperandType)
const {
4224 assert(!MO.
isReg() &&
"isInlineConstant called on register operand!");
4233 int64_t Imm = MO.
getImm();
4234 switch (OperandType) {
4247 int32_t Trunc =
static_cast<int32_t
>(Imm);
4287 if (isInt<16>(Imm) || isUInt<16>(Imm)) {
4292 int16_t Trunc =
static_cast<int16_t
>(Imm);
4303 if (isInt<16>(Imm) || isUInt<16>(Imm)) {
4304 int16_t Trunc =
static_cast<int16_t
>(Imm);
4364 AMDGPU::OpName::src2))
4380 if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.
hasGFX90AInsts())
4400 return Mods && Mods->
getImm();
4413 switch (
MI.getOpcode()) {
4414 default:
return false;
4416 case AMDGPU::V_ADDC_U32_e64:
4417 case AMDGPU::V_SUBB_U32_e64:
4418 case AMDGPU::V_SUBBREV_U32_e64: {
4426 case AMDGPU::V_MAC_F16_e64:
4427 case AMDGPU::V_MAC_F32_e64:
4428 case AMDGPU::V_MAC_LEGACY_F32_e64:
4429 case AMDGPU::V_FMAC_F16_e64:
4430 case AMDGPU::V_FMAC_F16_t16_e64:
4431 case AMDGPU::V_FMAC_F32_e64:
4432 case AMDGPU::V_FMAC_F64_e64:
4433 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4439 case AMDGPU::V_CNDMASK_B32_e64:
4471 (
Use.getReg() == AMDGPU::VCC ||
Use.getReg() == AMDGPU::VCC_LO)) {
4480 unsigned Op32)
const {
4490 Inst32.
add(
MI.getOperand(0));
4494 assert(((
MI.getOperand(0).getReg() == AMDGPU::VCC) ||
4495 (
MI.getOperand(0).getReg() == AMDGPU::VCC_LO)) &&
4509 if (Op32Src2Idx != -1) {
4539 if (MO.
getReg() == AMDGPU::SGPR_NULL || MO.
getReg() == AMDGPU::SGPR_NULL64)
4544 return MO.
getReg() == AMDGPU::M0 ||
4545 MO.
getReg() == AMDGPU::VCC ||
4546 MO.
getReg() == AMDGPU::VCC_LO;
4548 return AMDGPU::SReg_32RegClass.contains(MO.
getReg()) ||
4549 AMDGPU::SReg_64RegClass.contains(MO.
getReg());
4559 switch (MO.getReg()) {
4561 case AMDGPU::VCC_LO:
4562 case AMDGPU::VCC_HI:
4564 case AMDGPU::FLAT_SCR:
4577 switch (
MI.getOpcode()) {
4578 case AMDGPU::V_READLANE_B32:
4579 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
4580 case AMDGPU::V_WRITELANE_B32:
4581 case AMDGPU::SI_SPILL_S32_TO_VGPR:
4588 if (
MI.isPreISelOpcode() ||
4589 SIInstrInfo::isGenericOpcode(
MI.getOpcode()) ||
4600 if (
SubReg.getReg().isPhysical())
4603 return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
4610 if (SIInstrInfo::isGenericOpcode(
MI.getOpcode()))
4620 if (Src0Idx == -1) {
4630 if (!
Desc.isVariadic() &&
4631 Desc.getNumOperands() !=
MI.getNumExplicitOperands()) {
4632 ErrInfo =
"Instruction has wrong number of operands.";
4636 if (
MI.isInlineAsm()) {
4649 if (!Reg.isVirtual() && !RC->
contains(Reg)) {
4650 ErrInfo =
"inlineasm operand has incorrect register class.";
4658 if (
isImage(
MI) &&
MI.memoperands_empty() &&
MI.mayLoadOrStore()) {
4659 ErrInfo =
"missing memory operand from image instruction.";
4664 for (
int i = 0, e =
Desc.getNumOperands(); i != e; ++i) {
4667 ErrInfo =
"FPImm Machine Operands are not supported. ISel should bitcast "
4668 "all fp values to integers.";
4672 int RegClass =
Desc.operands()[i].RegClass;
4674 switch (
Desc.operands()[i].OperandType) {
4676 if (
MI.getOperand(i).isImm() ||
MI.getOperand(i).isGlobal()) {
4677 ErrInfo =
"Illegal immediate value for operand.";
4698 ErrInfo =
"Illegal immediate value for operand.";
4705 ErrInfo =
"Expected inline constant for operand.";
4714 if (!
MI.getOperand(i).isImm() && !
MI.getOperand(i).isFI()) {
4715 ErrInfo =
"Expected immediate, but got non-immediate";
4737 RI.getSubRegisterClass(RC, MO.
getSubReg());
4745 ErrInfo =
"Subtarget requires even aligned vector registers";
4750 if (RegClass != -1) {
4751 if (Reg.isVirtual())
4756 ErrInfo =
"Operand has incorrect register class.";
4765 ErrInfo =
"SDWA is not supported on this target";
4771 for (
int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
4779 ErrInfo =
"Only VGPRs allowed as operands in SDWA instructions on VI";
4786 "Only reg allowed as operands in SDWA instructions on GFX9+";
4795 if (OMod !=
nullptr &&
4797 ErrInfo =
"OMod not allowed in SDWA instructions on VI";
4802 if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
4803 Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
4804 Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
4805 Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
4808 unsigned Mods = Src0ModsMO->
getImm();
4811 ErrInfo =
"sext, abs and neg are not allowed on this instruction";
4817 if (
isVOPC(BasicOpcode)) {
4821 if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
4822 ErrInfo =
"Only VCC allowed as dst in SDWA instructions on VI";
4828 if (Clamp && (!Clamp->
isImm() || Clamp->
getImm() != 0)) {
4829 ErrInfo =
"Clamp not allowed in VOPC SDWA instructions on VI";
4835 if (OMod && (!OMod->
isImm() || OMod->
getImm() != 0)) {
4836 ErrInfo =
"OMod not allowed in VOPC SDWA instructions on VI";
4843 if (DstUnused && DstUnused->isImm() &&
4846 if (!Dst.isReg() || !Dst.isTied()) {
4847 ErrInfo =
"Dst register should have tied register";
4852 MI.getOperand(
MI.findTiedOperandIdx(DstIdx));
4855 "Dst register should be tied to implicit use of preserved register";
4858 Dst.getReg() != TiedMO.
getReg()) {
4859 ErrInfo =
"Dst register should use same physical register as preserved";
4891 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
4892 if (RegCount > DstSize) {
4893 ErrInfo =
"Image instruction returns too many registers for dst "
4902 if (
isVALU(
MI) &&
Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
4903 unsigned ConstantBusCount = 0;
4904 bool UsesLiteral =
false;
4911 LiteralVal = &
MI.getOperand(ImmIdx);
4920 for (
int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
4938 ErrInfo =
"VOP2/VOP3 instruction uses more than one literal";
4948 if (
llvm::all_of(SGPRsUsed, [
this, SGPRUsed](
unsigned SGPR) {
4949 return !RI.regsOverlap(SGPRUsed, SGPR);
4959 Opcode != AMDGPU::V_WRITELANE_B32) {
4960 ErrInfo =
"VOP* instruction violates constant bus restriction";
4965 ErrInfo =
"VOP3 instruction uses literal";
4972 if (
Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
4973 unsigned SGPRCount = 0;
4976 for (
int OpIdx : {Src0Idx, Src1Idx}) {
4984 if (MO.
getReg() != SGPRUsed)
4990 ErrInfo =
"WRITELANE instruction violates constant bus restriction";
4997 if (
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
4998 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
5005 ErrInfo =
"v_div_scale_{f32|f64} require src0 = src1 or src2";
5015 ErrInfo =
"ABS not allowed in VOP3B instructions";
5028 ErrInfo =
"SOP2/SOPC instruction requires too many immediate constants";
5035 if (
Desc.isBranch()) {
5037 ErrInfo =
"invalid branch target for SOPK instruction";
5043 if (!isUInt<16>(Imm)) {
5044 ErrInfo =
"invalid immediate for SOPK instruction";
5048 if (!isInt<16>(Imm)) {
5049 ErrInfo =
"invalid immediate for SOPK instruction";
5056 if (
Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
5057 Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
5058 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5059 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
5060 const bool IsDst =
Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5061 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
5063 const unsigned StaticNumOps =
5064 Desc.getNumOperands() +
Desc.implicit_uses().size();
5065 const unsigned NumImplicitOps = IsDst ? 2 : 1;
5070 if (
MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
5071 ErrInfo =
"missing implicit register operands";
5077 if (!Dst->isUse()) {
5078 ErrInfo =
"v_movreld_b32 vdst should be a use operand";
5083 if (!
MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5084 UseOpIdx != StaticNumOps + 1) {
5085 ErrInfo =
"movrel implicit operands should be tied";
5092 =
MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5094 !
isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5095 ErrInfo =
"src0 should be subreg of implicit vector use";
5103 if (!
MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5104 ErrInfo =
"VALU instruction does not implicitly read exec mask";
5110 if (
MI.mayStore() &&
5115 if (Soff && Soff->
getReg() != AMDGPU::M0) {
5116 ErrInfo =
"scalar stores must use m0 as offset register";
5124 if (
Offset->getImm() != 0) {
5125 ErrInfo =
"subtarget does not support offsets in flat instructions";
5132 if (GDSOp && GDSOp->
getImm() != 0) {
5133 ErrInfo =
"GDS is not supported on this subtarget";
5142 AMDGPU::OpName::vaddr0);
5144 isMIMG(
MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
5153 ErrInfo =
"dim is out of range";
5160 IsA16 = R128A16->
getImm() != 0;
5161 }
else if (ST.
hasA16()) {
5163 IsA16 = A16->
getImm() != 0;
5166 bool IsNSA = RsrcIdx - VAddr0Idx > 1;
5168 unsigned AddrWords =
5171 unsigned VAddrWords;
5173 VAddrWords = RsrcIdx - VAddr0Idx;
5176 unsigned LastVAddrIdx = RsrcIdx - 1;
5177 VAddrWords +=
getOpSize(
MI, LastVAddrIdx) / 4 - 1;
5185 if (VAddrWords != AddrWords) {
5187 <<
" but got " << VAddrWords <<
"\n");
5188 ErrInfo =
"bad vaddr size";
5196 using namespace AMDGPU::DPP;
5198 unsigned DC = DppCt->
getImm();
5199 if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
5200 DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
5201 (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
5202 (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
5203 (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
5204 (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
5205 (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
5206 ErrInfo =
"Invalid dpp_ctrl value";
5209 if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
5211 ErrInfo =
"Invalid dpp_ctrl value: "
5212 "wavefront shifts are not supported on GFX10+";
5215 if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
5217 ErrInfo =
"Invalid dpp_ctrl value: "
5218 "broadcasts are not supported on GFX10+";
5221 if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
5223 if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
5224 DC <= DppCtrl::ROW_NEWBCAST_LAST &&
5226 ErrInfo =
"Invalid dpp_ctrl value: "
5227 "row_newbroadcast/row_share is not supported before "
5230 }
else if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.
hasGFX90AInsts()) {
5231 ErrInfo =
"Invalid dpp_ctrl value: "
5232 "row_share and row_xmask are not supported before GFX10";
5237 if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
5239 ErrInfo =
"Invalid dpp_ctrl value: "
5240 "DP ALU dpp only support row_newbcast";
5247 uint16_t DataNameIdx =
isDS(Opcode) ? AMDGPU::OpName::data0
5248 : AMDGPU::OpName::vdata;
5257 ErrInfo =
"Invalid register class: "
5258 "vdata and vdst should be both VGPR or AGPR";
5261 if (
Data && Data2 &&
5263 ErrInfo =
"Invalid register class: "
5264 "both data operands should be VGPR or AGPR";
5268 if ((Dst && RI.
isAGPR(
MRI, Dst->getReg())) ||
5271 ErrInfo =
"Invalid register class: "
5272 "agpr loads and stores not supported on this GPU";
5279 const auto isAlignedReg = [&
MI, &
MRI,
this](
unsigned OpName) ->
bool {
5284 if (Reg.isPhysical())
5291 if (
MI.getOpcode() == AMDGPU::DS_GWS_INIT ||
5292 MI.getOpcode() == AMDGPU::DS_GWS_SEMA_BR ||
5293 MI.getOpcode() == AMDGPU::DS_GWS_BARRIER) {
5295 if (!isAlignedReg(AMDGPU::OpName::data0)) {
5296 ErrInfo =
"Subtarget requires even aligned vector registers "
5297 "for DS_GWS instructions";
5303 if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
5304 ErrInfo =
"Subtarget requires even aligned vector registers "
5305 "for vaddr operand of image instructions";
5311 if (
MI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
5314 if (Src->isReg() && RI.
isSGPRReg(
MRI, Src->getReg())) {
5315 ErrInfo =
"Invalid register class: "
5316 "v_accvgpr_write with an SGPR is not supported on this GPU";
5321 if (
Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
5324 ErrInfo =
"pseudo expects only physical SGPRs";
5336 switch (
MI.getOpcode()) {
5337 default:
return AMDGPU::INSTRUCTION_LIST_END;
5338 case AMDGPU::REG_SEQUENCE:
return AMDGPU::REG_SEQUENCE;
5339 case AMDGPU::COPY:
return AMDGPU::COPY;
5340 case AMDGPU::PHI:
return AMDGPU::PHI;
5341 case AMDGPU::INSERT_SUBREG:
return AMDGPU::INSERT_SUBREG;
5342 case AMDGPU::WQM:
return AMDGPU::WQM;
5343 case AMDGPU::SOFT_WQM:
return AMDGPU::SOFT_WQM;
5344 case AMDGPU::STRICT_WWM:
return AMDGPU::STRICT_WWM;
5345 case AMDGPU::STRICT_WQM:
return AMDGPU::STRICT_WQM;
5346 case AMDGPU::S_MOV_B32: {
5348 return MI.getOperand(1).isReg() ||
5350 AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
5352 case AMDGPU::S_ADD_I32:
5353 return ST.
hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
5354 case AMDGPU::S_ADDC_U32:
5355 return AMDGPU::V_ADDC_U32_e32;
5356 case AMDGPU::S_SUB_I32:
5357 return ST.
hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
5360 case AMDGPU::S_ADD_U32:
5361 return AMDGPU::V_ADD_CO_U32_e32;
5362 case AMDGPU::S_SUB_U32:
5363 return AMDGPU::V_SUB_CO_U32_e32;
5364 case AMDGPU::S_SUBB_U32:
return AMDGPU::V_SUBB_U32_e32;
5365 case AMDGPU::S_MUL_I32:
return AMDGPU::V_MUL_LO_U32_e64;
5366 case AMDGPU::S_MUL_HI_U32:
return AMDGPU::V_MUL_HI_U32_e64;
5367 case AMDGPU::S_MUL_HI_I32:
return AMDGPU::V_MUL_HI_I32_e64;
5368 case AMDGPU::S_AND_B32:
return AMDGPU::V_AND_B32_e64;
5369 case AMDGPU::S_OR_B32:
return AMDGPU::V_OR_B32_e64;
5370 case AMDGPU::S_XOR_B32:
return AMDGPU::V_XOR_B32_e64;
5371 case AMDGPU::S_XNOR_B32:
5372 return ST.
hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
5373 case AMDGPU::S_MIN_I32:
return AMDGPU::V_MIN_I32_e64;
5374 case AMDGPU::S_MIN_U32:
return AMDGPU::V_MIN_U32_e64;
5375 case AMDGPU::S_MAX_I32:
return AMDGPU::V_MAX_I32_e64;
5376 case AMDGPU::S_MAX_U32:
return AMDGPU::V_MAX_U32_e64;
5377 case AMDGPU::S_ASHR_I32:
return AMDGPU::V_ASHR_I32_e32;
5378 case AMDGPU::S_ASHR_I64:
return AMDGPU::V_ASHR_I64_e64;
5379 case AMDGPU::S_LSHL_B32:
return AMDGPU::V_LSHL_B32_e32;
5380 case AMDGPU::S_LSHL_B64:
return AMDGPU::V_LSHL_B64_e64;
5381 case AMDGPU::S_LSHR_B32:
return AMDGPU::V_LSHR_B32_e32;
5382 case AMDGPU::S_LSHR_B64:
return AMDGPU::V_LSHR_B64_e64;
5383 case AMDGPU::S_SEXT_I32_I8:
return AMDGPU::V_BFE_I32_e64;
5384 case AMDGPU::S_SEXT_I32_I16:
return AMDGPU::V_BFE_I32_e64;
5385 case AMDGPU::S_BFE_U32:
return AMDGPU::V_BFE_U32_e64;
5386 case AMDGPU::S_BFE_I32:
return AMDGPU::V_BFE_I32_e64;
5387 case AMDGPU::S_BFM_B32:
return AMDGPU::V_BFM_B32_e64;
5388 case AMDGPU::S_BREV_B32:
return AMDGPU::V_BFREV_B32_e32;
5389 case AMDGPU::S_NOT_B32:
return AMDGPU::V_NOT_B32_e32;
5390 case AMDGPU::S_NOT_B64:
return AMDGPU::V_NOT_B32_e32;
5391 case AMDGPU::S_CMP_EQ_I32:
return AMDGPU::V_CMP_EQ_I32_e64;
5392 case AMDGPU::S_CMP_LG_I32:
return AMDGPU::V_CMP_NE_I32_e64;
5393 case AMDGPU::S_CMP_GT_I32:
return AMDGPU::V_CMP_GT_I32_e64;
5394 case AMDGPU::S_CMP_GE_I32:
return AMDGPU::V_CMP_GE_I32_e64;
5395 case AMDGPU::S_CMP_LT_I32:
return AMDGPU::V_CMP_LT_I32_e64;
5396 case AMDGPU::S_CMP_LE_I32:
return AMDGPU::V_CMP_LE_I32_e64;
5397 case AMDGPU::S_CMP_EQ_U32:
return AMDGPU::V_CMP_EQ_U32_e64;
5398 case AMDGPU::S_CMP_LG_U32:
return AMDGPU::V_CMP_NE_U32_e64;
5399 case AMDGPU::S_CMP_GT_U32:
return AMDGPU::V_CMP_GT_U32_e64;
5400 case AMDGPU::S_CMP_GE_U32:
return AMDGPU::V_CMP_GE_U32_e64;
5401 case AMDGPU::S_CMP_LT_U32:
return AMDGPU::V_CMP_LT_U32_e64;
5402 case AMDGPU::S_CMP_LE_U32:
return AMDGPU::V_CMP_LE_U32_e64;
5403 case AMDGPU::S_CMP_EQ_U64:
return AMDGPU::V_CMP_EQ_U64_e64;
5404 case AMDGPU::S_CMP_LG_U64:
return AMDGPU::V_CMP_NE_U64_e64;
5405 case AMDGPU::S_BCNT1_I32_B32:
return AMDGPU::V_BCNT_U32_B32_e64;
5406 case AMDGPU::S_FF1_I32_B32:
return AMDGPU::V_FFBL_B32_e32;
5407 case AMDGPU::S_FLBIT_I32_B32:
return AMDGPU::V_FFBH_U32_e32;
5408 case AMDGPU::S_FLBIT_I32:
return AMDGPU::V_FFBH_I32_e64;
5409 case AMDGPU::S_CBRANCH_SCC0:
return AMDGPU::S_CBRANCH_VCCZ;
5410 case AMDGPU::S_CBRANCH_SCC1:
return AMDGPU::S_CBRANCH_VCCNZ;
5411 case AMDGPU::S_CVT_F32_I32:
return AMDGPU::V_CVT_F32_I32_e64;
5412 case AMDGPU::S_CVT_F32_U32:
return AMDGPU::V_CVT_F32_U32_e64;
5413 case AMDGPU::S_CVT_I32_F32:
return AMDGPU::V_CVT_I32_F32_e64;
5414 case AMDGPU::S_CVT_U32_F32:
return AMDGPU::V_CVT_U32_F32_e64;
5415 case AMDGPU::S_CVT_F32_F16:
return AMDGPU::V_CVT_F32_F16_t16_e64;
5416 case AMDGPU::S_CVT_HI_F32_F16:
return AMDGPU::V_CVT_F32_F16_t16_e64;
5417 case AMDGPU::S_CVT_F16_F32:
return AMDGPU::V_CVT_F16_F32_t16_e64;
5418 case AMDGPU::S_CEIL_F32:
return AMDGPU::V_CEIL_F32_e64;
5419 case AMDGPU::S_FLOOR_F32:
return AMDGPU::V_FLOOR_F32_e64;
5420 case AMDGPU::S_TRUNC_F32:
return AMDGPU::V_TRUNC_F32_e64;
5421 case AMDGPU::S_RNDNE_F32:
return AMDGPU::V_RNDNE_F32_e64;
5422 case AMDGPU::S_CEIL_F16:
5424 : AMDGPU::V_CEIL_F16_fake16_e64;
5425 case AMDGPU::S_FLOOR_F16:
5427 : AMDGPU::V_FLOOR_F16_fake16_e64;
5428 case AMDGPU::S_TRUNC_F16:
5429 return AMDGPU::V_TRUNC_F16_fake16_e64;
5430 case AMDGPU::S_RNDNE_F16:
5431 return AMDGPU::V_RNDNE_F16_fake16_e64;
5432 case AMDGPU::S_ADD_F32:
return AMDGPU::V_ADD_F32_e64;
5433 case AMDGPU::S_SUB_F32:
return AMDGPU::V_SUB_F32_e64;
5434 case AMDGPU::S_MIN_F32:
return AMDGPU::V_MIN_F32_e64;
5435 case AMDGPU::S_MAX_F32:
return AMDGPU::V_MAX_F32_e64;
5436 case AMDGPU::S_MINIMUM_F32:
return AMDGPU::V_MINIMUM_F32_e64;
5437 case AMDGPU::S_MAXIMUM_F32:
return AMDGPU::V_MAXIMUM_F32_e64;
5438 case AMDGPU::S_MUL_F32:
return AMDGPU::V_MUL_F32_e64;
5439 case AMDGPU::S_ADD_F16:
return AMDGPU::V_ADD_F16_fake16_e64;
5440 case AMDGPU::S_SUB_F16:
return AMDGPU::V_SUB_F16_fake16_e64;
5441 case AMDGPU::S_MIN_F16:
return AMDGPU::V_MIN_F16_fake16_e64;
5442 case AMDGPU::S_MAX_F16:
return AMDGPU::V_MAX_F16_fake16_e64;
5443 case AMDGPU::S_MINIMUM_F16:
return AMDGPU::V_MINIMUM_F16_e64;
5444 case AMDGPU::S_MAXIMUM_F16:
return AMDGPU::V_MAXIMUM_F16_e64;
5445 case AMDGPU::S_MUL_F16:
return AMDGPU::V_MUL_F16_fake16_e64;
5446 case AMDGPU::S_CVT_PK_RTZ_F16_F32:
return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
5447 case AMDGPU::S_FMAC_F32:
return AMDGPU::V_FMAC_F32_e64;
5448 case AMDGPU::S_FMAC_F16:
return AMDGPU::V_FMAC_F16_t16_e64;
5449 case AMDGPU::S_FMAMK_F32:
return AMDGPU::V_FMAMK_F32;
5450 case AMDGPU::S_FMAAK_F32:
return AMDGPU::V_FMAAK_F32;
5451 case AMDGPU::S_CMP_LT_F32:
return AMDGPU::V_CMP_LT_F32_e64;
5452 case AMDGPU::S_CMP_EQ_F32:
return AMDGPU::V_CMP_EQ_F32_e64;
5453 case AMDGPU::S_CMP_LE_F32:
return AMDGPU::V_CMP_LE_F32_e64;
5454 case AMDGPU::S_CMP_GT_F32:
return AMDGPU::V_CMP_GT_F32_e64;
5455 case AMDGPU::S_CMP_LG_F32:
return AMDGPU::V_CMP_LG_F32_e64;
5456 case AMDGPU::S_CMP_GE_F32:
return AMDGPU::V_CMP_GE_F32_e64;
5457 case AMDGPU::S_CMP_O_F32:
return AMDGPU::V_CMP_O_F32_e64;
5458 case AMDGPU::S_CMP_U_F32:
return AMDGPU::V_CMP_U_F32_e64;
5459 case AMDGPU::S_CMP_NGE_F32:
return AMDGPU::V_CMP_NGE_F32_e64;
5460 case AMDGPU::S_CMP_NLG_F32:
return AMDGPU::V_CMP_NLG_F32_e64;
5461 case AMDGPU::S_CMP_NGT_F32:
return AMDGPU::V_CMP_NGT_F32_e64;
5462 case AMDGPU::S_CMP_NLE_F32:
return AMDGPU::V_CMP_NLE_F32_e64;
5463 case AMDGPU::S_CMP_NEQ_F32:
return AMDGPU::V_CMP_NEQ_F32_e64;
5464 case AMDGPU::S_CMP_NLT_F32:
return AMDGPU::V_CMP_NLT_F32_e64;
5465 case AMDGPU::S_CMP_LT_F16:
return AMDGPU::V_CMP_LT_F16_t16_e64;
5466 case AMDGPU::S_CMP_EQ_F16:
return AMDGPU::V_CMP_EQ_F16_t16_e64;
5467 case AMDGPU::S_CMP_LE_F16:
return AMDGPU::V_CMP_LE_F16_t16_e64;
5468 case AMDGPU::S_CMP_GT_F16:
return AMDGPU::V_CMP_GT_F16_t16_e64;
5469 case AMDGPU::S_CMP_LG_F16:
return AMDGPU::V_CMP_LG_F16_t16_e64;
5470 case AMDGPU::S_CMP_GE_F16:
return AMDGPU::V_CMP_GE_F16_t16_e64;
5471 case AMDGPU::S_CMP_O_F16:
return AMDGPU::V_CMP_O_F16_t16_e64;
5472 case AMDGPU::S_CMP_U_F16:
return AMDGPU::V_CMP_U_F16_t16_e64;
5473 case AMDGPU::S_CMP_NGE_F16:
return AMDGPU::V_CMP_NGE_F16_t16_e64;
5474 case AMDGPU::S_CMP_NLG_F16:
return AMDGPU::V_CMP_NLG_F16_t16_e64;
5475 case AMDGPU::S_CMP_NGT_F16:
return AMDGPU::V_CMP_NGT_F16_t16_e64;
5476 case AMDGPU::S_CMP_NLE_F16:
return AMDGPU::V_CMP_NLE_F16_t16_e64;
5477 case AMDGPU::S_CMP_NEQ_F16:
return AMDGPU::V_CMP_NEQ_F16_t16_e64;
5478 case AMDGPU::S_CMP_NLT_F16:
return AMDGPU::V_CMP_NLT_F16_t16_e64;
5479 case AMDGPU::V_S_EXP_F32_e64:
return AMDGPU::V_EXP_F32_e64;
5480 case AMDGPU::V_S_EXP_F16_e64:
return AMDGPU::V_EXP_F16_fake16_e64;
5481 case AMDGPU::V_S_LOG_F32_e64:
return AMDGPU::V_LOG_F32_e64;
5482 case AMDGPU::V_S_LOG_F16_e64:
return AMDGPU::V_LOG_F16_fake16_e64;
5483 case AMDGPU::V_S_RCP_F32_e64:
return AMDGPU::V_RCP_F32_e64;
5484 case AMDGPU::V_S_RCP_F16_e64:
return AMDGPU::V_RCP_F16_fake16_e64;
5485 case AMDGPU::V_S_RSQ_F32_e64:
return AMDGPU::V_RSQ_F32_e64;
5486 case AMDGPU::V_S_RSQ_F16_e64:
return AMDGPU::V_RSQ_F16_fake16_e64;
5487 case AMDGPU::V_S_SQRT_F32_e64:
return AMDGPU::V_SQRT_F32_e64;
5488 case AMDGPU::V_S_SQRT_F16_e64:
return AMDGPU::V_SQRT_F16_fake16_e64;
5491 "Unexpected scalar opcode without corresponding vector one!");
5504 bool IsWave32 = ST.isWave32();
5509 unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5510 MCRegister Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
5519 const unsigned OrSaveExec =
5520 IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
5533 unsigned ExecMov =
isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5535 auto ExecRestoreMI =
5545 bool IsAllocatable) {
5546 if ((IsAllocatable || !ST.hasGFX90AInsts() || !
MRI.reservedRegsFrozen()) &&
5551 case AMDGPU::AV_32RegClassID:
5552 RCID = AMDGPU::VGPR_32RegClassID;
5554 case AMDGPU::AV_64RegClassID:
5555 RCID = AMDGPU::VReg_64RegClassID;
5557 case AMDGPU::AV_96RegClassID:
5558 RCID = AMDGPU::VReg_96RegClassID;
5560 case AMDGPU::AV_128RegClassID:
5561 RCID = AMDGPU::VReg_128RegClassID;
5563 case AMDGPU::AV_160RegClassID:
5564 RCID = AMDGPU::VReg_160RegClassID;
5566 case AMDGPU::AV_512RegClassID:
5567 RCID = AMDGPU::VReg_512RegClassID;
5583 auto RegClass = TID.
operands()[OpNum].RegClass;
5584 bool IsAllocatable =
false;
5594 AMDGPU::OpName::vdst);
5597 : AMDGPU::OpName::vdata);
5598 if (DataIdx != -1) {
5600 TID.
Opcode, AMDGPU::OpName::data1);
5608 unsigned OpNo)
const {
5611 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
5612 Desc.operands()[OpNo].RegClass == -1) {
5615 if (Reg.isVirtual())
5616 return MRI.getRegClass(Reg);
5617 return RI.getPhysRegBaseClass(Reg);
5620 unsigned RCID =
Desc.operands()[OpNo].RegClass;
5629 unsigned RCID =
get(
MI.getOpcode()).operands()[OpIdx].RegClass;
5631 unsigned Size = RI.getRegSizeInBits(*RC);
5632 unsigned Opcode = (
Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO : AMDGPU::V_MOV_B32_e32;
5634 Opcode = AMDGPU::COPY;
5636 Opcode = (
Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
5653 if (SuperReg.
getSubReg() == AMDGPU::NoSubRegister) {
5663 Register NewSuperReg =
MRI.createVirtualRegister(SuperRC);
5669 .
addReg(NewSuperReg, 0, SubIdx);
5679 if (SubIdx == AMDGPU::sub0)
5681 if (SubIdx == AMDGPU::sub1)
5693void SIInstrInfo::swapOperands(
MachineInstr &Inst)
const {
5709 if (Reg.isPhysical())
5720 DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.
getSubReg());
5747 MO = &
MI.getOperand(OpIdx);
5759 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
5765 if (!SGPRsUsed.
count(SGPR) &&
5768 if (--ConstantBusLimit <= 0)
5774 if (!LiteralLimit--)
5776 if (--ConstantBusLimit <= 0)
5790 unsigned Opc =
MI.getOpcode();
5798 isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
5799 if ((
int)OpIdx == VDstIdx && DataIdx != -1 &&
5800 MI.getOperand(DataIdx).isReg() &&
5801 RI.
isAGPR(
MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
5803 if ((
int)OpIdx == DataIdx) {
5804 if (VDstIdx != -1 &&
5805 RI.
isAGPR(
MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
5809 AMDGPU::OpName::data1);
5810 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
5811 RI.
isAGPR(
MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
5814 if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.
hasGFX90AInsts() &&
5824 bool Is64BitOp = Is64BitFPOp ||
5837 if (!Is64BitFPOp && (int32_t)Imm < 0)
5855 unsigned Opc =
MI.getOpcode();
5874 if (Opc == AMDGPU::V_WRITELANE_B32) {
5877 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5883 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5900 if (Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F16_e32) {
5902 if (!RI.
isVGPR(
MRI,
MI.getOperand(Src2Idx).getReg()))
5914 if (Opc == AMDGPU::V_READLANE_B32 && Src1.
isReg() &&
5916 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5928 if (HasImplicitSGPR || !
MI.isCommutable()) {
5945 if (CommutedOpc == -1) {
5950 MI.setDesc(
get(CommutedOpc));
5954 bool Src0Kill = Src0.
isKill();
5958 else if (Src1.
isReg()) {
5973 unsigned Opc =
MI.getOpcode();
5981 if (Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
5982 Opc == AMDGPU::V_PERMLANEX16_B32_e64) {
5988 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
5994 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6005 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
6007 SGPRsUsed.
insert(SGPRReg);
6011 for (
int Idx : VOP3Idx) {
6020 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6045 if (ConstantBusLimit > 0) {
6057 if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
6058 !RI.
isVGPR(
MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6067 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6071 Register NewSrcReg =
MRI.createVirtualRegister(VRC);
6073 get(TargetOpcode::COPY), NewSrcReg)
6080 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6086 for (
unsigned i = 0; i < SubRegs; ++i) {
6087 Register SGPR =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6089 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6096 get(AMDGPU::REG_SEQUENCE), DstReg);
6097 for (
unsigned i = 0; i < SubRegs; ++i) {
6112 if (SBase && !RI.
isSGPRClass(
MRI.getRegClass(SBase->getReg()))) {
6114 SBase->setReg(SGPR);
6126 if (OldSAddrIdx < 0)
6143 if (NewVAddrIdx < 0)
6150 if (OldVAddrIdx >= 0) {
6152 VAddrDef =
MRI.getUniqueVRegDef(VAddr.
getReg());
6153 if (!VAddrDef || VAddrDef->
getOpcode() != AMDGPU::V_MOV_B32_e32 ||
6164 if (OldVAddrIdx == NewVAddrIdx) {
6167 MRI.removeRegOperandFromUseList(&NewVAddr);
6168 MRI.moveOperands(&NewVAddr, &SAddr, 1);
6172 MRI.removeRegOperandFromUseList(&NewVAddr);
6173 MRI.addRegOperandToUseList(&NewVAddr);
6175 assert(OldSAddrIdx == NewVAddrIdx);
6177 if (OldVAddrIdx >= 0) {
6179 AMDGPU::OpName::vdst_in);
6183 if (NewVDstIn != -1) {
6190 if (NewVDstIn != -1) {
6229 unsigned OpSubReg =
Op.getSubReg();
6238 Register DstReg =
MRI.createVirtualRegister(DstRC);
6249 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
6252 bool ImpDef = Def->isImplicitDef();
6253 while (!ImpDef && Def && Def->isCopy()) {
6254 if (Def->getOperand(1).getReg().isPhysical())
6256 Def =
MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
6257 ImpDef = Def && Def->isImplicitDef();
6259 if (!RI.
isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
6274 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
6275 unsigned SaveExecOpc =
6276 ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
6277 unsigned XorTermOpc =
6278 ST.isWave32() ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
6280 ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
6281 const auto *BoolXExecRC =
TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
6289 unsigned RegSize =
TRI->getRegSizeInBits(ScalarOp->getReg(),
MRI);
6290 unsigned NumSubRegs =
RegSize / 32;
6291 Register VScalarOp = ScalarOp->getReg();
6293 if (NumSubRegs == 1) {
6294 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6296 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
6299 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6301 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
6307 CondReg = NewCondReg;
6309 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6317 ScalarOp->setReg(CurReg);
6318 ScalarOp->setIsKill();
6321 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
6322 "Unhandled register size");
6324 for (
unsigned Idx = 0;
Idx < NumSubRegs;
Idx += 2) {
6325 Register CurRegLo =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6326 Register CurRegHi =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6329 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
6330 .
addReg(VScalarOp, VScalarOpUndef,
TRI->getSubRegFromChannel(
Idx));
6333 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
6334 .
addReg(VScalarOp, VScalarOpUndef,
6335 TRI->getSubRegFromChannel(
Idx + 1));
6341 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
6342 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), CurReg)
6348 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6349 auto Cmp =
BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U64_e64),
6352 if (NumSubRegs <= 2)
6353 Cmp.addReg(VScalarOp);
6355 Cmp.addReg(VScalarOp, VScalarOpUndef,
6356 TRI->getSubRegFromChannel(
Idx, 2));
6360 CondReg = NewCondReg;
6362 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6371 TRI->getEquivalentSGPRClass(
MRI.getRegClass(VScalarOp));
6372 Register SScalarOp =
MRI.createVirtualRegister(SScalarOpRC);
6376 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
6377 unsigned Channel = 0;
6378 for (
Register Piece : ReadlanePieces) {
6379 Merge.addReg(Piece).addImm(
TRI->getSubRegFromChannel(Channel++));
6383 ScalarOp->setReg(SScalarOp);
6384 ScalarOp->setIsKill();
6388 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6389 MRI.setSimpleHint(SaveExec, CondReg);
6420 if (!Begin.isValid())
6422 if (!
End.isValid()) {
6427 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
6428 unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
6429 const auto *BoolXExecRC =
TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
6436 SaveSCCReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6442 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6451 for (
auto I = Begin;
I != AfterMI;
I++) {
6452 for (
auto &MO :
I->all_uses())
6453 MRI.clearKillFlags(MO.getReg());
6488 for (
auto &Succ : RemainderBB->
successors()) {
6511static std::tuple<unsigned, unsigned>
6519 TII.buildExtractSubReg(
MI,
MRI, Rsrc, &AMDGPU::VReg_128RegClass,
6520 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
6523 Register Zero64 =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
6524 Register SRsrcFormatLo =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6525 Register SRsrcFormatHi =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6526 Register NewSRsrc =
MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
6527 uint64_t RsrcDataFormat =
TII.getDefaultRsrcDataFormat();
6535 .
addImm(RsrcDataFormat & 0xFFFFFFFF);
6539 .
addImm(RsrcDataFormat >> 32);
6544 .
addImm(AMDGPU::sub0_sub1)
6550 return std::tuple(RsrcPtr, NewSRsrc);
6587 if (
MI.getOpcode() == AMDGPU::PHI) {
6589 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2) {
6590 if (!
MI.getOperand(i).isReg() || !
MI.getOperand(i).getReg().isVirtual())
6593 MRI.getRegClass(
MI.getOperand(i).getReg());
6608 VRC = &AMDGPU::VReg_1RegClass;
6624 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
6626 if (!
Op.isReg() || !
Op.getReg().isVirtual())
6642 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
6649 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
6651 if (!
Op.isReg() || !
Op.getReg().isVirtual())
6669 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
6674 if (DstRC != Src0RC) {
6683 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
6691 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
6692 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
6693 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
6694 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
6695 MI.getOpcode() == AMDGPU::S_WQM_B64) {
6710 : AMDGPU::OpName::srsrc;
6715 int SampOpName =
isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
6724 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
6730 unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
6731 unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
6736 while (Start->getOpcode() != FrameSetupOpcode)
6739 while (
End->getOpcode() != FrameDestroyOpcode)
6743 while (
End !=
MBB.
end() &&
End->isCopy() &&
End->getOperand(1).isReg() &&
6744 MI.definesRegister(
End->getOperand(1).getReg(),
nullptr))
6752 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
6754 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6765 bool isSoffsetLegal =
true;
6768 if (SoffsetIdx != -1) {
6772 isSoffsetLegal =
false;
6776 bool isRsrcLegal =
true;
6779 if (RsrcIdx != -1) {
6782 isRsrcLegal =
false;
6787 if (isRsrcLegal && isSoffsetLegal)
6811 Register NewVAddrLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6812 Register NewVAddrHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6813 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
6815 const auto *BoolXExecRC = RI.
getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
6816 Register CondReg0 =
MRI.createVirtualRegister(BoolXExecRC);
6817 Register CondReg1 =
MRI.createVirtualRegister(BoolXExecRC);
6819 unsigned RsrcPtr, NewSRsrc;
6826 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
6833 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
6851 "FIXME: Need to emit flat atomics here");
6853 unsigned RsrcPtr, NewSRsrc;
6856 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
6879 MIB.
addImm(CPol->getImm());
6884 MIB.
addImm(TFE->getImm());
6904 MI.removeFromParent();
6909 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
6911 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
6915 if (!isSoffsetLegal) {
6927 if (!isSoffsetLegal) {
6936 InstrList.insert(
MI);
6940 if (RsrcIdx != -1) {
6941 DeferredList.insert(
MI);
6946 return DeferredList.contains(
MI);
6952 while (!Worklist.
empty()) {
6966 "Deferred MachineInstr are not supposed to re-populate worklist");
6984 case AMDGPU::S_ADD_U64_PSEUDO:
6985 NewOpcode = AMDGPU::V_ADD_U64_PSEUDO;
6987 case AMDGPU::S_SUB_U64_PSEUDO:
6988 NewOpcode = AMDGPU::V_SUB_U64_PSEUDO;
6990 case AMDGPU::S_ADD_I32:
6991 case AMDGPU::S_SUB_I32: {
6995 std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
7003 case AMDGPU::S_MUL_U64:
7005 splitScalarSMulU64(Worklist, Inst, MDT);
7009 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7010 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7013 splitScalarSMulPseudo(Worklist, Inst, MDT);
7017 case AMDGPU::S_AND_B64:
7018 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7022 case AMDGPU::S_OR_B64:
7023 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
7027 case AMDGPU::S_XOR_B64:
7028 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
7032 case AMDGPU::S_NAND_B64:
7033 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
7037 case AMDGPU::S_NOR_B64:
7038 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
7042 case AMDGPU::S_XNOR_B64:
7044 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
7046 splitScalar64BitXnor(Worklist, Inst, MDT);
7050 case AMDGPU::S_ANDN2_B64:
7051 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7055 case AMDGPU::S_ORN2_B64:
7056 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7060 case AMDGPU::S_BREV_B64:
7061 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
7065 case AMDGPU::S_NOT_B64:
7066 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7070 case AMDGPU::S_BCNT1_I32_B64:
7071 splitScalar64BitBCNT(Worklist, Inst);
7075 case AMDGPU::S_BFE_I64:
7076 splitScalar64BitBFE(Worklist, Inst);
7080 case AMDGPU::S_FLBIT_I32_B64:
7081 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7084 case AMDGPU::S_FF1_I32_B64:
7085 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7089 case AMDGPU::S_LSHL_B32:
7091 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7095 case AMDGPU::S_ASHR_I32:
7097 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7101 case AMDGPU::S_LSHR_B32:
7103 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7107 case AMDGPU::S_LSHL_B64:
7110 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7111 : AMDGPU::V_LSHLREV_B64_e64;
7115 case AMDGPU::S_ASHR_I64:
7117 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7121 case AMDGPU::S_LSHR_B64:
7123 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7128 case AMDGPU::S_ABS_I32:
7129 lowerScalarAbs(Worklist, Inst);
7133 case AMDGPU::S_CBRANCH_SCC0:
7134 case AMDGPU::S_CBRANCH_SCC1: {
7137 bool IsSCC = CondReg == AMDGPU::SCC;
7140 unsigned Opc = ST.
isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
7143 .
addReg(IsSCC ? VCC : CondReg);
7147 case AMDGPU::S_BFE_U64:
7148 case AMDGPU::S_BFM_B64:
7151 case AMDGPU::S_PACK_LL_B32_B16:
7152 case AMDGPU::S_PACK_LH_B32_B16:
7153 case AMDGPU::S_PACK_HL_B32_B16:
7154 case AMDGPU::S_PACK_HH_B32_B16:
7155 movePackToVALU(Worklist,
MRI, Inst);
7159 case AMDGPU::S_XNOR_B32:
7160 lowerScalarXnor(Worklist, Inst);
7164 case AMDGPU::S_NAND_B32:
7165 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
7169 case AMDGPU::S_NOR_B32:
7170 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
7174 case AMDGPU::S_ANDN2_B32:
7175 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
7179 case AMDGPU::S_ORN2_B32:
7180 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
7188 case AMDGPU::S_ADD_CO_PSEUDO:
7189 case AMDGPU::S_SUB_CO_PSEUDO: {
7190 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
7191 ? AMDGPU::V_ADDC_U32_e64
7192 : AMDGPU::V_SUBB_U32_e64;
7193 const auto *CarryRC = RI.
getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
7196 if (!
MRI.constrainRegClass(CarryInReg, CarryRC)) {
7197 Register NewCarryReg =
MRI.createVirtualRegister(CarryRC);
7215 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
7219 case AMDGPU::S_UADDO_PSEUDO:
7220 case AMDGPU::S_USUBO_PSEUDO: {
7227 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_UADDO_PSEUDO)
7228 ? AMDGPU::V_ADD_CO_U32_e64
7229 : AMDGPU::V_SUB_CO_U32_e64;
7232 Register DestReg =
MRI.createVirtualRegister(NewRC);
7240 MRI.replaceRegWith(Dest0.
getReg(), DestReg);
7247 case AMDGPU::S_CSELECT_B32:
7248 case AMDGPU::S_CSELECT_B64:
7249 lowerSelect(Worklist, Inst, MDT);
7252 case AMDGPU::S_CMP_EQ_I32:
7253 case AMDGPU::S_CMP_LG_I32:
7254 case AMDGPU::S_CMP_GT_I32:
7255 case AMDGPU::S_CMP_GE_I32:
7256 case AMDGPU::S_CMP_LT_I32:
7257 case AMDGPU::S_CMP_LE_I32:
7258 case AMDGPU::S_CMP_EQ_U32:
7259 case AMDGPU::S_CMP_LG_U32:
7260 case AMDGPU::S_CMP_GT_U32:
7261 case AMDGPU::S_CMP_GE_U32:
7262 case AMDGPU::S_CMP_LT_U32:
7263 case AMDGPU::S_CMP_LE_U32:
7264 case AMDGPU::S_CMP_EQ_U64:
7265 case AMDGPU::S_CMP_LG_U64:
7266 case AMDGPU::S_CMP_LT_F32:
7267 case AMDGPU::S_CMP_EQ_F32:
7268 case AMDGPU::S_CMP_LE_F32:
7269 case AMDGPU::S_CMP_GT_F32:
7270 case AMDGPU::S_CMP_LG_F32:
7271 case AMDGPU::S_CMP_GE_F32:
7272 case AMDGPU::S_CMP_O_F32:
7273 case AMDGPU::S_CMP_U_F32:
7274 case AMDGPU::S_CMP_NGE_F32:
7275 case AMDGPU::S_CMP_NLG_F32:
7276 case AMDGPU::S_CMP_NGT_F32:
7277 case AMDGPU::S_CMP_NLE_F32:
7278 case AMDGPU::S_CMP_NEQ_F32:
7279 case AMDGPU::S_CMP_NLT_F32:
7280 case AMDGPU::S_CMP_LT_F16:
7281 case AMDGPU::S_CMP_EQ_F16:
7282 case AMDGPU::S_CMP_LE_F16:
7283 case AMDGPU::S_CMP_GT_F16:
7284 case AMDGPU::S_CMP_LG_F16:
7285 case AMDGPU::S_CMP_GE_F16:
7286 case AMDGPU::S_CMP_O_F16:
7287 case AMDGPU::S_CMP_U_F16:
7288 case AMDGPU::S_CMP_NGE_F16:
7289 case AMDGPU::S_CMP_NLG_F16:
7290 case AMDGPU::S_CMP_NGT_F16:
7291 case AMDGPU::S_CMP_NLE_F16:
7292 case AMDGPU::S_CMP_NEQ_F16:
7293 case AMDGPU::S_CMP_NLT_F16: {
7299 AMDGPU::OpName::src0_modifiers) >= 0) {
7314 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
7318 case AMDGPU::S_CVT_HI_F32_F16: {
7320 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7321 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7332 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7336 case AMDGPU::S_MINIMUM_F32:
7337 case AMDGPU::S_MAXIMUM_F32:
7338 case AMDGPU::S_MINIMUM_F16:
7339 case AMDGPU::S_MAXIMUM_F16: {
7341 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7352 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7358 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
7366 if (NewOpcode == Opcode) {
7390 addUsersToMoveToVALUWorklist(DstReg,
MRI, Worklist);
7402 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
7403 MRI.replaceRegWith(DstReg, NewDstReg);
7405 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
7416 AMDGPU::OpName::src0_modifiers) >= 0)
7421 Src.isReg() && RI.
isVGPR(
MRI, Src.getReg()))
7422 NewInstr.addReg(Src.getReg(), 0, AMDGPU::lo16);
7424 NewInstr->addOperand(Src);
7427 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
7430 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
7432 NewInstr.addImm(
Size);
7433 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
7437 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
7442 "Scalar BFE is only implemented for constant width and offset");
7451 AMDGPU::OpName::src1_modifiers) >= 0)
7456 AMDGPU::OpName::src2_modifiers) >= 0)
7470 NewInstr->addOperand(
Op);
7477 if (
Op.getReg() == AMDGPU::SCC) {
7479 if (
Op.isDef() && !
Op.isDead())
7480 addSCCDefUsersToVALUWorklist(
Op, Inst, Worklist);
7482 addSCCDefsToVALUWorklist(NewInstr, Worklist);
7487 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
7488 Register DstReg = NewInstr->getOperand(0).getReg();
7493 NewDstReg =
MRI.createVirtualRegister(NewDstRC);
7494 MRI.replaceRegWith(DstReg, NewDstReg);
7500 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
7504std::pair<bool, MachineBasicBlock *>
7516 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7519 assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
7521 unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ?
7522 AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
7530 MRI.replaceRegWith(OldDstReg, ResultReg);
7533 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
7534 return std::pair(
true, NewBB);
7537 return std::pair(
false,
nullptr);
7554 bool IsSCC = (CondReg == AMDGPU::SCC);
7562 MRI.replaceRegWith(Dest.
getReg(), CondReg);
7570 NewCondReg =
MRI.createVirtualRegister(TC);
7574 bool CopyFound =
false;
7578 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) !=
7580 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
7582 .
addReg(CandI.getOperand(1).getReg());
7594 : AMDGPU::S_CSELECT_B32;
7604 if (Inst.
getOpcode() == AMDGPU::S_CSELECT_B32) {
7605 NewInst =
BuildMI(
MBB, MII,
DL,
get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
7618 MRI.replaceRegWith(Dest.
getReg(), NewDestReg);
7620 addUsersToMoveToVALUWorklist(NewDestReg,
MRI, Worklist);
7632 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7633 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7636 AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
7646 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
7647 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
7662 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7670 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7671 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7677 bool Src0IsSGPR = Src0.
isReg() &&
7679 bool Src1IsSGPR = Src1.
isReg() &&
7682 Register Temp =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7683 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7693 }
else if (Src1IsSGPR) {
7707 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7711 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7717 unsigned Opcode)
const {
7727 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7728 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7740 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7741 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7746 unsigned Opcode)
const {
7756 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7757 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7769 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7770 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7788 &AMDGPU::SGPR_32RegClass;
7791 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
7794 AMDGPU::sub0, Src0SubRC);
7799 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
7801 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
7805 AMDGPU::sub1, Src0SubRC);
7807 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
7813 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
7820 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
7822 Worklist.
insert(&LoHalf);
7823 Worklist.
insert(&HiHalf);
7829 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
7840 Register FullDestReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7841 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7842 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7853 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
7857 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
7887 Register Op1L_Op0H_Reg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7893 Register Op1H_Op0L_Reg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7899 Register CarryReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7910 Register AddReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7926 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
7938 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
7949 Register FullDestReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7950 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7951 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7962 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
7966 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
7978 unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
7979 ? AMDGPU::V_MUL_HI_U32_e64
7980 : AMDGPU::V_MUL_HI_I32_e64;
7995 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8003 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8022 &AMDGPU::SGPR_32RegClass;
8025 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8028 &AMDGPU::SGPR_32RegClass;
8031 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8034 AMDGPU::sub0, Src0SubRC);
8036 AMDGPU::sub0, Src1SubRC);
8038 AMDGPU::sub1, Src0SubRC);
8040 AMDGPU::sub1, Src1SubRC);
8045 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8047 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
8052 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
8057 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
8064 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8066 Worklist.
insert(&LoHalf);
8067 Worklist.
insert(&HiHalf);
8070 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8088 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
8104 Register NewDest =
MRI.createVirtualRegister(DestRC);
8110 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8128 MRI.getRegClass(Src.getReg()) :
8129 &AMDGPU::SGPR_32RegClass;
8131 Register MidReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8132 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8135 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8138 AMDGPU::sub0, SrcSubRC);
8140 AMDGPU::sub1, SrcSubRC);
8146 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8150 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8169 Offset == 0 &&
"Not implemented");
8172 Register MidRegLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8173 Register MidRegHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8174 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8191 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8192 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8197 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8198 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8202 .
addReg(Src.getReg(), 0, AMDGPU::sub0);
8205 .
addReg(Src.getReg(), 0, AMDGPU::sub0)
8210 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8211 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8232 bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
8233 unsigned OpcodeAdd =
8234 ST.
hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
8237 Src.isReg() ?
MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
8239 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8246 Register MidReg1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8247 Register MidReg2 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8248 Register MidReg3 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8249 Register MidReg4 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8256 .
addReg(IsCtlz ? MidReg1 : MidReg2)
8262 .
addReg(IsCtlz ? MidReg2 : MidReg1);
8264 MRI.replaceRegWith(Dest.
getReg(), MidReg4);
8266 addUsersToMoveToVALUWorklist(MidReg4,
MRI, Worklist);
8269void SIInstrInfo::addUsersToMoveToVALUWorklist(
8273 E =
MRI.use_end();
I != E;) {
8278 switch (
UseMI.getOpcode()) {
8281 case AMDGPU::SOFT_WQM:
8282 case AMDGPU::STRICT_WWM:
8283 case AMDGPU::STRICT_WQM:
8284 case AMDGPU::REG_SEQUENCE:
8286 case AMDGPU::INSERT_SUBREG:
8289 OpNo =
I.getOperandNo();
8298 }
while (
I != E &&
I->getParent() == &
UseMI);
8308 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8315 case AMDGPU::S_PACK_LL_B32_B16: {
8316 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8317 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8334 case AMDGPU::S_PACK_LH_B32_B16: {
8335 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8344 case AMDGPU::S_PACK_HL_B32_B16: {
8345 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8355 case AMDGPU::S_PACK_HH_B32_B16: {
8356 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8357 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8374 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8375 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8384 assert(
Op.isReg() &&
Op.getReg() == AMDGPU::SCC &&
Op.isDef() &&
8385 !
Op.isDead() &&
Op.getParent() == &SCCDefInst);
8393 int SCCIdx =
MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI,
false);
8397 Register DestReg =
MI.getOperand(0).getReg();
8399 MRI.replaceRegWith(DestReg, NewCond);
8404 MI.getOperand(SCCIdx).setReg(NewCond);
8410 if (
MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) != -1)
8413 for (
auto &Copy : CopyToDelete)
8414 Copy->eraseFromParent();
8422void SIInstrInfo::addSCCDefsToVALUWorklist(
MachineInstr *SCCUseInst,
8431 if (
MI.modifiesRegister(AMDGPU::VCC, &RI))
8433 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
8450 case AMDGPU::REG_SEQUENCE:
8451 case AMDGPU::INSERT_SUBREG:
8453 case AMDGPU::SOFT_WQM:
8454 case AMDGPU::STRICT_WWM:
8455 case AMDGPU::STRICT_WQM: {
8463 case AMDGPU::REG_SEQUENCE:
8464 case AMDGPU::INSERT_SUBREG:
8474 if (RI.
isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
8491 int OpIndices[3])
const {
8510 for (
unsigned i = 0; i < 3; ++i) {
8511 int Idx = OpIndices[i];
8548 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
8549 SGPRReg = UsedSGPRs[0];
8552 if (!SGPRReg && UsedSGPRs[1]) {
8553 if (UsedSGPRs[1] == UsedSGPRs[2])
8554 SGPRReg = UsedSGPRs[1];
8561 unsigned OperandName)
const {
8566 return &
MI.getOperand(
Idx);
8583 RsrcDataFormat |= (1ULL << 56);
8588 RsrcDataFormat |= (2ULL << 59);
8591 return RsrcDataFormat;
8613 Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
8619 unsigned Opc =
MI.getOpcode();
8625 return get(Opc).mayLoad() &&
8630 int &FrameIndex)
const {
8638 FrameIndex =
Addr->getIndex();
8643 int &FrameIndex)
const {
8646 FrameIndex =
Addr->getIndex();
8651 int &FrameIndex)
const {
8665 int &FrameIndex)
const {
8682 while (++
I != E &&
I->isInsideBundle()) {
8683 assert(!
I->isBundle() &&
"No nested bundle!");
8691 unsigned Opc =
MI.getOpcode();
8693 unsigned DescSize =
Desc.getSize();
8698 unsigned Size = DescSize;
8713 bool HasLiteral =
false;
8714 for (
int I = 0, E =
MI.getNumExplicitOperands();
I != E; ++
I) {
8722 return HasLiteral ? DescSize + 4 : DescSize;
8732 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
8736 case TargetOpcode::BUNDLE:
8738 case TargetOpcode::INLINEASM:
8739 case TargetOpcode::INLINEASM_BR: {
8741 const char *AsmStr =
MI.getOperand(0).getSymbolName();
8745 if (
MI.isMetaInstruction())
8755 if (
MI.memoperands_empty())
8766 return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO;
8778 if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
8781 BuildMI(*MF, Branch->getDebugLoc(),
get(AMDGPU::SI_IF), DstReg)
8782 .
add(Branch->getOperand(0))
8783 .
add(Branch->getOperand(1));
8785 BuildMI(*MF, Branch->getDebugLoc(),
get(AMDGPU::SI_END_CF))
8804 if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
8809 BuildMI(*(MF), Branch->getDebugLoc(),
get(TargetOpcode::PHI), DstReg);
8811 if (PMBB == LoopEnd) {
8812 HeaderPHIBuilder.
addReg(BackEdgeReg);
8817 HeaderPHIBuilder.
addReg(ZeroReg);
8819 HeaderPHIBuilder.
addMBB(PMBB);
8823 get(AMDGPU::SI_IF_BREAK), BackEdgeReg)
8825 .
add(Branch->getOperand(0));
8827 BuildMI(*(MF), Branch->getDebugLoc(),
get(AMDGPU::SI_LOOP))
8833 LoopEnd->
insert(LoopEnd->
end(), SIIFBREAK);
8834 LoopEnd->
insert(LoopEnd->
end(), SILOOP);
8840 static const std::pair<int, const char *> TargetIndices[] = {
8878std::pair<unsigned, unsigned>
8885 static const std::pair<unsigned, const char *> TargetFlags[] = {
8900 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
8914 return AMDGPU::WWM_COPY;
8916 return AMDGPU::COPY;
8927 bool IsNullOrVectorRegister =
true;
8936 return IsNullOrVectorRegister &&
8937 (
isSpill(Opcode) || (!
MI.isTerminator() && Opcode != AMDGPU::COPY &&
8938 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
8951 MRI.setRegAllocationHint(UnusedCarry, 0, RI.
getVCC());
8982 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
8983 case AMDGPU::SI_KILL_I1_TERMINATOR:
8992 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
8993 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
8994 case AMDGPU::SI_KILL_I1_PSEUDO:
8995 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
9007 const unsigned OffsetBits =
9009 return (1 << OffsetBits) - 1;
9016 if (
MI.isInlineAsm())
9019 for (
auto &
Op :
MI.implicit_operands()) {
9020 if (
Op.isReg() &&
Op.getReg() == AMDGPU::VCC)
9021 Op.setReg(AMDGPU::VCC_LO);
9034 const auto RCID =
MI.getDesc().operands()[
Idx].RegClass;
9052 if (Imm <= MaxImm + 64) {
9054 Overflow = Imm - MaxImm;
9144std::pair<int64_t, int64_t>
9147 int64_t RemainderOffset = COffsetVal;
9148 int64_t ImmField = 0;
9153 if (AllowNegative) {
9155 int64_t
D = 1LL << NumBits;
9156 RemainderOffset = (COffsetVal /
D) *
D;
9157 ImmField = COffsetVal - RemainderOffset;
9161 (ImmField % 4) != 0) {
9163 RemainderOffset += ImmField % 4;
9164 ImmField -= ImmField % 4;
9166 }
else if (COffsetVal >= 0) {
9167 ImmField = COffsetVal & maskTrailingOnes<uint64_t>(NumBits);
9168 RemainderOffset = COffsetVal - ImmField;
9172 assert(RemainderOffset + ImmField == COffsetVal);
9173 return {ImmField, RemainderOffset};
9185 switch (ST.getGeneration()) {
9210 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
9211 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
9212 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
9213 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
9214 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
9215 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
9216 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
9217 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
9253 if (
isMAI(Opcode)) {
9298 for (
unsigned I = 0, E = (
MI.getNumOperands() - 1)/ 2;
I < E; ++
I)
9299 if (
MI.getOperand(1 + 2 *
I + 1).getImm() ==
SubReg) {
9300 auto &RegOp =
MI.getOperand(1 + 2 *
I);
9312 switch (
MI.getOpcode()) {
9314 case AMDGPU::REG_SEQUENCE:
9318 case AMDGPU::INSERT_SUBREG:
9319 if (RSR.
SubReg == (
unsigned)
MI.getOperand(3).getImm())
9336 if (!
P.Reg.isVirtual())
9340 auto *DefInst =
MRI.getVRegDef(RSR.Reg);
9341 while (
auto *
MI = DefInst) {
9343 switch (
MI->getOpcode()) {
9345 case AMDGPU::V_MOV_B32_e32: {
9346 auto &Op1 =
MI->getOperand(1);
9351 DefInst =
MRI.getVRegDef(RSR.Reg);
9359 DefInst =
MRI.getVRegDef(RSR.Reg);
9372 assert(
MRI.isSSA() &&
"Must be run on SSA");
9374 auto *
TRI =
MRI.getTargetRegisterInfo();
9375 auto *DefBB =
DefMI.getParent();
9379 if (
UseMI.getParent() != DefBB)
9382 const int MaxInstScan = 20;
9386 auto E =
UseMI.getIterator();
9387 for (
auto I = std::next(
DefMI.getIterator());
I != E; ++
I) {
9388 if (
I->isDebugInstr())
9391 if (++NumInst > MaxInstScan)
9394 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI))
9404 assert(
MRI.isSSA() &&
"Must be run on SSA");
9406 auto *
TRI =
MRI.getTargetRegisterInfo();
9407 auto *DefBB =
DefMI.getParent();
9409 const int MaxUseScan = 10;
9412 for (
auto &
Use :
MRI.use_nodbg_operands(VReg)) {
9413 auto &UseInst = *
Use.getParent();
9416 if (UseInst.getParent() != DefBB || UseInst.isPHI())
9419 if (++NumUse > MaxUseScan)
9426 const int MaxInstScan = 20;
9430 for (
auto I = std::next(
DefMI.getIterator()); ; ++
I) {
9433 if (
I->isDebugInstr())
9436 if (++NumInst > MaxInstScan)
9449 if (Reg == VReg && --NumUse == 0)
9451 }
else if (
TRI->regsOverlap(Reg, AMDGPU::EXEC))
9463 if (!Cur->isPHI() && Cur->readsRegister(Dst,
nullptr))
9466 }
while (Cur !=
MBB.
end() && Cur != LastPHIIt);
9475 if (InsPt !=
MBB.
end() &&
9476 (InsPt->getOpcode() == AMDGPU::SI_IF ||
9477 InsPt->getOpcode() == AMDGPU::SI_ELSE ||
9478 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
9479 InsPt->definesRegister(Src,
nullptr)) {
9483 : AMDGPU::S_MOV_B64_term),
9485 .
addReg(Src, 0, SrcSubReg)
9510 if (isFullCopyInstr(
MI)) {
9519 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
9522 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
9533 unsigned *PredCost)
const {
9534 if (
MI.isBundle()) {
9537 unsigned Lat = 0, Count = 0;
9538 for (++
I;
I != E &&
I->isBundledWithPred(); ++
I) {
9540 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*
I));
9542 return Lat + Count - 1;
9545 return SchedModel.computeInstrLatency(&
MI);
9550 unsigned opcode =
MI.getOpcode();
9551 if (
auto *GI = dyn_cast<GIntrinsic>(&
MI)) {
9552 auto IID = GI->getIntrinsicID();
9559 case Intrinsic::amdgcn_if:
9560 case Intrinsic::amdgcn_else:
9574 if (opcode == AMDGPU::G_LOAD) {
9575 if (
MI.memoperands_empty())
9579 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
9580 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
9588 if (SIInstrInfo::isGenericAtomicRMWOpcode(opcode) ||
9589 opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
9590 opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
9603 unsigned opcode =
MI.getOpcode();
9604 if (opcode == AMDGPU::V_READLANE_B32 ||
9605 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
9606 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
9609 if (isCopyInstr(
MI)) {
9613 RI.getPhysRegBaseClass(srcOp.
getReg());
9621 if (
MI.isPreISelOpcode())
9636 if (
MI.memoperands_empty())
9640 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
9641 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
9656 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I) {
9662 if (!Reg || !
SrcOp.readsReg())
9668 if (RegBank && RegBank->
getID() != AMDGPU::SGPRRegBankID)
9705 Register &SrcReg2, int64_t &CmpMask,
9706 int64_t &CmpValue)
const {
9707 if (!
MI.getOperand(0).isReg() ||
MI.getOperand(0).getSubReg())
9710 switch (
MI.getOpcode()) {
9713 case AMDGPU::S_CMP_EQ_U32:
9714 case AMDGPU::S_CMP_EQ_I32:
9715 case AMDGPU::S_CMP_LG_U32:
9716 case AMDGPU::S_CMP_LG_I32:
9717 case AMDGPU::S_CMP_LT_U32:
9718 case AMDGPU::S_CMP_LT_I32:
9719 case AMDGPU::S_CMP_GT_U32:
9720 case AMDGPU::S_CMP_GT_I32:
9721 case AMDGPU::S_CMP_LE_U32:
9722 case AMDGPU::S_CMP_LE_I32:
9723 case AMDGPU::S_CMP_GE_U32:
9724 case AMDGPU::S_CMP_GE_I32:
9725 case AMDGPU::S_CMP_EQ_U64:
9726 case AMDGPU::S_CMP_LG_U64:
9727 SrcReg =
MI.getOperand(0).getReg();
9728 if (
MI.getOperand(1).isReg()) {
9729 if (
MI.getOperand(1).getSubReg())
9731 SrcReg2 =
MI.getOperand(1).getReg();
9733 }
else if (
MI.getOperand(1).isImm()) {
9735 CmpValue =
MI.getOperand(1).getImm();
9741 case AMDGPU::S_CMPK_EQ_U32:
9742 case AMDGPU::S_CMPK_EQ_I32:
9743 case AMDGPU::S_CMPK_LG_U32:
9744 case AMDGPU::S_CMPK_LG_I32:
9745 case AMDGPU::S_CMPK_LT_U32:
9746 case AMDGPU::S_CMPK_LT_I32:
9747 case AMDGPU::S_CMPK_GT_U32:
9748 case AMDGPU::S_CMPK_GT_I32:
9749 case AMDGPU::S_CMPK_LE_U32:
9750 case AMDGPU::S_CMPK_LE_I32:
9751 case AMDGPU::S_CMPK_GE_U32:
9752 case AMDGPU::S_CMPK_GE_I32:
9753 SrcReg =
MI.getOperand(0).getReg();
9755 CmpValue =
MI.getOperand(1).getImm();
9773 const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue,
MRI,
9774 this](int64_t ExpectedValue,
unsigned SrcSize,
9775 bool IsReversible,
bool IsSigned) ->
bool {
9800 if (!Def || Def->getParent() != CmpInstr.
getParent())
9803 if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
9804 Def->getOpcode() != AMDGPU::S_AND_B64)
9808 const auto isMask = [&Mask, SrcSize](
const MachineOperand *MO) ->
bool {
9819 SrcOp = &Def->getOperand(2);
9820 else if (isMask(&Def->getOperand(2)))
9821 SrcOp = &Def->getOperand(1);
9826 if (IsSigned && BitNo == SrcSize - 1)
9829 ExpectedValue <<= BitNo;
9831 bool IsReversedCC =
false;
9832 if (CmpValue != ExpectedValue) {
9835 IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
9840 Register DefReg = Def->getOperand(0).getReg();
9841 if (IsReversedCC && !
MRI->hasOneNonDBGUse(DefReg))
9844 for (
auto I = std::next(Def->getIterator()), E = CmpInstr.
getIterator();
9846 if (
I->modifiesRegister(AMDGPU::SCC, &RI) ||
9847 I->killsRegister(AMDGPU::SCC, &RI))
9852 Def->findRegisterDefOperand(AMDGPU::SCC,
nullptr);
9856 if (!
MRI->use_nodbg_empty(DefReg)) {
9864 unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
9865 : AMDGPU::S_BITCMP1_B32
9866 : IsReversedCC ? AMDGPU::S_BITCMP0_B64
9867 : AMDGPU::S_BITCMP1_B64;
9872 Def->eraseFromParent();
9880 case AMDGPU::S_CMP_EQ_U32:
9881 case AMDGPU::S_CMP_EQ_I32:
9882 case AMDGPU::S_CMPK_EQ_U32:
9883 case AMDGPU::S_CMPK_EQ_I32:
9884 return optimizeCmpAnd(1, 32,
true,
false);
9885 case AMDGPU::S_CMP_GE_U32:
9886 case AMDGPU::S_CMPK_GE_U32:
9887 return optimizeCmpAnd(1, 32,
false,
false);
9888 case AMDGPU::S_CMP_GE_I32:
9889 case AMDGPU::S_CMPK_GE_I32:
9890 return optimizeCmpAnd(1, 32,
false,
true);
9891 case AMDGPU::S_CMP_EQ_U64:
9892 return optimizeCmpAnd(1, 64,
true,
false);
9893 case AMDGPU::S_CMP_LG_U32:
9894 case AMDGPU::S_CMP_LG_I32:
9895 case AMDGPU::S_CMPK_LG_U32:
9896 case AMDGPU::S_CMPK_LG_I32:
9897 return optimizeCmpAnd(0, 32,
true,
false);
9898 case AMDGPU::S_CMP_GT_U32:
9899 case AMDGPU::S_CMPK_GT_U32:
9900 return optimizeCmpAnd(0, 32,
false,
false);
9901 case AMDGPU::S_CMP_GT_I32:
9902 case AMDGPU::S_CMPK_GT_I32:
9903 return optimizeCmpAnd(0, 32,
false,
true);
9904 case AMDGPU::S_CMP_LG_U64:
9905 return optimizeCmpAnd(0, 64,
true,
false);
9930 IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
9933 MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
9934 : &AMDGPU::VReg_64_Align2RegClass);
9936 .
addReg(DataReg, 0,
Op.getSubReg())
9941 Op.setSubReg(AMDGPU::sub0);
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
amdgpu AMDGPU Register Bank Select
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
static bool isUndef(ArrayRef< int > Mask)
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
TargetInstrInfo::RegSubRegPair RegSubRegPair
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &MFI)
static unsigned getAGPRSpillRestoreOpcode(unsigned Size)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static const TargetRegisterClass * adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI, const MachineRegisterInfo &MRI, const MCInstrDesc &TID, unsigned RCID, bool IsAllocatable)
static unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &MFI)
static unsigned getAGPRSpillSaveOpcode(unsigned Size)
static bool resultDependsOnExec(const MachineInstr &MI)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, unsigned OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static constexpr unsigned ModifierOpNames[]
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static MachineBasicBlock * loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
bool has16BitInsts() const
bool hasTrue16BitInsts() const
Return true if the subtarget supports True16 instructions.
unsigned getWavefrontSize() const
bool hasInv2PiInlineImm() const
Class for arbitrary precision integers.
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
This class represents an Operation in the Expression.
Diagnostic information for unsupported feature in backend.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool useVGPRIndexMode() const
bool hasSDWAScalar() const
bool hasScalarCompareEq64() const
bool hasOnlyRevVALUShifts() const
bool hasFlatInstOffsets() const
bool hasGFX90AInsts() const
bool hasMFMAInlineLiteralBug() const
bool hasNegativeScratchOffsetBug() const
unsigned getConstantBusLimit(unsigned Opcode) const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool hasOffset3fBug() const
bool hasGetPCZeroExtension() const
const SIRegisterInfo * getRegisterInfo() const override
const AMDGPURegisterBankInfo * getRegBankInfo() const override
bool hasSDWAOutModsVOPC() const
bool hasRestrictedSOffset() const
bool hasFlatSegmentOffsetBug() const
bool hasGFX940Insts() const
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
bool hasNegativeUnalignedScratchOffsetBug() const
unsigned getNSAMaxSize(bool HasSampler=false) const
Generation getGeneration() const
bool hasVOP3Literal() const
bool hasUnpackedD16VMem() const
bool hasAddNoCarry() const
bool hasPartialNSAEncoding() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
A possibly irreducible generalization of a Loop.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
This is an important class for using LLVM in a threaded context.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
unsigned pred_size() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineDomTreeNode * addNewBlock(MachineBasicBlock *BB, MachineBasicBlock *DomBB)
addNewBlock - Add a new node to the dominator tree information.
bool properlyDominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
void changeImmediateDominator(MachineBasicBlock *N, MachineBasicBlock *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
unsigned getNumOperands() const
Retuns the total number of operands.
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
iterator_range< mop_iterator > explicit_operands()
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
iterator_range< mop_iterator > implicit_operands()
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
bool addRegisterDead(Register Reg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI defined a register without a use.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void backward()
Update internal register state and move MBB iterator backwards.
void enterBasicBlock(MachineBasicBlock &MBB)
Start tracking liveness from the begin of basic block MBB.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the p...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
static bool isMAI(const MachineInstr &MI)
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
bool isNonUniformBranchInstr(MachineInstr &Instr) const
static bool isVOP3(const MachineInstr &MI)
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given.
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instructions opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const
Get required immediate operand.
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI) const
Copy a value from a VGPR (SrcReg) to SGPR.
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
bool isSpill(uint16_t Opcode) const
static bool isVIMAGE(const MachineInstr &MI)
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, unsigned Src0OpName, MachineOperand &Src1, unsigned Src1OpName) const
static bool isFLATGlobal(const MachineInstr &MI)
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of an s_trap 2 instruction for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, const MachineOperand &MO) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const override final
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
bool isLowLatencyInstruction(const MachineInstr &MI) const
void materializeImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, int64_t Value) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to ano...
bool isAlwaysGDS(uint16_t Opcode) const
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns if Offset is legal for the subtarget as the offset to a FLAT encoded instruction.
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool isVGPRCopy(const MachineInstr &MI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description.
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
const TargetRegisterClass * getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
void convertNonUniformIfRegion(MachineBasicBlock *IfEntry, MachineBasicBlock *IfEnd) const
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change SADDR form of a FLAT Inst to its VADDR form if saddr operand was moved to VGPR.
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
Whether we must prevent this instruction from executing with EXEC = 0.
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
static bool sopkIsZext(unsigned Opcode)
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
bool hasModifiersSet(const MachineInstr &MI, unsigned OpName) const
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO is a legal operand if it was the OpIdx Operand for MI.
static bool isLDSDMA(const MachineInstr &MI)
unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
Register getStackPtrOffsetReg() const
void setHasSpilledVGPRs(bool Spill=true)
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
const TargetRegisterClass * getRegClass(unsigned RCID) const
const TargetRegisterClass * getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, unsigned SubIdx) const
Returns a register class which is compatible with SuperRC, such that a subregister exists with class ...
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
MCPhysReg get32BitRegister(MCPhysReg Reg) const
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
bool isProperlyAlignedRC(const TargetRegisterClass &RC) const
static bool hasVectorRegisters(const TargetRegisterClass *RC)
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool opCanUseInlineConstant(unsigned OpType) const
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentAGPRClass(const TargetRegisterClass *SRC) const
bool opCanUseLiteralConstant(unsigned OpType) const
static bool hasVGPRs(const TargetRegisterClass *RC)
static bool isVGPRClass(const TargetRegisterClass *RC)
unsigned getHWRegIndex(MCRegister Reg) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
const TargetRegisterClass * getBoolRC() const
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const
unsigned getChannelFromSubReg(unsigned SubReg) const
MCRegister getVCC() const
static bool hasAGPRs(const TargetRegisterClass *RC)
const TargetRegisterClass * getWaveMaskRegClass() const
bool spillSGPRToVGPR() const
const TargetRegisterClass * getVGPR64Class() const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination lets the target make necessary checks and insert the copy to the PHI destinati...
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
const TargetRegisterClass * getAllocatableClass(const TargetRegisterClass *RC) const
Return the maximal subclass of the given register class that is allocatable or NULL.
unsigned getSubRegIdxSize(unsigned Idx) const
Get the size of the bit range covered by a sub-register index.
unsigned getSubRegIdxOffset(unsigned Idx) const
Get the offset of the bit range covered by a sub-register index.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
LLVM_READNONE bool isLegalDPALU_DPPControl(unsigned DC)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isDPALU_DPP(const MCInstrDesc &OpDesc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
bool isTrue16Inst(unsigned Opc)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants,...
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
bool isGenericAtomic(unsigned Opc)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
bool isHi(unsigned Reg, const MCRegisterInfo &MRI)
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_V2INT32
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_INLINE_AC_V2FP16
@ OPERAND_REG_IMM_INT32
Operands with register or 32-bit immediate.
@ OPERAND_REG_IMM_BF16_DEFERRED
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_AC_BF16
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_INLINE_AC_INT16
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_V2INT16
@ OPERAND_REG_INLINE_AC_FP16
@ OPERAND_REG_INLINE_AC_INT32
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_INLINE_AC_V2BF16
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_REG_INLINE_C_V2FP32
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
@ OPERAND_REG_IMM_FP32_DEFERRED
@ OPERAND_REG_IMM_FP16_DEFERRED
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
int getMCOpcode(uint16_t Opcode, unsigned Gen)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
bool isGraphics(CallingConv::ID cc)
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tessellation and geometry are not in use, or otherwise copy shader if one is needed).
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions starting from FirstMI to LastMI (exclusive).
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int popcount(T Value) noexcept
Count the number of set bits in a value.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the start of the kernel to the load instruction.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair, skipping copy-like instructions and subreg-manipulation pseudos.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
@ Xor
Bitwise or logical XOR of integers.
unsigned getKillRegState(bool B)
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
constexpr unsigned BitWidth
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
Description of the encoding of one expression Op.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.