20#include "llvm/IR/IntrinsicsNVPTX.h"
30#define DEBUG_TYPE "nvptx-isel"
31#define PASS_NAME "NVPTX DAG->DAG Pattern Instruction Selection"
35 cl::desc(
"Enable reciprocal sqrt optimization"));
64int NVPTXDAGToDAGISel::getDivF32Level()
const {
68bool NVPTXDAGToDAGISel::usePrecSqrtF32()
const {
72bool NVPTXDAGToDAGISel::useF32FTZ()
const {
76bool NVPTXDAGToDAGISel::allowFMA()
const {
81bool NVPTXDAGToDAGISel::allowUnsafeFPMath()
const {
86bool NVPTXDAGToDAGISel::doRsqrtOpt()
const {
return EnableRsqrtOpt; }
90void NVPTXDAGToDAGISel::Select(
SDNode *
N) {
92 if (
N->isMachineOpcode()) {
97 switch (
N->getOpcode()) {
109 if (tryEXTRACT_VECTOR_ELEMENT(
N))
116 SelectSETP_BF16X2(
N);
120 if (tryLoadVector(
N))
132 if (tryStoreVector(
N))
144 if (tryStoreRetval(
N))
152 if (tryStoreParam(
N))
156 if (tryIntrinsicNoChain(
N))
160 if (tryIntrinsicChain(
N))
337 if (tryTextureIntrinsic(
N))
505 if (trySurfaceIntrinsic(
N))
516 SelectAddrSpaceCast(
N);
519 if (tryConstantFP(
N))
528bool NVPTXDAGToDAGISel::tryIntrinsicChain(
SDNode *
N) {
529 unsigned IID =
N->getConstantOperandVal(1);
533 case Intrinsic::nvvm_ldg_global_f:
534 case Intrinsic::nvvm_ldg_global_i:
535 case Intrinsic::nvvm_ldg_global_p:
536 case Intrinsic::nvvm_ldu_global_f:
537 case Intrinsic::nvvm_ldu_global_i:
538 case Intrinsic::nvvm_ldu_global_p:
545bool NVPTXDAGToDAGISel::tryConstantFP(
SDNode *
N) {
546 if (
N->getValueType(0) != MVT::f16 &&
N->getValueType(0) != MVT::bf16)
549 cast<ConstantFPSDNode>(
N)->getValueAPF(),
SDLoc(
N),
N->getValueType(0));
551 (
N->getValueType(0) == MVT::f16 ? NVPTX::LOAD_CONST_F16
552 : NVPTX::LOAD_CONST_BF16),
553 SDLoc(
N),
N->getValueType(0), Val);
581 return CmpMode::NotANumber;
615bool NVPTXDAGToDAGISel::SelectSETP_F16X2(
SDNode *
N) {
616 unsigned PTXCmpMode =
617 getPTXCmpMode(*cast<CondCodeSDNode>(
N->getOperand(2)), useF32FTZ());
620 NVPTX::SETP_f16x2rr,
DL, MVT::i1, MVT::i1,
N->getOperand(0),
626bool NVPTXDAGToDAGISel::SelectSETP_BF16X2(
SDNode *
N) {
627 unsigned PTXCmpMode =
628 getPTXCmpMode(*cast<CondCodeSDNode>(
N->getOperand(2)), useF32FTZ());
631 NVPTX::SETP_bf16x2rr,
DL, MVT::i1, MVT::i1,
N->getOperand(0),
639bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(
SDNode *
N) {
649 for (
auto *U :
Vector.getNode()->uses()) {
652 if (
U->getOperand(0) !=
Vector)
655 dyn_cast<ConstantSDNode>(
U->getOperand(1))) {
656 if (IdxConst->getZExtValue() == 0)
658 else if (IdxConst->getZExtValue() == 1)
675 for (
auto *
Node : E0)
677 for (
auto *
Node : E1)
684 const Value *Src =
N->getMemOperand()->getValue();
689 if (
auto *PT = dyn_cast<PointerType>(Src->getType())) {
690 switch (PT->getAddressSpace()) {
725 if (
N->isInvariant())
737 if (
auto *
A = dyn_cast<const Argument>(V))
738 return IsKernelFn &&
A->onlyReadsMemory() &&
A->hasNoAliasAttr();
739 if (
auto *GV = dyn_cast<const GlobalVariable>(V))
740 return GV->isConstant();
745bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(
SDNode *
N) {
746 unsigned IID =
N->getConstantOperandVal(0);
750 case Intrinsic::nvvm_texsurf_handle_internal:
751 SelectTexSurfHandle(
N);
756void NVPTXDAGToDAGISel::SelectTexSurfHandle(
SDNode *
N) {
761 MVT::i64, GlobalVal));
764void NVPTXDAGToDAGISel::SelectAddrSpaceCast(
SDNode *
N) {
769 assert(SrcAddrSpace != DstAddrSpace &&
770 "addrspacecast must be between different address spaces");
775 switch (SrcAddrSpace) {
778 Opc = TM.
is64Bit() ? NVPTX::cvta_global_64 : NVPTX::cvta_global;
782 ? NVPTX::cvta_shared_6432
783 : NVPTX::cvta_shared_64)
784 : NVPTX::cvta_shared;
788 ? NVPTX::cvta_const_6432
789 : NVPTX::cvta_const_64)
794 ? NVPTX::cvta_local_6432
795 : NVPTX::cvta_local_64)
804 if (SrcAddrSpace != 0)
807 switch (DstAddrSpace) {
810 Opc = TM.
is64Bit() ? NVPTX::cvta_to_global_64 : NVPTX::cvta_to_global;
814 ? NVPTX::cvta_to_shared_3264
815 : NVPTX::cvta_to_shared_64)
816 : NVPTX::cvta_to_shared;
820 ? NVPTX::cvta_to_const_3264
821 : NVPTX::cvta_to_const_64)
822 : NVPTX::cvta_to_const;
826 ? NVPTX::cvta_to_local_3264
827 : NVPTX::cvta_to_local_64)
828 : NVPTX::cvta_to_local;
831 Opc = TM.
is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
832 : NVPTX::nvvm_ptr_gen_to_param;
843static std::optional<unsigned>
845 unsigned Opcode_i16,
unsigned Opcode_i32,
846 std::optional<unsigned> Opcode_i64,
unsigned Opcode_f32,
847 std::optional<unsigned> Opcode_f64) {
890bool NVPTXDAGToDAGISel::tryLoad(
SDNode *
N) {
893 assert(
LD->readMem() &&
"Expected load");
895 EVT LoadedVT =
LD->getMemoryVT();
896 SDNode *NVPTXLD =
nullptr;
940 unsigned fromTypeWidth = std::max(8U, (
unsigned)ScalarVT.
getSizeInBits());
941 unsigned int fromType;
947 "Unexpected vector type");
962 std::optional<unsigned> Opcode;
965 if (SelectDirectAddr(N1,
Addr)) {
966 Opcode =
pickOpcodeForVT(TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar,
967 NVPTX::LD_i32_avar, NVPTX::LD_i64_avar,
968 NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
971 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
972 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
973 getI32Imm(fromTypeWidth, dl),
Addr, Chain };
977 Opcode =
pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
978 NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
979 NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
982 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
983 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
984 getI32Imm(fromTypeWidth, dl),
Base,
Offset, Chain };
988 if (PointerSize == 64)
991 NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64,
992 NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
994 Opcode =
pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari,
995 NVPTX::LD_i32_ari, NVPTX::LD_i64_ari,
996 NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
999 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
1000 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
1001 getI32Imm(fromTypeWidth, dl),
Base,
Offset, Chain };
1004 if (PointerSize == 64)
1006 pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
1007 NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64,
1008 NVPTX::LD_f32_areg_64, NVPTX::LD_f64_areg_64);
1010 Opcode =
pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg,
1011 NVPTX::LD_i32_areg, NVPTX::LD_i64_areg,
1012 NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
1015 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
1016 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
1017 getI32Imm(fromTypeWidth, dl), N1, Chain };
1031bool NVPTXDAGToDAGISel::tryLoadVector(
SDNode *
N) {
1036 std::optional<unsigned> Opcode;
1048 return tryLDGLDU(
N);
1073 unsigned FromTypeWidth = std::max(8U, (
unsigned)ScalarVT.
getSizeInBits());
1076 unsigned ExtensionType = cast<ConstantSDNode>(
1077 N->getOperand(
N->getNumOperands() - 1))->getZExtValue();
1085 switch (
N->getOpcode()) {
1096 EVT EltVT =
N->getValueType(0);
1108 if (SelectDirectAddr(Op1,
Addr)) {
1109 switch (
N->getOpcode()) {
1114 NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
1115 NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
1116 NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
1121 NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar,
1122 std::nullopt, NVPTX::LDV_f32_v4_avar, std::nullopt);
1127 SDValue Ops[] = { getI32Imm(IsVolatile,
DL), getI32Imm(CodeAddrSpace,
DL),
1128 getI32Imm(VecType,
DL), getI32Imm(FromType,
DL),
1129 getI32Imm(FromTypeWidth,
DL),
Addr, Chain };
1131 }
else if (PointerSize == 64
1134 switch (
N->getOpcode()) {
1139 NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
1140 NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
1141 NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
1146 NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi,
1147 std::nullopt, NVPTX::LDV_f32_v4_asi, std::nullopt);
1152 SDValue Ops[] = { getI32Imm(IsVolatile,
DL), getI32Imm(CodeAddrSpace,
DL),
1153 getI32Imm(VecType,
DL), getI32Imm(FromType,
DL),
1156 }
else if (PointerSize == 64
1159 if (PointerSize == 64) {
1160 switch (
N->getOpcode()) {
1166 NVPTX::LDV_i8_v2_ari_64, NVPTX::LDV_i16_v2_ari_64,
1167 NVPTX::LDV_i32_v2_ari_64, NVPTX::LDV_i64_v2_ari_64,
1168 NVPTX::LDV_f32_v2_ari_64, NVPTX::LDV_f64_v2_ari_64);
1173 NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, std::nullopt,
1174 NVPTX::LDV_f32_v4_ari_64, std::nullopt);
1178 switch (
N->getOpcode()) {
1183 NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
1184 NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
1185 NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
1190 NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari,
1191 std::nullopt, NVPTX::LDV_f32_v4_ari, std::nullopt);
1197 SDValue Ops[] = { getI32Imm(IsVolatile,
DL), getI32Imm(CodeAddrSpace,
DL),
1198 getI32Imm(VecType,
DL), getI32Imm(FromType,
DL),
1203 if (PointerSize == 64) {
1204 switch (
N->getOpcode()) {
1210 NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
1211 NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
1212 NVPTX::LDV_f64_v2_areg_64);
1217 NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, std::nullopt,
1218 NVPTX::LDV_f32_v4_areg_64, std::nullopt);
1222 switch (
N->getOpcode()) {
1228 NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
1229 NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f32_v2_areg,
1230 NVPTX::LDV_f64_v2_areg);
1235 NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg,
1236 std::nullopt, NVPTX::LDV_f32_v4_areg, std::nullopt);
1242 SDValue Ops[] = { getI32Imm(IsVolatile,
DL), getI32Imm(CodeAddrSpace,
DL),
1243 getI32Imm(VecType,
DL), getI32Imm(FromType,
DL),
1244 getI32Imm(FromTypeWidth,
DL), Op1, Chain };
1255bool NVPTXDAGToDAGISel::tryLDGLDU(
SDNode *
N) {
1265 Op1 =
N->getOperand(2);
1266 Mem = cast<MemIntrinsicSDNode>(
N);
1267 unsigned IID =
N->getConstantOperandVal(1);
1271 case Intrinsic::nvvm_ldg_global_f:
1272 case Intrinsic::nvvm_ldg_global_i:
1273 case Intrinsic::nvvm_ldg_global_p:
1276 case Intrinsic::nvvm_ldu_global_f:
1277 case Intrinsic::nvvm_ldu_global_i:
1278 case Intrinsic::nvvm_ldu_global_p:
1283 Op1 =
N->getOperand(1);
1284 Mem = cast<MemSDNode>(
N);
1287 std::optional<unsigned> Opcode;
1291 EVT OrigType =
N->getValueType(0);
1294 unsigned NumElts = 1;
1299 if ((EltVT == MVT::f16 && OrigType == MVT::v2f16) ||
1300 (EltVT == MVT::bf16 && OrigType == MVT::v2bf16) ||
1301 (EltVT == MVT::i16 && OrigType == MVT::v2i16)) {
1302 assert(NumElts % 2 == 0 &&
"Vector must have even number of elements");
1305 }
else if (OrigType == MVT::v4i8) {
1314 EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
1316 for (
unsigned i = 0; i != NumElts; ++i) {
1322 if (SelectDirectAddr(Op1,
Addr)) {
1323 switch (
N->getOpcode()) {
1330 NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
1331 NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
1332 NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
1333 NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
1334 NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
1335 NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
1338 NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
1339 NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
1340 NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
1341 NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
1342 NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
1343 NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
1348 NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
1349 NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
1350 NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
1351 NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
1352 NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
1353 NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
1357 NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
1358 NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
1359 NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
1360 NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
1361 NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
1362 NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
1368 NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
1369 NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, std::nullopt,
1370 NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, std::nullopt);
1375 NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
1376 NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, std::nullopt,
1377 NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, std::nullopt);
1387 switch (
N->getOpcode()) {
1394 NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
1395 NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
1396 NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
1397 NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
1398 NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
1399 NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
1402 NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
1403 NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
1404 NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
1405 NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
1406 NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
1407 NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
1412 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
1413 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
1414 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
1415 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
1416 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
1417 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
1421 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
1422 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
1423 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
1424 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
1425 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
1426 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
1432 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
1433 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, std::nullopt,
1434 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, std::nullopt);
1439 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
1440 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, std::nullopt,
1441 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, std::nullopt);
1445 switch (
N->getOpcode()) {
1452 NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
1453 NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
1454 NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
1455 NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
1456 NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
1457 NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
1460 NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
1461 NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
1462 NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
1463 NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
1464 NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
1465 NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
1470 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
1471 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
1472 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
1473 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
1474 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
1475 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
1479 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
1480 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
1481 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
1482 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
1483 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
1484 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
1490 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
1491 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, std::nullopt,
1492 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, std::nullopt);
1497 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
1498 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, std::nullopt,
1499 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, std::nullopt);
1509 switch (
N->getOpcode()) {
1516 NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
1517 NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
1518 NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
1519 NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
1520 NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
1521 NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
1524 NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
1525 NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
1526 NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
1527 NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
1528 NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
1529 NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
1534 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
1535 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
1536 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
1537 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
1538 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
1539 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
1543 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
1544 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
1545 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
1546 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
1547 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
1548 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
1554 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
1555 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, std::nullopt,
1556 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, std::nullopt);
1561 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
1562 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, std::nullopt,
1563 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, std::nullopt);
1567 switch (
N->getOpcode()) {
1574 NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
1575 NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
1576 NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
1577 NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
1578 NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
1579 NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
1582 NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
1583 NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
1584 NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
1585 NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
1586 NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
1587 NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
1592 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
1593 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
1594 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
1595 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
1596 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
1597 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
1601 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
1602 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
1603 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
1604 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
1605 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
1606 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
1612 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
1613 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, std::nullopt,
1614 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, std::nullopt);
1619 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
1620 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, std::nullopt,
1621 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, std::nullopt);
1627 SDValue Ops[] = { Op1, Chain };
1644 if (OrigType != EltVT &&
1654 for (
unsigned i = 0; i != NumElts; ++i) {
1670bool NVPTXDAGToDAGISel::tryStore(
SDNode *
N) {
1673 assert(
ST->writeMem() &&
"Expected store");
1676 assert((PlainStore || AtomicStore) &&
"Expected store");
1677 EVT StoreVT =
ST->getMemoryVT();
1678 SDNode *NVPTXST =
nullptr;
1681 if (PlainStore && PlainStore->
isIndexed())
1720 "Unexpected vector type");
1733 std::optional<unsigned> Opcode;
1735 Value.getNode()->getSimpleValueType(0).SimpleTy;
1737 if (SelectDirectAddr(BasePtr,
Addr)) {
1738 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
1739 NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
1740 NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
1744 getI32Imm(isVolatile, dl),
1745 getI32Imm(CodeAddrSpace, dl),
1746 getI32Imm(vecType, dl),
1747 getI32Imm(toType, dl),
1748 getI32Imm(toTypeWidth, dl),
1752 }
else if (PointerSize == 64
1755 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
1756 NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
1757 NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
1761 getI32Imm(isVolatile, dl),
1762 getI32Imm(CodeAddrSpace, dl),
1763 getI32Imm(vecType, dl),
1764 getI32Imm(toType, dl),
1765 getI32Imm(toTypeWidth, dl),
1770 }
else if (PointerSize == 64
1773 if (PointerSize == 64)
1776 NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64,
1777 NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
1779 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
1780 NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
1781 NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
1786 getI32Imm(isVolatile, dl),
1787 getI32Imm(CodeAddrSpace, dl),
1788 getI32Imm(vecType, dl),
1789 getI32Imm(toType, dl),
1790 getI32Imm(toTypeWidth, dl),
1796 if (PointerSize == 64)
1798 pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
1799 NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
1800 NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
1802 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
1803 NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
1804 NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
1808 getI32Imm(isVolatile, dl),
1809 getI32Imm(CodeAddrSpace, dl),
1810 getI32Imm(vecType, dl),
1811 getI32Imm(toType, dl),
1812 getI32Imm(toTypeWidth, dl),
1827bool NVPTXDAGToDAGISel::tryStoreVector(
SDNode *
N) {
1831 std::optional<unsigned> Opcode;
1866 switch (
N->getOpcode()) {
1871 N2 =
N->getOperand(3);
1879 N2 =
N->getOperand(5);
1901 if (SelectDirectAddr(N2,
Addr)) {
1902 switch (
N->getOpcode()) {
1907 NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
1908 NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
1909 NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
1913 NVPTX::STV_i8_v4_avar, NVPTX::STV_i16_v4_avar,
1914 NVPTX::STV_i32_v4_avar, std::nullopt,
1915 NVPTX::STV_f32_v4_avar, std::nullopt);
1921 switch (
N->getOpcode()) {
1926 NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
1927 NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
1928 NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
1933 NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi,
1934 std::nullopt, NVPTX::STV_f32_v4_asi, std::nullopt);
1941 if (PointerSize == 64) {
1942 switch (
N->getOpcode()) {
1948 NVPTX::STV_i8_v2_ari_64, NVPTX::STV_i16_v2_ari_64,
1949 NVPTX::STV_i32_v2_ari_64, NVPTX::STV_i64_v2_ari_64,
1950 NVPTX::STV_f32_v2_ari_64, NVPTX::STV_f64_v2_ari_64);
1955 NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, std::nullopt,
1956 NVPTX::STV_f32_v4_ari_64, std::nullopt);
1960 switch (
N->getOpcode()) {
1965 NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
1966 NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
1967 NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
1971 NVPTX::STV_i8_v4_ari, NVPTX::STV_i16_v4_ari,
1972 NVPTX::STV_i32_v4_ari, std::nullopt,
1973 NVPTX::STV_f32_v4_ari, std::nullopt);
1980 if (PointerSize == 64) {
1981 switch (
N->getOpcode()) {
1987 NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
1988 NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
1989 NVPTX::STV_f64_v2_areg_64);
1994 NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, std::nullopt,
1995 NVPTX::STV_f32_v4_areg_64, std::nullopt);
1999 switch (
N->getOpcode()) {
2005 NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
2006 NVPTX::STV_i64_v2_areg, NVPTX::STV_f32_v2_areg,
2007 NVPTX::STV_f64_v2_areg);
2012 NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg,
2013 std::nullopt, NVPTX::STV_f32_v4_areg, std::nullopt);
2034bool NVPTXDAGToDAGISel::tryLoadParam(
SDNode *
Node) {
2042 switch (
Node->getOpcode()) {
2056 EVT EltVT =
Node->getValueType(0);
2059 std::optional<unsigned> Opcode;
2066 NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
2067 NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
2068 NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
2073 NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
2074 NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F32,
2075 NVPTX::LoadParamMemV2F64);
2080 NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32,
2081 std::nullopt, NVPTX::LoadParamMemV4F32, std::nullopt);
2090 }
else if (VecSize == 2) {
2093 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2097 unsigned OffsetVal =
Offset->getAsZExtVal();
2108bool NVPTXDAGToDAGISel::tryStoreRetval(
SDNode *
N) {
2112 unsigned OffsetVal =
Offset->getAsZExtVal();
2116 unsigned NumElts = 1;
2117 switch (
N->getOpcode()) {
2133 for (
unsigned i = 0; i < NumElts; ++i)
2141 std::optional<unsigned> Opcode = 0;
2147 NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
2148 NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
2149 NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
2150 if (Opcode == NVPTX::StoreRetvalI8) {
2154 switch (Ops[0].getSimpleValueType().SimpleTy) {
2158 Opcode = NVPTX::StoreRetvalI8TruncI32;
2161 Opcode = NVPTX::StoreRetvalI8TruncI64;
2168 NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
2169 NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
2170 NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
2174 NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
2175 NVPTX::StoreRetvalV4I32, std::nullopt,
2176 NVPTX::StoreRetvalV4F32, std::nullopt);
// Opcode-selection macros for vector StoreParam instructions.
// NVPTX defines one StoreParamV2/V4 instruction per combination of element
// operand kinds, where each element is either an immediate (suffix letter
// 'i') or a register (suffix letter 'r'), e.g. StoreParamV2I32_ir.
// The macros below expand a per-element boolean array `isimm` into the
// matching _iirr-style opcode name, one conditional per element.
// NOTE(review): the leading numerals fused into each line below (e.g.
// "2191") are line numbers from a sampled extraction of the source file,
// not part of the code — confirm against the original file before building.

// Paste the two per-element kind letters into a V2 opcode name:
// getOpcV2H(I32, i, r) -> NVPTX::StoreParamV2I32_ir.
2191#define getOpcV2H(ty, opKind0, opKind1) \
2192 NVPTX::StoreParamV2##ty##_##opKind0##opKind1

// Resolve element 1 of a V2 store: pick 'i' or 'r' from isImm1.
2194#define getOpcV2H1(ty, opKind0, isImm1) \
2195 (isImm1) ? getOpcV2H(ty, opKind0, i) : getOpcV2H(ty, opKind0, r)

// Entry point for V2: resolve element 0, then delegate element 1.
2197#define getOpcodeForVectorStParamV2(ty, isimm) \
2198 (isimm[0]) ? getOpcV2H1(ty, i, isimm[1]) : getOpcV2H1(ty, r, isimm[1])

// Paste the four per-element kind letters into a V4 opcode name.
2200#define getOpcV4H(ty, opKind0, opKind1, opKind2, opKind3) \
2201 NVPTX::StoreParamV4##ty##_##opKind0##opKind1##opKind2##opKind3

// Resolve element 3 of a V4 store from isImm3.
2203#define getOpcV4H3(ty, opKind0, opKind1, opKind2, isImm3) \
2204 (isImm3) ? getOpcV4H(ty, opKind0, opKind1, opKind2, i) \
2205 : getOpcV4H(ty, opKind0, opKind1, opKind2, r)

// Resolve element 2, then delegate element 3.
2207#define getOpcV4H2(ty, opKind0, opKind1, isImm2, isImm3) \
2208 (isImm2) ? getOpcV4H3(ty, opKind0, opKind1, i, isImm3) \
2209 : getOpcV4H3(ty, opKind0, opKind1, r, isImm3)

// Resolve element 1, then delegate elements 2-3.
2211#define getOpcV4H1(ty, opKind0, isImm1, isImm2, isImm3) \
2212 (isImm1) ? getOpcV4H2(ty, opKind0, i, isImm2, isImm3) \
2213 : getOpcV4H2(ty, opKind0, r, isImm2, isImm3)

// Entry point for V4: resolve element 0, then delegate elements 1-3.
2215#define getOpcodeForVectorStParamV4(ty, isimm) \
2216 (isimm[0]) ? getOpcV4H1(ty, i, isimm[1], isimm[2], isimm[3]) \
2217 : getOpcV4H1(ty, r, isimm[1], isimm[2], isimm[3])

// Top-level dispatcher: choose the V2 or V4 expansion by element count n.
// Callers pass n == 2 or n == 4 and an isimm[] array of that length.
2219#define getOpcodeForVectorStParam(n, ty, isimm) \
2220 (n == 2) ? getOpcodeForVectorStParamV2(ty, isimm) \
2221 : getOpcodeForVectorStParamV4(ty, isimm)
2230 for (
unsigned i = 0; i < NumElts; i++) {
2231 IsImm[i] = (isa<ConstantSDNode>(Ops[i]) || isa<ConstantFPSDNode>(Ops[i]));
2234 if (MemTy == MVT::f32 || MemTy == MVT::f64) {
2256 assert(NumElts == 2 &&
"MVT too large for NumElts > 2");
2261 assert(NumElts == 2 &&
"MVT too large for NumElts > 2");
2267 return (NumElts == 2) ? NVPTX::StoreParamV2I8_rr
2268 : NVPTX::StoreParamV4I8_rrrr;
2271 return (NumElts == 2) ? NVPTX::StoreParamV2I16_rr
2272 : NVPTX::StoreParamV4I16_rrrr;
2277 return (NumElts == 2) ? NVPTX::StoreParamV2I32_rr
2278 : NVPTX::StoreParamV4I32_rrrr;
2284bool NVPTXDAGToDAGISel::tryStoreParam(
SDNode *
N) {
2288 unsigned ParamVal =
Param->getAsZExtVal();
2290 unsigned OffsetVal =
Offset->getAsZExtVal();
2292 SDValue Glue =
N->getOperand(
N->getNumOperands() - 1);
2296 switch (
N->getOpcode()) {
2314 for (
unsigned i = 0; i < NumElts; ++i)
2324 std::optional<unsigned> Opcode;
2325 switch (
N->getOpcode()) {
2333 if (MemTy != MVT::f16 && MemTy != MVT::v2f16 &&
2334 (isa<ConstantSDNode>(Imm) || isa<ConstantFPSDNode>(Imm))) {
2336 if (MemTy == MVT::f32 || MemTy == MVT::f64) {
2348 NVPTX::StoreParamI16_i, NVPTX::StoreParamI32_i,
2349 NVPTX::StoreParamI64_i, NVPTX::StoreParamF32_i,
2350 NVPTX::StoreParamF64_i);
2354 NVPTX::StoreParamI8_r, NVPTX::StoreParamI16_r,
2355 NVPTX::StoreParamI32_r, NVPTX::StoreParamI64_r,
2356 NVPTX::StoreParamF32_r, NVPTX::StoreParamF64_r);
2357 if (Opcode == NVPTX::StoreParamI8_r) {
2361 switch (Ops[0].getSimpleValueType().SimpleTy) {
2365 Opcode = NVPTX::StoreParamI8TruncI32_r;
2368 Opcode = NVPTX::StoreParamI8TruncI64_r;
2386 Opcode = NVPTX::StoreParamI32_r;
2390 MVT::i32, Ops[0], CvtNone);
2395 Opcode = NVPTX::StoreParamI32_r;
2399 MVT::i32, Ops[0], CvtNone);
2414bool NVPTXDAGToDAGISel::tryTextureIntrinsic(
SDNode *
N) {
2417 switch (
N->getOpcode()) {
2418 default:
return false;
2420 Opc = NVPTX::TEX_1D_F32_S32_RR;
2423 Opc = NVPTX::TEX_1D_F32_F32_RR;
2426 Opc = NVPTX::TEX_1D_F32_F32_LEVEL_RR;
2429 Opc = NVPTX::TEX_1D_F32_F32_GRAD_RR;
2432 Opc = NVPTX::TEX_1D_S32_S32_RR;
2435 Opc = NVPTX::TEX_1D_S32_F32_RR;
2438 Opc = NVPTX::TEX_1D_S32_F32_LEVEL_RR;
2441 Opc = NVPTX::TEX_1D_S32_F32_GRAD_RR;
2444 Opc = NVPTX::TEX_1D_U32_S32_RR;
2447 Opc = NVPTX::TEX_1D_U32_F32_RR;
2450 Opc = NVPTX::TEX_1D_U32_F32_LEVEL_RR;
2453 Opc = NVPTX::TEX_1D_U32_F32_GRAD_RR;
2456 Opc = NVPTX::TEX_1D_ARRAY_F32_S32_RR;
2459 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_RR;
2462 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR;
2465 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR;
2468 Opc = NVPTX::TEX_1D_ARRAY_S32_S32_RR;
2471 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_RR;
2474 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR;
2477 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR;
2480 Opc = NVPTX::TEX_1D_ARRAY_U32_S32_RR;
2483 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_RR;
2486 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR;
2489 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR;
2492 Opc = NVPTX::TEX_2D_F32_S32_RR;
2495 Opc = NVPTX::TEX_2D_F32_F32_RR;
2498 Opc = NVPTX::TEX_2D_F32_F32_LEVEL_RR;
2501 Opc = NVPTX::TEX_2D_F32_F32_GRAD_RR;
2504 Opc = NVPTX::TEX_2D_S32_S32_RR;
2507 Opc = NVPTX::TEX_2D_S32_F32_RR;
2510 Opc = NVPTX::TEX_2D_S32_F32_LEVEL_RR;
2513 Opc = NVPTX::TEX_2D_S32_F32_GRAD_RR;
2516 Opc = NVPTX::TEX_2D_U32_S32_RR;
2519 Opc = NVPTX::TEX_2D_U32_F32_RR;
2522 Opc = NVPTX::TEX_2D_U32_F32_LEVEL_RR;
2525 Opc = NVPTX::TEX_2D_U32_F32_GRAD_RR;
2528 Opc = NVPTX::TEX_2D_ARRAY_F32_S32_RR;
2531 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_RR;
2534 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR;
2537 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR;
2540 Opc = NVPTX::TEX_2D_ARRAY_S32_S32_RR;
2543 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_RR;
2546 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR;
2549 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR;
2552 Opc = NVPTX::TEX_2D_ARRAY_U32_S32_RR;
2555 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_RR;
2558 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR;
2561 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR;
2564 Opc = NVPTX::TEX_3D_F32_S32_RR;
2567 Opc = NVPTX::TEX_3D_F32_F32_RR;
2570 Opc = NVPTX::TEX_3D_F32_F32_LEVEL_RR;
2573 Opc = NVPTX::TEX_3D_F32_F32_GRAD_RR;
2576 Opc = NVPTX::TEX_3D_S32_S32_RR;
2579 Opc = NVPTX::TEX_3D_S32_F32_RR;
2582 Opc = NVPTX::TEX_3D_S32_F32_LEVEL_RR;
2585 Opc = NVPTX::TEX_3D_S32_F32_GRAD_RR;
2588 Opc = NVPTX::TEX_3D_U32_S32_RR;
2591 Opc = NVPTX::TEX_3D_U32_F32_RR;
2594 Opc = NVPTX::TEX_3D_U32_F32_LEVEL_RR;
2597 Opc = NVPTX::TEX_3D_U32_F32_GRAD_RR;
2600 Opc = NVPTX::TEX_CUBE_F32_F32_RR;
2603 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL_RR;
2606 Opc = NVPTX::TEX_CUBE_S32_F32_RR;
2609 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL_RR;
2612 Opc = NVPTX::TEX_CUBE_U32_F32_RR;
2615 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL_RR;
2618 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_RR;
2621 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR;
2624 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_RR;
2627 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR;
2630 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_RR;
2633 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR;
2636 Opc = NVPTX::TLD4_R_2D_F32_F32_RR;
2639 Opc = NVPTX::TLD4_G_2D_F32_F32_RR;
2642 Opc = NVPTX::TLD4_B_2D_F32_F32_RR;
2645 Opc = NVPTX::TLD4_A_2D_F32_F32_RR;
2648 Opc = NVPTX::TLD4_R_2D_S32_F32_RR;
2651 Opc = NVPTX::TLD4_G_2D_S32_F32_RR;
2654 Opc = NVPTX::TLD4_B_2D_S32_F32_RR;
2657 Opc = NVPTX::TLD4_A_2D_S32_F32_RR;
2660 Opc = NVPTX::TLD4_R_2D_U32_F32_RR;
2663 Opc = NVPTX::TLD4_G_2D_U32_F32_RR;
2666 Opc = NVPTX::TLD4_B_2D_U32_F32_RR;
2669 Opc = NVPTX::TLD4_A_2D_U32_F32_RR;
2672 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32_R;
2675 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_R;
2678 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_R;
2681 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_R;
2684 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32_R;
2687 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_R;
2690 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_R;
2693 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_R;
2696 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32_R;
2699 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_R;
2702 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_R;
2705 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_R;
2708 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_R;
2711 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_R;
2714 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_R;
2717 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_R;
2720 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_R;
2723 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_R;
2726 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_R;
2729 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_R;
2732 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_R;
2735 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_R;
2738 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_R;
2741 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_R;
2744 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32_R;
2747 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_R;
2750 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_R;
2753 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_R;
2756 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32_R;
2759 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_R;
2762 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_R;
2765 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_R;
2768 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32_R;
2771 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_R;
2774 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_R;
2777 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_R;
2780 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_R;
2783 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_R;
2786 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_R;
2789 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_R;
2792 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_R;
2795 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_R;
2798 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_R;
2801 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_R;
2804 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_R;
2807 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_R;
2810 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_R;
2813 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_R;
2816 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32_R;
2819 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_R;
2822 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_R;
2825 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_R;
2828 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32_R;
2831 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_R;
2834 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_R;
2837 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_R;
2840 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32_R;
2843 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_R;
2846 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_R;
2849 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_R;
2852 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_R;
2855 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_R;
2858 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_R;
2861 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_R;
2864 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_R;
2867 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_R;
2870 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_R;
2873 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_R;
2876 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_R;
2879 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_R;
2882 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_R;
2885 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R;
2888 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R;
2891 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R;
2894 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32_R;
2897 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32_R;
2900 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32_R;
2903 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32_R;
2906 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32_R;
2909 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32_R;
2912 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32_R;
2915 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32_R;
2918 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32_R;
2921 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R;
2924 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_GRAD_R;
2927 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_GRAD_R;
2930 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_GRAD_R;
2933 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD_R;
2936 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD_R;
2939 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD_R;
2951bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(
SDNode *
N) {
2953 switch (
N->getOpcode()) {
2954 default:
return false;
2956 Opc = NVPTX::SULD_1D_I8_CLAMP_R;
2959 Opc = NVPTX::SULD_1D_I16_CLAMP_R;
2962 Opc = NVPTX::SULD_1D_I32_CLAMP_R;
2965 Opc = NVPTX::SULD_1D_I64_CLAMP_R;
2968 Opc = NVPTX::SULD_1D_V2I8_CLAMP_R;
2971 Opc = NVPTX::SULD_1D_V2I16_CLAMP_R;
2974 Opc = NVPTX::SULD_1D_V2I32_CLAMP_R;
2977 Opc = NVPTX::SULD_1D_V2I64_CLAMP_R;
2980 Opc = NVPTX::SULD_1D_V4I8_CLAMP_R;
2983 Opc = NVPTX::SULD_1D_V4I16_CLAMP_R;
2986 Opc = NVPTX::SULD_1D_V4I32_CLAMP_R;
2989 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP_R;
2992 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP_R;
2995 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP_R;
2998 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP_R;
3001 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_R;
3004 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_R;
3007 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_R;
3010 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_R;
3013 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_R;
3016 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_R;
3019 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_R;
3022 Opc = NVPTX::SULD_2D_I8_CLAMP_R;
3025 Opc = NVPTX::SULD_2D_I16_CLAMP_R;
3028 Opc = NVPTX::SULD_2D_I32_CLAMP_R;
3031 Opc = NVPTX::SULD_2D_I64_CLAMP_R;
3034 Opc = NVPTX::SULD_2D_V2I8_CLAMP_R;
3037 Opc = NVPTX::SULD_2D_V2I16_CLAMP_R;
3040 Opc = NVPTX::SULD_2D_V2I32_CLAMP_R;
3043 Opc = NVPTX::SULD_2D_V2I64_CLAMP_R;
3046 Opc = NVPTX::SULD_2D_V4I8_CLAMP_R;
3049 Opc = NVPTX::SULD_2D_V4I16_CLAMP_R;
3052 Opc = NVPTX::SULD_2D_V4I32_CLAMP_R;
3055 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP_R;
3058 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP_R;
3061 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP_R;
3064 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP_R;
3067 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_R;
3070 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_R;
3073 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_R;
3076 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_R;
3079 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_R;
3082 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_R;
3085 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_R;
3088 Opc = NVPTX::SULD_3D_I8_CLAMP_R;
3091 Opc = NVPTX::SULD_3D_I16_CLAMP_R;
3094 Opc = NVPTX::SULD_3D_I32_CLAMP_R;
3097 Opc = NVPTX::SULD_3D_I64_CLAMP_R;
3100 Opc = NVPTX::SULD_3D_V2I8_CLAMP_R;
3103 Opc = NVPTX::SULD_3D_V2I16_CLAMP_R;
3106 Opc = NVPTX::SULD_3D_V2I32_CLAMP_R;
3109 Opc = NVPTX::SULD_3D_V2I64_CLAMP_R;
3112 Opc = NVPTX::SULD_3D_V4I8_CLAMP_R;
3115 Opc = NVPTX::SULD_3D_V4I16_CLAMP_R;
3118 Opc = NVPTX::SULD_3D_V4I32_CLAMP_R;
3121 Opc = NVPTX::SULD_1D_I8_TRAP_R;
3124 Opc = NVPTX::SULD_1D_I16_TRAP_R;
3127 Opc = NVPTX::SULD_1D_I32_TRAP_R;
3130 Opc = NVPTX::SULD_1D_I64_TRAP_R;
3133 Opc = NVPTX::SULD_1D_V2I8_TRAP_R;
3136 Opc = NVPTX::SULD_1D_V2I16_TRAP_R;
3139 Opc = NVPTX::SULD_1D_V2I32_TRAP_R;
3142 Opc = NVPTX::SULD_1D_V2I64_TRAP_R;
3145 Opc = NVPTX::SULD_1D_V4I8_TRAP_R;
3148 Opc = NVPTX::SULD_1D_V4I16_TRAP_R;
3151 Opc = NVPTX::SULD_1D_V4I32_TRAP_R;
3154 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP_R;
3157 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP_R;
3160 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP_R;
3163 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP_R;
3166 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP_R;
3169 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP_R;
3172 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP_R;
3175 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP_R;
3178 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP_R;
3181 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP_R;
3184 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP_R;
3187 Opc = NVPTX::SULD_2D_I8_TRAP_R;
3190 Opc = NVPTX::SULD_2D_I16_TRAP_R;
3193 Opc = NVPTX::SULD_2D_I32_TRAP_R;
3196 Opc = NVPTX::SULD_2D_I64_TRAP_R;
3199 Opc = NVPTX::SULD_2D_V2I8_TRAP_R;
3202 Opc = NVPTX::SULD_2D_V2I16_TRAP_R;
3205 Opc = NVPTX::SULD_2D_V2I32_TRAP_R;
3208 Opc = NVPTX::SULD_2D_V2I64_TRAP_R;
3211 Opc = NVPTX::SULD_2D_V4I8_TRAP_R;
3214 Opc = NVPTX::SULD_2D_V4I16_TRAP_R;
3217 Opc = NVPTX::SULD_2D_V4I32_TRAP_R;
3220 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP_R;
3223 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP_R;
3226 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP_R;
3229 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP_R;
3232 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP_R;
3235 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP_R;
3238 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP_R;
3241 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP_R;
3244 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP_R;
3247 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP_R;
3250 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP_R;
3253 Opc = NVPTX::SULD_3D_I8_TRAP_R;
3256 Opc = NVPTX::SULD_3D_I16_TRAP_R;
3259 Opc = NVPTX::SULD_3D_I32_TRAP_R;
3262 Opc = NVPTX::SULD_3D_I64_TRAP_R;
3265 Opc = NVPTX::SULD_3D_V2I8_TRAP_R;
3268 Opc = NVPTX::SULD_3D_V2I16_TRAP_R;
3271 Opc = NVPTX::SULD_3D_V2I32_TRAP_R;
3274 Opc = NVPTX::SULD_3D_V2I64_TRAP_R;
3277 Opc = NVPTX::SULD_3D_V4I8_TRAP_R;
3280 Opc = NVPTX::SULD_3D_V4I16_TRAP_R;
3283 Opc = NVPTX::SULD_3D_V4I32_TRAP_R;
3286 Opc = NVPTX::SULD_1D_I8_ZERO_R;
3289 Opc = NVPTX::SULD_1D_I16_ZERO_R;
3292 Opc = NVPTX::SULD_1D_I32_ZERO_R;
3295 Opc = NVPTX::SULD_1D_I64_ZERO_R;
3298 Opc = NVPTX::SULD_1D_V2I8_ZERO_R;
3301 Opc = NVPTX::SULD_1D_V2I16_ZERO_R;
3304 Opc = NVPTX::SULD_1D_V2I32_ZERO_R;
3307 Opc = NVPTX::SULD_1D_V2I64_ZERO_R;
3310 Opc = NVPTX::SULD_1D_V4I8_ZERO_R;
3313 Opc = NVPTX::SULD_1D_V4I16_ZERO_R;
3316 Opc = NVPTX::SULD_1D_V4I32_ZERO_R;
3319 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO_R;
3322 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO_R;
3325 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO_R;
3328 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO_R;
3331 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO_R;
3334 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO_R;
3337 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO_R;
3340 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO_R;
3343 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO_R;
3346 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO_R;
3349 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO_R;
3352 Opc = NVPTX::SULD_2D_I8_ZERO_R;
3355 Opc = NVPTX::SULD_2D_I16_ZERO_R;
3358 Opc = NVPTX::SULD_2D_I32_ZERO_R;
3361 Opc = NVPTX::SULD_2D_I64_ZERO_R;
3364 Opc = NVPTX::SULD_2D_V2I8_ZERO_R;
3367 Opc = NVPTX::SULD_2D_V2I16_ZERO_R;
3370 Opc = NVPTX::SULD_2D_V2I32_ZERO_R;
3373 Opc = NVPTX::SULD_2D_V2I64_ZERO_R;
3376 Opc = NVPTX::SULD_2D_V4I8_ZERO_R;
3379 Opc = NVPTX::SULD_2D_V4I16_ZERO_R;
3382 Opc = NVPTX::SULD_2D_V4I32_ZERO_R;
3385 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO_R;
3388 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO_R;
3391 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO_R;
3394 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO_R;
3397 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO_R;
3400 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO_R;
3403 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO_R;
3406 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO_R;
3409 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO_R;
3412 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO_R;
3415 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO_R;
3418 Opc = NVPTX::SULD_3D_I8_ZERO_R;
3421 Opc = NVPTX::SULD_3D_I16_ZERO_R;
3424 Opc = NVPTX::SULD_3D_I32_ZERO_R;
3427 Opc = NVPTX::SULD_3D_I64_ZERO_R;
3430 Opc = NVPTX::SULD_3D_V2I8_ZERO_R;
3433 Opc = NVPTX::SULD_3D_V2I16_ZERO_R;
3436 Opc = NVPTX::SULD_3D_V2I32_ZERO_R;
3439 Opc = NVPTX::SULD_3D_V2I64_ZERO_R;
3442 Opc = NVPTX::SULD_3D_V4I8_ZERO_R;
3445 Opc = NVPTX::SULD_3D_V4I16_ZERO_R;
3448 Opc = NVPTX::SULD_3D_V4I32_ZERO_R;
3463bool NVPTXDAGToDAGISel::tryBFE(
SDNode *
N) {
3470 bool IsSigned =
false;
3475 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
3500 Val =
LHS.getNode()->getOperand(0);
3501 Start =
LHS.getNode()->getOperand(1);
3507 int64_t GoodBits = Start.getValueSizeInBits() - StartVal;
3508 if (NumBits > GoodBits) {
3542 if (isa<ConstantSDNode>(AndLHS)) {
3566 NumBits = NumZeros + NumOnes - ShiftAmt;
3572 if (ShiftAmt < NumZeros) {
3589 Val =
LHS->getOperand(0);
3608 if (OuterShiftAmt < InnerShiftAmt) {
3644 Opc = NVPTX::BFE_S32rii;
3646 Opc = NVPTX::BFE_U32rii;
3650 Opc = NVPTX::BFE_S64rii;
3652 Opc = NVPTX::BFE_U64rii;
3691bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3696 if (SelectDirectAddr(base,
Base)) {
3719bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3731 if (SelectDirectAddr(
Addr.getOperand(0),
Addr)) {
3736 dyn_cast<FrameIndexSDNode>(
Addr.getOperand(0)))
3761bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(
SDNode *
N,
3762 unsigned int spN)
const {
3763 const Value *Src =
nullptr;
3764 if (
MemSDNode *mN = dyn_cast<MemSDNode>(
N)) {
3765 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3767 Src = mN->getMemOperand()->getValue();
3771 if (
auto *PT = dyn_cast<PointerType>(Src->getType()))
3772 return (PT->getAddressSpace() == spN);
3780 std::vector<SDValue> &OutOps) {
3782 switch (ConstraintID) {
3786 if (SelectDirectAddr(
Op, Op0)) {
3787 OutOps.push_back(Op0);
3791 if (SelectADDRri(
Op.getNode(),
Op, Op0, Op1)) {
3792 OutOps.push_back(Op0);
3793 OutOps.push_back(Op1);
3803unsigned NVPTXDAGToDAGISel::GetConvertOpcode(
MVT DestTy,
MVT SrcTy,
3814 return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
3816 return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
3818 return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
3825 return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
3827 return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
3829 return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
3836 return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
3838 return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
3840 return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
3847 return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
3849 return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
3851 return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
3858 return NVPTX::CVT_f32_f16;
3860 return NVPTX::CVT_f64_f16;
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
Atomic ordering constants.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define getOpcodeForVectorStParam(n, ty, isimm)
static unsigned int getCodeAddrSpace(MemSDNode *N)
static int getLdStRegType(EVT VT)
static unsigned pickOpcodeForVectorStParam(SmallVector< SDValue, 8 > &Ops, unsigned NumElts, MVT::SimpleValueType MemTy, SelectionDAG *CurDAG, SDLoc DL)
#define getOpcodeForVectorStParamV2(ty, isimm)
static cl::opt< bool > EnableRsqrtOpt("nvptx-rsqrt-approx-opt", cl::init(true), cl::Hidden, cl::desc("Enable reciprocal sqrt optimization"))
static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ)
static std::optional< unsigned > pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16, unsigned Opcode_i32, std::optional< unsigned > Opcode_i64, unsigned Opcode_f32, std::optional< unsigned > Opcode_f64)
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget, unsigned CodeAddrSpace, MachineFunction *F)
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unsigned getSrcAddressSpace() const
unsigned getDestAddressSpace() const
This is an SDNode representing atomic operations.
const SDValue & getVal() const
const ConstantFP * getConstantFPValue() const
ConstantFP - Floating Point Values [float, double].
This is the shared class of boolean and integer constants.
const ConstantInt * getConstantIntValue() const
uint64_t getZExtValue() const
This class represents an Operation in the Expression.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
FunctionPass class - This class is used to implement most global optimizations.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool isVector() const
Return true if this is a vector value type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getVectorElementType() const
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
A description of a memory reference used in the backend.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
EVT getMemoryVT() const
Return the type of the in-memory value.
NVPTXDAGToDAGISelLegacy(NVPTXTargetMachine &tm, CodeGenOptLevel OptLevel)
bool runOnMachineFunction(MachineFunction &MF) override
bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Implement addressing mode selection for inline asm expressions.
const NVPTXSubtarget * Subtarget
const NVPTXTargetLowering * getTargetLowering() const override
bool useF32FTZ(const MachineFunction &MF) const
bool allowFMA(MachineFunction &MF, CodeGenOptLevel OptLevel) const
bool usePrecSqrtF32() const
bool allowUnsafeFPMath(MachineFunction &MF) const
int getDivF32Level() const
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const SDValue & getOperand(unsigned Num) const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
SDValue getTargetConstantFP(double Val, const SDLoc &DL, EVT VT)
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getValue() const
unsigned getPointerSizeInBits(unsigned AS) const
LLVM Value Representation.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
@ SHL
Shift and rotation operations.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
@ TexUnified1DS32FloatLevel
@ Tex1DArrayFloatFloatLevel
@ TexUnified2DU32FloatGrad
@ Tld4UnifiedG2DFloatFloat
@ TexUnifiedCubeArrayFloatFloatLevel
@ Tld4UnifiedR2DFloatFloat
@ Tex2DArrayS32FloatLevel
@ TexUnified1DArrayFloatFloatLevel
@ TexUnified2DFloatFloatLevel
@ TexUnified3DFloatFloatLevel
@ TexUnified1DFloatFloatLevel
@ TexUnified2DArrayU32Float
@ TexUnified1DArrayFloatFloat
@ Tex1DArrayFloatFloatGrad
@ TexUnifiedCubeArrayU32FloatGrad
@ TexUnified1DFloatFloatGrad
@ TexUnifiedCubeFloatFloatGrad
@ TexUnified2DArrayFloatFloat
@ TexUnified3DU32FloatLevel
@ TexUnified1DArrayU32Float
@ TexUnified2DArrayFloatFloatLevel
@ TexUnified2DFloatFloatGrad
@ TexUnified2DArrayU32S32
@ TexUnifiedCubeArrayS32FloatLevel
@ TexUnified1DArrayS32Float
@ TexUnified1DArrayS32FloatLevel
@ TexUnified2DS32FloatLevel
@ TexUnified3DU32FloatGrad
@ TexUnifiedCubeU32FloatLevel
@ TexUnified2DArrayU32FloatGrad
@ TexUnifiedCubeFloatFloatLevel
@ TexUnified1DArrayFloatS32
@ TexUnifiedCubeS32FloatLevel
@ TexUnified1DS32FloatGrad
@ Tex2DArrayFloatFloatLevel
@ TexUnifiedCubeArrayFloatFloat
@ TexUnifiedCubeArrayFloatFloatGrad
@ TexUnifiedCubeFloatFloat
@ TexUnified1DArrayU32S32
@ TexUnified3DFloatFloatGrad
@ Tld4UnifiedA2DFloatFloat
@ TexUnified3DS32FloatGrad
@ TexUnified2DU32FloatLevel
@ TexUnified1DArrayS32S32
@ TexCubeArrayFloatFloatLevel
@ TexUnified1DU32FloatGrad
@ TexCubeArrayS32FloatLevel
@ Tex2DArrayU32FloatLevel
@ Tex1DArrayU32FloatLevel
@ TexUnified2DArrayU32FloatLevel
@ TexUnified1DArrayFloatFloatGrad
@ TexUnifiedCubeS32FloatGrad
@ TexCubeArrayU32FloatLevel
@ TexUnified3DS32FloatLevel
@ TexUnifiedCubeArrayS32FloatGrad
@ TexUnified2DArrayS32Float
@ Tex2DArrayFloatFloatGrad
@ TexUnifiedCubeArrayS32Float
@ TexUnified2DArrayS32FloatLevel
@ Tex1DArrayS32FloatLevel
@ TexUnifiedCubeArrayU32FloatLevel
@ TexUnified2DArrayS32S32
@ TexUnified2DArrayFloatFloatGrad
@ TexUnifiedCubeU32FloatGrad
@ Tld4UnifiedB2DFloatFloat
@ TexUnified1DArrayU32FloatLevel
@ TexUnified1DArrayS32FloatGrad
@ TexUnified2DS32FloatGrad
@ TexUnified2DArrayS32FloatGrad
@ TexUnified1DU32FloatLevel
@ TexUnifiedCubeArrayU32Float
@ TexUnified2DArrayFloatS32
@ TexUnified1DArrayU32FloatGrad
initializer< Ty > init(const Ty &Val)
constexpr uint64_t PointerSize
aarch64 pointer size.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
FunctionPass * createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOptLevel OptLevel)
createNVPTXISelDag - This pass converts a legalized DAG into a NVPTX-specific DAG,...
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
CodeGenOptLevel
Code generation optimization level.
AtomicOrdering
Atomic ordering for LLVM's memory model.
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
bool isKernelFunction(const Function &F)
Implement std::hash so that hash_code can be used in STL containers.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isVector() const
Return true if this is a vector value type.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.