llvm.org GIT mirror llvm / cb8f983
[NVPTX] Fix handling of ldg/ldu intrinsics. The address space of the pointer must be global (1) for these intrinsics. There must also be alignment metadata attached to the intrinsic calls, e.g. `%val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0`, where `!0 = metadata !{i32 4}`. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211939 91177308-0d34-0410-b5e6-96231b3b80d8 Justin Holewinski 6 years ago
8 changed file(s) with 429 addition(s) and 113 deletion(s). Raw diff Collapse all Expand all
795795
796796
797797 // Generated within nvvm. Use for ldu on sm_20 or later
798 // @TODO: Revisit this, Changed LLVMAnyPointerType to LLVMPointerType
799798 def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
800 [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
799 [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
801800 "llvm.nvvm.ldu.global.i">;
802801 def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty],
803 [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
802 [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
804803 "llvm.nvvm.ldu.global.f">;
805804 def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
806 [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
805 [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
807806 "llvm.nvvm.ldu.global.p">;
808807
809808 // Generated within nvvm. Use for ldg on sm_35 or later
810809 def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty],
811 [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
810 [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
812811 "llvm.nvvm.ldg.global.i">;
813812 def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty],
814 [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
813 [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
815814 "llvm.nvvm.ldg.global.f">;
816815 def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty],
817 [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
816 [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
818817 "llvm.nvvm.ldg.global.p">;
819818
820819 // Use for generic pointers
140140 case NVPTXISD::LDGV4:
141141 case NVPTXISD::LDUV2:
142142 case NVPTXISD::LDUV4:
143 ResNode = SelectLDGLDUVector(N);
143 ResNode = SelectLDGLDU(N);
144144 break;
145145 case NVPTXISD::StoreV2:
146146 case NVPTXISD::StoreV4:
165165 break;
166166 case ISD::INTRINSIC_WO_CHAIN:
167167 ResNode = SelectIntrinsicNoChain(N);
168 break;
169 case ISD::INTRINSIC_W_CHAIN:
170 ResNode = SelectIntrinsicChain(N);
168171 break;
169172 case NVPTXISD::Tex1DFloatI32:
170173 case NVPTXISD::Tex1DFloatFloat:
272275 return SelectCode(N);
273276 }
274277
278 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
279 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
280 switch (IID) {
281 default:
282 return NULL;
283 case Intrinsic::nvvm_ldg_global_f:
284 case Intrinsic::nvvm_ldg_global_i:
285 case Intrinsic::nvvm_ldg_global_p:
286 case Intrinsic::nvvm_ldu_global_f:
287 case Intrinsic::nvvm_ldu_global_i:
288 case Intrinsic::nvvm_ldu_global_p:
289 return SelectLDGLDU(N);
290 }
291 }
292
275293 static unsigned int getCodeAddrSpace(MemSDNode *N,
276294 const NVPTXSubtarget &Subtarget) {
277295 const Value *Src = N->getMemOperand()->getValue();
9891007 return LD;
9901008 }
9911009
992 SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
1010 SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
9931011
9941012 SDValue Chain = N->getOperand(0);
995 SDValue Op1 = N->getOperand(1);
1013 SDValue Op1;
1014 MemSDNode *Mem;
1015 bool IsLDG = true;
1016
1017 // If this is an LDG/LDU intrinsic, the address is the third operand. If it's an
1018 // LDG/LDU SD node (from custom vector handling), then it's the second operand
1019 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1020 Op1 = N->getOperand(2);
1021 Mem = cast<MemIntrinsicSDNode>(N);
1022 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1023 switch (IID) {
1024 default:
1025 return NULL;
1026 case Intrinsic::nvvm_ldg_global_f:
1027 case Intrinsic::nvvm_ldg_global_i:
1028 case Intrinsic::nvvm_ldg_global_p:
1029 IsLDG = true;
1030 break;
1031 case Intrinsic::nvvm_ldu_global_f:
1032 case Intrinsic::nvvm_ldu_global_i:
1033 case Intrinsic::nvvm_ldu_global_p:
1034 IsLDG = false;
1035 break;
1036 }
1037 } else {
1038 Op1 = N->getOperand(1);
1039 Mem = cast<MemSDNode>(N);
1040 }
1041
9961042 unsigned Opcode;
9971043 SDLoc DL(N);
9981044 SDNode *LD;
999 MemSDNode *Mem = cast<MemSDNode>(N);
10001045 SDValue Base, Offset, Addr;
10011046
1002 EVT EltVT = Mem->getMemoryVT().getVectorElementType();
1047 EVT EltVT = Mem->getMemoryVT();
1048 if (EltVT.isVector()) {
1049 EltVT = EltVT.getVectorElementType();
1050 }
10031051
10041052 if (SelectDirectAddr(Op1, Addr)) {
10051053 switch (N->getOpcode()) {
10061054 default:
10071055 return nullptr;
1056 case ISD::INTRINSIC_W_CHAIN:
1057 if (IsLDG) {
1058 switch (EltVT.getSimpleVT().SimpleTy) {
1059 default:
1060 return nullptr;
1061 case MVT::i8:
1062 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
1063 break;
1064 case MVT::i16:
1065 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
1066 break;
1067 case MVT::i32:
1068 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
1069 break;
1070 case MVT::i64:
1071 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
1072 break;
1073 case MVT::f32:
1074 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
1075 break;
1076 case MVT::f64:
1077 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
1078 break;
1079 }
1080 } else {
1081 switch (EltVT.getSimpleVT().SimpleTy) {
1082 default:
1083 return nullptr;
1084 case MVT::i8:
1085 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
1086 break;
1087 case MVT::i16:
1088 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
1089 break;
1090 case MVT::i32:
1091 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
1092 break;
1093 case MVT::i64:
1094 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
1095 break;
1096 case MVT::f32:
1097 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
1098 break;
1099 case MVT::f64:
1100 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
1101 break;
1102 }
1103 }
1104 break;
10081105 case NVPTXISD::LDGV2:
10091106 switch (EltVT.getSimpleVT().SimpleTy) {
10101107 default:
11001197 switch (N->getOpcode()) {
11011198 default:
11021199 return nullptr;
1200 case ISD::INTRINSIC_W_CHAIN:
1201 if (IsLDG) {
1202 switch (EltVT.getSimpleVT().SimpleTy) {
1203 default:
1204 return nullptr;
1205 case MVT::i8:
1206 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
1207 break;
1208 case MVT::i16:
1209 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
1210 break;
1211 case MVT::i32:
1212 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
1213 break;
1214 case MVT::i64:
1215 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
1216 break;
1217 case MVT::f32:
1218 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
1219 break;
1220 case MVT::f64:
1221 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
1222 break;
1223 }
1224 } else {
1225 switch (EltVT.getSimpleVT().SimpleTy) {
1226 default:
1227 return nullptr;
1228 case MVT::i8:
1229 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
1230 break;
1231 case MVT::i16:
1232 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
1233 break;
1234 case MVT::i32:
1235 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
1236 break;
1237 case MVT::i64:
1238 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
1239 break;
1240 case MVT::f32:
1241 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
1242 break;
1243 case MVT::f64:
1244 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
1245 break;
1246 }
1247 }
1248 break;
11031249 case NVPTXISD::LDGV2:
11041250 switch (EltVT.getSimpleVT().SimpleTy) {
11051251 default:
11891335 switch (N->getOpcode()) {
11901336 default:
11911337 return nullptr;
1338 case ISD::INTRINSIC_W_CHAIN:
1339 if (IsLDG) {
1340 switch (EltVT.getSimpleVT().SimpleTy) {
1341 default:
1342 return nullptr;
1343 case MVT::i8:
1344 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
1345 break;
1346 case MVT::i16:
1347 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
1348 break;
1349 case MVT::i32:
1350 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
1351 break;
1352 case MVT::i64:
1353 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
1354 break;
1355 case MVT::f32:
1356 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
1357 break;
1358 case MVT::f64:
1359 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
1360 break;
1361 }
1362 } else {
1363 switch (EltVT.getSimpleVT().SimpleTy) {
1364 default:
1365 return nullptr;
1366 case MVT::i8:
1367 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
1368 break;
1369 case MVT::i16:
1370 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
1371 break;
1372 case MVT::i32:
1373 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
1374 break;
1375 case MVT::i64:
1376 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
1377 break;
1378 case MVT::f32:
1379 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
1380 break;
1381 case MVT::f64:
1382 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
1383 break;
1384 }
1385 }
1386 break;
11921387 case NVPTXISD::LDGV2:
11931388 switch (EltVT.getSimpleVT().SimpleTy) {
11941389 default:
12841479 switch (N->getOpcode()) {
12851480 default:
12861481 return nullptr;
1482 case ISD::INTRINSIC_W_CHAIN:
1483 if (IsLDG) {
1484 switch (EltVT.getSimpleVT().SimpleTy) {
1485 default:
1486 return nullptr;
1487 case MVT::i8:
1488 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
1489 break;
1490 case MVT::i16:
1491 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
1492 break;
1493 case MVT::i32:
1494 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
1495 break;
1496 case MVT::i64:
1497 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
1498 break;
1499 case MVT::f32:
1500 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
1501 break;
1502 case MVT::f64:
1503 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
1504 break;
1505 }
1506 } else {
1507 switch (EltVT.getSimpleVT().SimpleTy) {
1508 default:
1509 return nullptr;
1510 case MVT::i8:
1511 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
1512 break;
1513 case MVT::i16:
1514 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
1515 break;
1516 case MVT::i32:
1517 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
1518 break;
1519 case MVT::i64:
1520 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
1521 break;
1522 case MVT::f32:
1523 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
1524 break;
1525 case MVT::f64:
1526 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
1527 break;
1528 }
1529 }
1530 break;
12871531 case NVPTXISD::LDGV2:
12881532 switch (EltVT.getSimpleVT().SimpleTy) {
12891533 default:
13731617 switch (N->getOpcode()) {
13741618 default:
13751619 return nullptr;
1620 case ISD::INTRINSIC_W_CHAIN:
1621 if (IsLDG) {
1622 switch (EltVT.getSimpleVT().SimpleTy) {
1623 default:
1624 return nullptr;
1625 case MVT::i8:
1626 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
1627 break;
1628 case MVT::i16:
1629 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
1630 break;
1631 case MVT::i32:
1632 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
1633 break;
1634 case MVT::i64:
1635 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
1636 break;
1637 case MVT::f32:
1638 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
1639 break;
1640 case MVT::f64:
1641 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
1642 break;
1643 }
1644 } else {
1645 switch (EltVT.getSimpleVT().SimpleTy) {
1646 default:
1647 return nullptr;
1648 case MVT::i8:
1649 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
1650 break;
1651 case MVT::i16:
1652 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
1653 break;
1654 case MVT::i32:
1655 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
1656 break;
1657 case MVT::i64:
1658 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
1659 break;
1660 case MVT::f32:
1661 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
1662 break;
1663 case MVT::f64:
1664 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
1665 break;
1666 }
1667 }
1668 break;
13761669 case NVPTXISD::LDGV2:
13771670 switch (EltVT.getSimpleVT().SimpleTy) {
13781671 default:
14651758 }
14661759
14671760 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1468 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1761 MemRefs0[0] = Mem->getMemOperand();
14691762 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
14701763
14711764 return LD;
5858
5959 SDNode *Select(SDNode *N) override;
6060 SDNode *SelectIntrinsicNoChain(SDNode *N);
61 SDNode *SelectIntrinsicChain(SDNode *N);
6162 SDNode *SelectTexSurfHandle(SDNode *N);
6263 SDNode *SelectLoad(SDNode *N);
6364 SDNode *SelectLoadVector(SDNode *N);
64 SDNode *SelectLDGLDUVector(SDNode *N);
65 SDNode *SelectLDGLDU(SDNode *N);
6566 SDNode *SelectStore(SDNode *N);
6667 SDNode *SelectStoreVector(SDNode *N);
6768 SDNode *SelectLoadParam(SDNode *N);
23622362
23632363 case Intrinsic::nvvm_ldu_global_i:
23642364 case Intrinsic::nvvm_ldu_global_f:
2365 case Intrinsic::nvvm_ldu_global_p:
2365 case Intrinsic::nvvm_ldu_global_p: {
23662366
23672367 Info.opc = ISD::INTRINSIC_W_CHAIN;
23682368 if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
23692369 Info.memVT = getValueType(I.getType());
2370 else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
2370 else if(Intrinsic == Intrinsic::nvvm_ldu_global_p)
2371 Info.memVT = getPointerTy();
2372 else
23712373 Info.memVT = getValueType(I.getType());
2372 else
2373 Info.memVT = MVT::f32;
23742374 Info.ptrVal = I.getArgOperand(0);
23752375 Info.offset = 0;
23762376 Info.vol = 0;
23772377 Info.readMem = true;
23782378 Info.writeMem = false;
2379 Info.align = 0;
2379
2380 // alignment is available as metadata.
2381 // Grab it and set the alignment.
2382 assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata");
2383 MDNode *AlignMD = I.getMetadata("align");
2384 assert(AlignMD && "Must have a non-null MDNode");
2385 assert(AlignMD->getNumOperands() == 1 && "Must have a single operand");
2386 Value *Align = AlignMD->getOperand(0);
2387 int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue();
2388 Info.align = Alignment;
2389
23802390 return true;
2391 }
2392 case Intrinsic::nvvm_ldg_global_i:
2393 case Intrinsic::nvvm_ldg_global_f:
2394 case Intrinsic::nvvm_ldg_global_p: {
2395
2396 Info.opc = ISD::INTRINSIC_W_CHAIN;
2397 if (Intrinsic == Intrinsic::nvvm_ldg_global_i)
2398 Info.memVT = getValueType(I.getType());
2399 else if(Intrinsic == Intrinsic::nvvm_ldg_global_p)
2400 Info.memVT = getPointerTy();
2401 else
2402 Info.memVT = getValueType(I.getType());
2403 Info.ptrVal = I.getArgOperand(0);
2404 Info.offset = 0;
2405 Info.vol = 0;
2406 Info.readMem = true;
2407 Info.writeMem = false;
2408
2409 // alignment is available as metadata.
2410 // Grab it and set the alignment.
2411 assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata");
2412 MDNode *AlignMD = I.getMetadata("align");
2413 assert(AlignMD && "Must have a non-null MDNode");
2414 assert(AlignMD->getNumOperands() == 1 && "Must have a single operand");
2415 Value *Align = AlignMD->getOperand(0);
2416 int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue();
2417 Info.align = Alignment;
2418
2419 return true;
2420 }
23812421
23822422 case Intrinsic::nvvm_tex_1d_v4f32_i32:
23832423 case Intrinsic::nvvm_tex_1d_v4f32_f32:
13731373 // Support for ldu on sm_20 or later
13741374 //-----------------------------------
13751375
1376 def ldu_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldu_global_i node:$ptr), [{
1377 MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
1378 return M->getMemoryVT() == MVT::i8;
1379 }]>;
1380
13811376 // Scalar
1382 // @TODO: Revisit this, Changed imemAny to imem
1383 multiclass LDU_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
1377 multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
13841378 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
13851379 !strconcat("ldu.global.", TyStr),
1386 [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDU]>;
1380 []>, Requires<[hasLDU]>;
13871381 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
13881382 !strconcat("ldu.global.", TyStr),
1389 [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDU]>;
1390 def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
1383 []>, Requires<[hasLDU]>;
1384 def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
13911385 !strconcat("ldu.global.", TyStr),
1392 [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
1393 Requires<[hasLDU]>;
1386 []>, Requires<[hasLDU]>;
13941387 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
13951388 !strconcat("ldu.global.", TyStr),
1396 [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDU]>;
1389 []>, Requires<[hasLDU]>;
13971390 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
13981391 !strconcat("ldu.global.", TyStr),
1399 [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDU]>;
1392 []>, Requires<[hasLDU]>;
14001393 }
14011394
1402 multiclass LDU_G_NOINTRIN<string TyStr, NVPTXRegClass regclass, PatFrag IntOp> {
1403 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1404 !strconcat("ldu.global.", TyStr),
1405 [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDU]>;
1406 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1407 !strconcat("ldu.global.", TyStr),
1408 [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDU]>;
1409 def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
1410 !strconcat("ldu.global.", TyStr),
1411 [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
1412 Requires<[hasLDU]>;
1413 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1414 !strconcat("ldu.global.", TyStr),
1415 [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDU]>;
1416 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1417 !strconcat("ldu.global.", TyStr),
1418 [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDU]>;
1419 }
1420
1421 defm INT_PTX_LDU_GLOBAL_i8 : LDU_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs,
1422 ldu_i8>;
1423 defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs,
1424 int_nvvm_ldu_global_i>;
1425 defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs,
1426 int_nvvm_ldu_global_i>;
1427 defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs,
1428 int_nvvm_ldu_global_i>;
1429 defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs,
1430 int_nvvm_ldu_global_f>;
1431 defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs,
1432 int_nvvm_ldu_global_f>;
1433 defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs,
1434 int_nvvm_ldu_global_p>;
1435 defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs,
1436 int_nvvm_ldu_global_p>;
1395 defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
1396 defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
1397 defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1398 defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1399 defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
1400 defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
1401 defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1402 defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
14371403
14381404 // vector
14391405
15031469 // Support for ldg on sm_35 or later
15041470 //-----------------------------------
15051471
1506 def ldg_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldg_global_i node:$ptr), [{
1507 MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
1508 return M->getMemoryVT() == MVT::i8;
1509 }]>;
1510
1511 multiclass LDG_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
1472 multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
15121473 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
15131474 !strconcat("ld.global.nc.", TyStr),
1514 [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
1475 []>, Requires<[hasLDG]>;
15151476 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
15161477 !strconcat("ld.global.nc.", TyStr),
1517 [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
1518 def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
1478 []>, Requires<[hasLDG]>;
1479 def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
15191480 !strconcat("ld.global.nc.", TyStr),
1520 [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
1521 Requires<[hasLDG]>;
1481 []>, Requires<[hasLDG]>;
15221482 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
15231483 !strconcat("ld.global.nc.", TyStr),
1524 [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
1484 []>, Requires<[hasLDG]>;
15251485 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
15261486 !strconcat("ld.global.nc.", TyStr),
1527 [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
1487 []>, Requires<[hasLDG]>;
15281488 }
15291489
1530 multiclass LDG_G_NOINTRIN<string TyStr, NVPTXRegClass regclass, PatFrag IntOp> {
1531 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1532 !strconcat("ld.global.nc.", TyStr),
1533 [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
1534 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1535 !strconcat("ld.global.nc.", TyStr),
1536 [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
1537 def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
1538 !strconcat("ld.global.nc.", TyStr),
1539 [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
1540 Requires<[hasLDG]>;
1541 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1542 !strconcat("ld.global.nc.", TyStr),
1543 [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
1544 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1545 !strconcat("ld.global.nc.", TyStr),
1546 [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
1547 }
1548
15491490 defm INT_PTX_LDG_GLOBAL_i8
1550 : LDG_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs, ldg_i8>;
1491 : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
15511492 defm INT_PTX_LDG_GLOBAL_i16
1552 : LDG_G<"u16 \t$result, [$src];", Int16Regs, int_nvvm_ldg_global_i>;
1493 : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
15531494 defm INT_PTX_LDG_GLOBAL_i32
1554 : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_i>;
1495 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
15551496 defm INT_PTX_LDG_GLOBAL_i64
1556 : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_i>;
1497 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
15571498 defm INT_PTX_LDG_GLOBAL_f32
1558 : LDG_G<"f32 \t$result, [$src];", Float32Regs, int_nvvm_ldg_global_f>;
1499 : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
15591500 defm INT_PTX_LDG_GLOBAL_f64
1560 : LDG_G<"f64 \t$result, [$src];", Float64Regs, int_nvvm_ldg_global_f>;
1501 : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
15611502 defm INT_PTX_LDG_GLOBAL_p32
1562 : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_p>;
1503 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
15631504 defm INT_PTX_LDG_GLOBAL_p64
1564 : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_p>;
1505 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
15651506
15661507 // vector
15671508
11
22 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
33
4 declare i8 @llvm.nvvm.ldu.global.i.i8(i8*)
4 declare i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8*)
55
66 define i8 @foo(i8* %a) {
77 ; Ensure we properly truncate off the high-order 24 bits
88 ; CHECK: ldu.global.u8
99 ; CHECK: cvt.u32.u16
1010 ; CHECK: and.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, 255
11 %val = tail call i8 @llvm.nvvm.ldu.global.i.i8(i8* %a)
11 %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8* %a), !align !0
1212 ret i8 %val
1313 }
14
15 !0 = metadata !{i32 4}
0 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
1
2
3 declare i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr)
4 declare i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr)
5 declare i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr)
6 declare i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr)
7
8
9 ; CHECK: func0
10 define i8 @func0(i8 addrspace(1)* %ptr) {
11 ; ldu.global.u8
12 %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr), !align !0
13 ret i8 %val
14 }
15
16 ; CHECK: func1
17 define i32 @func1(i32 addrspace(1)* %ptr) {
18 ; ldu.global.u32
19 %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0
20 ret i32 %val
21 }
22
23 ; CHECK: func2
24 define i8 @func2(i8 addrspace(1)* %ptr) {
25 ; ld.global.nc.u8
26 %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr), !align !0
27 ret i8 %val
28 }
29
30 ; CHECK: func3
31 define i32 @func3(i32 addrspace(1)* %ptr) {
32 ; ld.global.nc.u32
33 %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0
34 ret i32 %val
35 }
36
37
38
39 !0 = metadata !{i32 4}
66 ; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+32];
77 ; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+36];
88 %p2 = getelementptr i32* %a, i32 8
9 %t1 = call i32 @llvm.nvvm.ldu.global.i.i32(i32* %p2), !align !1
9 %t1 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p2), !align !1
1010 %p3 = getelementptr i32* %a, i32 9
11 %t2 = call i32 @llvm.nvvm.ldu.global.i.i32(i32* %p3), !align !1
11 %t2 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p3), !align !1
1212 %t3 = mul i32 %t1, %t2
1313 store i32 %t3, i32* %a
1414 ret void
1616
1717 !1 = metadata !{ i32 4 }
1818
19 declare i32 @llvm.nvvm.ldu.global.i.i32(i32*)
19 declare i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32*)
2020 declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()