llvm.org GIT mirror llvm / 0cbbf04
[NVPTX] Removed always-true predicates in NVPTX. NVPTX stopped supporting GPUs older than sm_20 (Fermi) quite a while back. Removal of support of pre-Fermi GPUs made a lot of predicates in the NVPTX backend pointless as they can't ever be false any more. It's time to retire them. NFC intended. Differential Revision: https://reviews.llvm.org/D43843 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@326349 91177308-0d34-0410-b5e6-96231b3b80d8 Artem Belevich 2 years ago
5 changed file(s) with 132 addition(s) and 216 deletion(s). Raw diff Collapse all Expand all
973973 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
974974 if (NTM.getDrvInterface() == NVPTX::NVCL)
975975 O << ", texmode_independent";
976 else {
977 if (!STI.hasDouble())
978 O << ", map_f64_to_f32";
979 }
980976
981977 if (MAI->doesSupportDebugInformation())
982978 O << ", debug";
416416 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
417417 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
418418
419 if (STI.hasROT64()) {
420 setOperationAction(ISD::ROTL, MVT::i64, Legal);
421 setOperationAction(ISD::ROTR, MVT::i64, Legal);
422 } else {
423 setOperationAction(ISD::ROTL, MVT::i64, Expand);
424 setOperationAction(ISD::ROTR, MVT::i64, Expand);
425 }
426 if (STI.hasROT32()) {
427 setOperationAction(ISD::ROTL, MVT::i32, Legal);
428 setOperationAction(ISD::ROTR, MVT::i32, Legal);
429 } else {
430 setOperationAction(ISD::ROTL, MVT::i32, Expand);
431 setOperationAction(ISD::ROTR, MVT::i32, Expand);
432 }
419 // TODO: we may consider expanding ROTL/ROTR on older GPUs. Currently on GPUs
420 // that don't have h/w rotation we lower them to multi-instruction assembly.
421 // See ROT*_sw in NVPTXIntrInfo.td
422 setOperationAction(ISD::ROTL, MVT::i64, Legal);
423 setOperationAction(ISD::ROTR, MVT::i64, Legal);
424 setOperationAction(ISD::ROTL, MVT::i32, Legal);
425 setOperationAction(ISD::ROTR, MVT::i32, Legal);
433426
434427 setOperationAction(ISD::ROTL, MVT::i16, Expand);
435428 setOperationAction(ISD::ROTR, MVT::i16, Expand);
110110 //===----------------------------------------------------------------------===//
111111
112112
113 def hasAtomRedG32 : Predicate<"Subtarget->hasAtomRedG32()">;
114 def hasAtomRedS32 : Predicate<"Subtarget->hasAtomRedS32()">;
115 def hasAtomRedGen32 : Predicate<"Subtarget->hasAtomRedGen32()">;
116 def useAtomRedG32forGen32 :
117 Predicate<"!Subtarget->hasAtomRedGen32() && Subtarget->hasAtomRedG32()">;
118 def hasBrkPt : Predicate<"Subtarget->hasBrkPt()">;
119 def hasAtomRedG64 : Predicate<"Subtarget->hasAtomRedG64()">;
120 def hasAtomRedS64 : Predicate<"Subtarget->hasAtomRedS64()">;
121 def hasAtomRedGen64 : Predicate<"Subtarget->hasAtomRedGen64()">;
122 def useAtomRedG64forGen64 :
123 Predicate<"!Subtarget->hasAtomRedGen64() && Subtarget->hasAtomRedG64()">;
124 def hasAtomAddF32 : Predicate<"Subtarget->hasAtomAddF32()">;
125113 def hasAtomAddF64 : Predicate<"Subtarget->hasAtomAddF64()">;
126114 def hasAtomScope : Predicate<"Subtarget->hasAtomScope()">;
127115 def hasAtomBitwise64 : Predicate<"Subtarget->hasAtomBitwise64()">;
128116 def hasAtomMinMax64 : Predicate<"Subtarget->hasAtomMinMax64()">;
129117 def hasVote : Predicate<"Subtarget->hasVote()">;
130118 def hasDouble : Predicate<"Subtarget->hasDouble()">;
131 def reqPTX20 : Predicate<"Subtarget->reqPTX20()">;
132119 def hasLDG : Predicate<"Subtarget->hasLDG()">;
133120 def hasLDU : Predicate<"Subtarget->hasLDU()">;
134 def hasGenericLdSt : Predicate<"Subtarget->hasGenericLdSt()">;
135121
136122 def doF32FTZ : Predicate<"useF32FTZ()">;
137123 def doNoF32FTZ : Predicate<"!useF32FTZ()">;
960946 (ins f32imm:$a, Float32Regs:$b),
961947 "rcp.rn.ftz.f32 \t$dst, $b;",
962948 [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>,
963 Requires<[reqPTX20, doF32FTZ]>;
949 Requires<[doF32FTZ]>;
964950 def FDIV321r_prec :
965951 NVPTXInst<(outs Float32Regs:$dst),
966952 (ins f32imm:$a, Float32Regs:$b),
967953 "rcp.rn.f32 \t$dst, $b;",
968 [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>,
969 Requires<[reqPTX20]>;
954 [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>;
970955 //
971956 // F32 Accurate division
972957 //
975960 (ins Float32Regs:$a, Float32Regs:$b),
976961 "div.rn.ftz.f32 \t$dst, $a, $b;",
977962 [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>,
978 Requires<[doF32FTZ, reqPTX20]>;
963 Requires<[doF32FTZ]>;
979964 def FDIV32ri_prec_ftz :
980965 NVPTXInst<(outs Float32Regs:$dst),
981966 (ins Float32Regs:$a, f32imm:$b),
982967 "div.rn.ftz.f32 \t$dst, $a, $b;",
983968 [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>,
984 Requires<[doF32FTZ, reqPTX20]>;
969 Requires<[doF32FTZ]>;
985970 def FDIV32rr_prec :
986971 NVPTXInst<(outs Float32Regs:$dst),
987972 (ins Float32Regs:$a, Float32Regs:$b),
988973 "div.rn.f32 \t$dst, $a, $b;",
989 [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>,
990 Requires<[reqPTX20]>;
974 [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>;
991975 def FDIV32ri_prec :
992976 NVPTXInst<(outs Float32Regs:$dst),
993977 (ins Float32Regs:$a, f32imm:$b),
994978 "div.rn.f32 \t$dst, $a, $b;",
995 [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>,
996 Requires<[reqPTX20]>;
979 [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>;
997980
998981 //
999982 // FMA
10241024
10251025 multiclass F_ATOMIC_2_imp
10261026 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1027 Operand IMMType, SDNode IMM, Predicate Pred> {
1027 Operand IMMType, SDNode IMM, list<Predicate> Pred> {
10281028 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
10291029 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
10301030 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
1031 Requires<[Pred]>;
1031 Requires<Pred>;
10321032 def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
10331033 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""),
10341034 [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
1035 Requires<[Pred]>;
1035 Requires<Pred>;
10361036 }
10371037 multiclass F_ATOMIC_2
1038 string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM, Predicate Pred> {
1038 string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
1039 list<Predicate> Pred = []> {
10391040 defm p32 : F_ATOMIC_2_imp
10401041 IntOp, IMMType, IMM, Pred>;
10411042 defm p64 : F_ATOMIC_2_imp
10451046 // has 2 operands, neg the second one
10461047 multiclass F_ATOMIC_2_NEG_imp
10471048 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1048 Operand IMMType, Predicate Pred> {
1049 Operand IMMType, list<Predicate> Pred> {
10491050 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
10501051 !strconcat(
10511052 "{{ \n\t",
10541055 "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
10551056 "}}"),
10561057 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
1057 Requires<[Pred]>;
1058 Requires<Pred>;
10581059 }
10591060 multiclass F_ATOMIC_2_NEG
10601061 string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType,
1061 Predicate Pred> {
1062 list<Predicate> Pred = []> {
10621063 defm p32: F_ATOMIC_2_NEG_imp
10631064 IntOp, IMMType, Pred> ;
10641065 defm p64: F_ATOMIC_2_NEG_imp
10681069 // has 3 operands
10691070 multiclass F_ATOMIC_3_imp
10701071 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1071 Operand IMMType, Predicate Pred> {
1072 Operand IMMType, list<Predicate> Pred> {
10721073 def reg : NVPTXInst<(outs regclass:$dst),
10731074 (ins ptrclass:$addr, regclass:$b, regclass:$c),
10741075 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
10751076 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
1076 Requires<[Pred]>;
1077 Requires<Pred>;
10771078
10781079 def imm1 : NVPTXInst<(outs regclass:$dst),
10791080 (ins ptrclass:$addr, IMMType:$b, regclass:$c),
10801081 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
10811082 [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
1082 Requires<[Pred]>;
1083 Requires<Pred>;
10831084
10841085 def imm2 : NVPTXInst<(outs regclass:$dst),
10851086 (ins ptrclass:$addr, regclass:$b, IMMType:$c),
10861087 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
10871088 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
1088 Requires<[Pred]>;
1089 Requires<Pred>;
10891090
10901091 def imm3 : NVPTXInst<(outs regclass:$dst),
10911092 (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
10921093 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
10931094 [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
1094 Requires<[Pred]>;
1095 Requires<Pred>;
10951096 }
10961097 multiclass F_ATOMIC_3
1097 string OpcStr, PatFrag IntOp, Operand IMMType, Predicate Pred> {
1098 string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
10981099 defm p32 : F_ATOMIC_3_imp
10991100 IntOp, IMMType, Pred>;
11001101 defm p64 : F_ATOMIC_3_imp
11291130 (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
11301131
11311132 defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2
1132 atomic_load_add_32_g, i32imm, imm, hasAtomRedG32>;
1133 atomic_load_add_32_g, i32imm, imm>;
11331134 defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2
1134 atomic_load_add_32_s, i32imm, imm, hasAtomRedS32>;
1135 atomic_load_add_32_s, i32imm, imm>;
11351136 defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2
1136 atomic_load_add_32_gen, i32imm, imm, hasAtomRedGen32>;
1137 atomic_load_add_32_gen, i32imm, imm>;
11371138 defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2
1138 ".add", atomic_load_add_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1139 ".add", atomic_load_add_32_gen, i32imm, imm>;
11391140
11401141 defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2
1141 atomic_load_add_64_g, i64imm, imm, hasAtomRedG64>;
1142 atomic_load_add_64_g, i64imm, imm>;
11421143 defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2
1143 atomic_load_add_64_s, i64imm, imm, hasAtomRedS64>;
1144 atomic_load_add_64_s, i64imm, imm>;
11441145 defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2
1145 atomic_load_add_64_gen, i64imm, imm, hasAtomRedGen64>;
1146 atomic_load_add_64_gen, i64imm, imm>;
11461147 defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2
1147 ".add", atomic_load_add_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1148 ".add", atomic_load_add_64_gen, i64imm, imm>;
11481149
11491150 defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2
1150 atomic_load_add_f32_g, f32imm, fpimm, hasAtomAddF32>;
1151 atomic_load_add_f32_g, f32imm, fpimm>;
11511152 defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2
1152 atomic_load_add_f32_s, f32imm, fpimm, hasAtomAddF32>;
1153 atomic_load_add_f32_s, f32imm, fpimm>;
11531154 defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2
1154 atomic_load_add_f32_gen, f32imm, fpimm, hasAtomAddF32>;
1155 atomic_load_add_f32_gen, f32imm, fpimm>;
11551156
11561157 defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2
1157 atomic_load_add_f64_g, f64imm, fpimm, hasAtomAddF64>;
1158 atomic_load_add_f64_g, f64imm, fpimm, [hasAtomAddF64]>;
11581159 defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2
1159 atomic_load_add_f64_s, f64imm, fpimm, hasAtomAddF64>;
1160 atomic_load_add_f64_s, f64imm, fpimm, [hasAtomAddF64]>;
11601161 defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2
1161 atomic_load_add_f64_gen, f64imm, fpimm, hasAtomAddF64>;
1162 atomic_load_add_f64_gen, f64imm, fpimm, [hasAtomAddF64]>;
11621163
11631164 // atom_sub
11641165
11761177 (atomic_load_sub_64 node:$a, node:$b)>;
11771178
11781179 defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG
1179 atomic_load_sub_32_g, i32imm, hasAtomRedG32>;
1180 atomic_load_sub_32_g, i32imm>;
11801181 defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG
1181 atomic_load_sub_64_g, i64imm, hasAtomRedG64>;
1182 atomic_load_sub_64_g, i64imm>;
11821183 defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG
1183 atomic_load_sub_32_gen, i32imm, hasAtomRedGen32>;
1184 atomic_load_sub_32_gen, i32imm>;
11841185 defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG
1185 ".add", atomic_load_sub_32_gen, i32imm, useAtomRedG32forGen32>;
1186 ".add", atomic_load_sub_32_gen, i32imm>;
11861187 defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG
1187 atomic_load_sub_32_s, i32imm, hasAtomRedS32>;
1188 atomic_load_sub_32_s, i32imm>;
11881189 defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG
1189 atomic_load_sub_64_s, i64imm, hasAtomRedS64>;
1190 atomic_load_sub_64_s, i64imm>;
11901191 defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG
1191 atomic_load_sub_64_gen, i64imm, hasAtomRedGen64>;
1192 atomic_load_sub_64_gen, i64imm>;
11921193 defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG
1193 ".add", atomic_load_sub_64_gen, i64imm, useAtomRedG64forGen64>;
1194 ".add", atomic_load_sub_64_gen, i64imm>;
11941195
11951196 // atom_swap
11961197
12081209 (atomic_swap_64 node:$a, node:$b)>;
12091210
12101211 defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2
1211 atomic_swap_32_g, i32imm, imm, hasAtomRedG32>;
1212 atomic_swap_32_g, i32imm, imm>;
12121213 defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2
1213 atomic_swap_32_s, i32imm, imm, hasAtomRedS32>;
1214 atomic_swap_32_s, i32imm, imm>;
12141215 defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2
1215 atomic_swap_32_gen, i32imm, imm, hasAtomRedGen32>;
1216 atomic_swap_32_gen, i32imm, imm>;
12161217 defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2
1217 ".exch", atomic_swap_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1218 ".exch", atomic_swap_32_gen, i32imm, imm>;
12181219 defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2
1219 atomic_swap_64_g, i64imm, imm, hasAtomRedG64>;
1220 atomic_swap_64_g, i64imm, imm>;
12201221 defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2
1221 atomic_swap_64_s, i64imm, imm, hasAtomRedS64>;
1222 atomic_swap_64_s, i64imm, imm>;
12221223 defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2
1223 atomic_swap_64_gen, i64imm, imm, hasAtomRedGen64>;
1224 atomic_swap_64_gen, i64imm, imm>;
12241225 defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2
1225 ".exch", atomic_swap_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1226 ".exch", atomic_swap_64_gen, i64imm, imm>;
12261227
12271228 // atom_max
12281229
12521253 (atomic_load_umax_64 node:$a, node:$b)>;
12531254
12541255 defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2
1255 ".max", atomic_load_max_32_g, i32imm, imm, hasAtomRedG32>;
1256 ".max", atomic_load_max_32_g, i32imm, imm>;
12561257 defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2
1257 ".max", atomic_load_max_32_s, i32imm, imm, hasAtomRedS32>;
1258 ".max", atomic_load_max_32_s, i32imm, imm>;
12581259 defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2
1259 atomic_load_max_32_gen, i32imm, imm, hasAtomRedGen32>;
1260 atomic_load_max_32_gen, i32imm, imm>;
12601261 defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2
1261 ".s32", ".max", atomic_load_max_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1262 ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
12621263 defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2
1263 ".max", atomic_load_max_64_g, i64imm, imm, hasAtomRedG64>;
1264 ".max", atomic_load_max_64_g, i64imm, imm>;
12641265 defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2
1265 ".max", atomic_load_max_64_s, i64imm, imm, hasAtomRedS64>;
1266 ".max", atomic_load_max_64_s, i64imm, imm>;
12661267 defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2
1267 atomic_load_max_64_gen, i64imm, imm, hasAtomRedGen64>;
1268 atomic_load_max_64_gen, i64imm, imm>;
12681269 defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2
1269 ".s64", ".max", atomic_load_max_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1270 ".s64", ".max", atomic_load_max_64_gen, i64imm, imm>;
12701271 defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2
1271 ".max", atomic_load_umax_32_g, i32imm, imm, hasAtomRedG32>;
1272 ".max", atomic_load_umax_32_g, i32imm, imm>;
12721273 defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2
1273 ".max", atomic_load_umax_32_s, i32imm, imm, hasAtomRedS32>;
1274 ".max", atomic_load_umax_32_s, i32imm, imm>;
12741275 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2
1275 atomic_load_umax_32_gen, i32imm, imm, hasAtomRedGen32>;
1276 atomic_load_umax_32_gen, i32imm, imm>;
12761277 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2
1277 ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1278 ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
12781279 defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2
1279 ".max", atomic_load_umax_64_g, i64imm, imm, hasAtomRedG64>;
1280 ".max", atomic_load_umax_64_g, i64imm, imm>;
12801281 defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2
1281 ".max", atomic_load_umax_64_s, i64imm, imm, hasAtomRedS64>;
1282 ".max", atomic_load_umax_64_s, i64imm, imm>;
12821283 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2
1283 atomic_load_umax_64_gen, i64imm, imm, hasAtomRedGen64>;
1284 atomic_load_umax_64_gen, i64imm, imm>;
12841285 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2
1285 ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1286 ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm>;
12861287
12871288 // atom_min
12881289
13121313 (atomic_load_umin_64 node:$a, node:$b)>;
13131314
13141315 defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2
1315 ".min", atomic_load_min_32_g, i32imm, imm, hasAtomRedG32>;
1316 ".min", atomic_load_min_32_g, i32imm, imm>;
13161317 defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2
1317 ".min", atomic_load_min_32_s, i32imm, imm, hasAtomRedS32>;
1318 ".min", atomic_load_min_32_s, i32imm, imm>;
13181319 defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2
1319 atomic_load_min_32_gen, i32imm, imm, hasAtomRedGen32>;
1320 atomic_load_min_32_gen, i32imm, imm>;
13201321 defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2
1321 ".s32", ".min", atomic_load_min_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1322 ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
13221323 defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2
1323 ".min", atomic_load_min_64_g, i64imm, imm, hasAtomRedG64>;
1324 ".min", atomic_load_min_64_g, i64imm, imm>;
13241325 defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2
1325 ".min", atomic_load_min_64_s, i64imm, imm, hasAtomRedS64>;
1326 ".min", atomic_load_min_64_s, i64imm, imm>;
13261327 defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2
1327 atomic_load_min_64_gen, i64imm, imm, hasAtomRedGen64>;
1328 atomic_load_min_64_gen, i64imm, imm>;
13281329 defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2
1329 ".s64", ".min", atomic_load_min_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1330 ".s64", ".min", atomic_load_min_64_gen, i64imm, imm>;
13301331 defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2
1331 ".min", atomic_load_umin_32_g, i32imm, imm, hasAtomRedG32>;
1332 ".min", atomic_load_umin_32_g, i32imm, imm>;
13321333 defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2
1333 ".min", atomic_load_umin_32_s, i32imm, imm, hasAtomRedS32>;
1334 ".min", atomic_load_umin_32_s, i32imm, imm>;
13341335 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2
1335 atomic_load_umin_32_gen, i32imm, imm, hasAtomRedGen32>;
1336 atomic_load_umin_32_gen, i32imm, imm>;
13361337 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2
1337 ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1338 ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
13381339 defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2
1339 ".min", atomic_load_umin_64_g, i64imm, imm, hasAtomRedG64>;
1340 ".min", atomic_load_umin_64_g, i64imm, imm>;
13401341 defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2
1341 ".min", atomic_load_umin_64_s, i64imm, imm, hasAtomRedS64>;
1342 ".min", atomic_load_umin_64_s, i64imm, imm>;
13421343 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2
1343 atomic_load_umin_64_gen, i64imm, imm, hasAtomRedGen64>;
1344 atomic_load_umin_64_gen, i64imm, imm>;
13441345 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2
1345 ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1346 ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm>;
13461347
13471348 // atom_inc atom_dec
13481349
13601361 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
13611362
13621363 defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2
1363 atomic_load_inc_32_g, i32imm, imm, hasAtomRedG32>;
1364 atomic_load_inc_32_g, i32imm, imm>;
13641365 defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2
1365 atomic_load_inc_32_s, i32imm, imm, hasAtomRedS32>;
1366 atomic_load_inc_32_s, i32imm, imm>;
13661367 defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2
1367 atomic_load_inc_32_gen, i32imm, imm, hasAtomRedGen32>;
1368 atomic_load_inc_32_gen, i32imm, imm>;
13681369 defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2
1369 ".inc", atomic_load_inc_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1370 ".inc", atomic_load_inc_32_gen, i32imm, imm>;
13701371 defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2
1371 atomic_load_dec_32_g, i32imm, imm, hasAtomRedG32>;
1372 atomic_load_dec_32_g, i32imm, imm>;
13721373 defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2
1373 atomic_load_dec_32_s, i32imm, imm, hasAtomRedS32>;
1374 atomic_load_dec_32_s, i32imm, imm>;
13741375 defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2
1375 atomic_load_dec_32_gen, i32imm, imm, hasAtomRedGen32>;
1376 atomic_load_dec_32_gen, i32imm, imm>;
13761377 defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2
1377 ".dec", atomic_load_dec_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1378 ".dec", atomic_load_dec_32_gen, i32imm, imm>;
13781379
13791380 // atom_and
13801381
13921393 (atomic_load_and_64 node:$a, node:$b)>;
13931394
13941395 defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2
1395 atomic_load_and_32_g, i32imm, imm, hasAtomRedG32>;
1396 atomic_load_and_32_g, i32imm, imm>;
13961397 defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2
1397 atomic_load_and_32_s, i32imm, imm, hasAtomRedS32>;
1398 atomic_load_and_32_s, i32imm, imm>;
13981399 defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2
1399 atomic_load_and_32_gen, i32imm, imm, hasAtomRedGen32>;
1400 atomic_load_and_32_gen, i32imm, imm>;
14001401 defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2
1401 ".and", atomic_load_and_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1402 ".and", atomic_load_and_32_gen, i32imm, imm>;
14021403 defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2
1403 atomic_load_and_64_g, i64imm, imm, hasAtomRedG64>;
1404 atomic_load_and_64_g, i64imm, imm>;
14041405 defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2
1405 atomic_load_and_64_s, i64imm, imm, hasAtomRedS64>;
1406 atomic_load_and_64_s, i64imm, imm>;
14061407 defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2
1407 atomic_load_and_64_gen, i64imm, imm, hasAtomRedGen64>;
1408 atomic_load_and_64_gen, i64imm, imm>;
14081409 defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2
1409 ".and", atomic_load_and_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1410 ".and", atomic_load_and_64_gen, i64imm, imm>;
14101411
14111412 // atom_or
14121413
14241425 (atomic_load_or_64 node:$a, node:$b)>;
14251426
14261427 defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2
1427 atomic_load_or_32_g, i32imm, imm, hasAtomRedG32>;
1428 atomic_load_or_32_g, i32imm, imm>;
14281429 defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2
1429 atomic_load_or_32_gen, i32imm, imm, hasAtomRedGen32>;
1430 atomic_load_or_32_gen, i32imm, imm>;
14301431 defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2
1431 ".or", atomic_load_or_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1432 ".or", atomic_load_or_32_gen, i32imm, imm>;
14321433 defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2
1433 atomic_load_or_32_s, i32imm, imm, hasAtomRedS32>;
1434 atomic_load_or_32_s, i32imm, imm>;
14341435 defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2
1435 atomic_load_or_64_g, i64imm, imm, hasAtomRedG64>;
1436 atomic_load_or_64_g, i64imm, imm>;
14361437 defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2
1437 atomic_load_or_64_gen, i64imm, imm, hasAtomRedGen64>;
1438 atomic_load_or_64_gen, i64imm, imm>;
14381439 defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2
1439 ".or", atomic_load_or_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1440 ".or", atomic_load_or_64_gen, i64imm, imm>;
14401441 defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2
1441 atomic_load_or_64_s, i64imm, imm, hasAtomRedS64>;
1442 atomic_load_or_64_s, i64imm, imm>;
14421443
14431444 // atom_xor
14441445
14561457 (atomic_load_xor_64 node:$a, node:$b)>;
14571458
14581459 defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2
1459 atomic_load_xor_32_g, i32imm, imm, hasAtomRedG32>;
1460 atomic_load_xor_32_g, i32imm, imm>;
14601461 defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2
1461 atomic_load_xor_32_s, i32imm, imm, hasAtomRedS32>;
1462 atomic_load_xor_32_s, i32imm, imm>;
14621463 defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2
1463 atomic_load_xor_32_gen, i32imm, imm, hasAtomRedGen32>;
1464 atomic_load_xor_32_gen, i32imm, imm>;
14641465 defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2
1465 ".xor", atomic_load_xor_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1466 ".xor", atomic_load_xor_32_gen, i32imm, imm>;
14661467 defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2
1467 atomic_load_xor_64_g, i64imm, imm, hasAtomRedG64>;
1468 atomic_load_xor_64_g, i64imm, imm>;
14681469 defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2
1469 atomic_load_xor_64_s, i64imm, imm, hasAtomRedS64>;
1470 atomic_load_xor_64_s, i64imm, imm>;
14701471 defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2
1471 atomic_load_xor_64_gen, i64imm, imm, hasAtomRedGen64>;
1472 atomic_load_xor_64_gen, i64imm, imm>;
14721473 defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2
1473 ".xor", atomic_load_xor_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1474 ".xor", atomic_load_xor_64_gen, i64imm, imm>;
14741475
14751476 // atom_cas
14761477
14881489 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
14891490
14901491 defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3
1491 atomic_cmp_swap_32_g, i32imm, hasAtomRedG32>;
1492 atomic_cmp_swap_32_g, i32imm>;
14921493 defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3
1493 atomic_cmp_swap_32_s, i32imm, hasAtomRedS32>;
1494 atomic_cmp_swap_32_s, i32imm>;
14941495 defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3
1495 atomic_cmp_swap_32_gen, i32imm, hasAtomRedGen32>;
1496 atomic_cmp_swap_32_gen, i32imm>;
14961497 defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3
1497 ".cas", atomic_cmp_swap_32_gen, i32imm, useAtomRedG32forGen32>;
1498 ".cas", atomic_cmp_swap_32_gen, i32imm>;
14981499 defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3
1499 atomic_cmp_swap_64_g, i64imm, hasAtomRedG64>;
1500 atomic_cmp_swap_64_g, i64imm>;
15001501 defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3
1501 atomic_cmp_swap_64_s, i64imm, hasAtomRedS64>;
1502 atomic_cmp_swap_64_s, i64imm>;
15021503 defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3
1503 atomic_cmp_swap_64_gen, i64imm, hasAtomRedGen64>;
1504 atomic_cmp_swap_64_gen, i64imm>;
15041505 defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3
1505 ".cas", atomic_cmp_swap_64_gen, i64imm, useAtomRedG64forGen64>;
1506 ".cas", atomic_cmp_swap_64_gen, i64imm>;
15061507
15071508 // Support for scoped atomic operations. Matches
15081509 // int_nvvm_atomic_{op}_{space}_{type}_{scope}
16531654 defm _u32 : ATOM2S_impl;
16541655 defm _u64 : ATOM2S_impl;
16551656 defm _f32 : ATOM2S_impl
1656 [hasAtomAddF32]>;
1657 []>;
16571658 defm _f64 : ATOM2S_impl
16581659 [hasAtomAddF64]>;
16591660 }
19351936 multiclass NG_TO_G {
19361937 def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
19371938 !strconcat("cvta.", Str, ".u32 \t$result, $src;"),
1938 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
1939 Requires<[hasGenericLdSt]>;
1939 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
19401940 def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
19411941 !strconcat("cvta.", Str, ".u64 \t$result, $src;"),
1942 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
1943 Requires<[hasGenericLdSt]>;
1944
1945 // @TODO: Are these actually needed? I believe global addresses will be copied
1946 // to register values anyway.
1947 /*def __addr_yes : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src),
1948 !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")),
1949 [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
1950 Requires<[hasGenericLdSt]>;
1951 def __addr_yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src),
1952 !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")),
1953 [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
1954 Requires<[hasGenericLdSt]>;*/
1955
1956 def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1957 "mov.u32 \t$result, $src;",
1958 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
1959 def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1960 "mov.u64 \t$result, $src;",
19611942 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
1962
1963 // @TODO: Are these actually needed? I believe global addresses will be copied
1964 // to register values anyway.
1965 /*def _addr_no : NVPTXInst<(outs Int32Regs:$result), (ins imem:$src),
1966 "mov.u32 \t$result, $src;",
1967 [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;
1968 def _addr_no_64 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
1969 "mov.u64 \t$result, $src;",
1970 [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;*/
19711943 }
19721944
19731945 multiclass G_TO_NG {
19741946 def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
19751947 !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"),
1976 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
1977 Requires<[hasGenericLdSt]>;
1948 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
19781949 def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
19791950 !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
1980 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
1981 Requires<[hasGenericLdSt]>;
1982 def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1983 "mov.u32 \t$result, $src;",
1984 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
1985 def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1986 "mov.u64 \t$result, $src;",
19871951 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
19881952 }
19891953
7272 return &TSInfo;
7373 }
7474
75 bool hasBrkPt() const { return SmVersion >= 11; }
76 bool hasAtomRedG32() const { return SmVersion >= 11; }
77 bool hasAtomRedS32() const { return SmVersion >= 12; }
78 bool hasAtomRedG64() const { return SmVersion >= 12; }
79 bool hasAtomRedS64() const { return SmVersion >= 20; }
80 bool hasAtomRedGen32() const { return SmVersion >= 20; }
81 bool hasAtomRedGen64() const { return SmVersion >= 20; }
82 bool hasAtomAddF32() const { return SmVersion >= 20; }
8375 bool hasAtomAddF64() const { return SmVersion >= 60; }
8476 bool hasAtomScope() const { return HasAtomScope; }
8577 bool hasAtomBitwise64() const { return SmVersion >= 32; }
8678 bool hasAtomMinMax64() const { return SmVersion >= 32; }
87 bool hasVote() const { return SmVersion >= 12; }
88 bool hasDouble() const { return SmVersion >= 13; }
89 bool reqPTX20() const { return SmVersion >= 20; }
90 bool hasF32FTZ() const { return SmVersion >= 20; }
91 bool hasFMAF32() const { return SmVersion >= 20; }
92 bool hasFMAF64() const { return SmVersion >= 13; }
9379 bool hasLDG() const { return SmVersion >= 32; }
9480 bool hasLDU() const { return ((SmVersion >= 20) && (SmVersion < 30)); }
95 bool hasGenericLdSt() const { return SmVersion >= 20; }
9681 inline bool hasHWROT32() const { return SmVersion >= 32; }
97 inline bool hasSWROT32() const {
98 return ((SmVersion >= 20) && (SmVersion < 32));
99 }
100 inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); }
101 inline bool hasROT64() const { return SmVersion >= 20; }
10282 bool hasImageHandles() const;
10383 bool hasFP16Math() const { return SmVersion >= 53; }
10484 bool allowFP16Math() const;