llvm.org GIT mirror llvm / 8860d3c
[lanai] Custom lowering of SHL_PARTS Summary: Implement custom lowering of SHL_PARTS to enable lowering of left shift with larger than 32-bit shifts. Reviewers: eliben, majnemer Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D27232 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@288541 91177308-0d34-0410-b5e6-96231b3b80d8 Jacques Pienaar 2 years ago
3 changed file(s) with 77 addition(s) and 1 deletion(s). Raw diff Collapse all Expand all
103103
104104 setOperationAction(ISD::ROTR, MVT::i32, Expand);
105105 setOperationAction(ISD::ROTL, MVT::i32, Expand);
106 setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
106 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
107107 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
108108 setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
109109
178178 return LowerSETCC(Op, DAG);
179179 case ISD::SETCCE:
180180 return LowerSETCCE(Op, DAG);
181 case ISD::SHL_PARTS:
182 return LowerSHL_PARTS(Op, DAG);
181183 case ISD::SRL_PARTS:
182184 return LowerSRL_PARTS(Op, DAG);
183185 case ISD::VASTART:
12321234 }
12331235 }
12341236
1237 SDValue LanaiTargetLowering::LowerSHL_PARTS(SDValue Op,
1238 SelectionDAG &DAG) const {
1239 EVT VT = Op.getValueType();
1240 unsigned VTBits = VT.getSizeInBits();
1241 SDLoc dl(Op);
1242 assert(Op.getNumOperands() == 3 && "Unexpected SHL!");
1243 SDValue ShOpLo = Op.getOperand(0);
1244 SDValue ShOpHi = Op.getOperand(1);
1245 SDValue ShAmt = Op.getOperand(2);
1246
1247 // Performs the following for (ShOpLo + (ShOpHi << 32)) << ShAmt:
1248 // LoBitsForHi = (ShAmt == 0) ? 0 : (ShOpLo >> (32-ShAmt))
1249 // HiBitsForHi = ShOpHi << ShAmt
1250 // Hi = (ShAmt >= 32) ? (ShOpLo << (ShAmt-32)) : (LoBitsForHi | HiBitsForHi)
1251 // Lo = (ShAmt >= 32) ? 0 : (ShOpLo << ShAmt)
1252 // return (Hi << 32) | Lo;
1253
1254 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
1255 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
1256 SDValue LoBitsForHi = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
1257
1258 // If ShAmt == 0, we just calculated "(SRL ShOpLo, 32)" which is "undef". We
1259 // wanted 0, so CSEL it directly.
1260 SDValue Zero = DAG.getConstant(0, dl, MVT::i32);
1261 SDValue SetCC = DAG.getSetCC(dl, MVT::i32, ShAmt, Zero, ISD::SETEQ);
1262 LoBitsForHi = DAG.getSelect(dl, MVT::i32, SetCC, Zero, LoBitsForHi);
1263
1264 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
1265 DAG.getConstant(VTBits, dl, MVT::i32));
1266 SDValue HiBitsForHi = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
1267 SDValue HiForNormalShift =
1268 DAG.getNode(ISD::OR, dl, VT, LoBitsForHi, HiBitsForHi);
1269
1270 SDValue HiForBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
1271
1272 SetCC = DAG.getSetCC(dl, MVT::i32, ExtraShAmt, Zero, ISD::SETGE);
1273 SDValue Hi =
1274 DAG.getSelect(dl, MVT::i32, SetCC, HiForBigShift, HiForNormalShift);
1275
1276 // Lanai shifts of larger than register sizes are wrapped rather than
1277 // clamped, so we can't just emit "lo << b" if b is too big.
1278 SDValue LoForNormalShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
1279 SDValue Lo = DAG.getSelect(
1280 dl, MVT::i32, SetCC, DAG.getConstant(0, dl, MVT::i32), LoForNormalShift);
1281
1282 SDValue Ops[2] = {Lo, Hi};
1283 return DAG.getMergeValues(Ops, dl);
1284 }
1285
12351286 SDValue LanaiTargetLowering::LowerSRL_PARTS(SDValue Op,
12361287 SelectionDAG &DAG) const {
12371288 MVT VT = Op.getSimpleValueType();
8787 SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
8888 SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
8989 SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) const;
90 SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
9091 SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
9192 SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
9293
0 ; RUN: llc < %s -mtriple=lanai-unknown-unknown | FileCheck %s
1
2 ; Test left-shift i64 lowering does not result in call being inserted.
3
4 ; CHECK-LABEL: shift
5 ; CHECK-NOT: bt __ashldi3
6 ; CHECK: or %r0, 0x0, %r[[T0:[0-9]+]]
7 ; CHECK: mov 0x20, %r[[T1:[0-9]+]]
8 ; CHECK: sub %r[[T1]], %r[[ShAmt:[0-9]+]], %r[[T1]]
9 ; CHECK: sub %r0, %r[[T1]], %r[[T1]]
10 ; CHECK: sh %r[[ShOpB:[0-9]+]], %r[[T1]], %r[[T1]]
11 ; CHECK: sub.f %r[[ShAmt]], 0x0, %r0
12 ; CHECK: sel.eq %r0, %r[[T1]], %r[[T1]]
13 ; CHECK: sh %r[[ShOpA:[0-9]+]], %r[[ShAmt]], %r[[T2:[0-9]+]]
14 ; CHECK: or %r[[T1]], %r[[T2]], %rv
15 ; CHECK: sub.f %r[[ShAmt]], 0x20, %r[[T1]]
16 ; CHECK: sh.pl %r[[ShOpB]], %r[[T1]], %rv
17 ; CHECK: sh.mi %r[[ShOpB]], %r[[ShAmt]], %r[[T0]]
18
; Shifts the 64-bit first argument left by the second argument, which is
; zero-extended from 32 to 64 bits first.
define i64 @shift(i64 inreg %value, i32 inreg %amount) {
entry:
  %amount.wide = zext i32 %amount to i64
  %shifted = shl i64 %value, %amount.wide
  ret i64 %shifted
}