llvm.org GIT mirror: llvm / f40deed
Shifter ops are not always free. Do not fold them (especially when forming complex load / store addressing modes) when they have a higher cost and more than one use.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@117509 91177308-0d34-0410-b5e6-96231b3b80d8

Evan Cheng, 9 years ago
4 changed files with 183 additions and 25 deletions.
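In short: on Cortex-A9 a shifter operand is no longer folded into a data-processing instruction or a load/store addressing mode unless the shift is free there. A minimal standalone sketch of the new cost test, with hypothetical stand-ins for the subtarget query and the ARM_AM::ShiftOpc enum (the in-tree version is ARMDAGToDAGISel::isShifterOpProfitable in the first file below):

#include <cstdio>

// Hypothetical stand-in for ARM_AM::ShiftOpc; the in-tree code also uses
// Subtarget->isCortexA9() instead of a plain bool parameter.
enum ShiftOpc { Lsl, Lsr, Asr, Ror };

// Folding a shifter op is always profitable, except on Cortex-A9 where a
// shift with more than one use is only free when it is "lsl #2".
bool isShifterOpProfitable(bool isCortexA9, bool shiftHasOneUse,
                           ShiftOpc Op, unsigned ShAmt) {
  if (!isCortexA9)
    return true;            // Other cores: shifter ops are effectively free.
  if (shiftHasOneUse)
    return true;            // A single user duplicates no work when folded.
  return Op == Lsl && ShAmt == 2;  // R << 2 is free even on A9.
}

int main() {
  // Multi-use "lsl #3" on A9: do not fold; materialize the shift once.
  std::printf("%d\n", isShifterOpProfitable(true, false, Lsl, 3));   // 0
  // The same shift on A8: fold it into the addressing mode.
  std::printf("%d\n", isShifterOpProfitable(false, false, Lsl, 3));  // 1
  return 0;
}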
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -77,8 +77,12 @@

   SDNode *Select(SDNode *N);

+  bool isShifterOpProfitable(const SDValue &Shift,
+                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
   bool SelectShifterOperandReg(SDValue N, SDValue &A,
                                SDValue &B, SDValue &C);
+  bool SelectShiftShifterOperandReg(SDValue N, SDValue &A,
+                                    SDValue &B, SDValue &C);
   bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
   bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

@@ -245,6 +249,17 @@
 }


+bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
+                                            ARM_AM::ShiftOpc ShOpcVal,
+                                            unsigned ShAmt) {
+  if (!Subtarget->isCortexA9())
+    return true;
+  if (Shift.hasOneUse())
+    return true;
+  // R << 2 is free.
+  return ShOpcVal == ARM_AM::lsl && ShAmt == 2;
+}
+
 bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
                                               SDValue &BaseReg,
                                               SDValue &ShReg,
@@ -260,6 +275,32 @@

   BaseReg = N.getOperand(0);
   unsigned ShImmVal = 0;
+  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+    ShReg = CurDAG->getRegister(0, MVT::i32);
+    ShImmVal = RHS->getZExtValue() & 31;
+  } else {
+    ShReg = N.getOperand(1);
+    if (!isShifterOpProfitable(N, ShOpcVal, ShImmVal))
+      return false;
+  }
+  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
+                                  MVT::i32);
+  return true;
+}
+
+bool ARMDAGToDAGISel::SelectShiftShifterOperandReg(SDValue N,
+                                                   SDValue &BaseReg,
+                                                   SDValue &ShReg,
+                                                   SDValue &Opc) {
+  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+
+  // Don't match base register only case. That is matched to a separate
+  // lower complexity pattern with explicit register operand.
+  if (ShOpcVal == ARM_AM::no_shift) return false;
+
+  BaseReg = N.getOperand(0);
+  unsigned ShImmVal = 0;
+  // Do not check isShifterOpProfitable. This must return true.
   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
     ShReg = CurDAG->getRegister(0, MVT::i32);
     ShImmVal = RHS->getZExtValue() & 31;
@@ -320,7 +361,8 @@

 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                       SDValue &Opc) {
-  if (N.getOpcode() == ISD::MUL) {
+  if (N.getOpcode() == ISD::MUL &&
+      (!Subtarget->isCortexA9() || N.hasOneUse())) {
     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
       // X * [3,5,9] -> X + X * [2,4,8] etc.
       int RHSC = (int)RHS->getZExtValue();
@@ -356,6 +398,10 @@
     }
   }

+  if (Subtarget->isCortexA9() && !N.hasOneUse())
+    // Compute R +/- (R << N) and reuse it.
+    return false;
+
   // Otherwise this is R +/- [possibly shifted] R.
   ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
@@ -370,23 +416,36 @@
     if (ConstantSDNode *Sh =
            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
       ShAmt = Sh->getZExtValue();
-      Offset = N.getOperand(1).getOperand(0);
+      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
+        Offset = N.getOperand(1).getOperand(0);
+      else {
+        ShAmt = 0;
+        ShOpcVal = ARM_AM::no_shift;
+      }
     } else {
       ShOpcVal = ARM_AM::no_shift;
     }
   }

   // Try matching (R shl C) + (R).
-  if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) {
+  if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift &&
+      !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
     if (ShOpcVal != ARM_AM::no_shift) {
       // Check to see if the RHS of the shift is a constant, if not, we can't
       // fold it.
       if (ConstantSDNode *Sh =
              dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
         ShAmt = Sh->getZExtValue();
-        Offset = N.getOperand(0).getOperand(0);
-        Base = N.getOperand(1);
+        if (!Subtarget->isCortexA9() ||
+            (N.hasOneUse() &&
+             isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
+          Offset = N.getOperand(0).getOperand(0);
+          Base = N.getOperand(1);
+        } else {
+          ShAmt = 0;
+          ShOpcVal = ARM_AM::no_shift;
+        }
       } else {
         ShOpcVal = ARM_AM::no_shift;
       }
@@ -407,7 +466,8 @@
                                       SDValue &Base,
                                       SDValue &Offset,
                                       SDValue &Opc) {
-  if (N.getOpcode() == ISD::MUL) {
+  if (N.getOpcode() == ISD::MUL &&
+      (!Subtarget->isCortexA9() || N.hasOneUse())) {
     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
       // X * [3,5,9] -> X + X * [2,4,8] etc.
       int RHSC = (int)RHS->getZExtValue();
@@ -473,6 +533,16 @@
     }
   }

+  if (Subtarget->isCortexA9() && !N.hasOneUse()) {
+    // Compute R +/- (R << N) and reuse it.
+    Base = N;
+    Offset = CurDAG->getRegister(0, MVT::i32);
+    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
+                                                      ARM_AM::no_shift),
+                                    MVT::i32);
+    return AM2_BASE;
+  }
+
   // Otherwise this is R +/- [possibly shifted] R.
   ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
@@ -487,23 +557,36 @@
     if (ConstantSDNode *Sh =
            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
       ShAmt = Sh->getZExtValue();
-      Offset = N.getOperand(1).getOperand(0);
+      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
+        Offset = N.getOperand(1).getOperand(0);
+      else {
+        ShAmt = 0;
+        ShOpcVal = ARM_AM::no_shift;
+      }
     } else {
       ShOpcVal = ARM_AM::no_shift;
     }
   }

   // Try matching (R shl C) + (R).
-  if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) {
+  if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift &&
+      !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
     if (ShOpcVal != ARM_AM::no_shift) {
       // Check to see if the RHS of the shift is a constant, if not, we can't
       // fold it.
       if (ConstantSDNode *Sh =
              dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
         ShAmt = Sh->getZExtValue();
-        Offset = N.getOperand(0).getOperand(0);
-        Base = N.getOperand(1);
+        if (!Subtarget->isCortexA9() ||
+            (N.hasOneUse() &&
+             isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
+          Offset = N.getOperand(0).getOperand(0);
+          Base = N.getOperand(1);
+        } else {
+          ShAmt = 0;
+          ShOpcVal = ARM_AM::no_shift;
+        }
       } else {
         ShOpcVal = ARM_AM::no_shift;
       }
@@ -542,7 +625,12 @@
   // it.
   if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
     ShAmt = Sh->getZExtValue();
-    Offset = N.getOperand(0);
+    if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
+      Offset = N.getOperand(0);
+    else {
+      ShAmt = 0;
+      ShOpcVal = ARM_AM::no_shift;
+    }
   } else {
     ShOpcVal = ARM_AM::no_shift;
   }
@@ -958,6 +1046,12 @@
     return false;
   }

+  if (Subtarget->isCortexA9() && !N.hasOneUse()) {
+    // Compute R + (R << [1,2,3]) and reuse it.
+    Base = N;
+    return false;
+  }
+
   // Look for (R + R) or (R + (R << [1,2,3])).
   unsigned ShAmt = 0;
   Base = N.getOperand(0);
@@ -976,11 +1070,12 @@
   // it.
   if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
     ShAmt = Sh->getZExtValue();
-    if (ShAmt >= 4) {
+    if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
+      OffReg = OffReg.getOperand(0);
+    else {
       ShAmt = 0;
       ShOpcVal = ARM_AM::no_shift;
-    } else
-      OffReg = OffReg.getOperand(0);
+    }
   } else {
     ShOpcVal = ARM_AM::no_shift;
   }
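The Thumb2 hunk above folds two constraints into one test: the t2 load/store encoding only accepts lsl #0 through lsl #3 on the offset register, and with this patch the fold must additionally be profitable. A sketch of the combined condition (hypothetical parameter names; same heuristic as the sketch near the top, specialized to LSL since that is the only shift the t2 soreg mode encodes):

#include <cassert>

// Sketch: a Thumb2 offset-register shift is folded only when it both fits
// the encoding (shift amount 0-3) and passes the profitability test.
bool foldT2OffsetShift(bool isCortexA9, bool shiftHasOneUse, unsigned ShAmt) {
  bool Profitable = !isCortexA9 || shiftHasOneUse || ShAmt == 2;
  return ShAmt < 4 && Profitable;
}

int main() {
  assert(!foldT2OffsetShift(true, false, 3));  // multi-use lsl #3 on A9: no
  assert(foldT2OffsetShift(true, false, 2));   // lsl #2 is free: yes
  assert(!foldT2OffsetShift(false, true, 4));  // lsl #4 never encodes: no
  return 0;
}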
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -324,6 +324,13 @@
   let PrintMethod = "printSORegOperand";
   let MIOperandInfo = (ops GPR, GPR, i32imm);
 }
+def shift_so_reg : Operand<i32>,    // reg reg imm
+                   ComplexPattern<i32, 3, "SelectShiftShifterOperandReg",
+                                  [shl,srl,sra,rotr]> {
+  string EncoderMethod = "getSORegOpValue";
+  let PrintMethod = "printSORegOperand";
+  let MIOperandInfo = (ops GPR, GPR, i32imm);
+}

 // so_imm - Match a 32-bit shifter_operand immediate operand, which is an
 // 8-bit immediate rotated by an arbitrary number of bits. so_imm values are
@@ -1714,9 +1721,10 @@
   let Inst{15-12} = Rd;
 }

-def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins so_reg:$src),
+def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg:$src),
                 DPSoRegFrm, IIC_iMOVsr,
-                "mov", "\t$Rd, $src", [(set GPR:$Rd, so_reg:$src)]>, UnaryDP {
+                "mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg:$src)]>,
+                UnaryDP {
   bits<4> Rd;
   bits<12> src;
   let Inst{15-12} = Rd;
--- a/test/CodeGen/ARM/shifter_operand.ll
+++ b/test/CodeGen/ARM/shifter_operand.ll
@@ -1,18 +1,72 @@
-; RUN: llc < %s -march=arm | grep add | grep lsl
-; RUN: llc < %s -march=arm | grep bic | grep asr
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=A9
+; rdar://8576755


 define i32 @test1(i32 %X, i32 %Y, i8 %sh) {
-  %shift.upgrd.1 = zext i8 %sh to i32 ; <i32> [#uses=1]
-  %A = shl i32 %Y, %shift.upgrd.1 ; <i32> [#uses=1]
-  %B = add i32 %X, %A ; <i32> [#uses=1]
+; A8: test1:
+; A8: add r0, r0, r1, lsl r2
+
+; A9: test1:
+; A9: add r0, r0, r1, lsl r2
+  %shift.upgrd.1 = zext i8 %sh to i32
+  %A = shl i32 %Y, %shift.upgrd.1
+  %B = add i32 %X, %A
   ret i32 %B
 }

 define i32 @test2(i32 %X, i32 %Y, i8 %sh) {
-  %shift.upgrd.2 = zext i8 %sh to i32 ; <i32> [#uses=1]
-  %A = ashr i32 %Y, %shift.upgrd.2 ; <i32> [#uses=1]
-  %B = xor i32 %A, -1 ; <i32> [#uses=1]
-  %C = and i32 %X, %B ; <i32> [#uses=1]
+; A8: test2:
+; A8: bic r0, r0, r1, asr r2
+
+; A9: test2:
+; A9: bic r0, r0, r1, asr r2
+  %shift.upgrd.2 = zext i8 %sh to i32
+  %A = ashr i32 %Y, %shift.upgrd.2
+  %B = xor i32 %A, -1
+  %C = and i32 %X, %B
   ret i32 %C
 }
+
+define i32 @test3(i32 %base, i32 %base2, i32 %offset) {
+entry:
+; A8: test3:
+; A8: ldr r0, [r0, r2, lsl #2]
+; A8: ldr r1, [r1, r2, lsl #2]
+
+; lsl #2 is free
+; A9: test3:
+; A9: ldr r1, [r1, r2, lsl #2]
+; A9: ldr r0, [r0, r2, lsl #2]
+  %tmp1 = shl i32 %offset, 2
+  %tmp2 = add i32 %base, %tmp1
+  %tmp3 = inttoptr i32 %tmp2 to i32*
+  %tmp4 = add i32 %base2, %tmp1
+  %tmp5 = inttoptr i32 %tmp4 to i32*
+  %tmp6 = load i32* %tmp3
+  %tmp7 = load i32* %tmp5
+  %tmp8 = add i32 %tmp7, %tmp6
+  ret i32 %tmp8
+}
+
+declare i8* @malloc(...)
+
+define fastcc void @test4() nounwind {
+entry:
+; A8: test4:
+; A8: ldr r1, [r0, r0, lsl #2]
+; A8: str r1, [r0, r0, lsl #2]
+
+; A9: test4:
+; A9: add r0, r0, r0, lsl #2
+; A9: ldr r1, [r0]
+; A9: str r1, [r0]
+  %0 = tail call i8* (...)* @malloc(i32 undef) nounwind
+  %1 = bitcast i8* %0 to i32*
+  %2 = sext i16 undef to i32
+  %3 = getelementptr inbounds i32* %1, i32 %2
+  %4 = load i32* %3, align 4
+  %5 = add nsw i32 %4, 1
+  store i32 %5, i32* %3, align 4
+  ret void
+}
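For intuition, the two new tests exercise the two sides of the heuristic. In test3 the shift itself (%offset shifted left by 2) has two uses, but lsl #2 is the one shift that stays free on Cortex-A9, so both loads still fold it into their addressing modes. In test4 the complete address (r0 + r0, lsl #2) has two uses, a load and a store, so on A9 the add is computed once and both accesses reuse it. Roughly the C++ shapes of those tests (a hypothetical reconstruction, not part of the commit):

// test3: two loads share the index expression "offset << 2"; the shift is
// still folded on A9 because lsl #2 is free.
int test3(const int *base, const int *base2, int offset) {
  return base[offset] + base2[offset];
}

// test4: the load and the store share one full address, so on A9 the
// "add r0, r0, r0, lsl #2" is emitted once and reused.
void test4(int *p, int i) {
  p[i] = p[i] + 1;
}

int main() {
  int a[4] = {1, 2, 3, 4};
  int r = test3(a, a, 1);  // a[1] + a[1] == 4
  test4(a, 2);             // a[2] becomes 4
  return (r == 4 && a[2] == 4) ? 0 : 1;
}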
--- a/utils/TableGen/EDEmitter.cpp
+++ b/utils/TableGen/EDEmitter.cpp
@@ -585,6 +585,7 @@

   MISC("brtarget", "kOperandTypeARMBranchTarget"); // ?
   MISC("so_reg", "kOperandTypeARMSoReg"); // R, R, I
+  MISC("shift_so_reg", "kOperandTypeARMSoReg"); // R, R, I
   MISC("t2_so_reg", "kOperandTypeThumb2SoReg"); // R, I
   MISC("so_imm", "kOperandTypeARMSoImm"); // I
   MISC("rot_imm", "kOperandTypeARMRotImm"); // I