llvm.org GIT mirror llvm / cf85ddc
[Thumb-1] Select post-increment load and store where possible Thumb-1 doesn't have post-inc or pre-inc load or store instructions. However the LDM/STM instructions with writeback can function as post-inc load/store: ldm r0!, {r1} @ load from r0 into r1 and increment r0 by 4 Obviously, this only works if the post increment is 4. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275540 91177308-0d34-0410-b5e6-96231b3b80d8 James Molloy 4 years ago
4 changed file(s) with 164 addition(s) and 4 deletion(s). Raw diff Collapse all Expand all
194194 private:
195195 /// Indexed (pre/post inc/dec) load matching code for ARM.
196196 bool tryARMIndexedLoad(SDNode *N);
197 bool tryT1IndexedLoad(SDNode *N);
197198 bool tryT2IndexedLoad(SDNode *N);
198199
199200 /// SelectVLD - Select NEON load intrinsics. NumVecs should be
15401541 }
15411542
15421543 return false;
1544 }
1545
1546 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1547 LoadSDNode *LD = cast(N);
1548 EVT LoadedVT = LD->getMemoryVT();
1549 ISD::MemIndexedMode AM = LD->getAddressingMode();
1550 if (AM == ISD::UNINDEXED || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1551 AM != ISD::POST_INC || LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1552 return false;
1553
1554 auto *COffs = dyn_cast(LD->getOffset());
1555 if (!COffs || COffs->getZExtValue() != 4)
1556 return false;
1557
1558 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1559 // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1560 // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1561 // ISel.
1562 SDValue Chain = LD->getChain();
1563 SDValue Base = LD->getBasePtr();
1564 SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1565 CurDAG->getRegister(0, MVT::i32), Chain };
1566 ReplaceNode(N, CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32, MVT::i32,
1567 MVT::Other, Ops));
1568 return true;
15431569 }
15441570
15451571 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
30143040 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
30153041 if (tryT2IndexedLoad(N))
30163042 return;
3043 } else if (Subtarget->isThumb()) {
3044 if (tryT1IndexedLoad(N))
3045 return;
30173046 } else if (tryARMIndexedLoad(N))
30183047 return;
30193048 // Other cases are autogenerated.
714714 setIndexedStoreAction(im, MVT::i16, Legal);
715715 setIndexedStoreAction(im, MVT::i32, Legal);
716716 }
717 } else {
718 // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
719 setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
720 setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
717721 }
718722
719723 setOperationAction(ISD::SADDO, MVT::i32, Custom);
82468250 MI.dump();
82478251 llvm_unreachable("Unexpected instr type to insert");
82488252 }
8253
8254 // Thumb1 post-indexed loads are really just single-register LDMs.
8255 case ARM::tLDR_postidx: {
8256 BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
8257 .addOperand(MI->getOperand(1)) // Rn_wb
8258 .addOperand(MI->getOperand(2)) // Rn
8259 .addOperand(MI->getOperand(3)) // PredImm
8260 .addOperand(MI->getOperand(4)) // PredReg
8261 .addOperand(MI->getOperand(0)); // Rt
8262 MI->eraseFromParent();
8263 return BB;
8264 }
8265
82498266 // The Thumb2 pre-indexed stores have the same MI operands, they just
82508267 // define them differently in the .td files from the isel patterns, so
82518268 // they need pseudos.
1159511612 SDValue &Offset,
1159611613 ISD::MemIndexedMode &AM,
1159711614 SelectionDAG &DAG) const {
11598 if (Subtarget->isThumb1Only())
11599 return false;
11600
1160111615 EVT VT;
1160211616 SDValue Ptr;
11603 bool isSEXTLoad = false;
11617 bool isSEXTLoad = false, isNonExt;
1160411618 if (LoadSDNode *LD = dyn_cast(N)) {
1160511619 VT = LD->getMemoryVT();
1160611620 Ptr = LD->getBasePtr();
1160711621 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
11622 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
1160811623 } else if (StoreSDNode *ST = dyn_cast(N)) {
1160911624 VT = ST->getMemoryVT();
1161011625 Ptr = ST->getBasePtr();
11626 isNonExt = !ST->isTruncatingStore();
1161111627 } else
1161211628 return false;
1161311629
11630 if (Subtarget->isThumb1Only()) {
11631 // Thumb-1 can do a limited post-inc load or store as an updating LDM. It
11632 // must be non-extending/truncating, i32, with an offset of 4.
11633 assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
11634 if (Op->getOpcode() != ISD::ADD || !isNonExt)
11635 return false;
11636 auto *RHS = dyn_cast(Op->getOperand(1));
11637 if (!RHS || RHS->getZExtValue() != 4)
11638 return false;
11639
11640 Offset = Op->getOperand(1);
11641 Base = Op->getOperand(0);
11642 AM = ISD::POST_INC;
11643 return true;
11644 }
11645
1161411646 bool isInc;
1161511647 bool isLegal = false;
1161611648 if (Subtarget->isThumb2())
14491449 def : T1Pat<(extloadi8 t_addrmode_rr:$addr), (tLDRBr t_addrmode_rr:$addr)>;
14501450 def : T1Pat<(extloadi16 t_addrmode_is2:$addr), (tLDRHi t_addrmode_is2:$addr)>;
14511451 def : T1Pat<(extloadi16 t_addrmode_rr:$addr), (tLDRHr t_addrmode_rr:$addr)>;
1452
1453 // post-inc loads and stores
1454
// post-inc LDR -> LDM r0!, {r1}. The way operands are laid out in LDMs is
// different to how ISel expects them for a post-inc load, so use a pseudo
// and expand it just after ISel.
//
// The operands use tGPR (low registers) because the Thumb-1 LDM encoding can
// only name r0-r7. mayLoad is set explicitly: with an empty pattern list
// TableGen has nothing to infer it from.
let usesCustomInserter = 1, mayLoad = 1,
    Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in
 def tLDR_postidx: tPseudoInst<(outs tGPR:$Rt, tGPR:$Rn_wb),
                               (ins tGPR:$Rn, pred:$p),
                               4, IIC_iStore_ru,
                               []>;
1464
// post-inc STR -> STM r0!, {r1}. The layout of this (because it doesn't def
// multiple registers) is the same in ISel as MachineInstr, so there's no need
// for a pseudo.
//
// Both operands use tGPR (low registers) because the Thumb-1 STM encoding can
// only name r0-r7 for the base and the register list.
def : T1Pat<(post_store tGPR:$Rt, tGPR:$Rn, 4),
            (tSTMIA_UPD tGPR:$Rn, tGPR:$Rt)>;
14521470
14531471 // If it's impossible to use [r,r] address mode for sextload, select to
14541472 // ldr{b|h} + sxt{b|h} instead.
0 ; RUN: llc -mtriple=thumbv7 -mcpu=cortex-m0 < %s -disable-lsr | FileCheck %s
1 ; FIXME: LSR mangles the last two testcases pretty badly. When this is fixed, remove
2 ; the -disable-lsr above.
3
; @f: the loop loads an i32 and advances the pointer by one element (4 bytes)
; per iteration, so the load is expected to be emitted as a single-register
; LDM with writeback.
4 ; CHECK-LABEL: @f
5 ; CHECK: ldm {{r[0-9]}}!, {r{{[0-9]}}}
6 define i32 @f(i32* readonly %a, i32* readnone %b) {
7 %1 = icmp eq i32* %a, %b
8 br i1 %1, label %._crit_edge, label %.lr.ph
9
10 .lr.ph: ; preds = %.lr.ph, %0
11 %i.02 = phi i32 [ %3, %.lr.ph ], [ 0, %0 ]
12 %.01 = phi i32* [ %4, %.lr.ph ], [ %a, %0 ]
13 %2 = load i32, i32* %.01, align 4
14 %3 = add nsw i32 %2, %i.02
15 %4 = getelementptr inbounds i32, i32* %.01, i32 1
16 %5 = icmp eq i32* %4, %b
17 br i1 %5, label %._crit_edge, label %.lr.ph
18
19 ._crit_edge: ; preds = %.lr.ph, %0
20 %i.0.lcssa = phi i32 [ 0, %0 ], [ %3, %.lr.ph ]
21 ret i32 %i.0.lcssa
22 }
23
; @g: same loop as @f but the pointer advances by two elements (8 bytes) per
; iteration. The increment is not 4, so no LDM may be emitted.
24 ; CHECK-LABEL: @g
25 ; CHECK-NOT: ldm
26 define i32 @g(i32* readonly %a, i32* readnone %b) {
27 %1 = icmp eq i32* %a, %b
28 br i1 %1, label %._crit_edge, label %.lr.ph
29
30 .lr.ph: ; preds = %.lr.ph, %0
31 %i.02 = phi i32 [ %3, %.lr.ph ], [ 0, %0 ]
32 %.01 = phi i32* [ %4, %.lr.ph ], [ %a, %0 ]
33 %2 = load i32, i32* %.01, align 4
34 %3 = add nsw i32 %2, %i.02
35 %4 = getelementptr inbounds i32, i32* %.01, i32 2
36 %5 = icmp eq i32* %4, %b
37 br i1 %5, label %._crit_edge, label %.lr.ph
38
39 ._crit_edge: ; preds = %.lr.ph, %0
40 %i.0.lcssa = phi i32 [ 0, %0 ], [ %3, %.lr.ph ]
41 ret i32 %i.0.lcssa
42 }
43
; @h: the loop stores an i32 and advances the pointer by one element (4 bytes)
; per iteration, so the store is expected to be emitted as a single-register
; STM with writeback.
44 ; CHECK-LABEL: @h
45 ; CHECK: stm {{r[0-9]}}!, {r{{[0-9]}}}
46 define void @h(i32* %a, i32* readnone %b) {
47 %1 = icmp eq i32* %a, %b
48 br i1 %1, label %._crit_edge, label %.lr.ph
49
50 .lr.ph: ; preds = %.lr.ph, %0
51 %i.02 = phi i32 [ %2, %.lr.ph ], [ 0, %0 ]
52 %.01 = phi i32* [ %3, %.lr.ph ], [ %a, %0 ]
53 %2 = add nsw i32 %i.02, 1
54 store i32 %i.02, i32* %.01, align 4
55 %3 = getelementptr inbounds i32, i32* %.01, i32 1
56 %4 = icmp eq i32* %3, %b
57 br i1 %4, label %._crit_edge, label %.lr.ph
58
59 ._crit_edge: ; preds = %.lr.ph, %0
60 ret void
61 }
62
; @j: same loop as @h but the pointer advances by two elements (8 bytes) per
; iteration. The increment is not 4, so no STM may be emitted.
63 ; CHECK-LABEL: @j
64 ; CHECK-NOT: stm
65 define void @j(i32* %a, i32* readnone %b) {
66 %1 = icmp eq i32* %a, %b
67 br i1 %1, label %._crit_edge, label %.lr.ph
68
69 .lr.ph: ; preds = %.lr.ph, %0
70 %i.02 = phi i32 [ %2, %.lr.ph ], [ 0, %0 ]
71 %.01 = phi i32* [ %3, %.lr.ph ], [ %a, %0 ]
72 %2 = add nsw i32 %i.02, 1
73 store i32 %i.02, i32* %.01, align 4
74 %3 = getelementptr inbounds i32, i32* %.01, i32 2
75 %4 = icmp eq i32* %3, %b
76 br i1 %4, label %._crit_edge, label %.lr.ph
77
78 ._crit_edge: ; preds = %.lr.ph, %0
79 ret void
80 }