llvm.org GIT mirror llvm / 2f297df
Smarter isel of ldrsb / ldrsh. Only make use of these when [r,r] address is feasible. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@75360 91177308-0d34-0410-b5e6-96231b3b80d8 Evan Cheng 10 years ago
4 changed file(s) with 27 addition(s) and 24 deletion(s). Raw diff Collapse all Expand all
453453 // FIXME dl should come from the parent load or store, not the address
454454 DebugLoc dl = Op.getDebugLoc();
455455 if (N.getOpcode() != ISD::ADD) {
456 Base = N;
457 // We must materialize a zero in a reg! Returning a constant here
458 // wouldn't work without additional code to position the node within
459 // ISel's topological ordering in a place where ISel will process it
460 // normally. Instead, just explicitly issue a tMOVri8 node!
461 SDValue CC = CurDAG->getRegister(ARM::CPSR, MVT::i32);
462 SDValue Pred = CurDAG->getTargetConstant(0xEULL, MVT::i32);
463 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
464 SDValue Ops[] = { CC, CurDAG->getTargetConstant(0, MVT::i32), Pred, PredReg };
465 Offset = SDValue(CurDAG->getTargetNode(ARM::tMOVi8, dl, MVT::i32, Ops,4),0);
456 ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
457 if (!NC || NC->getZExtValue() != 0)
458 return false;
459
460 Base = Offset = N;
466461 return true;
467462 }
468463
223223 "ldrh", " $dst, $addr",
224224 [(set tGPR:$dst, (zextloadi16 t_addrmode_s2:$addr))]>;
225225
226 let AddedComplexity = 10 in
226227 def tLDRSB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr),
227228 "ldrsb", " $dst, $addr",
228229 [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>;
229230
231 let AddedComplexity = 10 in
230232 def tLDRSH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_rr:$addr),
231233 "ldrsh", " $dst, $addr",
232234 [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>;
619621 def : T1Pat<(extloadi8 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>;
620622 def : T1Pat<(extloadi16 t_addrmode_s2:$addr), (tLDRH t_addrmode_s2:$addr)>;
621623
624 // If it's impossible to use [r,r] address mode for sextload, select to
625 // ldr{b|h} + sxt{b|h} instead.
626 def : TPat<(sextloadi8 t_addrmode_s1:$addr),
627 (tSXTB (tLDRB t_addrmode_s1:$addr))>;
628 def : TPat<(sextloadi16 t_addrmode_s2:$addr),
629 (tSXTH (tLDRH t_addrmode_s2:$addr))>;
630
631
622632 // Large immediate handling.
623633
624634 // Two piece imms.
243243 Make use of hi register variants of cmp: tCMPhir / tCMPZhir.
244244
245245 //===---------------------------------------------------------------------===//
246
247 Rather than generating ldrsb, sometimes it's better to select ldrb + sxtb.
248 The problem is that ldrsb only supports the [r, r] addressing mode, so a zero
249 offset requires an extra move. e.g. ldr_ext.ll test3:
250 movs r1, #0
251 ldrsb r0, [r0, r1]
252 =>
253 ldrb r0, [r0, #0]
254 sxtb r0, r0
None ; RUN: llvm-as < %s | llc -march=thumb | grep ldrb | count 1
1 ; RUN: llvm-as < %s | llc -march=thumb | grep ldrh | count 1
2 ; RUN: llvm-as < %s | llc -march=thumb | grep ldrsb | count 1
3 ; RUN: llvm-as < %s | llc -march=thumb | grep ldrsh | count 1
0 ; RUN: llvm-as < %s | llc -march=thumb | FileCheck %s
41
52 define i32 @test1(i8* %v.pntr.s0.u1) {
3 ; CHECK: test1:
4 ; CHECK: ldrb
65 %tmp.u = load i8* %v.pntr.s0.u1
76 %tmp1.s = zext i8 %tmp.u to i32
87 ret i32 %tmp1.s
98 }
109
1110 define i32 @test2(i16* %v.pntr.s0.u1) {
11 ; CHECK: test2:
12 ; CHECK: ldrh
1213 %tmp.u = load i16* %v.pntr.s0.u1
1314 %tmp1.s = zext i16 %tmp.u to i32
1415 ret i32 %tmp1.s
1516 }
1617
1718 define i32 @test3(i8* %v.pntr.s1.u0) {
18 %tmp.s = load i8* %v.pntr.s1.u0
19 ; CHECK: test3:
20 ; CHECK: ldrb
21 ; CHECK: sxtb
22 %tmp.s = load i8* %v.pntr.s1.u0
1923 %tmp1.s = sext i8 %tmp.s to i32
2024 ret i32 %tmp1.s
2125 }
2226
2327 define i32 @test4() {
28 ; CHECK: test4:
29 ; CHECK: movs
30 ; CHECK: ldrsh
2431 %tmp.s = load i16* null
2532 %tmp1.s = sext i16 %tmp.s to i32
2633 ret i32 %tmp1.s