llvm.org GIT mirror llvm / c4b527a
DAGCombine's logic for forming pre- and post-indexed loads / stores was being overly conservative. It was concerned about cases where it would prohibit folding simple [r, c] addressing modes, e.g.

  ldr r0, [r2]
  ldr r1, [r2, #4]
=>
  ldr r0, [r2], #4
  ldr r1, [r2]

Change the logic to look for such cases, which allows it to form indexed memory ops more aggressively.

rdar://10674430

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148086 91177308-0d34-0410-b5e6-96231b3b80d8

Evan Cheng 8 years ago
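For context, a pre-indexed access updates the base register before the access and a post-indexed access updates it afterwards. A minimal C++ sketch of the equivalent pointer arithmetic (the helper names are illustrative only, not part of this patch):

  // ldr r0, [r2, #4]!  -- pre-indexed: r2 += 4, then load from the new r2
  int preIndexedLoad(int *&base) {
    base += 1;          // base register write-back happens first (4 bytes for int)
    return *base;       // the load uses the updated address
  }

  // ldr r0, [r2], #4   -- post-indexed: load from r2, then r2 += 4
  int postIndexedLoad(int *&base) {
    int value = *base;  // the load uses the original address
    base += 1;          // base register write-back happens afterwards
    return value;
  }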
3 changed files with 143 additions and 13 deletions.
   return SDValue();
 }
 
+/// canFoldInAddressingMode - Return true if 'Use' is a load or a store that
+/// uses N as its base pointer and that N may be folded in the load / store
+/// addressing mode. FIXME: This currently only looks for folding of
+/// [reg +/- imm] addressing modes.
+static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
+                                    SelectionDAG &DAG,
+                                    const TargetLowering &TLI) {
+  EVT VT;
+  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
+    if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
+      return false;
+    VT = Use->getValueType(0);
+  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
+    if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
+      return false;
+    VT = ST->getValue().getValueType();
+  } else
+    return false;
+
+  TargetLowering::AddrMode AM;
+  if (N->getOpcode() == ISD::ADD) {
+    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+    if (Offset)
+      AM.BaseOffs = Offset->getSExtValue();
+    else
+      return false;
+  } else if (N->getOpcode() == ISD::SUB) {
+    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+    if (Offset)
+      AM.BaseOffs = -Offset->getSExtValue();
+    else
+      return false;
+  } else
+    return false;
+
+  return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext()));
+}
+
 /// CombineToPreIndexedLoadStore - Try turning a load / store into a
 /// pre-indexed load / store when the base pointer is an add or subtract
 /// and it has other uses besides the load / store. After the
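The legality query at the end of the new helper funnels into TargetLowering::isLegalAddressingMode with only AM.BaseOffs populated. As a rough illustration, a target might reduce this [reg +/- imm] question to an immediate-range check; the sketch below is hypothetical (the helper name and the 4095-byte range are illustrative assumptions, not taken from this patch):

  #include "llvm/Target/TargetLowering.h"
  using namespace llvm;

  // Hypothetical check mirroring what a target's isLegalAddressingMode
  // override might accept for the AddrMode shape built above: a plain base
  // register plus a small signed immediate offset.
  static bool isLegalRegImmAddrMode(const TargetLowering::AddrMode &AM) {
    if (AM.BaseGV || AM.Scale)          // no global base, no scaled index register
      return false;
    return AM.BaseOffs >= -4095 && AM.BaseOffs <= 4095; // e.g. ARM-mode ldr/str offset range
  }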
     if (N->hasPredecessorHelper(Use, Visited, Worklist))
       return false;
 
-    if (!((Use->getOpcode() == ISD::LOAD &&
-           cast<LoadSDNode>(Use)->getBasePtr() == Ptr) ||
-          (Use->getOpcode() == ISD::STORE &&
-           cast<StoreSDNode>(Use)->getBasePtr() == Ptr)))
+    // If Ptr may be folded in addressing mode of other use, then it's
+    // not profitable to do this transformation.
+    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
       RealUse = true;
   }
 
       continue;
 
     // Try turning it into a post-indexed load / store except when
-    // 1) All uses are load / store ops that use it as base ptr.
+    // 1) All uses are load / store ops that use it as base ptr (and
+    //    it may be folded as addressing mode).
     // 2) Op must be independent of N, i.e. Op is neither a predecessor
     //    nor a successor of N. Otherwise, if Op is folded that would
     //    create a cycle.
         for (SDNode::use_iterator III = Use->use_begin(),
                EEE = Use->use_end(); III != EEE; ++III) {
           SDNode *UseUse = *III;
-          if (!((UseUse->getOpcode() == ISD::LOAD &&
-                 cast<LoadSDNode>(UseUse)->getBasePtr().getNode() == Use) ||
-                (UseUse->getOpcode() == ISD::STORE &&
-                 cast<StoreSDNode>(UseUse)->getBasePtr().getNode() == Use)))
+          if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
             RealUse = true;
         }
 
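Taken together, the two call sites implement one profitability rule: a use of the pointer only counts as a "real" use of the updated value if it cannot fold the pointer into its own [reg +/- imm] addressing mode. A condensed sketch of that rule (simplified name and signature, not the actual DAGCombiner code):

  // Hypothetical summary of the check performed at both call sites above:
  // forming the indexed load / store only pays off if some other user of the
  // pointer genuinely needs the incremented value in a register.
  static bool hasRealUseOfUpdatedPtr(SDNode *BasePtr,
                                     const SmallVectorImpl<SDNode *> &OtherUses,
                                     SelectionDAG &DAG,
                                     const TargetLowering &TLI) {
    bool RealUse = false;
    for (unsigned i = 0, e = OtherUses.size(); i != e; ++i)
      if (!canFoldInAddressingMode(BasePtr, OtherUses[i], DAG, TLI))
        RealUse = true;   // this user cannot simply use [BasePtr +/- imm]
    return RealUse;
  }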
 define fastcc void @test4(i16 %addr) nounwind {
 entry:
 ; A8: test4:
-; A8: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]
-; A8: str [[REG]], [r0, r1, lsl #2]
+; A8: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
+; A8: str [[REG]], [r0]
 
 ; A9: test4:
-; A9: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]
-; A9: str [[REG]], [r0, r1, lsl #2]
+; A9: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
+; A9: str [[REG]], [r0]
   %0 = tail call i8* (...)* @malloc(i32 undef) nounwind
   %1 = bitcast i8* %0 to i32*
   %2 = sext i16 %addr to i32
+; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a9 -stress-ivchain | FileCheck %s
+
+; @sharedidx is an unrolled variant of this loop:
+;  for (unsigned long i = 0; i < len; i += s) {
+;    c[i] = a[i] + b[i];
+;  }
+; where 's' cannot be folded into the addressing mode.
+;
+; This is not quite profitable to chain. But with -stress-ivchain, we
+; can form three address chains in place of the shared induction
+; variable.
+
+; rdar://10674430
+define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp {
+entry:
+; CHECK: sharedidx:
+  %cmp8 = icmp eq i32 %len, 0
+  br i1 %cmp8, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body.3
+; CHECK: %for.body
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+  %i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i8* %a, i32 %i.09
+  %0 = load i8* %arrayidx, align 1
+  %conv6 = zext i8 %0 to i32
+  %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.09
+  %1 = load i8* %arrayidx1, align 1
+  %conv27 = zext i8 %1 to i32
+  %add = add nsw i32 %conv27, %conv6
+  %conv3 = trunc i32 %add to i8
+  %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.09
+  store i8 %conv3, i8* %arrayidx4, align 1
+  %add5 = add i32 %i.09, %s
+  %cmp = icmp ult i32 %add5, %len
+  br i1 %cmp, label %for.body.1, label %for.end
+
+for.end:                                          ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry
+  ret void
+
+for.body.1:                                       ; preds = %for.body
+; CHECK: %for.body.1
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+  %arrayidx.1 = getelementptr inbounds i8* %a, i32 %add5
+  %2 = load i8* %arrayidx.1, align 1
+  %conv6.1 = zext i8 %2 to i32
+  %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %add5
+  %3 = load i8* %arrayidx1.1, align 1
+  %conv27.1 = zext i8 %3 to i32
+  %add.1 = add nsw i32 %conv27.1, %conv6.1
+  %conv3.1 = trunc i32 %add.1 to i8
+  %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %add5
+  store i8 %conv3.1, i8* %arrayidx4.1, align 1
+  %add5.1 = add i32 %add5, %s
+  %cmp.1 = icmp ult i32 %add5.1, %len
+  br i1 %cmp.1, label %for.body.2, label %for.end
+
+for.body.2:                                       ; preds = %for.body.1
+; CHECK: %for.body.2
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+  %arrayidx.2 = getelementptr inbounds i8* %a, i32 %add5.1
+  %4 = load i8* %arrayidx.2, align 1
+  %conv6.2 = zext i8 %4 to i32
+  %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %add5.1
+  %5 = load i8* %arrayidx1.2, align 1
+  %conv27.2 = zext i8 %5 to i32
+  %add.2 = add nsw i32 %conv27.2, %conv6.2
+  %conv3.2 = trunc i32 %add.2 to i8
+  %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %add5.1
+  store i8 %conv3.2, i8* %arrayidx4.2, align 1
+  %add5.2 = add i32 %add5.1, %s
+  %cmp.2 = icmp ult i32 %add5.2, %len
+  br i1 %cmp.2, label %for.body.3, label %for.end
+
+for.body.3:                                       ; preds = %for.body.2
+; CHECK: %for.body.3
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+  %arrayidx.3 = getelementptr inbounds i8* %a, i32 %add5.2
+  %6 = load i8* %arrayidx.3, align 1
+  %conv6.3 = zext i8 %6 to i32
+  %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %add5.2
+  %7 = load i8* %arrayidx1.3, align 1
+  %conv27.3 = zext i8 %7 to i32
+  %add.3 = add nsw i32 %conv27.3, %conv6.3
+  %conv3.3 = trunc i32 %add.3 to i8
+  %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %add5.2
+  store i8 %conv3.3, i8* %arrayidx4.3, align 1
+  %add5.3 = add i32 %add5.2, %s
+  %cmp.3 = icmp ult i32 %add5.3, %len
+  br i1 %cmp.3, label %for.body, label %for.end
+}