llvm.org GIT mirror llvm / a7aa63a
[SDAG] Relax conditions under which stores of loaded values can be merged Summary: Allow consecutive stores whose values come from consecutive loads to be merged in the presence of other uses of the loads. Previously this was disallowed, as in general the merged load cannot be shared with the other uses. Merging N stores into 1 may cause as many as N redundant loads. However, in the context of caching this should have a negligible effect on memory pressure and reduce instruction count, making it almost always a win. Fixes PR32086. Reviewers: spatel, jyknight, andreadb, hfinkel, efriedma Reviewed By: efriedma Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D30471 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@302712 91177308-0d34-0410-b5e6-96231b3b80d8 Nirav Dave 3 years ago
3 changed file(s) with 36 addition(s) and 28 deletion(s). Raw diff Collapse all Expand all
4242
4343 /// EntryToken - This is the marker used to indicate the start of a region.
4444 EntryToken,
45
46 /// DummyNode - Temporary node for node replacement. These nodes
47 /// should not persist beyond their introduction.
48 DummyNode,
4549
4650 /// TokenFactor - This node takes multiple tokens as input and produces a
4751 /// single token result. This is used to represent the fact that the operand
1278212782 LoadSDNode *Ld = dyn_cast(St->getValue());
1278312783 if (!Ld) break;
1278412784
12785 // Loads must only have one use.
12786 if (!Ld->hasNUsesOfValue(1, 0))
12787 break;
12788
1278912785 // The memory operands must not be volatile.
1279012786 if (Ld->isVolatile() || Ld->isIndexed())
1279112787 break;
1279212788
1279312789 // We do not accept ext loads.
1279412790 if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
12795 break;
12796
12797 // The stored memory type must be the same.
12798 if (Ld->getMemoryVT() != MemVT)
1279912791 break;
1280012792
1280112793 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
1292912921 // Transfer chain users from old loads to the new load.
1293012922 for (unsigned i = 0; i < NumElem; ++i) {
1293112923 LoadSDNode *Ld = cast(LoadNodes[i].MemNode);
12932 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
12933 SDValue(NewLoad.getNode(), 1));
12924 if (SDValue(Ld, 0).hasOneUse()) {
12925 // Only the original store used the loaded value, so just replace the chain.
12926 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
12927 SDValue(NewLoad.getNode(), 1));
12928 } else {
12929 // Multiple uses exist. Keep the old load in line with the new
12930 // load, i.e. replace chains using Ld's chain with a
12931 // TokenFactor. Create a temporary node to serve as a placeholder
12932 // so we do not replace the reference to the original load's chain
12933 // in the TokenFactor.
12934 SDValue TokenDummy = DAG.getNode(ISD::DummyNode, SDLoc(Ld), MVT::Other);
12935
12936 // Redirect all references to the load's output chain to TokenDummy.
12937 CombineTo(Ld, SDValue(Ld, 0), TokenDummy, false);
12938 SDValue Token =
12939 DAG.getNode(ISD::TokenFactor, SDLoc(Ld), MVT::Other, SDValue(Ld, 1),
12940 SDValue(NewLoad.getNode(), 1));
12941 // Replace all uses of TokenDummy with the TokenFactor joining Ld's output chain and the new load's chain.
12942 CombineTo(TokenDummy.getNode(), Token);
12943 assert(TokenDummy.use_empty() && "TokenDummy should be unused");
12944 AddToWorklist(Ld);
12945 }
1293412946 }
1293512947
1293612948 // Replace all the stores with the new store.
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
11 ; RUN: llc %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -o - | FileCheck %s
22
3
3 ; PR32086
44 target triple = "x86_64-unknown-linux-gnu"
55
66 define void @merge_double(double* noalias nocapture %st, double* noalias nocapture readonly %ld) #0 {
77 ; CHECK-LABEL: merge_double:
88 ; CHECK: # BB#0:
9 ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
10 ; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
11 ; CHECK-NEXT: movsd %xmm0, (%rdi)
12 ; CHECK-NEXT: movsd %xmm1, 8(%rdi)
13 ; CHECK-NEXT: movsd %xmm0, 16(%rdi)
14 ; CHECK-NEXT: movsd %xmm1, 24(%rdi)
9 ; CHECK-NEXT: movups (%rsi), %xmm0
10 ; CHECK-NEXT: movups %xmm0, (%rdi)
11 ; CHECK-NEXT: movups %xmm0, 16(%rdi)
1512 ; CHECK-NEXT: retq
1613 %ld_idx1 = getelementptr inbounds double, double* %ld, i64 1
1714 %ld0 = load double, double* %ld, align 8, !tbaa !2
3128 define void @merge_loadstore_int(i64* noalias nocapture readonly %p, i64* noalias nocapture %q) local_unnamed_addr #0 {
3229 ; CHECK-LABEL: merge_loadstore_int:
3330 ; CHECK: # BB#0: # %entry
34 ; CHECK-NEXT: movq (%rdi), %rax
35 ; CHECK-NEXT: movq 8(%rdi), %rcx
36 ; CHECK-NEXT: movq %rax, (%rsi)
37 ; CHECK-NEXT: movq %rcx, 8(%rsi)
38 ; CHECK-NEXT: movq %rax, 16(%rsi)
39 ; CHECK-NEXT: movq %rcx, 24(%rsi)
31 ; CHECK-NEXT: movups (%rdi), %xmm0
32 ; CHECK-NEXT: movups %xmm0, (%rsi)
33 ; CHECK-NEXT: movups %xmm0, 16(%rsi)
4034 ; CHECK-NEXT: retq
4135 entry:
4236 %0 = load i64, i64* %p, align 8, !tbaa !1
5650 ; CHECK-LABEL: merge_loadstore_int_with_extra_use:
5751 ; CHECK: # BB#0: # %entry
5852 ; CHECK-NEXT: movq (%rdi), %rax
59 ; CHECK-NEXT: movq 8(%rdi), %rcx
60 ; CHECK-NEXT: movq %rax, (%rsi)
61 ; CHECK-NEXT: movq %rcx, 8(%rsi)
62 ; CHECK-NEXT: movq %rax, 16(%rsi)
63 ; CHECK-NEXT: movq %rcx, 24(%rsi)
53 ; CHECK-NEXT: movups (%rdi), %xmm0
54 ; CHECK-NEXT: movups %xmm0, (%rsi)
55 ; CHECK-NEXT: movups %xmm0, 16(%rsi)
6456 ; CHECK-NEXT: retq
6557 entry:
6658 %0 = load i64, i64* %p, align 8, !tbaa !1