llvm.org GIT mirror: llvm / commit 843f7ad
[LV] Allow interleaved accesses in loops with predicated blocks

This patch allows the formation of interleaved access groups in loops
containing predicated blocks. However, the predicated accesses are
prevented from forming groups.

Differential Revision: https://reviews.llvm.org/D19694

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275471 91177308-0d34-0410-b5e6-96231b3b80d8

Author: Matthew Simpson
2 changed files with 198 additions and 38 deletions.
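For background: an interleaved access group is a set of accesses to consecutive, interleaved memory locations that the vectorizer can widen into a single vector memory operation plus shuffles. Before this patch, a single predicated block anywhere in the loop disabled interleaved-group formation entirely; after it, only the predicated accesses themselves are excluded. A minimal C++ illustration of the access pattern involved (an illustrative sketch, not code from this commit):

#include <cstdint>

// a[2 * i] and a[2 * i + 1] access interleaved locations with stride 2 and
// can form an interleaved load group: one wide load feeds two shuffles.
// The guarded store executes only when the condition holds, so its block
// is predicated during vectorization; with this patch the load group can
// still form, while the predicated store is kept out of any group.
void interleaved(int64_t *a, int64_t *out, int64_t x, int64_t n) {
  for (int64_t i = 0; i < n; ++i) {
    int64_t even = a[2 * i];
    int64_t odd = a[2 * i + 1];
    out[i] = even + odd;
    if (odd == x)
      out[i] = even; // predicated store
  }
}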
lib/Transforms/Vectorize/LoopVectorize.cpp:

@@ -946,6 +946,11 @@
     return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
   }
 
+  /// \brief Returns true if \p BB is a predicated block.
+  bool isPredicated(BasicBlock *BB) const {
+    return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
+  }
+
   /// \brief Returns true if LoopAccessInfo can be used for dependence queries.
   bool areDependencesValid() const {
     return LAI && LAI->getDepChecker().getDependences();
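The new helper simply forwards to LoopAccessInfo::blockNeedsPredication: a block is predicated when it is not guaranteed to execute on every loop iteration (e.g., the body of an if inside the loop). A hedged sketch of the intended use, mirroring the check added to analyzeInterleaving further down (hypothetical caller, not part of this diff; Access stands for some load or store Instruction):

// Hypothetical caller: exclude accesses in predicated blocks from
// interleave groups, since they may not execute when their mask is false.
if (isPredicated(Access->getParent()))
  continue; // leave Access out of any interleave group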
@@ -4924,53 +4929,38 @@
 void InterleavedAccessInfo::collectConstStridedAccesses(
     MapVector<Instruction *, StrideDescriptor> &StrideAccesses,
     const ValueToValueMap &Strides) {
-  // Holds load/store instructions in program order.
-  SmallVector<Instruction *, 16> AccessList;
+
+  auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
 
   // Since it's desired that the load/store instructions be maintained in
   // "program order" for the interleaved access analysis, we have to visit the
   // blocks in the loop in reverse postorder (i.e., in a topological order).
   // Such an ordering will ensure that any load/store that may be executed
-  // before a second load/store will precede the second load/store in the
-  // AccessList.
+  // before a second load/store will precede the second load/store in
+  // StrideAccesses.
   LoopBlocksDFS DFS(TheLoop);
   DFS.perform(LI);
-  for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) {
-    bool IsPred = LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
-
+  for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
     for (auto &I : *BB) {
-      if (!isa<LoadInst>(&I) && !isa<StoreInst>(&I))
+      auto *LI = dyn_cast<LoadInst>(&I);
+      auto *SI = dyn_cast<StoreInst>(&I);
+      if (!LI && !SI)
         continue;
-      // FIXME: Currently we can't handle mixed accesses and predicated accesses
-      if (IsPred)
-        return;
-
-      AccessList.push_back(&I);
-    }
-  }
-
-  if (AccessList.empty())
-    return;
-
-  auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
-  for (auto I : AccessList) {
-    auto *LI = dyn_cast<LoadInst>(I);
-    auto *SI = dyn_cast<StoreInst>(I);
-
-    Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
-    int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides);
-
-    const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
-    PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
-    uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
-
-    // An alignment of 0 means target ABI alignment.
-    unsigned Align = LI ? LI->getAlignment() : SI->getAlignment();
-    if (!Align)
-      Align = DL.getABITypeAlignment(PtrTy->getElementType());
-
-    StrideAccesses[I] = StrideDescriptor(Stride, Scev, Size, Align);
-  }
+
+      Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
+      int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides);
+
+      const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
+      PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
+      uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
+
+      // An alignment of 0 means target ABI alignment.
+      unsigned Align = LI ? LI->getAlignment() : SI->getAlignment();
+      if (!Align)
+        Align = DL.getABITypeAlignment(PtrTy->getElementType());
+
+      StrideAccesses[&I] = StrideDescriptor(Stride, Scev, Size, Align);
+    }
 }
 
 // Analyze interleaved accesses and collect them into interleaved load and
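With the intermediate AccessList gone, StrideAccesses (a MapVector, which iterates in insertion order) now records program order directly as the reverse-postorder walk runs, and the early return on predicated blocks disappears. As a standalone sketch of why reverse postorder yields a topological order on an acyclic region (a generic illustration, not code from the patch):

#include <cstdio>
#include <vector>

// Tiny diamond CFG: 0 -> {1, 2} -> 3. A depth-first postorder pushes a
// block only after all of its successors, so reversing the postorder
// yields a topological order: every block precedes its successors, and
// earlier memory accesses are therefore visited before later ones.
void postorder(int BB, const std::vector<std::vector<int>> &Succ,
               std::vector<bool> &Seen, std::vector<int> &Order) {
  Seen[BB] = true;
  for (int S : Succ[BB])
    if (!Seen[S])
      postorder(S, Succ, Seen, Order);
  Order.push_back(BB);
}

int main() {
  std::vector<std::vector<int>> Succ = {{1, 2}, {3}, {3}, {}};
  std::vector<bool> Seen(4, false);
  std::vector<int> Order;
  postorder(0, Succ, Seen, Order);
  // Reverse postorder: entry first, join block last.
  for (auto It = Order.rbegin(); It != Order.rend(); ++It)
    std::printf("%d ", *It);
  std::printf("\n"); // prints "0 2 1 3"
}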
@@ -5124,6 +5114,12 @@
       if (DistanceToA % static_cast<int64_t>(DesA.Size))
         continue;
 
+      // If either A or B is in a predicated block, we prevent adding them to a
+      // group. We may be able to relax this limitation in the future once we
+      // handle more complicated blocks.
+      if (isPredicated(A->getParent()) || isPredicated(B->getParent()))
+        continue;
+
       // The index of B is the index of A plus the related index to A.
       int IndexB =
           Group->getIndex(A) + DistanceToA / static_cast<int64_t>(DesA.Size);
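To make the surrounding index arithmetic concrete: with the %pair type from the new test below, DesA.Size is 8 bytes, so an access 8 bytes beyond A lands at index IndexA + 1 within the group. A small self-contained sketch (plain variables standing in for the pass's data structures):

#include <cassert>
#include <cstdint>

int main() {
  // For %pair = type { i64, i64 }: each member is 8 bytes.
  uint64_t SizeA = 8;      // DesA.Size
  int64_t DistanceToA = 8; // B accesses the second i64 of the pair
  int IndexA = 0;          // A's index within its interleave group

  // Mirrors: IndexB = Group->getIndex(A) + DistanceToA / DesA.Size.
  // The preceding check guarantees the distance divides evenly.
  assert(DistanceToA % static_cast<int64_t>(SizeA) == 0);
  int IndexB = IndexA + DistanceToA / static_cast<int64_t>(SizeA);
  assert(IndexB == 1);
}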
New test file:

; RUN: opt -S -loop-vectorize -instcombine -force-vector-width=2 -force-vector-interleave=1 -enable-interleaved-mem-accesses -vectorize-num-stores-pred=1 -enable-cond-stores-vec < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
%pair = type { i64, i64 }

; Ensure that we vectorize the interleaved load group even though the loop
; contains a conditional store. The store group contains gaps and is not
; vectorized.
;
; CHECK-LABEL: @interleaved_with_cond_store_0(
;
; CHECK: min.iters.checked
; CHECK: %n.mod.vf = and i64 %[[N:.+]], 1
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
; CHECK: %n.vec = sub i64 %[[N]], %[[R]]
;
; CHECK: vector.body:
; CHECK: %wide.vec = load <4 x i64>, <4 x i64>* %{{.*}}
; CHECK: %strided.vec = shufflevector <4 x i64> %wide.vec, <4 x i64> undef, <2 x i32> <i32 0, i32 2>
;
; CHECK: pred.store.if
; CHECK: %[[X1:.+]] = extractelement <4 x i64> %wide.vec, i32 0
; CHECK: store i64 %[[X1]], {{.*}}
;
; CHECK: pred.store.if
; CHECK: %[[X2:.+]] = extractelement <4 x i64> %wide.vec, i32 2
; CHECK: store i64 %[[X2]], {{.*}}

define void @interleaved_with_cond_store_0(%pair *%p, i64 %x, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %if.merge ], [ 0, %entry ]
  %p.1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
  %0 = load i64, i64* %p.1, align 8
  %1 = icmp eq i64 %0, %x
  br i1 %1, label %if.then, label %if.merge

if.then:
  store i64 %0, i64* %p.1, align 8
  br label %if.merge

if.merge:
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}
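A rough C++ analogue of the function above (a hypothetical reconstruction of a source loop that would lower to this IR; the struct and field names are invented):

#include <cstdint>
struct Pair { int64_t first, second; };

// The strided load of p[i].second (stride 2 in units of i64) becomes the
// wide <4 x i64> load plus shufflevector checked above; the store happens
// only under the compare, so it is scalarized into predicated blocks.
void cond_store_0(Pair *p, int64_t x, int64_t n) {
  for (int64_t i = 0; i < n; ++i) {
    int64_t v = p[i].second;
    if (v == x)
      p[i].second = v;
  }
}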

; Ensure that we don't form a single interleaved group for the two loads. The
; conditional store prevents the second load from being hoisted. The two load
; groups are separately vectorized. The store group contains gaps and is not
; vectorized.
;
; CHECK-LABEL: @interleaved_with_cond_store_1(
;
; CHECK: min.iters.checked
; CHECK: %n.mod.vf = and i64 %[[N:.+]], 1
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
; CHECK: %n.vec = sub i64 %[[N]], %[[R]]
;
; CHECK: vector.body:
; CHECK: %[[L1:.+]] = load <4 x i64>, <4 x i64>* %{{.*}}
; CHECK: %strided.vec = shufflevector <4 x i64> %[[L1]], <4 x i64> undef, <2 x i32> <i32 0, i32 2>
;
; CHECK: pred.store.if
; CHECK: %[[X1:.+]] = extractelement <4 x i64> %wide.vec, i32 0
; CHECK: store i64 %[[X1]], {{.*}}
;
; CHECK: pred.store.if
; CHECK: %[[X2:.+]] = extractelement <4 x i64> %wide.vec, i32 2
; CHECK: store i64 %[[X2]], {{.*}}
;
; CHECK: pred.store.continue
; CHECK: %[[L2:.+]] = load <4 x i64>, <4 x i64>* {{.*}}
; CHECK: %[[X3:.+]] = extractelement <4 x i64> %[[L2]], i32 0
; CHECK: store i64 %[[X3]], {{.*}}
; CHECK: %[[X4:.+]] = extractelement <4 x i64> %[[L2]], i32 2
; CHECK: store i64 %[[X4]], {{.*}}

define void @interleaved_with_cond_store_1(%pair *%p, i64 %x, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %if.merge ], [ 0, %entry ]
  %p.0 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
  %p.1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
  %0 = load i64, i64* %p.1, align 8
  %1 = icmp eq i64 %0, %x
  br i1 %1, label %if.then, label %if.merge

if.then:
  store i64 %0, i64* %p.0, align 8
  br label %if.merge

if.merge:
  %2 = load i64, i64* %p.0, align 8
  store i64 %2, i64 *%p.1, align 8
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}
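A rough C++ analogue of the function above (again a hypothetical reconstruction with invented names):

#include <cstdint>
struct Pair { int64_t first, second; };

// The guarded store to p[i].first sits between the load of p[i].second and
// the load of p[i].first, so the second load cannot be hoisted to join the
// first in one group; each load is vectorized in a separate group.
void cond_store_1(Pair *p, int64_t x, int64_t n) {
  for (int64_t i = 0; i < n; ++i) {
    int64_t a = p[i].second;
    if (a == x)
      p[i].first = a;
    p[i].second = p[i].first;
  }
}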

; Ensure that we don't create a single interleaved group for the two stores.
; The second store is conditional and we can't sink the first store inside the
; predicated block. The load group is vectorized, and the store groups contain
; gaps and are not vectorized.
;
; CHECK-LABEL: @interleaved_with_cond_store_2(
;
; CHECK: min.iters.checked
; CHECK: %n.mod.vf = and i64 %[[N:.+]], 1
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
; CHECK: %n.vec = sub i64 %[[N]], %[[R]]
;
; CHECK: vector.body:
; CHECK: %[[L1:.+]] = load <4 x i64>, <4 x i64>* %{{.*}}
; CHECK: %strided.vec = shufflevector <4 x i64> %[[L1]], <4 x i64> undef, <2 x i32> <i32 0, i32 2>
; CHECK: store i64 %x, {{.*}}
; CHECK: store i64 %x, {{.*}}
;
; CHECK: pred.store.if
; CHECK: %[[X1:.+]] = extractelement <4 x i64> %wide.vec, i32 0
; CHECK: store i64 %[[X1]], {{.*}}
;
; CHECK: pred.store.if
; CHECK: %[[X2:.+]] = extractelement <4 x i64> %wide.vec, i32 2
; CHECK: store i64 %[[X2]], {{.*}}

define void @interleaved_with_cond_store_2(%pair *%p, i64 %x, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %if.merge ], [ 0, %entry ]
  %p.0 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
  %p.1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
  %0 = load i64, i64* %p.1, align 8
  store i64 %x, i64* %p.0, align 8
  %1 = icmp eq i64 %0, %x
  br i1 %1, label %if.then, label %if.merge

if.then:
  store i64 %0, i64* %p.1, align 8
  br label %if.merge

if.merge:
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}
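A rough C++ analogue of the function above (hypothetical reconstruction with invented names):

#include <cstdint>
struct Pair { int64_t first, second; };

// The unconditional store to p[i].first cannot be sunk into the predicated
// block that conditionally stores p[i].second, so the two stores cannot
// share one interleaved store group.
void cond_store_2(Pair *p, int64_t x, int64_t n) {
  for (int64_t i = 0; i < n; ++i) {
    int64_t v = p[i].second;
    p[i].first = x;
    if (v == x)
      p[i].second = v;
  }
}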