llvm.org GIT mirror llvm / 10c34d1
Teach getSCEVAtScope how to handle loop phis w/invariant operands in loops w/taken backedges.
This patch really contains two pieces:
1) Teach SCEV how to fold a phi in the header of a loop to the value on the backedge when a) the backedge is known to execute at least once, and b) the value is safe to use globally within the scope dominated by the original phi.
2) Teach IndVarSimplify's rewriteLoopExitValues to allow loop invariant expressions which already exist (and thus don't need new computation inserted) even in loops where we can't optimize away other uses.
Differential Revision: https://reviews.llvm.org/D63224
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363619 91177308-0d34-0410-b5e6-96231b3b80d8
Philip Reames 3 months ago
4 changed file(s) with 63 addition(s) and 37 deletion(s). Raw diff Collapse all Expand all
81218121 // count. If so, we may be able to force computation of the exit
81228122 // value.
81238123 const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
8124 if (const SCEVConstant *BTCC =
8125 dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
8126
8127 // This trivial case can show up in some degenerate cases where
8128 // the incoming IR has not yet been fully simplified.
8129 if (BTCC->getValue()->isZero()) {
8130 Value *InitValue = nullptr;
8131 bool MultipleInitValues = false;
8132 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
8133 if (!LI->contains(PN->getIncomingBlock(i))) {
8134 if (!InitValue)
8135 InitValue = PN->getIncomingValue(i);
8136 else if (InitValue != PN->getIncomingValue(i)) {
8137 MultipleInitValues = true;
8138 break;
8139 }
8124 // This trivial case can show up in some degenerate cases where
8125 // the incoming IR has not yet been fully simplified.
8126 if (BackedgeTakenCount->isZero()) {
8127 Value *InitValue = nullptr;
8128 bool MultipleInitValues = false;
8129 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
8130 if (!LI->contains(PN->getIncomingBlock(i))) {
8131 if (!InitValue)
8132 InitValue = PN->getIncomingValue(i);
8133 else if (InitValue != PN->getIncomingValue(i)) {
8134 MultipleInitValues = true;
8135 break;
81408136 }
81418137 }
8142 if (!MultipleInitValues && InitValue)
8143 return getSCEV(InitValue);
81448138 }
8139 if (!MultipleInitValues && InitValue)
8140 return getSCEV(InitValue);
8141 }
8142 // Do we have a loop invariant value flowing around the backedge
8143 // for a loop which must execute the backedge?
8144 if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) &&
8145 isKnownPositive(BackedgeTakenCount) &&
8146 PN->getNumIncomingValues() == 2) {
8147 unsigned InLoopPred = LI->contains(PN->getIncomingBlock(0)) ? 0 : 1;
8148 const SCEV *OnBackedge = getSCEV(PN->getIncomingValue(InLoopPred));
8149 if (IsAvailableOnEntry(LI, DT, OnBackedge, PN->getParent()))
8150 return OnBackedge;
8151 }
8152 if (auto *BTCC = dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
81458153 // Okay, we know how many times the containing loop executes. If
81468154 // this is a constant evolving PHI node, get the final value at
81478155 // the specified iteration number.
634634
635635 // Computing the value outside of the loop brings no benefit if it is
636636 // definitely used inside the loop in a way which can not be optimized
637 // away.
637 // away. Avoid doing so unless we know we have a value which computes
638 // the ExitValue already. TODO: This should be merged into SCEV
639 // expander to leverage its knowledge of existing expressions.
638640 if (ReplaceExitValue != AlwaysRepl &&
639 !isa<SCEVConstant>(ExitValue) && hasHardUserWithinLoop(L, Inst))
641 !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
642 hasHardUserWithinLoop(L, Inst))
640643 continue;
641644
642645 bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
152152 define i32 @unroll_phi_select_constant_nonzero(i32 %arg1, i32 %arg2) {
153153 ; CHECK-LABEL: @unroll_phi_select_constant_nonzero(
154154 ; CHECK-NEXT: entry:
155 ; CHECK-NEXT: ret i32 [[ARG2:%.*]]
156 ;
157 entry:
158 br label %loop
159
160 loop:
161 %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
162 %selector = phi i32 [%arg1, %entry], [%arg2, %loop]
163 %i.next = add nsw nuw i32 %i, 1
164 %c = icmp ult i32 %i, 4
165 br i1 %c, label %loop, label %loopexit
166
167 loopexit:
168 ret i32 %selector
169 }
170
171 declare i32 @f()
172
173 ; After LCSSA formation, there's no LCSSA phi for %f since it isn't directly
174 ; used outside the loop, and thus we can't directly replace %selector w/ %f.
175 define i32 @neg_unroll_phi_select_constant_nonzero(i32 %arg) {
176 ; CHECK-LABEL: @neg_unroll_phi_select_constant_nonzero(
177 ; CHECK-NEXT: entry:
155178 ; CHECK-NEXT: br label [[LOOP:%.*]]
156179 ; CHECK: loop:
157180 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
158 ; CHECK-NEXT: [[SELECTOR:%.*]] = phi i32 [ [[ARG1:%.*]], [[ENTRY]] ], [ [[ARG2:%.*]], [[LOOP]] ]
181 ; CHECK-NEXT: [[SELECTOR:%.*]] = phi i32 [ [[ARG:%.*]], [[ENTRY]] ], [ [[F:%.*]], [[LOOP]] ]
182 ; CHECK-NEXT: [[F]] = call i32 @f()
159183 ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
160184 ; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[I]], 4
161185 ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[LOOPEXIT:%.*]]
168192
169193 loop:
170194 %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
171 %selector = phi i32 [%arg1, %entry], [%arg2, %loop]
195 %selector = phi i32 [%arg, %entry], [%f, %loop]
196 %f = call i32 @f()
172197 %i.next = add nsw nuw i32 %i, 1
173198 %c = icmp ult i32 %i, 4
174199 br i1 %c, label %loop, label %loopexit
176201 loopexit:
177202 ret i32 %selector
178203 }
204
179205
180206 define i32 @unroll_phi_select_constant_zero(i32 %arg1, i32 %arg2) {
181207 ; CHECK-LABEL: @unroll_phi_select_constant_zero(
200226 ; CHECK-LABEL: @unroll_phi_select(
201227 ; CHECK-NEXT: entry:
202228 ; CHECK-NEXT: [[LENGTH:%.*]] = zext i16 [[LEN:%.*]] to i32
203 ; CHECK-NEXT: br label [[LOOP:%.*]]
204 ; CHECK: loop:
205 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ -1, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
206 ; CHECK-NEXT: [[SELECTOR:%.*]] = phi i32 [ [[ARG1:%.*]], [[ENTRY]] ], [ [[ARG2:%.*]], [[LOOP]] ]
207 ; CHECK-NEXT: [[I_NEXT]] = add nsw i32 [[I]], 1
208 ; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[I]], [[LENGTH]]
209 ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[LOOPEXIT:%.*]]
210 ; CHECK: loopexit:
211 ; CHECK-NEXT: [[SELECTOR_LCSSA:%.*]] = phi i32 [ [[SELECTOR]], [[LOOP]] ]
212 ; CHECK-NEXT: ret i32 [[SELECTOR_LCSSA]]
229 ; CHECK-NEXT: ret i32 [[ARG2:%.*]]
213230 ;
214231 entry:
215232 %length = zext i16 %len to i32
5757 ; CHECK-NEXT: br label [[LOOP1:%.*]]
5858 ; CHECK: loop1:
5959 ; CHECK-NEXT: [[L1:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[L1_ADD:%.*]], [[LOOP1]] ]
60 ; CHECK-NEXT: [[SELECTOR:%.*]] = phi i16 [ [[ARG1:%.*]], [[ENTRY]] ], [ [[ARG2:%.*]], [[LOOP1]] ]
6160 ; CHECK-NEXT: [[L1_ADD]] = add nuw nsw i16 [[L1]], 1
6261 ; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i16 [[L1_ADD]], 2
6362 ; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP1]], label [[LOOP2_PREHEADER:%.*]]
6463 ; CHECK: loop2.preheader:
65 ; CHECK-NEXT: [[K1_ADD_LCSSA:%.*]] = phi i16 [ [[SELECTOR]], [[LOOP1]] ]
66 ; CHECK-NEXT: br label [[LOOP2:%.*]]
67 ; CHECK: loop2:
68 ; CHECK-NEXT: [[K2:%.*]] = phi i16 [ [[K2_ADD:%.*]], [[LOOP2]] ], [ [[K1_ADD_LCSSA]], [[LOOP2_PREHEADER]] ]
64 ; CHECK-NEXT: br label [[LOOP2:%.*]]
65 ; CHECK: loop2:
66 ; CHECK-NEXT: [[K2:%.*]] = phi i16 [ [[K2_ADD:%.*]], [[LOOP2]] ], [ [[ARG2:%.*]], [[LOOP2_PREHEADER]] ]
6967 ; CHECK-NEXT: [[L2:%.*]] = phi i16 [ [[L2_ADD:%.*]], [[LOOP2]] ], [ 0, [[LOOP2_PREHEADER]] ]
7068 ; CHECK-NEXT: [[L2_ADD]] = add nuw nsw i16 [[L2]], 1
7169 ; CHECK-NEXT: tail call void @foo(i16 [[K2]])