llvm.org GIT mirror llvm / e5935fd
[LICM&MSSA] Limit store hoisting. Summary: If there is no clobbering access for a store inside the loop, that store can only be hoisted if there are no interfering loads. A more general verification is introduced here: there are no loads that are not optimized to an access outside the loop. Addresses PR40586. Reviewers: george.burgess.iv Subscribers: sanjoy, jlebar, Prazek, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D57967 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@353734 91177308-0d34-0410-b5e6-96231b3b80d8 Alina Sbirlea 7 months ago
4 changed file(s) with 81 addition(s) and 21 deletion(s). Raw diff Collapse all Expand all
111111 bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
112112 TargetLibraryInfo *, TargetTransformInfo *, Loop *,
113113 AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
114 OptimizationRemarkEmitter *ORE);
114 bool, OptimizationRemarkEmitter *);
115115
116116 /// Walk the specified region of the CFG (defined by all blocks
117117 /// dominated by the specified block, and that are in the current loop) in depth
123123 /// ORE. It returns changed status.
124124 bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
125125 TargetLibraryInfo *, Loop *, AliasSetTracker *,
126 MemorySSAUpdater *, ICFLoopSafetyInfo *,
127 OptimizationRemarkEmitter *ORE);
126 MemorySSAUpdater *, ICFLoopSafetyInfo *, bool,
127 OptimizationRemarkEmitter *);
128128
129129 /// This function deletes dead loops. The caller of this function needs to
130130 /// guarantee that the loop is in fact dead.
275275 bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
276276 Loop *CurLoop, AliasSetTracker *CurAST,
277277 MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop,
278 bool NoOfMemAccessesTooLarge,
278279 OptimizationRemarkEmitter *ORE = nullptr);
279280
280281 /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
305305
306306 std::unique_ptr CurAST;
307307 std::unique_ptr MSSAU;
308 bool LocalDisablePromotion = false;
308 bool NoOfMemAccTooLarge = false;
309
309310 if (!MSSA) {
310311 LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
311312 CurAST = collectAliasInfoForLoop(L, LI, AA);
320321 (void)MA;
321322 AccessCapCount++;
322323 if (AccessCapCount > AccessCapForMSSAPromotion) {
323 LocalDisablePromotion = true;
324 NoOfMemAccTooLarge = true;
324325 break;
325326 }
326327 }
327328 }
328 if (LocalDisablePromotion)
329 if (NoOfMemAccTooLarge)
329330 break;
330331 }
331332 }
349350 //
350351 if (L->hasDedicatedExits())
351352 Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
352 CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
353 CurAST.get(), MSSAU.get(), &SafetyInfo,
354 NoOfMemAccTooLarge, ORE);
353355 if (Preheader)
354356 Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
355 CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
357 CurAST.get(), MSSAU.get(), &SafetyInfo,
358 NoOfMemAccTooLarge, ORE);
356359
357360 // Now that all loop invariants have been removed from the loop, promote any
358361 // memory references to scalars that we can.
362365 // preheader for SSA updater, so also avoid sinking when no preheader
363366 // is available.
364367 if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
365 !LocalDisablePromotion) {
368 !NoOfMemAccTooLarge) {
366369 // Figure out the loop exits and their insertion points
367370 SmallVector ExitBlocks;
368371 L->getUniqueExitBlocks(ExitBlocks);
456459 DominatorTree *DT, TargetLibraryInfo *TLI,
457460 TargetTransformInfo *TTI, Loop *CurLoop,
458461 AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
459 ICFLoopSafetyInfo *SafetyInfo,
462 ICFLoopSafetyInfo *SafetyInfo, bool NoOfMemAccTooLarge,
460463 OptimizationRemarkEmitter *ORE) {
461464
462465 // Verify inputs.
500503 //
501504 bool FreeInLoop = false;
502505 if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
503 canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, ORE) &&
506 canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true,
507 NoOfMemAccTooLarge, ORE) &&
504508 !I.mayHaveSideEffects()) {
505509 if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE, FreeInLoop)) {
506510 if (!FreeInLoop) {
754758 bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
755759 DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
756760 AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
757 ICFLoopSafetyInfo *SafetyInfo,
761 ICFLoopSafetyInfo *SafetyInfo, bool NoOfMemAccTooLarge,
758762 OptimizationRemarkEmitter *ORE) {
759763 // Verify inputs.
760764 assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
807811 // and we have accurately duplicated the control flow from the loop header
808812 // to that block.
809813 if (CurLoop->hasLoopInvariantOperands(&I) &&
810 canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, ORE) &&
814 canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true,
815 NoOfMemAccTooLarge, ORE) &&
811816 isSafeToExecuteUnconditionally(
812817 I, DT, CurLoop, SafetyInfo, ORE,
813818 CurLoop->getLoopPreheader()->getTerminator())) {
10341039 Loop *CurLoop, AliasSetTracker *CurAST,
10351040 MemorySSAUpdater *MSSAU,
10361041 bool TargetExecutesOncePerLoop,
1042 bool NoOfMemAccTooLarge,
10371043 OptimizationRemarkEmitter *ORE) {
10381044 // If we don't understand the instruction, bail early.
10391045 if (!isHoistableAndSinkableInst(I))
11721178 return true;
11731179 if (!EnableLicmCap) {
11741180 auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
1175 if (MSSA->isLiveOnEntryDef(Source) ||
1176 !CurLoop->contains(Source->getBlock()))
1177 return true;
1181 // If there are no clobbering Defs in the loop, we still need to check
1182 // for interfering Uses. If there are more accesses than the Promotion
1183 // cap, give up, we're not walking a list that long. Otherwise, walk the
1184 // list, check each Use if it's optimized to an access outside the loop.
1185 // If yes, store is safe to hoist. This is fairly restrictive, but
1186 // conservatively correct.
1187 // TODO: Cache set of Uses on the first walk in runOnLoop, update when
1188 // moving accesses. Can also extend to dominating uses.
1189 if ((!MSSA->isLiveOnEntryDef(Source) &&
1190 CurLoop->contains(Source->getBlock())) ||
1191 NoOfMemAccTooLarge)
1192 return false;
1193 for (auto *BB : CurLoop->getBlocks())
1194 if (auto *Accesses = MSSA->getBlockAccesses(BB))
1195 for (const auto &MA : *Accesses)
1196 if (const auto *MU = dyn_cast(&MA)) {
1197 auto *MD = MU->getDefiningAccess();
1198 if (!MSSA->isLiveOnEntryDef(MD) &&
1199 CurLoop->contains(MD->getBlock()))
1200 return false;
1201 }
1202 return true;
11781203 }
11791204 return false;
11801205 }
302302 // No need to check for instruction's operands are loop invariant.
303303 assert(L.hasLoopInvariantOperands(I) &&
304304 "Insts in a loop's preheader should have loop invariant operands!");
305 if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr, false))
305 if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr, false, false))
306306 continue;
307307 if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI))
308308 Changed = true;
None ; RUN: opt -S -basicaa -licm %s | FileCheck %s
1 ; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s
0 ; RUN: opt -S -basicaa -licm %s | FileCheck -check-prefixes=CHECK,AST %s
1 ; RUN: opt -S -basicaa -licm -enable-mssa-loop-dependency=true %s | FileCheck -check-prefixes=CHECK,MSSA %s
2 ; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck -check-prefixes=CHECK,AST %s
23
34 define void @test(i32* %loc) {
45 ; CHECK-LABEL: @test
4546
4647 define i32* @false_negative_2use(i32* %loc) {
4748 ; CHECK-LABEL: @false_negative_2use
48 ; CHECK-LABEL: exit:
49 ; CHECK: store i32 0, i32* %loc
49 ; AST-LABEL: exit:
50 ; AST: store i32 0, i32* %loc
51 ; MSSA-LABEL: entry:
52 ; MSSA: store i32 0, i32* %loc
53 ; MSSA-LABEL: loop:
5054 entry:
5155 br label %loop
5256
118122 ret void
119123 }
120124
125 ; Hoisting the store is actually valid here, as it dominates the load.
121126 define void @neg_ref(i32* %loc) {
122127 ; CHECK-LABEL: @neg_ref
123128 ; CHECK-LABEL: exit1:
131136 %iv = phi i32 [0, %entry], [%iv.next, %backedge]
132137 store i32 0, i32* %loc
133138 %v = load i32, i32* %loc
139 %earlycnd = icmp eq i32 %v, 198
140 br i1 %earlycnd, label %exit1, label %backedge
141
142 backedge:
143 %iv.next = add i32 %iv, 1
144 %cmp = icmp slt i32 %iv, 200
145 br i1 %cmp, label %loop, label %exit2
146
147 exit1:
148 ret void
149 exit2:
150 ret void
151 }
152
153 ; Hoisting the store here leads to a miscompile.
154 define void @neg_ref2(i32* %loc) {
155 ; CHECK-LABEL: @neg_ref2
156 ; CHECK-LABEL: exit1:
157 ; CHECK: store i32 0, i32* %loc
158 ; CHECK-LABEL: exit2:
159 ; CHECK: store i32 0, i32* %loc
160 entry:
161 store i32 198, i32* %loc
162 br label %loop
163
164 loop:
165 %iv = phi i32 [0, %entry], [%iv.next, %backedge]
166 %v = load i32, i32* %loc
167 store i32 0, i32* %loc
134168 %earlycnd = icmp eq i32 %v, 198
135169 br i1 %earlycnd, label %exit1, label %backedge
136170