llvm.org GIT mirror llvm / 3ad1311
[LICM & MSSA] Limit unsafe sinking and hoisting. Summary: The getClobberingMemoryAccess API checks for clobbering accesses in a loop by walking the backedge. This may check if a memory access is being clobbered by the loop in a previous iteration, depending on how smart AA got over the course of the updates in MemorySSA (it does not occur when built from scratch). If no clobbering access is found inside the loop, it will optimize to an access outside the loop. This, however, does not mean that the access is safe to sink. Given: ``` for i load a[i] store a[i] ``` The access corresponding to the load can be optimized to outside the loop, and the load can be hoisted. But it is incorrect to sink it. In order to sink the load, we'd need to check that no Def clobbers the Use in the same iteration. With this patch we currently restrict sinking to either Defs not existing in the loop, or Defs preceding the load in the same block. An easy extension is to ensure the load (Use) post-dominates all Defs. Caught by PR42294. This issue also shed light on the converse problem: hoisting stores in this same scenario would be illegal. With this patch we restrict hoisting of stores to the case when their corresponding Defs are dominating all Uses in the loop. Reviewers: george.burgess.iv Subscribers: jlebar, Prazek, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D63582 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363982 91177308-0d34-0410-b5e6-96231b3b80d8 Alina Sbirlea 2 months ago
4 changed file(s) with 105 addition(s) and 13 deletion(s). Raw diff Collapse all Expand all
105105 unsigned LicmMssaOptCounter;
106106 unsigned LicmMssaOptCap;
107107 unsigned LicmMssaNoAccForPromotionCap;
108 bool IsSink;
108109 };
109110
110111 /// Walk the specified region of the CFG (defined by all blocks
375375 // us to sink instructions in one pass, without iteration. After sinking
376376 // instructions, we perform another pass to hoist them out of the loop.
377377 SinkAndHoistLICMFlags Flags = {NoOfMemAccTooLarge, LicmMssaOptCounter,
378 LicmMssaOptCap, LicmMssaNoAccForPromotionCap};
378 LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
379 /*IsSink=*/true};
379380 if (L->hasDedicatedExits())
380381 Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
381382 CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
383 Flags.IsSink = false;
382384 if (Preheader)
383385 Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
384386 CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
12231225 // Could do better here, but this is conservatively correct.
12241226 // TODO: Cache set of Uses on the first walk in runOnLoop, update when
12251227 // moving accesses. Can also extend to dominating uses.
1228 auto *SIMD = MSSA->getMemoryAccess(SI);
12261229 for (auto *BB : CurLoop->getBlocks())
12271230 if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
12281231 for (const auto &MA : *Accesses)
12301233 auto *MD = MU->getDefiningAccess();
12311234 if (!MSSA->isLiveOnEntryDef(MD) &&
12321235 CurLoop->contains(MD->getBlock()))
1236 return false;
1237 // Disable hoisting past potentially interfering loads. Optimized
1238 // Uses may point to an access outside the loop, as getClobbering
1239 // checks the previous iteration when walking the backedge.
1240 // FIXME: More precise: no Uses that alias SI.
1241 if (!Flags->IsSink && !MSSA->dominates(SIMD, MU))
12331242 return false;
12341243 } else if (const auto *MD = dyn_cast(&MA))
12351244 if (auto *LI = dyn_cast(MD->getMemoryInst())) {
22562265 static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
22572266 Loop *CurLoop,
22582267 SinkAndHoistLICMFlags &Flags) {
2259 MemoryAccess *Source;
2260 // See declaration of SetLicmMssaOptCap for usage details.
2261 if (Flags.LicmMssaOptCounter >= Flags.LicmMssaOptCap)
2262 Source = MU->getDefiningAccess();
2263 else {
2264 Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
2265 Flags.LicmMssaOptCounter++;
2266 }
2267 return !MSSA->isLiveOnEntryDef(Source) &&
2268 CurLoop->contains(Source->getBlock());
2268 // For hoisting, use the walker to determine safety
2269 if (!Flags.IsSink) {
2270 MemoryAccess *Source;
2271 // See declaration of SetLicmMssaOptCap for usage details.
2272 if (Flags.LicmMssaOptCounter >= Flags.LicmMssaOptCap)
2273 Source = MU->getDefiningAccess();
2274 else {
2275 Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
2276 Flags.LicmMssaOptCounter++;
2277 }
2278 return !MSSA->isLiveOnEntryDef(Source) &&
2279 CurLoop->contains(Source->getBlock());
2280 }
2281
2282 // For sinking, we'd need to check all Defs below this use. The getClobbering
2283 // call will look on the backedge of the loop, but will check aliasing with
2284 // the instructions on the previous iteration.
2285 // For example:
2286 // for (i ... )
2287 // load a[i] ( Use (LoE)
2288 // store a[i] ( 1 = Def (2), with 2 = Phi for the loop.
2289 // i++;
2290 // The load sees no clobbering inside the loop, as the backedge alias check
2291 // does phi translation, and will check aliasing against store a[i-1].
2292 // However sinking the load outside the loop, below the store is incorrect.
2293
2294 // For now, only sink if there are no Defs in the loop, or if the existing ones
2295 // precede the use and are in the same block.
2296 // FIXME: Increase precision: Safe to sink if Use post dominates the Def;
2297 // needs PostDominatorTreeAnalysis.
2298 // FIXME: More precise: no Defs that alias this Use.
2299 if (Flags.NoOfMemAccTooLarge)
2300 return true;
2301 for (auto *BB : CurLoop->getBlocks())
2302 if (auto *Accesses = MSSA->getBlockDefs(BB))
2303 for (const auto &MA : *Accesses)
2304 if (const auto *MD = dyn_cast(&MA))
2305 if (MU->getBlock() != MD->getBlock() ||
2306 !MSSA->locallyDominates(MD, MU))
2307 return true;
2308 return false;
22692309 }
22702310
22712311 /// Little predicate that returns true if the specified basic block is in
0 ; RUN: opt -loop-rotate -licm %s -disable-output -enable-mssa-loop-dependency=true -debug-only=licm 2>&1 | FileCheck %s -check-prefix=LICM
1 ; RUN: opt -loop-rotate -licm %s -disable-output -enable-mssa-loop-dependency=false -debug-only=licm 2>&1 | FileCheck %s -check-prefix=LICM
2 ; RUN: opt -loop-rotate -licm %s -S -enable-mssa-loop-dependency=true | FileCheck %s
3 ; RUN: opt -loop-rotate -licm %s -S -enable-mssa-loop-dependency=false | FileCheck %s
4
5 ; LICM: Using
6 ; LICM-NOT: LICM sinking instruction: %.pre = load i8, i8* %arrayidx.phi.trans.insert
7
8 ; CHECK-LABEL: @fn1
9 ; CHECK-LABEL: entry:
10 ; CHECK: br i1 true, label %[[END:.*]], label %[[PH:.*]]
11 ; CHECK: [[PH]]:
12 ; CHECK: br label %[[CRIT:.*]]
13 ; CHECK: [[CRIT]]:
14 ; CHECK: load i8
15 ; CHECK: store i8
16 ; CHECK: br i1 true, label %[[ENDCRIT:.*]], label %[[CRIT]]
17 ; CHECK: [[ENDCRIT]]:
18 ; CHECK-NOT: load i8
19 ; CHECK: br label %[[END]]
20
21 target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"
22 target triple = "s390x-unknown-linux-gnu"
23
24 define void @fn1() {
25 entry:
26 %g = alloca [9 x i8], align 1
27 br label %for.body
28
29 for.body: ; preds = %for.body.for.body_crit_edge, %entry
30 %0 = phi i64 [ 0, %entry ], [ %phitmp, %for.body.for.body_crit_edge ]
31 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body.for.body_crit_edge ]
32 %arrayidx = getelementptr inbounds [9 x i8], [9 x i8]* %g, i64 0, i64 %indvars.iv
33 store i8 2, i8* %arrayidx, align 1
34 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
35 br i1 undef, label %for.end18, label %for.body.for.body_crit_edge
36
37 for.body.for.body_crit_edge: ; preds = %for.body
38 %arrayidx.phi.trans.insert = getelementptr inbounds [9 x i8], [9 x i8]* %g, i64 0, i64 %indvars.iv.next
39 %.pre = load i8, i8* %arrayidx.phi.trans.insert, align 1
40 %phitmp = zext i8 %.pre to i64
41 br label %for.body
42
43 for.end18: ; preds = %for.body
44 store i64 %0, i64* undef, align 8
45 ret void
46 }
47
347347 ; the load must observe.
348348 define i32 @test_dominated_read(i32* %loc) {
349349 ; CHECK-LABEL: @test_dominated_read
350 ; CHECK-LABEL: exit:
351 ; CHECK: store i32 0, i32* %loc
350 ; MSSA-LABEL: entry:
351 ; MSSA: store i32 0, i32* %loc
352 ; MSSA-LABEL: loop:
353 ; AST-LABEL: exit:
354 ; AST: store i32 0, i32* %loc
352355 entry:
353356 br label %loop
354357