llvm.org GIT mirror llvm / d68ef42
[LICM] Infer proper alignment from loads during scalar promotion

This patch fixes an issue where we would compute an unnecessarily small alignment during scalar promotion when no store is guaranteed to execute, but we've proven load speculation safety. Since speculating a load requires proving the existing alignment is valid at the new location (see Loads.cpp), we can use the alignment fact from the load.

For non-atomics, this is a performance problem. For atomics, this is a correctness issue, though an *incredibly* rare one to see in practice. For atomics, we might not be able to lower an improperly aligned load or store (i.e. i32 align 1). If such an instruction makes it all the way to codegen, we *may* fail to codegen the operation, or we may simply generate a slow call to a library function. The part that makes this super hard to see in practice is that the memory location actually *is* well aligned, and InstCombine knows that. So, to see a failure, you have to a) hit the bug in LICM, b) somehow hit a depth limit in InstCombine/ValueTracking so the alignment never gets fixed up, and c) then have generated an instruction which fails codegen rather than simply emitting a slow libcall. All around, pretty hard to hit.

Differential Revision: https://reviews.llvm.org/D58809

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@355217 91177308-0d34-0410-b5e6-96231b3b80d8

Philip Reames
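To make the scenario concrete, here is a minimal LLVM IR sketch of the shape described above. It is illustrative only and is not one of the patch's tests; the function name, the loop body, and the assumption that promotion would actually fire on a plain i32* argument are invented for the example. The load of %p executes on every iteration and already carries align 4, while the only store is conditional, so no store is guaranteed to execute. Before this change, the promoted preheader load and the exit store would be emitted with align 1; with it, they can reuse the align 4 fact that speculating the load already had to prove.

define void @sketch(i32* %p, i64 %n) {
entry:
  br label %loop

loop:                                    ; preds = %latch, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
  ; Unconditional load: it can be speculated into the preheader, and doing
  ; so requires (and therefore proves) that align 4 holds there too.
  %v = load i32, i32* %p, align 4
  %cmp = icmp slt i32 %v, 100
  br i1 %cmp, label %then, label %latch

then:                                    ; preds = %loop
  ; Conditional store: no store is guaranteed to execute, so promotion
  ; cannot take its alignment from a guaranteed-to-execute store.
  %inc = add i32 %v, 1
  store i32 %inc, i32* %p, align 4
  br label %latch

latch:                                   ; preds = %then, %loop
  %iv.next = add i64 %iv, 1
  %done = icmp eq i64 %iv.next, %n
  br i1 %done, label %exit, label %loop

exit:                                    ; preds = %latch
  ret void
}

The test updates below (align 1 becoming align 4 in the CHECK lines) show exactly this effect on the promoted load in for.body.lr.ph and the stores on the exit edges.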
3 changed file(s) with 33 addition(s) and 15 deletion(s).
         SawUnorderedAtomic |= Load->isAtomic();
         SawNotAtomic |= !Load->isAtomic();

-        if (!DereferenceableInPH)
-          DereferenceableInPH = isSafeToExecuteUnconditionally(
-              *Load, DT, CurLoop, SafetyInfo, ORE, Preheader->getTerminator());
+        unsigned InstAlignment = Load->getAlignment();
+        if (!InstAlignment)
+          InstAlignment =
+              MDL.getABITypeAlignment(Load->getType());
+
+        // Note that proving a load safe to speculate requires proving
+        // sufficient alignment at the target location. Proving it guaranteed
+        // to execute does as well. Thus we can increase our guaranteed
+        // alignment as well.
+        if (!DereferenceableInPH || (InstAlignment > Alignment))
+          if (isSafeToExecuteUnconditionally(*Load, DT, CurLoop, SafetyInfo,
+                                             ORE, Preheader->getTerminator())) {
+            DereferenceableInPH = true;
+            Alignment = std::max(Alignment, InstAlignment);
+          }
       } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
         // Stores *of* the pointer are not interesting, only stores *to* the
         // pointer.
   // might not be able to lower the result. We can't downgrade since that
   // would violate memory model. Also, align 0 is an error for atomics.
   if (SawUnorderedAtomic && SawNotAtomic)
+    return false;
+
+  // If we're inserting an atomic load in the preheader, we must be able to
+  // lower it. We're only guaranteed to be able to lower naturally aligned
+  // atomics.
+  auto *SomePtrElemType = SomePtr->getType()->getPointerElementType();
+  if (SawUnorderedAtomic &&
+      Alignment < MDL.getTypeStoreSize(SomePtrElemType))
     return false;

   // If we couldn't prove we can hoist the load, bail.
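The second hunk above is the conservative backstop for the atomic case: if the accesses to the promoted location are unordered atomics but the best alignment we could establish is still below the type's store size, promotion is skipped rather than emitting an atomic the target may not be able to lower. As a hypothetical illustration (this function is not part of the patch, and the align 1 facts are assumed rather than derived from anything), the following is the kind of under-aligned unordered atomic access the new check keeps the pass from materializing in a preheader:

define i32 @underaligned(i32* %a) {
entry:
  ; An i32 unordered atomic access whose known alignment (1) is below the
  ; natural alignment / store size of i32 (4). Some targets cannot lower this
  ; directly and would fall back to a slow library call, or fail outright, so
  ; scalar promotion now refuses to introduce such loads and stores.
  %v = load atomic i32, i32* %a unordered, align 1
  store atomic i32 %v, i32* %a unordered, align 1
  ret i32 %v
}

This sits next to the existing mixed atomic/non-atomic bail-out, so promoting atomics now requires both a uniform ordering and at least natural alignment.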
 
 for.body.lr.ph: ; preds = %entry
 ; CHECK-LABEL: for.body.lr.ph:
-; CHECK-NEXT: %addr.promoted = load i32, i32* %addr, align 1
+; CHECK-NEXT: %addr.promoted = load i32, i32* %addr, align 4
   br label %for.header

 for.header:
 ...

 early-exit:
 ; CHECK-LABEL: early-exit:
-; CHECK: store i32 %new1.lcssa, i32* %addr, align 1
+; CHECK: store i32 %new1.lcssa, i32* %addr, align 4
   ret i32* null

 for.body:
 ...

 for.cond.for.end_crit_edge: ; preds = %for.body
 ; CHECK-LABEL: for.cond.for.end_crit_edge:
-; CHECK: store i32 %new.lcssa, i32* %addr, align 1
+; CHECK: store i32 %new.lcssa, i32* %addr, align 4
   %split = phi i32* [ %addr, %for.body ]
   ret i32* null
 }
 
 for.body.lr.ph: ; preds = %entry
 ; CHECK-LABEL: for.body.lr.ph:
-; CHECK-NEXT: %addr.promoted = load i32, i32* %addr, align 1
+; CHECK-NEXT: %addr.promoted = load i32, i32* %addr, align 4
   br label %for.header

 for.header:
 ...

 early-exit:
 ; CHECK-LABEL: early-exit:
-; CHECK: store i32 %new1.lcssa, i32* %addr, align 1
+; CHECK: store i32 %new1.lcssa, i32* %addr, align 4
   ret i32* null

 for.body:
 ...

 for.cond.for.end_crit_edge:
 ; CHECK-LABEL: for.cond.for.end_crit_edge:
-; CHECK: store i32 %new.lcssa, i32* %addr, align 1
+; CHECK: store i32 %new.lcssa, i32* %addr, align 4
   %split = phi i32* [ %addr, %for.body ]
   ret i32* null
 }

 entry:
 ; CHECK-LABEL: entry:
 ; CHECK-NEXT: %a = alloca i32
-; CHECK-NEXT: %a.promoted = load i32, i32* %a, align 1
+; CHECK-NEXT: %a.promoted = load i32, i32* %a, align 4
   %a = alloca i32
   br label %for.body

 ...

 for.cond.cleanup:
 ; CHECK-LABEL: for.cond.cleanup:
-; CHECK: store i32 %add.lcssa, i32* %a, align 1
+; CHECK: store i32 %add.lcssa, i32* %a, align 4
 ; CHECK-NEXT: ret void
   ret void
 }

 ;; Same as test3, but with unordered atomics
-;; FIXME: doing the transform w/o alignment here is wrong since we're
-;; creating an unaligned atomic which we may not be able to lower.
 define void @test3b(i1 zeroext %y) uwtable {
 ; CHECK-LABEL: @test3
 entry:
 ; CHECK-LABEL: entry:
 ; CHECK-NEXT: %a = alloca i32
-; CHECK-NEXT: %a.promoted = load atomic i32, i32* %a unordered, align 1
+; CHECK-NEXT: %a.promoted = load atomic i32, i32* %a unordered, align 4
   %a = alloca i32
   br label %for.body

 ...

 for.cond.cleanup:
 ; CHECK-LABEL: for.cond.cleanup:
-; CHECK: store atomic i32 %add.lcssa, i32* %a unordered, align 1
+; CHECK: store atomic i32 %add.lcssa, i32* %a unordered, align 4
 ; CHECK-NEXT: ret void
   ret void
 }