llvm.org GIT mirror llvm / 908d451
Fix a long-standing miscompile in the load analysis that was uncovered by my refactoring of this code. The method isSafeToLoadUnconditionally assumes that the load will proceed with the preferred type alignment. Given that, it has to ensure that the alloca or global is at least that aligned. It has always done this historically when a datalayout is present, but has never checked it when the datalayout is absent. When I refactored the code in r220156, I exposed this path when datalayout was present and that turned the latent bug into a patent bug. This fixes the issue by just removing the special case which allows folding things without datalayout. This isn't worth the complexity of trying to tease apart when it is or isn't safe without actually knowing the preferred alignment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@220161 91177308-0d34-0410-b5e6-96231b3b80d8 Chandler Carruth 4 years ago
5 changed file(s) with 67 addition(s) and 16 deletion(s). Raw diff Collapse all Expand all
7272 Type *BaseType = nullptr;
7373 unsigned BaseAlign = 0;
7474 if (const AllocaInst *AI = dyn_cast(Base)) {
75 // Loading directly from an alloca is trivially safe. We can't even look
76 // through pointer casts here though, as that might change the size loaded.
77 if (AI == V)
78 return true;
79
8075 // An alloca is safe to load from as load as it is suitably aligned.
8176 BaseType = AI->getAllocatedType();
8277 BaseAlign = AI->getAlignment();
8580 // overridden. Their size may change or they may be weak and require a test
8681 // to determine if they were in fact provided.
8782 if (!GV->mayBeOverridden()) {
88 // Loading directly from the non-overridden global is trivially safe. We
89 // can't even look through pointer casts here though, as that might change
90 // the size loaded.
91 if (GV == V)
92 return true;
93
9483 BaseType = GV->getType()->getElementType();
9584 BaseAlign = GV->getAlignment();
9685 }
6262 #include "llvm/IR/CFG.h"
6363 #include "llvm/IR/CallSite.h"
6464 #include "llvm/IR/Constants.h"
65 #include "llvm/IR/DataLayout.h"
6566 #include "llvm/IR/DerivedTypes.h"
6667 #include "llvm/IR/DiagnosticInfo.h"
6768 #include "llvm/IR/Function.h"
8586 namespace {
8687 struct TailCallElim : public FunctionPass {
8788 const TargetTransformInfo *TTI;
89 const DataLayout *DL;
8890
8991 static char ID; // Pass identification, replacement for typeid
9092 TailCallElim() : FunctionPass(ID) {
155157 bool TailCallElim::runOnFunction(Function &F) {
156158 if (skipOptnoneFunction(F))
157159 return false;
160
161 DL = F.getParent()->getDataLayout();
158162
159163 bool AllCallsAreTailCalls = false;
160164 bool Modified = markTails(F, AllCallsAreTailCalls);
449453 // being loaded from.
450454 if (CI->mayWriteToMemory() ||
451455 !isSafeToLoadUnconditionally(L->getPointerOperand(), L,
452 L->getAlignment()))
456 L->getAlignment(), DL))
453457 return false;
454458 }
455459 }
11
22 ; This test makes sure that these instructions are properly eliminated.
33
4 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
45
56 @X = constant i32 42 ; [#uses=2]
67 @X2 = constant i32 47 ; [#uses=1]
12351235 ; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 68, i32 %x
12361236 ; CHECK-NEXT: ret i32 [[SEL]]
12371237 }
1238
1239 @under_aligned = external global i32, align 1
1240
1241 define i32 @test76(i1 %flag, i32* %x) {
1242 ; The load here must not be speculated around the select. One side of the
1243 ; select is trivially dereferencable but may have a lower alignment than the
1244 ; load does.
1245 ; CHECK-LABEL: @test76(
1246 ; CHECK: store i32 0, i32* %x
1247 ; CHECK: %[[P:.*]] = select i1 %flag, i32* @under_aligned, i32* %x
1248 ; CHECK: load i32* %[[P]]
1249
1250 store i32 0, i32* %x
1251 %p = select i1 %flag, i32* @under_aligned, i32* %x
1252 %v = load i32* %p
1253 ret i32 %v
1254 }
1255
1256 declare void @scribble_on_memory(i32*)
1257
1258 define i32 @test77(i1 %flag, i32* %x) {
1259 ; The load here must not be speculated around the select. One side of the
1260 ; select is trivially dereferencable but may have a lower alignment than the
1261 ; load does.
1262 ; CHECK-LABEL: @test77(
1263 ; CHECK: %[[A:.*]] = alloca i32, align 1
1264 ; CHECK: call void @scribble_on_memory(i32* %[[A]])
1265 ; CHECK: store i32 0, i32* %x
1266 ; CHECK: %[[P:.*]] = select i1 %flag, i32* %[[A]], i32* %x
1267 ; CHECK: load i32* %[[P]]
1268
1269 %under_aligned = alloca i32, align 1
1270 call void @scribble_on_memory(i32* %under_aligned)
1271 store i32 0, i32* %x
1272 %p = select i1 %flag, i32* %under_aligned, i32* %x
1273 %v = load i32* %p
1274 ret i32 %v
1275 }
0 ; RUN: opt < %s -tailcallelim -S | FileCheck %s
11 ; PR4323
2
3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
24
35 ; Several cases where tail call elimination should move the load above the call,
46 ; then eliminate the tail recursion.
1113 ; This load can be moved above the call because the function won't write to it
1214 ; and the call has no side effects.
1315 define fastcc i32 @raise_load_1(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind readonly {
16 ; CHECK-LABEL: @raise_load_1(
17 ; CHECK-NOT: call
18 ; CHECK: load i32*
19 ; CHECK-NOT: call
20 ; CHECK: }
1421 entry:
1522 %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; [#uses=1]
1623 br i1 %tmp2, label %if, label %else
2027
2128 else: ; preds = %entry
2229 %tmp7 = add i32 %start_arg, 1 ; [#uses=1]
23 ; CHECK-NOT: call
2430 %tmp8 = call fastcc i32 @raise_load_1(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; [#uses=1]
2531 %tmp9 = load i32* %a_arg ; [#uses=1]
2632 %tmp10 = add i32 %tmp9, %tmp8 ; [#uses=1]
3137 ; This load can be moved above the call because the function won't write to it
3238 ; and the load provably can't trap.
3339 define fastcc i32 @raise_load_2(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
40 ; CHECK-LABEL: @raise_load_2(
41 ; CHECK-NOT: call
42 ; CHECK: load i32*
43 ; CHECK-NOT: call
44 ; CHECK: }
3445 entry:
3546 %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; [#uses=1]
3647 br i1 %tmp2, label %if, label %else
4758
4859 recurse: ; preds = %else
4960 %tmp7 = add i32 %start_arg, 1 ; [#uses=1]
50 ; CHECK-NOT: call
5161 %tmp8 = call fastcc i32 @raise_load_2(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; [#uses=1]
5262 %tmp9 = load i32* @global ; [#uses=1]
5363 %tmp10 = add i32 %tmp9, %tmp8 ; [#uses=1]
5868 ; This load can be safely moved above the call (even though it's from an
5969 ; extern_weak global) because the call has no side effects.
6070 define fastcc i32 @raise_load_3(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind readonly {
71 ; CHECK-LABEL: @raise_load_3(
72 ; CHECK-NOT: call
73 ; CHECK: load i32*
74 ; CHECK-NOT: call
75 ; CHECK: }
6176 entry:
6277 %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; [#uses=1]
6378 br i1 %tmp2, label %if, label %else
6782
6883 else: ; preds = %entry
6984 %tmp7 = add i32 %start_arg, 1 ; [#uses=1]
70 ; CHECK-NOT: call
7185 %tmp8 = call fastcc i32 @raise_load_3(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; [#uses=1]
7286 %tmp9 = load i32* @extern_weak_global ; [#uses=1]
7387 %tmp10 = add i32 %tmp9, %tmp8 ; [#uses=1]
7993 ; unknown pointer (which normally means it might trap) because the first load
8094 ; proves it doesn't trap.
8195 define fastcc i32 @raise_load_4(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
96 ; CHECK-LABEL: @raise_load_4(
97 ; CHECK-NOT: call
98 ; CHECK: load i32*
99 ; CHECK-NEXT: load i32*
100 ; CHECK-NOT: call
101 ; CHECK: }
82102 entry:
83103 %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; [#uses=1]
84104 br i1 %tmp2, label %if, label %else
96116 recurse: ; preds = %else
97117 %tmp7 = add i32 %start_arg, 1 ; [#uses=1]
98118 %first = load i32* %a_arg ; [#uses=1]
99 ; CHECK-NOT: call
100119 %tmp8 = call fastcc i32 @raise_load_4(i32* %a_arg, i32 %first, i32 %tmp7) ; [#uses=1]
101120 %second = load i32* %a_arg ; [#uses=1]
102121 %tmp10 = add i32 %second, %tmp8 ; [#uses=1]