[EarlyCSEwMemorySSA] Add MSSA verification and tests to make EarlyCSE failures easier to track.

Summary: EarlyCSE can make IR changes that will leave MemorySSA with accesses claiming to be optimized, but for which a subsequent MemorySSA run will yield a different optimized result. Because this relies on AA queries, we can't fix it in general unless we recompute MemorySSA. Adding some tests to track this and a basic verification for potential future failures.

Reviewers: george.burgess.iv, gberry

Subscribers: sanjoy, jlebar, Prazek, llvm-commits

Differential Revision: https://reviews.llvm.org/D51960

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@342422 91177308-0d34-0410-b5e6-96231b3b80d8

Alina Sbirlea, 1 year, 4 days ago
2 changed files with 139 additions and 0 deletions.
   void removeMSSA(Instruction *Inst) {
     if (!MSSA)
       return;
+    if (VerifyMemorySSA)
+      MSSA->verifyMemorySSA();
     // Removing a store here can leave MemorySSA in an unoptimized state by
     // creating MemoryPhis that have identical arguments and by creating
     // MemoryUses whose defining access is not an actual clobber. We handle the
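For reference, the VerifyMemorySSA flag checked above is not added by this patch: it is the MemorySSA analysis' existing verification knob, exposed to opt as -verify-memoryssa (the same option the new test's RUN line passes). A rough sketch of that pre-existing definition, for context only; the exact names and description string live in MemorySSA.cpp and may differ slightly by revision:

#include "llvm/Support/CommandLine.h"

namespace llvm {
// Global toggled by -verify-memoryssa; passes such as EarlyCSE read it to
// decide whether to call MemorySSA::verifyMemorySSA() around their updates.
bool VerifyMemorySSA = false;
} // namespace llvm

static llvm::cl::opt<bool, true> VerifyMemorySSAX(
    "verify-memoryssa", llvm::cl::location(llvm::VerifyMemorySSA),
    llvm::cl::Hidden, llvm::cl::desc("Enable verification of MemorySSA."));

With the flag on, every call to removeMSSA() first re-verifies the current MemorySSA state, which is exactly what the new test below exercises.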
; RUN: opt < %s -early-cse-memssa -verify-memoryssa -disable-output
; REQUIRES: asserts

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Tests below highlight scenarios where EarlyCSE does not preserve MemorySSA
; optimized accesses. Current MemorySSA verify will accept these.

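; Note: "LoE" in the MemorySSA annotations below is shorthand for the
; liveOnEntry definition.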
; Test 1:
; AA cannot tell here that the last load does not alias the only store.
; The first two loads are a common expression; EarlyCSE removes the second one,
; and then AA can see that the last load is a Use(LoE). Hence the access is not
; actually optimized, though it claims to be. Note that if we swap the GEP
; indices (2 and 1), AA sees NoAlias for the last load before CSE-ing the first
; two loads.
%struct.ImageParameters = type { i32, i32, i32 }
@img = external global %struct.ImageParameters*, align 8
define void @test1_macroblock() {
entry:
  ; MemoryUse(LoE)
  %0 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8

  %Pos_2 = getelementptr inbounds %struct.ImageParameters, %struct.ImageParameters* %0, i64 0, i32 2
  ; 1 = MemoryDef(LoE)
  store i32 undef, i32* %Pos_2, align 8

  ; MemoryUse(LoE)
  %1 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8

  %Pos_1 = getelementptr inbounds %struct.ImageParameters, %struct.ImageParameters* %1, i64 0, i32 1
  ; MemoryUse(1) MayAlias
  %2 = load i32, i32* %Pos_1, align 4
  unreachable
}

; Test 2:
; EarlyCSE simplifies %string to undef. The Def and Use used to be MustAlias;
; with undef they are NoAlias, and the Use can be optimized further to LoE. We
; can de-optimize uses of replaced instructions, but in general this is not
; enough (see the next tests).
%struct.TermS = type { i32, i32, i32, i32, i32, i8* }
define fastcc void @test2_term_string() {
entry:
  %string = getelementptr inbounds %struct.TermS, %struct.TermS* undef, i64 0, i32 5
  ; 1 = MemoryDef(LoE)
  store i8* undef, i8** %string, align 8
  ; MemoryUse(1) MustAlias
  %0 = load i8*, i8** %string, align 8
  unreachable
}

; Test 3:
; EarlyCSE simplifies %0 to undef, so the second Def now stores to undef.
; We now find that the second load (Use(2)) can be optimized further to LoE.
; When replacing instructions, we can de-optimize all uses of the replaced
; instruction and all uses of transitive accesses; however, this does not stop
; MemorySSA from being tripped up by AA (see test4).
%struct.Grammar = type { i8*, i8*, %struct.anon }
%struct.anon = type { i32, i32, %struct.Term**, [3 x %struct.Term*] }
%struct.Term = type { i32 }

define fastcc void @test3_term_string(%struct.Grammar* %g) {
entry:
  ; 1 = MemoryDef(LoE)
  store i8* undef, i8** undef, align 8
  ; MemoryUse(LoE)
  %0 = load i8*, i8** undef, align 8
  %arrayidx = getelementptr inbounds i8, i8* %0, i64 undef
  ; 2 = MemoryDef(1)
  store i8 0, i8* %arrayidx, align 1
  %v = getelementptr inbounds %struct.Grammar, %struct.Grammar* %g, i64 0, i32 2, i32 2
  ; MemoryUse(2) MayAlias
  %1 = load %struct.Term**, %struct.Term*** %v, align 8
  unreachable
}

; Test 4:
; Removing dead/unused instructions in if.then274 makes AA smarter. Before the
; removal it finds that %4 MayAlias the store above; after the removal this can
; be optimized to LoE. Hence after EarlyCSE there is an access that claims to
; be optimized yet can be optimized further.

; We can't escape such cases in general when relying on Alias Analysis.
; The only fail-safe way to actually preserve MemorySSA when removing or
; replacing instructions (i.e. to get the *same* MemorySSA as if it were
; computed for the updated IR) is to recompute it from scratch. What we get now
; is still a correct update, but with accesses that claim to be optimized and
; could be optimized further if we were to re-run MemorySSA on the IR.
%struct.gnode.0.1.3.6.9.18.20.79 = type { i32, i32, i32, i32, i32, i32, i32, %struct.gnode.0.1.3.6.9.18.20.79* }
@gnodeArray = external global %struct.gnode.0.1.3.6.9.18.20.79**, align 8

define void @test4_shortest() {
entry:
  %exl.i = alloca [5 x i32], align 16
  br i1 undef, label %if.then274, label %for.cond404

if.then274:                                       ; preds = %if.end256
  %0 = bitcast [5 x i32]* %exl.i to i8*
  %arrayidx.i = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 1
  %arrayidx1.i = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 2
  %arrayidx2.i = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 3
  %arrayidx3.i = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 4
  %1 = bitcast [5 x i32]* %exl.i to i8*
  %arrayidx.i1034 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 1
  %arrayidx1.i1035 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 2
  %arrayidx2.i1036 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 3
  %arrayidx3.i1037 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 4
  unreachable

for.cond404:                                      ; preds = %if.end256
  %2 = bitcast [5 x i32]* %exl.i to i8*
  %arrayidx.i960 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 1
  %arrayidx1.i961 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 2
  %arrayidx2.i962 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 3
  ; 1 = MemoryDef(LoE)
  store i32 undef, i32* %arrayidx2.i962, align 4
  %arrayidx3.i963 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 4

  ; MemoryUse(LoE)
  %3 = load %struct.gnode.0.1.3.6.9.18.20.79**, %struct.gnode.0.1.3.6.9.18.20.79*** @gnodeArray, align 8
  %arrayidx6.i968 = getelementptr inbounds %struct.gnode.0.1.3.6.9.18.20.79*, %struct.gnode.0.1.3.6.9.18.20.79** %3, i64 undef
  ; MemoryUse(1) MayAlias
  %4 = load %struct.gnode.0.1.3.6.9.18.20.79*, %struct.gnode.0.1.3.6.9.18.20.79** %arrayidx6.i968, align 8
  br i1 undef, label %for.cond26.preheader.i974, label %if.then20.for.body_crit_edge.i999

for.cond26.preheader.i974:                        ; preds = %if.then20.i996
  %5 = bitcast [5 x i32]* %exl.i to i8*
  %arrayidx.i924 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 1
  %arrayidx1.i925 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 2
  %arrayidx2.i926 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 3
  %arrayidx3.i927 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 4
  unreachable

if.then20.for.body_crit_edge.i999:                ; preds = %if.then20.i996
  %arrayidx9.phi.trans.insert.i997 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 undef
  unreachable
}
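The comment block before test4 notes that the only fail-safe way to preserve MemorySSA through such removals and replacements is to recompute it from scratch. As a rough illustration of what that fallback means in terms of the MemorySSA C++ API (the helper below is hypothetical and not part of this patch):

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include <memory>

// Hypothetical fallback: throw away the stale MemorySSA and rebuild it for the
// updated function, so every access's cached "optimized" answer agrees with
// what AA reports for the current IR.
static std::unique_ptr<llvm::MemorySSA>
recomputeMemorySSA(llvm::Function &F, llvm::AAResults &AA,
                   llvm::DominatorTree &DT) {
  return std::make_unique<llvm::MemorySSA>(F, &AA, &DT);
}

Recomputation is the fully general fix the commit message alludes to; since doing it after every EarlyCSE change would be too expensive, this patch settles for verification under -verify-memoryssa plus tests that track the known divergences.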