llvm.org GIT mirror llvm / 66b380b
Relax the constraint more in MemoryDependencyAnalysis.cpp. Even loads/stores that have a stronger ordering than monotonic can be safe. The rule is no release-acquire pair on the path from the QueryInst, assuming that the QueryInst is not atomic itself. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216771 91177308-0d34-0410-b5e6-96231b3b80d8 Robin Morisset 6 years ago
3 changed file(s) with 147 addition(s) and 64 deletion(s). Raw diff Collapse all Expand all
369369 int64_t MemLocOffset = 0;
370370 unsigned Limit = BlockScanLimit;
371371 bool isInvariantLoad = false;
372
373 // We must be careful with atomic accesses, as they may allow another thread
374 // to touch this location, clobbering it. We are conservative: if the
375 // QueryInst is not a simple (non-atomic) memory access, we automatically
376 // return getClobber.
377 // If it is simple, we know based on the results of
378 // "Compiler testing via a theory of sound optimisations in the C11/C++11
379 // memory model" in PLDI 2013, that a non-atomic location can only be
380 // clobbered between a pair of a release and an acquire action, with no
381 // access to the location in between.
382 // Here is an example for giving the general intuition behind this rule.
383 // In the following code:
384 // store x 0;
385 // release action; [1]
386 // acquire action; [4]
387 // %val = load x;
388 // It is unsafe to replace %val by 0 because another thread may be running:
389 // acquire action; [2]
390 // store x 42;
391 // release action; [3]
392 // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
393 // being 42. A key property of this program however is that if either
394 // 1 or 4 were missing, there would be a race between the store of 42
395 // and either the store of 0 or the load (making the whole program racy).
396 // The paper mentioned above shows that the same property is respected
397 // by every program that can detect any optimisation of that kind: either
398 // it is racy (undefined) or there is a release followed by an acquire
399 // between the pair of accesses under consideration.
400 bool HasSeenAcquire = false;
401
372402 if (isLoad && QueryInst) {
373403 LoadInst *LI = dyn_cast(QueryInst);
374404 if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
411441 // be accessing the location.
412442 if (LoadInst *LI = dyn_cast(Inst)) {
413443 // Atomic loads have complications involved.
414 // A monotonic load is OK if the query inst is itself not atomic.
444 // A Monotonic (or higher) load is OK if the query inst is itself not atomic.
445 // An Acquire (or higher) load sets the HasSeenAcquire flag, so that any
446 // release store will know to return getClobber.
415447 // FIXME: This is overly conservative.
416448 if (!LI->isUnordered()) {
417449 if (!QueryInst)
418 return MemDepResult::getClobber(LI);
419 if (LI->getOrdering() != Monotonic)
420450 return MemDepResult::getClobber(LI);
421451 if (auto *QueryLI = dyn_cast(QueryInst))
422452 if (!QueryLI->isSimple())
424454 if (auto *QuerySI = dyn_cast(QueryInst))
425455 if (!QuerySI->isSimple())
426456 return MemDepResult::getClobber(LI);
457 if (isAtLeastAcquire(LI->getOrdering()))
458 HasSeenAcquire = true;
427459 }
428460
429461 // FIXME: this is overly conservative.
489521
490522 if (StoreInst *SI = dyn_cast(Inst)) {
491523 // Atomic stores have complications involved.
492 // A monotonic store is OK if the query inst is itself not atomic.
524 // A Monotonic store is OK if the query inst is itself not atomic.
525 // A Release (or higher) store further requires that no acquire load
526 // has been seen.
493527 // FIXME: This is overly conservative.
494528 if (!SI->isUnordered()) {
495529 if (!QueryInst)
496 return MemDepResult::getClobber(SI);
497 if (SI->getOrdering() != Monotonic)
498530 return MemDepResult::getClobber(SI);
499531 if (auto *QueryLI = dyn_cast(QueryInst))
500532 if (!QueryLI->isSimple())
502534 if (auto *QuerySI = dyn_cast(QueryInst))
503535 if (!QuerySI->isSimple())
504536 return MemDepResult::getClobber(SI);
537 if (HasSeenAcquire && isAtLeastRelease(SI->getOrdering()))
538 return MemDepResult::getClobber(SI);
505539 }
506540
507541 // FIXME: this is overly conservative.
44
55 ; Sanity tests for atomic stores.
66 ; Note that it turns out essentially every transformation DSE does is legal on
7 ; atomic ops, just some transformations are not allowed across them.
7 ; atomic ops, just some transformations are not allowed across release-acquire pairs.
88
99 @x = common global i32 0, align 4
1010 @y = common global i32 0, align 4
1212 declare void @randomop(i32*)
1313
1414 ; DSE across unordered store (allowed)
15 define void @test1() nounwind uwtable ssp {
16 ; CHECK: test1
15 define void @test1() {
16 ; CHECK-LABEL: test1
1717 ; CHECK-NOT: store i32 0
1818 ; CHECK: store i32 1
19 entry:
2019 store i32 0, i32* @x
2120 store atomic i32 0, i32* @y unordered, align 4
2221 store i32 1, i32* @x
2322 ret void
2423 }
2524
26 ; DSE across seq_cst load (allowed in theory; not implemented ATM)
27 define i32 @test2() nounwind uwtable ssp {
28 ; CHECK: test2
29 ; CHECK: store i32 0
25 ; DSE across seq_cst load (allowed)
26 define i32 @test2() {
27 ; CHECK-LABEL: test2
28 ; CHECK-NOT: store i32 0
3029 ; CHECK: store i32 1
31 entry:
3230 store i32 0, i32* @x
3331 %x = load atomic i32* @y seq_cst, align 4
3432 store i32 1, i32* @x
3533 ret i32 %x
3634 }
3735
38 ; DSE across seq_cst store (store before atomic store must not be removed)
39 define void @test3() nounwind uwtable ssp {
40 ; CHECK: test3
41 ; CHECK: store i32
36 ; DSE across seq_cst store (allowed)
37 define void @test3() {
38 ; CHECK-LABEL: test3
39 ; CHECK-NOT: store i32 0
4240 ; CHECK: store atomic i32 2
43 entry:
4441 store i32 0, i32* @x
4542 store atomic i32 2, i32* @y seq_cst, align 4
4643 store i32 1, i32* @x
4845 }
4946
5047 ; DSE remove unordered store (allowed)
51 define void @test4() nounwind uwtable ssp {
52 ; CHECK: test4
48 define void @test4() {
49 ; CHECK-LABEL: test4
5350 ; CHECK-NOT: store atomic
5451 ; CHECK: store i32 1
55 entry:
5652 store atomic i32 0, i32* @x unordered, align 4
5753 store i32 1, i32* @x
5854 ret void
5955 }
6056
6157 ; DSE unordered store overwriting non-atomic store (allowed)
62 define void @test5() nounwind uwtable ssp {
63 ; CHECK: test5
58 define void @test5() {
59 ; CHECK-LABEL: test5
6460 ; CHECK: store atomic i32 1
65 entry:
6661 store i32 0, i32* @x
6762 store atomic i32 1, i32* @x unordered, align 4
6863 ret void
6964 }
7065
7166 ; DSE no-op unordered atomic store (allowed)
72 define void @test6() nounwind uwtable ssp {
73 ; CHECK: test6
67 define void @test6() {
68 ; CHECK-LABEL: test6
7469 ; CHECK-NOT: store
7570 ; CHECK: ret void
76 entry:
7771 %x = load atomic i32* @x unordered, align 4
7872 store atomic i32 %x, i32* @x unordered, align 4
7973 ret void
8175
8276 ; DSE seq_cst store (be conservative; DSE doesn't have infrastructure
8377 ; to reason about atomic operations).
84 define void @test7() nounwind uwtable ssp {
85 ; CHECK: test7
86 ; CHECK: store atomic
87 entry:
78 define void @test7() {
79 ; CHECK-LABEL: test7
80 ; CHECK: store atomic
8881 %a = alloca i32
8982 store atomic i32 0, i32* %a seq_cst, align 4
9083 ret void
9285
9386 ; DSE and seq_cst load (be conservative; DSE doesn't have infrastructure
9487 ; to reason about atomic operations).
95 define i32 @test8() nounwind uwtable ssp {
96 ; CHECK: test8
88 define i32 @test8() {
89 ; CHECK-LABEL: test8
9790 ; CHECK: store
98 ; CHECK: load atomic
99 entry:
91 ; CHECK: load atomic
10092 %a = alloca i32
10193 call void @randomop(i32* %a)
10294 store i32 0, i32* %a, align 4
10597 }
10698
10799 ; DSE across monotonic load (allowed as long as the eliminated store isUnordered)
108 define i32 @test9() nounwind uwtable ssp {
109 ; CHECK: test9
100 define i32 @test9() {
101 ; CHECK-LABEL: test9
110102 ; CHECK-NOT: store i32 0
111103 ; CHECK: store i32 1
112 entry:
113104 store i32 0, i32* @x
114105 %x = load atomic i32* @y monotonic, align 4
115106 store i32 1, i32* @x
117108 }
118109
119110 ; DSE across monotonic store (allowed as long as the eliminated store isUnordered)
120 define void @test10() nounwind uwtable ssp {
121 ; CHECK: test10
111 define void @test10() {
112 ; CHECK-LABEL: test10
122113 ; CHECK-NOT: store i32 0
123114 ; CHECK: store i32 1
124 entry:
125115 store i32 0, i32* @x
126116 store atomic i32 42, i32* @y monotonic, align 4
127117 store i32 1, i32* @x
129119 }
130120
131121 ; DSE across monotonic load (forbidden since the eliminated store is atomic)
132 define i32 @test11() nounwind uwtable ssp {
133 ; CHECK: test11
122 define i32 @test11() {
123 ; CHECK-LABEL: test11
134124 ; CHECK: store atomic i32 0
135125 ; CHECK: store atomic i32 1
136 entry:
137126 store atomic i32 0, i32* @x monotonic, align 4
138127 %x = load atomic i32* @y monotonic, align 4
139128 store atomic i32 1, i32* @x monotonic, align 4
141130 }
142131
143132 ; DSE across monotonic store (forbidden since the eliminated store is atomic)
144 define void @test12() nounwind uwtable ssp {
145 ; CHECK: test12
133 define void @test12() {
134 ; CHECK-LABEL: test12
146135 ; CHECK: store atomic i32 0
147136 ; CHECK: store atomic i32 1
148 entry:
149137 store atomic i32 0, i32* @x monotonic, align 4
150138 store atomic i32 42, i32* @y monotonic, align 4
151139 store atomic i32 1, i32* @x monotonic, align 4
152140 ret void
153141 }
142
143 ; DSE is allowed across a pair of an atomic read and then write.
144 define i32 @test13() {
145 ; CHECK-LABEL: test13
146 ; CHECK-NOT: store i32 0
147 ; CHECK: store i32 1
148 store i32 0, i32* @x
149 %x = load atomic i32* @y seq_cst, align 4
150 store atomic i32 %x, i32* @y seq_cst, align 4
151 store i32 1, i32* @x
152 ret i32 %x
153 }
154
155 ; Same if it is acquire-release instead of seq_cst/seq_cst
156 define i32 @test14() {
157 ; CHECK-LABEL: test14
158 ; CHECK-NOT: store i32 0
159 ; CHECK: store i32 1
160 store i32 0, i32* @x
161 %x = load atomic i32* @y acquire, align 4
162 store atomic i32 %x, i32* @y release, align 4
163 store i32 1, i32* @x
164 ret i32 %x
165 }
166
167 ; But DSE is not allowed across a release-acquire pair.
168 define i32 @test15() {
169 ; CHECK-LABEL: test15
170 ; CHECK: store i32 0
171 ; CHECK: store i32 1
172 store i32 0, i32* @x
173 store atomic i32 0, i32* @y release, align 4
174 %x = load atomic i32* @y acquire, align 4
175 store i32 1, i32* @x
176 ret i32 %x
177 }
77
88 ; GVN across unordered store (allowed)
99 define i32 @test1() nounwind uwtable ssp {
10 ; CHECK: test1
10 ; CHECK-LABEL: test1
1111 ; CHECK: add i32 %x, %x
1212 entry:
1313 %x = load i32* @y
1717 ret i32 %z
1818 }
1919
20 ; GVN across seq_cst store (allowed in theory; not implemented ATM)
20 ; GVN across seq_cst store (allowed)
2121 define i32 @test2() nounwind uwtable ssp {
22 ; CHECK: test2
23 ; CHECK: add i32 %x, %y
22 ; CHECK-LABEL: test2
23 ; CHECK: add i32 %x, %x
2424 entry:
2525 %x = load i32* @y
2626 store atomic i32 %x, i32* @x seq_cst, align 4
3131
3232 ; GVN across unordered load (allowed)
3333 define i32 @test3() nounwind uwtable ssp {
34 ; CHECK: test3
34 ; CHECK-LABEL: test3
3535 ; CHECK: add i32 %x, %x
3636 entry:
3737 %x = load i32* @y
4242 ret i32 %b
4343 }
4444
45 ; GVN across acquire load (load after atomic load must not be removed)
45 ; GVN across acquire load (allowed as the original load was not atomic)
4646 define i32 @test4() nounwind uwtable ssp {
47 ; CHECK: test4
47 ; CHECK-LABEL: test4
4848 ; CHECK: load atomic i32* @x
49 ; CHECK: load i32* @y
49 ; CHECK-NOT: load i32* @y
5050 entry:
5151 %x = load i32* @y
5252 %y = load atomic i32* @x seq_cst, align 4
5858
5959 ; GVN load to unordered load (allowed)
6060 define i32 @test5() nounwind uwtable ssp {
61 ; CHECK: test5
61 ; CHECK-LABEL: test5
6262 ; CHECK: add i32 %x, %x
6363 entry:
6464 %x = load atomic i32* @x unordered, align 4
6969
7070 ; GVN unordered load to load (unordered load must not be removed)
7171 define i32 @test6() nounwind uwtable ssp {
72 ; CHECK: test6
72 ; CHECK-LABEL: test6
7373 ; CHECK: load atomic i32* @x unordered
7474 entry:
7575 %x = load i32* @x
7878 ret i32 %x3
7979 }
8080
81 ; GVN across release-acquire pair (forbidden)
82 define i32 @test7() nounwind uwtable ssp {
83 ; CHECK-LABEL: test7
84 ; CHECK: add i32 %x, %y
85 entry:
86 %x = load i32* @y
87 store atomic i32 %x, i32* @x release, align 4
88 %w = load atomic i32* @x acquire, align 4
89 %y = load i32* @y
90 %z = add i32 %x, %y
91 ret i32 %z
92 }
93
94 ; GVN across acquire-release pair (allowed)
95 define i32 @test8() nounwind uwtable ssp {
96 ; CHECK-LABEL: test8
97 ; CHECK: add i32 %x, %x
98 entry:
99 %x = load i32* @y
100 %w = load atomic i32* @x acquire, align 4
101 store atomic i32 %x, i32* @x release, align 4
102 %y = load i32* @y
103 %z = add i32 %x, %y
104 ret i32 %z
105 }
106
81107 ; GVN across monotonic store (allowed)
82 define i32 @test7() nounwind uwtable ssp {
83 ; CHECK: test7
108 define i32 @test9() nounwind uwtable ssp {
109 ; CHECK-LABEL: test9
84110 ; CHECK: add i32 %x, %x
85111 entry:
86112 %x = load i32* @y
91117 }
92118
93119 ; GVN of an unordered load across a monotonic load (not allowed)
94 define i32 @test8() nounwind uwtable ssp {
95 ; CHECK: test8
120 define i32 @test10() nounwind uwtable ssp {
121 ; CHECK-LABEL: test10
96122 ; CHECK: add i32 %x, %y
97123 entry:
98124 %x = load atomic i32* @y unordered, align 4
102128 ret i32 %z
103129 }
104130
105