llvm.org GIT mirror llvm / 56efe24

Atomic load/store handling for the passes using memdep (GVN, DSE, memcpyopt).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137888 91177308-0d34-0410-b5e6-96231b3b80d8
Author: Eli Friedman

6 changed file(s) with 239 addition(s) and 9 deletion(s).
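For context before the diff (an illustrative paraphrase, not part of the commit): the patch keys on the memory-ordering predicates that the then-new atomic IR support added to LoadInst and StoreInst. Roughly, assuming the llvm/Instructions.h definitions of that era:

    // Paraphrased sketch, not the actual header code.
    // isSimple(): neither atomic nor volatile. This is the strictest bar,
    // used below by GVN and memcpyopt.
    bool LoadInst::isSimple() const {
      return !isAtomic() && !isVolatile();
    }

    // isUnordered(): additionally admits "unordered" atomics, which carry
    // no ordering guarantees, so DSE may still delete such a store when
    // its value is provably dead.
    bool LoadInst::isUnordered() const {
      return (getOrdering() == NotAtomic || getOrdering() == Unordered) &&
             !isVolatile();
    }

StoreInst exposes the same pair of predicates; volatile or ordered (monotonic and stronger) accesses fail both.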
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -212,9 +212,9 @@
 /// isRemovable - If the value of this instruction and the memory it writes to
 /// is unused, may we delete this instruction?
 static bool isRemovable(Instruction *I) {
-  // Don't remove volatile stores.
+  // Don't remove volatile/atomic stores.
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
-    return !SI->isVolatile();
+    return SI->isUnordered();
 
   IntrinsicInst *II = cast<IntrinsicInst>(I);
   switch (II->getIntrinsicID()) {
@@ -446,7 +446,7 @@
     if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
       if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
         if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
-            SI->getOperand(0) == DepLoad && !SI->isVolatile()) {
+            SI->getOperand(0) == DepLoad && isRemovable(SI)) {
           DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n  "
                 << "LOAD: " << *DepLoad << "\n  STORE: " << *SI << '\n');
 
@@ -669,6 +669,8 @@
 
     // If we encounter a use of the pointer, it is no longer considered dead
     if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
+      if (!L->isUnordered()) // Be conservative with atomic/volatile load
+        break;
       LoadedLoc = AA->getLocation(L);
     } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
       LoadedLoc = AA->getLocation(V);
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -981,8 +981,8 @@
   unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType());
   unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
   if (Offset+LoadSize > SrcValSize) {
-    assert(!SrcVal->isVolatile() && "Cannot widen volatile load!");
-    assert(isa<IntegerType>(SrcVal->getType()) && "Can't widen non-integer load");
+    assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
+    assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
     // If we have a load/load clobber and DepLI can be widened to cover this
     // load, then we should widen it to the next power of 2 size big enough!
     unsigned NewLoadSize = Offset+LoadSize;
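An aside on the widening math (illustrative values, not part of the diff): the code computes the byte count needed to cover both accesses, and the "next power of 2" rounding the comment describes is conventionally done with the NextPowerOf2 helper from llvm/Support/MathExtras.h:

    unsigned Offset = 2, LoadSize = 4;        // hypothetical: load covers bytes [2, 6) of SrcVal
    unsigned NewLoadSize = Offset + LoadSize; // 6 bytes are required
    NewLoadSize = NextPowerOf2(NewLoadSize);  // rounded up to 8, i.e. an i64-sized load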
@@ -1668,7 +1668,7 @@
   if (!MD)
     return false;
 
-  if (L->isVolatile())
+  if (!L->isSimple())
     return false;
 
   if (L->use_empty()) {
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -383,7 +383,7 @@
 
     if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
       // If this is a store, see if we can merge it in.
-      if (NextStore->isVolatile()) break;
+      if (!NextStore->isSimple()) break;
 
       // Check to see if this stored value is of the same byte-splattable value.
       if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
@@ -478,7 +478,7 @@
 
 
 bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
-  if (SI->isVolatile()) return false;
+  if (!SI->isSimple()) return false;
 
   if (TD == 0) return false;
 
@@ -486,7 +486,7 @@
   // happen to be using a load-store pair to implement it, rather than
   // a memcpy.
   if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
-    if (!LI->isVolatile() && LI->hasOneUse() &&
+    if (LI->isSimple() && LI->hasOneUse() &&
         LI->getParent() == SI->getParent()) {
       MemDepResult ldep = MD->getDependency(LI);
       CallInst *C = 0;
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/atomic.ll
; RUN: opt -basicaa -dse -S < %s | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-macosx10.7.0"

; Sanity tests for atomic stores.
; Note that it turns out essentially every transformation DSE does is legal on
; atomic ops, just some transformations are not allowed across them.

@x = common global i32 0, align 4
@y = common global i32 0, align 4

declare void @randomop(i32*)

; DSE across unordered store (allowed)
define void @test1() nounwind uwtable ssp {
; CHECK: test1
; CHECK-NOT: store i32 0
; CHECK: store i32 1
entry:
  store i32 0, i32* @x
  store atomic i32 0, i32* @y unordered, align 4
  store i32 1, i32* @x
  ret void
}

; DSE across seq_cst load (allowed in theory; not implemented ATM)
define i32 @test2() nounwind uwtable ssp {
; CHECK: test2
; CHECK: store i32 0
; CHECK: store i32 1
entry:
  store i32 0, i32* @x
  %x = load atomic i32* @y seq_cst, align 4
  store i32 1, i32* @x
  ret i32 %x
}

; DSE across seq_cst store (store before atomic store must not be removed)
define void @test3() nounwind uwtable ssp {
; CHECK: test3
; CHECK: store i32
; CHECK: store atomic i32 2
entry:
  store i32 0, i32* @x
  store atomic i32 2, i32* @y seq_cst, align 4
  store i32 1, i32* @x
  ret void
}

; DSE remove unordered store (allowed)
define void @test4() nounwind uwtable ssp {
; CHECK: test4
; CHECK-NOT: store atomic
; CHECK: store i32 1
entry:
  store atomic i32 0, i32* @x unordered, align 4
  store i32 1, i32* @x
  ret void
}

; DSE unordered store overwriting non-atomic store (allowed)
define void @test5() nounwind uwtable ssp {
; CHECK: test5
; CHECK: store atomic i32 1
entry:
  store i32 0, i32* @x
  store atomic i32 1, i32* @x unordered, align 4
  ret void
}

; DSE no-op unordered atomic store (allowed)
define void @test6() nounwind uwtable ssp {
; CHECK: test6
; CHECK-NOT: store
; CHECK: ret void
entry:
  %x = load atomic i32* @x unordered, align 4
  store atomic i32 %x, i32* @x unordered, align 4
  ret void
}

; DSE seq_cst store (be conservative; DSE doesn't have infrastructure
; to reason about atomic operations).
define void @test7() nounwind uwtable ssp {
; CHECK: test7
; CHECK: store atomic
entry:
  %a = alloca i32
  store atomic i32 0, i32* %a seq_cst, align 4
  ret void
}

; DSE and seq_cst load (be conservative; DSE doesn't have infrastructure
; to reason about atomic operations).
define i32 @test8() nounwind uwtable ssp {
; CHECK: test8
; CHECK: store
; CHECK: load atomic
entry:
  %a = alloca i32
  call void @randomop(i32* %a)
  store i32 0, i32* %a, align 4
  %x = load atomic i32* @x seq_cst, align 4
  ret i32 %x
}
--- /dev/null
+++ b/test/Transforms/GVN/atomic.ll
; RUN: opt -basicaa -gvn -S < %s | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-macosx10.7.0"

@x = common global i32 0, align 4
@y = common global i32 0, align 4

; GVN across unordered store (allowed)
define i32 @test1() nounwind uwtable ssp {
; CHECK: test1
; CHECK: add i32 %x, %x
entry:
  %x = load i32* @y
  store atomic i32 %x, i32* @x unordered, align 4
  %y = load i32* @y
  %z = add i32 %x, %y
  ret i32 %z
}

; GVN across seq_cst store (allowed in theory; not implemented ATM)
define i32 @test2() nounwind uwtable ssp {
; CHECK: test2
; CHECK: add i32 %x, %y
entry:
  %x = load i32* @y
  store atomic i32 %x, i32* @x seq_cst, align 4
  %y = load i32* @y
  %z = add i32 %x, %y
  ret i32 %z
}

; GVN across unordered load (allowed)
define i32 @test3() nounwind uwtable ssp {
; CHECK: test3
; CHECK: add i32 %x, %x
entry:
  %x = load i32* @y
  %y = load atomic i32* @x unordered, align 4
  %z = load i32* @y
  %a = add i32 %x, %z
  %b = add i32 %y, %a
  ret i32 %b
}

; GVN across seq_cst load (load after atomic load must not be removed)
define i32 @test4() nounwind uwtable ssp {
; CHECK: test4
; CHECK: load atomic i32* @x
; CHECK: load i32* @y
entry:
  %x = load i32* @y
  %y = load atomic i32* @x seq_cst, align 4
  %x2 = load i32* @y
  %x3 = add i32 %x, %x2
  %y2 = add i32 %y, %x3
  ret i32 %y2
}

; GVN load to unordered load (allowed)
define i32 @test5() nounwind uwtable ssp {
; CHECK: test5
; CHECK: add i32 %x, %x
entry:
  %x = load atomic i32* @x unordered, align 4
  %y = load i32* @x
  %z = add i32 %x, %y
  ret i32 %z
}

; GVN unordered load to load (unordered load must not be removed)
define i32 @test6() nounwind uwtable ssp {
; CHECK: test6
; CHECK: load atomic i32* @x unordered
entry:
  %x = load i32* @x
  %x2 = load atomic i32* @x unordered, align 4
  %x3 = add i32 %x, %x2
  ret i32 %x3
}
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/atomic.ll
; RUN: opt -basicaa -memcpyopt -S < %s | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-macosx10.7.0"

@x = global i32 0

declare void @otherf(i32*)

declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind

; memcpyopt should not touch atomic ops
define void @test1() nounwind uwtable ssp {
; CHECK: test1
; CHECK: store atomic
  %x = alloca [101 x i32], align 16
  %bc = bitcast [101 x i32]* %x to i8*
  call void @llvm.memset.p0i8.i64(i8* %bc, i8 0, i64 400, i32 16, i1 false)
  %gep1 = getelementptr inbounds [101 x i32]* %x, i32 0, i32 100
  store atomic i32 0, i32* %gep1 unordered, align 4
  %gep2 = getelementptr inbounds [101 x i32]* %x, i32 0, i32 0
  call void @otherf(i32* %gep2)
  ret void
}

; memcpyopt across unordered store
define void @test2() nounwind uwtable ssp {
; CHECK: test2
; CHECK: call
; CHECK-NEXT: store atomic
; CHECK-NEXT: call
  %old = alloca i32
  %new = alloca i32
  call void @otherf(i32* nocapture %old)
  store atomic i32 0, i32* @x unordered, align 4
  %v = load i32* %old
  store i32 %v, i32* %new
  call void @otherf(i32* nocapture %new)
  ret void
}