llvm.org GIT mirror: llvm commit 6c92ba3
[DSE] Teach the pass that atomic memory intrinsics are stores.

Summary:
This change teaches DSE that the atomic memory intrinsics are stores
that can be eliminated, and can allow other stores to be eliminated.
This change specifically does not teach DSE that these intrinsics can
be partially eliminated (i.e. length reduced, and dest/src changed);
that will be handled in another change.

Reviewers: mkazantsev, skatkov, apilipenko, efriedma, rsmith

Reviewed By: efriedma

Subscribers: dmgreen, llvm-commits

Differential Revision: https://reviews.llvm.org/D45535

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@330629 91177308-0d34-0410-b5e6-96231b3b80d8

Daniel Neilson, 1 year, 4 months ago
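As an illustration (a hedged sketch modeled on test4 in the updated atomic.ll below, not part of the commit itself): an element-wise atomic memcpy into memory that is never read is now recognized as a dead store and deleted.

declare void @llvm.memcpy.element.unordered.atomic.p0i16.p0i16.i16(i16*, i16*, i16, i32)

define void @copy_to_dead_alloca() {
  %A = alloca i16, i16 1024, align 2
  %B = alloca i16, i16 1024, align 2
  store atomic i16 0, i16* %B unordered, align 2
  ; %A is never read, so the copy is dead; with this change DSE deletes it,
  ; which in turn makes the store to %B dead as well.
  call void @llvm.memcpy.element.unordered.atomic.p0i16.p0i16.i16(i16* align 2 %A, i16* align 2 %B, i16 1024, i32 2)
  ret void
}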
5 changed files with 59 additions and 49 deletions.
2626 class StoreInst;
2727 class MemTransferInst;
2828 class MemIntrinsic;
29 class AtomicMemTransferInst;
30 class AtomicMemIntrinsic;
31 class AnyMemTransferInst;
32 class AnyMemIntrinsic;
2933 class TargetLibraryInfo;
3034
3135 /// Representation for a specific memory location.
8993
9094 /// Return a location representing the source of a memory transfer.
9195 static MemoryLocation getForSource(const MemTransferInst *MTI);
96 static MemoryLocation getForSource(const AtomicMemTransferInst *MTI);
97 static MemoryLocation getForSource(const AnyMemTransferInst *MTI);
9298
9399 /// Return a location representing the destination of a memory set or
94100 /// transfer.
95101 static MemoryLocation getForDest(const MemIntrinsic *MI);
102 static MemoryLocation getForDest(const AtomicMemIntrinsic *MI);
103 static MemoryLocation getForDest(const AnyMemIntrinsic *MI);
96104
97105 /// Return a location representing a particular argument of a call.
98106 static MemoryLocation getForArgument(ImmutableCallSite CS, unsigned ArgIdx,
6464 }
6565
6666 MemoryLocation MemoryLocation::getForSource(const MemTransferInst *MTI) {
67 return getForSource(cast<AnyMemTransferInst>(MTI));
68 }
69
70 MemoryLocation MemoryLocation::getForSource(const AtomicMemTransferInst *MTI) {
71 return getForSource(cast<AnyMemTransferInst>(MTI));
72 }
73
74 MemoryLocation MemoryLocation::getForSource(const AnyMemTransferInst *MTI) {
6775 uint64_t Size = UnknownSize;
6876 if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
6977 Size = C->getValue().getZExtValue();
7078
7179 // memcpy/memmove can have AA tags. For memcpy, they apply
7280 // to both the source and the destination.
7381 AAMDNodes AATags;
7482 MTI->getAAMetadata(AATags);
7583
7684 return MemoryLocation(MTI->getRawSource(), Size, AATags);
7785 }
7886
79 MemoryLocation MemoryLocation::getForDest(const MemIntrinsic *MTI) {
87 MemoryLocation MemoryLocation::getForDest(const MemIntrinsic *MI) {
88 return getForDest(cast<AnyMemIntrinsic>(MI));
89 }
90
91 MemoryLocation MemoryLocation::getForDest(const AtomicMemIntrinsic *MI) {
92 return getForDest(cast<AnyMemIntrinsic>(MI));
93 }
94
95 MemoryLocation MemoryLocation::getForDest(const AnyMemIntrinsic *MI) {
8096 uint64_t Size = UnknownSize;
81 if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
97 if (ConstantInt *C = dyn_cast<ConstantInt>(MI->getLength()))
8298 Size = C->getValue().getZExtValue();
8399
84100 // memcpy/memmove can have AA tags. For memcpy, they apply
85101 // to both the source and the destination.
86102 AAMDNodes AATags;
87 MTI->getAAMetadata(AATags);
103 MI->getAAMetadata(AATags);
88104
89 return MemoryLocation(MTI->getRawDest(), Size, AATags);
105 return MemoryLocation(MI->getRawDest(), Size, AATags);
90106 }
91107
92108 MemoryLocation MemoryLocation::getForArgument(ImmutableCallSite CS,
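For reference, the AA tags mentioned in the comment are metadata attached to the call; a minimal, hypothetical IR fragment (the !tbaa nodes are illustrative only) whose tags getForSource/getForDest would propagate into the returned MemoryLocation:

; The !tbaa access tag on the call is picked up via getAAMetadata() and
; applied to both the source and destination locations.
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %dst, i8* align 4 %src, i64 16, i1 false), !tbaa !0

!0 = !{!1, !1, i64 0}          ; access tag: base type, access type, offset
!1 = !{!"char", !2, i64 0}     ; scalar type node
!2 = !{!"Simple C/C++ TBAA"}   ; TBAA root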
159159 case Intrinsic::memset:
160160 case Intrinsic::memmove:
161161 case Intrinsic::memcpy:
162 case Intrinsic::memcpy_element_unordered_atomic:
163 case Intrinsic::memmove_element_unordered_atomic:
164 case Intrinsic::memset_element_unordered_atomic:
162165 case Intrinsic::init_trampoline:
163166 case Intrinsic::lifetime_end:
164167 return true;
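For context (taken from the LangRef rather than this diff): the element-wise atomic intrinsics carry a trailing element-size argument and, unlike their plain counterparts, no volatile flag, which is why the removability check below can return true for them unconditionally.

; The last parameter is the element size in bytes (a constant); the length
; must be a multiple of it. There is no i1 volatile operand.
declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8*, i8*, i64, i32)
declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8*, i8*, i64, i32)
declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8*, i8, i64, i32)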
188191 if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
189192 return MemoryLocation::get(SI);
190193
191 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) {
194 if (auto *MI = dyn_cast<AnyMemIntrinsic>(Inst)) {
192195 // memcpy/memmove/memset.
193196 MemoryLocation Loc = MemoryLocation::getForDest(MI);
194197 return Loc;
221224
222225 // The only instructions that both read and write are the mem transfer
223226 // instructions (memcpy/memmove).
224 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst))
227 if (auto *MTI = dyn_cast<AnyMemTransferInst>(Inst))
225228 return MemoryLocation::getForSource(MTI);
226229 return MemoryLocation();
227230 }
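A hedged sketch of the read-and-write case handled here: because a transfer reads its source, a preceding store to that source is still live, and the source location reported above is what lets DSE see that for the atomic variants too.

declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8*, i8*, i64, i32)

define void @source_still_read(i8* %dst, i8* %src) {
  store i8 7, i8* %src
  ; Reads %src, so the store above must not be treated as dead.
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 4, i32 1)
  ret void
}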
248251 case Intrinsic::memcpy:
249252 // Don't remove volatile memory intrinsics.
250253 return !cast<MemIntrinsic>(II)->isVolatile();
254 case Intrinsic::memcpy_element_unordered_atomic:
255 case Intrinsic::memmove_element_unordered_atomic:
256 case Intrinsic::memset_element_unordered_atomic:
257 return true;
251258 }
252259 }
253260
272279 case Intrinsic::memcpy:
273280 // Do shorten memory intrinsics.
274281 // FIXME: Add memmove if it's also safe to transform.
282 // TODO: Add atomic memcpy/memset
275283 return true;
276284 }
277285 }
286294 static bool isShortenableAtTheBeginning(Instruction *I) {
287295 // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
288296 // easily done by offsetting the source address.
297 // TODO: Handle atomic memory intrinsics
289298 IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
290299 return II && II->getIntrinsicID() == Intrinsic::memset;
291300 }
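To make the TODO concrete, a hedged sketch of the shortening that is currently done only for the plain memset: a later store covering the front of an earlier memset lets DSE advance the memset's start instead of removing it outright.

declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1)

define void @shorten_at_begin(i8* %p) {
  call void @llvm.memset.p0i8.i64(i8* align 4 %p, i8 0, i64 16, i1 false)
  %q = bitcast i8* %p to i32*
  store i32 1, i32* %q   ; overwrites bytes 0..3 of the memset
  ; DSE can shorten the memset to cover only bytes 4..15; the atomic
  ; memset is not yet shortened this way (see the TODO above).
  ret void
}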
533542 if (AA.isNoAlias(InstReadLoc, InstStoreLoc))
534543 return false;
535544
536 if (isa<MemCpyInst>(Inst)) {
545 if (isa<AnyMemCpyInst>(Inst)) {
537546 // LLVM's memcpy overlap semantics are not fully fleshed out (see PR11763)
538547 // but in practice memcpy(A <- B) either means that A and B are disjoint or
539548 // are equal (i.e. there are not partial overlaps). Given that, if we have:
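The comment's scenario, now matched for the atomic variants as well through AnyMemCpyInst; a hedged sketch in the spirit of test15_atomic below:

define void @disjoint_or_equal(i8* %A, i8* %B) {
  ; If %B is not clobbered in between, both copies write the same bytes
  ; (disjoint-or-equal semantics), so the first copy is dead.
  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %A, i8* align 1 %B, i64 12, i32 1)
  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %A, i8* align 1 %B, i64 12, i32 1)
  ret void
}
declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8*, i8*, i64, i32)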
855864 LoadedLoc = MemoryLocation::get(L);
856865 } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
857866 LoadedLoc = MemoryLocation::get(V);
858 } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) {
859 LoadedLoc = MemoryLocation::getForSource(MTI);
860867 } else if (!BBI->mayReadFromMemory()) {
861868 // Instruction doesn't read memory. Note that stores that weren't removed
862869 // above will hit this case.
5353
5454 define void @test4() {
5555 ; CHECK-LABEL: @test4(
56 ; CHECK-NEXT: [[A:%.*]] = alloca i16, i16 1024, align 2
57 ; CHECK-NEXT: [[B:%.*]] = alloca i16, i16 1024, align 2
58 ; CHECK-NEXT: store atomic i16 0, i16* [[B]] unordered, align 2
59 ; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i16.p0i16.i16(i16* align 2 [[A]], i16* align 2 [[B]], i16 1024, i32 2)
6056 ; CHECK-NEXT: ret void
6157 ;
6258 %A = alloca i16, i16 1024, align 2
7268
7369 define void @test5() {
7470 ; CHECK-LABEL: @test5(
75 ; CHECK-NEXT: [[A:%.*]] = alloca i16, i16 1024, align 2
76 ; CHECK-NEXT: [[B:%.*]] = alloca i16, i16 1024, align 2
77 ; CHECK-NEXT: store atomic i16 0, i16* [[B]] unordered, align 2
78 ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i16.p0i16.i16(i16* align 2 [[A]], i16* align 2 [[B]], i16 1024, i32 2)
7971 ; CHECK-NEXT: ret void
8072 ;
8173 %A = alloca i16, i16 1024, align 2
9183
9284 define void @test6() {
9385 ; CHECK-LABEL: @test6(
94 ; CHECK-NEXT: [[A:%.*]] = alloca i16, i16 1024, align 2
95 ; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i16.i16(i16* align 2 [[A]], i8 0, i16 1024, i32 2)
9686 ; CHECK-NEXT: ret void
9787 ;
9888 %A = alloca i16, i16 1024, align 2
9191 ; alias).
9292 define void @test6_atomic(i32* align 4 %p, i8* align 4 %q) {
9393 ; CHECK-LABEL: @test6_atomic(
94 ; CHECK-NEXT: store atomic i32 10, i32* [[P:%.*]] unordered, align 4
9594 ; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[Q:%.*]], i8 42, i64 900, i32 4)
96 ; CHECK-NEXT: store atomic i32 30, i32* [[P]] unordered, align 4
95 ; CHECK-NEXT: store atomic i32 30, i32* [[P:%.*]] unordered, align 4
9796 ; CHECK-NEXT: ret void
9897 ;
9998 store atomic i32 10, i32* %p unordered, align 4 ;; dead.
120119 ; alias).
121120 define void @test7_atomic(i32* align 4 %p, i8* align 4 %q, i8* noalias align 4 %r) {
122121 ; CHECK-LABEL: @test7_atomic(
123 ; CHECK-NEXT: store atomic i32 10, i32* [[P:%.*]] unordered, align 4
124122 ; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 [[Q:%.*]], i8* align 4 [[R:%.*]], i64 900, i32 4)
125 ; CHECK-NEXT: store atomic i32 30, i32* [[P]] unordered, align 4
123 ; CHECK-NEXT: store atomic i32 30, i32* [[P:%.*]] unordered, align 4
126124 ; CHECK-NEXT: ret void
127125 ;
128126 store atomic i32 10, i32* %p unordered, align 4 ;; dead.
291289 define void @test15_atomic(i8* %P, i8* %Q) nounwind ssp {
292290 ; CHECK-LABEL: @test15_atomic(
293291 ; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
294 ; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i32 1)
295292 ; CHECK-NEXT: ret void
296293 ;
297294 tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
298295 tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
299296 ret void
300297 }
301298
302 ; It would only be valid to remove the non-atomic memcpy
299 ;; Fully dead overwrite of memcpy.
303300 define void @test15_atomic_weaker(i8* %P, i8* %Q) nounwind ssp {
304301 ; CHECK-LABEL: @test15_atomic_weaker(
305 ; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false)
306 ; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i32 1)
302 ; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
307303 ; CHECK-NEXT: ret void
308304 ;
309305 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i1 false)
311307 ret void
312308 }
313309
314 ; It would only be valid to remove the non-atomic memcpy
310 ;; Fully dead overwrite of memcpy.
315311 define void @test15_atomic_weaker_2(i8* %P, i8* %Q) nounwind ssp {
316312 ; CHECK-LABEL: @test15_atomic_weaker_2(
317 ; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
318 ; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i1 false)
313 ; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false)
319314 ; CHECK-NEXT: ret void
320315 ;
321316 tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
337332 ;; Full overwrite of smaller memcpy.
338333 define void @test16_atomic(i8* %P, i8* %Q) nounwind ssp {
339334 ; CHECK-LABEL: @test16_atomic(
340 ; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 8, i32 1)
341 ; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i32 1)
335 ; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
342336 ; CHECK-NEXT: ret void
343337 ;
344338 tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 8, i32 1)
349343 ;; Full overwrite of smaller memory where overwrite has stronger atomicity
350344 define void @test16_atomic_weaker(i8* %P, i8* %Q) nounwind ssp {
351345 ; CHECK-LABEL: @test16_atomic_weaker(
352 ; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 8, i1 false)
353 ; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i32 1)
346 ; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
354347 ; CHECK-NEXT: ret void
355348 ;
356349 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 8, i1 false)
361354 ;; Full overwrite of smaller memory where overwrite has weaker atomicity.
362355 define void @test16_atomic_weaker_2(i8* %P, i8* %Q) nounwind ssp {
363356 ; CHECK-LABEL: @test16_atomic_weaker_2(
364 ; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 8, i32 1)
365 ; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i1 false)
357 ; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false)
366358 ; CHECK-NEXT: ret void
367359 ;
368360 tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 8, i32 1)
384376 ;; Overwrite of memset by memcpy.
385377 define void @test17_atomic(i8* %P, i8* noalias %Q) nounwind ssp {
386378 ; CHECK-LABEL: @test17_atomic(
387 ; CHECK-NEXT: tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 [[P:%.*]], i8 42, i64 8, i32 1)
388 ; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
379 ; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
389380 ; CHECK-NEXT: ret void
390381 ;
391382 tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i32 1)
397388 ;; remove the memset.
398389 define void @test17_atomic_weaker(i8* %P, i8* noalias %Q) nounwind ssp {
399390 ; CHECK-LABEL: @test17_atomic_weaker(
400 ; CHECK-NEXT: tail call void @llvm.memset.p0i8.i64(i8* align 1 [[P:%.*]], i8 42, i64 8, i1 false)
401 ; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
391 ; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
402392 ; CHECK-NEXT: ret void
403393 ;
404394 tail call void @llvm.memset.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i1 false)
410400 ;; the memset.
411401 define void @test17_atomic_weaker_2(i8* %P, i8* noalias %Q) nounwind ssp {
412402 ; CHECK-LABEL: @test17_atomic_weaker_2(
413 ; CHECK-NEXT: tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 [[P:%.*]], i8 42, i64 8, i32 1)
414 ; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q:%.*]], i64 12, i1 false)
403 ; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false)
415404 ; CHECK-NEXT: ret void
416405 ;
417406 tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i32 1)