llvm.org GIT mirror llvm / 2d32b86
DeadStoreElimination can now trim the size of a store if the end of it is dead. This is currently only done if the later store begins at a power-of-2 offset or at an offset that is a multiple of the earlier store's alignment, since shortening is then unlikely to break up large stores into smaller ones. Fixes <rdar://problem/10140300>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143630 91177308-0d34-0410-b5e6-96231b3b80d8

Pete Cooper · 7 years ago
2 changed files with 187 additions and 33 deletions.
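In source terms, the patch targets patterns like the following (a conceptual C++ sketch, not taken from the commit; the struct and function are invented for illustration):

    #include <cstring>

    struct S { int a[8]; };        // 32 bytes of 4-byte-aligned ints

    void init(S *p) {
      std::memset(p, 0, 32);       // earlier write: bytes [0, 32)
      p->a[7] = 1;                 // later write: bytes [28, 32) kill the tail
    }

With this patch, DSE can shrink the memset to 28 bytes, since its tail is completely overwritten and the cut point (offset 28, a multiple of the memset's alignment) passes the new power-of-2/alignment guard. This mirrors the @write28to32 case in the new test file below.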
lib/Transforms/Scalar/DeadStoreElimination.cpp:

@@ -238,6 +238,24 @@
   }
 }
 
+
+/// isShortenable - Returns true if this instruction can be safely shortened in
+/// length.
+static bool isShortenable(Instruction *I) {
+  // Don't shorten stores for now
+  if (isa<StoreInst>(I))
+    return false;
+
+  IntrinsicInst *II = cast<IntrinsicInst>(I);
+  switch (II->getIntrinsicID()) {
+    default: return false;
+    case Intrinsic::memset:
+    case Intrinsic::memcpy:
+      // Do shorten memory intrinsics.
+      return true;
+  }
+}
+
 /// getStoredPointerOperand - Return the pointer that is being written to.
 static Value *getStoredPointerOperand(Instruction *I) {
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
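A note on the new predicate's contract: after the isa<StoreInst> early-out, the cast<IntrinsicInst> asserts that anything else passed in is an intrinsic call, which holds at the patch's only call site because DepWrite has already been accepted as a known memory write. Intrinsic::memmove falls through to the default case, so only memset and memcpy are ever shortened. A minimal standalone model of the decision table (plain C++, not the LLVM API; the Kind enum is invented for illustration):

    // Which kinds of writes may have their length trimmed, per isShortenable.
    enum Kind { Store, MemSet, MemCpy, MemMove };

    static bool isShortenableKind(Kind K) {
      switch (K) {
      case MemSet:
      case MemCpy:
        return true;   // the length is an explicit operand we can rewrite
      default:
        return false;  // plain stores (and memmove) are left alone for now
      }
    }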
@@ -292,11 +310,24 @@
   return false;
 }
 
-/// isCompleteOverwrite - Return true if a store to the 'Later' location
+namespace {
+  enum OverwriteResult
+  {
+    OverwriteComplete,
+    OverwriteEnd,
+    OverwriteUnknown
+  };
+}
+
+/// isOverwrite - Return 'OverwriteComplete' if a store to the 'Later' location
 /// completely overwrites a store to the 'Earlier' location.
-static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
-                                const AliasAnalysis::Location &Earlier,
-                                AliasAnalysis &AA) {
+/// 'OverwriteEnd' if the end of the 'Earlier' location is completely
+/// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined.
+static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
+                                   const AliasAnalysis::Location &Earlier,
+                                   AliasAnalysis &AA,
+                                   int64_t& EarlierOff,
+                                   int64_t& LaterOff) {
   const Value *P1 = Earlier.Ptr->stripPointerCasts();
   const Value *P2 = Later.Ptr->stripPointerCasts();
 
@@ -310,23 +341,24 @@
       // If we have no TargetData information around, then the size of the store
       // is inferrable from the pointee type. If they are the same type, then
       // we know that the store is safe.
-      if (AA.getTargetData() == 0)
-        return Later.Ptr->getType() == Earlier.Ptr->getType();
-      return false;
+      if (AA.getTargetData() == 0 &&
+          Later.Ptr->getType() == Earlier.Ptr->getType())
+        return OverwriteComplete;
+
+      return OverwriteUnknown;
     }
 
     // Make sure that the Later size is >= the Earlier size.
-    if (Later.Size < Earlier.Size)
-      return false;
-    return true;
+    if (Later.Size >= Earlier.Size)
+      return OverwriteComplete;
   }
 
   // Otherwise, we have to have size information, and the later store has to be
   // larger than the earlier one.
   if (Later.Size == AliasAnalysis::UnknownSize ||
       Earlier.Size == AliasAnalysis::UnknownSize ||
-      Later.Size <= Earlier.Size || AA.getTargetData() == 0)
-    return false;
+      AA.getTargetData() == 0)
+    return OverwriteUnknown;
 
   // Check to see if the later store is to the entire object (either a global,
   // an alloca, or a byval argument). If so, then it clearly overwrites any
@@ -339,26 +371,27 @@
   // If we can't resolve the same pointers to the same object, then we can't
   // analyze them at all.
   if (UO1 != UO2)
-    return false;
+    return OverwriteUnknown;
 
   // If the "Later" store is to a recognizable object, get its size.
   if (isObjectPointerWithTrustworthySize(UO2)) {
     uint64_t ObjectSize =
       TD.getTypeAllocSize(cast<PointerType>(UO2->getType())->getElementType());
     if (ObjectSize == Later.Size)
-      return true;
+      return OverwriteComplete;
   }
 
   // Okay, we have stores to two completely different pointers. Try to
   // decompose the pointer into a "base + constant_offset" form. If the base
   // pointers are equal, then we can reason about the two stores.
-  int64_t EarlierOff = 0, LaterOff = 0;
+  EarlierOff = 0;
+  LaterOff = 0;
   const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD);
   const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD);
 
   // If the base pointers still differ, we have two completely different stores.
   if (BP1 != BP2)
-    return false;
+    return OverwriteUnknown;
 
   // The later store completely overlaps the earlier store if:
   //
@@ -376,11 +409,24 @@
   //
   // We have to be careful here as *Off is signed while *.Size is unsigned.
   if (EarlierOff >= LaterOff &&
+      Later.Size > Earlier.Size &&
       uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size)
-    return true;
+    return OverwriteComplete;
+
+  // The other interesting case is if the later store overwrites the end of
+  // the earlier store
+  //
+  //  |--earlier--|
+  //      |-- later --|
+  //
+  // In this case we may want to trim the size of earlier to avoid generating
+  // writes to addresses which will definitely be overwritten later
+  if (LaterOff > EarlierOff &&
+      LaterOff + Later.Size >= EarlierOff + Earlier.Size)
+    return OverwriteEnd;
 
   // Otherwise, they don't completely overlap.
-  return false;
+  return OverwriteUnknown;
 }
 
 /// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a
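To make the interval arithmetic concrete, here is a small self-contained model (plain C++, no LLVM dependencies; classify is an invented name) that applies the same two inequalities to byte intervals, checked against geometries from the new test file below:

    #include <cassert>
    #include <cstdint>

    enum OverwriteResult { OverwriteComplete, OverwriteEnd, OverwriteUnknown };

    // Classify how [LaterOff, LaterOff+LaterSize) covers
    // [EarlierOff, EarlierOff+EarlierSize), mirroring isOverwrite above.
    static OverwriteResult classify(int64_t EarlierOff, uint64_t EarlierSize,
                                    int64_t LaterOff, uint64_t LaterSize) {
      // The later store completely contains the earlier one.
      if (EarlierOff >= LaterOff && LaterSize > EarlierSize &&
          uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize)
        return OverwriteComplete;
      // The later store covers the tail end of the earlier one.
      if (LaterOff > EarlierOff &&
          uint64_t(LaterOff) + LaterSize >= uint64_t(EarlierOff) + EarlierSize)
        return OverwriteEnd;
      return OverwriteUnknown;
    }

    int main() {
      // @write24to28: the memset covers [4, 32), the store covers [28, 32).
      assert(classify(4, 28, 28, 4) == OverwriteEnd);
      // A 32-byte store over a 28-byte store at the same base offset.
      assert(classify(0, 28, 0, 32) == OverwriteComplete);
      // A later store covering only the front of the earlier one is not
      // handled by either inequality, so nothing can be concluded.
      assert(classify(4, 28, 0, 8) == OverwriteUnknown);
    }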
@@ -504,22 +550,52 @@
       // If we find a write that is a) removable (i.e., non-volatile), b) is
       // completely obliterated by the store to 'Loc', and c) which we know that
       // 'Inst' doesn't load from, then we can remove it.
-      if (isRemovable(DepWrite) && isCompleteOverwrite(Loc, DepLoc, *AA) &&
+      if (isRemovable(DepWrite) &&
           !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
-        DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
-              << *DepWrite << "\n  KILLER: " << *Inst << '\n');
-
-        // Delete the store and now-dead instructions that feed it.
-        DeleteDeadInstruction(DepWrite, *MD);
-        ++NumFastStores;
-        MadeChange = true;
-
-        // DeleteDeadInstruction can delete the current instruction in loop
-        // cases, reset BBI.
-        BBI = Inst;
-        if (BBI != BB.begin())
-          --BBI;
-        break;
+        int64_t InstWriteOffset, DepWriteOffset;
+        OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA,
+                                         DepWriteOffset, InstWriteOffset);
+        if (OR == OverwriteComplete) {
+          DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
+                << *DepWrite << "\n  KILLER: " << *Inst << '\n');
+
+          // Delete the store and now-dead instructions that feed it.
+          DeleteDeadInstruction(DepWrite, *MD);
+          ++NumFastStores;
+          MadeChange = true;
+
+          // DeleteDeadInstruction can delete the current instruction in loop
+          // cases, reset BBI.
+          BBI = Inst;
+          if (BBI != BB.begin())
+            --BBI;
+          break;
+        } else if (OR == OverwriteEnd && isShortenable(DepWrite)) {
+          // TODO: base this on the target vector size so that if the earlier
+          // store was too small to get vector writes anyway then it's likely
+          // a good idea to shorten it.
+          // Power-of-2 vector writes are probably always a bad idea to shorten,
+          // as any store/memset/memcpy is likely using vector instructions, so
+          // shortening to a non-vector size is likely to be slower.
+          MemIntrinsic *DepIntrinsic = cast<MemIntrinsic>(DepWrite);
+          unsigned DepWriteAlign = DepIntrinsic->getAlignment();
+          if (llvm::isPowerOf2_64(InstWriteOffset) ||
+              ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
+
+            DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW END: "
+                  << *DepWrite << "\n  KILLER (offset "
+                  << InstWriteOffset << ", "
+                  << DepLoc.Size << ")"
+                  << *Inst << '\n');
+
+            Value *DepWriteLength = DepIntrinsic->getLength();
+            Value *TrimmedLength = ConstantInt::get(DepWriteLength->getType(),
                                                    InstWriteOffset -
+                                                    DepWriteOffset);
+            DepIntrinsic->setLength(TrimmedLength);
+            MadeChange = true;
+          }
+        }
       }
 
       // If this is a may-aliased store that is clobbering the store value, we
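The trimmed length is InstWriteOffset - DepWriteOffset: the bytes of the intrinsic that precede the start of the later store survive, and everything from there on is dead. Worked through the geometry of the @write24to28 test below (a sketch of the arithmetic only, not code from the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      // @write24to28: the memset writes 28 bytes starting at byte 4 of %p,
      // and the later i32 store writes bytes [28, 32).
      int64_t DepWriteOffset  = 4;    // where the memset begins
      int64_t InstWriteOffset = 28;   // where the killing store begins
      uint64_t DepWriteAlign  = 4;    // alignment of the memset

      // Shortening is allowed: 28 is not a power of two, but it is a
      // multiple of the memset's alignment.
      assert(InstWriteOffset % DepWriteAlign == 0);

      // New length for the memset: bytes [4, 28) survive.
      uint64_t TrimmedLength = uint64_t(InstWriteOffset - DepWriteOffset);
      assert(TrimmedLength == 24);    // matches the CHECK line in the test
    }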
test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll (new file):

; RUN: opt < %s -basicaa -dse -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

%struct.vec2 = type { <4 x i32>, <4 x i32> }
%struct.vec2plusi = type { <4 x i32>, <4 x i32>, i32 }

@glob1 = global %struct.vec2 zeroinitializer, align 16
@glob2 = global %struct.vec2plusi zeroinitializer, align 16

define void @write24to28(i32* nocapture %p) nounwind uwtable ssp {
; CHECK: @write24to28
entry:
  %arrayidx0 = getelementptr inbounds i32* %p, i64 1
  %p3 = bitcast i32* %arrayidx0 to i8*
; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 24, i32 4, i1 false)
  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
  %arrayidx1 = getelementptr inbounds i32* %p, i64 7
  store i32 1, i32* %arrayidx1, align 4
  ret void
}

define void @write28to32(i32* nocapture %p) nounwind uwtable ssp {
; CHECK: @write28to32
entry:
  %p3 = bitcast i32* %p to i8*
; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 4, i1 false)
  %arrayidx1 = getelementptr inbounds i32* %p, i64 7
  store i32 1, i32* %arrayidx1, align 4
  ret void
}

define void @dontwrite28to32memset(i32* nocapture %p) nounwind uwtable ssp {
; CHECK: @dontwrite28to32memset
entry:
  %p3 = bitcast i32* %p to i8*
; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 16, i1 false)
  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 16, i1 false)
  %arrayidx1 = getelementptr inbounds i32* %p, i64 7
  store i32 1, i32* %arrayidx1, align 4
  ret void
}
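@dontwrite28to32memset shows the guard in action: the killing store begins at byte 28, which is neither a power of two nor a multiple of the memset's 16-byte alignment, so the memset keeps its full 32-byte length (the same reasoning blocks @dontwrite28to32memcpy further down). A quick standalone check of the condition (plain C++ mirroring the pass's power-of-2/alignment test; guard is an invented name):

    #include <cassert>
    #include <cstdint>

    // Mirrors: isPowerOf2_64(InstWriteOffset) ||
    //          (DepWriteAlign != 0 && InstWriteOffset % DepWriteAlign == 0)
    static bool guard(uint64_t Off, uint64_t Align) {
      bool Pow2 = Off != 0 && (Off & (Off - 1)) == 0;
      return Pow2 || (Align != 0 && Off % Align == 0);
    }

    int main() {
      assert(!guard(28, 16)); // dontwrite28to32memset: blocked
      assert(guard(28, 4));   // write24to28/write28to32: allowed via alignment
      assert(guard(32, 16));  // write32to36: allowed, 32 is a power of two
    }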

define void @write32to36(%struct.vec2plusi* nocapture %p) nounwind uwtable ssp {
; CHECK: @write32to36
entry:
  %0 = bitcast %struct.vec2plusi* %p to i8*
; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 32, i32 16, i1 false)
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 36, i32 16, i1 false)
  %c = getelementptr inbounds %struct.vec2plusi* %p, i64 0, i32 2
  store i32 1, i32* %c, align 4
  ret void
}

define void @write16to32(%struct.vec2* nocapture %p) nounwind uwtable ssp {
; CHECK: @write16to32
entry:
  %0 = bitcast %struct.vec2* %p to i8*
; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 16, i32 16, i1 false)
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
  %c = getelementptr inbounds %struct.vec2* %p, i64 0, i32 1
  store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* %c, align 4
  ret void
}

define void @dontwrite28to32memcpy(%struct.vec2* nocapture %p) nounwind uwtable ssp {
; CHECK: @dontwrite28to32memcpy
entry:
  %0 = bitcast %struct.vec2* %p to i8*
; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
  %arrayidx1 = getelementptr inbounds %struct.vec2* %p, i64 0, i32 0, i64 7
  store i32 1, i32* %arrayidx1, align 4
  ret void
}

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind