llvm.org GIT mirror llvm / 033d04b
[DSE] Bugfix to avoid PartialStoreMerging involving non byte-sized stores Summary: The DeadStoreElimination pass now skips PartialStoreMerging when stores overlap according to OW_PartialEarlierWithFullLater and at least one of the stores has a store size that differs from the size of the type being stored. This solves the problems seen in https://bugs.llvm.org/show_bug.cgi?id=41949, which in the past could result in miscompiles or assertion failures. The content and location of the padding bits are not formally described (or are undefined) in the LangRef at the moment. The solution is therefore based on the premise that we cannot assume anything about the padding bits when a store clobbers more memory than indicated by the type of the value that is stored (such as storing an i6 using an 8-bit store instruction). Fixes: https://bugs.llvm.org/show_bug.cgi?id=41949 Reviewers: spatel, efriedma, fhahn Reviewed By: efriedma Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62250 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@361605 91177308-0d34-0410-b5e6-96231b3b80d8 Bjorn Pettersson 3 months ago
3 changed file(s) with 68 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
450450 /// For example, returns 40 for i36 and 80 for x86_fp80.
451451 uint64_t getTypeStoreSizeInBits(Type *Ty) const {
452452 return 8 * getTypeStoreSize(Ty);
453 }
454
455 /// Returns true if no extra padding bits are needed when storing the
456 /// specified type.
457 ///
458 /// For example, returns false for i19 that has a 24-bit store size.
459 bool typeSizeEqualsStoreSize(Type *Ty) const {
460 return getTypeSizeInBits(Ty) == getTypeStoreSizeInBits(Ty);
453461 }
454462
455463 /// Returns the offset in bytes between successive objects of the
12101210 auto *Earlier = dyn_cast<StoreInst>(DepWrite);
12111211 auto *Later = dyn_cast<StoreInst>(Inst);
12121212 if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) &&
1213 DL.typeSizeEqualsStoreSize(
1214 Earlier->getValueOperand()->getType()) &&
12131215 Later && isa<ConstantInt>(Later->getValueOperand()) &&
1216 DL.typeSizeEqualsStoreSize(
1217 Later->getValueOperand()->getType()) &&
12141218 memoryIsNotModifiedBetween(Earlier, Later, AA)) {
12151219 // If the store we find is:
12161220 // a) partially overwritten by the store to 'Loc'
12171221 // b) the later store is fully contained in the earlier one and
12181222 // c) they both have a constant value
1223 // d) neither of the two stores needs padding
12191224 // Merge the two stores, replacing the earlier store's value with a
12201225 // merge of both values.
12211226 // TODO: Deal with other constant types (vectors, etc), and probably
0 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt < %s --data-layout "e" -dse -enable-dse-partial-store-merging=true -S | FileCheck --check-prefix CHECK --check-prefix CHECK-LE %s
2 ; RUN: opt < %s --data-layout "E" -dse -enable-dse-partial-store-merging=true -S | FileCheck --check-prefix CHECK --check-prefix CHECK-BE %s
3
4 ; This test used to hit an assertion (see PR41949).
5 ;
6 ; Better safe than sorry, do not assume anything about the padding for the
7 ; i28 store that has 32 bits as store size.
8 define void @test1(i32* %p) {
9 ; CHECK-LABEL: @test1(
10 ; CHECK-NEXT: [[A:%.*]] = alloca i32
11 ; CHECK-NEXT: [[B:%.*]] = bitcast i32* [[A]] to i28*
12 ; CHECK-NEXT: [[C:%.*]] = bitcast i32* [[A]] to { i16, i16 }*
13 ; CHECK-NEXT: [[C1:%.*]] = getelementptr inbounds { i16, i16 }, { i16, i16 }* [[C]], i32 0, i32 1
14 ; CHECK-NEXT: store i28 10, i28* [[B]]
15 ; CHECK-NEXT: store i16 20, i16* [[C1]]
16 ; CHECK-NEXT: call void @test1(i32* [[A]])
17 ; CHECK-NEXT: ret void
18 ;
19 %a = alloca i32
20 %b = bitcast i32* %a to i28*
21 %c = bitcast i32* %a to { i16, i16 }*
22 %c1 = getelementptr inbounds { i16, i16 }, { i16, i16 }* %c, i32 0, i32 1
23 store i28 10, i28* %b
24 store i16 20, i16* %c1
25
26 call void @test1(i32* %a)
27 ret void
28 }
29
30
31 ; This test used to mis-compile (see PR41949).
32 ;
33 ; Better safe than sorry, do not assume anything about the padding for the
34 ; i12 store that has 16 bits as store size.
35 define void @test2(i32* %p) {
36 ; CHECK-LABEL: @test2(
37 ; CHECK-NEXT: [[U:%.*]] = alloca i32
38 ; CHECK-NEXT: [[A:%.*]] = bitcast i32* [[U]] to i32*
39 ; CHECK-NEXT: [[B:%.*]] = bitcast i32* [[U]] to i12*
40 ; CHECK-NEXT: store i32 -1, i32* [[A]]
41 ; CHECK-NEXT: store i12 20, i12* [[B]]
42 ; CHECK-NEXT: call void @test2(i32* [[U]])
43 ; CHECK-NEXT: ret void
44 ;
45 %u = alloca i32
46 %a = bitcast i32* %u to i32*
47 %b = bitcast i32* %u to i12*
48 store i32 -1, i32* %a
49 store i12 20, i12* %b
50
51 call void @test2(i32* %u)
52 ret void
53 }
54