llvm.org GIT mirror llvm / 24bde5b
Don't narrow the load and store in a load+twiddle+store sequence unless there are clearly no stores between the load and the store. This fixes this miscompile reported as PR7833. This breaks the test/CodeGen/X86/narrow_op-2.ll optimization, which is safe, but awkward to prove safe. Move it to X86's README.txt. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112861 91177308-0d34-0410-b5e6-96231b3b80d8 Dan Gohman 10 years ago
4 changed file(s) with 76 addition(s) and 28 deletion(s). Raw diff Collapse all Expand all
57975797 return SDValue();
57985798
57995799 SDValue N0 = Value.getOperand(0);
5800 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
5800 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
5801 Chain == SDValue(N0.getNode(), 1)) {
58015802 LoadSDNode *LD = cast<LoadSDNode>(N0);
58025803 if (LD->getBasePtr() != Ptr)
58035804 return SDValue();
19141914 It should be possible to eliminate the sign extensions.
19151915
19161916 //===---------------------------------------------------------------------===//
1917
1918 LLVM misses a load+store narrowing opportunity in this code:
1919
1920 %struct.bf = type { i64, i16, i16, i32 }
1921
1922 @bfi = external global %struct.bf* ; <%struct.bf**> [#uses=2]
1923
1924 define void @t1() nounwind ssp {
1925 entry:
1926 %0 = load %struct.bf** @bfi, align 8 ; <%struct.bf*> [#uses=1]
1927 %1 = getelementptr %struct.bf* %0, i64 0, i32 1 ; <i16*> [#uses=1]
1928 %2 = bitcast i16* %1 to i32* ; <i32*> [#uses=2]
1929 %3 = load i32* %2, align 1 ; <i32> [#uses=1]
1930 %4 = and i32 %3, -65537 ; <i32> [#uses=1]
1931 store i32 %4, i32* %2, align 1
1932 %5 = load %struct.bf** @bfi, align 8 ; <%struct.bf*> [#uses=1]
1933 %6 = getelementptr %struct.bf* %5, i64 0, i32 1 ; <i16*> [#uses=1]
1934 %7 = bitcast i16* %6 to i32* ; <i32*> [#uses=2]
1935 %8 = load i32* %7, align 1 ; <i32> [#uses=1]
1936 %9 = and i32 %8, -131073 ; <i32> [#uses=1]
1937 store i32 %9, i32* %7, align 1
1938 ret void
1939 }
1940
1941 LLVM currently emits this:
1942
1943 movq bfi(%rip), %rax
1944 andl $-65537, 8(%rax)
1945 movq bfi(%rip), %rax
1946 andl $-131073, 8(%rax)
1947 ret
1948
1949 It could narrow the loads and stores to emit this:
1950
1951 movq bfi(%rip), %rax
1952 andb $-2, 10(%rax)
1953 movq bfi(%rip), %rax
1954 andb $-3, 10(%rax)
1955 ret
1956
1957 The trouble is that there is a TokenFactor between the store and the
1958 load, making it non-trivial to determine if there's anything between
1959 the load and the store which would prohibit narrowing.
1960
1961 //===---------------------------------------------------------------------===//
+0
-25
test/CodeGen/X86/narrow_op-2.ll less more
None ; RUN: llc < %s -march=x86-64 | FileCheck %s
1
2 %struct.bf = type { i64, i16, i16, i32 }
3 @bfi = external global %struct.bf*
4
5 define void @t1() nounwind ssp {
6 entry:
7
8 ; CHECK: andb $-2, 10(
9 ; CHECK: andb $-3, 10(
10
11 %0 = load %struct.bf** @bfi, align 8
12 %1 = getelementptr %struct.bf* %0, i64 0, i32 1
13 %2 = bitcast i16* %1 to i32*
14 %3 = load i32* %2, align 1
15 %4 = and i32 %3, -65537
16 store i32 %4, i32* %2, align 1
17 %5 = load %struct.bf** @bfi, align 8
18 %6 = getelementptr %struct.bf* %5, i64 0, i32 1
19 %7 = bitcast i16* %6 to i32*
20 %8 = load i32* %7, align 1
21 %9 = and i32 %8, -131073
22 store i32 %9, i32* %7, align 1
23 ret void
24 }
0 ; rdar://7860110
1 ; RUN: llc < %s | FileCheck %s -check-prefix=X64
2 ; RUN: llc -march=x86 < %s | FileCheck %s -check-prefix=X32
1 ; RUN: llc -asm-verbose=false < %s | FileCheck %s -check-prefix=X64
2 ; RUN: llc -march=x86 -asm-verbose=false < %s | FileCheck %s -check-prefix=X32
33 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
44 target triple = "x86_64-apple-darwin10.2"
55
124124 ; X32: movb %cl, 5(%{{.*}})
125125 }
126126
127 ; PR7833
128
129 @g_16 = internal global i32 -1
130
131 ; X64: test8:
132 ; X64-NEXT: movl _g_16(%rip), %eax
133 ; X64-NEXT: movl $0, _g_16(%rip)
134 ; X64-NEXT: orl $1, %eax
135 ; X64-NEXT: movl %eax, _g_16(%rip)
136 ; X64-NEXT: ret
137 define void @test8() nounwind {
138 %tmp = load i32* @g_16
139 store i32 0, i32* @g_16
140 %or = or i32 %tmp, 1
141 store i32 %or, i32* @g_16
142 ret void
143 }
144
145 ; X64: test9:
146 ; X64-NEXT: orb $1, _g_16(%rip)
147 ; X64-NEXT: ret
148 define void @test9() nounwind {
149 %tmp = load i32* @g_16
150 %or = or i32 %tmp, 1
151 store i32 %or, i32* @g_16
152 ret void
153 }