llvm.org GIT mirror of llvm, commit acc2c1d
Elide stores which are overwritten without being observed.

Summary:
In SelectionDAG, when a store is immediately chained to another store to the same address, elide the first store as it has no observable effects. This causes small improvements when dealing with intrinsics lowered to stores.

Test notes:
* Many test cases overwrite store addresses multiple times and needed minor changes, mainly making stores volatile to prevent the optimization from optimizing the test away.
* Many X86 test cases optimized out instructions associated with va_start.
* Note that test_splat in CodeGen/AArch64/misched-stp.ll no longer has dependencies to check and can probably be removed and potentially replaced with another test.

Reviewers: rnk, john.brawn

Subscribers: aemerson, rengolin, qcolombet, jyknight, nemanjai, nhaehnle, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D33206

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@303198 91177308-0d34-0410-b5e6-96231b3b80d8

Author: Nirav Dave
23 changed files with 135 additions and 150 deletions.
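For illustration only (not part of the patch or its tests), a minimal IR sketch of the pattern this combine targets, using hypothetical function names: the first non-volatile store to %p is immediately overwritten by a chained store of the same width to the same address, so it can be dropped (the full-overwrite case in the patch is gated on OptLevel != CodeGenOpt::None), while a volatile store, as used in many of the updated tests, is kept.

; Hypothetical example, not taken from the patch or its tests.
define void @overwritten_store(i32* %p, i32 %a, i32 %b) {
entry:
  store i32 %a, i32* %p, align 4          ; fully overwritten below, elided
  store i32 %b, i32* %p, align 4          ; only this store survives
  ret void
}

define void @kept_volatile_store(i32* %p, i32 %a, i32 %b) {
entry:
  store volatile i32 %a, i32* %p, align 4 ; volatile, so it is preserved
  store i32 %b, i32* %p, align 4
  ret void
}

Running llc at -O1 or above on input like @overwritten_store should now emit a single store rather than two.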
1308613086 }
1308713087 }
1308813088
13089 // If this is a store followed by a store with the same value to the same
13090 // location, then the store is dead/noop.
1309113089 if (StoreSDNode *ST1 = dyn_cast(Chain)) {
13092 if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() &&
13093 ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() &&
13094 ST1->isUnindexed() && !ST1->isVolatile()) {
13095 // The store is dead, remove it.
13096 return Chain;
13090 if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
13091 !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
13092 ST->getMemoryVT() == ST1->getMemoryVT()) {
13093 // If this is a store followed by a store with the same value to the same
13094 // location, then the store is dead/noop.
13095 if (ST1->getValue() == Value) {
13096 // The store is dead, remove it.
13097 return Chain;
13098 }
13099
 13100 // If this store fully overwrites its preceding store to the same
 13101 // location and no other node is chained to that store, we can
 13102 // effectively drop the preceding store. Do not remove stores to undef
 13103 // as they may be used as data sinks.
13104 if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
13105 !ST1->getBasePtr().isUndef()) {
 13106 // ST1 is fully overwritten and can be elided. Combine it with its
 13107 // chain value.
13108 CombineTo(ST1, ST1->getChain());
13109 return SDValue();
13110 }
1309713111 }
1309813112 }
1309913113
88 ; Original test case which exhibited the bug
99 define void @test1(%struct.tree_common* %t, i32 %code, i8* %type) {
1010 ; CHECK-LABEL: test1:
11 ; CHECK: stp xzr, xzr, [x0, #8]
12 ; CHECK: stp xzr, x2, [x0]
13 ; CHECK: str w1, [x0, #16]
11 ; CHECK-DAG: stp x2, xzr, [x0, #8]
12 ; CHECK-DAG: str w1, [x0, #16]
13 ; CHECK-DAG: str xzr, [x0]
1414 entry:
1515 %0 = bitcast %struct.tree_common* %t to i8*
1616 tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 24, i32 8, i1 false)
2424 ; Store to each struct element instead of using memset
2525 define void @test2(%struct.tree_common* %t, i32 %code, i8* %type) {
2626 ; CHECK-LABEL: test2:
27 ; CHECK: stp xzr, xzr, [x0]
28 ; CHECK: str wzr, [x0, #16]
29 ; CHECK: str w1, [x0, #16]
30 ; CHECK: str x2, [x0, #8]
27 ; CHECK-DAG: str w1, [x0, #16]
28 ; CHECK-DAG: stp xzr, x2, [x0]
3129 entry:
3230 %0 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 0
3331 %1 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 1
4341 ; Vector store instead of memset
4442 define void @test3(%struct.tree_common* %t, i32 %code, i8* %type) {
4543 ; CHECK-LABEL: test3:
46 ; CHECK: stp xzr, xzr, [x0, #8]
47 ; CHECK: stp xzr, x2, [x0]
48 ; CHECK: str w1, [x0, #16]
44 ; CHECK-DAG: stp x2, xzr, [x0, #8]
45 ; CHECK-DAG: str w1, [x0, #16]
46 ; CHECK-DAG: str xzr, [x0]
4947 entry:
5048 %0 = bitcast %struct.tree_common* %t to <3 x i64>*
5149 store <3 x i64> zeroinitializer, <3 x i64>* %0, align 8
5957 ; Vector store, then store to vector elements
6058 define void @test4(<3 x i64>* %p, i64 %x, i64 %y) {
6159 ; CHECK-LABEL: test4:
62 ; CHECK: stp xzr, xzr, [x0, #8]
63 ; CHECK: stp xzr, x2, [x0]
64 ; CHECK: str x1, [x0, #16]
60 ; CHECK-DAG: stp x2, x1, [x0, #8]
61 ; CHECK-DAG: str xzr, [x0]
6562 entry:
6663 store <3 x i64> zeroinitializer, <3 x i64>* %p, align 8
6764 %0 = bitcast <3 x i64>* %p to i64*
0 ; REQUIRES: asserts
1 ; RUN: llc < %s -mtriple=aarch64 -mcpu=cyclone -mattr=+use-aa -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
1 ; RUN: llc < %s -mtriple=aarch64 -mcpu=cyclone -mattr=+use-aa -enable-misched -verify-misched -o - | FileCheck %s
22
33 ; Tests to check that the scheduler dependencies derived from alias analysis are
44 ; correct when we have stores that have been split up so that they can later be
55 ; merged into STP.
66
7 ; CHECK: ********** MI Scheduling **********
8 ; CHECK: test_splat:BB#0 entry
9 ; CHECK: SU({{[0-9]+}}): STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 3; mem:ST4[%3+8]
10 ; CHECK: Successors:
11 ; CHECK-NEXT: ord [[SU1:SU\([0-9]+\)]]
12 ; CHECK: SU({{[0-9]+}}): STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 2; mem:ST4[%3+4]
13 ; CHECK: Successors:
14 ; CHECK-NEXT: ord [[SU2:SU\([0-9]+\)]]
15 ; CHECK: [[SU1]]: STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 3; mem:ST4[%2]
16 ; CHECK: [[SU2]]: STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 2; mem:ST4[%1]
7 ; Now that overwritten stores are elided in SelectionDAG, dependencies
8 ; are resolved and removed before MISCHED. Check that we still form an
9 ; equivalent pair of stp instructions as a baseline.
10
11 ; CHECK-LABEL: test_splat
12 ; CHECK: ldr [[REG:w[0-9]+]], [x2]
13 ; CHECK-DAG: stp w0, [[REG]], [x2, #12]
14 ; CHECK-DAG: stp [[REG]], w1, [x2, #4]
1715 define void @test_splat(i32 %x, i32 %y, i32* %p) {
1816 entry:
1917 %val = load i32, i32* %p, align 4
3432 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
3533 %struct.tree_common = type { i8*, i8*, i32 }
3634
37 ; CHECK: ********** MI Scheduling **********
38 ; CHECK: test_zero:BB#0 entry
39 ; CHECK: SU({{[0-9]+}}): STRXui %XZR, %vreg{{[0-9]+}}, 2; mem:ST8[%0+16]
40 ; CHECK: Successors:
41 ; CHECK-NEXT: ord [[SU3:SU\([0-9]+\)]]
42 ; CHECK: SU({{[0-9]+}}): STRXui %XZR, %vreg{{[0-9]+}}, 1; mem:ST8[%0+8]
43 ; CHECK: Successors:
44 ; CHECK-NEXT: ord [[SU4:SU\([0-9]+\)]]
45 ; CHECK: [[SU3]]: STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 4; mem:ST4[%code1]
46 ; CHECK: [[SU4]]: STRXui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 1; mem:ST8[%type2]
35 ; CHECK-LABEL: test_zero
36 ; CHECK-DAG: stp x2, xzr, [x0, #8]
37 ; CHECK-DAG: str w1, [x0, #16]
38 ; CHECK-DAG: str xzr, [x0]
39
4740 define void @test_zero(%struct.tree_common* %t, i32 %code, i8* %type) {
4841 entry:
4942 %0 = bitcast %struct.tree_common* %t to i8*
2828 define amdgpu_kernel void @private_test(i32 %index, float addrspace(1)* %out) {
2929 %ptr = getelementptr [4 x float], [4 x float] addrspace(2) * @private1, i32 0, i32 %index
3030 %val = load float, float addrspace(2)* %ptr
31 store float %val, float addrspace(1)* %out
31 store volatile float %val, float addrspace(1)* %out
3232 %ptr2 = getelementptr [4 x float], [4 x float] addrspace(2) * @private2, i32 0, i32 %index
3333 %val2 = load float, float addrspace(2)* %ptr2
34 store float %val2, float addrspace(1)* %out
34 store volatile float %val2, float addrspace(1)* %out
3535 ret void
3636 }
3737
1919
2020 bb8: ; preds = %bb3
2121 %1 = getelementptr inbounds i8, i8* %0, i32 0
22 store i8 0, i8* %1, align 1
22 store volatile i8 0, i8* %1, align 1
2323 %2 = call i32 @ptou() nounwind
2424 ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
2525 ; CHECK-NOT: [[REGISTER]],
3434 %7 = or i8 %6, 48
3535 %8 = add i8 %6, 87
3636 %iftmp.5.0.1 = select i1 %5, i8 %7, i8 %8
37 store i8 %iftmp.5.0.1, i8* %p8, align 1
37 store volatile i8 %iftmp.5.0.1, i8* %p8, align 1
3838 ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
3939 ; CHECK-NOT: [[REGISTER]],
4040 ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
4848 %13 = or i8 %12, 48
4949 %14 = add i8 %12, 87
5050 %iftmp.5.0.2 = select i1 %11, i8 %13, i8 %14
51 store i8 %iftmp.5.0.2, i8* %p8, align 1
51 store volatile i8 %iftmp.5.0.2, i8* %p8, align 1
5252 ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
5353 ; CHECK-NOT: [[REGISTER]],
5454 ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
6262 %19 = or i8 %18, 48
6363 %20 = add i8 %18, 87
6464 %iftmp.5.0.4 = select i1 %17, i8 %19, i8 %20
65 store i8 %iftmp.5.0.4, i8* null, align 1
65 store volatile i8 %iftmp.5.0.4, i8* null, align 1
6666 ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
6767 ; CHECK-NOT: [[REGISTER]],
6868 ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
7373 %22 = urem i32 %21, 10
7474 %23 = icmp ult i32 %22, 10
7575 %iftmp.5.0.5 = select i1 %23, i8 0, i8 %val8
76 store i8 %iftmp.5.0.5, i8* %p8, align 1
76 store volatile i8 %iftmp.5.0.5, i8* %p8, align 1
7777 ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
7878 ; CHECK-NOT: [[REGISTER]],
7979 ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
8787 %28 = or i8 %27, 48
8888 %29 = add i8 %27, 87
8989 %iftmp.5.0.6 = select i1 %26, i8 %28, i8 %29
90 store i8 %iftmp.5.0.6, i8* %p8, align 1
90 store volatile i8 %iftmp.5.0.6, i8* %p8, align 1
9191 ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
9292 ; CHECK-NOT: [[REGISTER]],
9393 ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
101101 %34 = or i8 %33, 48
102102 %35 = add i8 %33, 87
103103 %iftmp.5.0.7 = select i1 %32, i8 %34, i8 %35
104 store i8 %iftmp.5.0.7, i8* %p8, align 1
104 store volatile i8 %iftmp.5.0.7, i8* %p8, align 1
105105 ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
106106 ; CHECK-NOT: [[REGISTER]],
107107 ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
115115 %40 = or i8 %39, 48
116116 %41 = add i8 %39, 87
117117 %iftmp.5.0.8 = select i1 %38, i8 %40, i8 %41
118 store i8 %iftmp.5.0.8, i8* null, align 1
118 store volatile i8 %iftmp.5.0.8, i8* null, align 1
119119 br label %bb46
120120
121121 bb46: ; preds = %bb3
1212 ; CHECK: sub sp, sp, #12
1313 ; CHECK: sub sp, sp, #4
1414 ; CHECK: add r0, sp, #4
15 ; CHECK: stm sp, {r0, r1, r2, r3}
15 ; CHECK: stmib sp, {r1, r2, r3}
1616 %g = alloca i8*
1717 %g1 = bitcast i8** %g to i8*
1818 call void @llvm.va_start(i8* %g1)
77 ; CHECK-LABEL: {{^}}main
88 ; CHECK: mov [[TMP:r[0-9]+]], #0
99 ; CHECK-NEXT: str [[TMP]], [sp, #4]
10 ; CHECK-NEXT: str [[TMP]], [sp]
10 ; CHECK_O0: str [[TMP]], [sp]
1111 ; CHECK_O0: ldr [[TMP:r[0-9]+]], [sp]
1212 ; CHECK_O0-NEXT: add [[TMP]], [[TMP]], #2
1313 ; CHECK_O1-NOT: ldr [[TMP:r[0-9]+]], [sp]
2424 entry:
2525 ; CHECK-LABEL: va_arg:
2626 %vl.addr = alloca i8*, align 2
27 ; CHECK: mov.w r12, 0(r1)
2827 store i8* %vl, i8** %vl.addr, align 2
2928 ; CHECK: mov.w r12, [[REG:r[0-9]+]]
3029 ; CHECK-NEXT: add.w #2, [[REG]]
88 %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnzi_b_ARG1
99 %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnzi_b_ARG2
1010 %2 = tail call <16 x i8> @llvm.mips.bmnzi.b(<16 x i8> %0, <16 x i8> %1, i32 240)
11 store <16 x i8> %2, <16 x i8>* @llvm_mips_bmnzi_b_RES
11 store volatile <16 x i8> %2, <16 x i8>* @llvm_mips_bmnzi_b_RES
1212 %3 = tail call <16 x i8> @llvm.mips.bmnzi.b(<16 x i8> %0, <16 x i8> %1, i32 15)
13 store <16 x i8> %3, <16 x i8>* @llvm_mips_bmnzi_b_RES
13 store volatile <16 x i8> %3, <16 x i8>* @llvm_mips_bmnzi_b_RES
1414 %4 = tail call <16 x i8> @llvm.mips.bmnzi.b(<16 x i8> %0, <16 x i8> %1, i32 170)
1515 store <16 x i8> %4, <16 x i8>* @llvm_mips_bmnzi_b_RES
1616 ret void
3131 %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnzi_b_ARG1
3232 %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnzi_b_ARG2
3333 %2 = tail call <16 x i8> @llvm.mips.bmzi.b(<16 x i8> %0, <16 x i8> %1, i32 240)
34 store <16 x i8> %2, <16 x i8>* @llvm_mips_bmnzi_b_RES
34 store volatile <16 x i8> %2, <16 x i8>* @llvm_mips_bmnzi_b_RES
3535 %3 = tail call <16 x i8> @llvm.mips.bmzi.b(<16 x i8> %0, <16 x i8> %1, i32 15)
36 store <16 x i8> %3, <16 x i8>* @llvm_mips_bmnzi_b_RES
36 store volatile <16 x i8> %3, <16 x i8>* @llvm_mips_bmnzi_b_RES
3737 %4 = tail call <16 x i8> @llvm.mips.bmzi.b(<16 x i8> %0, <16 x i8> %1, i32 170)
3838 store <16 x i8> %4, <16 x i8>* @llvm_mips_bmnzi_b_RES
3939 ret void
1313 %0 = load ppc_fp128, ppc_fp128* @ld, align 16
1414 %1 = load ppc_fp128, ppc_fp128* @ld2, align 16
1515 %add = fadd ppc_fp128 %0, %1
16 store ppc_fp128 %add, ppc_fp128* %c, align 16
16 store volatile ppc_fp128 %add, ppc_fp128* %c, align 16
1717 %2 = load ppc_fp128, ppc_fp128* @ld, align 16
1818 %3 = load ppc_fp128, ppc_fp128* @ld2, align 16
1919 %sub = fsub ppc_fp128 %2, %3
20 store ppc_fp128 %sub, ppc_fp128* %c, align 16
20 store volatile ppc_fp128 %sub, ppc_fp128* %c, align 16
2121 %4 = load ppc_fp128, ppc_fp128* @ld, align 16
2222 %5 = load ppc_fp128, ppc_fp128* @ld2, align 16
2323 %mul = fmul ppc_fp128 %4, %5
24 store ppc_fp128 %mul, ppc_fp128* %c, align 16
24 store volatile ppc_fp128 %mul, ppc_fp128* %c, align 16
2525 %6 = load ppc_fp128, ppc_fp128* @ld, align 16
2626 %7 = load ppc_fp128, ppc_fp128* @ld2, align 16
2727 %div = fdiv ppc_fp128 %6, %7
28 store ppc_fp128 %div, ppc_fp128* %c, align 16
28 store volatile ppc_fp128 %div, ppc_fp128* %c, align 16
2929 ret void
3030
3131 ; CHECK-LABEL: __gcc_qadd
2424 i32 %a5, ; %i5
2525 i32 signext %a6, ; [%fp+92]
2626 i8* %a7) { ; [%fp+96]
27 store i8 %a0, i8* %a4
28 store i8 %a1, i8* %a4
27 store volatile i8 %a0, i8* %a4
28 store volatile i8 %a1, i8* %a4
2929 %p16 = bitcast i8* %a4 to i16*
30 store i16 %a2, i16* %p16
30 store volatile i16 %a2, i16* %p16
3131 %p32 = bitcast i8* %a4 to i32*
32 store i32 %a3, i32* %p32
32 store volatile i32 %a3, i32* %p32
3333 %pp = bitcast i8* %a4 to i8**
34 store i8* %a4, i8** %pp
35 store i32 %a5, i32* %p32
36 store i32 %a6, i32* %p32
37 store i8* %a7, i8** %pp
34 store volatile i8* %a4, i8** %pp
35 store volatile i32 %a5, i32* %p32
36 store volatile i32 %a6, i32* %p32
37 store volatile i8* %a7, i8** %pp
3838 ret void
3939 }
4040
2323 i32 %a5, ; %i5
2424 i32 signext %a6, ; [%fp+BIAS+176]
2525 i8* %a7) { ; [%fp+BIAS+184]
26 store i8 %a0, i8* %a4
27 store i8 %a1, i8* %a4
26 store volatile i8 %a0, i8* %a4
27 store volatile i8 %a1, i8* %a4
2828 %p16 = bitcast i8* %a4 to i16*
29 store i16 %a2, i16* %p16
29 store volatile i16 %a2, i16* %p16
3030 %p32 = bitcast i8* %a4 to i32*
31 store i32 %a3, i32* %p32
31 store volatile i32 %a3, i32* %p32
3232 %pp = bitcast i8* %a4 to i8**
33 store i8* %a4, i8** %pp
34 store i32 %a5, i32* %p32
35 store i32 %a6, i32* %p32
36 store i8* %a7, i8** %pp
33 store volatile i8* %a4, i8** %pp
34 store volatile i32 %a5, i32* %p32
35 store volatile i32 %a6, i32* %p32
36 store volatile i8* %a7, i8** %pp
3737 ret void
3838 }
3939
315315 %rv = call { i64, i64 } @ret_i64_pair(i32 undef, i32 undef,
316316 i64* undef, i64* undef)
317317 %e0 = extractvalue { i64, i64 } %rv, 0
318 store i64 %e0, i64* %i0
318 store volatile i64 %e0, i64* %i0
319319 %e1 = extractvalue { i64, i64 } %rv, 1
320320 store i64 %e1, i64* %i0
321321 ret void
188188 %v6 = extractvalue { i1, i1, i1, i1 } %call, 2
189189 %v7 = extractvalue { i1, i1, i1, i1 } %call, 3
190190 %val = zext i1 %v3 to i32
191 store i32 %val, i32* @var
191 store volatile i32 %val, i32* @var
192192 %val2 = zext i1 %v5 to i32
193 store i32 %val2, i32* @var
193 store volatile i32 %val2, i32* @var
194194 %val3 = zext i1 %v6 to i32
195 store i32 %val3, i32* @var
195 store volatile i32 %val3, i32* @var
196196 %val4 = zext i1 %v7 to i32
197197 store i32 %val4, i32* @var
198198 ret void
66 %z = alloca i8, align 1
77 ; CHECK: add r1, sp, #8
88 ; CHECK: str r1, [r0]
9 store i8* %x, i8** %p, align 4
9 store volatile i8* %x, i8** %p, align 4
1010 ; CHECK: add r1, sp, #4
1111 ; CHECK: str r1, [r0]
12 store i8* %y, i8** %p, align 4
12 store volatile i8* %y, i8** %p, align 4
1313 ; CHECK: mov r1, sp
1414 ; CHECK: str r1, [r0]
15 store i8* %z, i8** %p, align 4
15 store volatile i8* %z, i8** %p, align 4
1616 ret void
1717 }
1818
2323 ; CHECK: add r1, sp, #1020
2424 ; CHECK: adds r1, #4
2525 ; CHECK: str r1, [r0]
26 store [1024 x i8]* %arr1, [1024 x i8]** %p, align 4
26 store volatile [1024 x i8]* %arr1, [1024 x i8]** %p, align 4
2727 ; CHECK: mov r1, sp
2828 ; CHECK: str r1, [r0]
29 store [1024 x i8]* %arr2, [1024 x i8]** %p, align 4
29 store volatile [1024 x i8]* %arr2, [1024 x i8]** %p, align 4
3030 ret void
3131 }
3232
4949 ; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp
5050 ; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp
5151 ; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp
52 store %union.rec* null, %union.rec** @zz_hold, align 4
52 store volatile %union.rec* null, %union.rec** @zz_hold, align 4
5353 store %union.rec* null, %union.rec** @zz_res, align 4
54 store %union.rec* %x, %union.rec** @zz_hold, align 4
54 store volatile %union.rec* %x, %union.rec** @zz_hold, align 4
5555 %0 = call %union.rec* @Manifest(%union.rec* undef, %union.rec* %env, %struct.STYLE* %style, %union.rec** %bthr, %union.rec** %fthr, %union.rec** %target, %union.rec** %crs, i32 %ok, i32 %need_expand, %union.rec** %enclose, i32 %fcr) nounwind ; <%union.rec*> [#uses=0]
5656 unreachable
5757
252252 ; CHECK: calll _addrof_i32
253253 ; CHECK: retl
254254
255
256255 ; Don't elide the copy when the alloca is escaped with a store.
257
258256 define void @escape_with_store(i32 %x) {
259257 %x1 = alloca i32
260258 %x2 = alloca i32*
267265 }
268266
269267 ; CHECK-LABEL: _escape_with_store:
270 ; CHECK-DAG: movl {{.*}}(%esp), %[[reg:[^ ]*]]
271 ; CHECK-DAG: movl $0, [[offs:[0-9]*]](%esp)
272 ; CHECK: movl %[[reg]], [[offs]](%esp)
268 ; CHECK: movl {{.*}}(%esp), %[[reg:[^ ]*]]
269 ; CHECK: movl %[[reg]], [[offs:[0-9]*]](%esp)
273270 ; CHECK: calll _addrof_i32
274271
275272
88 ; X32-SSE: # BB#0:
99 ; X32-SSE-NEXT: pushl %ebp
1010 ; X32-SSE-NEXT: movl %esp, %ebp
11 ; X32-SSE-NEXT: pushl %esi
1211 ; X32-SSE-NEXT: andl $-16, %esp
1312 ; X32-SSE-NEXT: subl $16, %esp
1413 ; X32-SSE-NEXT: movl 72(%ebp), %eax
1514 ; X32-SSE-NEXT: movl 76(%ebp), %ecx
16 ; X32-SSE-NEXT: movl 12(%ebp), %edx
1715 ; X32-SSE-NEXT: movdqa 56(%ebp), %xmm3
1816 ; X32-SSE-NEXT: movdqa 40(%ebp), %xmm4
1917 ; X32-SSE-NEXT: movdqa 24(%ebp), %xmm5
20 ; X32-SSE-NEXT: movl 8(%ebp), %esi
21 ; X32-SSE-NEXT: addps .LCPI0_0, %xmm0
22 ; X32-SSE-NEXT: movntps %xmm0, (%esi)
23 ; X32-SSE-NEXT: paddq .LCPI0_1, %xmm2
24 ; X32-SSE-NEXT: movntdq %xmm2, (%esi)
25 ; X32-SSE-NEXT: addpd .LCPI0_2, %xmm1
26 ; X32-SSE-NEXT: movntpd %xmm1, (%esi)
27 ; X32-SSE-NEXT: paddd .LCPI0_3, %xmm5
28 ; X32-SSE-NEXT: movntdq %xmm5, (%esi)
29 ; X32-SSE-NEXT: paddw .LCPI0_4, %xmm4
30 ; X32-SSE-NEXT: movntdq %xmm4, (%esi)
31 ; X32-SSE-NEXT: paddb .LCPI0_5, %xmm3
32 ; X32-SSE-NEXT: movntdq %xmm3, (%esi)
33 ; X32-SSE-NEXT: movntil %edx, (%esi)
34 ; X32-SSE-NEXT: movntil %ecx, 4(%esi)
35 ; X32-SSE-NEXT: movntil %eax, (%esi)
36 ; X32-SSE-NEXT: leal -4(%ebp), %esp
37 ; X32-SSE-NEXT: popl %esi
18 ; X32-SSE-NEXT: movl 8(%ebp), %edx
19 ; X32-SSE-NEXT: addps {{\.LCPI.*}}, %xmm0
20 ; X32-SSE-NEXT: movntps %xmm0, (%edx)
21 ; X32-SSE-NEXT: paddq {{\.LCPI.*}}, %xmm2
22 ; X32-SSE-NEXT: movntdq %xmm2, (%edx)
23 ; X32-SSE-NEXT: addpd {{\.LCPI.*}}, %xmm1
24 ; X32-SSE-NEXT: movntpd %xmm1, (%edx)
25 ; X32-SSE-NEXT: paddd {{\.LCPI.*}}, %xmm5
26 ; X32-SSE-NEXT: movntdq %xmm5, (%edx)
27 ; X32-SSE-NEXT: paddw {{\.LCPI.*}}, %xmm4
28 ; X32-SSE-NEXT: movntdq %xmm4, (%edx)
29 ; X32-SSE-NEXT: paddb {{\.LCPI.*}}, %xmm3
30 ; X32-SSE-NEXT: movntdq %xmm3, (%edx)
31 ; X32-SSE-NEXT: movntil %ecx, 4(%edx)
32 ; X32-SSE-NEXT: movntil %eax, (%edx)
33 ; X32-SSE-NEXT: movl %ebp, %esp
3834 ; X32-SSE-NEXT: popl %ebp
3935 ; X32-SSE-NEXT: retl
4036 ;
4238 ; X32-AVX: # BB#0:
4339 ; X32-AVX-NEXT: pushl %ebp
4440 ; X32-AVX-NEXT: movl %esp, %ebp
45 ; X32-AVX-NEXT: pushl %esi
4641 ; X32-AVX-NEXT: andl $-16, %esp
4742 ; X32-AVX-NEXT: subl $16, %esp
4843 ; X32-AVX-NEXT: movl 72(%ebp), %eax
4944 ; X32-AVX-NEXT: movl 76(%ebp), %ecx
50 ; X32-AVX-NEXT: movl 12(%ebp), %edx
5145 ; X32-AVX-NEXT: vmovdqa 56(%ebp), %xmm3
5246 ; X32-AVX-NEXT: vmovdqa 40(%ebp), %xmm4
5347 ; X32-AVX-NEXT: vmovdqa 24(%ebp), %xmm5
54 ; X32-AVX-NEXT: movl 8(%ebp), %esi
55 ; X32-AVX-NEXT: vaddps .LCPI0_0, %xmm0, %xmm0
56 ; X32-AVX-NEXT: vmovntps %xmm0, (%esi)
57 ; X32-AVX-NEXT: vpaddq .LCPI0_1, %xmm2, %xmm0
58 ; X32-AVX-NEXT: vmovntdq %xmm0, (%esi)
59 ; X32-AVX-NEXT: vaddpd .LCPI0_2, %xmm1, %xmm0
60 ; X32-AVX-NEXT: vmovntpd %xmm0, (%esi)
61 ; X32-AVX-NEXT: vpaddd .LCPI0_3, %xmm5, %xmm0
62 ; X32-AVX-NEXT: vmovntdq %xmm0, (%esi)
63 ; X32-AVX-NEXT: vpaddw .LCPI0_4, %xmm4, %xmm0
64 ; X32-AVX-NEXT: vmovntdq %xmm0, (%esi)
65 ; X32-AVX-NEXT: vpaddb .LCPI0_5, %xmm3, %xmm0
66 ; X32-AVX-NEXT: vmovntdq %xmm0, (%esi)
67 ; X32-AVX-NEXT: movntil %edx, (%esi)
68 ; X32-AVX-NEXT: movntil %ecx, 4(%esi)
69 ; X32-AVX-NEXT: movntil %eax, (%esi)
70 ; X32-AVX-NEXT: leal -4(%ebp), %esp
71 ; X32-AVX-NEXT: popl %esi
48 ; X32-AVX-NEXT: movl 8(%ebp), %edx
49 ; X32-AVX-NEXT: vaddps {{\.LCPI.*}}, %xmm0, %xmm0
50 ; X32-AVX-NEXT: vmovntps %xmm0, (%edx)
51 ; X32-AVX-NEXT: vpaddq {{\.LCPI.*}}, %xmm2, %xmm0
52 ; X32-AVX-NEXT: vmovntdq %xmm0, (%edx)
53 ; X32-AVX-NEXT: vaddpd {{\.LCPI.*}}, %xmm1, %xmm0
54 ; X32-AVX-NEXT: vmovntpd %xmm0, (%edx)
55 ; X32-AVX-NEXT: vpaddd {{\.LCPI.*}}, %xmm5, %xmm0
56 ; X32-AVX-NEXT: vmovntdq %xmm0, (%edx)
57 ; X32-AVX-NEXT: vpaddw {{\.LCPI.*}}, %xmm4, %xmm0
58 ; X32-AVX-NEXT: vmovntdq %xmm0, (%edx)
59 ; X32-AVX-NEXT: vpaddb {{\.LCPI.*}}, %xmm3, %xmm0
60 ; X32-AVX-NEXT: vmovntdq %xmm0, (%edx)
61 ; X32-AVX-NEXT: movntil %ecx, 4(%edx)
62 ; X32-AVX-NEXT: movntil %eax, (%edx)
63 ; X32-AVX-NEXT: movl %ebp, %esp
7264 ; X32-AVX-NEXT: popl %ebp
7365 ; X32-AVX-NEXT: retl
7466 ;
133133 @g_16 = internal global i32 -1
134134
135135 ; X64-LABEL: test8:
136 ; X64-NEXT: movl _g_16(%rip), %eax
137 ; X64-NEXT: movl $0, _g_16(%rip)
138 ; X64-NEXT: orl $1, %eax
139 ; X64-NEXT: movl %eax, _g_16(%rip)
136 ; X64-NEXT: orb $1, _g_16(%rip)
140137 ; X64-NEXT: ret
141138 define void @test8() nounwind {
142139 %tmp = load i32, i32* @g_16
183183 %v6 = extractvalue { i1, i1, i1, i1 } %call, 2
184184 %v7 = extractvalue { i1, i1, i1, i1 } %call, 3
185185 %val = zext i1 %v3 to i32
186 store i32 %val, i32* @var
186 store volatile i32 %val, i32* @var
187187 %val2 = zext i1 %v5 to i32
188 store i32 %val2, i32* @var
188 store volatile i32 %val2, i32* @var
189189 %val3 = zext i1 %v6 to i32
190 store i32 %val3, i32* @var
190 store volatile i32 %val3, i32* @var
191191 %val4 = zext i1 %v7 to i32
192192 store i32 %val4, i32* @var
193193 ret void
1919 ; Check that proper alignment of spilled vector does not affect vargs
2020
2121 ; CHECK-LABEL: vargs_not_affected
22 ; CHECK: leal 28(%ebp), %eax
22 ; CHECK: movl 28(%ebp), %eax
2323 define i32 @vargs_not_affected(<4 x float> %v, i8* %f, ...) {
2424 entry:
2525 %ap = alloca i8*, align 4
1111 ; LINUX: movq $0, -8(%rsp)
1212
1313 %this = alloca %Object addrspace(1)*
14 store %Object addrspace(1)* null, %Object addrspace(1)** %this
15 store %Object addrspace(1)* %param0, %Object addrspace(1)** %this
14 store volatile %Object addrspace(1)* null, %Object addrspace(1)** %this
15 store volatile %Object addrspace(1)* %param0, %Object addrspace(1)** %this
1616 br label %0
1717
1818 ;
9393
9494 ; CHECK-LABEL: arg4:
9595 ; CHECK: pushq
96 ; va_start:
97 ; CHECK: leaq 48(%rsp), [[REG_arg4_1:%[a-z]+]]
98 ; CHECK: movq [[REG_arg4_1]], (%rsp)
96 ; va_start (optimized away as overwritten by va_arg)
9997 ; va_arg:
10098 ; CHECK: leaq 52(%rsp), [[REG_arg4_2:%[a-z]+]]
10199 ; CHECK: movq [[REG_arg4_2]], (%rsp)
8989 }
9090
9191 ; CHECK-LABEL: arg4:
92 ; va_start:
93 ; CHECK: leaq 48(%rsp), [[REG_arg4_1:%[a-z]+]]
94 ; CHECK: movq [[REG_arg4_1]], (%rsp)
92 ; va_start (optimized away as overwritten by va_arg)
9593 ; va_arg:
9694 ; CHECK: leaq 52(%rsp), [[REG_arg4_2:%[a-z]+]]
9795 ; CHECK: movq [[REG_arg4_2]], (%rsp)