llvm.org GIT mirror llvm / e791f41
[DAG] Improve Aliasing of operations to static alloca Memory accesses offset from frame indices may alias, e.g., we may merge writes from function arguments passed on the stack when they are contiguous. As a result, when checking aliasing, we consider the underlying frame index's offset from the stack pointer. Static allocas are realized as stack objects in SelectionDAG, but their offsets are not set until post-DAG, causing DAGCombiner's alias check to frequently consider accesses to static allocas as aliasing. Modify isAlias so that accesses to static allocas and accesses to other frame objects are considered non-aliasing. Many test changes are included here. Most are fixes for tests which indirectly relied on our aliasing ability and needed to be modified to preserve their original intent. The remaining tests have minor improvements due to relaxed ordering. The exception is CodeGen/X86/2011-10-19-widen_vselect.ll which has a minor degradation even though the pre-legalized DAG is improved. Reviewers: rnk, mkuper, jonpa, hfinkel, uweigand Reviewed By: rnk Subscribers: sdardis, nemanjai, javed.absar, llvm-commits Differential Revision: https://reviews.llvm.org/D33345 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@307546 91177308-0d34-0410-b5e6-96231b3b80d8 Nirav Dave 3 years ago
24 changed file(s) with 166 addition(s) and 125 deletion(s). Raw diff Collapse all Expand all
1137211372 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
1137311373 MVT::Other, Chain, ReplLoad.getValue(1));
1137411374
11375 // Make sure the new and old chains are cleaned up.
11376 AddToWorklist(Token.getNode());
11377
11378 // Replace uses with load result and token factor. Don't add users
11379 // to work list.
11380 return CombineTo(N, ReplLoad.getValue(0), Token, false);
11375 // Replace uses with load result and token factor
11376 return CombineTo(N, ReplLoad.getValue(0), Token);
1138111377 }
1138211378 }
1138311379
1670016696 if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
1670116697 return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
1670216698
16699 // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
16700 // able to calculate their relative offset if at least one arises
16701 // from an alloca. However, these allocas cannot overlap and we
16702 // can infer there is no alias.
16703 if (auto *A = dyn_cast(BasePtr0.getBase()))
16704 if (auto *B = dyn_cast(BasePtr1.getBase())) {
16705 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
16706 if (!MFI.isFixedObjectIndex(A->getIndex()) ||
16707 !MFI.isFixedObjectIndex(B->getIndex()))
16708 return false;
16709 }
16710
1670316711 // FIXME: findBaseOffset and ConstantValue/GlobalValue/FrameIndex analysis
1670416712 // modified to use BaseIndexOffset.
1670516713
1010 ; CHECK: add {{x[0-9]+}}, [[ARGS]], #8
1111 ; First vararg
1212 ; CHECK: ldr {{w[0-9]+}}, [sp, #72]
13 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #8
1413 ; Second vararg
15 ; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}]
14 ; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8
1615 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #8
1716 ; Third vararg
1817 ; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}]
279279 define i32 @caller42() #3 {
280280 entry:
281281 ; CHECK-LABEL: caller42
282 ; CHECK: str {{x[0-9]+}}, [sp, #48]
283 ; CHECK: str {{q[0-9]+}}, [sp, #32]
284 ; CHECK: str {{x[0-9]+}}, [sp, #16]
285 ; CHECK: str {{q[0-9]+}}, [sp]
282 ; CHECK-DAG: str {{x[0-9]+}}, [sp, #48]
283 ; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
284 ; CHECK-DAG: str {{x[0-9]+}}, [sp, #16]
285 ; CHECK-DAG: str {{q[0-9]+}}, [sp]
286286 ; CHECK: add x1, sp, #32
287287 ; CHECK: mov x2, sp
288288 ; Space for s1 is allocated at sp+32
317317 ; CHECK-LABEL: caller42_stack
318318 ; CHECK: sub sp, sp, #112
319319 ; CHECK: add x29, sp, #96
320 ; CHECK: stur {{x[0-9]+}}, [x29, #-16]
321 ; CHECK: stur {{q[0-9]+}}, [x29, #-32]
322 ; CHECK: str {{x[0-9]+}}, [sp, #48]
323 ; CHECK: str {{q[0-9]+}}, [sp, #32]
320 ; CHECK-DAG: stur {{x[0-9]+}}, [x29, #-16]
321 ; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-32]
322 ; CHECK-DAG: str {{x[0-9]+}}, [sp, #48]
323 ; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
324324 ; Space for s1 is allocated at x29-32 = sp+64
325325 ; Space for s2 is allocated at sp+32
326326 ; CHECK: add x[[B:[0-9]+]], sp, #32
387387 define i32 @caller43() #3 {
388388 entry:
389389 ; CHECK-LABEL: caller43
390 ; CHECK: str {{q[0-9]+}}, [sp, #48]
391 ; CHECK: str {{q[0-9]+}}, [sp, #32]
392 ; CHECK: str {{q[0-9]+}}, [sp, #16]
393 ; CHECK: str {{q[0-9]+}}, [sp]
390 ; CHECK-DAG: str {{q[0-9]+}}, [sp, #48]
391 ; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
392 ; CHECK-DAG: str {{q[0-9]+}}, [sp, #16]
393 ; CHECK-DAG: str {{q[0-9]+}}, [sp]
394394 ; CHECK: add x1, sp, #32
395395 ; CHECK: mov x2, sp
396396 ; Space for s1 is allocated at sp+32
429429 ; CHECK-LABEL: caller43_stack
430430 ; CHECK: sub sp, sp, #112
431431 ; CHECK: add x29, sp, #96
432 ; CHECK: stur {{q[0-9]+}}, [x29, #-16]
433 ; CHECK: stur {{q[0-9]+}}, [x29, #-32]
434 ; CHECK: str {{q[0-9]+}}, [sp, #48]
435 ; CHECK: str {{q[0-9]+}}, [sp, #32]
432 ; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-16]
433 ; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-32]
434 ; CHECK-DAG: str {{q[0-9]+}}, [sp, #48]
435 ; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
436436 ; Space for s1 is allocated at x29-32 = sp+64
437437 ; Space for s2 is allocated at sp+32
438438 ; CHECK: add x[[B:[0-9]+]], sp, #32
0 ; RUN: llc -mtriple=arm64-eabi -mcpu=cyclone < %s | FileCheck %s
11
22 ; CHECK: foo
3 ; CHECK: str w[[REG0:[0-9]+]], [x19, #264]
4 ; CHECK: mov w[[REG1:[0-9]+]], w[[REG0]]
5 ; CHECK: str w[[REG1]], [x19, #132]
6
3 ; CHECK-DAG: str w[[REG0:[0-9]+]], [x19, #132]
4 ; CHECK-DAG: str w[[REG0]], [x19, #264]
75 define i32 @foo(i32 %a) nounwind {
86 %retval = alloca i32, align 4
97 %a.addr = alloca i32, align 4
115115
116116 define void @test_vext_s32() nounwind ssp {
117117 ; CHECK-LABEL: test_vext_s32:
118 ; CHECK: {{ext.8.*#4}}
118 ; CHECK: {{rev64.2s.*}}
119119 %xS32x2 = alloca <2 x i32>, align 8
120120 %__a = alloca <2 x i32>, align 8
121121 %__b = alloca <2 x i32>, align 8
136136
137137 define void @test_vext_u32() nounwind ssp {
138138 ; CHECK-LABEL: test_vext_u32:
139 ; CHECK: {{ext.8.*#4}}
139 ; CHECK: {{rev64.2s.*}}
140140 %xU32x2 = alloca <2 x i32>, align 8
141141 %__a = alloca <2 x i32>, align 8
142142 %__b = alloca <2 x i32>, align 8
157157
158158 define void @test_vext_f32() nounwind ssp {
159159 ; CHECK-LABEL: test_vext_f32:
160 ; CHECK: {{ext.8.*#4}}
160 ; CHECK: {{rev64.2s.*}}
161161 %xF32x2 = alloca <2 x float>, align 8
162162 %__a = alloca <2 x float>, align 8
163163 %__b = alloca <2 x float>, align 8
178178
179179 define void @test_vext_s64() nounwind ssp {
180180 ; CHECK-LABEL: test_vext_s64:
181 ; CHECK_FIXME: {{ext.8.*#1}}
181 ; CHECK_FIXME: {{rev64.2s.*}}
182182 ; this just turns into a load of the second element
183183 %xS64x1 = alloca <1 x i64>, align 8
184184 %__a = alloca <1 x i64>, align 8
88 %i32T = alloca i32, align 4
99 %i32F = alloca i32, align 4
1010 %i32X = alloca i32, align 4
11 store i32 0, i32* %tmp
11 store i32 %argc, i32* %tmp
1212 store i32 15, i32* %i32T, align 4
1313 store i32 5, i32* %i32F, align 4
1414 %tmp6 = load i32, i32* %tmp, align 4
308308 ; CHECK-APPLE-LABEL: foo_vararg:
309309 ; CHECK-APPLE: orr w0, wzr, #0x10
310310 ; CHECK-APPLE: malloc
311 ; CHECK-APPLE: orr [[ID:w[0-9]+]], wzr, #0x1
312 ; CHECK-APPLE: add [[ARGS:x[0-9]+]], [[TMP:x[0-9]+]], #16
313 ; CHECK-APPLE: strb [[ID]], [x0, #8]
311 ; CHECK-APPLE-DAG: orr [[ID:w[0-9]+]], wzr, #0x1
312 ; CHECK-APPLE-DAG: add [[ARGS:x[0-9]+]], [[TMP:x[0-9]+]], #16
313 ; CHECK-APPLE-DAG: strb [[ID]], [x0, #8]
314314
315315 ; First vararg
316316 ; CHECK-APPLE-DAG: orr {{x[0-9]+}}, [[ARGS]], #0x8
317317 ; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #16]
318 ; CHECK-APPLE: add {{x[0-9]+}}, {{x[0-9]+}}, #8
318 ; CHECK-APPLE-DAG: add {{x[0-9]+}}, {{x[0-9]+}}, #8
319319 ; Second vararg
320 ; CHECK-APPLE: ldr {{w[0-9]+}}, [{{x[0-9]+}}]
321 ; CHECK-APPLE: add {{x[0-9]+}}, {{x[0-9]+}}, #8
320 ; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8
321 ; CHECK-APPLE-DAG: add {{x[0-9]+}}, {{x[0-9]+}}, #16
322322 ; Third vararg
323323 ; CHECK-APPLE: ldr {{w[0-9]+}}, [{{x[0-9]+}}]
324324
2525 store i32 3855, i32* %xort
2626 store i32 4, i32* %temp
2727 %tmp = load i32, i32* %temp
28 call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
2829 ; CHECK: ldrex
2930 ; CHECK: add
3031 ; CHECK: strex
3435 ; CHECK-BAREMETAL-NOT: __sync
3536 %0 = atomicrmw add i32* %val1, i32 %tmp monotonic
3637 store i32 %0, i32* %old
38 call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
3739 ; CHECK: ldrex
3840 ; CHECK: sub
3941 ; CHECK: strex
4345 ; CHECK-BAREMETAL-NOT: __sync
4446 %1 = atomicrmw sub i32* %val2, i32 30 monotonic
4547 store i32 %1, i32* %old
48 call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
4649 ; CHECK: ldrex
4750 ; CHECK: add
4851 ; CHECK: strex
5255 ; CHECK-BAREMETAL-NOT: __sync
5356 %2 = atomicrmw add i32* %val2, i32 1 monotonic
5457 store i32 %2, i32* %old
58 call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
5559 ; CHECK: ldrex
5660 ; CHECK: sub
5761 ; CHECK: strex
6165 ; CHECK-BAREMETAL-NOT: __sync
6266 %3 = atomicrmw sub i32* %val2, i32 1 monotonic
6367 store i32 %3, i32* %old
68 call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
6469 ; CHECK: ldrex
6570 ; CHECK: and
6671 ; CHECK: strex
7075 ; CHECK-BAREMETAL-NOT: __sync
7176 %4 = atomicrmw and i32* %andt, i32 4080 monotonic
7277 store i32 %4, i32* %old
78 call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
7379 ; CHECK: ldrex
7480 ; CHECK: or
7581 ; CHECK: strex
7985 ; CHECK-BAREMETAL-NOT: __sync
8086 %5 = atomicrmw or i32* %ort, i32 4080 monotonic
8187 store i32 %5, i32* %old
88 call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
8289 ; CHECK: ldrex
8390 ; CHECK: eor
8491 ; CHECK: strex
8895 ; CHECK-BAREMETAL-NOT: __sync
8996 %6 = atomicrmw xor i32* %xort, i32 4080 monotonic
9097 store i32 %6, i32* %old
98 call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
9199 ; CHECK: ldrex
92100 ; CHECK: cmp
93101 ; CHECK: strex
97105 ; CHECK-BAREMETAL-NOT: __sync
98106 %7 = atomicrmw min i32* %val2, i32 16 monotonic
99107 store i32 %7, i32* %old
108 call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
100109 %neg = sub i32 0, 1
101110 ; CHECK: ldrex
102111 ; CHECK: cmp
107116 ; CHECK-BAREMETAL-NOT: __sync
108117 %8 = atomicrmw min i32* %val2, i32 %neg monotonic
109118 store i32 %8, i32* %old
119 call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
110120 ; CHECK: ldrex
111121 ; CHECK: cmp
112122 ; CHECK: strex
116126 ; CHECK-BAREMETAL-NOT: __sync
117127 %9 = atomicrmw max i32* %val2, i32 1 monotonic
118128 store i32 %9, i32* %old
129 call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
119130 ; CHECK: ldrex
120131 ; CHECK: cmp
121132 ; CHECK: strex
125136 ; CHECK-BAREMETAL-NOT: __sync
126137 %10 = atomicrmw max i32* %val2, i32 0 monotonic
127138 store i32 %10, i32* %old
139 call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
128140 ; CHECK: ldrex
129141 ; CHECK: cmp
130142 ; CHECK: strex
134146 ; CHECK-BAREMETAL-NOT: __sync
135147 %11 = atomicrmw umin i32* %val2, i32 16 monotonic
136148 store i32 %11, i32* %old
149 call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
137150 %uneg = sub i32 0, 1
138151 ; CHECK: ldrex
139152 ; CHECK: cmp
144157 ; CHECK-BAREMETAL-NOT: __sync
145158 %12 = atomicrmw umin i32* %val2, i32 %uneg monotonic
146159 store i32 %12, i32* %old
160 call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
147161 ; CHECK: ldrex
148162 ; CHECK: cmp
149163 ; CHECK: strex
153167 ; CHECK-BAREMETAL-NOT: __sync
154168 %13 = atomicrmw umax i32* %val2, i32 1 monotonic
155169 store i32 %13, i32* %old
170 call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
156171 ; CHECK: ldrex
157172 ; CHECK: cmp
158173 ; CHECK: strex
3838 ; CHECK-LABEL: va_copy:
3939 %vl.addr = alloca i8*, align 2
4040 %vl2 = alloca i8*, align 2
41 ; CHECK: mov.w r12, 2(r1)
41 ; CHECK-DAG: mov.w r12, 2(r1)
4242 store i8* %vl, i8** %vl.addr, align 2
4343 %0 = bitcast i8** %vl2 to i8*
4444 %1 = bitcast i8** %vl.addr to i8*
45 ; CHECK-NEXT: mov.w r12, 0(r1)
45 ; CHECK-DAG: mov.w r12, 0(r1)
4646 call void @llvm.va_copy(i8* %0, i8* %1)
4747 ret void
4848 }
5858 ; CHECK-LABEL: f123:
5959 ; MIPS64R2: daddiu $[[R0:[0-9]+]], $zero, 123
6060 ; MIPS64R2: dins $[[R0:[0-9]+]], $[[R1:[0-9]+]], 27, 37
61 ; MIPS64R2: daddiu $[[R0:[0-9]+]], $zero, 5
6261 ; MIPS64R2: daddiu $[[R0:[0-9]+]], $zero, 4
6362 ; MIPS64R2: dins $[[R0:[0-9]+]], $[[R1:[0-9]+]], 28, 6
63 ; MIPS64R2: daddiu $[[R0:[0-9]+]], $zero, 5
6464 ; MIPS64R2: dins $[[R0:[0-9]+]], $[[R1:[0-9]+]], 50, 14
6565 ; MIPS64R2: dsrl $[[R0:[0-9]+]], $[[R1:[0-9]+]], 50
6666 ; MIPS64R2: dins $[[R0:[0-9]+]], $[[R1:[0-9]+]], 34, 16
9393 ; MIPS32R2: ori $[[R0:[0-9]+]], $[[R0:[0-9]+]], 8
9494 ; MIPS32R2-NOT: ins {{[[:space:]].*}}
9595 ; MIPS64R2N32: ori $[[R0:[0-9]+]], $[[R0:[0-9]+]], 8
96 ; MIPS64R2N32-NOT: ins {{[[:space:]].*}}
96 ; MIPS64R2N32-NOT: ins {{[[:space:]].*}}
8282 ; X32-NEXT: cmpeqps %xmm2, %xmm1
8383 ; X32-NEXT: movaps %xmm1, %xmm0
8484 ; X32-NEXT: blendvps %xmm0, %xmm2, %xmm4
85 ; X32-NEXT: extractps $1, %xmm4, {{[0-9]+}}(%esp)
8685 ; X32-NEXT: movss %xmm4, {{[0-9]+}}(%esp)
87 ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
88 ; X32-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
86 ; X32-NEXT: movshdup {{.*#+}} xmm0 = xmm4[1,1,3,3]
87 ; X32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
88 ; X32-NEXT: movss %xmm4, {{[0-9]+}}(%esp)
89 ; X32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
8990 ; X32-NEXT: addl $60, %esp
9091 ; X32-NEXT: retl
9192 ;
0 ; RUN: llc -o - -mtriple=x86_64-linux-gnu %s | FileCheck %s
1
2 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
3 target triple = "x86_64-unknown-linux-gnu"
4
5 ; We should be able to bypass the load values to their corresponding
6 ; stores here.
7
8 ; CHECK-LABEL: foo
9 ; CHECK-DAG: movl %esi, -8(%rsp)
10 ; CHECK-DAG: movl %ecx, -16(%rsp)
11 ; CHECK-DAG: movl %edi, -4(%rsp)
12 ; CHECK-DAG: movl %edx, -12(%rsp)
13 ; CHECK: leal
14 ; CHECK: addl
15 ; CHECK: addl
16 ; CHECK: retq
17
18 define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d) {
19 entry:
20 %a0 = alloca i32
21 %a1 = alloca i32
22 %a2 = alloca i32
23 %a3 = alloca i32
24 store i32 %b, i32* %a1
25 store i32 %d, i32* %a3
26 store i32 %a, i32* %a0
27 store i32 %c, i32* %a2
28 %l0 = load i32, i32* %a0
29 %l1 = load i32, i32* %a1
30 %l2 = load i32, i32* %a2
31 %l3 = load i32, i32* %a3
32 %add0 = add nsw i32 %l0, %l1
33 %add1 = add nsw i32 %add0, %l2
34 %add2 = add nsw i32 %add1, %l3
35 ret i32 %add2
36 }
1414 %tmp = alloca i32, align 4 ; [#uses=3 type=i32*]
1515 %tmp2 = alloca i32, align 4 ; [#uses=3 type=i32*]
1616 %tmp3 = alloca i32 ; [#uses=1 type=i32*]
17 store i32 1, i32* %tmp, align 4
18 store i32 1, i32* %tmp2, align 4
17 store volatile i32 1, i32* %tmp, align 4
18 store volatile i32 1, i32* %tmp2, align 4
1919 br label %bb4
2020
2121 bb4: ; preds = %bb4, %bb
22 %tmp6 = load i32, i32* %tmp2, align 4 ; [#uses=1 type=i32]
22 %tmp6 = load volatile i32, i32* %tmp2, align 4 ; [#uses=1 type=i32]
2323 %tmp7 = add i32 %tmp6, -1 ; [#uses=2 type=i32]
24 store i32 %tmp7, i32* %tmp2, align 4
24 store volatile i32 %tmp7, i32* %tmp2, align 4
2525 %tmp8 = icmp eq i32 %tmp7, 0 ; [#uses=1 type=i1]
26 %tmp9 = load i32, i32* %tmp ; [#uses=1 type=i32]
26 %tmp9 = load volatile i32, i32* %tmp ; [#uses=1 type=i32]
2727 %tmp10 = add i32 %tmp9, -1 ; [#uses=1 type=i32]
28 store i32 %tmp10, i32* %tmp3
28 store volatile i32 %tmp10, i32* %tmp3
2929 br i1 %tmp8, label %bb11, label %bb4
3030
3131 bb11: ; preds = %bb4
32 %tmp12 = load i32, i32* %tmp, align 4 ; [#uses=1 type=i32]
32 %tmp12 = load volatile i32, i32* %tmp, align 4 ; [#uses=1 type=i32]
3333 ret i32 %tmp12
3434 }
3535
4747 store i32 %arg0, i32* %arg0_var
4848 store i32 %arg1, i32* %arg1_var
4949 store i32 %arg2, i32* %arg2_var
50
51 ; CHECK: movl 16(%esp), %esi
52 ; CHECK-NEXT: movl 12(%esp), %ebp
53 ; CHECK-NEXT: movl 8(%esp), %eax
54 ; CHECK-NEXT: movl 4(%esp), %edx
50 ; These loads are loading the values from their previous stores and are optimized away.
5551 %0 = load i32, i32* %hp_var
5652 %1 = load i32, i32* %p_var
5753 %2 = load i32, i32* %arg0_var
5656 store i64 %arg2, i64* %arg2_var
5757 store i64 %arg3, i64* %arg3_var
5858
59 ; CHECK: movq 40(%rsp), %r15
60 ; CHECK-NEXT: movq 32(%rsp), %rbp
61 ; CHECK-NEXT: movq 24(%rsp), %rsi
62 ; CHECK-NEXT: movq 16(%rsp), %rdx
63 ; CHECK-NEXT: movq 8(%rsp), %rcx
59 ; Loads are reading values just written from corresponding registers and are therefore noops.
6460 %0 = load i64, i64* %hp_var
6561 %1 = load i64, i64* %p_var
6662 %2 = load i64, i64* %arg0_var
147147 ; CHECK-NEXT: andl $-8, %esp
148148 ; CHECK-NEXT: subl $16, %esp
149149 ; CHECK-NEXT: movl $1, {{[0-9]+}}(%esp)
150 ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
151 ; CHECK-NEXT: movl $1, (%esp)
150 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
152151 ; CHECK-NEXT: movl $1, %eax
153152 ; CHECK-NEXT: xorl %ecx, %ecx
154153 ; CHECK-NEXT: shldl $32, %eax, %ecx
174173 ; CHECK-NEXT: retl
175174 %x = alloca i32, align 4
176175 %t = alloca i64, align 8
177 store i32 1, i32* %x, align 4
178 store i64 1, i64* %t, align 8 ;; DEAD
179 %load = load i32, i32* %x, align 4
176 store volatile i32 1, i32* %x, align 4
177 %load = load volatile i32, i32* %x, align 4
180178 %shl = shl i32 %load, 8
181179 %add = add i32 %shl, -224
182180 %sh_prom = zext i32 %add to i64
1616 call void @llvm.dbg.value(metadata i32 10, i64 0, metadata !15, metadata !16), !dbg !17
1717 store i32 4, i32* %5, align 4
1818 store i32 0, i32* @x, align 4, !dbg !24
19 call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
1920 ; This is the same sequence of instructions without a debug value. It should be outlined
2021 ; in the same way.
2122 ; CHECK: callq l_OUTLINED_FUNCTION_0
8484 store i32 3, i32* %4, align 4
8585 store i32 4, i32* %5, align 4
8686 store i32 1, i32* @x, align 4
87 call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
8788 ; CHECK: callq [[OFUNC2]]
8889 store i32 1, i32* %2, align 4
8990 store i32 2, i32* %3, align 4
9494
9595 right:
9696 ; CHECK-LABEL: %right
97 ; CHECK: movq %rdx, (%rsp)
9798 ; CHECK: movq
98 ; CHECK: movq %rdx, (%rsp)
9999 ; CHECK: callq some_call
100100 %sp2 = invoke token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* %val2, i64 addrspace(1)* %val3)
101101 to label %right.relocs unwind label %exceptional_return.right
1010 define i32 @back_to_back_calls(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #1 gc "statepoint-example" {
1111 ; CHECK-LABEL: back_to_back_calls
1212 ; The exact stores don't matter, but there need to be three stack slots created
13 ; CHECK: movq %rdi, 16(%rsp)
14 ; CHECK: movq %rdx, 8(%rsp)
15 ; CHECK: movq %rsi, (%rsp)
13 ; CHECK-DAG: movq %rdi, 16(%rsp)
14 ; CHECK-DAG: movq %rdx, 8(%rsp)
15 ; CHECK-DAG: movq %rsi, (%rsp)
1616 ; There should be no more than three moves
1717 ; CHECK-NOT: movq
1818 %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c)
3535 define i32 @reserve_first(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #1 gc "statepoint-example" {
3636 ; CHECK-LABEL: reserve_first
3737 ; The exact stores don't matter, but there need to be three stack slots created
38 ; CHECK: movq %rdi, 16(%rsp)
39 ; CHECK: movq %rdx, 8(%rsp)
40 ; CHECK: movq %rsi, (%rsp)
38 ; CHECK-DAG: movq %rdi, 16(%rsp)
39 ; CHECK-DAG: movq %rdx, 8(%rsp)
40 ; CHECK-DAG: movq %rsi, (%rsp)
4141 %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c)
4242 %a1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 12, i32 12)
4343 %b1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 12, i32 13)
6060 gc "statepoint-example" {
6161 ; CHECK-LABEL: back_to_back_deopt
6262 ; The exact stores don't matter, but there need to be three stack slots created
63 ; CHECK: movl %ebx, 12(%rsp)
64 ; CHECK: movl %ebp, 8(%rsp)
65 ; CHECK: movl %r14d, 4(%rsp)
63 ; CHECK-DAG: movl %ebx, 12(%rsp)
64 ; CHECK-DAG: movl %ebp, 8(%rsp)
65 ; CHECK-DAG: movl %r14d, 4(%rsp)
6666 ; CHECK: callq
67 ; CHECK: movl %ebx, 12(%rsp)
68 ; CHECK: movl %ebp, 8(%rsp)
69 ; CHECK: movl %r14d, 4(%rsp)
67 ; CHECK-DAG: movl %ebx, 12(%rsp)
68 ; CHECK-DAG: movl %ebp, 8(%rsp)
69 ; CHECK-DAG: movl %r14d, 4(%rsp)
7070 ; CHECK: callq
71 ; CHECK: movl %ebx, 12(%rsp)
72 ; CHECK: movl %ebp, 8(%rsp)
73 ; CHECK: movl %r14d, 4(%rsp)
71 ; CHECK-DAG: movl %ebx, 12(%rsp)
72 ; CHECK-DAG: movl %ebp, 8(%rsp)
73 ; CHECK-DAG: movl %r14d, 4(%rsp)
7474 ; CHECK: callq
75 ; CHECK: movl %ebx, 12(%rsp)
76 ; CHECK: movl %ebp, 8(%rsp)
77 ; CHECK: movl %r14d, 4(%rsp)
75 ; CHECK-DAG: movl %ebx, 12(%rsp)
76 ; CHECK-DAG: movl %ebp, 8(%rsp)
77 ; CHECK-DAG: movl %r14d, 4(%rsp)
7878 ; CHECK: callq
7979 call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 3, i32 %a, i32 %b, i32 %c)
8080 call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 3, i32 %a, i32 %b, i32 %c)
8888 ; CHECK-LABEL: back_to_back_invokes
8989 entry:
9090 ; The exact stores don't matter, but there need to be three stack slots created
91 ; CHECK: movq %rdi, 16(%rsp)
92 ; CHECK: movq %rdx, 8(%rsp)
93 ; CHECK: movq %rsi, (%rsp)
91 ; CHECK-DAG: movq %rdi, 16(%rsp)
92 ; CHECK-DAG: movq %rdx, 8(%rsp)
93 ; CHECK-DAG: movq %rsi, (%rsp)
9494 ; CHECK: callq
9595 %safepoint_token = invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c)
9696 to label %normal_return unwind label %exceptional_return
4848 ; CHECK: subq $40, %rsp
4949 ; CHECK: testb $1, %dil
5050 ; CHECK: movaps (%rsi), %xmm0
51 ; CHECK: movaps %xmm0, 16(%rsp)
52 ; CHECK: movaps %xmm0, (%rsp)
51 ; CHECK-DAG: movaps %xmm0, (%rsp)
52 ; CHECK-DAG: movaps %xmm0, 16(%rsp)
5353 ; CHECK: callq do_safepoint
5454 ; CHECK: movaps (%rsp), %xmm0
5555 ; CHECK: addq $40, %rsp
1515 ; CHECK-NEXT: .LBB0_2: # %forbody
1616 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
1717 ; CHECK-NEXT: movl (%esp), %eax
18 ; CHECK-NEXT: shll $3, %eax
19 ; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax
20 ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
21 ; CHECK-NEXT: movl (%esp), %eax
22 ; CHECK-NEXT: shll $3, %eax
23 ; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax
24 ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
25 ; CHECK-NEXT: movl (%esp), %ecx
18 ; CHECK-NEXT: leal (,%eax,8), %ecx
2619 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
20 ; CHECK-NEXT: addl %ecx, %edx
21 ; CHECK-NEXT: movl %edx, {{[0-9]+}}(%esp)
22 ; CHECK-NEXT: addl {{[0-9]+}}(%esp), %ecx
23 ; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2724 ; CHECK-NEXT: pmovzxbw {{.*#+}} xmm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
2825 ; CHECK-NEXT: psubw %xmm0, %xmm3
2926 ; CHECK-NEXT: pand %xmm1, %xmm3
3027 ; CHECK-NEXT: pshufb %xmm2, %xmm3
31 ; CHECK-NEXT: movq %xmm3, (%edx,%ecx,8)
28 ; CHECK-NEXT: movq %xmm3, (%edx,%eax,8)
3229 ; CHECK-NEXT: incl (%esp)
3330 ; CHECK-NEXT: .LBB0_1: # %forcond
3431 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1515 ; NARROW-NEXT: .LBB0_2: # %forbody
1616 ; NARROW-NEXT: # in Loop: Header=BB0_1 Depth=1
1717 ; NARROW-NEXT: movl (%esp), %eax
18 ; NARROW-NEXT: shll $3, %eax
19 ; NARROW-NEXT: addl {{[0-9]+}}(%esp), %eax
20 ; NARROW-NEXT: movl %eax, {{[0-9]+}}(%esp)
21 ; NARROW-NEXT: movl (%esp), %eax
22 ; NARROW-NEXT: shll $3, %eax
23 ; NARROW-NEXT: addl {{[0-9]+}}(%esp), %eax
24 ; NARROW-NEXT: movl %eax, {{[0-9]+}}(%esp)
25 ; NARROW-NEXT: movl (%esp), %ecx
18 ; NARROW-NEXT: leal (,%eax,8), %ecx
2619 ; NARROW-NEXT: movl {{[0-9]+}}(%esp), %edx
20 ; NARROW-NEXT: addl %ecx, %edx
21 ; NARROW-NEXT: movl %edx, {{[0-9]+}}(%esp)
22 ; NARROW-NEXT: addl {{[0-9]+}}(%esp), %ecx
23 ; NARROW-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2724 ; NARROW-NEXT: pmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
2825 ; NARROW-NEXT: psubw %xmm0, %xmm2
2926 ; NARROW-NEXT: psllw $8, %xmm2
3027 ; NARROW-NEXT: psraw $8, %xmm2
3128 ; NARROW-NEXT: psraw $2, %xmm2
3229 ; NARROW-NEXT: pshufb %xmm1, %xmm2
33 ; NARROW-NEXT: movq %xmm2, (%edx,%ecx,8)
30 ; NARROW-NEXT: movq %xmm2, (%edx,%eax,8)
3431 ; NARROW-NEXT: incl (%esp)
3532 ; NARROW-NEXT: .LBB0_1: # %forcond
3633 ; NARROW-NEXT: # =>This Inner Loop Header: Depth=1
5350 ; WIDE-NEXT: .LBB0_2: # %forbody
5451 ; WIDE-NEXT: # in Loop: Header=BB0_1 Depth=1
5552 ; WIDE-NEXT: movl (%esp), %eax
56 ; WIDE-NEXT: shll $3, %eax
57 ; WIDE-NEXT: addl {{[0-9]+}}(%esp), %eax
58 ; WIDE-NEXT: movl %eax, {{[0-9]+}}(%esp)
59 ; WIDE-NEXT: movl (%esp), %eax
60 ; WIDE-NEXT: shll $3, %eax
61 ; WIDE-NEXT: addl {{[0-9]+}}(%esp), %eax
62 ; WIDE-NEXT: movl %eax, {{[0-9]+}}(%esp)
63 ; WIDE-NEXT: movl (%esp), %ecx
53 ; WIDE-NEXT: leal (,%eax,8), %ecx
6454 ; WIDE-NEXT: movl {{[0-9]+}}(%esp), %edx
55 ; WIDE-NEXT: addl %ecx, %edx
56 ; WIDE-NEXT: movl %edx, {{[0-9]+}}(%esp)
57 ; WIDE-NEXT: addl {{[0-9]+}}(%esp), %ecx
58 ; WIDE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
6559 ; WIDE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
66 ; WIDE-NEXT: pinsrd $1, 4(%eax,%ecx,8), %xmm3
60 ; WIDE-NEXT: pinsrd $1, 4(%ecx,%eax,8), %xmm3
6761 ; WIDE-NEXT: psubb %xmm0, %xmm3
6862 ; WIDE-NEXT: psrlw $2, %xmm3
6963 ; WIDE-NEXT: pand %xmm1, %xmm3
7064 ; WIDE-NEXT: pxor %xmm2, %xmm3
7165 ; WIDE-NEXT: psubb %xmm2, %xmm3
72 ; WIDE-NEXT: pextrd $1, %xmm3, 4(%edx,%ecx,8)
73 ; WIDE-NEXT: movd %xmm3, (%edx,%ecx,8)
66 ; WIDE-NEXT: pextrd $1, %xmm3, 4(%edx,%eax,8)
67 ; WIDE-NEXT: movd %xmm3, (%edx,%eax,8)
7468 ; WIDE-NEXT: incl (%esp)
7569 ; WIDE-NEXT: .LBB0_1: # %forcond
7670 ; WIDE-NEXT: # =>This Inner Loop Header: Depth=1
2525 ; CHECK-LABEL: test_vararg
2626 ; CHECK: extsp 6
2727 ; CHECK: stw lr, sp[1]
28 ; CHECK: stw r3, sp[6]
29 ; CHECK: stw r0, sp[3]
30 ; CHECK: stw r1, sp[4]
31 ; CHECK: stw r2, sp[5]
28 ; CHECK-DAG: stw r3, sp[6]
29 ; CHECK-DAG: stw r0, sp[3]
30 ; CHECK-DAG: stw r1, sp[4]
31 ; CHECK-DAG: stw r2, sp[5]
3232 ; CHECK: ldaw r0, sp[3]
3333 ; CHECK: stw r0, sp[2]
3434 %list = alloca i8*, align 4