llvm.org GIT mirror llvm / 5ef7ab8
Re-commit r265039 "[X86] Merge adjacent stack adjustments in eliminateCallFramePseudoInstr (PR27140)" The original commit miscompiled things on 32-bit Windows, e.g. a Clang bootstrap. It turns out that mergeSPUpdates() was a bit too generous in what it interpreted as a stack adjustment, causing the following code: addl $12, %esp leal -4(%ebp), %esp To be "optimized" into simply: addl $8, %esp This commit tightens up mergeSPUpdates() and includes a new test (test14 in movtopush.ll) for this situation. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@265345 91177308-0d34-0410-b5e6-96231b3b80d8 Hans Wennborg 3 years ago
9 changed file(s) with 104 addition(s) and 32 deletion(s). Raw diff Collapse all Expand all
377377 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
378378 Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
379379 PI->getOperand(0).getReg() == StackPtr){
380 assert(PI->getOperand(1).getReg() == StackPtr);
380381 Offset += PI->getOperand(2).getImm();
381382 MBB.erase(PI);
382383 if (!doMergeWithPrevious) MBBI = NI;
383384 } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
384 PI->getOperand(0).getReg() == StackPtr) {
385 PI->getOperand(0).getReg() == StackPtr &&
386 PI->getOperand(1).getReg() == StackPtr &&
387 PI->getOperand(2).getImm() == 1 &&
388 PI->getOperand(3).getReg() == X86::NoRegister &&
389 PI->getOperand(5).getReg() == X86::NoRegister) {
385390 // For LEAs we have: def = lea SP, 1 (scale), noreg, Offset, noreg.
386391 Offset += PI->getOperand(4).getImm();
387392 MBB.erase(PI);
389394 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
390395 Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
391396 PI->getOperand(0).getReg() == StackPtr) {
397 assert(PI->getOperand(1).getReg() == StackPtr);
392398 Offset -= PI->getOperand(2).getImm();
393399 MBB.erase(PI);
394400 if (!doMergeWithPrevious) MBBI = NI;
25322538 BuildCFI(MBB, I, DL,
25332539 MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
25342540
2535 if (Amount) {
2536 // Add Amount to SP to destroy a frame, and subtract to setup.
2537 int Offset = isDestroy ? Amount : -Amount;
2538
2539 if (!(Fn->optForMinSize() &&
2540 adjustStackWithPops(MBB, I, DL, Offset)))
2541 BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false);
2541 // Add Amount to SP to destroy a frame, or subtract to setup.
2542 int64_t StackAdjustment = isDestroy ? Amount : -Amount;
2543
2544 if (StackAdjustment) {
2545 // Merge with any previous or following adjustment instruction.
2546 StackAdjustment += mergeSPUpdates(MBB, I, true);
2547 StackAdjustment += mergeSPUpdates(MBB, I, false);
2548
2549 if (!StackAdjustment) {
2550 // This and the merged instruction canceled out each other.
2551 return I;
2552 }
2553
2554 if (!(Fn->optForMinSize() &&
2555 adjustStackWithPops(MBB, I, DL, StackAdjustment)))
2556 BuildStackAdjustment(MBB, I, DL, StackAdjustment, /*InEpilogue=*/false);
25422557 }
25432558
25442559 if (DwarfCFI && !hasFP(MF)) {
25482563 // CFI only for EH purposes or for debugging. EH only requires the CFA
25492564 // offset to be correct at each call site, while for debugging we want
25502565 // it to be more precise.
2551 int CFAOffset = Amount;
2566
25522567 // TODO: When not using precise CFA, we also need to adjust for the
25532568 // InternalAmt here.
2554
2555 if (CFAOffset) {
2556 CFAOffset = isDestroy ? -CFAOffset : CFAOffset;
2557 BuildCFI(MBB, I, DL,
2558 MCCFIInstruction::createAdjustCfaOffset(nullptr, CFAOffset));
2569 if (StackAdjustment) {
2570 BuildCFI(MBB, I, DL, MCCFIInstruction::createAdjustCfaOffset(
2571 nullptr, -StackAdjustment));
25592572 }
25602573 }
25612574
0 ; REQUIRES: asserts
11 ; RUN: llc < %s -mtriple=i686-unknown-linux -relocation-model=static -stats 2>&1 | \
2 ; RUN: grep asm-printer | grep 15
2 ; RUN: grep asm-printer | grep 14
33 ;
44 ; It's possible to schedule this in 14 instructions by avoiding
55 ; callee-save registers, but the scheduler isn't currently that
1313 ; SLM: movl (%esp), [[RELOAD:%e..]]
1414 ; SLM-NEXT: pushl [[RELOAD]]
1515 ; CHECK: calll
16 ; CHECK-NEXT: addl $4, %esp
16 ; CHECK-NEXT: addl $8, %esp
1717 %c = add i32 %a, %b
1818 call void @foo(i32 %c)
1919 call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"()
3131 ; CHECK: movl %{{...}}, %esp
3232 ; CHECK-NOT: {{[^ ,]*}}, %esp
3333 ;
34 ; Next we set up the memset call, and then undo it.
34 ; Next we set up the memset call.
3535 ; CHECK: subl $20, %esp
3636 ; CHECK-NOT: {{[^ ,]*}}, %esp
37 ; CHECK: pushl
38 ; CHECK: pushl
39 ; CHECK: pushl
3740 ; CHECK: calll memset
38 ; CHECK-NEXT: addl $32, %esp
41 ;
42 ; Deallocating 32 bytes of outgoing call frame for memset and
43 ; allocating 28 bytes for calling f yields a 4-byte adjustment:
44 ; CHECK-NEXT: addl $4, %esp
3945 ; CHECK-NOT: {{[^ ,]*}}, %esp
4046 ;
41 ; Next we set up the call to 'f'.
42 ; CHECK: subl $28, %esp
47 ; And move on to call 'f', and then restore the stack.
48 ; CHECK: pushl
4349 ; CHECK-NOT: {{[^ ,]*}}, %esp
4450 ; CHECK: calll f
4551 ; CHECK-NEXT: addl $32, %esp
136136 ; X86: movl $13, (%esp)
137137 ; X86: pushl $0
138138 ; X86: calll _print_framealloc_from_fp
139 ; X86: addl $4, %esp
140 ; X86: addl $8, %esp
139 ; X86: addl $12, %esp
141140 ; X86: retl
44
55 define fastcc void @t1() nounwind {
66 ; CHECK-LABEL: t1:
7 ; CHECK: subl $12, %esp
7 ; CHECK: subl $16, %esp
88 ; CHECK: pushl $188
99 ; CHECK-NEXT: pushl $0
1010 ; CHECK-NEXT: pushl $0
11 ; RUN: llc < %s -mtriple=i686-windows -no-x86-call-frame-opt | FileCheck %s -check-prefix=NOPUSH
22 ; RUN: llc < %s -mtriple=x86_64-windows | FileCheck %s -check-prefix=X64
33 ; RUN: llc < %s -mtriple=i686-windows -stackrealign -stack-alignment=32 | FileCheck %s -check-prefix=ALIGNED
4 ; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=LINUX
45
56 %class.Class = type { i32 }
67 %struct.s = type { i64 }
1112 declare void @oneparam(i32 %a)
1213 declare void @eightparams(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h)
1314 declare void @struct(%struct.s* byval %a, i32 %b, i32 %c, i32 %d)
15 declare void @inalloca(<{ %struct.s }>* inalloca)
16
17 declare i8* @llvm.stacksave()
18 declare void @llvm.stackrestore(i8*)
1419
1520 ; We should get pushes for x86, even though there is a reserved call frame.
1621 ; Make sure we don't touch x86-64, and that turning it off works.
222227 ; NORMAL-NEXT: pushl $2
223228 ; NORMAL-NEXT: pushl $1
224229 ; NORMAL-NEXT: call
225 ; NORMAL-NEXT: addl $16, %esp
226 ; NORMAL-NEXT: subl $20, %esp
230 ; NORMAL-NEXT: subl $4, %esp
227231 ; NORMAL-NEXT: movl 20(%esp), [[E1:%e..]]
228232 ; NORMAL-NEXT: movl 24(%esp), [[E2:%e..]]
229233 ; NORMAL-NEXT: movl [[E2]], 4(%esp)
260264 ; NORMAL-NEXT: pushl $2
261265 ; NORMAL-NEXT: pushl $1
262266 ; NORMAL-NEXT: calll *16(%esp)
263 ; NORMAL-NEXT: addl $16, %esp
267 ; NORMAL-NEXT: addl $24, %esp
264268 define void @test10() optsize {
265269 %stack_fptr = alloca void (i32, i32, i32, i32)*
266270 store void (i32, i32, i32, i32)* @good, void (i32, i32, i32, i32)** %stack_fptr
313317 ; NORMAL-NEXT: pushl $2
314318 ; NORMAL-NEXT: pushl $1
315319 ; NORMAL-NEXT: calll _good
316 ; NORMAL-NEXT: addl $16, %esp
317 ; NORMAL-NEXT: subl $20, %esp
320 ; NORMAL-NEXT: subl $4, %esp
318321 ; NORMAL: movl $8, 16(%esp)
319322 ; NORMAL-NEXT: movl $7, 12(%esp)
320323 ; NORMAL-NEXT: movl $6, 8(%esp)
357360 call void @good(i32 %val1, i32 %val2, i32 %val3, i32 %add)
358361 ret i32* %ptr3
359362 }
363
364 ; Make sure to fold adjacent stack adjustments.
365 ; LINUX-LABEL: pr27140:
366 ; LINUX: subl $12, %esp
367 ; LINUX: .cfi_def_cfa_offset 16
368 ; LINUX-NOT: sub
369 ; LINUX: pushl $4
370 ; LINUX: .cfi_adjust_cfa_offset 4
371 ; LINUX: pushl $3
372 ; LINUX: .cfi_adjust_cfa_offset 4
373 ; LINUX: pushl $2
374 ; LINUX: .cfi_adjust_cfa_offset 4
375 ; LINUX: pushl $1
376 ; LINUX: .cfi_adjust_cfa_offset 4
377 ; LINUX: calll good
378 ; LINUX: addl $28, %esp
379 ; LINUX: .cfi_adjust_cfa_offset -28
380 ; LINUX-NOT: add
381 ; LINUX: retl
382 define void @pr27140() optsize {
383 entry:
384 tail call void @good(i32 1, i32 2, i32 3, i32 4)
385 ret void
386 }
387
388 ; Check that a stack restore (leal -4(%ebp), %esp) doesn't get merged with a
389 ; stack adjustment (addl $12, %esp). Just because it's a lea doesn't mean it's
390 ; simply decreasing the stack pointer.
391 ; NORMAL-LABEL: test14:
392 ; NORMAL: calll _B_func
393 ; NORMAL: leal -4(%ebp), %esp
394 ; NORMAL-NOT: %esp
395 ; NORMAL: retl
396 %struct.A = type { i32, i32 }
397 %struct.B = type { i8 }
398 declare x86_thiscallcc %struct.B* @B_ctor(%struct.B* returned, %struct.A* byval)
399 declare void @B_func(%struct.B* sret, %struct.B*, i32)
400 define void @test14(%struct.A* %a) {
401 entry:
402 %ref.tmp = alloca %struct.B, align 1
403 %agg.tmp = alloca i64, align 4
404 %tmpcast = bitcast i64* %agg.tmp to %struct.A*
405 %tmp = alloca %struct.B, align 1
406 %0 = bitcast %struct.A* %a to i64*
407 %1 = load i64, i64* %0, align 4
408 store i64 %1, i64* %agg.tmp, align 4
409 %call = call x86_thiscallcc %struct.B* @B_ctor(%struct.B* %ref.tmp, %struct.A* byval %tmpcast)
410 %2 = getelementptr inbounds %struct.B, %struct.B* %tmp, i32 0, i32 0
411 call void @B_func(%struct.B* sret %tmp, %struct.B* %ref.tmp, i32 1)
412 ret void
413 }
2222 ; CHECK: .cfi_adjust_cfa_offset 4
2323 ; CHECK: calll stdfoo
2424 ; CHECK: .cfi_adjust_cfa_offset -8
25 ; CHECK: addl $8, %esp
26 ; CHECK: .cfi_adjust_cfa_offset -8
25 ; CHECK: addl $20, %esp
26 ; CHECK: .cfi_adjust_cfa_offset -20
2727 define void @test1() #0 !dbg !4 {
2828 entry:
2929 tail call void @foo(i32 1, i32 2) #1, !dbg !10
8181 ; LINUX-NEXT: Ltmp{{[0-9]+}}:
8282 ; LINUX-NEXT: .cfi_adjust_cfa_offset 4
8383 ; LINUX-NEXT: call
84 ; LINUX-NEXT: addl $16, %esp
85 ; LINUX: .cfi_adjust_cfa_offset -16
84 ; LINUX-NEXT: addl $28, %esp
85 ; LINUX: .cfi_adjust_cfa_offset -28
8686 ; DARWIN-NOT: .cfi_escape
8787 ; DARWIN-NOT: pushl
8888 define void @test2_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {