llvm.org GIT mirror llvm / ad2e27a
AMDGPU: Fix ignoring DisableFramePointerElim in leaf functions. The attribute can specify elimination for leaf or non-leaf functions, so it should always be considered. I copied this bug from AArch64, which probably should also be fixed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363949 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 28 days ago
2 changed file(s) with 51 addition(s) and 18 deletion(s). Raw diff Collapse all Expand all
706706 .addReg(ScratchExecCopy);
707707 }
708708
709 if (hasFP(MF)) {
710 const MachineFrameInfo &MFI = MF.getFrameInfo();
711 uint32_t NumBytes = MFI.getStackSize();
712 uint32_t RoundedSize = FuncInfo->isStackRealigned() ?
713 NumBytes + MFI.getMaxAlignment() : NumBytes;
714
709 const MachineFrameInfo &MFI = MF.getFrameInfo();
710 uint32_t NumBytes = MFI.getStackSize();
711 uint32_t RoundedSize = FuncInfo->isStackRealigned() ?
712 NumBytes + MFI.getMaxAlignment() : NumBytes;
713
714 if (RoundedSize != 0 && hasFP(MF)) {
715715 const unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
716716 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
717717 .addReg(StackPtrReg)
862862 // API SP if there are calls.
863863 if (MF.getInfo()->isEntryFunction())
864864 return true;
865
866 // Retain behavior of always omitting the FP for leaf functions when
867 // possible.
868 if (MF.getTarget().Options.DisableFramePointerElim(MF))
869 return true;
870865 }
871866
872867 return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
873868 MFI.hasStackMap() || MFI.hasPatchPoint() ||
874 MF.getSubtarget().getRegisterInfo()->needsStackRealignment(MF);
875 }
869 MF.getSubtarget().getRegisterInfo()->needsStackRealignment(MF) ||
870 MF.getTarget().Options.DisableFramePointerElim(MF);
871 }
88 ret void
99 }
1010
11 ; GCN-LABEL: {{^}}callee_no_stack_no_fp_elim:
11 ; GCN-LABEL: {{^}}callee_no_stack_no_fp_elim_all:
12 ; GCN: ; %bb.0:
13 ; GCN-NEXT: s_waitcnt
14 ; GCN-NEXT: s_mov_b32 s5, s32
15 ; GCN-NEXT: s_setpc_b64
16 define void @callee_no_stack_no_fp_elim_all() #1 {
17 ret void
18 }
19
20 ; GCN-LABEL: {{^}}callee_no_stack_no_fp_elim_nonleaf:
1221 ; GCN: ; %bb.0:
1322 ; GCN-NEXT: s_waitcnt
1423 ; GCN-NEXT: s_setpc_b64
15 define void @callee_no_stack_no_fp_elim() #1 {
24 define void @callee_no_stack_no_fp_elim_nonleaf() #2 {
1625 ret void
1726 }
18
19 ; Requires frame pointer for access to local regular object.
2027
2128 ; GCN-LABEL: {{^}}callee_with_stack:
2229 ; GCN: ; %bb.0:
2633 ; GCN-NEXT: s_waitcnt
2734 ; GCN-NEXT: s_setpc_b64
2835 define void @callee_with_stack() #0 {
36 %alloca = alloca i32, addrspace(5)
37 store volatile i32 0, i32 addrspace(5)* %alloca
38 ret void
39 }
40
41 ; GCN-LABEL: {{^}}callee_with_stack_no_fp_elim_all:
42 ; GCN: ; %bb.0:
43 ; GCN-NEXT: s_waitcnt
44 ; GCN-NEXT: s_mov_b32 s5, s32
45 ; GCN-NEXT: s_add_u32 s32, s32, 0x200
46 ; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
47 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s5 offset:4{{$}}
48 ; GCN-NEXT: s_sub_u32 s32, s32, 0x200
49 ; GCN-NEXT: s_waitcnt vmcnt(0)
50 ; GCN-NEXT: s_setpc_b64
51 define void @callee_with_stack_no_fp_elim_all() #1 {
52 %alloca = alloca i32, addrspace(5)
53 store volatile i32 0, i32 addrspace(5)* %alloca
54 ret void
55 }
56
57 ; GCN-LABEL: {{^}}callee_with_stack_no_fp_elim_non_leaf:
58 ; GCN: ; %bb.0:
59 ; GCN-NEXT: s_waitcnt
60 ; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
61 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32{{$}}
62 ; GCN-NEXT: s_waitcnt
63 ; GCN-NEXT: s_setpc_b64
64 define void @callee_with_stack_no_fp_elim_non_leaf() #2 {
2965 %alloca = alloca i32, addrspace(5)
3066 store volatile i32 0, i32 addrspace(5)* %alloca
3167 ret void
150186 }
151187
152188 attributes #0 = { nounwind }
153 attributes #1 = { nounwind "no-frame-pointer-elim"="true" }
189 attributes #1 = { nounwind "frame-pointer"="all" }
190 attributes #2 = { nounwind "frame-pointer"="non-leaf" }