llvm.org GIT mirror llvm / 04629a4
Merging r360099:
------------------------------------------------------------------------
r360099 | efriedma | 2019-05-06 16:21:59 -0700 (Mon, 06 May 2019) | 26 lines

[ARM] Glue register copies to tail calls.

This generally follows what other targets do. I don't completely understand
why the special case for tail calls existed in the first place; even when the
code was committed in r105413, call lowering didn't work in the way described
in the comments.

Stack protector lowering breaks if the register copies are not glued to a
tail call: we have to insert the stack protector check before the tail call,
and we choose the location based on the assumption that all physical register
dependencies of a tail call are adjacent to the tail call. (See
FindSplitPointForStackProtector.) This is sort of fragile, but I don't see
any reason to break that assumption.

I'm guessing nobody has seen this before just because it's hard to convince
the scheduler to actually schedule the code in a way that breaks; even
without the glue, the only computation that could actually be scheduled after
the register copies is the computation of the call address, and the scheduler
usually prefers to schedule that before the copies anyway.

Fixes https://bugs.llvm.org/show_bug.cgi?id=41417

Differential Revision: https://reviews.llvm.org/D60427
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_80@360793 91177308-0d34-0410-b5e6-96231b3b80d8

Tom Stellard, 3 months ago
2 changed files with 39 additions and 26 deletions.
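For context, the mechanism the commit message describes works like this: each CopyToReg node consumes the previous node's glue value and produces a new one, and the tail-call node consumes the glue of the last copy, so the scheduler cannot place unrelated nodes between the copies and the call. The sketch below only illustrates that pattern; it is not the patched source. The helper name emitGluedTailCall is made up, the fragment assumes the LLVM SelectionDAG headers plus the ARM backend's private ARMISelLowering.h (for ARMISD::TC_RETURN), and the real ARM LowerCall also pushes the argument registers and a register-mask operand onto the call's operand list before the glue.

// Illustrative sketch only (hypothetical helper, not the actual patch):
// glue the outgoing-argument register copies to a tail call so the copies
// and the call form one contiguous group in the scheduled DAG.
static SDValue emitGluedTailCall(SelectionDAG &DAG, const SDLoc &dl,
                                 SDValue Chain, SDValue Callee,
                                 ArrayRef<std::pair<unsigned, SDValue>> RegsToPass) {
  // Chain the copies into physical argument registers; each copy consumes
  // the previous node's glue output and produces a new glue value.
  SDValue InFlag;
  for (const auto &RegAndVal : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, dl, RegAndVal.first, RegAndVal.second,
                             InFlag);
    InFlag = Chain.getValue(1); // glue result of this copy
  }

  // The tail call takes the final glue value as its last operand, so the
  // copies stay adjacent to it. That is the adjacency assumption
  // FindSplitPointForStackProtector relies on when it inserts the stack
  // protector check in front of the tail call.
  SmallVector<SDValue, 8> Ops = {Chain, Callee};
  // (A real LowerCall also appends the argument registers and a regmask.)
  if (InFlag.getNode())
    Ops.push_back(InFlag);
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
}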
@@ -1983,32 +1983,10 @@
   // Build a sequence of copy-to-reg nodes chained together with token chain
   // and flag operands which copy the outgoing args into the appropriate regs.
   SDValue InFlag;
-  // Tail call byval lowering might overwrite argument registers so in case of
-  // tail call optimization the copies to registers are lowered later.
-  if (!isTailCall)
-    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
-      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
-                               RegsToPass[i].second, InFlag);
-      InFlag = Chain.getValue(1);
-    }
-
-  // For tail calls lower the arguments to the 'real' stack slot.
-  if (isTailCall) {
-    // Force all the incoming stack arguments to be loaded from the stack
-    // before any new outgoing arguments are stored to the stack, because the
-    // outgoing stack slots may alias the incoming argument stack slots, and
-    // the alias isn't otherwise explicit. This is slightly more conservative
-    // than necessary, because it means that each store effectively depends
-    // on every argument instead of just those arguments it would clobber.
-
-    // Do not flag preceding copytoreg stuff together with the following stuff.
-    InFlag = SDValue();
-    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
-      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
-                               RegsToPass[i].second, InFlag);
-      InFlag = Chain.getValue(1);
-    }
-    InFlag = SDValue();
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+                             RegsToPass[i].second, InFlag);
+    InFlag = Chain.getValue(1);
   }
 
   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
; RUN: llc < %s | FileCheck %s
target triple = "armv6kz-unknown-unknown-gnueabihf"

; Make sure this doesn't crash, and we actually emit a tail call.
; Unfortunately, this test is sort of fragile... the original issue only
; shows up if scheduling happens in a very specific order. But including
; it anyway just to demonstrate the issue.
; CHECK: pop {r4, lr}
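; (The check expects "pop {r4, lr}" rather than "pop {r4, pc}": the epilogue
; restores lr and the function then branches to the callee as a tail call
; instead of returning.)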

@e = external local_unnamed_addr constant [0 x i32 (i32, i32)*], align 4

; Function Attrs: nounwind sspstrong
define i32 @AVI_ChunkRead_p_chk(i32 %g) nounwind sspstrong "target-cpu"="arm1176jzf-s" {
entry:
  %b = alloca i8, align 1
  %tobool = icmp eq i32 %g, 0
  br i1 %tobool, label %if.end, label %if.then

if.then:                                          ; preds = %entry
  %add = add nsw i32 %g, 1
  %arrayidx = getelementptr inbounds [0 x i32 (i32, i32)*], [0 x i32 (i32, i32)*]* @e, i32 0, i32 %add
  %0 = load i32 (i32, i32)*, i32 (i32, i32)** %arrayidx, align 4
  %call = tail call i32 %0(i32 0, i32 0) #3
  br label %return

if.end:                                           ; preds = %entry
  call void @c(i8* nonnull %b)
  br label %return

return:                                           ; preds = %if.end, %if.then
  %retval.0 = phi i32 [ %call, %if.then ], [ 0, %if.end ]
  ret i32 %retval.0
}

declare void @c(i8*)