llvm.org GIT mirror — llvm commit e65220d
Merge 97980 from mainline. Add documentation on sibling call optimization. Rename tailcall2.ll test to sibcall.ll.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_27@98313 91177308-0d34-0410-b5e6-96231b3b80d8
Tanya Lattner, 10 years ago
4 changed files with 268 additions and 220 deletions.
  • Target-specific Implementation Notes
    • Tail call optimization
    • Sibling call optimization
    • The X86 backend
    • The PowerPC backend
    currently causes each fastcc call that is not tail call optimized
    (because one or more of the above constraints are not met) to be followed by a
    readjustment of the stack. So performance might be worse in such cases.
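    For illustration, here is a minimal sketch of such a case (the function
    names @inc and @not_tail are hypothetical, not part of this patch): the
    fastcc call's result is used again before the ret, so it is not in tail
    position, cannot be tail call optimized, and under -tailcallopt is
    followed by the stack readjustment described above.

    ; Hypothetical sketch: a fastcc call whose result is used before ret.
    ; Compiled with "llc -tailcallopt", this call is not tail call optimized,
    ; so the caller readjusts the stack after the call returns.
    declare fastcc i32 @inc(i32)

    define fastcc i32 @not_tail(i32 %x) {
    entry:
      %r = call fastcc i32 @inc(i32 %x)   ; not a tail call: %r is used below
      %s = add i32 %r, 1
      ret i32 %s
    }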

    Sibling call optimization

    Sibling call optimization is a restricted form of tail call optimization.
    Unlike the tail call optimization described in the previous section, it can be
    performed automatically on any tail call when the -tailcallopt option
    is not specified.

    Sibling call optimization is currently performed on x86/x86-64 when the
    following constraints are met:
  • Caller and callee have the same calling convention. It can be either
    c or fastcc.
  • The call is a tail call: ret immediately follows the call, and ret uses
    the value of the call or is void.
  • Caller and callee have a matching return type, or the result of the
    callee is not used.
  • If any of the callee's arguments are passed on the stack, they must be
    available in the caller's own incoming argument stack area and the frame
    offsets must be the same.

    Example:

    declare i32 @bar(i32, i32)

    define i32 @foo(i32 %a, i32 %b, i32 %c) {
    entry:
      %0 = tail call i32 @bar(i32 %a, i32 %b)
      ret i32 %0
    }
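    For contrast, here is a hedged sketch of the stack-argument constraint
    above (the callee @bar7 and caller @qux are hypothetical, not from this
    patch): on x86-64 the seventh integer argument is passed on the stack,
    but the caller receives only one incoming argument, so there is no
    matching incoming stack slot and the sibling call optimization is not
    expected to be performed.

    declare i32 @bar7(i32, i32, i32, i32, i32, i32, i32)

    define i32 @qux(i32 %a) {
    entry:
      ; @bar7 needs a stack slot for its 7th argument, which @qux's own
      ; incoming argument area does not provide, so this stays a normal call.
      %0 = tail call i32 @bar7(i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a)
      ret i32 %0
    }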
    a ret instruction. If the "tail" marker is
    present, the function call is eligible for tail call optimization,
    but might not in fact be
-   optimized into a jump. As of this writing, the extra requirements for
-   a call to actually be optimized are:
+   optimized into a jump. The code generator may optimize calls marked
+   "tail" with either 1) automatic sibling call optimization when the
+   caller and callee have matching signatures, or 2) forced tail call
+   optimization when the following extra requirements are met:

  • Caller and callee both have the calling convention fastcc.
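    As a hedged illustration of the forced variant (the functions @loop_step
    and @loop_entry are hypothetical): both caller and callee use fastcc and
    the call is marked "tail" in tail position, so with -tailcallopt the code
    generator is expected to lower the call as a jump.

    declare fastcc i32 @loop_step(i32, i32)

    define fastcc i32 @loop_entry(i32 %n) {
    entry:
      ; fastcc caller, fastcc callee, call in tail position: with -tailcallopt
      ; this call is eligible for forced tail call optimization.
      %r = tail call fastcc i32 @loop_step(i32 %n, i32 0)
      ret i32 %r
    }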
test/CodeGen/X86/sibcall.ll
; RUN: llc < %s -march=x86 -asm-verbose=false | FileCheck %s -check-prefix=32
; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s -check-prefix=64

define void @t1(i32 %x) nounwind ssp {
entry:
; 32: t1:
; 32: jmp {{_?}}foo

; 64: t1:
; 64: jmp {{_?}}foo
  tail call void @foo() nounwind
  ret void
}

declare void @foo()

define void @t2() nounwind ssp {
entry:
; 32: t2:
; 32: jmp {{_?}}foo2

; 64: t2:
; 64: jmp {{_?}}foo2
  %0 = tail call i32 @foo2() nounwind
  ret void
}

declare i32 @foo2()

define void @t3() nounwind ssp {
entry:
; 32: t3:
; 32: jmp {{_?}}foo3

; 64: t3:
; 64: jmp {{_?}}foo3
  %0 = tail call i32 @foo3() nounwind
  ret void
}

declare i32 @foo3()

define void @t4(void (i32)* nocapture %x) nounwind ssp {
entry:
; 32: t4:
; 32: call *
; FIXME: gcc can generate a tailcall for this. But it's tricky.

; 64: t4:
; 64-NOT: call
; 64: jmpq *
  tail call void %x(i32 0) nounwind
  ret void
}

define void @t5(void ()* nocapture %x) nounwind ssp {
entry:
; 32: t5:
; 32-NOT: call
; 32: jmpl *

; 64: t5:
; 64-NOT: call
; 64: jmpq *
  tail call void %x() nounwind
  ret void
}

define i32 @t6(i32 %x) nounwind ssp {
entry:
; 32: t6:
; 32: call {{_?}}t6
; 32: jmp {{_?}}bar

; 64: t6:
; 64: jmp {{_?}}t6
; 64: jmp {{_?}}bar
  %0 = icmp slt i32 %x, 10
  br i1 %0, label %bb, label %bb1

bb:
  %1 = add nsw i32 %x, -1
  %2 = tail call i32 @t6(i32 %1) nounwind ssp
  ret i32 %2

bb1:
  %3 = tail call i32 @bar(i32 %x) nounwind
  ret i32 %3
}

declare i32 @bar(i32)

define i32 @t7(i32 %a, i32 %b, i32 %c) nounwind ssp {
entry:
; 32: t7:
; 32: jmp {{_?}}bar2

; 64: t7:
; 64: jmp {{_?}}bar2
  %0 = tail call i32 @bar2(i32 %a, i32 %b, i32 %c) nounwind
  ret i32 %0
}

declare i32 @bar2(i32, i32, i32)

define signext i16 @t8() nounwind ssp {
entry:
; 32: t8:
; 32: call {{_?}}bar3

; 64: t8:
; 64: callq {{_?}}bar3
  %0 = tail call signext i16 @bar3() nounwind ; [#uses=1]
  ret i16 %0
}

declare signext i16 @bar3()

define signext i16 @t9(i32 (i32)* nocapture %x) nounwind ssp {
entry:
; 32: t9:
; 32: call *

; 64: t9:
; 64: callq *
  %0 = bitcast i32 (i32)* %x to i16 (i32)*
  %1 = tail call signext i16 %0(i32 0) nounwind
  ret i16 %1
}

define void @t10() nounwind ssp {
entry:
; 32: t10:
; 32: call

; 64: t10:
; 64: callq
  %0 = tail call i32 @foo4() noreturn nounwind
  unreachable
}

declare i32 @foo4()

define i32 @t11(i32 %x, i32 %y, i32 %z.0, i32 %z.1, i32 %z.2) nounwind ssp {
; In 32-bit mode, it's emitting a bunch of dead loads that are not being
; eliminated currently.

; 32: t11:
; 32-NOT: subl ${{[0-9]+}}, %esp
; 32: jne
; 32-NOT: movl
; 32-NOT: addl ${{[0-9]+}}, %esp
; 32: jmp {{_?}}foo5

; 64: t11:
; 64-NOT: subq ${{[0-9]+}}, %esp
; 64-NOT: addq ${{[0-9]+}}, %esp
; 64: jmp {{_?}}foo5
entry:
  %0 = icmp eq i32 %x, 0
  br i1 %0, label %bb6, label %bb

bb:
  %1 = tail call i32 @foo5(i32 %x, i32 %y, i32 %z.0, i32 %z.1, i32 %z.2) nounwind
  ret i32 %1

bb6:
  ret i32 0
}

declare i32 @foo5(i32, i32, i32, i32, i32)

%struct.t = type { i32, i32, i32, i32, i32 }

define i32 @t12(i32 %x, i32 %y, %struct.t* byval align 4 %z) nounwind ssp {
; 32: t12:
; 32-NOT: subl ${{[0-9]+}}, %esp
; 32-NOT: addl ${{[0-9]+}}, %esp
; 32: jmp {{_?}}foo6

; 64: t12:
; 64-NOT: subq ${{[0-9]+}}, %esp
; 64-NOT: addq ${{[0-9]+}}, %esp
; 64: jmp {{_?}}foo6
entry:
  %0 = icmp eq i32 %x, 0
  br i1 %0, label %bb2, label %bb

bb:
  %1 = tail call i32 @foo6(i32 %x, i32 %y, %struct.t* byval align 4 %z) nounwind
  ret i32 %1

bb2:
  ret i32 0
}

declare i32 @foo6(i32, i32, %struct.t* byval align 4)

; rdar://r7717598
%struct.ns = type { i32, i32 }
%struct.cp = type { float, float }

define %struct.ns* @t13(%struct.cp* %yy) nounwind ssp {
; 32: t13:
; 32-NOT: jmp
; 32: call
; 32: ret

; 64: t13:
; 64-NOT: jmp
; 64: call
; 64: ret
entry:
  %0 = tail call fastcc %struct.ns* @foo7(%struct.cp* byval align 4 %yy, i8 signext 0) nounwind
  ret %struct.ns* %0
}

declare fastcc %struct.ns* @foo7(%struct.cp* byval align 4, i8 signext) nounwind ssp
test/CodeGen/X86/tailcall2.ll (+0, -218): removed; its contents were moved unchanged to test/CodeGen/X86/sibcall.ll above.