llvm.org GIT mirror llvm / 34b7fde
Make musttail more robust for vector types on x86

Previously I tried to plug musttail into the existing vararg lowering code. That turned out to be a mistake, because non-vararg calls use significantly different register lowering, even on x86. For example, AVX vectors are usually passed in registers to normal functions and memory to vararg functions. Now musttail uses a completely separate lowering.

Hopefully this can be used as the basis for non-x86 perfect forwarding.

Reviewers: majnemer

Differential Revision: http://reviews.llvm.org/D6156

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224745 91177308-0d34-0410-b5e6-96231b3b80d8

Reid Kleckner, 5 years ago
6 changed file(s) with 302 addition(s) and 100 deletion(s).
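Editor's note: the following is a hedged, editor-added illustration of the difference the commit message describes, not part of the patch. A prototyped callee can take an AVX vector in a vector register, while a vararg callee generally receives it in memory, so a forwarding thunk cannot reuse the vararg spill path for register-passed vectors. All names are illustrative; compiling requires AVX enabled (e.g. -mavx).

// Illustration only (not part of this patch). Per the commit message, AVX
// vectors are usually passed in registers to prototyped functions and in
// memory to vararg functions, which is why musttail forwarding needs its own
// lowering instead of the vararg spill code.
#include <immintrin.h>

void prototyped(__m256 v) { (void)v; }   // v typically arrives in a YMM register
void variadic(int n, ...) { (void)n; }   // unnamed vector arguments generally go to memory

void demo(__m256 v) {
  prototyped(v);    // register-based argument passing
  variadic(1, v);   // memory-based argument passing (needs -mavx to compile)
}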
155155 bool isUpperBitsInLoc() const {
156156 return HTP == AExtUpper || HTP == SExtUpper || HTP == ZExtUpper;
157157 }
158 };
159
160 /// Describes a register that needs to be forwarded from the prologue to a
161 /// musttail call.
162 struct ForwardedRegister {
163 ForwardedRegister(unsigned VReg, MCPhysReg PReg, MVT VT)
164 : VReg(VReg), PReg(PReg), VT(VT) {}
165 unsigned VReg;
166 MCPhysReg PReg;
167 MVT VT;
158168 };
159169
160170 /// CCAssignFn - This function assigns a location for Val, updating State to
469479 return PendingLocs;
470480 }
471481
482 /// Compute the remaining unused register parameters that would be used for
483 /// the given value type. This is useful when varargs are passed in the
484 /// registers that normal prototyped parameters would be passed in, or for
485 /// implementing perfect forwarding.
486 void getRemainingRegParmsForType(SmallVectorImpl&lt;MCPhysReg&gt; &Regs, MVT VT,
487 CCAssignFn Fn);
488
489 /// Compute the set of registers that need to be preserved and forwarded to
490 /// any musttail calls.
491 void analyzeMustTailForwardedRegisters(
492 SmallVectorImpl&lt;ForwardedRegister&gt; &Forwards, ArrayRef&lt;MVT&gt; RegParmTypes,
493 CCAssignFn Fn);
494
472495 private:
473496 /// MarkAllocated - Mark a register and all of its aliases as allocated.
474497 void MarkAllocated(unsigned Reg);
1313
1414 #include "llvm/CodeGen/CallingConvLower.h"
1515 #include "llvm/CodeGen/MachineFrameInfo.h"
16 #include "llvm/CodeGen/MachineRegisterInfo.h"
1617 #include "llvm/IR/DataLayout.h"
1718 #include "llvm/Support/Debug.h"
1819 #include "llvm/Support/ErrorHandling.h"
20 #include "llvm/Support/SaveAndRestore.h"
1921 #include "llvm/Support/raw_ostream.h"
2022 #include "llvm/Target/TargetLowering.h"
2123 #include "llvm/Target/TargetRegisterInfo.h"
177179 llvm_unreachable(nullptr);
178180 }
179181 }
182
183 void CCState::getRemainingRegParmsForType(SmallVectorImpl&lt;MCPhysReg&gt; &Regs,
184 MVT VT, CCAssignFn Fn) {
185 unsigned SavedStackOffset = StackOffset;
186 unsigned NumLocs = Locs.size();
187
188 // Allocate something of this value type repeatedly with just the inreg flag
189 // set until we get assigned a location in memory.
190 ISD::ArgFlagsTy Flags;
191 Flags.setInReg();
192 bool HaveRegParm = true;
193 while (HaveRegParm) {
194 if (Fn(0, VT, VT, CCValAssign::Full, Flags, *this)) {
195 #ifndef NDEBUG
196 dbgs() << "Call has unhandled type " << EVT(VT).getEVTString()
197 << " while computing remaining regparms\n";
198 #endif
199 llvm_unreachable(nullptr);
200 }
201 HaveRegParm = Locs.back().isRegLoc();
202 }
203
204 // Copy all the registers from the value locations we added.
205 assert(NumLocs < Locs.size() && "CC assignment failed to add location");
206 for (unsigned I = NumLocs, E = Locs.size(); I != E; ++I)
207 if (Locs[I].isRegLoc())
208 Regs.push_back(MCPhysReg(Locs[I].getLocReg()));
209
210 // Clear the assigned values and stack memory. We leave the registers marked
211 // as allocated so that future queries don't return the same registers, i.e.
212 // when i64 and f64 are both passed in GPRs.
213 StackOffset = SavedStackOffset;
214 Locs.resize(NumLocs);
215 }
216
217 void CCState::analyzeMustTailForwardedRegisters(
218 SmallVectorImpl&lt;ForwardedRegister&gt; &Forwards, ArrayRef&lt;MVT&gt; RegParmTypes,
219 CCAssignFn Fn) {
220 // Oftentimes calling conventions will not use register parameters for
221 // variadic functions, so we need to assume we're not variadic so that we get
222 // all the registers that might be used in a non-variadic call.
223 SaveAndRestore&lt;bool&gt; SavedVarArg(IsVarArg, false);
224
225 for (MVT RegVT : RegParmTypes) {
226 SmallVector&lt;MCPhysReg, 8&gt; RemainingRegs;
227 getRemainingRegParmsForType(RemainingRegs, RegVT, Fn);
228 const TargetLowering *TL = MF.getSubtarget().getTargetLowering();
229 const TargetRegisterClass *RC = TL->getRegClassFor(RegVT);
230 for (MCPhysReg PReg : RemainingRegs) {
231 unsigned VReg = MF.addLiveIn(PReg, RC);
232 Forwards.push_back(ForwardedRegister(VReg, PReg, RegVT));
233 }
234 }
235 }
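Editor's note: the list built by analyzeMustTailForwardedRegisters is meant to be consumed in two steps: the prologue snapshots each forwarded physical register into a fresh virtual register, and the musttail call site copies the snapshot back into the physical register. Below is an editor-added sketch of the prologue half; it closely mirrors the X86 LowerFormalArguments change later in this diff, and the helper name and free-standing form are illustrative only.

// Sketch only (mirrors the X86 change below). DAG, dl, Chain, MF and TLI are
// the usual SelectionDAG lowering ingredients.
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;

static SDValue snapshotForwardedRegs(SelectionDAG &DAG, SDLoc dl, SDValue Chain,
                                     MachineFunction &MF,
                                     const TargetLowering &TLI,
                                     SmallVectorImpl<ForwardedRegister> &Forwards) {
  for (ForwardedRegister &F : Forwards) {
    // Read the incoming physical register (already marked live-in), then park
    // the value in a new virtual register until the musttail call needs it.
    SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
    F.VReg = MF.getRegInfo().createVirtualRegister(TLI.getRegClassFor(F.VT));
    Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
  }
  return Chain;
}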
25482548 MFI->CreateFixedObject(1, StackSize, true));
25492549 }
25502550
2551 // Figure out if XMM registers are in use.
2552 bool HaveXMMArgs = Is64Bit && !IsWin64;
2553 bool NoImplicitFloatOps = Fn->getAttributes().hasAttribute(
2554 AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
2555 assert(!(MF.getTarget().Options.UseSoftFloat && NoImplicitFloatOps) &&
2556 "SSE register cannot be used when SSE is disabled!");
2557 if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps ||
2558 !Subtarget->hasSSE1())
2559 HaveXMMArgs = false;
2560
25512561 // 64-bit calling conventions support varargs and register parameters, so we
2552 // have to do extra work to spill them in the prologue or forward them to
2553 // musttail calls.
2554 if (Is64Bit && isVarArg &&
2555 (MFI->hasVAStart() || MFI->hasMustTailInVarArgFunc())) {
2562 // have to do extra work to spill them in the prologue.
2563 if (Is64Bit && isVarArg && MFI->hasVAStart()) {
25562564 // Find the first unallocated argument registers.
25572565 ArrayRef&lt;MCPhysReg&gt; ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
25582566 ArrayRef&lt;MCPhysReg&gt; ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
25822590 }
25832591 }
25842592
2585 // Store them to the va_list returned by va_start.
2586 if (MFI->hasVAStart()) {
2587 if (IsWin64) {
2588 const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
2589 // Get to the caller-allocated home save location. Add 8 to account
2590 // for the return address.
2591 int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
2592 FuncInfo->setRegSaveFrameIndex(
2593 if (IsWin64) {
2594 const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
2595 // Get to the caller-allocated home save location. Add 8 to account
2596 // for the return address.
2597 int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
2598 FuncInfo->setRegSaveFrameIndex(
25932599 MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
2594 // Fixup to set vararg frame on shadow area (4 x i64).
2595 if (NumIntRegs < 4)
2596 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
2597 } else {
2598 // For X86-64, if there are vararg parameters that are passed via
2599 // registers, then we must store them to their spots on the stack so
2600 // they may be loaded by dereferencing the result of va_next.
2601 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
2602 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
2603 FuncInfo->setRegSaveFrameIndex(MFI->CreateStackObject(
2604 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
2605 }
2606
2607 // Store the integer parameter registers.
2608 SmallVector&lt;SDValue, 8&gt; MemOps;
2609 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
2610 getPointerTy());
2611 unsigned Offset = FuncInfo->getVarArgsGPOffset();
2612 for (SDValue Val : LiveGPRs) {
2613 SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
2614 DAG.getIntPtrConstant(Offset));
2615 SDValue Store =
2616 DAG.getStore(Val.getValue(1), dl, Val, FIN,
2617 MachinePointerInfo::getFixedStack(
2618 FuncInfo->getRegSaveFrameIndex(), Offset),
2619 false, false, 0);
2620 MemOps.push_back(Store);
2621 Offset += 8;
2622 }
2623
2624 if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
2625 // Now store the XMM (fp + vector) parameter registers.
2626 SmallVector&lt;SDValue, 12&gt; SaveXMMOps;
2627 SaveXMMOps.push_back(Chain);
2628 SaveXMMOps.push_back(ALVal);
2629 SaveXMMOps.push_back(DAG.getIntPtrConstant(
2630 FuncInfo->getRegSaveFrameIndex()));
2631 SaveXMMOps.push_back(DAG.getIntPtrConstant(
2632 FuncInfo->getVarArgsFPOffset()));
2633 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
2634 LiveXMMRegs.end());
2635 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
2636 MVT::Other, SaveXMMOps));
2637 }
2638
2639 if (!MemOps.empty())
2640 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
2600 // Fixup to set vararg frame on shadow area (4 x i64).
2601 if (NumIntRegs < 4)
2602 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
26412603 } else {
2642 // Add all GPRs, al, and XMMs to the list of forwards. We will add them
2643 // to the liveout set on a musttail call.
2644 assert(MFI->hasMustTailInVarArgFunc());
2645 auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
2646 typedef X86MachineFunctionInfo::Forward Forward;
2647
2648 for (unsigned I = 0, E = LiveGPRs.size(); I != E; ++I) {
2649 unsigned VReg =
2650 MF.getRegInfo().createVirtualRegister(&X86::GR64RegClass);
2651 Chain = DAG.getCopyToReg(Chain, dl, VReg, LiveGPRs[I]);
2652 Forwards.push_back(Forward(VReg, ArgGPRs[NumIntRegs + I], MVT::i64));
2653 }
2654
2655 if (!ArgXMMs.empty()) {
2656 unsigned ALVReg =
2657 MF.getRegInfo().createVirtualRegister(&X86::GR8RegClass);
2658 Chain = DAG.getCopyToReg(Chain, dl, ALVReg, ALVal);
2659 Forwards.push_back(Forward(ALVReg, X86::AL, MVT::i8));
2660
2661 for (unsigned I = 0, E = LiveXMMRegs.size(); I != E; ++I) {
2662 unsigned VReg =
2663 MF.getRegInfo().createVirtualRegister(&X86::VR128RegClass);
2664 Chain = DAG.getCopyToReg(Chain, dl, VReg, LiveXMMRegs[I]);
2665 Forwards.push_back(
2666 Forward(VReg, ArgXMMs[NumXMMRegs + I], MVT::v4f32));
2667 }
2668 }
2604 // For X86-64, if there are vararg parameters that are passed via
2605 // registers, then we must store them to their spots on the stack so
2606 // they may be loaded by dereferencing the result of va_next.
2607 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
2608 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
2609 FuncInfo->setRegSaveFrameIndex(MFI->CreateStackObject(
2610 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
2611 }
2612
2613 // Store the integer parameter registers.
2614 SmallVector&lt;SDValue, 8&gt; MemOps;
2615 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
2616 getPointerTy());
2617 unsigned Offset = FuncInfo->getVarArgsGPOffset();
2618 for (SDValue Val : LiveGPRs) {
2619 SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
2620 DAG.getIntPtrConstant(Offset));
2621 SDValue Store =
2622 DAG.getStore(Val.getValue(1), dl, Val, FIN,
2623 MachinePointerInfo::getFixedStack(
2624 FuncInfo->getRegSaveFrameIndex(), Offset),
2625 false, false, 0);
2626 MemOps.push_back(Store);
2627 Offset += 8;
2628 }
2629
2630 if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
2631 // Now store the XMM (fp + vector) parameter registers.
2632 SmallVector&lt;SDValue, 12&gt; SaveXMMOps;
2633 SaveXMMOps.push_back(Chain);
2634 SaveXMMOps.push_back(ALVal);
2635 SaveXMMOps.push_back(DAG.getIntPtrConstant(
2636 FuncInfo->getRegSaveFrameIndex()));
2637 SaveXMMOps.push_back(DAG.getIntPtrConstant(
2638 FuncInfo->getVarArgsFPOffset()));
2639 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
2640 LiveXMMRegs.end());
2641 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
2642 MVT::Other, SaveXMMOps));
2643 }
2644
2645 if (!MemOps.empty())
2646 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
2647 }
2648
2649 if (isVarArg && MFI->hasMustTailInVarArgFunc()) {
2650 // Find the largest legal vector type.
2651 MVT VecVT = MVT::Other;
2652 // FIXME: Only some x86_32 calling conventions support AVX512.
2653 if (Subtarget->hasAVX512() &&
2654 (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
2655 CallConv == CallingConv::Intel_OCL_BI)))
2656 VecVT = MVT::v16f32;
2657 else if (Subtarget->hasAVX())
2658 VecVT = MVT::v8f32;
2659 else if (Subtarget->hasSSE2())
2660 VecVT = MVT::v4f32;
2661
2662 // We forward some GPRs and some vector types.
2663 SmallVector&lt;MVT, 2&gt; RegParmTypes;
2664 MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
2665 RegParmTypes.push_back(IntVT);
2666 if (VecVT != MVT::Other)
2667 RegParmTypes.push_back(VecVT);
2668
2669 // Compute the set of forwarded registers. The rest are scratch.
2670 SmallVectorImpl&lt;ForwardedRegister&gt; &Forwards =
2671 FuncInfo->getForwardedMustTailRegParms();
2672 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
2673
2674 // Conservatively forward AL on x86_64, since it might be used for varargs.
2675 if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
2676 unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
2677 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
2678 }
2679
2680 // Copy all forwards from physical to virtual registers.
2681 for (ForwardedRegister &F : Forwards) {
2682 // FIXME: Can we use a less constrained schedule?
2683 SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
2684 F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
2685 Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
26692686 }
26702687 }
26712688
29853002 DAG.getConstant(NumXMMRegs, MVT::i8)));
29863003 }
29873004
2988 if (Is64Bit && isVarArg && IsMustTail) {
3005 if (isVarArg && IsMustTail) {
29893006 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
29903007 for (const auto &F : Forwards) {
29913008 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
1313 #ifndef LLVM_LIB_TARGET_X86_X86MACHINEFUNCTIONINFO_H
1414 #define LLVM_LIB_TARGET_X86_X86MACHINEFUNCTIONINFO_H
1515
16 #include "llvm/CodeGen/CallingConvLower.h"
1617 #include "llvm/CodeGen/MachineFunction.h"
1718 #include "llvm/CodeGen/MachineValueType.h"
1819 #include &lt;vector&gt;
7677 /// NumLocalDynamics - Number of local-dynamic TLS accesses.
7778 unsigned NumLocalDynamics;
7879
79 public:
80 /// Describes a register that needs to be forwarded from the prologue to a
81 /// musttail call.
82 struct Forward {
83 Forward(unsigned VReg, MCPhysReg PReg, MVT VT)
84 : VReg(VReg), PReg(PReg), VT(VT) {}
85 unsigned VReg;
86 MCPhysReg PReg;
87 MVT VT;
88 };
89
9080 private:
9181 /// ForwardedMustTailRegParms - A list of virtual and physical registers
9282 /// that must be forwarded to every musttail call.
93 std::vector&lt;Forward&gt; ForwardedMustTailRegParms;
83 SmallVector&lt;ForwardedRegister, 1&gt; ForwardedMustTailRegParms;
9484
9585 public:
9686 X86MachineFunctionInfo() : ForceFramePointer(false),
167157 unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
168158 void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }
169159
170 std::vector&lt;Forward&gt; &getForwardedMustTailRegParms() {
160 SmallVectorImpl&lt;ForwardedRegister&gt; &getForwardedMustTailRegParms() {
171161 return ForwardedMustTailRegParms;
172162 }
173163 };
0 ; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2
1 ; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
2 ; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx,+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
3
4 ; While we don't support varargs with fastcall, we do support forwarding.
5
6 @asdf = internal constant [4 x i8] c"asdf"
7
8 declare void @puts(i8*)
9
10 define i32 @call_fast_thunk() {
11 %r = call x86_fastcallcc i32 (...)* @fast_thunk(i32 inreg 1, i32 inreg 2, i32 3)
12 ret i32 %r
13 }
14
15 define x86_fastcallcc i32 @fast_thunk(...) {
16 call void @puts(i8* getelementptr ([4 x i8]* @asdf, i32 0, i32 0))
17 %r = musttail call x86_fastcallcc i32 (...)* bitcast (i32 (i32, i32, i32)* @fast_target to i32 (...)*) (...)
18 ret i32 %r
19 }
20
21 ; Check that we spill and fill around the call to puts.
22
23 ; CHECK-LABEL: @fast_thunk@0:
24 ; CHECK-DAG: movl %ecx, {{.*}}
25 ; CHECK-DAG: movl %edx, {{.*}}
26 ; CHECK: calll _puts
27 ; CHECK-DAG: movl {{.*}}, %ecx
28 ; CHECK-DAG: movl {{.*}}, %edx
29 ; CHECK: jmp @fast_target@12
30
31 define x86_fastcallcc i32 @fast_target(i32 inreg %a, i32 inreg %b, i32 %c) {
32 %a0 = add i32 %a, %b
33 %a1 = add i32 %a0, %c
34 ret i32 %a1
35 }
36
37 ; Repeat the test for vectorcall, which has XMM registers.
38
39 define i32 @call_vector_thunk() {
40 %r = call x86_vectorcallcc i32 (...)* @vector_thunk(i32 inreg 1, i32 inreg 2, i32 3)
41 ret i32 %r
42 }
43
44 define x86_vectorcallcc i32 @vector_thunk(...) {
45 call void @puts(i8* getelementptr ([4 x i8]* @asdf, i32 0, i32 0))
46 %r = musttail call x86_vectorcallcc i32 (...)* bitcast (i32 (i32, i32, i32)* @vector_target to i32 (...)*) (...)
47 ret i32 %r
48 }
49
50 ; Check that we spill and fill SSE registers around the call to puts.
51
52 ; CHECK-LABEL: vector_thunk@@0:
53 ; CHECK-DAG: movl %ecx, {{.*}}
54 ; CHECK-DAG: movl %edx, {{.*}}
55
56 ; SSE2-DAG: movups %xmm0, {{.*}}
57 ; SSE2-DAG: movups %xmm1, {{.*}}
58 ; SSE2-DAG: movups %xmm2, {{.*}}
59 ; SSE2-DAG: movups %xmm3, {{.*}}
60 ; SSE2-DAG: movups %xmm4, {{.*}}
61 ; SSE2-DAG: movups %xmm5, {{.*}}
62
63 ; AVX-DAG: vmovups %ymm0, {{.*}}
64 ; AVX-DAG: vmovups %ymm1, {{.*}}
65 ; AVX-DAG: vmovups %ymm2, {{.*}}
66 ; AVX-DAG: vmovups %ymm3, {{.*}}
67 ; AVX-DAG: vmovups %ymm4, {{.*}}
68 ; AVX-DAG: vmovups %ymm5, {{.*}}
69
70 ; AVX512-DAG: vmovups %zmm0, {{.*}}
71 ; AVX512-DAG: vmovups %zmm1, {{.*}}
72 ; AVX512-DAG: vmovups %zmm2, {{.*}}
73 ; AVX512-DAG: vmovups %zmm3, {{.*}}
74 ; AVX512-DAG: vmovups %zmm4, {{.*}}
75 ; AVX512-DAG: vmovups %zmm5, {{.*}}
76
77 ; CHECK: calll _puts
78
79 ; SSE2-DAG: movups {{.*}}, %xmm0
80 ; SSE2-DAG: movups {{.*}}, %xmm1
81 ; SSE2-DAG: movups {{.*}}, %xmm2
82 ; SSE2-DAG: movups {{.*}}, %xmm3
83 ; SSE2-DAG: movups {{.*}}, %xmm4
84 ; SSE2-DAG: movups {{.*}}, %xmm5
85
86 ; AVX-DAG: vmovups {{.*}}, %ymm0
87 ; AVX-DAG: vmovups {{.*}}, %ymm1
88 ; AVX-DAG: vmovups {{.*}}, %ymm2
89 ; AVX-DAG: vmovups {{.*}}, %ymm3
90 ; AVX-DAG: vmovups {{.*}}, %ymm4
91 ; AVX-DAG: vmovups {{.*}}, %ymm5
92
93 ; AVX512-DAG: vmovups {{.*}}, %zmm0
94 ; AVX512-DAG: vmovups {{.*}}, %zmm1
95 ; AVX512-DAG: vmovups {{.*}}, %zmm2
96 ; AVX512-DAG: vmovups {{.*}}, %zmm3
97 ; AVX512-DAG: vmovups {{.*}}, %zmm4
98 ; AVX512-DAG: vmovups {{.*}}, %zmm5
99
100 ; CHECK-DAG: movl {{.*}}, %ecx
101 ; CHECK-DAG: movl {{.*}}, %edx
102 ; CHECK: jmp vector_target@@12
103
104 define x86_vectorcallcc i32 @vector_target(i32 inreg %a, i32 inreg %b, i32 %c) {
105 %a0 = add i32 %a, %b
106 %a1 = add i32 %a0, %c
107 ret i32 %a1
108 }
44 ; pack. Doing a normal call will clobber all argument registers, and we will
55 ; spill around it. A simple adjustment should not require any XMM spills.
66
7 declare void @llvm.va_start(i8*) nounwind
8
79 declare void(i8*, ...)* @get_f(i8* %this)
810
911 define void @f_thunk(i8* %this, ...) {
12 ; Use va_start so that we exercise the combination.
13 %ap = alloca [4 x i8*], align 16
14 %ap_i8 = bitcast [4 x i8*]* %ap to i8*
15 call void @llvm.va_start(i8* %ap_i8)
16
1017 %fptr = call void(i8*, ...)*(i8*)* @get_f(i8* %this)
1118 musttail call void (i8*, ...)* %fptr(i8* %this, ...)
1219 ret void