llvm.org GIT mirror llvm / b720a3d
Re-apply r211399, "Generate native unwind info on Win64" with a fix to ignore SEH pseudo ops in X86 JIT emitter. -- This patch enables LLVM to emit Win64-native unwind info rather than DWARF CFI. It handles all corner cases (I hope), including stack realignment. Because the unwind info is not flexible enough to describe stack frames with a gap of unknown size in the middle, such as the one caused by stack realignment, I modified register spilling code to place all spills into the fixed frame slots, so that they can be accessed relative to the frame pointer. Patch by Vadim Chugunov! Reviewed By: rnk Differential Revision: http://reviews.llvm.org/D4081 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211691 91177308-0d34-0410-b5e6-96231b3b80d8 NAKAMURA Takumi 6 years ago
20 changed file(s) with 639 addition(s) and 210 deletion(s). Raw diff Collapse all Expand all
483483 ///
484484 int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool Immutable);
485485
486 /// CreateFixedSpillStackObject - Create a spill slot at a fixed location
487 /// on the stack. Returns an index with a negative value.
488 int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset);
486489
487490 /// isFixedObjectIndex - Returns true if the specified index corresponds to a
488491 /// fixed stack object.
480480 bool isExceptionHandlingDwarf() const {
481481 return (ExceptionsType == ExceptionHandling::DwarfCFI ||
482482 ExceptionsType == ExceptionHandling::ARM ||
483 // Win64 handler data still uses DWARF LSDA encoding.
483484 ExceptionsType == ExceptionHandling::Win64);
484485 }
485486 bool doesDwarfUseRelocationsAcrossSections() const {
9292 /// stack pointer.
9393 virtual bool isFPCloseToIncomingSP() const { return true; }
9494
95 /// assignCalleeSavedSpillSlots - Allows target to override spill slot
96 /// assignment logic. If implemented, assignCalleeSavedSpillSlots() should
97 /// assign frame slots to all CSI entries and return true. If this method
98 /// returns false, spill slots will be assigned using generic implementation.
99 /// assignCalleeSavedSpillSlots() may add, delete or rearrange elements of
100 /// CSI.
101 virtual bool
102 assignCalleeSavedSpillSlots(MachineFunction &MF,
103 const TargetRegisterInfo *TRI,
104 std::vector &CSI) const {
105 return false;
106 }
107
95108 /// getCalleeSavedSpillSlots - This method returns a pointer to an array of
96109 /// pairs, that contains an entry for each callee saved register that must be
97110 /// spilled to a particular stack location if it is spilled.
7676 if (!shouldEmitPersonality)
7777 return;
7878
79 MCSymbol *GCCHandlerSym =
80 Asm->GetExternalSymbolSymbol("_GCC_specific_handler");
81 Asm->OutStreamer.EmitWin64EHHandler(GCCHandlerSym, true, true);
79 const MCSymbol *PersHandlerSym =
80 TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
81 Asm->OutStreamer.EmitWin64EHHandler(PersHandlerSym, true, true);
8282
8383 Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
8484 Asm->getFunctionNumber()));
9797 MMI->TidyLandingPads();
9898
9999 if (shouldEmitPersonality) {
100 const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
101 const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
102 const MCSymbol *Sym =
103 TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
104
105100 Asm->OutStreamer.PushSection();
106101 Asm->OutStreamer.EmitWin64EHHandlerData();
107 Asm->OutStreamer.EmitValue(MCSymbolRefExpr::Create(Sym, Asm->OutContext),
108 4);
109102 emitExceptionTable();
110103 Asm->OutStreamer.PopSection();
111104 }
577577 return -++NumFixedObjects;
578578 }
579579
580 /// CreateFixedSpillStackObject - Create a spill slot at a fixed location
581 /// on the stack. Returns an index with a negative value.
582 int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
583 int64_t SPOffset) {
584 unsigned StackAlign = getFrameLowering()->getStackAlignment();
585 unsigned Align = MinAlign(SPOffset, StackAlign);
586 Align = clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
587 !RealignOption,
588 Align, getFrameLowering()->getStackAlignment());
589 Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset,
590 /*Immutable*/ true,
591 /*isSS*/ true,
592 /*Alloca*/ nullptr));
593 return -++NumFixedObjects;
594 }
580595
581596 BitVector
582597 MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
267267 }
268268 }
269269
270 if (CSI.empty())
271 return; // Early exit if no callee saved registers are modified!
272
273 unsigned NumFixedSpillSlots;
274 const TargetFrameLowering::SpillSlot *FixedSpillSlots =
275 TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
276
277 // Now that we know which registers need to be saved and restored, allocate
278 // stack slots for them.
279 for (std::vector::iterator
280 I = CSI.begin(), E = CSI.end(); I != E; ++I) {
281 unsigned Reg = I->getReg();
282 const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
283
284 int FrameIdx;
285 if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) {
270 if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) {
271 // If target doesn't implement this, use generic code.
272
273 if (CSI.empty())
274 return; // Early exit if no callee saved registers are modified!
275
276 unsigned NumFixedSpillSlots;
277 const TargetFrameLowering::SpillSlot *FixedSpillSlots =
278 TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
279
280 // Now that we know which registers need to be saved and restored, allocate
281 // stack slots for them.
282 for (std::vector::iterator I = CSI.begin(), E = CSI.end();
283 I != E; ++I) {
284 unsigned Reg = I->getReg();
285 const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
286
287 int FrameIdx;
288 if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) {
289 I->setFrameIdx(FrameIdx);
290 continue;
291 }
292
293 // Check to see if this physreg must be spilled to a particular stack slot
294 // on this target.
295 const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
296 while (FixedSlot != FixedSpillSlots + NumFixedSpillSlots &&
297 FixedSlot->Reg != Reg)
298 ++FixedSlot;
299
300 if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
301 // Nope, just spill it anywhere convenient.
302 unsigned Align = RC->getAlignment();
303 unsigned StackAlign = TFI->getStackAlignment();
304
305 // We may not be able to satisfy the desired alignment specification of
306 // the TargetRegisterClass if the stack alignment is smaller. Use the
307 // min.
308 Align = std::min(Align, StackAlign);
309 FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
310 if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
311 if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
312 } else {
313 // Spill it to the stack where we must.
314 FrameIdx =
315 MFI->CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset);
316 }
317
286318 I->setFrameIdx(FrameIdx);
287 continue;
288 }
289
290 // Check to see if this physreg must be spilled to a particular stack slot
291 // on this target.
292 const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
293 while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
294 FixedSlot->Reg != Reg)
295 ++FixedSlot;
296
297 if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
298 // Nope, just spill it anywhere convenient.
299 unsigned Align = RC->getAlignment();
300 unsigned StackAlign = TFI->getStackAlignment();
301
302 // We may not be able to satisfy the desired alignment specification of
303 // the TargetRegisterClass if the stack alignment is smaller. Use the
304 // min.
305 Align = std::min(Align, StackAlign);
306 FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
307 if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
308 if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
309 } else {
310 // Spill it to the stack where we must.
311 FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true);
312 }
313
314 I->setFrameIdx(FrameIdx);
319 }
315320 }
316321
317322 MFI->setCalleeSavedInfo(CSI);
648648 // though it contains relocatable pointers. In PIC mode, this is probably a
649649 // big runtime hit for C++ apps. Either the contents of the LSDA need to be
650650 // adjusted or this should be a data section.
651 LSDASection =
652 Ctx->getCOFFSection(".gcc_except_table",
653 COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
654 COFF::IMAGE_SCN_MEM_READ,
655 SectionKind::getReadOnly());
651 assert(T.isOSWindows() && "Windows is the only supported COFF target");
652 if (T.getArch() == Triple::x86_64) {
653 // On Windows 64 with SEH, the LSDA is emitted into the .xdata section
654 LSDASection = 0;
655 } else {
656 LSDASection = Ctx->getCOFFSection(".gcc_except_table",
657 COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
658 COFF::IMAGE_SCN_MEM_READ,
659 SectionKind::getReadOnly());
660 }
656661
657662 // Debug info.
658663 COFFDebugSymbolsSection =
503503 report_fatal_error("Frame register and offset already specified!");
504504 if (Offset & 0x0F)
505505 report_fatal_error("Misaligned frame pointer offset!");
506 if (Offset > 240)
507 report_fatal_error("Frame offset must be less than or equal to 240!");
506508 MCSymbol *Label = getContext().CreateTempSymbol();
507509 MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, Label, Register, Offset);
508510 EmitLabel(Label);
142142 void X86MCAsmInfoMicrosoft::anchor() { }
143143
144144 X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
145 if (Triple.getArch() == Triple::x86_64)
145 if (Triple.getArch() == Triple::x86_64) {
146146 PrivateGlobalPrefix = ".L";
147 PointerSize = 8;
148 ExceptionsType = ExceptionHandling::Win64;
149 }
147150
148151 AssemblerDialect = AsmWriterFlavor;
149152
157160 void X86MCAsmInfoGNUCOFF::anchor() { }
158161
159162 X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
163 assert(Triple.isOSWindows() && "Windows is the only supported COFF target");
160164 if (Triple.getArch() == Triple::x86_64) {
161165 PrivateGlobalPrefix = ".L";
162166 PointerSize = 8;
167 ExceptionsType = ExceptionHandling::Win64;
168 } else {
169 ExceptionsType = ExceptionHandling::DwarfCFI;
163170 }
164171
165172 AssemblerDialect = AsmWriterFlavor;
166173
167174 TextAlignFillValue = 0x90;
168175
169 // Exceptions handling
170 ExceptionsType = ExceptionHandling::DwarfCFI;
171
172176 UseIntegratedAssembler = true;
173177 }
11301130 case TargetOpcode::IMPLICIT_DEF:
11311131 case TargetOpcode::KILL:
11321132 break;
1133
1134 case X86::SEH_PushReg:
1135 case X86::SEH_SaveReg:
1136 case X86::SEH_SaveXMM:
1137 case X86::SEH_StackAlloc:
1138 case X86::SEH_SetFrame:
1139 case X86::SEH_PushFrame:
1140 case X86::SEH_EndPrologue:
1141 break;
1142
11331143 case X86::MOVPC32r: {
11341144 // This emits the "call" portion of this pseudo instruction.
11351145 MCE.emitByte(BaseOpcode);
2828 #include "llvm/MC/MCSymbol.h"
2929 #include "llvm/Support/CommandLine.h"
3030 #include "llvm/Target/TargetOptions.h"
31 #include "llvm/Support/Debug.h"
3132
3233 using namespace llvm;
3334
304305 return false;
305306 }
306307
307 void X86FrameLowering::emitCalleeSavedFrameMoves(
308 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL,
309 unsigned FramePtr) const {
308 void
309 X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
310 MachineBasicBlock::iterator MBBI,
311 DebugLoc DL) const {
310312 MachineFunction &MF = *MBB.getParent();
311313 MachineFrameInfo *MFI = MF.getFrameInfo();
312314 MachineModuleInfo &MMI = MF.getMMI();
317319 const std::vector &CSI = MFI->getCalleeSavedInfo();
318320 if (CSI.empty()) return;
319321
320 const X86RegisterInfo *RegInfo =
321 static_cast(MF.getTarget().getRegisterInfo());
322 bool HasFP = hasFP(MF);
323
324 // Calculate amount of bytes used for return address storing.
325 int stackGrowth = -RegInfo->getSlotSize();
326
327 // FIXME: This is dirty hack. The code itself is pretty mess right now.
328 // It should be rewritten from scratch and generalized sometimes.
329
330 // Determine maximum offset (minimum due to stack growth).
331 int64_t MaxOffset = 0;
332 for (std::vector::const_iterator
333 I = CSI.begin(), E = CSI.end(); I != E; ++I)
334 MaxOffset = std::min(MaxOffset,
335 MFI->getObjectOffset(I->getFrameIdx()));
336
337322 // Calculate offsets.
338 int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
339323 for (std::vector::const_iterator
340324 I = CSI.begin(), E = CSI.end(); I != E; ++I) {
341325 int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
342326 unsigned Reg = I->getReg();
343 Offset = MaxOffset - Offset + saveAreaOffset;
344
345 // Don't output a new machine move if we're re-saving the frame
346 // pointer. This happens when the PrologEpilogInserter has inserted an extra
347 // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
348 // generates one when frame pointers are used. If we generate a "machine
349 // move" for this extra "PUSH", the linker will lose track of the fact that
350 // the frame pointer should have the value of the first "PUSH" when it's
351 // trying to unwind.
352 //
353 // FIXME: This looks inelegant. It's possibly correct, but it's covering up
354 // another bug. I.e., one where we generate a prolog like this:
355 //
356 // pushl %ebp
357 // movl %esp, %ebp
358 // pushl %ebp
359 // pushl %esi
360 // ...
361 //
362 // The immediate re-push of EBP is unnecessary. At the least, it's an
363 // optimization bug. EBP can be used as a scratch register in certain
364 // cases, but probably not when we have a frame pointer.
365 if (HasFP && FramePtr == Reg)
366 continue;
367327
368328 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
369329 unsigned CFIIndex =
395355 /// automatically adjust the stack pointer. Adjust the stack pointer to allocate
396356 /// space for local variables. Also emit labels used by the exception handler to
397357 /// generate the exception handling frames.
358
359 /*
360 Here's a gist of what gets emitted:
361
362 ; Establish frame pointer, if needed
363 [if needs FP]
364 push %rbp
365 .cfi_def_cfa_offset 16
366 .cfi_offset %rbp, -16
367 .seh_pushreg %rbp
368 mov %rsp, %rbp
369 .cfi_def_cfa_register %rbp
370
371 ; Spill general-purpose registers
372 [for all callee-saved GPRs]
373 pushq %<reg>
374 [if not needs FP]
375 .cfi_def_cfa_offset (offset from RETADDR)
376 .seh_pushreg %<reg>
377
378 ; If the required stack alignment > default stack alignment
379 ; rsp needs to be re-aligned. This creates a "re-alignment gap"
380 ; of unknown size in the stack frame.
381 [if stack needs re-alignment]
382 and $MASK, %rsp
383
384 ; Allocate space for locals
385 [if target is Windows and allocated space > 4096 bytes]
386 ; Windows needs special care for allocations larger
387 ; than one page.
388 mov $NNN, %rax
389 call ___chkstk_ms/___chkstk
390 sub %rax, %rsp
391 [else]
392 sub $NNN, %rsp
393
394 [if needs FP]
395 .seh_stackalloc (size of XMM spill slots)
396 .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
397 [else]
398 .seh_stackalloc NNN
399
400 ; Spill XMMs
401 ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved,
402 ; they may get spilled on any platform, if the current function
403 ; calls @llvm.eh.unwind.init
404 [if needs FP]
405 [for all callee-saved XMM registers]
406 movaps %<xmm reg>, -MMM(%rbp)
407 [for all callee-saved XMM registers]
408 .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
409 ; i.e. the offset relative to (%rbp - SEHFrameOffset)
410 [else]
411 [for all callee-saved XMM registers]
412 movaps %<xmm reg>, KKK(%rsp)
413 [for all callee-saved XMM registers]
414 .seh_savexmm %<xmm reg>, KKK
415
416 .seh_endprologue
417
418 [if needs base pointer]
419 mov %rsp, %rbx
420
421 ; Emit CFI info
422 [if needs FP]
423 [for all callee-saved registers]
424 .cfi_offset %<reg>, (offset from %rbp)
425 [else]
426 .cfi_def_cfa_offset (offset from RETADDR)
427 [for all callee-saved registers]
428 .cfi_offset %<reg>, (offset from %rsp)
429
430 Notes:
431 - .seh directives are emitted only for Windows 64 ABI
432 - .cfi directives are emitted for all other ABIs
433 - for 32-bit code, substitute %e?? registers for %r??
434 */
435
398436 void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
399437 MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
400438 MachineBasicBlock::iterator MBBI = MBB.begin();
405443 const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
406444 MachineModuleInfo &MMI = MF.getMMI();
407445 X86MachineFunctionInfo *X86FI = MF.getInfo();
408 bool needsFrameMoves = MMI.hasDebugInfo() ||
409 Fn->needsUnwindTableEntry();
410446 uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
411447 uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
412448 bool HasFP = hasFP(MF);
414450 bool Is64Bit = STI.is64Bit();
415451 bool IsLP64 = STI.isTarget64BitLP64();
416452 bool IsWin64 = STI.isTargetWin64();
453 bool IsSEH =
454 MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() ==
455 ExceptionHandling::Win64; // Not necessarily synonymous with IsWin64.
456 bool NeedsWin64SEH = IsSEH && Fn->needsUnwindTableEntry();
457 bool NeedsDwarfCFI =
458 !IsSEH && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
417459 bool UseLEA = STI.useLeaForSP();
418460 unsigned StackAlign = getStackAlignment();
419461 unsigned SlotSize = RegInfo->getSlotSize();
511553 .addReg(FramePtr, RegState::Kill)
512554 .setMIFlag(MachineInstr::FrameSetup);
513555
514 if (needsFrameMoves) {
556 if (NeedsDwarfCFI) {
515557 // Mark the place where EBP/RBP was saved.
516558 // Define the current CFA rule to use the provided offset.
517559 assert(StackSize);
529571 .addCFIIndex(CFIIndex);
530572 }
531573
574 if (NeedsWin64SEH) {
575 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
576 .addImm(FramePtr)
577 .setMIFlag(MachineInstr::FrameSetup);
578 }
579
532580 // Update EBP with the new base value.
533581 BuildMI(MBB, MBBI, DL,
534582 TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
535583 .addReg(StackPtr)
536584 .setMIFlag(MachineInstr::FrameSetup);
537585
538 if (needsFrameMoves) {
586 if (NeedsDwarfCFI) {
539587 // Mark effective beginning of when frame pointer becomes valid.
540588 // Define the current CFA to use the EBP/RBP register.
541589 unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true);
545593 .addCFIIndex(CFIIndex);
546594 }
547595
548 // Mark the FramePtr as live-in in every block except the entry.
549 for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
550 I != E; ++I)
596 // Mark the FramePtr as live-in in every block.
597 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
551598 I->addLiveIn(FramePtr);
552599 } else {
553600 NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
561608 (MBBI->getOpcode() == X86::PUSH32r ||
562609 MBBI->getOpcode() == X86::PUSH64r)) {
563610 PushedRegs = true;
564 MBBI->setFlag(MachineInstr::FrameSetup);
611 unsigned Reg = MBBI->getOperand(0).getReg();
565612 ++MBBI;
566613
567 if (!HasFP && needsFrameMoves) {
614 if (!HasFP && NeedsDwarfCFI) {
568615 // Mark callee-saved push instruction.
569616 // Define the current CFA rule to use the provided offset.
570617 assert(StackSize);
574621 .addCFIIndex(CFIIndex);
575622 StackOffset += stackGrowth;
576623 }
624
625 if (NeedsWin64SEH) {
626 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)).addImm(Reg).setMIFlag(
627 MachineInstr::FrameSetup);
628 }
577629 }
578630
579631 // Realign stack after we pushed callee-saved registers (so that we'll be
580632 // able to calculate their offsets from the frame pointer).
581
582 // NOTE: We push the registers before realigning the stack, so
583 // vector callee-saved (xmm) registers may be saved w/o proper
584 // alignment in this way. However, currently these regs are saved in
585 // stack slots (see X86FrameLowering::spillCalleeSavedRegisters()), so
586 // this shouldn't be a problem.
587633 if (RegInfo->needsStackRealignment(MF)) {
588634 assert(HasFP && "There should be a frame pointer if stack is realigned.");
589635 MachineInstr *MI =
682728 MI->setFlag(MachineInstr::FrameSetup);
683729 MBB.insert(MBBI, MI);
684730 }
685 } else if (NumBytes)
731 } else if (NumBytes) {
686732 emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64,
687733 UseLEA, TII, *RegInfo);
734 }
735
736 int SEHFrameOffset = 0;
737 if (NeedsWin64SEH) {
738 if (HasFP) {
739 // We need to set frame base offset low enough such that all saved
740 // register offsets would be positive relative to it, but we can't
741 // just use NumBytes, because .seh_setframe offset must be <=240.
742 // So we pretend to have only allocated enough space to spill the
743 // non-volatile registers.
744 // We don't care about the rest of stack allocation, because unwinder
745 // will restore SP to (BP - SEHFrameOffset)
746 for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) {
747 int offset = MFI->getObjectOffset(Info.getFrameIdx());
748 SEHFrameOffset = std::max(SEHFrameOffset, abs(offset));
749 }
750 SEHFrameOffset += SEHFrameOffset % 16; // ensure alignment
751
752 // This only needs to account for XMM spill slots, GPR slots
753 // are covered by .seh_pushreg's emitted above.
754 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
755 .addImm(SEHFrameOffset - X86FI->getCalleeSavedFrameSize())
756 .setMIFlag(MachineInstr::FrameSetup);
757
758 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
759 .addImm(FramePtr)
760 .addImm(SEHFrameOffset)
761 .setMIFlag(MachineInstr::FrameSetup);
762 } else {
763 // SP will be the base register for restoring XMMs
764 if (NumBytes) {
765 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
766 .addImm(NumBytes)
767 .setMIFlag(MachineInstr::FrameSetup);
768 }
769 }
770 }
771
772 // Skip the rest of register spilling code
773 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
774 ++MBBI;
775
776 // Emit SEH info for non-GPRs
777 if (NeedsWin64SEH) {
778 for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) {
779 unsigned Reg = Info.getReg();
780 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
781 continue;
782 assert(X86::FR64RegClass.contains(Reg) && "Unexpected register class");
783
784 int Offset = getFrameIndexOffset(MF, Info.getFrameIdx());
785 Offset += SEHFrameOffset;
786
787 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
788 .addImm(Reg)
789 .addImm(Offset)
790 .setMIFlag(MachineInstr::FrameSetup);
791 }
792
793 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
794 .setMIFlag(MachineInstr::FrameSetup);
795 }
688796
689797 // If we need a base pointer, set it up here. It's whatever the value
690798 // of the stack pointer is at this point. Any variable size objects
691799 // will be allocated after this, so we can still use the base pointer
692800 // to reference locals.
693801 if (RegInfo->hasBasePointer(MF)) {
694 // Update the frame pointer with the current stack pointer.
802 // Update the base pointer with the current stack pointer.
695803 unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr;
696804 BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
697805 .addReg(StackPtr)
698806 .setMIFlag(MachineInstr::FrameSetup);
699807 }
700808
701 if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
809 if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
702810 // Mark end of stack pointer adjustment.
703811 if (!HasFP && NumBytes) {
704812 // Define the current CFA rule to use the provided offset.
713821
714822 // Emit DWARF info specifying the offsets of the callee-saved registers.
715823 if (PushedRegs)
716 emitCalleeSavedFrameMoves(MBB, MBBI, DL, HasFP ? FramePtr : StackPtr);
824 emitCalleeSavedFrameMoves(MBB, MBBI, DL);
717825 }
718826 }
719827
9731081 return getFrameIndexOffset(MF, FI);
9741082 }
9751083
976 bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
977 MachineBasicBlock::iterator MI,
978 const std::vector &CSI,
979 const TargetRegisterInfo *TRI) const {
980 if (CSI.empty())
981 return false;
982
1084 bool X86FrameLowering::assignCalleeSavedSpillSlots(
1085 MachineFunction &MF, const TargetRegisterInfo *TRI,
1086 std::vector &CSI) const {
1087 MachineFrameInfo *MFI = MF.getFrameInfo();
1088 const X86RegisterInfo *RegInfo =
1089 static_cast(MF.getTarget().getRegisterInfo());
1090 unsigned SlotSize = RegInfo->getSlotSize();
1091 X86MachineFunctionInfo *X86FI = MF.getInfo();
1092
1093 unsigned CalleeSavedFrameSize = 0;
1094 int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
1095
1096 if (hasFP(MF)) {
1097 // emitPrologue always spills the frame register first.
1098 SpillSlotOffset -= SlotSize;
1099 MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
1100
1101 // Since emitPrologue and emitEpilogue will handle spilling and restoring of
1102 // the frame register, we can delete it from CSI list and not have to worry
1103 // about avoiding it later.
1104 unsigned FPReg = RegInfo->getFrameRegister(MF);
1105 for (unsigned i = 0; i < CSI.size(); ++i) {
1106 if (CSI[i].getReg() == FPReg) {
1107 CSI.erase(CSI.begin() + i);
1108 break;
1109 }
1110 }
1111 }
1112
1113 // Assign slots for GPRs. It increases frame size.
1114 for (unsigned i = CSI.size(); i != 0; --i) {
1115 unsigned Reg = CSI[i - 1].getReg();
1116
1117 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
1118 continue;
1119
1120 SpillSlotOffset -= SlotSize;
1121 CalleeSavedFrameSize += SlotSize;
1122
1123 int SlotIndex = MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
1124 CSI[i - 1].setFrameIdx(SlotIndex);
1125 }
1126
1127 X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
1128
1129 // Assign slots for XMMs.
1130 for (unsigned i = CSI.size(); i != 0; --i) {
1131 unsigned Reg = CSI[i - 1].getReg();
1132 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
1133 continue;
1134
1135 const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
1136 // ensure alignment
1137 SpillSlotOffset -= abs(SpillSlotOffset) % RC->getAlignment();
1138 // spill into slot
1139 SpillSlotOffset -= RC->getSize();
1140 int SlotIndex =
1141 MFI->CreateFixedSpillStackObject(RC->getSize(), SpillSlotOffset);
1142 CSI[i - 1].setFrameIdx(SlotIndex);
1143 MFI->ensureMaxAlignment(RC->getAlignment());
1144 }
1145
1146 return true;
1147 }
1148
1149 bool X86FrameLowering::spillCalleeSavedRegisters(
1150 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1151 const std::vector &CSI,
1152 const TargetRegisterInfo *TRI) const {
9831153 DebugLoc DL = MBB.findDebugLoc(MI);
9841154
9851155 MachineFunction &MF = *MBB.getParent();
986 const X86RegisterInfo *RegInfo =
987 static_cast(MF.getTarget().getRegisterInfo());
988 unsigned SlotSize = RegInfo->getSlotSize();
989 unsigned FPReg = TRI->getFrameRegister(MF);
990 unsigned CalleeFrameSize = 0;
991
9921156 const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
993 X86MachineFunctionInfo *X86FI = MF.getInfo();
9941157 const X86Subtarget &STI = MF.getTarget().getSubtarget();
9951158
9961159 // Push GPRs. It increases frame size.
10021165 continue;
10031166 // Add the callee-saved register as live-in. It's killed at the spill.
10041167 MBB.addLiveIn(Reg);
1005 if (Reg == FPReg)
1006 // X86RegisterInfo::emitPrologue will handle spilling of frame register.
1007 continue;
1008 CalleeFrameSize += SlotSize;
1168
10091169 BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill)
10101170 .setMIFlag(MachineInstr::FrameSetup);
10111171 }
10121172
1013 X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
1014
10151173 // Make XMM regs spilled. X86 does not have ability of push/pop XMM.
10161174 // It can be done by spilling XMMs to stack frame.
1017 // Note that only Win64 ABI might spill XMMs.
10181175 for (unsigned i = CSI.size(); i != 0; --i) {
10191176 unsigned Reg = CSI[i-1].getReg();
10201177 if (X86::GR64RegClass.contains(Reg) ||
10261183
10271184 TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC,
10281185 TRI);
1186 --MI;
1187 MI->setFlag(MachineInstr::FrameSetup);
1188 ++MI;
10291189 }
10301190
10311191 return true;
10561216 }
10571217
10581218 // POP GPRs.
1059 unsigned FPReg = TRI->getFrameRegister(MF);
10601219 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
10611220 for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
10621221 unsigned Reg = CSI[i].getReg();
10631222 if (!X86::GR64RegClass.contains(Reg) &&
10641223 !X86::GR32RegClass.contains(Reg))
10651224 continue;
1066 if (Reg == FPReg)
1067 // X86RegisterInfo::emitEpilogue will handle restoring of frame register.
1068 continue;
1225
10691226 BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
10701227 }
10711228 return true;
10941251 // [EBP]
10951252 MFI->CreateFixedObject(-TailCallReturnAddrDelta,
10961253 TailCallReturnAddrDelta - SlotSize, true);
1097 }
1098
1099 if (hasFP(MF)) {
1100 assert((TailCallReturnAddrDelta <= 0) &&
1101 "The Delta should always be zero or negative");
1102 const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering();
1103
1104 // Create a frame entry for the EBP register that must be saved.
1105 int FrameIdx = MFI->CreateFixedObject(SlotSize,
1106 -(int)SlotSize +
1107 TFI.getOffsetOfLocalArea() +
1108 TailCallReturnAddrDelta,
1109 true);
1110 assert(FrameIdx == MFI->getObjectIndexBegin() &&
1111 "Slot for EBP register must be last in order to be found!");
1112 (void)FrameIdx;
11131254 }
11141255
11151256 // Spill the BasePtr if it's used.
2626 : TargetFrameLowering(StackGrowsDown, StackAl, LAO) {}
2727
2828 void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
29 MachineBasicBlock::iterator MBBI, DebugLoc DL,
30 unsigned FramePtr) const;
29 MachineBasicBlock::iterator MBBI,
30 DebugLoc DL) const;
3131
3232 /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
3333 /// the function.
4040
4141 void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
4242 RegScavenger *RS = nullptr) const override;
43
44 bool
45 assignCalleeSavedSpillSlots(MachineFunction &MF,
46 const TargetRegisterInfo *TRI,
47 std::vector &CSI) const override;
4348
4449 bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
4550 MachineBasicBlock::iterator MI,
604604 }
605605
606606 // FIXME - use subtarget debug flags
607 if (!Subtarget->isTargetDarwin() &&
608 !Subtarget->isTargetELF() &&
609 !Subtarget->isTargetCygMing()) {
607 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() &&
608 !Subtarget->isTargetCygMing() && !Subtarget->isTargetWin64()) {
610609 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
611610 }
612611
193193 let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in {
194194 def EH_SjLj_Setup : I<0, Pseudo, (outs), (ins brtarget:$dst),
195195 "#EH_SjLj_Setup\t$dst", []>;
196 }
197
198 //===----------------------------------------------------------------------===//
199 // Pseudo instructions used by unwind info.
200 //
201 let isPseudo = 1 in {
202 def SEH_PushReg : I<0, Pseudo, (outs), (ins i32imm:$reg),
203 "#SEH_PushReg $reg", []>;
204 def SEH_SaveReg : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),
205 "#SEH_SaveReg $reg, $dst", []>;
206 def SEH_SaveXMM : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),
207 "#SEH_SaveXMM $reg, $dst", []>;
208 def SEH_StackAlloc : I<0, Pseudo, (outs), (ins i32imm:$size),
209 "#SEH_StackAlloc $size", []>;
210 def SEH_SetFrame : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$offset),
211 "#SEH_SetFrame $reg, $offset", []>;
212 def SEH_PushFrame : I<0, Pseudo, (outs), (ins i1imm:$mode),
213 "#SEH_PushFrame $mode", []>;
214 def SEH_EndPrologue : I<0, Pseudo, (outs), (ins),
215 "#SEH_EndPrologue", []>;
196216 }
197217
198218 //===----------------------------------------------------------------------===//
1212 //===----------------------------------------------------------------------===//
1313
1414 #include "X86AsmPrinter.h"
15 #include "X86RegisterInfo.h"
1516 #include "InstPrinter/X86ATTInstPrinter.h"
1617 #include "MCTargetDesc/X86BaseInfo.h"
1718 #include "llvm/ADT/SmallString.h"
778779
779780 void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
780781 X86MCInstLower MCInstLowering(*MF, *this);
782 const X86RegisterInfo *RI =
783 static_cast<const X86RegisterInfo *>(TM.getRegisterInfo());
784
781785 switch (MI->getOpcode()) {
782786 case TargetOpcode::DBG_VALUE:
783787 llvm_unreachable("Should be handled target independently");
882886 .addReg(X86::R10)
883887 .addReg(X86::RAX));
884888 return;
889
890 case X86::SEH_PushReg:
891 OutStreamer.EmitWin64EHPushReg(
892 RI->getSEHRegNum(MI->getOperand(0).getImm()));
893 return;
894
895 case X86::SEH_SaveReg:
896 OutStreamer.EmitWin64EHSaveReg(RI->getSEHRegNum(MI->getOperand(0).getImm()),
897 MI->getOperand(1).getImm());
898 return;
899
900 case X86::SEH_SaveXMM:
901 OutStreamer.EmitWin64EHSaveXMM(RI->getSEHRegNum(MI->getOperand(0).getImm()),
902 MI->getOperand(1).getImm());
903 return;
904
905 case X86::SEH_StackAlloc:
906 OutStreamer.EmitWin64EHAllocStack(MI->getOperand(0).getImm());
907 return;
908
909 case X86::SEH_SetFrame:
910 OutStreamer.EmitWin64EHSetFrame(
911 RI->getSEHRegNum(MI->getOperand(0).getImm()),
912 MI->getOperand(1).getImm());
913 return;
914
915 case X86::SEH_PushFrame:
916 OutStreamer.EmitWin64EHPushFrame(MI->getOperand(0).getImm());
917 return;
918
919 case X86::SEH_EndPrologue:
920 OutStreamer.EmitWin64EHEndProlog();
921 return;
885922 }
886923
887924 MCInst TmpInst;
0 ; RUN: llc < %s -mtriple=i686-pc-linux-gnu -o - | FileCheck %s --check-prefix=LIN
1 ; RUN: llc < %s -mtriple=x86_64-pc-windows-gnu -o - | FileCheck %s --check-prefix=LIN
21 ; RUN: llc < %s -mtriple=i386-pc-mingw32 -o - | FileCheck %s --check-prefix=WIN
32 ; RUN: llc < %s -mtriple=i686-pc-windows-gnu -o - | FileCheck %s --check-prefix=WIN
3 ; RUN: llc < %s -mtriple=x86_64-pc-windows-gnu -o - | FileCheck %s --check-prefix=WIN64
44
55 ; LIN: .cfi_personality 0, __gnat_eh_personality
66 ; LIN: .cfi_lsda 0, .Lexception0
77 ; WIN: .cfi_personality 0, ___gnat_eh_personality
88 ; WIN: .cfi_lsda 0, Lexception0
9 ; WIN64: .seh_handler __gnat_eh_personality
10 ; WIN64: .seh_handlerdata
911
1012 @error = external global i8
1113
1416 invoke void @raise()
1517 to label %eh_then unwind label %unwind
1618
17 unwind: ; preds = %entry
19 unwind: ; preds = %entry
1820 %eh_ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*)
1921 catch i8* @error
2022 %eh_select = extractvalue { i8*, i32 } %eh_ptr, 1
0 ; RUN: llc -mcpu=generic -mtriple=x86_64-mingw32 < %s | FileCheck %s
11 ; CHECK: subq $40, %rsp
2 ; CHECK: movaps %xmm8, (%rsp)
3 ; CHECK: movaps %xmm7, 16(%rsp)
2 ; CHECK: movaps %xmm8, 16(%rsp)
3 ; CHECK: movaps %xmm7, (%rsp)
44
55 define i32 @a() nounwind {
66 entry:
66 declare <16 x float> @func_float16(<16 x float>, <16 x float>)
77 declare i32 @func_int(i32, i32)
88
9 ; WIN64: testf16_inp
9 ; WIN64-LABEL: testf16_inp
1010 ; WIN64: vaddps {{.*}}, {{%ymm[0-1]}}
1111 ; WIN64: vaddps {{.*}}, {{%ymm[0-1]}}
1212 ; WIN64: leaq {{.*}}(%rsp), %rcx
1313 ; WIN64: call
1414 ; WIN64: ret
1515
16 ; X32: testf16_inp
16 ; X32-LABEL: testf16_inp
1717 ; X32: movl %eax, (%esp)
1818 ; X32: vaddps {{.*}}, {{%ymm[0-1]}}
1919 ; X32: vaddps {{.*}}, {{%ymm[0-1]}}
2020 ; X32: call
2121 ; X32: ret
2222
23 ; X64: testf16_inp
23 ; X64-LABEL: testf16_inp
2424 ; X64: vaddps {{.*}}, {{%ymm[0-1]}}
2525 ; X64: vaddps {{.*}}, {{%ymm[0-1]}}
2626 ; X64: leaq {{.*}}(%rsp), %rdi
4040 ;test calling conventions - preserved registers
4141
4242 ; preserved ymm6-ymm15
43 ; WIN64: testf16_regs
43 ; WIN64-LABEL: testf16_regs
4444 ; WIN64: call
4545 ; WIN64: vaddps {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
4646 ; WIN64: vaddps {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
4747 ; WIN64: ret
4848
4949 ; preserved ymm8-ymm15
50 ; X64: testf16_regs
50 ; X64-LABEL: testf16_regs
5151 ; X64: call
5252 ; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
5353 ; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
6464 }
6565
6666 ; test calling conventions - prolog and epilog
67 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
68 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
69 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
70 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
71 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
72 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
73 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
74 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
75 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
76 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
67 ; WIN64-LABEL: test_prolog_epilog
68 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
69 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
70 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
71 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
72 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
73 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
74 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
75 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
76 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
77 ; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
7778 ; WIN64: call
78 ; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
79 ; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
80 ; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
81 ; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
82 ; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
83 ; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
84 ; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
85 ; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
86 ; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
87 ; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
79 ; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
80 ; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
81 ; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
82 ; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
83 ; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
84 ; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
85 ; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
86 ; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
87 ; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
88 ; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
8889
90 ; X64-LABEL: test_prolog_epilog
8991 ; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
9092 ; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
9193 ; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
110112
111113 ; test functions with integer parameters
112114 ; pass parameters on stack for 32-bit platform
115 ; X32-LABEL: test_int
113116 ; X32: movl {{.*}}, 4(%esp)
114117 ; X32: movl {{.*}}, (%esp)
115118 ; X32: call
116119 ; X32: addl {{.*}}, %eax
117120
118121 ; pass parameters in registers for 64-bit platform
122 ; X64-LABEL: test_int
119123 ; X64: leal {{.*}}, %edi
120124 ; X64: movl {{.*}}, %esi
121125 ; X64: call
127131 ret i32 %c
128132 }
129133
130 ; WIN64: test_float4
134 ; WIN64-LABEL: test_float4
131135 ; WIN64-NOT: vzeroupper
132136 ; WIN64: call
133137 ; WIN64-NOT: vzeroupper
134138 ; WIN64: call
135139 ; WIN64: ret
136140
137 ; X64: test_float4
141 ; X64-LABEL: test_float4
138142 ; X64-NOT: vzeroupper
139143 ; X64: call
140144 ; X64-NOT: vzeroupper
141145 ; X64: call
142146 ; X64: ret
143147
144 ; X32: test_float4
148 ; X32-LABEL: test_float4
145149 ; X32: vzeroupper
146150 ; X32: call
147151 ; X32: vzeroupper
1212 ; APPLE: GCC_except_table0:
1313 ; APPLE: Lexception0:
1414
15 ; MINGW64: .cfi_startproc
16 ; MINGW64: .cfi_personality 0, __gxx_personality_v0
17 ; MINGW64: .cfi_lsda 0, .Lexception0
18 ; MINGW64: .cfi_def_cfa_offset 16
15 ; MINGW64: .seh_proc
16 ; MINGW64: .seh_handler __gxx_personality_v0
17 ; MINGW64: .seh_setframe 5, 0
1918 ; MINGW64: callq _Unwind_Resume
20 ; MINGW64: .cfi_endproc
19 ; MINGW64: .seh_handlerdata
2120 ; MINGW64: GCC_except_table0:
2221 ; MINGW64: Lexception0:
22 ; MINGW64: .seh_endproc
2323
2424 ; MINGW32: .cfi_startproc
2525 ; MINGW32: .cfi_personality 0, ___gxx_personality_v0
0 ; RUN: llc < %s -O0 -mcpu=corei7 -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN64
1 ; RUN: llc < %s -O0 -mcpu=corei7 -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=WIN64
2
3 ; Check function without prolog
4 define void @foo0() uwtable {
5 entry:
6 ret void
7 }
8 ; WIN64-LABEL: foo0:
9 ; WIN64: .seh_proc foo0
10 ; WIN64: .seh_endprologue
11 ; WIN64: ret
12 ; WIN64: .seh_endproc
13
14 ; Checks a small stack allocation
15 define void @foo1() uwtable {
16 entry:
17 %baz = alloca [2000 x i16], align 2
18 ret void
19 }
20 ; WIN64-LABEL: foo1:
21 ; WIN64: .seh_proc foo1
22 ; WIN64: subq $4000, %rsp
23 ; WIN64: .seh_stackalloc 4000
24 ; WIN64: .seh_endprologue
25 ; WIN64: addq $4000, %rsp
26 ; WIN64: ret
27 ; WIN64: .seh_endproc
28
29 ; Checks a stack allocation requiring call to __chkstk/___chkstk_ms
30 define void @foo2() uwtable {
31 entry:
32 %baz = alloca [4000 x i16], align 2
33 ret void
34 }
35 ; WIN64-LABEL: foo2:
36 ; WIN64: .seh_proc foo2
37 ; WIN64: movabsq $8000, %rax
38 ; WIN64: callq {{__chkstk|___chkstk_ms}}
39 ; WIN64: subq %rax, %rsp
40 ; WIN64: .seh_stackalloc 8000
41 ; WIN64: .seh_endprologue
42 ; WIN64: addq $8000, %rsp
43 ; WIN64: ret
44 ; WIN64: .seh_endproc
45
46
47 ; Checks stack push
48 define i32 @foo3(i32 %f_arg, i32 %e_arg, i32 %d_arg, i32 %c_arg, i32 %b_arg, i32 %a_arg) uwtable {
49 entry:
50 %a = alloca i32
51 %b = alloca i32
52 %c = alloca i32
53 %d = alloca i32
54 %e = alloca i32
55 %f = alloca i32
56 store i32 %a_arg, i32* %a
57 store i32 %b_arg, i32* %b
58 store i32 %c_arg, i32* %c
59 store i32 %d_arg, i32* %d
60 store i32 %e_arg, i32* %e
61 store i32 %f_arg, i32* %f
62 %tmp = load i32* %a
63 %tmp1 = mul i32 %tmp, 2
64 %tmp2 = load i32* %b
65 %tmp3 = mul i32 %tmp2, 3
66 %tmp4 = add i32 %tmp1, %tmp3
67 %tmp5 = load i32* %c
68 %tmp6 = mul i32 %tmp5, 5
69 %tmp7 = add i32 %tmp4, %tmp6
70 %tmp8 = load i32* %d
71 %tmp9 = mul i32 %tmp8, 7
72 %tmp10 = add i32 %tmp7, %tmp9
73 %tmp11 = load i32* %e
74 %tmp12 = mul i32 %tmp11, 11
75 %tmp13 = add i32 %tmp10, %tmp12
76 %tmp14 = load i32* %f
77 %tmp15 = mul i32 %tmp14, 13
78 %tmp16 = add i32 %tmp13, %tmp15
79 ret i32 %tmp16
80 }
81 ; WIN64-LABEL: foo3:
82 ; WIN64: .seh_proc foo3
83 ; WIN64: pushq %rsi
84 ; WIN64: .seh_pushreg 6
85 ; WIN64: subq $24, %rsp
86 ; WIN64: .seh_stackalloc 24
87 ; WIN64: .seh_endprologue
88 ; WIN64: addq $24, %rsp
89 ; WIN64: popq %rsi
90 ; WIN64: ret
91 ; WIN64: .seh_endproc
92
93
94 ; Check emission of eh handler and handler data
95 declare i32 @_d_eh_personality(i32, i32, i64, i8*, i8*)
96 declare void @_d_eh_resume_unwind(i8*)
97
98 declare i32 @bar()
99
100 define i32 @foo4() #0 {
101 entry:
102 %step = alloca i32, align 4
103 store i32 0, i32* %step
104 %tmp = load i32* %step
105
106 %tmp1 = invoke i32 @bar()
107 to label %finally unwind label %landingpad
108
109 finally:
110 store i32 1, i32* %step
111 br label %endtryfinally
112
113 landingpad:
114 %landing_pad = landingpad { i8*, i32 } personality i32 (i32, i32, i64, i8*, i8*)* @_d_eh_personality
115 cleanup
116 %tmp3 = extractvalue { i8*, i32 } %landing_pad, 0
117 store i32 2, i32* %step
118 call void @_d_eh_resume_unwind(i8* %tmp3)
119 unreachable
120
121 endtryfinally:
122 %tmp10 = load i32* %step
123 ret i32 %tmp10
124 }
125 ; WIN64-LABEL: foo4:
126 ; WIN64: .seh_proc foo4
127 ; WIN64: .seh_handler _d_eh_personality, @unwind, @except
128 ; WIN64: subq $56, %rsp
129 ; WIN64: .seh_stackalloc 56
130 ; WIN64: .seh_endprologue
131 ; WIN64: addq $56, %rsp
132 ; WIN64: ret
133 ; WIN64: .seh_handlerdata
134 ; WIN64: .seh_endproc
135
136
137 ; Check stack re-alignment and xmm spilling
138 define void @foo5() uwtable {
139 entry:
140 %s = alloca i32, align 64
141 call void asm sideeffect "", "~{rbx},~{rdi},~{xmm6},~{xmm7}"()
142 ret void
143 }
144 ; WIN64-LABEL: foo5:
145 ; WIN64: .seh_proc foo5
146 ; WIN64: pushq %rbp
147 ; WIN64: .seh_pushreg 5
148 ; WIN64: movq %rsp, %rbp
149 ; WIN64: pushq %rdi
150 ; WIN64: .seh_pushreg 7
151 ; WIN64: pushq %rbx
152 ; WIN64: .seh_pushreg 3
153 ; WIN64: andq $-64, %rsp
154 ; WIN64: subq $128, %rsp
155 ; WIN64: .seh_stackalloc 48
156 ; WIN64: .seh_setframe 5, 64
157 ; WIN64: movaps %xmm7, -32(%rbp) # 16-byte Spill
158 ; WIN64: movaps %xmm6, -48(%rbp) # 16-byte Spill
159 ; WIN64: .seh_savexmm 6, 16
160 ; WIN64: .seh_savexmm 7, 32
161 ; WIN64: .seh_endprologue
162 ; WIN64: movaps -48(%rbp), %xmm6 # 16-byte Reload
163 ; WIN64: movaps -32(%rbp), %xmm7 # 16-byte Reload
164 ; WIN64: leaq -16(%rbp), %rsp
165 ; WIN64: popq %rbx
166 ; WIN64: popq %rdi
167 ; WIN64: popq %rbp
168 ; WIN64: retq
169 ; WIN64: .seh_endproc