X86ISelLoweringCall.cpp
1//===- llvm/lib/Target/X86/X86ISelCallLowering.cpp - Call lowering --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file implements the lowering of LLVM calls to DAG nodes.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86.h"
15#include "X86CallingConv.h"
16#include "X86FrameLowering.h"
17#include "X86ISelLowering.h"
18#include "X86InstrBuilder.h"
20#include "X86TargetMachine.h"
21#include "X86TargetObjectFile.h"
22#include "llvm/ADT/Statistic.h"
28#include "llvm/IR/IRBuilder.h"
29
30#define DEBUG_TYPE "x86-isel"
31
32using namespace llvm;
33
34STATISTIC(NumTailCalls, "Number of tail calls");
35
36/// Call this when the user attempts to do something unsupported, like
37/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
38/// report_fatal_error, so calling code should attempt to recover without
39/// crashing.
40static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
41 const char *Msg) {
42 MachineFunction &MF = DAG.getMachineFunction();
43 DAG.getContext()->diagnose(
44 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
45}
46
47/// Returns true if a CC can dynamically exclude a register from the list of
48/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
49/// the return registers.
50static bool shouldDisableRetRegFromCSR(CallingConv::ID CC) {
51 switch (CC) {
52 default:
53 return false;
54 case CallingConv::X86_RegCall:
55 case CallingConv::PreserveMost:
56 case CallingConv::PreserveAll:
57 return true;
58 }
59}
60
61/// Returns true if a CC can dynamically exclude a register from the list of
62/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
63/// the parameters.
64static bool shouldDisableArgRegFromCSR(CallingConv::ID CC) {
65 return CC == CallingConv::X86_RegCall;
66}
67
68static std::pair<MVT, unsigned>
69handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
70 const X86Subtarget &Subtarget) {
71 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
72 // convention is one that uses k registers.
73 if (NumElts == 2)
74 return {MVT::v2i64, 1};
75 if (NumElts == 4)
76 return {MVT::v4i32, 1};
77 if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
78 CC != CallingConv::Intel_OCL_BI)
79 return {MVT::v8i16, 1};
80 if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
81 CC != CallingConv::Intel_OCL_BI)
82 return {MVT::v16i8, 1};
83 // v32i1 passes in ymm unless we have BWI and the calling convention is
84 // regcall.
85 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
86 return {MVT::v32i8, 1};
87 // Split v64i1 vectors if we don't have v64i8 available.
88 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
89 if (Subtarget.useAVX512Regs())
90 return {MVT::v64i8, 1};
91 return {MVT::v32i8, 2};
92 }
93
94 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
95 if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
96 NumElts > 64)
97 return {MVT::i8, NumElts};
98
99 return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
100}
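// Illustrative sketch (not from the upstream file; values follow from the
// rules above, assuming a non-RegCall calling convention):
//
//   // On an AVX512BW target that prefers 512-bit registers:
//   auto [RegVT, NumRegs] = handleMaskRegisterForCallingConv(
//       /*NumElts=*/64, CallingConv::C, Subtarget);   // -> {MVT::v64i8, 1}
//
//   // Without 512-bit registers the same v64i1 splits into two YMM halves:
//   //   -> {MVT::v32i8, 2}
//
//   // Odd or very wide masks (e.g. v3i1, v65i1) fall back to i8 scalars.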
101
102MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
103 CallingConv::ID CC,
104 EVT VT) const {
105 if (VT.isVector()) {
106 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
107 unsigned NumElts = VT.getVectorNumElements();
108
109 MVT RegisterVT;
110 unsigned NumRegisters;
111 std::tie(RegisterVT, NumRegisters) =
112 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
113 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
114 return RegisterVT;
115 }
116
117 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
118 return MVT::v8f16;
119 }
120
121 // We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled.
122 if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
123 !Subtarget.hasX87())
124 return MVT::i32;
125
126 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
127 return getRegisterTypeForCallingConv(Context, CC,
128 VT.changeVectorElementType(MVT::f16));
129
130 if (VT == MVT::bf16)
131 return MVT::f16;
132
133 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
134}
135
136unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
137 CallingConv::ID CC,
138 EVT VT) const {
139 if (VT.isVector()) {
140 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
141 unsigned NumElts = VT.getVectorNumElements();
142
143 MVT RegisterVT;
144 unsigned NumRegisters;
145 std::tie(RegisterVT, NumRegisters) =
146 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
147 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
148 return NumRegisters;
149 }
150
151 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
152 return 1;
153 }
154
155 // We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if
156 // x87 is disabled.
157 if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
158 if (VT == MVT::f64)
159 return 2;
160 if (VT == MVT::f80)
161 return 3;
162 }
163
164 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
165 return getNumRegistersForCallingConv(Context, CC,
166 VT.changeVectorElementType(MVT::f16));
167
168 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
169}
170
171unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
172 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
173 unsigned &NumIntermediates, MVT &RegisterVT) const {
174 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
175 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
176 Subtarget.hasAVX512() &&
177 (!isPowerOf2_32(VT.getVectorNumElements()) ||
178 (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
179 VT.getVectorNumElements() > 64)) {
180 RegisterVT = MVT::i8;
181 IntermediateVT = MVT::i1;
182 NumIntermediates = VT.getVectorNumElements();
183 return NumIntermediates;
184 }
185
186 // Split v64i1 vectors if we don't have v64i8 available.
187 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
188 CC != CallingConv::X86_RegCall) {
189 RegisterVT = MVT::v32i8;
190 IntermediateVT = MVT::v32i1;
191 NumIntermediates = 2;
192 return 2;
193 }
194
195 // Split vNbf16 vectors according to vNf16.
196 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
197 VT = VT.changeVectorElementType(MVT::f16);
198
199 return TargetLowering::getVectorTypeBreakdownForCallingConv(
200 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
201}
202
203EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
204 LLVMContext &Context,
205 EVT VT) const {
206 if (!VT.isVector())
207 return MVT::i8;
208
209 if (Subtarget.hasAVX512()) {
210 // Figure out what this type will be legalized to.
211 EVT LegalVT = VT;
212 while (getTypeAction(Context, LegalVT) != TypeLegal)
213 LegalVT = getTypeToTransformTo(Context, LegalVT);
214
215 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
216 if (LegalVT.getSimpleVT().is512BitVector())
217 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
218
219 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
220 // If we legalized to less than a 512-bit vector, then we will use a vXi1
221 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
222 // vXi16/vXi8.
223 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
224 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
225 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
226 }
227 }
228
229 return MVT::i8;
230}
231
232/// Helper for getByValTypeAlignment to determine
233/// the desired ByVal argument alignment.
234static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
235 if (MaxAlign == 16)
236 return;
237 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
238 if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
239 MaxAlign = Align(16);
240 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
241 Align EltAlign;
242 getMaxByValAlign(ATy->getElementType(), EltAlign);
243 if (EltAlign > MaxAlign)
244 MaxAlign = EltAlign;
245 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
246 for (auto *EltTy : STy->elements()) {
247 Align EltAlign;
248 getMaxByValAlign(EltTy, EltAlign);
249 if (EltAlign > MaxAlign)
250 MaxAlign = EltAlign;
251 if (MaxAlign == 16)
252 break;
253 }
254 }
255}
256
257/// Return the desired alignment for ByVal aggregate
258/// function arguments in the caller parameter area. For X86, aggregates
259/// that contain SSE vectors are placed at 16-byte boundaries while the rest
260/// are at 4-byte boundaries.
261uint64_t X86TargetLowering::getByValTypeAlignment(Type *Ty,
262 const DataLayout &DL) const {
263 if (Subtarget.is64Bit()) {
264 // Max of 8 and alignment of type.
265 Align TyAlign = DL.getABITypeAlign(Ty);
266 if (TyAlign > 8)
267 return TyAlign.value();
268 return 8;
269 }
270
271 Align Alignment(4);
272 if (Subtarget.hasSSE1())
273 getMaxByValAlign(Ty, Alignment);
274 return Alignment.value();
275}
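// Worked example (illustrative, based on the rules above): on 32-bit x86 with
// SSE1 enabled, a byval struct such as
//
//   struct S { int I; __m128 V; };
//
// contains a 128-bit vector, so getMaxByValAlign() raises the alignment to 16
// bytes; a struct of plain ints stays at the 4-byte default. On x86-64 the
// result is simply max(8, ABI alignment of the type).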
276
277/// It returns EVT::Other if the type should be determined using generic
278/// target-independent logic.
279/// For vector ops we check that the overall size isn't larger than our
280/// preferred vector width.
281EVT X86TargetLowering::getOptimalMemOpType(
282 const MemOp &Op, const AttributeList &FuncAttributes) const {
283 if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
284 if (Op.size() >= 16 &&
285 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
286 // FIXME: Check if unaligned 64-byte accesses are slow.
287 if (Op.size() >= 64 && Subtarget.hasAVX512() && Subtarget.hasEVEX512() &&
288 (Subtarget.getPreferVectorWidth() >= 512)) {
289 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
290 }
291 // FIXME: Check if unaligned 32-byte accesses are slow.
292 if (Op.size() >= 32 && Subtarget.hasAVX() &&
293 Subtarget.useLight256BitInstructions()) {
294 // Although this isn't a well-supported type for AVX1, we'll let
295 // legalization and shuffle lowering produce the optimal codegen. If we
296 // choose an optimal type with a vector element larger than a byte,
297 // getMemsetStores() may create an intermediate splat (using an integer
298 // multiply) before we splat as a vector.
299 return MVT::v32i8;
300 }
301 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
302 return MVT::v16i8;
303 // TODO: Can SSE1 handle a byte vector?
304 // If we have SSE1 registers we should be able to use them.
305 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
306 (Subtarget.getPreferVectorWidth() >= 128))
307 return MVT::v4f32;
308 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
309 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
310 // Do not use f64 to lower memcpy if source is string constant. It's
311 // better to use i32 to avoid the loads.
312 // Also, do not use f64 to lower memset unless this is a memset of zeros.
313 // The gymnastics of splatting a byte value into an XMM register and then
314 // only using 8-byte stores (because this is a CPU with slow unaligned
315 // 16-byte accesses) makes that a loser.
316 return MVT::f64;
317 }
318 }
319 // This is a compromise. If we reach here, unaligned accesses may be slow on
320 // this target. However, creating smaller, aligned accesses could be even
321 // slower and would certainly be a lot more code.
322 if (Subtarget.is64Bit() && Op.size() >= 8)
323 return MVT::i64;
324 return MVT::i32;
325}
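// Illustrative mapping (not exhaustive; the exact choice follows the
// conditions above and the subtarget's preferred vector width):
//
//   memcpy of 64+ bytes, AVX-512 with BWI        -> MVT::v64i8
//   memcpy of 32+ bytes, AVX, light 256-bit ops  -> MVT::v32i8
//   memcpy of 16+ bytes, SSE2                    -> MVT::v16i8
//   8-byte zero-memset, 32-bit SSE2 target with
//   slow unaligned 16-byte accesses              -> MVT::f64
//   otherwise                                    -> MVT::i64 (64-bit) or MVT::i32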
326
327bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
328 if (VT == MVT::f32)
329 return Subtarget.hasSSE1();
330 if (VT == MVT::f64)
331 return Subtarget.hasSSE2();
332 return true;
333}
334
335static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
336 return (8 * Alignment.value()) % SizeInBits == 0;
337}
338
339bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const {
340 if (isBitAligned(Alignment, VT.getSizeInBits()))
341 return true;
342 switch (VT.getSizeInBits()) {
343 default:
344 // 8-byte and under are always assumed to be fast.
345 return true;
346 case 128:
347 return !Subtarget.isUnalignedMem16Slow();
348 case 256:
349 return !Subtarget.isUnalignedMem32Slow();
350 // TODO: What about AVX-512 (512-bit) accesses?
351 }
352}
353
354bool X86TargetLowering::allowsMisalignedMemoryAccesses(
355 EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
356 unsigned *Fast) const {
357 if (Fast)
358 *Fast = isMemoryAccessFast(VT, Alignment);
359 // NonTemporal vector memory ops must be aligned.
360 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
361 // NT loads can only be vector aligned, so if it's less aligned than the
362 // minimum vector size (which we can split the vector down to), we might as
363 // well use a regular unaligned vector load.
364 // We don't have any NT loads pre-SSE41.
365 if (!!(Flags & MachineMemOperand::MOLoad))
366 return (Alignment < 16 || !Subtarget.hasSSE41());
367 return false;
368 }
369 // Misaligned accesses of any size are always allowed.
370 return true;
371}
372
373bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
374 const DataLayout &DL, EVT VT,
375 unsigned AddrSpace, Align Alignment,
376 MachineMemOperand::Flags Flags,
377 unsigned *Fast) const {
378 if (Fast)
379 *Fast = isMemoryAccessFast(VT, Alignment);
380 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
381 if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
382 /*Fast=*/nullptr))
383 return true;
384 // NonTemporal vector memory ops are special, and must be aligned.
385 if (!isBitAligned(Alignment, VT.getSizeInBits()))
386 return false;
387 switch (VT.getSizeInBits()) {
388 case 128:
389 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
390 return true;
391 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
392 return true;
393 return false;
394 case 256:
395 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
396 return true;
397 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
398 return true;
399 return false;
400 case 512:
401 if (Subtarget.hasAVX512() && Subtarget.hasEVEX512())
402 return true;
403 return false;
404 default:
405 return false; // Don't have NonTemporal vector memory ops of this size.
406 }
407 }
408 return true;
409}
410
411/// Return the entry encoding for a jump table in the
412/// current function. The returned value is a member of the
413/// MachineJumpTableInfo::JTEntryKind enum.
414unsigned X86TargetLowering::getJumpTableEncoding() const {
415 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
416 // symbol.
417 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
418 return MachineJumpTableInfo::EK_Custom32;
419 if (isPositionIndependent() &&
420 getTargetMachine().getCodeModel() == CodeModel::Large)
421 return MachineJumpTableInfo::EK_LabelDifference64;
422
423 // Otherwise, use the normal jump table encoding heuristics.
424 return TargetLowering::getJumpTableEncoding();
425}
426
427bool X86TargetLowering::useSoftFloat() const {
428 return Subtarget.useSoftFloat();
429}
430
431void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
432 ArgListTy &Args) const {
433
434 // Only relabel X86-32 for C / Stdcall CCs.
435 if (Subtarget.is64Bit())
436 return;
437 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
438 return;
439 unsigned ParamRegs = 0;
440 if (auto *M = MF->getFunction().getParent())
441 ParamRegs = M->getNumberRegisterParameters();
442
443 // Mark the first N int arguments as having reg
444 for (auto &Arg : Args) {
445 Type *T = Arg.Ty;
446 if (T->isIntOrPtrTy())
447 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
448 unsigned numRegs = 1;
449 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
450 numRegs = 2;
451 if (ParamRegs < numRegs)
452 return;
453 ParamRegs -= numRegs;
454 Arg.IsInReg = true;
455 }
456 }
457}
458
459const MCExpr *
460X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
461 const MachineBasicBlock *MBB,
462 unsigned uid,MCContext &Ctx) const{
463 assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
464 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
465 // entries.
466 return MCSymbolRefExpr::create(MBB->getSymbol(), MCSymbolRefExpr::VK_GOTOFF,
467 Ctx);
468}
469
470/// Returns relocation base for the given PIC jumptable.
471SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
472 SelectionDAG &DAG) const {
473 if (!Subtarget.is64Bit())
474 // This doesn't have SDLoc associated with it, but is not really the
475 // same as a Register.
476 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
477 getPointerTy(DAG.getDataLayout()));
478 return Table;
479}
480
481/// This returns the relocation base for the given PIC jumptable,
482/// the same as getPICJumpTableRelocBase, but as an MCExpr.
483const MCExpr *X86TargetLowering::
484getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
485 MCContext &Ctx) const {
486 // X86-64 uses RIP relative addressing based on the jump table label.
487 if (Subtarget.isPICStyleRIPRel() ||
488 (Subtarget.is64Bit() &&
491
492 // Otherwise, the reference is relative to the PIC base.
493 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
494}
495
496std::pair<const TargetRegisterClass *, uint8_t>
497X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
498 MVT VT) const {
499 const TargetRegisterClass *RRC = nullptr;
500 uint8_t Cost = 1;
501 switch (VT.SimpleTy) {
502 default:
503 return TargetLowering::findRepresentativeClass(TRI, VT);
504 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
505 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
506 break;
507 case MVT::x86mmx:
508 RRC = &X86::VR64RegClass;
509 break;
510 case MVT::f32: case MVT::f64:
511 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
512 case MVT::v4f32: case MVT::v2f64:
513 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
514 case MVT::v8f32: case MVT::v4f64:
515 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
516 case MVT::v16f32: case MVT::v8f64:
517 RRC = &X86::VR128XRegClass;
518 break;
519 }
520 return std::make_pair(RRC, Cost);
521}
522
523unsigned X86TargetLowering::getAddressSpace() const {
524 if (Subtarget.is64Bit())
525 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
526 return 256;
527}
528
529static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
530 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
531 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
532}
533
534static Constant *SegmentOffset(IRBuilderBase &IRB,
535 int Offset, unsigned AddressSpace) {
536 return ConstantExpr::getIntToPtr(
537 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
538 IRB.getPtrTy(AddressSpace));
539}
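// Usage sketch (illustrative): on x86-64 Linux the glibc stack guard lives at
// %fs:0x28, and address space 257 denotes FS on X86 (256 denotes GS), so
//
//   Constant *Guard = SegmentOffset(IRB, /*Offset=*/0x28, /*AddressSpace=*/257);
//
// yields a pointer in address space 257 that later lowers to an FS-relative
// access.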
540
541Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
542 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
543 // tcbhead_t; use it instead of the usual global variable (see
544 // sysdeps/{i386,x86_64}/nptl/tls.h)
545 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
546 unsigned AddressSpace = getAddressSpace();
547
548 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
549 if (Subtarget.isTargetFuchsia())
550 return SegmentOffset(IRB, 0x10, AddressSpace);
551
552 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
553 // Some users may customize the base register and offset.
554 int Offset = M->getStackProtectorGuardOffset();
555 // If we don't set -stack-protector-guard-offset value:
556 // %fs:0x28, unless we're using a Kernel code model, in which case
557 // it's %gs:0x28. gs:0x14 on i386.
558 if (Offset == INT_MAX)
559 Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
560
561 StringRef GuardReg = M->getStackProtectorGuardReg();
562 if (GuardReg == "fs")
563 AddressSpace = X86AS::FS;
564 else if (GuardReg == "gs")
565 AddressSpace = X86AS::GS;
566
567 // Use the symbol guard if the user specified one.
568 StringRef GuardSymb = M->getStackProtectorGuardSymbol();
569 if (!GuardSymb.empty()) {
570 GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
571 if (!GV) {
572 Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
573 : Type::getInt32Ty(M->getContext());
574 GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
575 nullptr, GuardSymb, nullptr,
576 GlobalValue::NotThreadLocal, AddressSpace);
577 if (!Subtarget.isTargetDarwin())
578 GV->setDSOLocal(M->getDirectAccessExternalData());
579 }
580 return GV;
581 }
582
583 return SegmentOffset(IRB, Offset, AddressSpace);
584 }
585 return TargetLowering::getIRStackGuard(IRB);
586}
587
588void X86TargetLowering::insertSSPDeclarations(Module &M) const {
589 // MSVC CRT provides functionality for stack protection.
590 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
591 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
592 // MSVC CRT has a global variable holding security cookie.
593 M.getOrInsertGlobal("__security_cookie",
594 PointerType::getUnqual(M.getContext()));
595
596 // MSVC CRT has a function to validate security cookie.
597 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
598 "__security_check_cookie", Type::getVoidTy(M.getContext()),
599 PointerType::getUnqual(M.getContext()));
600 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
601 F->setCallingConv(CallingConv::X86_FastCall);
602 F->addParamAttr(0, Attribute::AttrKind::InReg);
603 }
604 return;
605 }
606
607 StringRef GuardMode = M.getStackProtectorGuard();
608
609 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
610 if ((GuardMode == "tls" || GuardMode.empty()) &&
611 hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
612 return;
613 TargetLowering::insertSSPDeclarations(M);
614}
615
616Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
617 // MSVC CRT has a global variable holding security cookie.
618 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
619 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
620 return M.getGlobalVariable("__security_cookie");
621 }
622 return TargetLowering::getSDagStackGuard(M);
623}
624
625Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
626 // MSVC CRT has a function to validate security cookie.
627 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
628 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
629 return M.getFunction("__security_check_cookie");
630 }
631 return TargetLowering::getSSPStackGuardCheck(M);
632}
633
634Value *
635X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
636 // Android provides a fixed TLS slot for the SafeStack pointer. See the
637 // definition of TLS_SLOT_SAFESTACK in
638 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
639 if (Subtarget.isTargetAndroid()) {
640 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
641 // %gs:0x24 on i386
642 int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
643 return SegmentOffset(IRB, Offset, getAddressSpace());
644 }
645
646 // Fuchsia is similar.
647 if (Subtarget.isTargetFuchsia()) {
648 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
649 return SegmentOffset(IRB, 0x18, getAddressSpace());
650 }
651
652 return TargetLowering::getSafeStackPointerLocation(IRB);
653}
654
655//===----------------------------------------------------------------------===//
656// Return Value Calling Convention Implementation
657//===----------------------------------------------------------------------===//
658
659bool X86TargetLowering::CanLowerReturn(
660 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
661 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
662 SmallVector<CCValAssign, 16> RVLocs;
663 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
664 return CCInfo.CheckReturn(Outs, RetCC_X86);
665}
666
667const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
668 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
669 return ScratchRegs;
670}
671
672ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
673 static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR};
674 return RCRegs;
675}
676
677/// Lowers mask values (v*i1) to the local register values
678/// \returns DAG node after lowering to register type
679static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
680 const SDLoc &DL, SelectionDAG &DAG) {
681 EVT ValVT = ValArg.getValueType();
682
683 if (ValVT == MVT::v1i1)
684 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ValLoc, ValArg,
685 DAG.getIntPtrConstant(0, DL));
686
687 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
688 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
689 // Two stage lowering might be required
690 // bitcast: v8i1 -> i8 / v16i1 -> i16
691 // anyextend: i8 -> i32 / i16 -> i32
692 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
693 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
694 if (ValLoc == MVT::i32)
695 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValToCopy);
696 return ValToCopy;
697 }
698
699 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
700 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
701 // One stage lowering is required
702 // bitcast: v32i1 -> i32 / v64i1 -> i64
703 return DAG.getBitcast(ValLoc, ValArg);
704 }
705
706 return DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValArg);
707}
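// Example of the two-stage path (illustrative sketch, not upstream code):
// returning a v16i1 mask in an i32 location first bitcasts v16i1 -> i16 and
// then any-extends i16 -> i32, roughly
//
//   SDValue Tmp = DAG.getBitcast(MVT::i16, Mask);
//   SDValue Ret = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Tmp);
//
// whereas v32i1 -> i32 and v64i1 -> i64 need only the single bitcast.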
708
709/// Breaks v64i1 value into two registers and adds the new node to the DAG
710static void Passv64i1ArgInRegs(
711 const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg,
712 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
713 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
714 assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
715 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
716 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
717 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
718 "The value should reside in two registers");
719
720 // Before splitting the value we cast it to i64
721 Arg = DAG.getBitcast(MVT::i64, Arg);
722
723 // Splitting the value into two i32 types
724 SDValue Lo, Hi;
725 std::tie(Lo, Hi) = DAG.SplitScalar(Arg, DL, MVT::i32, MVT::i32);
726
727 // Attach the two i32 types into corresponding registers
728 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
729 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
730}
731
733X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
734 bool isVarArg,
735 const SmallVectorImpl<ISD::OutputArg> &Outs,
736 const SmallVectorImpl<SDValue> &OutVals,
737 const SDLoc &dl, SelectionDAG &DAG) const {
738 MachineFunction &MF = DAG.getMachineFunction();
739 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
740
741 // In some cases we need to disable registers from the default CSR list.
742 // For example, when they are used as return registers (preserve_* and X86's
743 // regcall) or for argument passing (X86's regcall).
744 bool ShouldDisableCalleeSavedRegister =
745 shouldDisableRetRegFromCSR(CallConv) ||
746 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
747
748 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
749 report_fatal_error("X86 interrupts may not return any value");
750
751 SmallVector<CCValAssign, 16> RVLocs;
752 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
753 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
754
755 SmallVector<std::pair<Register, SDValue>, 4> RetVals;
756 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
757 ++I, ++OutsIndex) {
758 CCValAssign &VA = RVLocs[I];
759 assert(VA.isRegLoc() && "Can only return in registers!");
760
761 // Add the register to the CalleeSaveDisableRegs list.
762 if (ShouldDisableCalleeSavedRegister)
763 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
764
765 SDValue ValToCopy = OutVals[OutsIndex];
766 EVT ValVT = ValToCopy.getValueType();
767
768 // Promote values to the appropriate types.
769 if (VA.getLocInfo() == CCValAssign::SExt)
770 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
771 else if (VA.getLocInfo() == CCValAssign::ZExt)
772 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
773 else if (VA.getLocInfo() == CCValAssign::AExt) {
774 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
775 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
776 else
777 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
778 }
779 else if (VA.getLocInfo() == CCValAssign::BCvt)
780 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
781
783 "Unexpected FP-extend for return value.");
784
785 // Report an error if we have attempted to return a value via an XMM
786 // register and SSE was disabled.
787 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
788 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
789 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
790 } else if (!Subtarget.hasSSE2() &&
791 X86::FR64XRegClass.contains(VA.getLocReg()) &&
792 ValVT == MVT::f64) {
793 // When returning a double via an XMM register, report an error if SSE2 is
794 // not enabled.
795 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
796 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
797 }
798
799 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
800 // the RET instruction and handled by the FP Stackifier.
801 if (VA.getLocReg() == X86::FP0 ||
802 VA.getLocReg() == X86::FP1) {
803 // If this is a copy from an xmm register to ST(0), use an FPExtend to
804 // change the value to the FP stack register class.
805 if (isScalarFPTypeInSSEReg(VA.getValVT()))
806 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
807 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
808 // Don't emit a copytoreg.
809 continue;
810 }
811
812 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
813 // which is returned in RAX / RDX.
814 if (Subtarget.is64Bit()) {
815 if (ValVT == MVT::x86mmx) {
816 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
817 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
818 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
819 ValToCopy);
820 // If we don't have SSE2 available, convert to v4f32 so the generated
821 // register is legal.
822 if (!Subtarget.hasSSE2())
823 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
824 }
825 }
826 }
827
828 if (VA.needsCustom()) {
829 assert(VA.getValVT() == MVT::v64i1 &&
830 "Currently the only custom case is when we split v64i1 to 2 regs");
831
832 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
833 Subtarget);
834
835 // Add the second register to the CalleeSaveDisableRegs list.
836 if (ShouldDisableCalleeSavedRegister)
837 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
838 } else {
839 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
840 }
841 }
842
843 SDValue Glue;
844 SmallVector<SDValue, 6> RetOps;
845 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
846 // Operand #1 = Bytes To Pop
847 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
848 MVT::i32));
849
850 // Copy the result values into the output registers.
851 for (auto &RetVal : RetVals) {
852 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
853 RetOps.push_back(RetVal.second);
854 continue; // Don't emit a copytoreg.
855 }
856
857 Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Glue);
858 Glue = Chain.getValue(1);
859 RetOps.push_back(
860 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
861 }
862
863 // Swift calling convention does not require we copy the sret argument
864 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
865
866 // All x86 ABIs require that for returning structs by value we copy
867 // the sret argument into %rax/%eax (depending on ABI) for the return.
868 // We saved the argument into a virtual register in the entry block,
869 // so now we copy the value out and into %rax/%eax.
870 //
871 // Checking Function.hasStructRetAttr() here is insufficient because the IR
872 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
873 // false, then an sret argument may be implicitly inserted in the SelDAG. In
874 // either case FuncInfo->setSRetReturnReg() will have been called.
875 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
876 // When we have both sret and another return value, we should use the
877 // original Chain stored in RetOps[0], instead of the current Chain updated
878 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
879
880 // For the case of sret and another return value, we have
881 // Chain_0 at the function entry
882 // Chain_1 = getCopyToReg(Chain_0) in the above loop
883 // If we use Chain_1 in getCopyFromReg, we will have
884 // Val = getCopyFromReg(Chain_1)
885 // Chain_2 = getCopyToReg(Chain_1, Val) from below
886
887 // getCopyToReg(Chain_0) will be glued together with
888 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
889 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
890 // Data dependency from Unit B to Unit A due to usage of Val in
891 // getCopyToReg(Chain_1, Val)
892 // Chain dependency from Unit A to Unit B
893
894 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
895 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
896 getPointerTy(MF.getDataLayout()));
897
898 Register RetValReg
899 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
900 X86::RAX : X86::EAX;
901 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Glue);
902 Glue = Chain.getValue(1);
903
904 // RAX/EAX now acts like a return value.
905 RetOps.push_back(
906 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
907
908 // Add the returned register to the CalleeSaveDisableRegs list. Don't do
909 // this however for preserve_most/preserve_all to minimize the number of
910 // callee-saved registers for these CCs.
911 if (ShouldDisableCalleeSavedRegister &&
912 CallConv != CallingConv::PreserveAll &&
913 CallConv != CallingConv::PreserveMost)
914 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
915 }
916
917 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
918 const MCPhysReg *I =
919 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
920 if (I) {
921 for (; *I; ++I) {
922 if (X86::GR64RegClass.contains(*I))
923 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
924 else
925 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
926 }
927 }
928
929 RetOps[0] = Chain; // Update chain.
930
931 // Add the glue if we have it.
932 if (Glue.getNode())
933 RetOps.push_back(Glue);
934
935 X86ISD::NodeType opcode = X86ISD::RET_GLUE;
936 if (CallConv == CallingConv::X86_INTR)
937 opcode = X86ISD::IRET;
938 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
939}
940
941bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
942 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
943 return false;
944
945 SDValue TCChain = Chain;
946 SDNode *Copy = *N->use_begin();
947 if (Copy->getOpcode() == ISD::CopyToReg) {
948 // If the copy has a glue operand, we conservatively assume it isn't safe to
949 // perform a tail call.
950 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
951 return false;
952 TCChain = Copy->getOperand(0);
953 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
954 return false;
955
956 bool HasRet = false;
957 for (const SDNode *U : Copy->uses()) {
958 if (U->getOpcode() != X86ISD::RET_GLUE)
959 return false;
960 // If we are returning more than one value, we can definitely
961 // not make a tail call; see PR19530.
962 if (U->getNumOperands() > 4)
963 return false;
964 if (U->getNumOperands() == 4 &&
965 U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
966 return false;
967 HasRet = true;
968 }
969
970 if (!HasRet)
971 return false;
972
973 Chain = TCChain;
974 return true;
975}
976
977EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
978 ISD::NodeType ExtendKind) const {
979 MVT ReturnMVT = MVT::i32;
980
981 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
982 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
983 // The ABI does not require i1, i8 or i16 to be extended.
984 //
985 // On Darwin, there is code in the wild relying on Clang's old behaviour of
986 // always extending i8/i16 return values, so keep doing that for now.
987 // (PR26665).
988 ReturnMVT = MVT::i8;
989 }
990
991 EVT MinVT = getRegisterType(Context, ReturnMVT);
992 return VT.bitsLT(MinVT) ? MinVT : VT;
993}
994
995/// Reads two 32 bit registers and creates a 64 bit mask value.
996/// \param VA The current 32 bit value that needs to be assigned.
997/// \param NextVA The next 32 bit value that needs to be assigned.
998/// \param Root The parent DAG node.
999/// \param [in,out] InGlue Represents an SDValue in the parent DAG node used
1000/// for glue purposes. If the DAG already uses a
1001/// physical register instead of a virtual one, we
1002/// should glue our new SDValue to the InGlue SDValue.
1003/// \return a new 64-bit SDValue.
1004static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
1005 SDValue &Root, SelectionDAG &DAG,
1006 const SDLoc &DL, const X86Subtarget &Subtarget,
1007 SDValue *InGlue = nullptr) {
1008 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
1009 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
1010 assert(VA.getValVT() == MVT::v64i1 &&
1011 "Expecting first location of 64 bit width type");
1012 assert(NextVA.getValVT() == VA.getValVT() &&
1013 "The locations should have the same type");
1014 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
1015 "The values should reside in two registers");
1016
1017 SDValue Lo, Hi;
1018 SDValue ArgValueLo, ArgValueHi;
1019
1020 MachineFunction &MF = DAG.getMachineFunction();
1021 const TargetRegisterClass *RC = &X86::GR32RegClass;
1022
1023 // Read a 32 bit value from the registers.
1024 if (nullptr == InGlue) {
1025 // When no physical register is present,
1026 // create an intermediate virtual register.
1027 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1028 ArgValueLo = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1029 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
1030 ArgValueHi = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1031 } else {
1032 // When a physical register is available read the value from it and glue
1033 // the reads together.
1034 ArgValueLo =
1035 DAG.getCopyFromReg(Root, DL, VA.getLocReg(), MVT::i32, *InGlue);
1036 *InGlue = ArgValueLo.getValue(2);
1037 ArgValueHi =
1038 DAG.getCopyFromReg(Root, DL, NextVA.getLocReg(), MVT::i32, *InGlue);
1039 *InGlue = ArgValueHi.getValue(2);
1040 }
1041
1042 // Convert the i32 type into v32i1 type.
1043 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
1044
1045 // Convert the i32 type into v32i1 type.
1046 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
1047
1048 // Concatenate the two values together.
1049 return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v64i1, Lo, Hi);
1050}
1051
1052/// The function will lower a register of various sizes (8/16/32/64)
1053/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
1054/// \returns a DAG node contains the operand after lowering to mask type.
1055static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
1056 const EVT &ValLoc, const SDLoc &DL,
1057 SelectionDAG &DAG) {
1058 SDValue ValReturned = ValArg;
1059
1060 if (ValVT == MVT::v1i1)
1061 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, ValReturned);
1062
1063 if (ValVT == MVT::v64i1) {
1064 // On a 32-bit machine this case is handled by getv64i1Argument.
1065 assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
1066 // On a 64-bit machine there is no need to truncate the value, only bitcast.
1067 } else {
1068 MVT MaskLenVT;
1069 switch (ValVT.getSimpleVT().SimpleTy) {
1070 case MVT::v8i1:
1071 MaskLenVT = MVT::i8;
1072 break;
1073 case MVT::v16i1:
1074 MaskLenVT = MVT::i16;
1075 break;
1076 case MVT::v32i1:
1077 MaskLenVT = MVT::i32;
1078 break;
1079 default:
1080 llvm_unreachable("Expecting a vector of i1 types");
1081 }
1082
1083 ValReturned = DAG.getNode(ISD::TRUNCATE, DL, MaskLenVT, ValReturned);
1084 }
1085 return DAG.getBitcast(ValVT, ValReturned);
1086}
1087
1088/// Lower the result values of a call into the
1089/// appropriate copies out of appropriate physical registers.
1090///
1091SDValue X86TargetLowering::LowerCallResult(
1092 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
1093 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1094 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
1095 uint32_t *RegMask) const {
1096
1097 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1098 // Assign locations to each value returned by this call.
1099 SmallVector<CCValAssign, 16> RVLocs;
1100 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1101 *DAG.getContext());
1102 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
1103
1104 // Copy all of the result registers out of their specified physreg.
1105 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
1106 ++I, ++InsIndex) {
1107 CCValAssign &VA = RVLocs[I];
1108 EVT CopyVT = VA.getLocVT();
1109
1110 // In some calling conventions we need to remove the used registers
1111 // from the register mask.
1112 if (RegMask) {
1113 for (MCPhysReg SubReg : TRI->subregs_inclusive(VA.getLocReg()))
1114 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
1115 }
1116
1117 // Report an error if there was an attempt to return FP values via XMM
1118 // registers.
1119 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
1120 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
1121 if (VA.getLocReg() == X86::XMM1)
1122 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1123 else
1124 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1125 } else if (!Subtarget.hasSSE2() &&
1126 X86::FR64XRegClass.contains(VA.getLocReg()) &&
1127 CopyVT == MVT::f64) {
1128 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
1129 if (VA.getLocReg() == X86::XMM1)
1130 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1131 else
1132 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1133 }
1134
1135 // If we prefer to use the value in xmm registers, copy it out as f80 and
1136 // use a truncate to move it from fp stack reg to xmm reg.
1137 bool RoundAfterCopy = false;
1138 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
1139 isScalarFPTypeInSSEReg(VA.getValVT())) {
1140 if (!Subtarget.hasX87())
1141 report_fatal_error("X87 register return with X87 disabled");
1142 CopyVT = MVT::f80;
1143 RoundAfterCopy = (CopyVT != VA.getLocVT());
1144 }
1145
1146 SDValue Val;
1147 if (VA.needsCustom()) {
1148 assert(VA.getValVT() == MVT::v64i1 &&
1149 "Currently the only custom case is when we split v64i1 to 2 regs");
1150 Val =
1151 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InGlue);
1152 } else {
1153 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue)
1154 .getValue(1);
1155 Val = Chain.getValue(0);
1156 InGlue = Chain.getValue(2);
1157 }
1158
1159 if (RoundAfterCopy)
1160 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
1161 // This truncation won't change the value.
1162 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
1163
1164 if (VA.isExtInLoc()) {
1165 if (VA.getValVT().isVector() &&
1166 VA.getValVT().getScalarType() == MVT::i1 &&
1167 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1168 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1169 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1170 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
1171 } else
1172 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
1173 }
1174
1175 if (VA.getLocInfo() == CCValAssign::BCvt)
1176 Val = DAG.getBitcast(VA.getValVT(), Val);
1177
1178 InVals.push_back(Val);
1179 }
1180
1181 return Chain;
1182}
1183
1184//===----------------------------------------------------------------------===//
1185// C & StdCall & Fast Calling Convention implementation
1186//===----------------------------------------------------------------------===//
1187// The StdCall calling convention is the standard for many Windows API
1188// routines. It differs from the C calling convention only slightly: the
1189// callee cleans up the stack, not the caller, and symbols are decorated
1190// in a particular way. It doesn't support any vector arguments.
1191// For info on fast calling convention see Fast Calling Convention (tail call)
1192// implementation LowerX86_32FastCCCallTo.
1193
1194/// Determines whether Args, either a set of outgoing arguments to a call, or a
1195/// set of incoming args of a call, contains an sret pointer that the callee
1196/// pops
1197template <typename T>
1198static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
1199 const X86Subtarget &Subtarget) {
1200 // Not C++20 (yet), so no concepts available.
1201 static_assert(std::is_same_v<T, ISD::OutputArg> ||
1202 std::is_same_v<T, ISD::InputArg>,
1203 "requires ISD::OutputArg or ISD::InputArg");
1204
1205 // Only 32-bit pops the sret. It's a 64-bit world these days, so early-out
1206 // for most compilations.
1207 if (!Subtarget.is32Bit())
1208 return false;
1209
1210 if (Args.empty())
1211 return false;
1212
1213 // Most calls do not have an sret argument, check the arg next.
1214 const ISD::ArgFlagsTy &Flags = Args[0].Flags;
1215 if (!Flags.isSRet() || Flags.isInReg())
1216 return false;
1217
1218 // The MSVC ABI does not pop the sret.
1219 if (Subtarget.getTargetTriple().isOSMSVCRT())
1220 return false;
1221
1222 // MCUs don't pop the sret
1223 if (Subtarget.isTargetMCU())
1224 return false;
1225
1226 // Callee pops argument
1227 return true;
1228}
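// Concrete case (illustrative): for a 32-bit Linux (non-MSVC, non-MCU) call
// such as
//
//   struct Big { int A[4]; };
//   Big F();            // lowered with a hidden sret pointer argument
//
// the first outgoing argument carries the sret flag without inreg, so this
// helper returns true and the callee is expected to pop the 4-byte sret
// pointer (e.g. `ret $4`).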
1229
1230/// Make a copy of an aggregate at address specified by "Src" to address
1231/// "Dst" with size and alignment information specified by the specific
1232/// parameter attribute. The copy will be passed as a byval function parameter.
1233static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
1234 SDValue Chain, ISD::ArgFlagsTy Flags,
1235 SelectionDAG &DAG, const SDLoc &dl) {
1236 SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
1237
1238 return DAG.getMemcpy(
1239 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
1240 /*isVolatile*/ false, /*AlwaysInline=*/true,
1241 /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
1242}
1243
1244/// Return true if the calling convention is one that we can guarantee TCO for.
1245static bool canGuaranteeTCO(CallingConv::ID CC) {
1246 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
1247 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
1248 CC == CallingConv::Tail || CC == CallingConv::SwiftTail);
1249}
1250
1251/// Return true if we might ever do TCO for calls with this calling convention.
1252static bool mayTailCallThisCC(CallingConv::ID CC) {
1253 switch (CC) {
1254 // C calling conventions:
1255 case CallingConv::C:
1256 case CallingConv::Win64:
1257 case CallingConv::X86_64_SysV:
1258 case CallingConv::PreserveNone:
1259 // Callee pop conventions:
1260 case CallingConv::X86_ThisCall:
1261 case CallingConv::X86_StdCall:
1262 case CallingConv::X86_VectorCall:
1263 case CallingConv::X86_FastCall:
1264 // Swift:
1265 case CallingConv::Swift:
1266 return true;
1267 default:
1268 return canGuaranteeTCO(CC);
1269 }
1270}
1271
1272/// Return true if the function is being made into a tailcall target by
1273/// changing its ABI.
1274static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
1275 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
1276 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
1277}
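// Illustrative consequence of the predicates above: a call marked `tail` in
// IR with `fastcc` is only *guaranteed* to be a tail call when
// GuaranteedTailCallOpt (-tailcallopt) is enabled, whereas `tailcc` and
// `swifttailcc` calls are always guaranteed; plain `ccc` calls may still be
// tail-called opportunistically via mayTailCallThisCC(). For example:
//
//   %r = tail call fastcc i32 @callee(i32 %x)  ; guaranteed only with -tailcallopt
//   %r = tail call tailcc i32 @callee(i32 %x)  ; always guaranteed TCO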
1278
1279bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1280 if (!CI->isTailCall())
1281 return false;
1282
1283 CallingConv::ID CalleeCC = CI->getCallingConv();
1284 if (!mayTailCallThisCC(CalleeCC))
1285 return false;
1286
1287 return true;
1288}
1289
1290SDValue
1291X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1292 const SmallVectorImpl<ISD::InputArg> &Ins,
1293 const SDLoc &dl, SelectionDAG &DAG,
1294 const CCValAssign &VA,
1295 MachineFrameInfo &MFI, unsigned i) const {
1296 // Create the nodes corresponding to a load from this parameter slot.
1297 ISD::ArgFlagsTy Flags = Ins[i].Flags;
1298 bool AlwaysUseMutable = shouldGuaranteeTCO(
1299 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
1300 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
1301 EVT ValVT;
1302 MVT PtrVT = getPointerTy(DAG.getDataLayout());
1303
1304 // If value is passed by pointer we have address passed instead of the value
1305 // itself. No need to extend if the mask value and location share the same
1306 // absolute size.
1307 bool ExtendedInMem =
1308 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
1309 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
1310
1311 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
1312 ValVT = VA.getLocVT();
1313 else
1314 ValVT = VA.getValVT();
1315
1316 // FIXME: For now, all byval parameter objects are marked mutable. This can be
1317 // changed with more analysis.
1318 // In case of tail call optimization, mark all arguments mutable, since they
1319 // could be overwritten by the lowering of arguments in case of a tail call.
1320 if (Flags.isByVal()) {
1321 unsigned Bytes = Flags.getByValSize();
1322 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
1323
1324 // FIXME: For now, all byval parameter objects are marked as aliasing. This
1325 // can be improved with deeper analysis.
1326 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
1327 /*isAliased=*/true);
1328 return DAG.getFrameIndex(FI, PtrVT);
1329 }
1330
1331 EVT ArgVT = Ins[i].ArgVT;
1332
1333 // If this is a vector that has been split into multiple parts, don't elide
1334 // the copy. The layout on the stack may not match the packed in-memory
1335 // layout.
1336 bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector();
1337
1338 // This is an argument in memory. We might be able to perform copy elision.
1339 // If the argument is passed directly in memory without any extension, then we
1340 // can perform copy elision. Large vector types, for example, may be passed
1341 // indirectly by pointer.
1342 if (Flags.isCopyElisionCandidate() &&
1343 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
1344 !ScalarizedVector) {
1345 SDValue PartAddr;
1346 if (Ins[i].PartOffset == 0) {
1347 // If this is a one-part value or the first part of a multi-part value,
1348 // create a stack object for the entire argument value type and return a
1349 // load from our portion of it. This assumes that if the first part of an
1350 // argument is in memory, the rest will also be in memory.
1351 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
1352 /*IsImmutable=*/false);
1353 PartAddr = DAG.getFrameIndex(FI, PtrVT);
1354 return DAG.getLoad(
1355 ValVT, dl, Chain, PartAddr,
1356 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
1357 }
1358
1359 // This is not the first piece of an argument in memory. See if there is
1360 // already a fixed stack object including this offset. If so, assume it
1361 // was created by the PartOffset == 0 branch above and create a load from
1362 // the appropriate offset into it.
1363 int64_t PartBegin = VA.getLocMemOffset();
1364 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
1365 int FI = MFI.getObjectIndexBegin();
1366 for (; MFI.isFixedObjectIndex(FI); ++FI) {
1367 int64_t ObjBegin = MFI.getObjectOffset(FI);
1368 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
1369 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
1370 break;
1371 }
1372 if (MFI.isFixedObjectIndex(FI)) {
1373 SDValue Addr =
1374 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
1375 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
1376 return DAG.getLoad(ValVT, dl, Chain, Addr,
1377 MachinePointerInfo::getFixedStack(
1378 DAG.getMachineFunction(), FI, Ins[i].PartOffset));
1379 }
1380 }
1381
1382 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
1383 VA.getLocMemOffset(), isImmutable);
1384
1385 // Set SExt or ZExt flag.
1386 if (VA.getLocInfo() == CCValAssign::ZExt) {
1387 MFI.setObjectZExt(FI, true);
1388 } else if (VA.getLocInfo() == CCValAssign::SExt) {
1389 MFI.setObjectSExt(FI, true);
1390 }
1391
1392 MaybeAlign Alignment;
1393 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1394 ValVT != MVT::f80)
1395 Alignment = MaybeAlign(4);
1396 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1397 SDValue Val = DAG.getLoad(
1398 ValVT, dl, Chain, FIN,
1399 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
1400 Alignment);
1401 return ExtendedInMem
1402 ? (VA.getValVT().isVector()
1403 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
1404 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
1405 : Val;
1406}
1407
1408// FIXME: Get this from tablegen.
1409static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
1410 const X86Subtarget &Subtarget) {
1411 assert(Subtarget.is64Bit());
1412
1413 if (Subtarget.isCallingConvWin64(CallConv)) {
1414 static const MCPhysReg GPR64ArgRegsWin64[] = {
1415 X86::RCX, X86::RDX, X86::R8, X86::R9
1416 };
1417 return ArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
1418 }
1419
1420 static const MCPhysReg GPR64ArgRegs64Bit[] = {
1421 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
1422 };
1423 return ArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
1424}
1425
1426// FIXME: Get this from tablegen.
1427static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
1428 CallingConv::ID CallConv,
1429 const X86Subtarget &Subtarget) {
1430 assert(Subtarget.is64Bit());
1431 if (Subtarget.isCallingConvWin64(CallConv)) {
1432 // The XMM registers which might contain var arg parameters are shadowed
1433 // in their paired GPR. So we only need to save the GPR to their home
1434 // slots.
1435 // TODO: __vectorcall will change this.
1436 return std::nullopt;
1437 }
1438
1439 bool isSoftFloat = Subtarget.useSoftFloat();
1440 if (isSoftFloat || !Subtarget.hasSSE1())
1441 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
1442 // registers.
1443 return std::nullopt;
1444
1445 static const MCPhysReg XMMArgRegs64Bit[] = {
1446 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
1447 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
1448 };
1449 return ArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
1450}
1451
1452#ifndef NDEBUG
1453static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
1454 return llvm::is_sorted(
1455 ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
1456 return A.getValNo() < B.getValNo();
1457 });
1458}
1459#endif
1460
1461namespace {
1462/// This is a helper class for lowering variable arguments parameters.
1463class VarArgsLoweringHelper {
1464public:
1465 VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
1466 SelectionDAG &DAG, const X86Subtarget &Subtarget,
1467 CallingConv::ID CallConv, CCState &CCInfo)
1468 : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
1469 TheMachineFunction(DAG.getMachineFunction()),
1470 TheFunction(TheMachineFunction.getFunction()),
1471 FrameInfo(TheMachineFunction.getFrameInfo()),
1472 FrameLowering(*Subtarget.getFrameLowering()),
1473 TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
1474 CCInfo(CCInfo) {}
1475
1476 // Lower variable arguments parameters.
1477 void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
1478
1479private:
1480 void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
1481
1482 void forwardMustTailParameters(SDValue &Chain);
1483
1484 bool is64Bit() const { return Subtarget.is64Bit(); }
1485 bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
1486
1487 X86MachineFunctionInfo *FuncInfo;
1488 const SDLoc &DL;
1489 SelectionDAG &DAG;
1490 const X86Subtarget &Subtarget;
1491 MachineFunction &TheMachineFunction;
1492 const Function &TheFunction;
1493 MachineFrameInfo &FrameInfo;
1494 const TargetFrameLowering &FrameLowering;
1495 const TargetLowering &TargLowering;
1496 CallingConv::ID CallConv;
1497 CCState &CCInfo;
1498};
1499} // namespace
1500
1501void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
1502 SDValue &Chain, unsigned StackSize) {
1503 // If the function takes variable number of arguments, make a frame index for
1504 // the start of the first vararg value... for expansion of llvm.va_start. We
1505 // can skip this if there are no va_start calls.
1506 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
1507 CallConv != CallingConv::X86_ThisCall)) {
1508 FuncInfo->setVarArgsFrameIndex(
1509 FrameInfo.CreateFixedObject(1, StackSize, true));
1510 }
1511
1512 // 64-bit calling conventions support varargs and register parameters, so we
1513 // have to do extra work to spill them in the prologue.
1514 if (is64Bit()) {
1515 // Find the first unallocated argument registers.
1516 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
1517 ArrayRef<MCPhysReg> ArgXMMs =
1518 get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
1519 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
1520 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
1521
1522 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
1523 "SSE register cannot be used when SSE is disabled!");
1524
1525 if (isWin64()) {
1526 // Get to the caller-allocated home save location. Add 8 to account
1527 // for the return address.
1528 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
1529 FuncInfo->setRegSaveFrameIndex(
1530 FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
1531 // Fixup to set vararg frame on shadow area (4 x i64).
1532 if (NumIntRegs < 4)
1533 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
1534 } else {
1535 // For X86-64, if there are vararg parameters that are passed via
1536 // registers, then we must store them to their spots on the stack so
1537 // they may be loaded by dereferencing the result of va_next.
1538 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
1539 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
1540 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
1541 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
1542 }
1543
1544 SmallVector<SDValue, 6>
1545 LiveGPRs; // list of SDValue for GPR registers keeping live input value
1546 SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers
1547 // keeping live input value
1548 SDValue ALVal; // if applicable keeps SDValue for %al register
1549
1550 // Gather all the live in physical registers.
1551 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
1552 Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
1553 LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
1554 }
1555 const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
1556 if (!AvailableXmms.empty()) {
1557 Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1558 ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
1559 for (MCPhysReg Reg : AvailableXmms) {
1560 // FastRegisterAllocator spills virtual registers at basic block
1561 // boundaries. That leads to uses of XMM registers outside of the check
1562 // for %al. Pass physical registers to VASTART_SAVE_XMM_REGS to avoid
1563 // unnecessary spilling.
1564 TheMachineFunction.getRegInfo().addLiveIn(Reg);
1565 LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
1566 }
1567 }
1568
1569 // Store the integer parameter registers.
1570 SmallVector<SDValue, 8> MemOps;
1571 SDValue RSFIN =
1572 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
1573 TargLowering.getPointerTy(DAG.getDataLayout()));
1574 unsigned Offset = FuncInfo->getVarArgsGPOffset();
1575 for (SDValue Val : LiveGPRs) {
1576 SDValue FIN = DAG.getNode(ISD::ADD, DL,
1577 TargLowering.getPointerTy(DAG.getDataLayout()),
1578 RSFIN, DAG.getIntPtrConstant(Offset, DL));
1579 SDValue Store =
1580 DAG.getStore(Val.getValue(1), DL, Val, FIN,
1581 MachinePointerInfo::getFixedStack(
1582 DAG.getMachineFunction(),
1583 FuncInfo->getRegSaveFrameIndex(), Offset));
1584 MemOps.push_back(Store);
1585 Offset += 8;
1586 }
1587
1588 // Now store the XMM (fp + vector) parameter registers.
1589 if (!LiveXMMRegs.empty()) {
1590 SmallVector<SDValue, 12> SaveXMMOps;
1591 SaveXMMOps.push_back(Chain);
1592 SaveXMMOps.push_back(ALVal);
1593 SaveXMMOps.push_back(RSFIN);
1594 SaveXMMOps.push_back(
1595 DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
1596 llvm::append_range(SaveXMMOps, LiveXMMRegs);
1597 MachineMemOperand *StoreMMO =
1598 DAG.getMachineFunction().getMachineMemOperand(
1599 MachinePointerInfo::getFixedStack(
1600 DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
1601 Offset),
1602 MachineMemOperand::MOStore, 128, Align(16));
1603 MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
1604 DL, DAG.getVTList(MVT::Other),
1605 SaveXMMOps, MVT::i8, StoreMMO));
1606 }
1607
1608 if (!MemOps.empty())
1609 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
1610 }
1611}
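// Register save area layout produced above for SysV x86-64 varargs
// (illustrative; the sizes follow directly from the code): 6 GPRs * 8 bytes +
// 8 XMM registers * 16 bytes = 176 bytes, with va_arg's gp_offset counting
// into the first 48 bytes and fp_offset into the remaining 128. On Win64 the
// four GPR home slots in the caller's shadow area are reused instead.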
1612
1613void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
1614 // Find the largest legal vector type.
1615 MVT VecVT = MVT::Other;
1616 // FIXME: Only some x86_32 calling conventions support AVX512.
1617 if (Subtarget.useAVX512Regs() &&
1618 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
1619 CallConv == CallingConv::Intel_OCL_BI)))
1620 VecVT = MVT::v16f32;
1621 else if (Subtarget.hasAVX())
1622 VecVT = MVT::v8f32;
1623 else if (Subtarget.hasSSE2())
1624 VecVT = MVT::v4f32;
1625
1626 // We forward some GPRs and some vector types.
1627 SmallVector<MVT, 2> RegParmTypes;
1628 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
1629 RegParmTypes.push_back(IntVT);
1630 if (VecVT != MVT::Other)
1631 RegParmTypes.push_back(VecVT);
1632
1633 // Compute the set of forwarded registers. The rest are scratch.
1634 SmallVectorImpl<ForwardedRegister> &Forwards =
1635 FuncInfo->getForwardedMustTailRegParms();
1636 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
1637
1638 // Forward AL for SysV x86_64 targets, since it is used for varargs.
1639 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
1640 Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1641 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
1642 }
1643
1644 // Copy all forwards from physical to virtual registers.
1645 for (ForwardedRegister &FR : Forwards) {
1646 // FIXME: Can we use a less constrained schedule?
1647 SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
1648 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
1649 TargLowering.getRegClassFor(FR.VT));
1650 Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
1651 }
1652}
1653
1654void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
1655 unsigned StackSize) {
1656 // Set FrameIndex to the 0xAAAAAAA value to mark unset state.
1657 // If necessary, it will be set to the correct value later.
1658 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
1659 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1660
1661 if (FrameInfo.hasVAStart())
1662 createVarArgAreaAndStoreRegisters(Chain, StackSize);
1663
1664 if (FrameInfo.hasMustTailInVarArgFunc())
1665 forwardMustTailParameters(Chain);
1666}
1667
1668SDValue X86TargetLowering::LowerFormalArguments(
1669 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1670 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1671 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1672 MachineFunction &MF = DAG.getMachineFunction();
1673 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
1674
1675 const Function &F = MF.getFunction();
1676 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
1677 F.getName() == "main")
1678 FuncInfo->setForceFramePointer(true);
1679
1680 MachineFrameInfo &MFI = MF.getFrameInfo();
1681 bool Is64Bit = Subtarget.is64Bit();
1682 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
1683
1684 assert(
1685 !(IsVarArg && canGuaranteeTCO(CallConv)) &&
1686 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
1687
1688 // Assign locations to all of the incoming arguments.
1689 SmallVector<CCValAssign, 16> ArgLocs;
1690 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1691
1692 // Allocate shadow area for Win64.
1693 if (IsWin64)
1694 CCInfo.AllocateStack(32, Align(8));
1695
1696 CCInfo.AnalyzeArguments(Ins, CC_X86);
1697
1698 // In vectorcall calling convention a second pass is required for the HVA
1699 // types.
1700 if (CallingConv::X86_VectorCall == CallConv) {
1701 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
1702 }
1703
1704 // The next loop assumes that the locations are in the same order of the
1705 // input arguments.
1706 assert(isSortedByValueNo(ArgLocs) &&
1707 "Argument Location list must be sorted before lowering");
1708
1709 SDValue ArgValue;
1710 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
1711 ++I, ++InsIndex) {
1712 assert(InsIndex < Ins.size() && "Invalid Ins index");
1713 CCValAssign &VA = ArgLocs[I];
1714
1715 if (VA.isRegLoc()) {
1716 EVT RegVT = VA.getLocVT();
1717 if (VA.needsCustom()) {
1718 assert(
1719 VA.getValVT() == MVT::v64i1 &&
1720 "Currently the only custom case is when we split v64i1 to 2 regs");
1721
1722 // v64i1 values, in regcall calling convention, that are
1723 // compiled to 32 bit arch, are split up into two registers.
1724 ArgValue =
1725 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
1726 } else {
1727 const TargetRegisterClass *RC;
1728 if (RegVT == MVT::i8)
1729 RC = &X86::GR8RegClass;
1730 else if (RegVT == MVT::i16)
1731 RC = &X86::GR16RegClass;
1732 else if (RegVT == MVT::i32)
1733 RC = &X86::GR32RegClass;
1734 else if (Is64Bit && RegVT == MVT::i64)
1735 RC = &X86::GR64RegClass;
1736 else if (RegVT == MVT::f16)
1737 RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
1738 else if (RegVT == MVT::f32)
1739 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
1740 else if (RegVT == MVT::f64)
1741 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
1742 else if (RegVT == MVT::f80)
1743 RC = &X86::RFP80RegClass;
1744 else if (RegVT == MVT::f128)
1745 RC = &X86::VR128RegClass;
1746 else if (RegVT.is512BitVector())
1747 RC = &X86::VR512RegClass;
1748 else if (RegVT.is256BitVector())
1749 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
1750 else if (RegVT.is128BitVector())
1751 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
1752 else if (RegVT == MVT::x86mmx)
1753 RC = &X86::VR64RegClass;
1754 else if (RegVT == MVT::v1i1)
1755 RC = &X86::VK1RegClass;
1756 else if (RegVT == MVT::v8i1)
1757 RC = &X86::VK8RegClass;
1758 else if (RegVT == MVT::v16i1)
1759 RC = &X86::VK16RegClass;
1760 else if (RegVT == MVT::v32i1)
1761 RC = &X86::VK32RegClass;
1762 else if (RegVT == MVT::v64i1)
1763 RC = &X86::VK64RegClass;
1764 else
1765 llvm_unreachable("Unknown argument type!");
1766
1767 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1768 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
1769 }
1770
1771 // If this is an 8 or 16-bit value, it is really passed promoted to 32
1772 // bits. Insert an assert[sz]ext to capture this, then truncate to the
1773 // right size.
1774 if (VA.getLocInfo() == CCValAssign::SExt)
1775 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
1776 DAG.getValueType(VA.getValVT()));
1777 else if (VA.getLocInfo() == CCValAssign::ZExt)
1778 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
1779 DAG.getValueType(VA.getValVT()));
1780 else if (VA.getLocInfo() == CCValAssign::BCvt)
1781 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
1782
1783 if (VA.isExtInLoc()) {
1784 // Handle MMX values passed in XMM regs.
1785 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
1786 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
1787 else if (VA.getValVT().isVector() &&
1788 VA.getValVT().getScalarType() == MVT::i1 &&
1789 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1790 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1791 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1792 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
1793 } else
1794 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
1795 }
1796 } else {
1797 assert(VA.isMemLoc());
1798 ArgValue =
1799 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
1800 }
1801
1802 // If value is passed via pointer - do a load.
1803 if (VA.getLocInfo() == CCValAssign::Indirect &&
1804 !(Ins[I].Flags.isByVal() && VA.isRegLoc())) {
1805 ArgValue =
1806 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
1807 }
1808
1809 InVals.push_back(ArgValue);
1810 }
1811
1812 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1813 if (Ins[I].Flags.isSwiftAsync()) {
1814 auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
1815 if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF))
1816 X86FI->setHasSwiftAsyncContext(true);
1817 else {
1818 int PtrSize = Subtarget.is64Bit() ? 8 : 4;
1819 int FI =
1820 MF.getFrameInfo().CreateStackObject(PtrSize, Align(PtrSize), false);
1821 X86FI->setSwiftAsyncContextFrameIdx(FI);
1822 SDValue St = DAG.getStore(
1823 DAG.getEntryNode(), dl, InVals[I],
1824 DAG.getFrameIndex(FI, PtrSize == 8 ? MVT::i64 : MVT::i32),
1825 MachinePointerInfo::getFixedStack(MF, FI));
1826 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
1827 }
1828 }
1829
1830 // Swift calling convention does not require we copy the sret argument
1831 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
1832 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
1833 continue;
1834
1835 // All x86 ABIs require that for returning structs by value we copy the
1836 // sret argument into %rax/%eax (depending on ABI) for the return. Save
1837 // the argument into a virtual register so that we can access it from the
1838 // return points.
1839 if (Ins[I].Flags.isSRet()) {
1840 assert(!FuncInfo->getSRetReturnReg() &&
1841 "SRet return has already been set");
1842 MVT PtrTy = getPointerTy(DAG.getDataLayout());
1843 Register Reg =
1844 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
1845 FuncInfo->setSRetReturnReg(Reg);
1846 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
1847 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
1848 break;
1849 }
1850 }
1851
1852 unsigned StackSize = CCInfo.getStackSize();
1853 // Align stack specially for tail calls.
1854 if (shouldGuaranteeTCO(CallConv,
1855 MF.getTarget().Options.GuaranteedTailCallOpt))
1856 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
1857
1858 if (IsVarArg)
1859 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
1860 .lowerVarArgsParameters(Chain, StackSize);
1861
1862 // Some CCs need callee pop.
1863 if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
1864 MF.getTarget().Options.GuaranteedTailCallOpt)) {
1865 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
1866 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
1867 // X86 interrupts must pop the error code (and the alignment padding) if
1868 // present.
1869 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
1870 } else {
1871 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
1872 // If this is an sret function, the return should pop the hidden pointer.
1873 if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget))
1874 FuncInfo->setBytesToPopOnReturn(4);
1875 }
1876
1877 if (!Is64Bit) {
1878 // RegSaveFrameIndex is X86-64 only.
1879 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1880 }
1881
1882 FuncInfo->setArgumentStackSize(StackSize);
1883
1884 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
1885 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
1886 if (Personality == EHPersonality::CoreCLR) {
1887 assert(Is64Bit);
1888 // TODO: Add a mechanism to frame lowering that will allow us to indicate
1889 // that we'd prefer this slot be allocated towards the bottom of the frame
1890 // (i.e. near the stack pointer after allocating the frame). Every
1891 // funclet needs a copy of this slot in its (mostly empty) frame, and the
1892 // offset from the bottom of this and each funclet's frame must be the
1893 // same, so the size of funclets' (mostly empty) frames is dictated by
1894 // how far this slot is from the bottom (since they allocate just enough
1895 // space to accommodate holding this slot at the correct offset).
1896 int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
1897 EHInfo->PSPSymFrameIdx = PSPSymFI;
1898 }
1899 }
1900
1901 if (shouldDisableArgRegFromCSR(CallConv) ||
1902 F.hasFnAttribute("no_caller_saved_registers")) {
1903 MachineRegisterInfo &MRI = MF.getRegInfo();
1904 for (std::pair<Register, Register> Pair : MRI.liveins())
1905 MRI.disableCalleeSavedRegister(Pair.first);
1906 }
1907
1908 if (CallingConv::PreserveNone == CallConv)
1909 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1910 if (Ins[I].Flags.isSwiftSelf() || Ins[I].Flags.isSwiftAsync() ||
1911 Ins[I].Flags.isSwiftError()) {
1912 errorUnsupported(DAG, dl,
1913 "Swift attributes can't be used with preserve_none");
1914 break;
1915 }
1916 }
1917
1918 return Chain;
1919}
1920
1921SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1922 SDValue Arg, const SDLoc &dl,
1923 SelectionDAG &DAG,
1924 const CCValAssign &VA,
1925 ISD::ArgFlagsTy Flags,
1926 bool isByVal) const {
1927 unsigned LocMemOffset = VA.getLocMemOffset();
1928 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1929 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1930 StackPtr, PtrOff);
1931 if (isByVal)
1932 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
1933
1934 MaybeAlign Alignment;
1935 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1936 Arg.getSimpleValueType() != MVT::f80)
1937 Alignment = MaybeAlign(4);
1938 return DAG.getStore(
1939 Chain, dl, Arg, PtrOff,
1940 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
1941 Alignment);
1942}
1943
1944/// Emit a load of return address if tail call
1945/// optimization is performed and it is required.
1946SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
1947 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
1948 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
1949 // Adjust the Return address stack slot.
1950 EVT VT = getPointerTy(DAG.getDataLayout());
1951 OutRetAddr = getReturnAddressFrameIndex(DAG);
1952
1953 // Load the "old" Return address.
1954 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
1955 return SDValue(OutRetAddr.getNode(), 1);
1956}
1957
1958/// Emit a store of the return address if tail call
1959/// optimization is performed and it is required (FPDiff!=0).
1960 static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
1961 SDValue Chain, SDValue RetAddrFrIdx,
1962 EVT PtrVT, unsigned SlotSize,
1963 int FPDiff, const SDLoc &dl) {
1964 // Store the return address to the appropriate stack slot.
1965 if (!FPDiff) return Chain;
1966 // Calculate the new stack slot for the return address.
1967 int NewReturnAddrFI =
1968 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
1969 false);
1970 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
1971 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
1972 MachinePointerInfo::getFixedStack(
1973 DAG.getMachineFunction(), NewReturnAddrFI));
1974 return Chain;
1975}
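As a quick illustration of the FPDiff bookkeeping used by EmitTailCallLoadRetAddr and EmitTailCallStoreRetAddr: FPDiff is the caller's popped-byte count minus the bytes the tail call needs (computed in LowerCall), and when it is non-zero the return address is re-stored in a fixed object at FPDiff - SlotSize. The snippet below is an editorial sketch with made-up byte counts, not code from this file.

// Editorial sketch: the offset arithmetic behind the fixed object created in
// EmitTailCallStoreRetAddr. The byte counts are hypothetical.
#include <cstdio>

int main() {
  const int SlotSize = 8;                       // return-address slot on x86-64
  int NumBytesCallerPushed = 16;                // caller's incoming argument area
  int NumBytes = 32;                            // bytes the tail call needs
  int FPDiff = NumBytesCallerPushed - NumBytes; // -16: the frame must grow
  int NewRetAddrOffset = FPDiff - SlotSize;     // -24: new fixed-object offset
  std::printf("FPDiff = %d, new return-address slot offset = %d\n",
              FPDiff, NewRetAddrOffset);
  return 0;
}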
1976
1977/// Returns a vector_shuffle mask for an movs{s|d}, movd
1978/// operation of specified width.
1979SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
1980 SDValue V1, SDValue V2) const {
1981 unsigned NumElems = VT.getVectorNumElements();
1982 SmallVector<int, 8> Mask;
1983 Mask.push_back(NumElems);
1984 for (unsigned i = 1; i != NumElems; ++i)
1985 Mask.push_back(i);
1986 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
1987}
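For example, for MVT::v4f32 the mask built above is {4, 1, 2, 3}: lane 0 is taken from V2 and lanes 1-3 from V1, which is the movss/movsd-style blend the helper's name refers to. The standalone snippet below is editorial, not part of the file; it only prints that mask.

// Editorial sketch: the shuffle mask getMOVL builds for a 4-element type.
#include <cstdio>
#include <vector>

int main() {
  const unsigned NumElems = 4;  // e.g. MVT::v4f32
  std::vector<int> Mask;
  Mask.push_back(NumElems);     // element 0 comes from V2
  for (unsigned i = 1; i != NumElems; ++i)
    Mask.push_back(i);          // remaining elements come from V1
  for (int M : Mask)
    std::printf("%d ", M);      // prints: 4 1 2 3
  std::printf("\n");
  return 0;
}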
1988
1989SDValue
1990X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1991 SmallVectorImpl<SDValue> &InVals) const {
1992 SelectionDAG &DAG = CLI.DAG;
1993 SDLoc &dl = CLI.DL;
1994 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1995 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1996 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1997 SDValue Chain = CLI.Chain;
1998 SDValue Callee = CLI.Callee;
1999 CallingConv::ID CallConv = CLI.CallConv;
2000 bool &isTailCall = CLI.IsTailCall;
2001 bool isVarArg = CLI.IsVarArg;
2002 const auto *CB = CLI.CB;
2003
2004 MachineFunction &MF = DAG.getMachineFunction();
2005 bool Is64Bit = Subtarget.is64Bit();
2006 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
2007 bool IsSibcall = false;
2008 bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
2009 CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
2010 bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
2011 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
2012 bool HasNCSR = (CB && isa<CallInst>(CB) &&
2013 CB->hasFnAttr("no_caller_saved_registers"));
2014 bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
2015 bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
2016 bool IsCFICall = IsIndirectCall && CLI.CFIType;
2017 const Module *M = MF.getMMI().getModule();
2018 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
2019
2020 MachineFunction::CallSiteInfo CSInfo;
2021 if (CallConv == CallingConv::X86_INTR)
2022 report_fatal_error("X86 interrupts may not be called directly");
2023
2024 // Analyze operands of the call, assigning locations to each operand.
2025 SmallVector<CCValAssign, 16> ArgLocs;
2026 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2027
2028 // Allocate shadow area for Win64.
2029 if (IsWin64)
2030 CCInfo.AllocateStack(32, Align(8));
2031
2032 CCInfo.AnalyzeArguments(Outs, CC_X86);
2033
2034 // In vectorcall calling convention a second pass is required for the HVA
2035 // types.
2036 if (CallingConv::X86_VectorCall == CallConv) {
2037 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
2038 }
2039
2040 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
2041 if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
2042 // If we are using a GOT, disable tail calls to external symbols with
2043 // default visibility. Tail calling such a symbol requires using a GOT
2044 // relocation, which forces early binding of the symbol. This breaks code
2045 // that require lazy function symbol resolution. Using musttail or
2046 // GuaranteedTailCallOpt will override this.
2047 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
2048 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
2049 G->getGlobal()->hasDefaultVisibility()))
2050 isTailCall = false;
2051 }
2052
2053 if (isTailCall && !IsMustTail) {
2054 // Check if it's really possible to do a tail call.
2055 isTailCall = IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs,
2056 IsCalleePopSRet);
2057
2058 // Sibcalls are automatically detected tailcalls which do not require
2059 // ABI changes.
2060 if (!IsGuaranteeTCO && isTailCall)
2061 IsSibcall = true;
2062
2063 if (isTailCall)
2064 ++NumTailCalls;
2065 }
2066
2067 if (IsMustTail && !isTailCall)
2068 report_fatal_error("failed to perform tail call elimination on a call "
2069 "site marked musttail");
2070
2071 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
2072 "Var args not supported with calling convention fastcc, ghc or hipe");
2073
2074 // Get a count of how many bytes are to be pushed on the stack.
2075 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
2076 if (IsSibcall)
2077 // This is a sibcall. The memory operands are available in caller's
2078 // own caller's stack.
2079 NumBytes = 0;
2080 else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
2081 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
2082
2083 int FPDiff = 0;
2084 if (isTailCall &&
2085 shouldGuaranteeTCO(CallConv,
2086 MF.getTarget().Options.GuaranteedTailCallOpt)) {
2087 // Lower arguments at fp - stackoffset + fpdiff.
2088 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
2089
2090 FPDiff = NumBytesCallerPushed - NumBytes;
2091
2092 // Set the delta of movement of the returnaddr stackslot.
2093 // But only set if delta is greater than previous delta.
2094 if (FPDiff < X86Info->getTCReturnAddrDelta())
2095 X86Info->setTCReturnAddrDelta(FPDiff);
2096 }
2097
2098 unsigned NumBytesToPush = NumBytes;
2099 unsigned NumBytesToPop = NumBytes;
2100
2101 // If we have an inalloca argument, all stack space has already been allocated
2102 // for us and is right at the top of the stack. We don't support multiple
2103 // arguments passed in memory when using inalloca.
2104 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
2105 NumBytesToPush = 0;
2106 if (!ArgLocs.back().isMemLoc())
2107 report_fatal_error("cannot use inalloca attribute on a register "
2108 "parameter");
2109 if (ArgLocs.back().getLocMemOffset() != 0)
2110 report_fatal_error("any parameter with the inalloca attribute must be "
2111 "the only memory argument");
2112 } else if (CLI.IsPreallocated) {
2113 assert(ArgLocs.back().isMemLoc() &&
2114 "cannot use preallocated attribute on a register "
2115 "parameter");
2116 SmallVector<size_t, 4> PreallocatedOffsets;
2117 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
2118 if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
2119 PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
2120 }
2121 }
2122 auto *MFI = MF.getInfo<X86MachineFunctionInfo>();
2123 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
2124 MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
2125 MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
2126 NumBytesToPush = 0;
2127 }
2128
2129 if (!IsSibcall && !IsMustTail)
2130 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
2131 NumBytes - NumBytesToPush, dl);
2132
2133 SDValue RetAddrFrIdx;
2134 // Load return address for tail calls.
2135 if (isTailCall && FPDiff)
2136 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
2137 Is64Bit, FPDiff, dl);
2138
2139 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
2140 SmallVector<SDValue, 8> MemOpChains;
2141 SDValue StackPtr;
2142
2143 // The next loop assumes that the locations are in the same order of the
2144 // input arguments.
2145 assert(isSortedByValueNo(ArgLocs) &&
2146 "Argument Location list must be sorted before lowering");
2147
2148 // Walk the register/memloc assignments, inserting copies/loads. In the case
2149 // of tail call optimization arguments are handled later.
2150 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2151 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
2152 ++I, ++OutIndex) {
2153 assert(OutIndex < Outs.size() && "Invalid Out index");
2154 // Skip inalloca/preallocated arguments, they have already been written.
2155 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
2156 if (Flags.isInAlloca() || Flags.isPreallocated())
2157 continue;
2158
2159 CCValAssign &VA = ArgLocs[I];
2160 EVT RegVT = VA.getLocVT();
2161 SDValue Arg = OutVals[OutIndex];
2162 bool isByVal = Flags.isByVal();
2163
2164 // Promote the value if needed.
2165 switch (VA.getLocInfo()) {
2166 default: llvm_unreachable("Unknown loc info!");
2167 case CCValAssign::Full: break;
2168 case CCValAssign::SExt:
2169 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
2170 break;
2171 case CCValAssign::ZExt:
2172 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
2173 break;
2174 case CCValAssign::AExt:
2175 if (Arg.getValueType().isVector() &&
2176 Arg.getValueType().getVectorElementType() == MVT::i1)
2177 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
2178 else if (RegVT.is128BitVector()) {
2179 // Special case: passing MMX values in XMM registers.
2180 Arg = DAG.getBitcast(MVT::i64, Arg);
2181 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
2182 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
2183 } else
2184 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
2185 break;
2186 case CCValAssign::BCvt:
2187 Arg = DAG.getBitcast(RegVT, Arg);
2188 break;
2189 case CCValAssign::Indirect: {
2190 if (isByVal) {
2191 // Memcpy the argument to a temporary stack slot to prevent
2192 // the caller from seeing any modifications the callee may make
2193 // as guaranteed by the `byval` attribute.
2194 int FrameIdx = MF.getFrameInfo().CreateStackObject(
2195 Flags.getByValSize(),
2196 std::max(Align(16), Flags.getNonZeroByValAlign()), false);
2197 SDValue StackSlot =
2198 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
2199 Chain =
2200 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
2201 // From now on treat this as a regular pointer
2202 Arg = StackSlot;
2203 isByVal = false;
2204 } else {
2205 // Store the argument.
2206 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
2207 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2208 Chain = DAG.getStore(
2209 Chain, dl, Arg, SpillSlot,
2210 MachinePointerInfo::getFixedStack(MF, FI));
2211 Arg = SpillSlot;
2212 }
2213 break;
2214 }
2215 }
2216
2217 if (VA.needsCustom()) {
2218 assert(VA.getValVT() == MVT::v64i1 &&
2219 "Currently the only custom case is when we split v64i1 to 2 regs");
2220 // Split v64i1 value into two registers
2221 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
2222 } else if (VA.isRegLoc()) {
2223 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2224 const TargetOptions &Options = DAG.getTarget().Options;
2225 if (Options.EmitCallSiteInfo)
2226 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), I);
2227 if (isVarArg && IsWin64) {
2228 // Win64 ABI requires argument XMM reg to be copied to the corresponding
2229 // shadow reg if callee is a varargs function.
2230 Register ShadowReg;
2231 switch (VA.getLocReg()) {
2232 case X86::XMM0: ShadowReg = X86::RCX; break;
2233 case X86::XMM1: ShadowReg = X86::RDX; break;
2234 case X86::XMM2: ShadowReg = X86::R8; break;
2235 case X86::XMM3: ShadowReg = X86::R9; break;
2236 }
2237 if (ShadowReg)
2238 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
2239 }
2240 } else if (!IsSibcall && (!isTailCall || isByVal)) {
2241 assert(VA.isMemLoc());
2242 if (!StackPtr.getNode())
2243 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2244 getPointerTy(DAG.getDataLayout()));
2245 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
2246 dl, DAG, VA, Flags, isByVal));
2247 }
2248 }
2249
2250 if (!MemOpChains.empty())
2251 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2252
2253 if (Subtarget.isPICStyleGOT()) {
2254 // ELF / PIC requires GOT in the EBX register before function calls via PLT
2255 // GOT pointer (except regcall).
2256 if (!isTailCall) {
2257 // Indirect call with RegCall calling convention may use up all the
2258 // general registers, so it is not suitable to bind EBX register for
2259 // GOT address, just let register allocator handle it.
2260 if (CallConv != CallingConv::X86_RegCall)
2261 RegsToPass.push_back(std::make_pair(
2262 Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2263 getPointerTy(DAG.getDataLayout()))));
2264 } else {
2265 // If we are tail calling and generating PIC/GOT style code load the
2266 // address of the callee into ECX. The value in ecx is used as target of
2267 // the tail jump. This is done to circumvent the ebx/callee-saved problem
2268 // for tail calls on PIC/GOT architectures. Normally we would just put the
2269 // address of GOT into ebx and then call target@PLT. But for tail calls
2270 // ebx would be restored (since ebx is callee saved) before jumping to the
2271 // target@PLT.
2272
2273 // Note: The actual moving to ECX is done further down.
2274 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
2275 if (G && !G->getGlobal()->hasLocalLinkage() &&
2276 G->getGlobal()->hasDefaultVisibility())
2277 Callee = LowerGlobalAddress(Callee, DAG);
2278 else if (isa<ExternalSymbolSDNode>(Callee))
2279 Callee = LowerExternalSymbol(Callee, DAG);
2280 }
2281 }
2282
2283 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
2284 (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
2285 // From AMD64 ABI document:
2286 // For calls that may call functions that use varargs or stdargs
2287 // (prototype-less calls or calls to functions containing ellipsis (...) in
2288 // the declaration) %al is used as a hidden argument to specify the number
2289 // of SSE registers used. The contents of %al do not need to match exactly
2290 // the number of registers, but must be an upper bound on the number of SSE
2291 // registers used and is in the range 0 - 8 inclusive.
2292
2293 // Count the number of XMM registers allocated.
2294 static const MCPhysReg XMMArgRegs[] = {
2295 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2296 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2297 };
2298 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
2299 assert((Subtarget.hasSSE1() || !NumXMMRegs)
2300 && "SSE registers cannot be used when SSE is disabled");
2301 RegsToPass.push_back(std::make_pair(Register(X86::AL),
2302 DAG.getConstant(NumXMMRegs, dl,
2303 MVT::i8)));
2304 }
2305
2306 if (isVarArg && IsMustTail) {
2307 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
2308 for (const auto &F : Forwards) {
2309 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
2310 RegsToPass.push_back(std::make_pair(F.PReg, Val));
2311 }
2312 }
2313
2314 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
2315 // don't need this because the eligibility check rejects calls that require
2316 // shuffling arguments passed in memory.
2317 if (!IsSibcall && isTailCall) {
2318 // Force all the incoming stack arguments to be loaded from the stack
2319 // before any new outgoing arguments are stored to the stack, because the
2320 // outgoing stack slots may alias the incoming argument stack slots, and
2321 // the alias isn't otherwise explicit. This is slightly more conservative
2322 // than necessary, because it means that each store effectively depends
2323 // on every argument instead of just those arguments it would clobber.
2324 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
2325
2326 SmallVector<SDValue, 8> MemOpChains2;
2327 SDValue FIN;
2328 int FI = 0;
2329 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
2330 ++I, ++OutsIndex) {
2331 CCValAssign &VA = ArgLocs[I];
2332
2333 if (VA.isRegLoc()) {
2334 if (VA.needsCustom()) {
2335 assert((CallConv == CallingConv::X86_RegCall) &&
2336 "Expecting custom case only in regcall calling convention");
2337 // This means that we are in special case where one argument was
2338 // passed through two register locations - Skip the next location
2339 ++I;
2340 }
2341
2342 continue;
2343 }
2344
2345 assert(VA.isMemLoc());
2346 SDValue Arg = OutVals[OutsIndex];
2347 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
2348 // Skip inalloca/preallocated arguments. They don't require any work.
2349 if (Flags.isInAlloca() || Flags.isPreallocated())
2350 continue;
2351 // Create frame index.
2352 int32_t Offset = VA.getLocMemOffset()+FPDiff;
2353 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
2354 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
2355 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2356
2357 if (Flags.isByVal()) {
2358 // Copy relative to framepointer.
2359 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
2360 if (!StackPtr.getNode())
2361 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2362 getPointerTy(DAG.getDataLayout()));
2363 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2364 StackPtr, Source);
2365
2366 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
2367 ArgChain,
2368 Flags, DAG, dl));
2369 } else {
2370 // Store relative to framepointer.
2371 MemOpChains2.push_back(DAG.getStore(
2372 ArgChain, dl, Arg, FIN,
2373 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
2374 }
2375 }
2376
2377 if (!MemOpChains2.empty())
2378 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
2379
2380 // Store the return address to the appropriate stack slot.
2381 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
2382 getPointerTy(DAG.getDataLayout()),
2383 RegInfo->getSlotSize(), FPDiff, dl);
2384 }
2385
2386 // Build a sequence of copy-to-reg nodes chained together with token chain
2387 // and glue operands which copy the outgoing args into registers.
2388 SDValue InGlue;
2389 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2390 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2391 RegsToPass[i].second, InGlue);
2392 InGlue = Chain.getValue(1);
2393 }
2394
2395 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
2396 assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
2397 // In the 64-bit large code model, we have to make all calls
2398 // through a register, since the call instruction's 32-bit
2399 // pc-relative offset may not be large enough to hold the whole
2400 // address.
2401 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
2402 Callee->getOpcode() == ISD::ExternalSymbol) {
2403 // Lower direct calls to global addresses and external symbols. Setting
2404 // ForCall to true here has the effect of removing WrapperRIP when possible
2405 // to allow direct calls to be selected without first materializing the
2406 // address into a register.
2407 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
2408 } else if (Subtarget.isTarget64BitILP32() &&
2409 Callee.getValueType() == MVT::i32) {
2410 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
2411 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
2412 }
2413
2414 // Returns a chain & a glue for retval copy to use.
2415 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2416 SmallVector<SDValue, 8> Ops;
2417
2418 if (!IsSibcall && isTailCall && !IsMustTail) {
2419 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InGlue, dl);
2420 InGlue = Chain.getValue(1);
2421 }
2422
2423 Ops.push_back(Chain);
2424 Ops.push_back(Callee);
2425
2426 if (isTailCall)
2427 Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));
2428
2429 // Add argument registers to the end of the list so that they are known live
2430 // into the call.
2431 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2432 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2433 RegsToPass[i].second.getValueType()));
2434
2435 // Add a register mask operand representing the call-preserved registers.
2436 const uint32_t *Mask = [&]() {
2437 auto AdaptedCC = CallConv;
2438 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
2439 // use X86_INTR calling convention because it has the same CSR mask
2440 // (same preserved registers).
2441 if (HasNCSR)
2442 AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
2443 // If NoCalleeSavedRegisters is requested, then use GHC since it happens
2444 // to use the CSR_NoRegs_RegMask.
2445 if (CB && CB->hasFnAttr("no_callee_saved_registers"))
2446 AdaptedCC = (CallingConv::ID)CallingConv::GHC;
2447 return RegInfo->getCallPreservedMask(MF, AdaptedCC);
2448 }();
2449 assert(Mask && "Missing call preserved mask for calling convention");
2450
2451 // If this is an invoke in a 32-bit function using a funclet-based
2452 // personality, assume the function clobbers all registers. If an exception
2453 // is thrown, the runtime will not restore CSRs.
2454 // FIXME: Model this more precisely so that we can register allocate across
2455 // the normal edge and spill and fill across the exceptional edge.
2456 if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
2457 const Function &CallerFn = MF.getFunction();
2458 EHPersonality Pers =
2459 CallerFn.hasPersonalityFn()
2460 ? classifyEHPersonality(CallerFn.getPersonalityFn())
2461 : EHPersonality::Unknown;
2462 if (isFuncletEHPersonality(Pers))
2463 Mask = RegInfo->getNoPreservedMask();
2464 }
2465
2466 // Define a new register mask from the existing mask.
2467 uint32_t *RegMask = nullptr;
2468
2469 // In some calling conventions we need to remove the used physical registers
2470 // from the reg mask. Create a new RegMask for such calling conventions.
2471 // RegMask for calling conventions that disable only return registers (e.g.
2472 // preserve_most) will be modified later in LowerCallResult.
2473 bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CallConv) || HasNCSR;
2474 if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CallConv)) {
2475 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2476
2477 // Allocate a new Reg Mask and copy Mask.
2478 RegMask = MF.allocateRegMask();
2479 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
2480 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
2481
2482 // Make sure all sub registers of the argument registers are reset
2483 // in the RegMask.
2484 if (ShouldDisableArgRegs) {
2485 for (auto const &RegPair : RegsToPass)
2486 for (MCPhysReg SubReg : TRI->subregs_inclusive(RegPair.first))
2487 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
2488 }
2489
2490 // Create the RegMask Operand according to our updated mask.
2491 Ops.push_back(DAG.getRegisterMask(RegMask));
2492 } else {
2493 // Create the RegMask Operand according to the static mask.
2494 Ops.push_back(DAG.getRegisterMask(Mask));
2495 }
2496
2497 if (InGlue.getNode())
2498 Ops.push_back(InGlue);
2499
2500 if (isTailCall) {
2501 // We used to do:
2502 //// If this is the first return lowered for this function, add the regs
2503 //// to the liveout set for the function.
2504 // This isn't right, although it's probably harmless on x86; liveouts
2505 // should be computed from returns not tail calls. Consider a void
2506 // function making a tail call to a function returning int.
2507 MF.getFrameInfo().setHasTailCall();
2508 SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
2509
2510 if (IsCFICall)
2511 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2512
2513 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2514 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2515 return Ret;
2516 }
2517
2518 if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
2519 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
2520 } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
2521 // Calls with a "clang.arc.attachedcall" bundle are special. They should be
2522 // expanded to the call, directly followed by a special marker sequence and
2523 // a call to an ObjC library function. Use the CALL_RVMARKER to do that.
2524 assert(!isTailCall &&
2525 "tail calls cannot be marked with clang.arc.attachedcall");
2526 assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");
2527
2528 // Add a target global address for the retainRV/claimRV runtime function
2529 // just before the call target.
2530 Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
2531 auto PtrVT = getPointerTy(DAG.getDataLayout());
2532 auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
2533 Ops.insert(Ops.begin() + 1, GA);
2534 Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
2535 } else {
2536 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
2537 }
2538
2539 if (IsCFICall)
2540 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2541
2542 InGlue = Chain.getValue(1);
2543 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2544 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2545
2546 // Save heapallocsite metadata.
2547 if (CLI.CB)
2548 if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
2549 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
2550
2551 // Create the CALLSEQ_END node.
2552 unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
2553 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
2554 DAG.getTarget().Options.GuaranteedTailCallOpt))
2555 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
2556 else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet)
2557 // If this call passes a struct-return pointer, the callee
2558 // pops that struct pointer.
2559 NumBytesForCalleeToPop = 4;
2560
2561 // Returns a glue for retval copy to use.
2562 if (!IsSibcall) {
2563 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop,
2564 InGlue, dl);
2565 InGlue = Chain.getValue(1);
2566 }
2567
2568 if (CallingConv::PreserveNone == CallConv)
2569 for (unsigned I = 0, E = Outs.size(); I != E; ++I) {
2570 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftAsync() ||
2571 Outs[I].Flags.isSwiftError()) {
2572 errorUnsupported(DAG, dl,
2573 "Swift attributes can't be used with preserve_none");
2574 break;
2575 }
2576 }
2577
2578 // Handle result values, copying them out of physregs into vregs that we
2579 // return.
2580 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2581 InVals, RegMask);
2582}
2583
2584//===----------------------------------------------------------------------===//
2585// Fast Calling Convention (tail call) implementation
2586//===----------------------------------------------------------------------===//
2587
2588 // Like the stdcall convention, the callee cleans up the arguments, except that ECX is
2589// reserved for storing the tail called function address. Only 2 registers are
2590// free for argument passing (inreg). Tail call optimization is performed
2591// provided:
2592// * tailcallopt is enabled
2593// * caller/callee are fastcc
2594// On X86_64 architecture with GOT-style position independent code only local
2595// (within module) calls are supported at the moment.
2596 // To keep the stack aligned according to the platform ABI, the function
2597 // GetAlignedArgumentStackSize ensures that the argument delta is always a
2598 // multiple of the stack alignment. (Dynamic linkers need this - Darwin's dyld for example)
2599// If a tail called function callee has more arguments than the caller the
2600// caller needs to make sure that there is room to move the RETADDR to. This is
2601// achieved by reserving an area the size of the argument delta right after the
2602// original RETADDR, but before the saved framepointer or the spilled registers
2603// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
2604// stack layout:
2605// arg1
2606// arg2
2607// RETADDR
2608// [ new RETADDR
2609// move area ]
2610// (possible EBP)
2611// ESI
2612// EDI
2613// local1 ..
2614
2615 /// Align the stack size so that it is, e.g., 16n + 12 for a 16-byte alignment
2616 /// requirement.
2617unsigned
2618X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
2619 SelectionDAG &DAG) const {
2620 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
2621 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
2622 assert(StackSize % SlotSize == 0 &&
2623 "StackSize must be a multiple of SlotSize");
2624 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
2625}
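Worked example of the formula above (editorial, not from the source): on 32-bit x86 with SlotSize = 4 and a 16-byte stack alignment, every result has the form 16n + 12, leaving exactly enough room for the 4-byte return address to bring the total back to a 16-byte boundary.

// Editorial sketch of GetAlignedArgumentStackSize's arithmetic, assuming
// SlotSize = 4 and StackAlignment = 16 (32-bit x86); alignTo is a local stand-in
// for the rounding helper used by the real code.
#include <cstdint>
#include <cstdio>

static uint64_t alignTo(uint64_t Value, uint64_t Alignment) {
  return (Value + Alignment - 1) / Alignment * Alignment;
}

int main() {
  const uint64_t SlotSize = 4, StackAlignment = 16;
  for (uint64_t StackSize = 0; StackSize <= 48; StackSize += 16) {
    uint64_t Aligned = alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
    std::printf("StackSize %2llu -> %2llu (16n + 12)\n",
                (unsigned long long)StackSize, (unsigned long long)Aligned);
  }
  return 0;
}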
2626
2627/// Return true if the given stack call argument is already available in the
2628/// same position (relatively) of the caller's incoming argument stack.
2629 static
2630 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2631 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2632 const X86InstrInfo *TII, const CCValAssign &VA) {
2633 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2634
2635 for (;;) {
2636 // Look through nodes that don't alter the bits of the incoming value.
2637 unsigned Op = Arg.getOpcode();
2638 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST ||
2639 Op == ISD::AssertZext) {
2640 Arg = Arg.getOperand(0);
2641 continue;
2642 }
2643 if (Op == ISD::TRUNCATE) {
2644 const SDValue &TruncInput = Arg.getOperand(0);
2645 if (TruncInput.getOpcode() == ISD::AssertZext &&
2646 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
2647 Arg.getValueType()) {
2648 Arg = TruncInput.getOperand(0);
2649 continue;
2650 }
2651 }
2652 break;
2653 }
2654
2655 int FI = INT_MAX;
2656 if (Arg.getOpcode() == ISD::CopyFromReg) {
2657 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2658 if (!VR.isVirtual())
2659 return false;
2660 MachineInstr *Def = MRI->getVRegDef(VR);
2661 if (!Def)
2662 return false;
2663 if (!Flags.isByVal()) {
2664 if (!TII->isLoadFromStackSlot(*Def, FI))
2665 return false;
2666 } else {
2667 unsigned Opcode = Def->getOpcode();
2668 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
2669 Opcode == X86::LEA64_32r) &&
2670 Def->getOperand(1).isFI()) {
2671 FI = Def->getOperand(1).getIndex();
2672 Bytes = Flags.getByValSize();
2673 } else
2674 return false;
2675 }
2676 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2677 if (Flags.isByVal())
2678 // ByVal argument is passed in as a pointer but it's now being
2679 // dereferenced. e.g.
2680 // define @foo(%struct.X* %A) {
2681 // tail call @bar(%struct.X* byval %A)
2682 // }
2683 return false;
2684 SDValue Ptr = Ld->getBasePtr();
2685 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2686 if (!FINode)
2687 return false;
2688 FI = FINode->getIndex();
2689 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
2690 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
2691 FI = FINode->getIndex();
2692 Bytes = Flags.getByValSize();
2693 } else
2694 return false;
2695
2696 assert(FI != INT_MAX);
2697 if (!MFI.isFixedObjectIndex(FI))
2698 return false;
2699
2700 if (Offset != MFI.getObjectOffset(FI))
2701 return false;
2702
2703 // If this is not byval, check that the argument stack object is immutable.
2704 // inalloca and argument copy elision can create mutable argument stack
2705 // objects. Byval objects can be mutated, but a byval call intends to pass the
2706 // mutated memory.
2707 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
2708 return false;
2709
2710 if (VA.getLocVT().getFixedSizeInBits() >
2711 Arg.getValueSizeInBits().getFixedValue()) {
2712 // If the argument location is wider than the argument type, check that any
2713 // extension flags match.
2714 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
2715 Flags.isSExt() != MFI.isObjectSExt(FI)) {
2716 return false;
2717 }
2718 }
2719
2720 return Bytes == MFI.getObjectSize(FI);
2721}
2722
2723/// Check whether the call is eligible for tail call optimization. Targets
2724/// that want to do tail call optimization should implement this function.
2725/// Note that the x86 backend does not check musttail calls for eligibility! The
2726/// rest of x86 tail call lowering must be prepared to forward arguments of any
2727/// type.
2728 bool X86TargetLowering::IsEligibleForTailCallOptimization(
2729 TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,
2730 SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const {
2731 SelectionDAG &DAG = CLI.DAG;
2732 const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2733 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2734 const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2735 SDValue Callee = CLI.Callee;
2736 CallingConv::ID CalleeCC = CLI.CallConv;
2737 bool isVarArg = CLI.IsVarArg;
2738
2739 if (!mayTailCallThisCC(CalleeCC))
2740 return false;
2741
2742 // If -tailcallopt is specified, make fastcc functions tail-callable.
2743 MachineFunction &MF = DAG.getMachineFunction();
2744 const Function &CallerF = MF.getFunction();
2745
2746 // If the function return type is x86_fp80 and the callee return type is not,
2747 // then the FP_EXTEND of the call result is not a nop. It's not safe to
2748 // perform a tailcall optimization here.
2749 if (CallerF.getReturnType()->isX86_FP80Ty() && !CLI.RetTy->isX86_FP80Ty())
2750 return false;
2751
2752 CallingConv::ID CallerCC = CallerF.getCallingConv();
2753 bool CCMatch = CallerCC == CalleeCC;
2754 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
2755 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
2756 bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
2757 CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;
2758
2759 // Win64 functions have extra shadow space for argument homing. Don't do the
2760 // sibcall if the caller and callee have mismatched expectations for this
2761 // space.
2762 if (IsCalleeWin64 != IsCallerWin64)
2763 return false;
2764
2765 if (IsGuaranteeTCO) {
2766 if (canGuaranteeTCO(CalleeCC) && CCMatch)
2767 return true;
2768 return false;
2769 }
2770
2771 // Look for obvious safe cases to perform tail call optimization that do not
2772 // require ABI changes. This is what gcc calls sibcall.
2773
2774 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
2775 // emit a special epilogue.
2776 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2777 if (RegInfo->hasStackRealignment(MF))
2778 return false;
2779
2780 // Also avoid sibcall optimization if we're an sret return fn and the callee
2781 // is incompatible. See comment in LowerReturn about why hasStructRetAttr is
2782 // insufficient.
2783 if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
2784 // For a compatible tail call the callee must return our sret pointer. So it
2785 // needs to be (a) an sret function itself and (b) we pass our sret as its
2786 // sret. Condition #b is harder to determine.
2787 return false;
2788 } else if (IsCalleePopSRet)
2789 // The callee pops an sret, so we cannot tail-call, as our caller doesn't
2790 // expect that.
2791 return false;
2792
2793 // Do not sibcall optimize vararg calls unless all arguments are passed via
2794 // registers.
2795 LLVMContext &C = *DAG.getContext();
2796 if (isVarArg && !Outs.empty()) {
2797 // Optimizing for varargs on Win64 is unlikely to be safe without
2798 // additional testing.
2799 if (IsCalleeWin64 || IsCallerWin64)
2800 return false;
2801
2802 for (const auto &VA : ArgLocs)
2803 if (!VA.isRegLoc())
2804 return false;
2805 }
2806
2807 // If the call result is in ST0 / ST1, it needs to be popped off the x87
2808 // stack. Therefore, if it's not used by the call it is not safe to optimize
2809 // this into a sibcall.
2810 bool Unused = false;
2811 for (const auto &In : Ins) {
2812 if (!In.Used) {
2813 Unused = true;
2814 break;
2815 }
2816 }
2817 if (Unused) {
2818 SmallVector<CCValAssign, 16> RVLocs;
2819 CCState RVCCInfo(CalleeCC, false, MF, RVLocs, C);
2820 RVCCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2821 for (const auto &VA : RVLocs) {
2822 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
2823 return false;
2824 }
2825 }
2826
2827 // Check that the call results are passed in the same way.
2828 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2829 RetCC_X86, RetCC_X86))
2830 return false;
2831 // The callee has to preserve all registers the caller needs to preserve.
2832 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2833 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2834 if (!CCMatch) {
2835 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2836 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2837 return false;
2838 }
2839
2840 unsigned StackArgsSize = CCInfo.getStackSize();
2841
2842 // If the callee takes no arguments then go on to check the results of the
2843 // call.
2844 if (!Outs.empty()) {
2845 if (StackArgsSize > 0) {
2846 // Check if the arguments are already laid out in the right way as
2847 // the caller's fixed stack objects.
2848 MachineFrameInfo &MFI = MF.getFrameInfo();
2849 const MachineRegisterInfo *MRI = &MF.getRegInfo();
2850 const X86InstrInfo *TII = Subtarget.getInstrInfo();
2851 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2852 const CCValAssign &VA = ArgLocs[I];
2853 SDValue Arg = OutVals[I];
2854 ISD::ArgFlagsTy Flags = Outs[I].Flags;
2855 if (VA.getLocInfo() == CCValAssign::Indirect)
2856 return false;
2857 if (!VA.isRegLoc()) {
2858 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,
2859 TII, VA))
2860 return false;
2861 }
2862 }
2863 }
2864
2865 bool PositionIndependent = isPositionIndependent();
2866 // If the tailcall address may be in a register, then make sure it's
2867 // possible to register allocate for it. In 32-bit, the call address can
2868 // only target EAX, EDX, or ECX since the tail call must be scheduled after
2869 // callee-saved registers are restored. These happen to be the same
2870 // registers used to pass 'inreg' arguments so watch out for those.
2871 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
2872 !isa<ExternalSymbolSDNode>(Callee)) ||
2873 PositionIndependent)) {
2874 unsigned NumInRegs = 0;
2875 // In PIC we need an extra register to formulate the address computation
2876 // for the callee.
2877 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
2878
2879 for (const auto &VA : ArgLocs) {
2880 if (!VA.isRegLoc())
2881 continue;
2882 Register Reg = VA.getLocReg();
2883 switch (Reg) {
2884 default: break;
2885 case X86::EAX: case X86::EDX: case X86::ECX:
2886 if (++NumInRegs == MaxInRegs)
2887 return false;
2888 break;
2889 }
2890 }
2891 }
2892
2893 const MachineRegisterInfo &MRI = MF.getRegInfo();
2894 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2895 return false;
2896 }
2897
2898 bool CalleeWillPop =
2899 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
2900 MF.getTarget().Options.GuaranteedTailCallOpt);
2901
2902 if (unsigned BytesToPop =
2903 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
2904 // If we have bytes to pop, the callee must pop them.
2905 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
2906 if (!CalleePopMatches)
2907 return false;
2908 } else if (CalleeWillPop && StackArgsSize > 0) {
2909 // If we don't have bytes to pop, make sure the callee doesn't pop any.
2910 return false;
2911 }
2912
2913 return true;
2914}
2915
2916/// Determines whether the callee is required to pop its own arguments.
2917/// Callee pop is necessary to support tail calls.
2918 bool X86::isCalleePop(CallingConv::ID CallingConv,
2919 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
2920 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
2921 // can guarantee TCO.
2922 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
2923 return true;
2924
2925 switch (CallingConv) {
2926 default:
2927 return false;
2928 case CallingConv::X86_StdCall:
2929 case CallingConv::X86_FastCall:
2930 case CallingConv::X86_ThisCall:
2931 case CallingConv::X86_VectorCall:
2932 return !is64Bit;
2933 }
2934}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Return true if the calling convention is one that we can guarantee TCO for.
static bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, const TargetInstrInfo *TII)
MatchingStackOffset - Return true if the given stack call argument is already available in the same p...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
uint64_t Addr
static Function * getFunction(Constant *C)
Definition: Evaluator.cpp:236
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
static LVOptions Options
Definition: LVOptions.cpp:25
const MCPhysReg ArgGPRs[]
static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt)
Return true if the function is being made into a tailcall target by changing its ABI.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
LLVMContext & Context
This file defines ARC utility functions which are used by various parts of the compiler.
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
static bool is64Bit(const char *name)
static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc, const SDLoc &DL, SelectionDAG &DAG)
Lowers masks values (v*i1) to the local register values.
static void Passv64i1ArgInRegs(const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg, SmallVectorImpl< std::pair< Register, SDValue > > &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, const X86Subtarget &Subtarget)
Breaks v64i1 value into two registers and adds the new node to the DAG.
static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, const SDLoc &DL, const X86Subtarget &Subtarget, SDValue *InGlue=nullptr)
Reads two 32 bit registers and creates a 64 bit mask value.
static ArrayRef< MCPhysReg > get64BitArgumentXMMs(MachineFunction &MF, CallingConv::ID CallConv, const X86Subtarget &Subtarget)
static bool isSortedByValueNo(ArrayRef< CCValAssign > ArgLocs)
static ArrayRef< MCPhysReg > get64BitArgumentGPRs(CallingConv::ID CallConv, const X86Subtarget &Subtarget)
static std::pair< MVT, unsigned > handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC, const X86Subtarget &Subtarget)
static bool shouldDisableRetRegFromCSR(CallingConv::ID CC)
Returns true if a CC can dynamically exclude a register from the list of callee-saved-registers (Targ...
static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl, const char *Msg)
Call this when the user attempts to do something unsupported, like returning a double without SSE2 en...
static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue RetAddrFrIdx, EVT PtrVT, unsigned SlotSize, int FPDiff, const SDLoc &dl)
Emit a store of the return address if tail call optimization is performed and it is required (FPDiff!...
static bool hasCalleePopSRet(const SmallVectorImpl< T > &Args, const X86Subtarget &Subtarget)
Determines whether Args, either a set of outgoing arguments to a call, or a set of incoming args of a...
static bool shouldDisableArgRegFromCSR(CallingConv::ID CC)
Returns true if a CC can dynamically exclude a register from the list of callee-saved-registers (Targ...
static bool hasStackGuardSlotTLS(const Triple &TargetTriple)
static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT, const EVT &ValLoc, const SDLoc &DL, SelectionDAG &DAG)
The function will lower a register of various sizes (8/16/32/64) to a mask value of the expected size...
static Constant * SegmentOffset(IRBuilderBase &IRB, int Offset, unsigned AddressSpace)
static bool isBitAligned(Align Alignment, uint64_t SizeInBits)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
CCState - This class holds information needed while lowering arguments and return values.
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
void convertToReg(unsigned RegNo)
bool isExtInLoc() const
int64_t getLocMemOffset() const
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1800
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
static Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2126
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:154
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string and methods for querying it.
Definition: DataLayout.h:110
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
Diagnostic information for unsupported feature in backend.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:168
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:264
bool hasPersonalityFn() const
Check whether this function has a personality function.
Definition: Function.h:855
Constant * getPersonalityFn() const
Get the personality function associated with this function.
Definition: Function.cpp:1921
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:207
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:677
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
void setDSOLocal(bool Local)
Definition: GlobalValue.h:303
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:52
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
TargetInstrInfo overrides.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
LLVMContext & getContext() const
Definition: IRBuilder.h:176
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:569
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:359
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
This class is used to represent ISD::LOAD nodes.
Context object for machine code objects.
Definition: MCContext.h:81
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:397
Metadata node.
Definition: Metadata.h:1067
Machine Value Type.
SimpleValueType SimpleTy
@ INVALID_SIMPLE_VALUE_TYPE
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool is512BitVector() const
Return true if this is a 512-bit vector type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
MVT getVectorElementType() const
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setObjectZExt(int ObjectIdx, bool IsZExt)
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setObjectSExt(int ObjectIdx, bool IsSExt)
bool isImmutableObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to an immutable object.
void setHasTailCall(bool V=true)
bool isObjectZExt(int ObjectIdx) const
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool isObjectSExt(int ObjectIdx) const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
uint32_t * allocateRegMask()
Allocate and initialize a register mask with NumRegister bits.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
MachineModuleInfo & getMMI() const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
Representation of each machine instruction.
Definition: MachineInstr.h:69
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
@ EK_LabelDifference64
EK_LabelDifference64 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOStore
The memory access writes data.
const Module * getModule() const
static unsigned getRegMaskSize(unsigned NumRegs)
Returns the number of elements needed for a regmask array.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void disableCalleeSavedRegister(MCRegister Reg)
Disables the register from the list of CSRs.
Root of the metadata hierarchy.
Definition: Metadata.h:62
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:662
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
const DebugLoc & getDebugLoc() const
Represents one node in the SelectionDAG.
void setCFIType(uint32_t Type)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:722
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
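For illustration, a hedged sketch of how a stack-passed argument is commonly materialized with this API: create a fixed frame object at the incoming stack offset, wrap it in a frame-index node, and load through it. ValVT, Offset, Chain, DL and the 64-bit pointer type are assumptions of the sketch, not values taken from this file:
// Sketch: load one incoming argument from its fixed stack slot.
MachineFunction &MF = DAG.getMachineFunction();
int FI = MF.getFrameInfo().CreateFixedObject(ValVT.getStoreSize().getFixedValue(),
                                             Offset, /*IsImmutable=*/true);
SDValue FIN = DAG.getFrameIndex(FI, MVT::i64);   // assuming a 64-bit pointer type
SDValue ArgValue = DAG.getLoad(ValVT, DL, Chain, FIN,
                               MachinePointerInfo::getFixedStack(MF, FI));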
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which must always have a glue result (to ensure it's not CSE'd).
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
void addHeapAllocSite(const SDNode *Node, MDNode *MD)
Set HeapAllocSite to be associated with Node.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, which starts a new call frame in which InSize bytes are set up inside ...
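A hedged sketch of the usual call-frame bracket built with these two helpers; NumBytes (the outgoing argument area size) and InGlue are assumed to come from the surrounding call-lowering code:
// Open the call frame before emitting argument stores and register copies.
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, /*OutSize=*/0, DL);
// ... argument stores, register copies and the call node go here ...
// Close the call frame; the second constant is the callee-popped byte count.
Chain = DAG.getCALLSEQ_END(Chain,
                           DAG.getIntPtrConstant(NumBytes, DL, /*isTarget=*/true),
                           DAG.getIntPtrConstant(0, DL, /*isTarget=*/true),
                           InGlue, DL);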
SDValue getRegister(unsigned Reg, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:773
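As a small hedged example of this helper in call lowering, each (register, value) pair produced by the calling-convention analysis is typically emitted as a CopyToReg and also recorded as an explicit operand of the call node; RegsToPass and Ops are assumed names here:
// Sketch: copy register-assigned arguments into their physical registers.
for (const auto &RV : RegsToPass) {          // pairs of (Register, SDValue)
  Chain = DAG.getCopyToReg(Chain, DL, RV.first, RV.second);
  // List the register explicitly on the call node.
  Ops.push_back(DAG.getRegister(RV.first, RV.second.getValueType()));
}
(The real lowering additionally glues these copies together; this sketch omits the glue for brevity.)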
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:676
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:799
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
SDValue getRegisterMask(const uint32_t *RegMask)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
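For example (a minimal sketch; Val64 is assumed to be an i64 SDValue already in hand):
// Split an i64 into its (low, high) i32 halves via EXTRACT_ELEMENT.
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitScalar(Val64, DL, MVT::i32, MVT::i32);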
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
Class to represent struct types.
Definition: DerivedTypes.h:216
Information about stack frame layout on the target.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual Value * getSafeStackPointerLocation(IRBuilderBase &IRB) const
Returns the target-specific address of the unsafe stack pointer.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that was previously inserted by insertSSPDeclarations, if any, otherwise return nul...
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
virtual Function * getSSPStackGuardCheck(const Module &M) const
If the target has a standard stack protection check function that performs validation and error handl...
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
bool isPositionIndependent() const
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the command line.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
bool isAndroidVersionLT(unsigned Major) const
Definition: Triple.h:771
bool isAndroid() const
Tests whether the target is Android.
Definition: Triple.h:769
bool isOSMSVCRT() const
Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
Definition: Triple.h:667
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
Definition: Triple.h:558
bool isOSGlibc() const
Tests whether the OS uses glibc.
Definition: Triple.h:703
bool isOSFuchsia() const
Definition: Triple.h:588
bool isWindowsMSVCEnvironment() const
Checks if the environment could be MSVC.
Definition: Triple.h:634
bool isWindowsItaniumEnvironment() const
Definition: Triple.h:649
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isX86_FP80Ty() const
Return true if this is x86 long double.
Definition: Type.h:160
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
LLVM Value Representation.
Definition: Value.h:74
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
void setBytesToPopOnReturn(unsigned bytes)
void setVarArgsGPOffset(unsigned Offset)
void setArgumentStackSize(unsigned size)
SmallVectorImpl< ForwardedRegister > & getForwardedMustTailRegParms()
void setVarArgsFPOffset(unsigned Offset)
unsigned getSlotSize() const
bool hasSSE1() const
Definition: X86Subtarget.h:200
bool useLight256BitInstructions() const
Definition: X86Subtarget.h:272
bool isPICStyleGOT() const
Definition: X86Subtarget.h:342
bool isTargetMCU() const
Definition: X86Subtarget.h:311
bool isTargetWindowsMSVC() const
Definition: X86Subtarget.h:314
bool isTarget64BitILP32() const
Is this x86_64 with the ILP32 programming model (x32 ABI)?
Definition: X86Subtarget.h:180
bool isTargetDarwin() const
Definition: X86Subtarget.h:294
const Triple & getTargetTriple() const
Definition: X86Subtarget.h:292
const X86InstrInfo * getInstrInfo() const override
Definition: X86Subtarget.h:129
bool useAVX512Regs() const
Definition: X86Subtarget.h:267
bool isCallingConvWin64(CallingConv::ID CC) const
Definition: X86Subtarget.h:351
bool hasAVX512() const
Definition: X86Subtarget.h:208
bool hasSSE41() const
Definition: X86Subtarget.h:204
bool hasSSE2() const
Definition: X86Subtarget.h:201
bool isTargetFuchsia() const
Definition: X86Subtarget.h:312
bool isPICStyleRIPRel() const
Definition: X86Subtarget.h:343
bool isTargetCygMing() const
Definition: X86Subtarget.h:334
const X86RegisterInfo * getRegisterInfo() const override
Definition: X86Subtarget.h:139
bool hasAVX() const
Definition: X86Subtarget.h:206
unsigned getPreferVectorWidth() const
Definition: X86Subtarget.h:239
bool isTargetAndroid() const
Definition: X86Subtarget.h:307
const X86FrameLowering * getFrameLowering() const override
Definition: X86Subtarget.h:131
bool hasAVX2() const
Definition: X86Subtarget.h:207
std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override
Return the largest legal super-reg register class of the register class for the specified type and it...
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMemoryAccessFast(EVT VT, Align Alignment) const
bool useSoftFloat() const override
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool isSafeMemOpType(MVT VT) const override
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
Function * getSSPStackGuardCheck(const Module &M) const override
If the target has a standard stack protection check function that performs validation and error handl...
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Returns true if the target allows unaligned memory accesses of the specified type.
Value * getSDagStackGuard(const Module &M) const override
Return the variable that was previously inserted by insertSSPDeclarations, if any, otherwise return nul...
uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
Return the desired alignment for ByVal aggregate function arguments in the caller parameter area.
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void markLibCallAttributes(MachineFunction *MF, unsigned CC, ArgListTy &Args) const override
Value * getSafeStackPointerLocation(IRBuilderBase &IRB) const override
Return true if the target stores SafeStack pointer at a fixed offset in some non-standard address spa...
bool isScalarFPTypeInSSEReg(EVT VT) const
Return true if the specified scalar FP type is computed in an SSE register, not on the X87 floating p...
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain targets require unusual breakdowns of certain types.
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
This function returns true if the memory access is aligned or if the target allows this specific unal...
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the value type to use for ISD::SETCC.
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override
For types supported by the target, this is an identity function.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:199
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
@ X86_64_SysV
The C convention as specified in the x86-64 supplement to the System V ABI, used on most non-Windows ...
Definition: CallingConv.h:151
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
Definition: CallingConv.h:53
@ Swift
Calling convention for Swift.
Definition: CallingConv.h:69
@ PreserveMost
Used for runtime calls that preserve most registers.
Definition: CallingConv.h:63
@ X86_INTR
x86 hardware interrupt context.
Definition: CallingConv.h:173
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ X86_ThisCall
Similar to X86_StdCall.
Definition: CallingConv.h:122
@ PreserveAll
Used for runtime calls that preserve (almost) all registers.
Definition: CallingConv.h:66
@ X86_StdCall
stdcall is mostly used by the Win32 API.
Definition: CallingConv.h:99
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ X86_VectorCall
MSVC calling convention that passes vectors and vector aggregates in SSE registers.
Definition: CallingConv.h:163
@ Intel_OCL_BI
Used for Intel OpenCL built-ins.
Definition: CallingConv.h:147
@ PreserveNone
Used for runtime calls that preserve no general registers.
Definition: CallingConv.h:90
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ Win64
The C convention as implemented on Windows/x86-64 and AArch64.
Definition: CallingConv.h:159
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition: CallingConv.h:87
@ X86_RegCall
Register calling convention used for parameters transfer optimization.
Definition: CallingConv.h:203
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ X86_FastCall
'fast' analog of X86_StdCall.
Definition: CallingConv.h:103
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:240
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:784
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:544
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:904
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:775
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:621
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:209
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:536
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:781
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:889
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ ExternalSymbol
Definition: ISDOpcodes.h:83
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:870
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:787
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ AssertZext
Definition: ISDOpcodes.h:62
@ FS
Definition: X86.h:206
@ GS
Definition: X86.h:205
Reg
All possible values of the reg field in the ModR/M byte.
@ RET_GLUE
Return with a glue operand.
@ IRET
Return from interrupt. Operand 0 is the number of bytes to pop.
@ CALL
These operations represent an abstract X86 call instruction, which includes a bunch of information.
@ GlobalBaseReg
On Darwin, this node represents the result of the popl at function entry, used for PIC code.
@ TC_RETURN
Tail call return.
@ NT_CALL
Same as call except it adds the NoTrack prefix.
@ MOVDQ2Q
Copies a 64-bit value from the low word of an XMM vector to an MMX vector.
bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget, const MachineFunction &MF)
True if the target supports the extended frame for async Swift functions.
bool isCalleePop(CallingConv::ID CallingConv, bool is64Bit, bool IsVarArg, bool GuaranteeTCO)
Determines whether the callee is required to pop its own arguments.
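A hedged usage sketch, with the inputs assumed to come from the current call's convention, the subtarget, and the -tailcallopt setting:
// Does the callee pop its own stack arguments (e.g. stdcall/fastcall)?
bool CalleePopsArgs =
    X86::isCalleePop(CallConv, Subtarget.is64Bit(), IsVarArg,
                     MF.getTarget().Options.GuaranteedTailCallOpt);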
std::optional< Function * > getAttachedARCFunction(const CallBase *CB)
This function returns operand bundle clang_arc_attachedcall's argument, which is the address of the A...
Definition: ObjCARCUtil.h:43
bool hasAttachedCallOpBundle(const CallBase *CB)
Definition: ObjCARCUtil.h:29
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
bool RetCC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
AddressSpace
Definition: NVPTXBaseInfo.h:21
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2073
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition: STLExtras.h:1902
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
bool isFuncletEHPersonality(EHPersonality Pers)
Returns true if this is a personality function that invokes handler funclets (which must return to it...
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
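A tiny illustrative example of the rounding behavior:
// Rounding a 10-byte size up to an 8-byte alignment yields 16.
uint64_t Padded = alignTo(/*Size=*/10, Align(8));   // Padded == 16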
bool CC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:203
bool is512BitVector() const
Return true if this is a 512-bit vector type.
Definition: ValueTypes.h:213
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition: ValueTypes.h:208
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:101
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
Describes a register that needs to be forwarded from the prologue to a musttail call.
SmallVector< ArgRegPair, 1 > ArgRegPairs
Vector of call argument and its forwarding register.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals