llvm / 92dbb17
[X86] RegCall - Handling v64i1 in 32/64 bit target

The Register Calling Convention defines a new behavior for v64i1 types: such a value should be saved in a GPR. However, for a 32 bit machine we need to split the value into two GPRs (because each GPR is only 32 bits wide).

Differential Revision: https://reviews.llvm.org/D26181
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@287217 91177308-0d34-0410-b5e6-96231b3b80d8

Oren Ben Simhon, 3 years ago
6 changed file(s) with 554 addition(s) and 93 deletion(s).
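For orientation, here is a minimal source-level sketch of the situation this patch handles. It is not taken from the commit; it assumes Clang's __attribute__((regcall)) spelling and an AVX512BW-enabled build. At the IR level, a 64-bit AVX-512 mask is the v64i1 type the patch deals with.

#include <immintrin.h>

// Hypothetical example: a regcall function combining two 64-bit AVX-512 masks.
// Under the register calling convention a 64-bit mask is passed in GPRs:
// one 64 bit GPR on a 64 bit target, or two 32 bit GPRs on a 32 bit target,
// which is the split this patch implements.
__attribute__((regcall)) __mmask64 combine_masks(__mmask64 a, __mmask64 b) {
  return a | b;
}

The new test at the end of this diff exercises the same behavior directly at the LLVM IR level, using <64 x i1> arguments and return values.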
5353 X86VZeroUpper.cpp
5454 X86WinAllocaExpander.cpp
5555 X86WinEHState.cpp
56 X86CallingConv.cpp
5657 ${GLOBAL_ISEL_BUILD_FILES}
5758 )
5859
0 //=== X86CallingConv.cpp - X86 Custom Calling Convention Impl -*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the implementation of custom routines for the X86
10 // Calling Convention that aren't done by tablegen.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "MCTargetDesc/X86MCTargetDesc.h"
15 #include "llvm/CodeGen/CallingConvLower.h"
16 #include "llvm/IR/CallingConv.h"
17
18 namespace llvm {
19
20 bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
21 CCValAssign::LocInfo &LocInfo,
22 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
23 // List of GPR registers that are available to store values in regcall
24 // calling convention.
25 static const MCPhysReg RegList[] = {X86::EAX, X86::ECX, X86::EDX, X86::EDI,
26 X86::ESI};
27
28 // The vector will save all the available registers for allocation.
29 SmallVector<unsigned, 5> AvailableRegs;
30
31 // Search for the available registers.
32 for (auto Reg : RegList) {
33 if (!State.isAllocated(Reg))
34 AvailableRegs.push_back(Reg);
35 }
36
37 const size_t RequiredGprsUponSplit = 2;
38 if (AvailableRegs.size() < RequiredGprsUponSplit)
39 return false; // Not enough free registers - continue the search.
40
41 // Allocating the available registers
42 for (unsigned I = 0; I < RequiredGprsUponSplit; I++) {
43
44 // Marking the register as allocated
45 unsigned Reg = State.AllocateReg(AvailableRegs[I]);
46
47 // Since we previously made sure that 2 registers are available
48 // we expect that a real register number will be returned
49 assert(Reg && "Expecting a register will be available");
50
51 // Assign the value to the allocated register
52 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
53 }
54
55 // Successfully allocated registers - stop scanning further rules.
56 return true;
57 }
58
59 } // End llvm namespace
1919 #include "llvm/IR/CallingConv.h"
2020
2121 namespace llvm {
22
23 /// When the regcall calling convention is compiled for a 32 bit arch, special
24 /// treatment is required for 64 bit masks.
25 /// The value should be assigned to two GPRs.
26 /// @return true if registers were allocated and false otherwise
27 bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
28 CCValAssign::LocInfo &LocInfo,
29 ISD::ArgFlagsTy &ArgFlags, CCState &State);
2230
2331 inline bool CC_X86_32_VectorCallIndirect(unsigned &ValNo, MVT &ValVT,
2432 MVT &LocVT,
7676 // bool, char, int, enum, long, pointer --> GPR
7777 CCIfType<[i32], CCAssignToReg>,
7878
79 // TODO: Handle the case of mask types (v*i1)
80 // TODO: Handle the case of 32 bit machine with v64i1 argument
81 // (split to 2 registers)
82 CCIfType<[v8i1, v16i1, v32i1, v64i1], CCCustom<"CC_X86_RegCall_Error">>,
83
8479 // long long, __int64 --> GPR
8580 CCIfType<[i64], CCAssignToReg>,
81
82 // __mmask64 (v64i1) --> GPR64 (for x64) or 2 x GPR32 (for IA32)
83 CCIfType<[v64i1], CCPromoteToType<i64>>,
84 CCIfSubtarget<"is64Bit()", CCIfType<[i64],
85 CCAssignToReg>>,
86 CCIfSubtarget<"is32Bit()", CCIfType<[i64],
87 CCCustom<"CC_X86_32_RegCall_Assign2Regs">>>,
88
89 // TODO: Handle the case of mask types (v*i1)
90 CCIfType<[v8i1, v16i1, v32i1], CCCustom<"CC_X86_RegCall_Error">>,
8691
8792 // TODO: Handle the case of long double (f80)
8893 CCIfType<[f80], CCCustom<"CC_X86_RegCall_Error">>,
115120
116121 // In 32 bit, assign 64/32 bit values to 8/4 byte stack
117122 CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
118 CCIfType<[f64], CCAssignToStack<8, 4>>,
123 CCIfType<[i64, f64], CCAssignToStack<8, 4>>,
119124
120126 // MMX type gets an 8 byte slot on the stack, while alignment depends on target
121126 CCIfSubtarget<"is64Bit()", CCIfType<[x86mmx], CCAssignToStack<8, 8>>>,
146151 CCIfType<[i16], CCAssignToReg>,
147152 CCIfType<[i32], CCAssignToReg>,
148153
149 // TODO: Handle the case of mask types (v*i1)
150 // TODO: Handle the case of 32 bit machine with v64i1 argument
151 // (split to 2 registers)
152 CCIfType<[v8i1, v16i1, v32i1, v64i1], CCCustom<"CC_X86_RegCall_Error">>,
153
154154 // long long, __int64 --> GPR
155155 CCIfType<[i64], CCAssignToReg>,
156
157 // __mmask64 (v64i1) --> GPR64 (for x64) or 2 x GPR32 (for IA32)
158 CCIfType<[v64i1], CCPromoteToType<i64>>,
159 CCIfSubtarget<"is64Bit()", CCIfType<[i64],
160 CCAssignToReg>>,
161 CCIfSubtarget<"is32Bit()", CCIfType<[i64],
162 CCCustom<"CC_X86_32_RegCall_Assign2Regs">>>,
163
164 // TODO: Handle the case of mask types (v*i1)
165 CCIfType<[v8i1, v16i1, v32i1], CCCustom<"CC_X86_RegCall_Error">>,
156166
157167 // long double --> FP
158168 CCIfType<[f80], CCAssignToReg<[FP0]>>,
20932093 return ScratchRegs;
20942094 }
20952095
2096 /// Lowers mask values (v*i1) to the local register values
2097 static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
2098 const SDLoc &Dl, SelectionDAG &DAG) {
2099 EVT ValVT = ValArg.getValueType();
2100
2101 if (ValVT == MVT::v64i1 && ValLoc == MVT::i64) {
2102 // One stage lowering is required
2103 // bitcast: v64i1 -> i64
2104 return DAG.getBitcast(MVT::i64, ValArg);
2105 } else
2106 return DAG.getNode(ISD::SIGN_EXTEND, Dl, ValLoc, ValArg);
2107 }
2108
2109 /// Breaks v64i1 value into two registers and adds the new node to the DAG
2110 static void Passv64i1ArgInRegs(
2111 const SDLoc &Dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg,
2112 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, CCValAssign &VA,
2113 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
2114 assert((Subtarget.hasBWI() || Subtarget.hasBMI()) &&
2115 "Expected AVX512BW or AVX512BMI target!");
2116 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
2117 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
2118 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
2119 "The value should reside in two registers");
2120
2121 // Before splitting the value we cast it to i64
2122 Arg = DAG.getBitcast(MVT::i64, Arg);
2123
2124 // Splitting the value into two i32 types
2125 SDValue Lo, Hi;
2126 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2127 DAG.getConstant(0, Dl, MVT::i32));
2128 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2129 DAG.getConstant(1, Dl, MVT::i32));
2130
2131 // Attach the two i32 types into corresponding registers
2132 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
2133 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
2134 }
2135
20962136 SDValue
20972137 X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
20982138 bool isVarArg,
21172157 MVT::i32));
21182158
21192159 // Copy the result values into the output registers.
2120 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
2121 CCValAssign &VA = RVLocs[i];
2160 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
2161 ++I, ++OutsIndex) {
2162 CCValAssign &VA = RVLocs[I];
21222163 assert(VA.isRegLoc() && "Can only return in registers!");
2123 SDValue ValToCopy = OutVals[i];
2164 SDValue ValToCopy = OutVals[OutsIndex];
21242165 EVT ValVT = ValToCopy.getValueType();
21252166
21262167 // Promote values to the appropriate types.
21302171 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
21312172 else if (VA.getLocInfo() == CCValAssign::AExt) {
21322173 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2133 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2174 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
21342175 else
21352176 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
21362177 }
21832224 }
21842225 }
21852226
2186 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
2187 Flag = Chain.getValue(1);
2188 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2227 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
2228
2229 if (VA.needsCustom()) {
2230 assert(VA.getValVT() == MVT::v64i1 &&
2231 "Currently the only custom case is when we split v64i1 to 2 regs");
2232
2233 Passv64i1ArgInRegs(dl, DAG, Chain, ValToCopy, RegsToPass, VA, RVLocs[++I],
2234 Subtarget);
2235
2236 assert(2 == RegsToPass.size() &&
2237 "Expecting two registers after Pass64BitArgInRegs");
2238 } else {
2239 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2240 }
2241
2242 // Add nodes to the DAG and add the values into the RetOps list
2243 for (auto &Reg : RegsToPass) {
2244 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, Flag);
2245 Flag = Chain.getValue(1);
2246 RetOps.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
2247 }
21892248 }
21902249
21912250 // Swift calling convention does not require we copy the sret argument
23132372 return VT.bitsLT(MinVT) ? MinVT : VT;
23142373 }
23152374
2375 /// Reads two 32 bit registers and creates a 64 bit mask value.
2376 /// @param VA The current 32 bit value that needs to be assigned.
2377 /// @param NextVA The next 32 bit value that needs to be assigned.
2378 /// @param Root The parent DAG node.
2379 /// @param [inout] InFlag Represents the SDValue in the parent DAG node for
2380 /// glue purposes. In case the DAG is already using a
2381 /// physical register instead of a virtual one, we should glue
2382 /// our new SDValue to the InFlag SDValue.
2383 /// @return a new SDValue of size 64 bit.
2384 static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
2385 SDValue &Root, SelectionDAG &DAG,
2386 const SDLoc &Dl, const X86Subtarget &Subtarget,
2387 SDValue *InFlag = nullptr) {
2388 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
2389 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
2390 assert(VA.getValVT() == MVT::v64i1 &&
2391 "Expecting first location of 64 bit width type");
2392 assert(NextVA.getValVT() == VA.getValVT() &&
2393 "The locations should have the same type");
2394 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
2395 "The values should reside in two registers");
2396
2397 SDValue Lo, Hi;
2398 unsigned Reg;
2399 SDValue ArgValueLo, ArgValueHi;
2400
2401 MachineFunction &MF = DAG.getMachineFunction();
2402 const TargetRegisterClass *RC = &X86::GR32RegClass;
2403
2404 // Read a 32 bit value from the registers
2405 if (nullptr == InFlag) {
2406 // When no physical register is present,
2407 // create an intermediate virtual register
2408 Reg = MF.addLiveIn(VA.getLocReg(), RC);
2409 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2410 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2411 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2412 } else {
2413 // When a physical register is available read the value from it and glue
2414 // the reads together.
2415 ArgValueLo =
2416 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
2417 *InFlag = ArgValueLo.getValue(2);
2418 ArgValueHi =
2419 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
2420 *InFlag = ArgValueHi.getValue(2);
2421 }
2422
2423 // Convert the i32 type into v32i1 type
2424 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
2425
2426 // Convert the i32 type into v32i1 type
2427 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
2428
2429 // Concatenate the two values together
2430 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
2431 }
2432
2433 static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
2434 const EVT &ValLoc, const SDLoc &Dl,
2435 SelectionDAG &DAG) {
2436 assert((ValLoc == MVT::i64 || ValLoc == MVT::i32) &&
2437 "Expecting register location of size 32/64 bit");
2438
2439 // Currently not referenced - will be used in other mask lowering
2440 (void)Dl;
2441
2442 // In the case of v64i1, no special handling is required for two reasons:
2443 // In a 32 bit machine, this case is handled by getv64i1Argument.
2444 // In a 64 bit machine, there is no need to truncate the value, only to bitcast it.
2445 if (ValVT == MVT::v64i1 && ValLoc == MVT::i32) {
2446 llvm_unreachable("Expecting only i64 locations");
2447 }
2448
2449 return DAG.getBitcast(ValVT, ValArg);
2450 }
2451
23162452 /// Lower the result values of a call into the
23172453 /// appropriate copies out of appropriate physical registers.
23182454 ///
23292465 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
23302466
23312467 // Copy all of the result registers out of their specified physreg.
2332 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
2333 CCValAssign &VA = RVLocs[i];
2468 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
2469 ++I, ++InsIndex) {
2470 CCValAssign &VA = RVLocs[I];
23342471 EVT CopyVT = VA.getLocVT();
23352472
23362473 // If this is x86-64, and we disabled SSE, we can't return FP values
23372474 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
2338 ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget.hasSSE1())) {
2475 ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
23392476 report_fatal_error("SSE register return with SSE disabled");
23402477 }
23412478
23502487 RoundAfterCopy = (CopyVT != VA.getLocVT());
23512488 }
23522489
2353 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
2354 CopyVT, InFlag).getValue(1);
2355 SDValue Val = Chain.getValue(0);
2490 SDValue Val;
2491 if (VA.needsCustom()) {
2492 assert(VA.getValVT() == MVT::v64i1 &&
2493 "Currently the only custom case is when we split v64i1 to 2 regs");
2494 Val =
2495 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
2496 } else {
2497 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
2498 .getValue(1);
2499 Val = Chain.getValue(0);
2500 InFlag = Chain.getValue(2);
2501 }
23562502
23572503 if (RoundAfterCopy)
23582504 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
23592505 // This truncation won't change the value.
23602506 DAG.getIntPtrConstant(1, dl));
23612507
2362 if (VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1)
2363 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
2364
2365 InFlag = Chain.getValue(2);
2508 if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) {
2509 if (VA.getValVT().isVector() &&
2510 (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::i64)) {
2511 // promoting a mask type (v*i1) into a register of type i64/i32
2512 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
2513 } else
2514 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
2515 }
2516
23662517 InVals.push_back(Val);
23672518 }
23682519
24302581 /// Return true if the calling convention is one that we can guarantee TCO for.
24312582 static bool canGuaranteeTCO(CallingConv::ID CC) {
24322583 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
2433 CC == CallingConv::HiPE || CC == CallingConv::HHVM);
2584 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
2585 CC == CallingConv::HHVM);
24342586 }
24352587
24362588 /// Return true if we might ever do TCO for calls with this calling convention.
24852637 EVT ValVT;
24862638
24872639 // If value is passed by pointer we have address passed instead of the value
2488 // itself.
2489 bool ExtendedInMem = VA.isExtInLoc() &&
2490 VA.getValVT().getScalarType() == MVT::i1;
2640 // itself. No need to extend if the mask value and location share the same
2641 // absolute size.
2642 bool ExtendedInMem =
2643 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
2644 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
24912645
24922646 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
24932647 ValVT = VA.getLocVT();
26112765 bool Is64Bit = Subtarget.is64Bit();
26122766 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
26132767
2614 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
2615 "Var args not supported with calling convention fastcc, ghc or hipe");
2768 assert(
2769 !(isVarArg && canGuaranteeTCO(CallConv)) &&
2770 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
26162771
26172772 if (CallConv == CallingConv::X86_INTR) {
26182773 bool isLegal = Ins.size() == 1 ||
26322787
26332788 CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
26342789
2635 unsigned LastVal = ~0U;
26362790 SDValue ArgValue;
2637 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2638 CCValAssign &VA = ArgLocs[i];
2639 // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
2640 // places.
2641 assert(VA.getValNo() != LastVal &&
2642 "Don't support value assigned to multiple locs yet");
2643 (void)LastVal;
2644 LastVal = VA.getValNo();
2791 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
2792 ++I, ++InsIndex) {
2793 assert(InsIndex < Ins.size() && "Invalid Ins index");
2794 CCValAssign &VA = ArgLocs[I];
26452795
26462796 if (VA.isRegLoc()) {
26472797 EVT RegVT = VA.getLocVT();
2648 const TargetRegisterClass *RC;
2649 if (RegVT == MVT::i32)
2650 RC = &X86::GR32RegClass;
2651 else if (Is64Bit && RegVT == MVT::i64)
2652 RC = &X86::GR64RegClass;
2653 else if (RegVT == MVT::f32)
2654 RC = &X86::FR32RegClass;
2655 else if (RegVT == MVT::f64)
2656 RC = &X86::FR64RegClass;
2657 else if (RegVT == MVT::f128)
2658 RC = &X86::FR128RegClass;
2659 else if (RegVT.is512BitVector())
2660 RC = &X86::VR512RegClass;
2661 else if (RegVT.is256BitVector())
2662 RC = &X86::VR256RegClass;
2663 else if (RegVT.is128BitVector())
2664 RC = &X86::VR128RegClass;
2665 else if (RegVT == MVT::x86mmx)
2666 RC = &X86::VR64RegClass;
2667 else if (RegVT == MVT::i1)
2668 RC = &X86::VK1RegClass;
2669 else if (RegVT == MVT::v8i1)
2670 RC = &X86::VK8RegClass;
2671 else if (RegVT == MVT::v16i1)
2672 RC = &X86::VK16RegClass;
2673 else if (RegVT == MVT::v32i1)
2674 RC = &X86::VK32RegClass;
2675 else if (RegVT == MVT::v64i1)
2676 RC = &X86::VK64RegClass;
2677 else
2678 llvm_unreachable("Unknown argument type!");
2679
2680 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2681 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
2798 if (VA.needsCustom()) {
2799 assert(
2800 VA.getValVT() == MVT::v64i1 &&
2801 "Currently the only custom case is when we split v64i1 to 2 regs");
2802
2803 // In the regcall calling convention, v64i1 values that are
2804 // compiled to a 32 bit arch are split up into two registers.
2805 ArgValue =
2806 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
2807 } else {
2808 const TargetRegisterClass *RC;
2809 if (RegVT == MVT::i32)
2810 RC = &X86::GR32RegClass;
2811 else if (Is64Bit && RegVT == MVT::i64)
2812 RC = &X86::GR64RegClass;
2813 else if (RegVT == MVT::f32)
2814 RC = &X86::FR32RegClass;
2815 else if (RegVT == MVT::f64)
2816 RC = &X86::FR64RegClass;
2817 else if (RegVT == MVT::f128)
2818 RC = &X86::FR128RegClass;
2819 else if (RegVT.is512BitVector())
2820 RC = &X86::VR512RegClass;
2821 else if (RegVT.is256BitVector())
2822 RC = &X86::VR256RegClass;
2823 else if (RegVT.is128BitVector())
2824 RC = &X86::VR128RegClass;
2825 else if (RegVT == MVT::x86mmx)
2826 RC = &X86::VR64RegClass;
2827 else if (RegVT == MVT::i1)
2828 RC = &X86::VK1RegClass;
2829 else if (RegVT == MVT::v8i1)
2830 RC = &X86::VK8RegClass;
2831 else if (RegVT == MVT::v16i1)
2832 RC = &X86::VK16RegClass;
2833 else if (RegVT == MVT::v32i1)
2834 RC = &X86::VK32RegClass;
2835 else if (RegVT == MVT::v64i1)
2836 RC = &X86::VK64RegClass;
2837 else
2838 llvm_unreachable("Unknown argument type!");
2839
2840 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2841 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
2842 }
26822843
26832844 // If this is an 8 or 16-bit value, it is really passed promoted to 32
26842845 // bits. Insert an assert[sz]ext to capture this, then truncate to the
26962857 // Handle MMX values passed in XMM regs.
26972858 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
26982859 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
2699 else
2860 else if (VA.getValVT().isVector() &&
2861 VA.getValVT().getScalarType() == MVT::i1 &&
2862 ((RegVT == MVT::i32) || (RegVT == MVT::i64))) {
2863 // Promoting a mask type (v*i1) into a register of type i64/i32
2864 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
2865 } else
27002866 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
27012867 }
27022868 } else {
27032869 assert(VA.isMemLoc());
2704 ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
2870 ArgValue =
2871 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
27052872 }
27062873
27072874 // If value is passed via pointer - do a load.
27122879 InVals.push_back(ArgValue);
27132880 }
27142881
2715 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2882 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
27162883 // Swift calling convention does not require we copy the sret argument
27172884 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
27182885 if (CallConv == CallingConv::Swift)
27222889 // sret argument into %rax/%eax (depending on ABI) for the return. Save
27232890 // the argument into a virtual register so that we can access it from the
27242891 // return points.
2725 if (Ins[i].Flags.isSRet()) {
2892 if (Ins[I].Flags.isSRet()) {
27262893 unsigned Reg = FuncInfo->getSRetReturnReg();
27272894 if (!Reg) {
27282895 MVT PtrTy = getPointerTy(DAG.getDataLayout());
27292896 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
27302897 FuncInfo->setSRetReturnReg(Reg);
27312898 }
2732 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]);
2899 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
27332900 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
27342901 break;
27352902 }
31213288 // Walk the register/memloc assignments, inserting copies/loads. In the case
31223289 // of tail call optimization arguments are handled later.
31233290 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
3124 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3291 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
3292 ++I, ++OutIndex) {
3293 assert(OutIndex < Outs.size() && "Invalid Out index");
31253294 // Skip inalloca arguments, they have already been written.
3126 ISD::ArgFlagsTy Flags = Outs[i].Flags;
3295 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
31273296 if (Flags.isInAlloca())
31283297 continue;
31293298
3130 CCValAssign &VA = ArgLocs[i];
3299 CCValAssign &VA = ArgLocs[I];
31313300 EVT RegVT = VA.getLocVT();
3132 SDValue Arg = OutVals[i];
3301 SDValue Arg = OutVals[OutIndex];
31333302 bool isByVal = Flags.isByVal();
31343303
31353304 // Promote the value if needed.
31453314 case CCValAssign::AExt:
31463315 if (Arg.getValueType().isVector() &&
31473316 Arg.getValueType().getVectorElementType() == MVT::i1)
3148 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
3317 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
31493318 else if (RegVT.is128BitVector()) {
31503319 // Special case: passing MMX values in XMM registers.
31513320 Arg = DAG.getBitcast(MVT::i64, Arg);
31693338 }
31703339 }
31713340
3172 if (VA.isRegLoc()) {
3341 if (VA.needsCustom()) {
3342 assert(VA.getValVT() == MVT::v64i1 &&
3343 "Currently the only custom case is when we split v64i1 to 2 regs");
3344 // Split v64i1 value into two registers
3345 Passv64i1ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++I],
3346 Subtarget);
3347 } else if (VA.isRegLoc()) {
31733348 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
31743349 if (isVarArg && IsWin64) {
31753350 // Win64 ABI requires argument XMM reg to be copied to the corresponding
32693444 SmallVector<SDValue, 8> MemOpChains2;
32703445 SDValue FIN;
32713446 int FI = 0;
3272 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3273 CCValAssign &VA = ArgLocs[i];
3274 if (VA.isRegLoc())
3447 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
3448 ++I, ++OutsIndex) {
3449 CCValAssign &VA = ArgLocs[I];
3450
3451 if (VA.isRegLoc()) {
3452 if (VA.needsCustom()) {
3453 assert((CallConv == CallingConv::X86_RegCall) &&
3454 "Expecting custome case only in regcall calling convention");
3455 // This means that we are in a special case where one argument was
3456 // passed through two register locations - skip the next location.
3457 ++I;
3458 }
3459
32753460 continue;
3461 }
3462
32763463 assert(VA.isMemLoc());
3277 SDValue Arg = OutVals[i];
3278 ISD::ArgFlagsTy Flags = Outs[i].Flags;
3464 SDValue Arg = OutVals[OutsIndex];
3465 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
32793466 // Skip inalloca arguments. They don't require any work.
32803467 if (Flags.isInAlloca())
32813468 continue;
0 ; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+avx512bw | FileCheck --check-prefix=X32 %s
1 ; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+avx512bw | FileCheck --check-prefix=WIN64 %s
2 ; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+avx512bw | FileCheck --check-prefix=LINUXOSX64 %s
3
4 ; X32-LABEL: test_argv64i1:
5 ; X32: kmovd %edx, %k0
6 ; X32: kmovd %edi, %k1
7 ; X32: kmovd %eax, %k1
8 ; X32: kmovd %ecx, %k2
9 ; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
10 ; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
11 ; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
12 ; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
13 ; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
14 ; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
15 ; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
16 ; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
17 ; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
18 ; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
19 ; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
20 ; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
21 ; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
22 ; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
23 ; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
24 ; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
25 ; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
26 ; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
27 ; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
28 ; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
29 ; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
30 ; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
31 ; X32: retl
32
33 ; WIN64-LABEL: test_argv64i1:
34 ; WIN64: addq %rcx, %rax
35 ; WIN64: addq %rdx, %rax
36 ; WIN64: addq %rdi, %rax
37 ; WIN64: addq %rsi, %rax
38 ; WIN64: addq %r8, %rax
39 ; WIN64: addq %r9, %rax
40 ; WIN64: addq %r10, %rax
41 ; WIN64: addq %r11, %rax
42 ; WIN64: addq %r12, %rax
43 ; WIN64: addq %r14, %rax
44 ; WIN64: addq %r15, %rax
45 ; WIN64: addq {{([0-9])*}}(%rsp), %rax
46 ; WIN64: retq
47
48 ; LINUXOSX64-LABEL: test_argv64i1:
49 ; LINUXOSX64: addq %rcx, %rax
50 ; LINUXOSX64: addq %rdx, %rax
51 ; LINUXOSX64: addq %rdi, %rax
52 ; LINUXOSX64: addq %rsi, %rax
53 ; LINUXOSX64: addq %r8, %rax
54 ; LINUXOSX64: addq %r9, %rax
55 ; LINUXOSX64: addq %r12, %rax
56 ; LINUXOSX64: addq %r13, %rax
57 ; LINUXOSX64: addq %r14, %rax
58 ; LINUXOSX64: addq %r15, %rax
59 ; LINUXOSX64: addq {{([0-9])*}}(%rsp), %rax
60 ; LINUXOSX64: addq {{([0-9])*}}(%rsp), %rax
61 ; LINUXOSX64: retq
62
63 ; Test regcall when receiving arguments of v64i1 type
64 define x86_regcallcc i64 @test_argv64i1(<64 x i1> %x0, <64 x i1> %x1, <64 x i1> %x2,
65 <64 x i1> %x3, <64 x i1> %x4, <64 x i1> %x5,
66 <64 x i1> %x6, <64 x i1> %x7, <64 x i1> %x8,
67 <64 x i1> %x9, <64 x i1> %x10, <64 x i1> %x11,
68 <64 x i1> %x12) {
69 %y0 = bitcast <64 x i1> %x0 to i64
70 %y1 = bitcast <64 x i1> %x1 to i64
71 %y2 = bitcast <64 x i1> %x2 to i64
72 %y3 = bitcast <64 x i1> %x3 to i64
73 %y4 = bitcast <64 x i1> %x4 to i64
74 %y5 = bitcast <64 x i1> %x5 to i64
75 %y6 = bitcast <64 x i1> %x6 to i64
76 %y7 = bitcast <64 x i1> %x7 to i64
77 %y8 = bitcast <64 x i1> %x8 to i64
78 %y9 = bitcast <64 x i1> %x9 to i64
79 %y10 = bitcast <64 x i1> %x10 to i64
80 %y11 = bitcast <64 x i1> %x11 to i64
81 %y12 = bitcast <64 x i1> %x12 to i64
82 %add1 = add i64 %y0, %y1
83 %add2 = add i64 %add1, %y2
84 %add3 = add i64 %add2, %y3
85 %add4 = add i64 %add3, %y4
86 %add5 = add i64 %add4, %y5
87 %add6 = add i64 %add5, %y6
88 %add7 = add i64 %add6, %y7
89 %add8 = add i64 %add7, %y8
90 %add9 = add i64 %add8, %y9
91 %add10 = add i64 %add9, %y10
92 %add11 = add i64 %add10, %y11
93 %add12 = add i64 %add11, %y12
94 ret i64 %add12
95 }
96
97 ; X32-LABEL: caller_argv64i1:
98 ; X32: movl $2, %eax
99 ; X32: movl $1, %ecx
100 ; X32: movl $2, %edx
101 ; X32: movl $1, %edi
102 ; X32: pushl ${{1|2}}
103 ; X32: pushl ${{1|2}}
104 ; X32: pushl ${{1|2}}
105 ; X32: pushl ${{1|2}}
106 ; X32: pushl ${{1|2}}
107 ; X32: pushl ${{1|2}}
108 ; X32: pushl ${{1|2}}
109 ; X32: pushl ${{1|2}}
110 ; X32: pushl ${{1|2}}
111 ; X32: pushl ${{1|2}}
112 ; X32: pushl ${{1|2}}
113 ; X32: pushl ${{1|2}}
114 ; X32: pushl ${{1|2}}
115 ; X32: pushl ${{1|2}}
116 ; X32: pushl ${{1|2}}
117 ; X32: pushl ${{1|2}}
118 ; X32: pushl ${{1|2}}
119 ; X32: pushl ${{1|2}}
120 ; X32: pushl ${{1|2}}
121 ; X32: pushl ${{1|2}}
122 ; X32: pushl ${{1|2}}
123 ; X32: pushl ${{1|2}}
124 ; X32: call{{.*}} _test_argv64i1
125
126 ; WIN64-LABEL: caller_argv64i1:
127 ; WIN64: movabsq $4294967298, %rax
128 ; WIN64: movq %rax, (%rsp)
129 ; WIN64: movq %rax, %rcx
130 ; WIN64: movq %rax, %rdx
131 ; WIN64: movq %rax, %rdi
132 ; WIN64: movq %rax, %rsi
133 ; WIN64: movq %rax, %r8
134 ; WIN64: movq %rax, %r9
135 ; WIN64: movq %rax, %r10
136 ; WIN64: movq %rax, %r11
137 ; WIN64: movq %rax, %r12
138 ; WIN64: movq %rax, %r14
139 ; WIN64: movq %rax, %r15
140 ; WIN64: callq test_argv64i1
141
142 ; LINUXOSX64-LABEL: caller_argv64i1:
143 ; LINUXOSX64: movabsq $4294967298, %rax
144 ; LINUXOSX64: movq %rax, %rcx
145 ; LINUXOSX64: movq %rax, %rdx
146 ; LINUXOSX64: movq %rax, %rdi
147 ; LINUXOSX64: movq %rax, %rsi
148 ; LINUXOSX64: movq %rax, %r8
149 ; LINUXOSX64: movq %rax, %r9
150 ; LINUXOSX64: movq %rax, %r12
151 ; LINUXOSX64: movq %rax, %r13
152 ; LINUXOSX64: movq %rax, %r14
153 ; LINUXOSX64: movq %rax, %r15
154 ; LINUXOSX64: call{{.*}} test_argv64i1
155
156 ; Test regcall when passing arguments of v64i1 type
157 define x86_regcallcc i64 @caller_argv64i1() #0 {
158 entry:
159 %v0 = bitcast i64 4294967298 to <64 x i1>
160 %call = call x86_regcallcc i64 @test_argv64i1(<64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
161 <64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
162 <64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
163 <64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
164 <64 x i1> %v0)
165 ret i64 %call
166 }
167
168 ; X32-LABEL: test_retv64i1:
169 ; X32: mov{{.*}} $2, %eax
170 ; X32: mov{{.*}} $1, %ecx
171 ; X32: ret{{.*}}
172
173 ; WIN64-LABEL: test_retv64i1:
174 ; WIN64: mov{{.*}} $4294967298, %rax
175 ; WIN64: ret{{.*}}
176
177 ; Test regcall when returning v64i1 type
178 define x86_regcallcc <64 x i1> @test_retv64i1() {
179 %a = bitcast i64 4294967298 to <64 x i1>
180 ret <64 x i1> %a
181 }
182
183 ; X32-LABEL: caller_retv64i1:
184 ; X32: call{{.*}} _test_retv64i1
185 ; X32: kmov{{.*}} %eax, %k0
186 ; X32: kmov{{.*}} %ecx, %k1
187 ; X32: kunpckdq %k0, %k1, %k0
188
189 ; Test regcall when processing result of v64i1 type
190 define x86_regcallcc <64 x i1> @caller_retv64i1() #0 {
191 entry:
192 %call = call x86_regcallcc <64 x i1> @test_retv64i1()
193 ret <64 x i1> %call
194 }