llvm.org GIT mirror: llvm commit 7327d6f
[X86] Vectorcall Calling Convention - Adding Complete CodeGen Support The vectorcall calling convention specifies that arguments to functions are to be passed in registers, when possible. vectorcall uses more registers for arguments than fastcall or the default x64 calling convention. The vectorcall calling convention is only supported in native code on x86 and x64 processors that include Streaming SIMD Extensions 2 (SSE2) and above. The current implementation does not handle Homogeneous Vector Aggregates (HVAs) correctly; this patch fixes that. This commit also adds lit tests to better cover HVA corner cases. Differential Revision: https://reviews.llvm.org/D27392 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@290240 91177308-0d34-0410-b5e6-96231b3b80d8 Oren Ben Simhon 3 years ago
9 changed files with 471 additions and 72 deletions.
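For readers unfamiliar with the convention, here is a minimal source-level sketch of what this patch lowers, assuming an MSVC-compatible compiler (cl.exe or clang-cl) targeting x64 with SSE2; the struct and function names are invented for the example and are not part of the patch.

#include <xmmintrin.h>

// An HVA (Homogeneous Vector Aggregate): a struct whose members are all the
// same vector type, with at most four members.
struct HVA4 { __m128 x, y, z, w; };

// Under __vectorcall the members of an HVA argument are passed in unused
// XMM registers when enough of XMM0-XMM5 are free; otherwise the aggregate
// is passed by reference.
__m128 __vectorcall hva_sum(HVA4 v) {
  __m128 s = _mm_add_ps(v.x, v.y);
  s = _mm_add_ps(s, v.z);
  return _mm_add_ps(s, v.w);
}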
295295 void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
296296 CCAssignFn Fn);
297297
298 /// The function will invoke AnalyzeFormalArguments.
299 void AnalyzeArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
300 CCAssignFn Fn) {
301 AnalyzeFormalArguments(Ins, Fn);
302 }
303
298304 /// AnalyzeReturn - Analyze the returned values of a return,
299305 /// incorporating info about the result values into this state.
300306 void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
317323 SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
318324 CCAssignFn Fn);
319325
326 /// The function will invoke AnalyzeCallOperands.
327 void AnalyzeArguments(const SmallVectorImpl<ISD::OutputArg> &Outs,
328 CCAssignFn Fn) {
329 AnalyzeCallOperands(Outs, Fn);
330 }
331
320332 /// AnalyzeCallResult - Analyze the return values of a call,
321333 /// incorporating info about the passed values into this state.
322334 void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
323335 CCAssignFn Fn);
336
337 /// A shadow allocated register is a register that was allocated
338 /// but wasn't added to the location list (Locs).
339 /// \returns true if the register was allocated as shadow or false otherwise.
340 bool IsShadowAllocatedReg(unsigned Reg) const;
324341
325342 /// AnalyzeCallResult - Same as above except it's specialized for calls which
326343 /// produce a single value.
520537 const SmallVectorImpl<ISD::InputArg> &Ins,
521538 CCAssignFn CalleeFn, CCAssignFn CallerFn);
522539
540 /// The function runs an additional analysis pass over function arguments.
541 /// It will mark each argument with the attribute flag SecArgPass.
542 /// After running, it will sort the locs list.
543 template <class T>
544 void AnalyzeArgumentsSecondPass(const SmallVectorImpl<T> &Args,
545 CCAssignFn Fn) {
546 unsigned NumFirstPassLocs = Locs.size();
547
548 // Create an argument list similar to \p Args, in which each argument is
549 // marked with the SecArgPass flag.
550 SmallVector<T, 16> SecPassArg;
552 for (auto Arg : Args) {
553 Arg.Flags.setSecArgPass();
554 SecPassArg.push_back(Arg);
555 }
556
557 // Run the second argument pass
558 AnalyzeArguments(SecPassArg, Fn);
559
560 // Sort the locations of the arguments according to their original position.
561 SmallVector<CCValAssign, 16> TmpArgLocs;
562 std::swap(TmpArgLocs, Locs);
563 auto B = TmpArgLocs.begin(), E = TmpArgLocs.end();
564 std::merge(B, B + NumFirstPassLocs, B + NumFirstPassLocs, E,
565 std::back_inserter(Locs),
566 [](const CCValAssign &A, const CCValAssign &B) -> bool {
567 return A.getValNo() < B.getValNo();
568 });
569 }
570
523571 private:
524572 /// MarkAllocated - Mark a register and all of its aliases as allocated.
525573 void MarkAllocated(unsigned Reg);
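The merge step at the end of AnalyzeArgumentsSecondPass is what keeps the location list ordered by original argument index after the HVA second pass. Below is a small self-contained sketch of that idea using plain std::vector instead of the LLVM containers; the Loc struct and the register names are invented stand-ins for CCValAssign and the real assignments.

#include <algorithm>
#include <cstdio>
#include <iterator>
#include <vector>

struct Loc { unsigned ValNo; const char *Reg; };  // stand-in for CCValAssign

int main() {
  // Pass 1 assigned the non-HVA arguments (0 and 2); the HVA argument 1 was
  // skipped and only received its register on pass 2.
  std::vector<Loc> FirstPass  = {{0, "RCX"}, {2, "R8"}};
  std::vector<Loc> SecondPass = {{1, "XMM1"}};

  // Both lists are already sorted by ValNo, so one std::merge restores the
  // original argument order, which is what the code above does on Locs.
  std::vector<Loc> Merged;
  std::merge(FirstPass.begin(), FirstPass.end(), SecondPass.begin(),
             SecondPass.end(), std::back_inserter(Merged),
             [](const Loc &A, const Loc &B) { return A.ValNo < B.ValNo; });

  for (const Loc &L : Merged)
    std::printf("arg %u -> %s\n", L.ValNo, L.Reg);  // 0->RCX, 1->XMM1, 2->R8
  return 0;
}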
5050 static const uint64_t SwiftSelfOffs = 14;
5151 static const uint64_t SwiftError = 1ULL<<15; ///< Swift error parameter
5252 static const uint64_t SwiftErrorOffs = 15;
53 static const uint64_t Hva = 1ULL << 16; ///< HVA field for
54 ///< vectorcall
55 static const uint64_t HvaOffs = 16;
56 static const uint64_t HvaStart = 1ULL << 17; ///< HVA structure start
57 ///< for vectorcall
58 static const uint64_t HvaStartOffs = 17;
59 static const uint64_t SecArgPass = 1ULL << 18; ///< Second argument
60 ///< pass for vectorcall
61 static const uint64_t SecArgPassOffs = 18;
5362 static const uint64_t OrigAlign = 0x1FULL<<27;
5463 static const uint64_t OrigAlignOffs = 27;
5564 static const uint64_t ByValSize = 0x3fffffffULL<<32; ///< Struct size
8998
9099 bool isSwiftError() const { return Flags & SwiftError; }
91100 void setSwiftError() { Flags |= One << SwiftErrorOffs; }
101
102 bool isHva() const { return Flags & Hva; }
103 void setHva() { Flags |= One << HvaOffs; }
104
105 bool isHvaStart() const { return Flags & HvaStart; }
106 void setHvaStart() { Flags |= One << HvaStartOffs; }
107
108 bool isSecArgPass() const { return Flags & SecArgPass; }
109 void setSecArgPass() { Flags |= One << SecArgPassOffs; }
92110
93111 bool isNest() const { return Flags & Nest; }
94112 void setNest() { Flags |= One << NestOffs; }
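The new Hva, HvaStart and SecArgPass flags follow the existing encoding pattern of this class: one mask bit per property plus an *Offs constant giving the bit position used by the setter. A tiny standalone illustration of that pattern follows; the constants are local to the example, not taken from the LLVM header.

#include <cassert>
#include <cstdint>

static const uint64_t One     = 1;
static const uint64_t Hva     = 1ULL << 16;  // mask tested by isHva()
static const uint64_t HvaOffs = 16;          // shift used by setHva()

int main() {
  uint64_t Flags = 0;
  Flags |= One << HvaOffs;        // what setHva() does
  assert((Flags & Hva) != 0);     // what isHva() tests
  return 0;
}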
2222 #include "llvm/Target/TargetLowering.h"
2323 #include "llvm/Target/TargetRegisterInfo.h"
2424 #include "llvm/Target/TargetSubtargetInfo.h"
25 #include
26
2527 using namespace llvm;
2628
2729 CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
6365 UsedRegs[*AI/32] |= 1 << (*AI&31);
6466 }
6567
68 bool CCState::IsShadowAllocatedReg(unsigned Reg) const {
69 if (!isAllocated(Reg))
70 return false;
71
72 for (auto const &ValAssign : Locs) {
73 if (ValAssign.isRegLoc()) {
74 for (MCRegAliasIterator AI(ValAssign.getLocReg(), &TRI, true);
75 AI.isValid(); ++AI) {
76 if (*AI == Reg)
77 return false;
78 }
79 }
80 }
81 return true;
82 }
83
6684 /// Analyze an array of argument values,
6785 /// incorporating info about the formals into this state.
6886 void
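IsShadowAllocatedReg distinguishes registers that were merely reserved (shadow allocated) from registers that actually received a value in Locs; only the former may be reused by the HVA second pass on x64. The following toy model shows that distinction without the LLVM API; the ToyCCState type is invented for the example and ignores register aliases, which the real code walks with MCRegAliasIterator.

#include <cassert>
#include <set>
#include <string>

struct ToyCCState {
  std::set<std::string> Allocated;  // every register ever reserved
  std::set<std::string> HasValue;   // registers that received a real value

  void allocateShadow(const std::string &R) { Allocated.insert(R); }
  void assign(const std::string &R) { Allocated.insert(R); HasValue.insert(R); }
  bool isShadowAllocated(const std::string &R) const {
    return Allocated.count(R) && !HasValue.count(R);
  }
};

int main() {
  ToyCCState S;
  S.assign("XMM1");          // a real vector argument landed in XMM1
  S.allocateShadow("XMM0");  // an integer in RCX shadow-allocated XMM0
  assert(S.isShadowAllocated("XMM0"));   // free for the HVA second pass
  assert(!S.isShadowAllocated("XMM1"));  // already carries a value
  return 0;
}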
77317731 Flags.setZExt();
77327732 if (Args[i].isSExt)
77337733 Flags.setSExt();
7734 if (Args[i].isInReg)
7734 if (Args[i].isInReg) {
7735 // If we are using the vectorcall calling convention, a structure that is
7736 // passed InReg must be an HVA.
7737 if (CLI.CallConv == CallingConv::X86_VectorCall &&
7738 isa<StructType>(FinalType)) {
7739 // The first value of a structure is marked as HvaStart.
7740 if (0 == Value)
7741 Flags.setHvaStart();
7742 Flags.setHva();
7743 }
7744 // Set InReg Flag
77357745 Flags.setInReg();
7746 }
77367747 if (Args[i].isSRet)
77377748 Flags.setSRet();
77387749 if (Args[i].isSwiftSelf)
80188029 Flags.setZExt();
80198030 if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
80208031 Flags.setSExt();
8021 if (F.getAttributes().hasAttribute(Idx, Attribute::InReg))
8032 if (F.getAttributes().hasAttribute(Idx, Attribute::InReg)) {
8033 // If we are using the vectorcall calling convention, a structure that is
8034 // passed InReg must be an HVA.
8035 if (F.getCallingConv() == CallingConv::X86_VectorCall &&
8036 isa<StructType>(I->getType())) {
8037 // The first value of a structure is marked as HvaStart.
8038 if (0 == Value)
8039 Flags.setHvaStart();
8040 Flags.setHva();
8041 }
8042 // Set InReg Flag
80228043 Flags.setInReg();
8044 }
80238045 if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet))
80248046 Flags.setSRet();
80258047 if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftSelf))
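The rule applied in both hunks above is the same: under the vectorcall convention an inreg aggregate argument is taken to be an HVA, every scalar value split from it gets the Hva flag, and only the first value also gets HvaStart so the second pass knows where the aggregate begins. Here is a self-contained model of that marking, with an invented ToyFlags type standing in for ISD::ArgFlagsTy.

#include <cstdio>
#include <vector>

struct ToyFlags { bool Hva = false; bool HvaStart = false; };

// Flag the N values produced by splitting one HVA aggregate argument.
static std::vector<ToyFlags> markHvaParts(unsigned NumValues) {
  std::vector<ToyFlags> Parts(NumValues);
  for (unsigned Value = 0; Value < NumValues; ++Value) {
    if (Value == 0)
      Parts[Value].HvaStart = true;  // aggregate start
    Parts[Value].Hva = true;
  }
  return Parts;
}

int main() {
  // An HVA with four vector members is split into four flagged values.
  for (const ToyFlags &F : markHvaParts(4))
    std::printf("Hva=%d HvaStart=%d\n", F.Hva, F.HvaStart);
  return 0;
}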
1212 //===----------------------------------------------------------------------===//
1313
1414 #include "MCTargetDesc/X86MCTargetDesc.h"
15 #include "X86Subtarget.h"
1516 #include "llvm/CodeGen/CallingConvLower.h"
1617 #include "llvm/IR/CallingConv.h"
1718
3839 if (AvailableRegs.size() < RequiredGprsUponSplit)
3940 return false; // Not enough free registers - continue the search.
4041
41 // Allocating the available registers
42 // Allocating the available registers.
4243 for (unsigned I = 0; I < RequiredGprsUponSplit; I++) {
4344
44 // Marking the register as located
45 // Marking the register as located.
4546 unsigned Reg = State.AllocateReg(AvailableRegs[I]);
4647
4748 // Since we previously made sure that 2 registers are available
48 // we expect that a real register number will be returned
49 // we expect that a real register number will be returned.
4950 assert(Reg && "Expecting a register will be available");
5051
5152 // Assign the value to the allocated register
5657 return true;
5758 }
5859
60 static ArrayRef<MCPhysReg> CC_X86_VectorCallGetSSEs(const MVT &ValVT) {
61 if (ValVT.is512BitVector()) {
62 static const MCPhysReg RegListZMM[] = {X86::ZMM0, X86::ZMM1, X86::ZMM2,
63 X86::ZMM3, X86::ZMM4, X86::ZMM5};
64 return RegListZMM;
65 }
66
67 if (ValVT.is256BitVector()) {
68 static const MCPhysReg RegListYMM[] = {X86::YMM0, X86::YMM1, X86::YMM2,
69 X86::YMM3, X86::YMM4, X86::YMM5};
70 return RegListYMM;
71 }
72
73 static const MCPhysReg RegListXMM[] = {X86::XMM0, X86::XMM1, X86::XMM2,
74 X86::XMM3, X86::XMM4, X86::XMM5};
75 return RegListXMM;
76 }
77
78 static ArrayRef<MCPhysReg> CC_X86_64_VectorCallGetGPRs() {
79 static const MCPhysReg RegListGPR[] = {X86::RCX, X86::RDX, X86::R8, X86::R9};
80 return RegListGPR;
81 }
82
83 static bool CC_X86_VectorCallAssignRegister(unsigned &ValNo, MVT &ValVT,
84 MVT &LocVT,
85 CCValAssign::LocInfo &LocInfo,
86 ISD::ArgFlagsTy &ArgFlags,
87 CCState &State) {
88
89 ArrayRef<MCPhysReg> RegList = CC_X86_VectorCallGetSSEs(ValVT);
90 bool Is64bit = static_cast<const X86Subtarget &>(
91 State.getMachineFunction().getSubtarget())
92 .is64Bit();
93
94 for (auto Reg : RegList) {
95 // If the register is not marked as allocated - assign to it.
96 if (!State.isAllocated(Reg)) {
97 unsigned AssignedReg = State.AllocateReg(Reg);
98 assert(AssignedReg == Reg && "Expecting a valid register allocation");
99 State.addLoc(
100 CCValAssign::getReg(ValNo, ValVT, AssignedReg, LocVT, LocInfo));
101 return true;
102 }
103 // If the register is marked as shadow allocated - assign to it.
104 if (Is64bit && State.IsShadowAllocatedReg(Reg)) {
105 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
106 return true;
107 }
108 }
109
110 llvm_unreachable("Clang should ensure that hva marked vectors will have "
111 "an available register.");
112 return false;
113 }
114
115 bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
116 CCValAssign::LocInfo &LocInfo,
117 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
118 // On the second pass, go through the HVAs only.
119 if (ArgFlags.isSecArgPass()) {
120 if (ArgFlags.isHva())
121 return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo,
122 ArgFlags, State);
123 return true;
124 }
125
126 // Process only vector types as defined by vectorcall spec:
127 // "A vector type is either a floating-point type, for example,
128 // a float or double, or an SIMD vector type, for example, __m128 or __m256".
129 if (!(ValVT.isFloatingPoint() ||
130 (ValVT.isVector() && ValVT.getSizeInBits() >= 128))) {
131 // If R9 was already assigned, we are past the fourth argument; because
132 // this is not an HVA / vector type, we need to allocate a shadow XMM
133 // register.
134 if (State.isAllocated(X86::R9)) {
135 // Assign shadow XMM register.
136 (void)State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT));
137 }
138
139 return false;
140 }
141
142 if (!ArgFlags.isHva() || ArgFlags.isHvaStart()) {
143 // Assign shadow GPR register.
144 (void)State.AllocateReg(CC_X86_64_VectorCallGetGPRs());
145
146 // Assign XMM register - (shadow for HVA and non-shadow for non HVA).
147 if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {
148 // In the vectorcall calling convention, additional shadow stack can be
149 // created on top of the basic 32 bytes of Win64: if the fifth or sixth
150 // argument is a vector type or an HVA, 8 bytes of shadow stack are
151 // allocated for it.
152 if (Reg == X86::XMM4 || Reg == X86::XMM5)
153 State.AllocateStack(8, 8);
154
155 if (!ArgFlags.isHva()) {
156 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
157 return true; // Allocated a register - Stop the search.
158 }
159 }
160 }
161
162 // If this is an HVA - Stop the search,
163 // otherwise continue the search.
164 return ArgFlags.isHva();
165 }
166
167 bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
168 CCValAssign::LocInfo &LocInfo,
169 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
170 // On the second pass, go through the HVAs only.
171 if (ArgFlags.isSecArgPass()) {
172 if (ArgFlags.isHva())
173 return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo,
174 ArgFlags, State);
175 return true;
176 }
177
178 // Process only vector types as defined by vectorcall spec:
179 // "A vector type is either a floating point type, for example,
180 // a float or double, or an SIMD vector type, for example, __m128 or __m256".
181 if (!(ValVT.isFloatingPoint() ||
182 (ValVT.isVector() && ValVT.getSizeInBits() >= 128))) {
183 return false;
184 }
185
186 if (ArgFlags.isHva())
187 return true; // If this is an HVA - Stop the search.
188
189 // Assign XMM register.
190 if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {
191 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
192 return true;
193 }
194
195 // In case we did not find an available XMM register for a vector -
196 // pass it indirectly.
197 // It is similar to CCPassIndirect, with the addition of inreg.
198 if (!ValVT.isFloatingPoint()) {
199 LocVT = MVT::i32;
200 LocInfo = CCValAssign::Indirect;
201 ArgFlags.setInReg();
202 }
203
204 return false; // No register was assigned - Continue the search.
205 }
206
59207 } // End llvm namespace
2323 /// When regcall calling convention compiled to 32 bit arch, special treatment
2424 /// is required for 64 bit masks.
2525 /// The value should be assigned to two GPRs.
26 /// @return true if registers were allocated and false otherwise
26 /// \return true if registers were allocated and false otherwise.
2727 bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
2828 CCValAssign::LocInfo &LocInfo,
2929 ISD::ArgFlagsTy &ArgFlags, CCState &State);
3030
31 inline bool CC_X86_32_VectorCallIndirect(unsigned &ValNo, MVT &ValVT,
32 MVT &LocVT,
33 CCValAssign::LocInfo &LocInfo,
34 ISD::ArgFlagsTy &ArgFlags,
35 CCState &State) {
36 // Similar to CCPassIndirect, with the addition of inreg.
37 LocVT = MVT::i32;
38 LocInfo = CCValAssign::Indirect;
39 ArgFlags.setInReg();
40 return false; // Continue the search, but now for i32.
41 }
31 /// The vectorcall calling convention has special handling for vector types
32 /// and HVAs on 64-bit targets.
33 /// For HVAs shadow registers might be allocated on the first pass
34 /// and actual XMM registers are allocated on the second pass.
35 /// For vector types, actual XMM registers are allocated on the first pass.
36 /// \return true if registers were allocated and false otherwise.
37 bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
38 CCValAssign::LocInfo &LocInfo,
39 ISD::ArgFlagsTy &ArgFlags, CCState &State);
40
41 /// The vectorcall calling convention has special handling for vector types
42 /// and HVAs on 32-bit targets.
43 /// For HVAs actual XMM registers are allocated on the second pass.
44 /// For vector types, actual XMM registers are allocated on the first pass.
45 /// \return true if registers were allocated and false otherwise.
46 bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
47 CCValAssign::LocInfo &LocInfo,
48 ISD::ArgFlagsTy &ArgFlags, CCState &State);
4249
4350 inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
4451 CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
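The doc comments above describe the contract a CCCustom handler follows in the TableGen-generated assignment code: return true when the value has been placed and the search over the rule list should stop, false to fall through to the next rule. The following toy dispatcher illustrates that contract only; the rules and output are invented and this is not the generated code.

#include <cstdio>
#include <functional>
#include <vector>

using ToyHook = std::function<bool(unsigned /*ValNo*/)>;

static void assignAll(const std::vector<ToyHook> &Rules, unsigned NumArgs) {
  for (unsigned ValNo = 0; ValNo < NumArgs; ++ValNo)
    for (const ToyHook &Rule : Rules)
      if (Rule(ValNo))  // true: value assigned, stop searching the rules
        break;
}

int main() {
  std::vector<ToyHook> Rules = {
      // A "custom" rule that only handles odd-numbered arguments.
      [](unsigned V) {
        if (V % 2) { std::printf("arg %u: custom rule\n", V); return true; }
        return false;  // continue the search
      },
      // A fallback rule that handles everything else.
      [](unsigned V) { std::printf("arg %u: fallback rule\n", V); return true; }};
  assignAll(Rules, 4);
  return 0;
}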
307307 CCIfType<[i32], CCAssignToReg<[ESI, EBP, EAX, EDX]>>
308308 ]>;
309309
310 // X86-32 HiPE return-value convention.
310 // X86-32 Vectorcall return-value convention.
311311 def RetCC_X86_32_VectorCall : CallingConv<[
312 // Vector types are returned in XMM0, XMM1, XMM2 and XMM3.
313 CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
312 // Floating-point types are returned in XMM0, XMM1, XMM2 and XMM3.
313 CCIfType<[f32, f64, f128],
314314 CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,
315
316 // 256-bit FP vectors
317 CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
318 CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>,
319
320 // 512-bit FP vectors
321 CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
322 CCAssignToReg<[ZMM0,ZMM1,ZMM2,ZMM3]>>,
323315
324316 // Return integers in the standard way.
325317 CCDelegateTo<RetCC_X86Common>
347339
348340 // Otherwise, everything is the same as 'normal' X86-64 C CC.
349341 CCDelegateTo<RetCC_X86_64_C>
342 ]>;
343
344 // X86-64 vectorcall return-value convention.
345 def RetCC_X86_64_Vectorcall : CallingConv<[
346 // Vectorcall calling convention always returns FP values in XMMs.
347 CCIfType<[f32, f64, f128],
348 CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>,
349
350 // Otherwise, everything is the same as Windows X86-64 C CC.
351 CCDelegateTo<RetCC_X86_Win64_C>
350352 ]>;
351353
352354 // X86-64 HiPE return-value convention.
446448 CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo<RetCC_X86_Win64_C>>,
447449 CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo<RetCC_X86_64_C>>,
448450
451 // Handle Vectorcall CC
452 CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<RetCC_X86_64_Vectorcall>>,
453
449454 // Handle HHVM calls.
450455 CCIfCC<"CallingConv::HHVM", CCDelegateTo<RetCC_X86_64_HHVM>>,
451456
625630 ]>;
626631
627632 def CC_X86_Win64_VectorCall : CallingConv<[
628 // The first 6 floating point and vector types of 128 bits or less use
629 // XMM0-XMM5.
630 CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
631 CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5]>>,
632
633 // 256-bit vectors use YMM registers.
634 CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
635 CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5]>>,
636
637 // 512-bit vectors use ZMM registers.
638 CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
639 CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5]>>,
633 CCCustom<"CC_X86_64_VectorCall">,
640634
641635 // Delegate to fastcall to handle integer types.
642636 CCDelegateTo<CC_X86_Win64_C>
846840 CCDelegateTo<CC_X86_32_Common>
847841 ]>;
848842
849 def CC_X86_32_VectorCall : CallingConv<[
850 // The first 6 floating point and vector types of 128 bits or less use
851 // XMM0-XMM5.
852 CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
853 CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5]>>,
854
855 // 256-bit vectors use YMM registers.
856 CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
857 CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5]>>,
858
859 // 512-bit vectors use ZMM registers.
860 CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
861 CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5]>>,
862
863 // Otherwise, pass it indirectly.
864 CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64,
865 v32i8, v16i16, v8i32, v4i64, v8f32, v4f64,
866 v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
867 CCCustom<"CC_X86_32_VectorCallIndirect">>,
843 def CC_X86_Win32_VectorCall : CallingConv<[
844 // Pass floating point in XMMs
845 CCCustom<"CC_X86_32_VectorCall">,
868846
869847 // Delegate to fastcall to handle integer types.
870848 CCDelegateTo<CC_X86_32_FastCall>
998976 CCIfCC<"CallingConv::X86_INTR", CCDelegateTo<CC_X86_32_Intr>>,
999977 CCIfSubtarget<"isTargetMCU()", CCDelegateTo>,
1000978 CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
1001 CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_32_VectorCall>>,
979 CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_Win32_VectorCall>>,
1002980 CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
1003981 CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
1004982 CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
1616 #include "X86CallingConv.h"
1717 #include "X86FrameLowering.h"
1818 #include "X86InstrBuilder.h"
19 #include "X86IntrinsicsInfo.h"
1920 #include "X86MachineFunctionInfo.h"
2021 #include "X86ShuffleDecodeConstantPool.h"
2122 #include "X86TargetMachine.h"
5253 #include "llvm/Support/ErrorHandling.h"
5354 #include "llvm/Support/MathExtras.h"
5455 #include "llvm/Target/TargetOptions.h"
55 #include "X86IntrinsicsInfo.h"
56 #include
5657 #include
58 #include
5759 #include
58 #include
5960 using namespace llvm;
6061
6162 #define DEBUG_TYPE "x86-isel"
27802781 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
27812782 }
27822783
2784 static bool isSortedByValueNo(const SmallVectorImpl<CCValAssign> &ArgLocs) {
2785 return std::is_sorted(ArgLocs.begin(), ArgLocs.end(),
2786 [](const CCValAssign &A, const CCValAssign &B) -> bool {
2787 return A.getValNo() < B.getValNo();
2788 });
2789 }
2790
27832791 SDValue X86TargetLowering::LowerFormalArguments(
27842792 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
27852793 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
28142822 SmallVector<CCValAssign, 16> ArgLocs;
28152823 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
28162824
2817 // Allocate shadow area for Win64
2825 // Allocate shadow area for Win64.
28182826 if (IsWin64)
28192827 CCInfo.AllocateStack(32, 8);
28202828
2821 CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
2829 CCInfo.AnalyzeArguments(Ins, CC_X86);
2830
2831 // In the vectorcall calling convention a second pass is required for the
2832 // HVA types.
2833 if (CallingConv::X86_VectorCall == CallConv) {
2834 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
2835 }
2836
2837 // The next loop assumes that the locations are in the same order as the
2838 // input arguments.
2839 assert(isSortedByValueNo(ArgLocs) &&
2840 "Argument Location list must be sorted before lowering");
28222841
28232842 SDValue ArgValue;
28242843 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
32623281 SmallVector<CCValAssign, 16> ArgLocs;
32633282 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
32643283
3265 // Allocate shadow area for Win64
3284 // Allocate shadow area for Win64.
32663285 if (IsWin64)
32673286 CCInfo.AllocateStack(32, 8);
32683287
3269 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
3288 CCInfo.AnalyzeArguments(Outs, CC_X86);
3289
3290 // In the vectorcall calling convention a second pass is required for the
3291 // HVA types.
3292 if (CallingConv::X86_VectorCall == CallConv) {
3293 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
3294 }
32703295
32713296 // Get a count of how many bytes are to be pushed on the stack.
32723297 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
33203345 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
33213346 SmallVector<SDValue, 8> MemOpChains;
33223347 SDValue StackPtr;
3348
3349 // The next loop assumes that the locations are in the same order as the
3350 // input arguments.
3351 assert(isSortedByValueNo(ArgLocs) &&
3352 "Argument Location list must be sorted before lowering");
33233353
33243354 // Walk the register/memloc assignments, inserting copies/loads. In the case
33253355 // of tail call optimization arguments are handle later.
55 define x86_vectorcallcc i32 @test_int_1() {
66 ret i32 0
77 }
8
98 ; CHECK-LABEL: {{^}}test_int_1@@0:
109 ; CHECK: xorl %eax, %eax
1110
1211 define x86_vectorcallcc i32 @test_int_2(i32 inreg %a) {
1312 ret i32 %a
1413 }
15
1614 ; X86-LABEL: {{^}}test_int_2@@4:
1715 ; X64-LABEL: {{^}}test_int_2@@8:
1816 ; CHECK: movl %ecx, %eax
2119 %at = trunc i64 %a to i32
2220 ret i32 %at
2321 }
24
2522 ; X86-LABEL: {{^}}test_int_3@@8:
2623 ; X64-LABEL: {{^}}test_int_3@@8:
2724 ; CHECK: movl %ecx, %eax
3027 %s = add i32 %a, %b
3128 ret i32 %s
3229 }
33
3430 ; X86-LABEL: {{^}}test_int_4@@8:
3531 ; X86: leal (%ecx,%edx), %eax
36
3732 ; X64-LABEL: {{^}}test_int_4@@16:
3833 ; X64: leal (%rcx,%rdx), %eax
3934
8984 ret <16 x i8> %r
9085 }
9186 ; CHECK-LABEL: {{^}}test_vec_2@@104:
92 ; CHECK: movaps (%{{[re]}}cx), %xmm0
87 ; x64: movq {{[0-9]*}}(%rsp), %rax
88 ; CHECK: movaps (%{{rax|ecx}}), %xmm0
89
90 %struct.HVA5 = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float> }
91 %struct.HVA4 = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
92 %struct.HVA3 = type { <4 x float>, <4 x float>, <4 x float> }
93 %struct.HVA2 = type { <4 x float>, <4 x float> }
94
95 define x86_vectorcallcc <4 x float> @test_mixed_1(i32 %a, %struct.HVA4 inreg %bb, i32 %c) {
96 entry:
97 %b = alloca %struct.HVA4, align 16
98 store %struct.HVA4 %bb, %struct.HVA4* %b, align 16
99 %w1 = getelementptr inbounds %struct.HVA4, %struct.HVA4* %b, i32 0, i32 1
100 %0 = load <4 x float>, <4 x float>* %w1, align 16
101 ret <4 x float> %0
102 }
103 ; CHECK-LABEL: test_mixed_1
104 ; CHECK: movaps %xmm1, 16(%{{(e|r)}}sp)
105 ; CHECK: movaps 16(%{{(e|r)}}sp), %xmm0
106 ; CHECK: ret{{q|l}}
107
108 define x86_vectorcallcc <4 x float> @test_mixed_2(%struct.HVA4 inreg %a, %struct.HVA4* %b, <4 x float> %c) {
109 entry:
110 %c.addr = alloca <4 x float>, align 16
111 store <4 x float> %c, <4 x float>* %c.addr, align 16
112 %0 = load <4 x float>, <4 x float>* %c.addr, align 16
113 ret <4 x float> %0
114 }
115 ; CHECK-LABEL: test_mixed_2
116 ; X86: movaps %xmm0, (%esp)
117 ; X64: movaps %xmm2, %xmm0
118 ; CHECK: ret{{[ql]}}
119
120 define x86_vectorcallcc <4 x float> @test_mixed_3(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* %f) {
121 entry:
122 %x = getelementptr inbounds %struct.HVA2, %struct.HVA2* %f, i32 0, i32 0
123 %0 = load <4 x float>, <4 x float>* %x, align 16
124 ret <4 x float> %0
125 }
126 ; CHECK-LABEL: test_mixed_3
127 ; CHECK: movaps (%{{[re][ac]}}x), %xmm0
128 ; CHECK: ret{{[ql]}}
129
130 define x86_vectorcallcc <4 x float> @test_mixed_4(%struct.HVA4 inreg %a, %struct.HVA2* %bb, <4 x float> %c) {
131 entry:
132 %y4 = getelementptr inbounds %struct.HVA2, %struct.HVA2* %bb, i32 0, i32 1
133 %0 = load <4 x float>, <4 x float>* %y4, align 16
134 ret <4 x float> %0
135 }
136 ; CHECK-LABEL: test_mixed_4
137 ; X86: movaps 16(%eax), %xmm0
138 ; X64: movaps 16(%rdx), %xmm0
139 ; CHECK: ret{{[ql]}}
140
141 define x86_vectorcallcc <4 x float> @test_mixed_5(%struct.HVA3 inreg %a, %struct.HVA3* %b, <4 x float> %c, %struct.HVA2 inreg %dd) {
142 entry:
143 %d = alloca %struct.HVA2, align 16
144 store %struct.HVA2 %dd, %struct.HVA2* %d, align 16
145 %y5 = getelementptr inbounds %struct.HVA2, %struct.HVA2* %d, i32 0, i32 1
146 %0 = load <4 x float>, <4 x float>* %y5, align 16
147 ret <4 x float> %0
148 }
149 ; CHECK-LABEL: test_mixed_5
150 ; CHECK: movaps %xmm5, 16(%{{(e|r)}}sp)
151 ; CHECK: movaps 16(%{{(e|r)}}sp), %xmm0
152 ; CHECK: ret{{[ql]}}
153
154 define x86_vectorcallcc %struct.HVA4 @test_mixed_6(%struct.HVA4 inreg %a, %struct.HVA4* %b) {
155 entry:
156 %retval = alloca %struct.HVA4, align 16
157 %0 = bitcast %struct.HVA4* %retval to i8*
158 %1 = bitcast %struct.HVA4* %b to i8*
159 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 64, i32 16, i1 false)
160 %2 = load %struct.HVA4, %struct.HVA4* %retval, align 16
161 ret %struct.HVA4 %2
162 }
163 ; CHECK-LABEL: test_mixed_6
164 ; CHECK: movaps (%{{[re]}}sp), %xmm0
165 ; CHECK: movaps 16(%{{[re]}}sp), %xmm1
166 ; CHECK: movaps 32(%{{[re]}}sp), %xmm2
167 ; CHECK: movaps 48(%{{[re]}}sp), %xmm3
168 ; CHECK: ret{{[ql]}}
169
170 declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
171 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
172 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32, i1)
173
174 define x86_vectorcallcc void @test_mixed_7(%struct.HVA5* noalias sret %agg.result) {
175 entry:
176 %a = alloca %struct.HVA5, align 16
177 %0 = bitcast %struct.HVA5* %a to i8*
178 call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 80, i32 16, i1 false)
179 %1 = bitcast %struct.HVA5* %agg.result to i8*
180 %2 = bitcast %struct.HVA5* %a to i8*
181 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 80, i32 16, i1 false)
182 ret void
183 }
184 ; CHECK-LABEL: test_mixed_7
185 ; CHECK: movaps %xmm{{[0-9]}}, 64(%{{rcx|eax}})
186 ; CHECK: movaps %xmm{{[0-9]}}, 48(%{{rcx|eax}})
187 ; CHECK: movaps %xmm{{[0-9]}}, 32(%{{rcx|eax}})
188 ; CHECK: movaps %xmm{{[0-9]}}, 16(%{{rcx|eax}})
189 ; CHECK: movaps %xmm{{[0-9]}}, (%{{rcx|eax}})
190 ; X64: mov{{[ql]}} %rcx, %rax
191 ; CHECK: ret{{[ql]}}
192
193 define x86_vectorcallcc <4 x float> @test_mixed_8(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 %e, <4 x float> %f) {
194 entry:
195 %f.addr = alloca <4 x float>, align 16
196 store <4 x float> %f, <4 x float>* %f.addr, align 16
197 %0 = load <4 x float>, <4 x float>* %f.addr, align 16
198 ret <4 x float> %0
199 }
200 ; CHECK-LABEL: test_mixed_8
201 ; X86: movaps %xmm4, %xmm0
202 ; X64: movaps %xmm5, %xmm0
203 ; CHECK: ret{{[ql]}}
204
205 %struct.HFA4 = type { double, double, double, double }
206 declare x86_vectorcallcc double @test_mixed_9_callee(%struct.HFA4 %x, double %y)
207
208 define x86_vectorcallcc double @test_mixed_9_caller(%struct.HFA4 inreg %b) {
209 entry:
210 %call = call x86_vectorcallcc double @test_mixed_9_callee(%struct.HFA4 inreg %b, double 3.000000e+00)
211 %add = fadd double 1.000000e+00, %call
212 ret double %add
213 }
214 ; CHECK-LABEL: test_mixed_9_caller
215 ; CHECK: movaps %xmm3, %xmm4
216 ; CHECK: movaps %xmm2, %xmm3
217 ; CHECK: movaps %xmm1, %xmm2
218 ; X32: movasd %xmm0, %xmm1
219 ; X64: movapd %xmm5, %xmm1
220 ; CHECK: call{{l|q}} test_mixed_9_callee@@40
221 ; CHECK: addsd {{.*}}, %xmm0
222 ; CHECK: ret{{l|q}}