llvm.org GIT mirror llvm / 18fdb39
Implement LowerCall_64 for the SPARC v9 64-bit ABI. There is still no support for byval arguments (which I don't think are needed) and varargs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178993 91177308-0d34-0410-b5e6-96231b3b80d8 Jakob Stoklund Olesen 7 years ago
3 changed file(s) with 375 addition(s) and 3 deletion(s). Raw diff Collapse all Expand all
593593 SDValue
594594 SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
595595 SmallVectorImpl &InVals) const {
596 if (Subtarget->is64Bit())
597 return LowerCall_64(CLI, InVals);
598 return LowerCall_32(CLI, InVals);
599 }
600
601 // Lower a call for the 32-bit ABI.
602 SDValue
603 SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
604 SmallVectorImpl &InVals) const {
596605 SelectionDAG &DAG = CLI.DAG;
597606 DebugLoc &dl = CLI.DL;
598607 SmallVector &Outs = CLI.Outs;
886895 return getDataLayout()->getTypeAllocSize(ElementTy);
887896 }
888897
898 // Lower a call for the 64-bit ABI.
899 SDValue
900 SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
901 SmallVectorImpl &InVals) const {
902 SelectionDAG &DAG = CLI.DAG;
903 DebugLoc DL = CLI.DL;
904 SDValue Chain = CLI.Chain;
905
906 // Analyze operands of the call, assigning locations to each operand.
907 SmallVector ArgLocs;
908 CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
909 DAG.getTarget(), ArgLocs, *DAG.getContext());
910 CCInfo.AnalyzeCallOperands(CLI.Outs, CC_Sparc64);
911
912 // Get the size of the outgoing arguments stack space requirement.
913 // The stack offset computed by CC_Sparc64 includes all arguments.
914 // We always allocate space for 6 arguments in the prolog.
915 unsigned ArgsSize = std::max(6*8u, CCInfo.getNextStackOffset()) - 6*8u;
916
917 // Keep stack frames 16-byte aligned.
918 ArgsSize = RoundUpToAlignment(ArgsSize, 16);
919
920 // Adjust the stack pointer to make room for the arguments.
921 // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
922 // with more than 6 arguments.
923 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true));
924
925 // Collect the set of registers to pass to the function and their values.
926 // This will be emitted as a sequence of CopyToReg nodes glued to the call
927 // instruction.
928 SmallVector, 8> RegsToPass;
929
930 // Collect chains from all the memory opeations that copy arguments to the
931 // stack. They must follow the stack pointer adjustment above and precede the
932 // call instruction itself.
933 SmallVector MemOpChains;
934
935 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
936 const CCValAssign &VA = ArgLocs[i];
937 SDValue Arg = CLI.OutVals[i];
938
939 // Promote the value if needed.
940 switch (VA.getLocInfo()) {
941 default:
942 llvm_unreachable("Unknown location info!");
943 case CCValAssign::Full:
944 break;
945 case CCValAssign::SExt:
946 Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
947 break;
948 case CCValAssign::ZExt:
949 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
950 break;
951 case CCValAssign::AExt:
952 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
953 break;
954 case CCValAssign::BCvt:
955 Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
956 break;
957 }
958
959 if (VA.isRegLoc()) {
960 // The custom bit on an i32 return value indicates that it should be
961 // passed in the high bits of the register.
962 if (VA.getValVT() == MVT::i32 && VA.needsCustom()) {
963 Arg = DAG.getNode(ISD::SHL, DL, MVT::i64, Arg,
964 DAG.getConstant(32, MVT::i32));
965
966 // The next value may go in the low bits of the same register.
967 // Handle both at once.
968 if (i+1 < ArgLocs.size() && ArgLocs[i+1].isRegLoc() &&
969 ArgLocs[i+1].getLocReg() == VA.getLocReg()) {
970 SDValue NV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64,
971 CLI.OutVals[i+1]);
972 Arg = DAG.getNode(ISD::OR, DL, MVT::i64, Arg, NV);
973 // Skip the next value, it's already done.
974 ++i;
975 }
976 }
977
978 // The argument registers are described in term of the callee's register
979 // window, so translate I0-I7 -> O0-O7.
980 unsigned Reg = VA.getLocReg();
981 if (Reg >= SP::I0 && Reg <= SP::I7)
982 Reg = Reg - SP::I0 + SP::O0;
983 RegsToPass.push_back(std::make_pair(Reg, Arg));
984 continue;
985 }
986
987 assert(VA.isMemLoc());
988
989 // Create a store off the stack pointer for this argument.
990 SDValue StackPtr = DAG.getRegister(SP::O6, getPointerTy());
991 // The argument area starts at %fp+BIAS+128 in the callee frame,
992 // %sp+BIAS+128 in ours.
993 SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset() +
994 Subtarget->getStackPointerBias() +
995 128);
996 PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
997 MemOpChains.push_back(DAG.getStore(Chain, DL, Arg, PtrOff,
998 MachinePointerInfo(),
999 false, false, 0));
1000 }
1001
1002 // Emit all stores, make sure they occur before the call.
1003 if (!MemOpChains.empty())
1004 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1005 &MemOpChains[0], MemOpChains.size());
1006
1007 // Build a sequence of CopyToReg nodes glued together with token chain and
1008 // glue operands which copy the outgoing args into registers. The InGlue is
1009 // necessary since all emitted instructions must be stuck together in order
1010 // to pass the live physical registers.
1011 SDValue InGlue;
1012 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1013 Chain = DAG.getCopyToReg(Chain, DL,
1014 RegsToPass[i].first, RegsToPass[i].second, InGlue);
1015 InGlue = Chain.getValue(1);
1016 }
1017
1018 // If the callee is a GlobalAddress node (quite common, every direct call is)
1019 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
1020 // Likewise ExternalSymbol -> TargetExternalSymbol.
1021 SDValue Callee = CLI.Callee;
1022 if (GlobalAddressSDNode *G = dyn_cast(Callee))
1023 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy());
1024 else if (ExternalSymbolSDNode *E = dyn_cast(Callee))
1025 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy());
1026
1027 // Build the operands for the call instruction itself.
1028 SmallVector Ops;
1029 Ops.push_back(Chain);
1030 Ops.push_back(Callee);
1031 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1032 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1033 RegsToPass[i].second.getValueType()));
1034
1035 // Make sure the CopyToReg nodes are glued to the call instruction which
1036 // consumes the registers.
1037 if (InGlue.getNode())
1038 Ops.push_back(InGlue);
1039
1040 // Now the call itself.
1041 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1042 Chain = DAG.getNode(SPISD::CALL, DL, NodeTys, &Ops[0], Ops.size());
1043 InGlue = Chain.getValue(1);
1044
1045 // Revert the stack pointer immediately after the call.
1046 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
1047 DAG.getIntPtrConstant(0, true), InGlue);
1048 InGlue = Chain.getValue(1);
1049
1050 // Now extract the return values. This is more or less the same as
1051 // LowerFormalArguments_64.
1052
1053 // Assign locations to each value returned by this call.
1054 SmallVector RVLocs;
1055 CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
1056 DAG.getTarget(), RVLocs, *DAG.getContext());
1057 RVInfo.AnalyzeCallResult(CLI.Ins, CC_Sparc64);
1058
1059 // Copy all of the result registers out of their specified physreg.
1060 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1061 CCValAssign &VA = RVLocs[i];
1062 unsigned Reg = VA.getLocReg();
1063
1064 // Remap I0-I7 -> O0-O7.
1065 if (Reg >= SP::I0 && Reg <= SP::I7)
1066 Reg = Reg - SP::I0 + SP::O0;
1067
1068 // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
1069 // reside in the same register in the high and low bits. Reuse the
1070 // CopyFromReg previous node to avoid duplicate copies.
1071 SDValue RV;
1072 if (RegisterSDNode *SrcReg = dyn_cast(Chain.getOperand(1)))
1073 if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
1074 RV = Chain.getValue(0);
1075
1076 // But usually we'll create a new CopyFromReg for a different register.
1077 if (!RV.getNode()) {
1078 RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
1079 Chain = RV.getValue(1);
1080 InGlue = Chain.getValue(2);
1081 }
1082
1083 // Get the high bits for i32 struct elements.
1084 if (VA.getValVT() == MVT::i32 && VA.needsCustom())
1085 RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV,
1086 DAG.getConstant(32, MVT::i32));
1087
1088 // The callee promoted the return value, so insert an Assert?ext SDNode so
1089 // we won't promote the value again in this function.
1090 switch (VA.getLocInfo()) {
1091 case CCValAssign::SExt:
1092 RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
1093 DAG.getValueType(VA.getValVT()));
1094 break;
1095 case CCValAssign::ZExt:
1096 RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
1097 DAG.getValueType(VA.getValVT()));
1098 break;
1099 default:
1100 break;
1101 }
1102
1103 // Truncate the register down to the return value type.
1104 if (VA.isExtInLoc())
1105 RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);
1106
1107 InVals.push_back(RV);
1108 }
1109
1110 return Chain;
1111 }
1112
8891113 //===----------------------------------------------------------------------===//
8901114 // TargetLowering Implementation
8911115 //===----------------------------------------------------------------------===//
9494 virtual SDValue
9595 LowerCall(TargetLowering::CallLoweringInfo &CLI,
9696 SmallVectorImpl &InVals) const;
97 SDValue LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
98 SmallVectorImpl &InVals) const;
99 SDValue LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
100 SmallVectorImpl &InVals) const;
97101
98102 virtual SDValue
99103 LowerReturn(SDValue Chain,
None ; RUN: llc < %s -march=sparcv9 | FileCheck %s
0 ; RUN: llc < %s -march=sparcv9 -disable-sparc-delay-filler | FileCheck %s
11
22 ; CHECK: intarg
33 ; CHECK: stb %i0, [%i4]
1616 i32 %a3, ; %i3
1717 i8* %a4, ; %i4
1818 i32 %a5, ; %i5
19 i32 %a6, ; [%fp+BIAS+176]
19 i32 signext %a6, ; [%fp+BIAS+176]
2020 i8* %a7) { ; [%fp+BIAS+184]
2121 store i8 %a0, i8* %a4
2222 store i8 %a1, i8* %a4
2929 store i32 %a5, i32* %p32
3030 store i32 %a6, i32* %p32
3131 store i8* %a7, i8** %pp
32 ret void
33 }
34
35 ; CHECK: call_intarg
36 ; Sign-extend and store the full 64 bits.
37 ; CHECK: sra %i0, 0, [[R:%[gilo][0-7]]]
38 ; CHECK: stx [[R]], [%sp+2223]
39 ; Use %o0-%o5 for outgoing arguments
40 ; CHECK: or %g0, 5, %o5
41 ; CHECK: call intarg
; Caller-side test for LowerCall_64 integer args: six register args plus two
; args on the stack; the CHECK lines above verify the sign-extended stack
; store and the %o-register setup.
42 define void @call_intarg(i32 %i0, i8* %i1) {
43 call void @intarg(i8 0, i8 1, i16 2, i32 3, i8* undef, i32 5, i32 %i0, i8* %i1)
3244 ret void
3345 }
3446
5668 float %a14, ; %f29
5769 float %a15, ; %f31
5870 float %a16, ; [%fp+BIAS+256] (using 8 bytes)
59 float %a17) { ; [%fp+BIAS+264] (using 8 bytes)
71 double %a17) { ; [%fp+BIAS+264] (using 8 bytes)
6072 %d0 = fpext float %a0 to double
6173 %s1 = fadd double %a1, %d0
6274 %s2 = fadd double %a2, %s1
6577 %d16 = fpext float %s16 to double
6678 %s17 = fadd double %d16, %s3
6779 ret double %s17
80 }
81
82 ; CHECK: call_floatarg
83 ; Store 4 bytes, right-aligned in slot.
84 ; CHECK: st %f1, [%sp+2307]
85 ; Store 8 bytes in full slot.
86 ; CHECK: std %f2, [%sp+2311]
87 ; CHECK: fmovd %f2, %f4
88 ; CHECK: call floatarg
; Caller-side test for LowerCall_64 FP args: 16 register FP args plus two
; stack args (float right-aligned in its 8-byte slot, double filling a slot),
; per the CHECK lines above.
89 define void @call_floatarg(float %f1, double %d2, float %f5, double *%p) {
90 %r = call double @floatarg(float %f5, double %d2, double %d2, double %d2,
91 float %f5, float %f5, float %f5, float %f5,
92 float %f5, float %f5, float %f5, float %f5,
93 float %f5, float %f5, float %f5, float %f5,
94 float %f1, double %d2)
95 store double %r, double* %p
96 ret void
6897 }
6998
7099 ; CHECK: mixedarg
91120 ret void
92121 }
93122
123 ; CHECK: call_mixedarg
124 ; CHECK: stx %i2, [%sp+2247]
125 ; CHECK: stx %i0, [%sp+2223]
126 ; CHECK: fmovd %f2, %f6
127 ; CHECK: fmovd %f2, %f16
128 ; CHECK: call mixedarg
; Caller-side test mixing integer and FP args (including an i13 and undefs);
; CHECK lines above verify the stack stores of %i0/%i2 and the FP reg moves.
129 define void @call_mixedarg(i64 %i0, double %f2, i16* %i2) {
130 call void @mixedarg(i8 undef,
131 float undef,
132 i16 undef,
133 double %f2,
134 i13 undef,
135 float undef,
136 i64 %i0,
137 double* undef,
138 double %f2,
139 i16* %i2)
140 ret void
141 }
142
94143 ; The inreg attribute is used to indicate 32-bit sized struct elements that
95144 ; share an 8-byte slot.
96145 ; CHECK: inreg_fi
104153 ret i32 %rv
105154 }
106155
156 ; CHECK: call_inreg_fi
157 ; CHECK: sllx %i1, 32, %o0
158 ; CHECK: fmovs %f5, %f1
159 ; CHECK: call inreg_fi
; Caller packs an inreg {i32, float} pair: i32 shifted into the high bits of
; %o0 (sllx ... 32 above), float passed in %f1.
160 define void @call_inreg_fi(i32* %p, i32 %i1, float %f5) {
161 %x = call i32 @inreg_fi(i32 %i1, float %f5)
162 ret void
163 }
164
107165 ; CHECK: inreg_ff
108166 ; CHECK: fsubs %f0, %f1, %f1
109167 define float @inreg_ff(float inreg %a0, ; %f0
110168 float inreg %a1) { ; %f1
111169 %rv = fsub float %a0, %a1
112170 ret float %rv
171 }
172
173 ; CHECK: call_inreg_ff
174 ; CHECK: fmovs %f3, %f0
175 ; CHECK: fmovs %f5, %f1
176 ; CHECK: call inreg_ff
; Caller packs an inreg {float, float} pair into %f0/%f1 (fmovs checks above).
177 define void @call_inreg_ff(i32* %p, float %f3, float %f5) {
178 %x = call float @inreg_ff(float %f3, float %f5)
179 ret void
112180 }
114181
115182 ; CHECK: inreg_if
122189 ret i32 %rv
123190 }
124191
192 ; CHECK: call_inreg_if
193 ; CHECK: fmovs %f3, %f0
194 ; CHECK: or %g0, %i2, %o0
195 ; CHECK: call inreg_if
; Caller packs an inreg {float, i32} pair: float in %f0, i32 in the low bits
; of %o0 (checks above).
196 define void @call_inreg_if(i32* %p, float %f3, i32 %i2) {
197 %x = call i32 @inreg_if(float %f3, i32 %i2)
198 ret void
199 }
200
125201 ; The frontend shouldn't do this. Just pass i64 instead.
126202 ; CHECK: inreg_ii
127203 ; CHECK: srlx %i0, 32, [[R:%[gilo][0-7]]]
130206 i32 inreg %a1) { ; low bits of %i0
131207 %rv = sub i32 %a1, %a0
132208 ret i32 %rv
209 }
210
211 ; CHECK: call_inreg_ii
212 ; CHECK: srl %i2, 0, [[R2:%[gilo][0-7]]]
213 ; CHECK: sllx %i1, 32, [[R1:%[gilo][0-7]]]
214 ; CHECK: or [[R1]], [[R2]], %o0
215 ; CHECK: call inreg_ii
; Caller packs an inreg {i32, i32} pair into one i64 register: high half
; shifted up, low half zero-extended and OR'd in (sllx/srl/or checks above) —
; exercising the needsCustom() path in LowerCall_64.
216 define void @call_inreg_ii(i32* %p, i32 %i1, i32 %i2) {
217 %x = call i32 @inreg_ii(i32 %i1, i32 %i2)
218 ret void
133219 }
134220
135221 ; Structs up to 32 bytes in size can be returned in registers.
143229 %r2 = load i64* %q
144230 %rv2 = insertvalue { i64, i64 } %rv1, i64 %r2, 1
145231 ret { i64, i64 } %rv2
232 }
233
234 ; CHECK: call_ret_i64_pair
235 ; CHECK: call ret_i64_pair
236 ; CHECK: stx %o0, [%i0]
237 ; CHECK: stx %o1, [%i0]
; Multi-register return: the { i64, i64 } result comes back in %o0/%o1
; (stx checks above).
238 define void @call_ret_i64_pair(i64* %i0) {
239 %rv = call { i64, i64 } @ret_i64_pair(i32 undef, i32 undef,
240 i64* undef, i64* undef)
241 %e0 = extractvalue { i64, i64 } %rv, 0
242 store i64 %e0, i64* %i0
243 %e1 = extractvalue { i64, i64 } %rv, 1
244 store i64 %e1, i64* %i0
245 ret void
146246 }
147247
148248 ; This is not a C struct, each member uses 8 bytes.
157257 %r2 = load float* %q
158258 %rv2 = insertvalue { i32, float } %rv1, float %r2, 1
159259 ret { i32, float } %rv2
260 }
261
262 ; CHECK: call_ret_i32_float_pair
263 ; CHECK: call ret_i32_float_pair
264 ; CHECK: st %o0, [%i0]
265 ; CHECK: st %f3, [%i1]
; Non-packed { i32, float } return: each member uses its own 8-byte slot, so
; the i32 comes back in %o0 and the float in %f3 (checks above).
266 define void @call_ret_i32_float_pair(i32* %i0, float* %i1) {
267 %rv = call { i32, float } @ret_i32_float_pair(i32 undef, i32 undef,
268 i32* undef, float* undef)
269 %e0 = extractvalue { i32, float } %rv, 0
270 store i32 %e0, i32* %i0
271 %e1 = extractvalue { i32, float } %rv, 1
272 store float %e1, float* %i1
273 ret void
160274 }
161275
162276 ; This is a C struct, each member uses 4 bytes.
172286 %r2 = load float* %q
173287 %rv2 = insertvalue { i32, float } %rv1, float %r2, 1
174288 ret { i32, float } %rv2
289 }
290
291 ; CHECK: call_ret_i32_float_packed
292 ; CHECK: call ret_i32_float_packed
293 ; CHECK: srlx %o0, 32, [[R:%[gilo][0-7]]]
294 ; CHECK: st [[R]], [%i0]
295 ; CHECK: st %f1, [%i1]
; Packed (C-struct) { i32, float } return sharing one slot: the i32 is in the
; high bits of %o0 (srlx check above) and the float in %f1.
296 define void @call_ret_i32_float_packed(i32* %i0, float* %i1) {
297 %rv = call { i32, float } @ret_i32_float_packed(i32 undef, i32 undef,
298 i32* undef, float* undef)
299 %e0 = extractvalue { i32, float } %rv, 0
300 store i32 %e0, i32* %i0
301 %e1 = extractvalue { i32, float } %rv, 1
302 store float %e1, float* %i1
303 ret void
175304 }
176305
177306 ; The C frontend should use i64 to return { i32, i32 } structs, but verify that
191320 ret { i32, i32 } %rv2
192321 }
193322
323 ; CHECK: call_ret_i32_packed
324 ; CHECK: call ret_i32_packed
325 ; CHECK: srlx %o0, 32, [[R:%[gilo][0-7]]]
326 ; CHECK: st [[R]], [%i0]
327 ; CHECK: st %o0, [%i1]
; Packed { i32, i32 } return in one register: high half extracted with srlx,
; low half used directly from %o0 (checks above) — this exercises the
; CopyFromReg-reuse path in LowerCall_64.
328 define void @call_ret_i32_packed(i32* %i0, i32* %i1) {
329 %rv = call { i32, i32 } @ret_i32_packed(i32 undef, i32 undef,
330 i32* undef, i32* undef)
331 %e0 = extractvalue { i32, i32 } %rv, 0
332 store i32 %e0, i32* %i0
333 %e1 = extractvalue { i32, i32 } %rv, 1
334 store i32 %e1, i32* %i1
335 ret void
336 }
337
194338 ; The return value must be sign-extended to 64 bits.
195339 ; CHECK: ret_sext
196340 ; CHECK: sra %i0, 0, %i0