llvm.org GIT mirror llvm / 3f142c3
Implement -mno-sse: if SSE is disabled on x86-64, don't store XMM on stack for var-args, and don't allow FP return values.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@63495 91177308-0d34-0410-b5e6-96231b3b80d8
Torok Edwin, 10 years ago
5 changed file(s) with 70 addition(s) and 7 deletion(s).
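For context, a minimal sketch of what the change rejects (illustration only, not part of the commit; the function name @return_fp is made up, and the llc invocation mirrors the RUN lines of the tests below): on x86-64 the C calling convention returns f32/f64 in XMM0, so with SSE disabled there is no register to return an FP value in, and llc now prints "SSE register return with SSE disabled" and exits instead of silently using an SSE register.

; illustration only: expected to hit the new error when SSE is disabled
; llvm-as < %s | llc -march=x86-64 -mattr=-sse,-sse2
define double @return_fp() nounwind {
entry:
  ret double 1.000000e+00
}

Var-arg functions, by contrast, still compile with -mattr=-sse; the lowering change below simply stops spilling the eight XMM argument registers to the register save area.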
   // weirdly; this is really the sse-regparm calling convention) in which
   // case they use XMM0, otherwise it is the same as the common X86 calling
   // conv.
-  CCIfInReg
+  CCIfInReg
     CCIfType<[f32, f64], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
   CCIfType<[f32,f64], CCAssignToReg<[ST0, ST1]>>,
   CCDelegateTo
 
   // The first 8 FP/Vector arguments are passed in XMM registers.
   CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
-    CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>,
+    CCIfSubtarget<"hasSSE1()",
+      CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
 
   // The first 8 MMX (except for v1i64) vector arguments are passed in XMM
   // registers on Darwin.
 
   // The first 8 FP/Vector arguments are passed in XMM registers.
   CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
-    CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>,
+    CCIfSubtarget<"hasSSE1()",
+      CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
 
   // The first 8 MMX (except for v1i64) vector arguments are passed in XMM
   // registers on Darwin.
   // Assign locations to each value returned by this call.
   SmallVector RVLocs;
   bool isVarArg = TheCall->isVarArg();
+  bool Is64Bit = Subtarget->is64Bit();
   CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
   CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);
 
   // Copy all of the result registers out of their specified physreg.
   for (unsigned i = 0; i != RVLocs.size(); ++i) {
     MVT CopyVT = RVLocs[i].getValVT();
 
+
+    // If this is x86-64, and we disabled SSE, we can't return FP values
+    if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
+        ((Is64Bit || TheCall->isInreg()) && !Subtarget->hasSSE1())) {
+      cerr << "SSE register return with SSE disabled\n";
+      exit(1);
+    }
+
     // If this is a call to a function that returns an fp value on the floating
     // point stack, but where we prefer to use the value in xmm registers, copy
     // it out as F80 and use a truncate to move it from fp stack reg to xmm reg.
     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs,
                                                      TotalNumXMMRegs);
 
+    assert((Subtarget->hasSSE1() || !NumXMMRegs) &&
+           "SSE register cannot be used when SSE is disabled!");
+    if (!Subtarget->hasSSE1()) {
+      // Kernel mode asks for SSE to be disabled, so don't push them
+      // on the stack.
+      TotalNumXMMRegs = 0;
+    }
     // For X86-64, if there are vararg parameters that are passed via
     // registers, then we must store them to their spots on the stack so they
     // may be loaded by dereferencing the result of va_next.
       X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
     };
     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
+    assert((Subtarget->hasSSE1() || !NumXMMRegs)
+           && "SSE registers cannot be used when SSE is disabled");
 
     Chain = DAG.getCopyToReg(Chain, X86::AL,
                              DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
   // are enabled. These are available on all x86-64 CPUs.
   if (Is64Bit) {
     HasX86_64 = true;
-#if 1
+#if 0
     if (X86SSELevel < SSE2)
       X86SSELevel = SSE2;
 #endif
 
+; RUN: llvm-as < %s | llc -march=x86-64 -mattr=-sse,-sse2 | not grep xmm
 ; PR3402
-; reverted
-; XFAIL: *
 target datalayout =
 "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
 
+; RUN: llvm-as < %s > %t
+; RUN: llc -march=x86-64 -mattr=-sse < %t | not grep xmm
+; RUN: llc -march=x86-64 < %t | grep xmm
+; PR3403
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+%struct.__va_list_tag = type { i32, i32, i8*, i8* }
+
+define i32 @foo(float %a, i8* nocapture %fmt, ...) nounwind {
+entry:
+  %ap = alloca [1 x %struct.__va_list_tag], align 8 ; <[1 x %struct.__va_list_tag]*> [#uses=4]
+  %ap12 = bitcast [1 x %struct.__va_list_tag]* %ap to i8* ; [#uses=2]
+  call void @llvm.va_start(i8* %ap12)
+  %0 = getelementptr [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 0 ; [#uses=2]
+  %1 = load i32* %0, align 8 ; [#uses=3]
+  %2 = icmp ult i32 %1, 48 ; [#uses=1]
+  br i1 %2, label %bb, label %bb3
+
+bb: ; preds = %entry
+  %3 = getelementptr [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 3 ; [#uses=1]
+  %4 = load i8** %3, align 8 ; [#uses=1]
+  %5 = inttoptr i32 %1 to i8* ; [#uses=1]
+  %6 = ptrtoint i8* %5 to i64 ; [#uses=1]
+  %ctg2 = getelementptr i8* %4, i64 %6 ; [#uses=1]
+  %7 = add i32 %1, 8 ; [#uses=1]
+  store i32 %7, i32* %0, align 8
+  br label %bb4
+
+bb3: ; preds = %entry
+  %8 = getelementptr [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 2 ; [#uses=2]
+  %9 = load i8** %8, align 8 ; [#uses=2]
+  %10 = getelementptr i8* %9, i64 8 ; [#uses=1]
+  store i8* %10, i8** %8, align 8
+  br label %bb4
+
+bb4: ; preds = %bb3, %bb
+  %addr.0.0 = phi i8* [ %ctg2, %bb ], [ %9, %bb3 ] ; [#uses=1]
+  %11 = bitcast i8* %addr.0.0 to i32* ; [#uses=1]
+  %12 = load i32* %11, align 4 ; [#uses=1]
+  call void @llvm.va_end(i8* %ap12)
+  ret i32 %12
+}
+
+declare void @llvm.va_start(i8*) nounwind
+
+declare void @llvm.va_end(i8*) nounwind