llvm.org GIT mirror llvm / 3a1999a
[X86] Part 2 to fix x86-64 fp128 calling convention. Part 1 was submitted in http://reviews.llvm.org/D15134. Changes in this part: * X86RegisterInfo.td, X86RecognizableInstr.cpp: Add FR128 register class. * X86CallingConv.td: Pass f128 values in XMM registers or on stack. * X86InstrCompiler.td, X86InstrInfo.td, X86InstrSSE.td: Add instruction selection patterns for f128. * X86ISelLowering.cpp: When target has MMX registers, configure MVT::f128 in FR128RegClass, with TypeSoftenFloat action, and custom actions for some opcodes. Add missed cases of MVT::f128 in places that handle f32, f64, or vector types. Add TODO comment to support f128 type in inline assembly code. * SelectionDAGBuilder.cpp: Fix infinite loop when f128 type can have VT == TLI.getTypeToTransformTo(Ctx, VT). * Add unit tests for x86-64 fp128 type. Differential Revision: http://reviews.llvm.org/D11438 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255558 91177308-0d34-0410-b5e6-96231b3b80d8 Chih-Hung Hsieh 4 years ago
16 changed file(s) with 1052 addition(s) and 25 deletion(s). Raw diff Collapse all Expand all
24502450
24512451 // We care about the legality of the operation after it has been type
24522452 // legalized.
2453 while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal)
2453 while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal &&
2454 VT != TLI.getTypeToTransformTo(Ctx, VT))
24542455 VT = TLI.getTypeToTransformTo(Ctx, VT);
24552456
24562457 // If the vselect is legal, assume we want to leave this as a vector setcc +
157157 // The X86-64 calling convention always returns FP values in XMM0.
158158 CCIfType<[f32], CCAssignToReg<[XMM0, XMM1]>>,
159159 CCIfType<[f64], CCAssignToReg<[XMM0, XMM1]>>,
160 CCIfType<[f128], CCAssignToReg<[XMM0, XMM1]>>,
160161
161162 // MMX vector types are always returned in XMM0.
162163 CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>,
292293 CCIfType<[v64i1], CCPromoteToType>,
293294
294295 // The first 8 FP/Vector arguments are passed in XMM registers.
295 CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
296 CCIfType<[f32, f64, f128, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
296297 CCIfSubtarget<"hasSSE1()",
297298 CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
298299
317318
318319 // Long doubles get stack slots whose size and alignment depends on the
319320 // subtarget.
320 CCIfType<[f80], CCAssignToStack<0, 0>>,
321 CCIfType<[f80, f128], CCAssignToStack<0, 0>>,
321322
322323 // Vectors get 16-byte stack slots that are 16-byte aligned.
323324 CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
295295 setOperationAction(ISD::BR_CC , MVT::f32, Expand);
296296 setOperationAction(ISD::BR_CC , MVT::f64, Expand);
297297 setOperationAction(ISD::BR_CC , MVT::f80, Expand);
298 setOperationAction(ISD::BR_CC , MVT::f128, Expand);
298299 setOperationAction(ISD::BR_CC , MVT::i8, Expand);
299300 setOperationAction(ISD::BR_CC , MVT::i16, Expand);
300301 setOperationAction(ISD::BR_CC , MVT::i32, Expand);
302303 setOperationAction(ISD::SELECT_CC , MVT::f32, Expand);
303304 setOperationAction(ISD::SELECT_CC , MVT::f64, Expand);
304305 setOperationAction(ISD::SELECT_CC , MVT::f80, Expand);
306 setOperationAction(ISD::SELECT_CC , MVT::f128, Expand);
305307 setOperationAction(ISD::SELECT_CC , MVT::i8, Expand);
306308 setOperationAction(ISD::SELECT_CC , MVT::i16, Expand);
307309 setOperationAction(ISD::SELECT_CC , MVT::i32, Expand);
414416 setOperationAction(ISD::SELECT , MVT::f32 , Custom);
415417 setOperationAction(ISD::SELECT , MVT::f64 , Custom);
416418 setOperationAction(ISD::SELECT , MVT::f80 , Custom);
419 setOperationAction(ISD::SELECT , MVT::f128 , Custom);
417420 setOperationAction(ISD::SETCC , MVT::i8 , Custom);
418421 setOperationAction(ISD::SETCC , MVT::i16 , Custom);
419422 setOperationAction(ISD::SETCC , MVT::i32 , Custom);
420423 setOperationAction(ISD::SETCC , MVT::f32 , Custom);
421424 setOperationAction(ISD::SETCC , MVT::f64 , Custom);
422425 setOperationAction(ISD::SETCC , MVT::f80 , Custom);
426 setOperationAction(ISD::SETCC , MVT::f128 , Custom);
423427 setOperationAction(ISD::SETCCE , MVT::i8 , Custom);
424428 setOperationAction(ISD::SETCCE , MVT::i16 , Custom);
425429 setOperationAction(ISD::SETCCE , MVT::i32 , Custom);
618622 setOperationAction(ISD::FMA, MVT::f64, Expand);
619623 setOperationAction(ISD::FMA, MVT::f32, Expand);
620624
621 // Long double always uses X87.
625 // Long double always uses X87, except f128 in MMX.
622626 if (!Subtarget->useSoftFloat()) {
627 if (Subtarget->is64Bit() && Subtarget->hasMMX()) {
628 addRegisterClass(MVT::f128, &X86::FR128RegClass);
629 ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
630 setOperationAction(ISD::FABS , MVT::f128, Custom);
631 setOperationAction(ISD::FNEG , MVT::f128, Custom);
632 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
633 }
634
623635 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
624636 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
625637 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
23622374 EVT CopyVT = VA.getLocVT();
23632375
23642376 // If this is x86-64, and we disabled SSE, we can't return FP values
2365 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
2377 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
23662378 ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
23672379 report_fatal_error("SSE register return with SSE disabled");
23682380 }
26462658 RC = &X86::FR32RegClass;
26472659 else if (RegVT == MVT::f64)
26482660 RC = &X86::FR64RegClass;
2661 else if (RegVT == MVT::f128)
2662 RC = &X86::FR128RegClass;
26492663 else if (RegVT.is512BitVector())
26502664 RC = &X86::VR512RegClass;
26512665 else if (RegVT.is256BitVector())
1340913423 SDLoc dl(Op);
1341013424 MVT VT = Op.getSimpleValueType();
1341113425
13426 bool IsF128 = (VT == MVT::f128);
13427
1341213428 // FIXME: Use function attribute "OptimizeForSize" and/or CodeGenOpt::Level to
1341313429 // decide if we should generate a 16-byte constant mask when we only need 4 or
1341413430 // 8 bytes for the scalar case.
1342113437 LogicVT = VT;
1342213438 EltVT = VT.getVectorElementType();
1342313439 NumElts = VT.getVectorNumElements();
13440 } else if (IsF128) {
13441 // SSE instructions are used for optimized f128 logical operations.
13442 LogicVT = MVT::f128;
13443 EltVT = VT;
13444 NumElts = 1;
1342413445 } else {
1342513446 // There are no scalar bitwise logical SSE/AVX instructions, so we
1342613447 // generate a 16-byte vector constant and logic op even for the scalar case.
1345213473 IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR;
1345313474 SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0;
1345413475
13455 if (VT.isVector())
13476 if (VT.isVector() || IsF128)
1345613477 return DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask);
1345713478
1345813479 // For the scalar case extend to a 128-bit vector, perform the logic op,
1347113492 SDLoc dl(Op);
1347213493 MVT VT = Op.getSimpleValueType();
1347313494 MVT SrcVT = Op1.getSimpleValueType();
13495 bool IsF128 = (VT == MVT::f128);
1347413496
1347513497 // If second operand is smaller, extend it first.
1347613498 if (SrcVT.bitsLT(VT)) {
1348513507
1348613508 // At this point the operands and the result should have the same
1348713509 // type, and that won't be f80 since that is not custom lowered.
13510 assert((VT == MVT::f64 || VT == MVT::f32 || IsF128) &&
13511 "Unexpected type in LowerFCOPYSIGN");
1348813512
1348913513 const fltSemantics &Sem =
13490 VT == MVT::f64 ? APFloat::IEEEdouble : APFloat::IEEEsingle;
13514 VT == MVT::f64 ? APFloat::IEEEdouble :
13515 (IsF128 ? APFloat::IEEEquad : APFloat::IEEEsingle);
1349113516 const unsigned SizeInBits = VT.getSizeInBits();
1349213517
1349313518 SmallVector CV(
13494 VT == MVT::f64 ? 2 : 4,
13519 VT == MVT::f64 ? 2 : (IsF128 ? 1 : 4),
1349513520 ConstantFP::get(*Context, APFloat(Sem, APInt(SizeInBits, 0))));
1349613521
1349713522 // First, clear all bits but the sign bit from the second operand (sign).
1350413529 // Perform all logic operations as 16-byte vectors because there are no
1350513530 // scalar FP logic instructions in SSE. This allows load folding of the
1350613531 // constants into the logic instructions.
13507 MVT LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32;
13532 MVT LogicVT = (VT == MVT::f64) ? MVT::v2f64 : (IsF128 ? MVT::f128 : MVT::v4f32);
1350813533 SDValue Mask1 =
1350913534 DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx,
1351013535 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
1351113536 false, false, false, 16);
13512 Op1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op1);
13537 if (!IsF128)
13538 Op1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op1);
1351313539 SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, LogicVT, Op1, Mask1);
1351413540
1351513541 // Next, clear the sign bit from the first operand (magnitude).
1351813544 APFloat APF = Op0CN->getValueAPF();
1351913545 // If the magnitude is a positive zero, the sign bit alone is enough.
1352013546 if (APF.isPosZero())
13521 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, SignBit,
13522 DAG.getIntPtrConstant(0, dl));
13547 return IsF128 ? SignBit :
13548 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, SignBit,
13549 DAG.getIntPtrConstant(0, dl));
1352313550 APF.clearSign();
1352413551 CV[0] = ConstantFP::get(*Context, APF);
1352513552 } else {
1353513562 false, false, false, 16);
1353613563 // If the magnitude operand wasn't a constant, we need to AND out the sign.
1353713564 if (!isa(Op0)) {
13538 Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op0);
13565 if (!IsF128)
13566 Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op0);
1353913567 Val = DAG.getNode(X86ISD::FAND, dl, LogicVT, Op0, Val);
1354013568 }
1354113569 // OR the magnitude value with the sign bit.
1354213570 Val = DAG.getNode(X86ISD::FOR, dl, LogicVT, Val, SignBit);
13543 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, Val,
13544 DAG.getIntPtrConstant(0, dl));
13571 return IsF128 ? Val :
13572 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, Val,
13573 DAG.getIntPtrConstant(0, dl));
1354513574 }
1354613575
1354713576 static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
2215722186 return EmitLoweredTLSCall(MI, BB);
2215822187 case X86::CMOV_FR32:
2215922188 case X86::CMOV_FR64:
22189 case X86::CMOV_FR128:
2216022190 case X86::CMOV_GR8:
2216122191 case X86::CMOV_GR16:
2216222192 case X86::CMOV_GR32:
2382023850 // ignored in unsafe-math mode).
2382123851 // We also try to create v2f32 min/max nodes, which we later widen to v4f32.
2382223852 if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
23823 VT != MVT::f80 && (TLI.isTypeLegal(VT) || VT == MVT::v2f32) &&
23853 VT != MVT::f80 && VT != MVT::f128 &&
23854 (TLI.isTypeLegal(VT) || VT == MVT::v2f32) &&
2382423855 (Subtarget->hasSSE2() ||
2382523856 (Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) {
2382623857 ISD::CondCode CC = cast(Cond.getOperand(2))->get();
2794527976 case MVT::f64:
2794627977 case MVT::i64:
2794727978 return std::make_pair(0U, &X86::FR64RegClass);
27979 // TODO: Handle f128 and i128 in FR128RegClass after it is tested well.
2794827980 // Vector types.
2794927981 case MVT::v16i8:
2795027982 case MVT::v8i16:
2805728089 // target independent register mapper will just pick the first match it can
2805828090 // find, ignoring the required type.
2805928091
28092 // TODO: Handle f128 and i128 in FR128RegClass after it is tested well.
2806028093 if (VT == MVT::f32 || VT == MVT::i32)
2806128094 Res.second = &X86::FR32RegClass;
2806228095 else if (VT == MVT::f64 || VT == MVT::i64)
511511
512512 defm _FR32 : CMOVrr_PSEUDO;
513513 defm _FR64 : CMOVrr_PSEUDO;
514 defm _FR128 : CMOVrr_PSEUDO;
514515 defm _V4F32 : CMOVrr_PSEUDO;
515516 defm _V2F64 : CMOVrr_PSEUDO;
516517 defm _V2I64 : CMOVrr_PSEUDO;
954954 return false;
955955 }]>;
956956
957 def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>;
958 def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
959 def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
960 def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
961 def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
957 def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>;
958 def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
959 def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
960 def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
961 def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
962 def loadf128 : PatFrag<(ops node:$ptr), (f128 (load node:$ptr))>;
962963
963964 def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
964965 def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
412412 def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
413413 def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
414414 def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
415 def : Pat<(f128 (bitconvert (i128 FR128:$src))), (f128 FR128:$src)>;
416 def : Pat<(i128 (bitconvert (f128 FR128:$src))), (i128 FR128:$src)>;
415417 }
416418
417419 // Bitcasts between 256-bit vector types. Return the original type since
88508852 defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", VR128, vx32mem, vy32mem>;
88518853 }
88528854 }
8855
8856 //===----------------------------------------------------------------------===//
8857 // Extra selection patterns for FR128, f128, f128mem
8858
// f128 values live in XMM registers (FR128 aliases the XMM set), so loads,
// stores, and bitwise logic are lowered by copying to/from VR128 and reusing
// the SSE1 packed-single instructions. This is valid because these operations
// only act on the raw 128-bit pattern, never on f128 arithmetic semantics.
8859 // movaps is shorter than movdqa. movaps is in SSE and movdqa is in SSE2.
8860 def : Pat<(store (f128 FR128:$src), addr:$dst),
8861 (MOVAPSmr addr:$dst, (COPY_TO_REGCLASS (f128 FR128:$src), VR128))>;
8862
8863 def : Pat<(loadf128 addr:$src),
8864 (COPY_TO_REGCLASS (MOVAPSrm addr:$src), FR128)>;
8865
8866 // andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
8867 def : Pat<(X86fand FR128:$src1, (loadf128 addr:$src2)),
8868 (COPY_TO_REGCLASS
8869 (ANDPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2),
8870 FR128)>;
8871
8872 def : Pat<(X86fand FR128:$src1, FR128:$src2),
8873 (COPY_TO_REGCLASS
8874 (ANDPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
8875 (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
8876
// Plain integer 'and' on FR128 shows up after f128 operations are softened
// to i128 integer ops; it selects to the same andps as the FP form above.
8877 def : Pat<(and FR128:$src1, FR128:$src2),
8878 (COPY_TO_REGCLASS
8879 (ANDPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
8880 (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
8881
8882 def : Pat<(X86for FR128:$src1, (loadf128 addr:$src2)),
8883 (COPY_TO_REGCLASS
8884 (ORPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2),
8885 FR128)>;
8886
8887 def : Pat<(X86for FR128:$src1, FR128:$src2),
8888 (COPY_TO_REGCLASS
8889 (ORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
8890 (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
8891
// Integer 'or' from softened f128 code; same orps as the FP form.
8892 def : Pat<(or FR128:$src1, FR128:$src2),
8893 (COPY_TO_REGCLASS
8894 (ORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
8895 (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
8896
8897 def : Pat<(X86fxor FR128:$src1, (loadf128 addr:$src2)),
8898 (COPY_TO_REGCLASS
8899 (XORPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2),
8900 FR128)>;
8901
8902 def : Pat<(X86fxor FR128:$src1, FR128:$src2),
8903 (COPY_TO_REGCLASS
8904 (XORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
8905 (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
8906
// Integer 'xor' from softened f128 code; same xorps as the FP form.
8907 def : Pat<(xor FR128:$src1, FR128:$src2),
8908 (COPY_TO_REGCLASS
8909 (XORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
8910 (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
422422
423423 def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>;
424424
425 def FR128 : RegisterClass<"X86", [i128, f128], 128, (add FR32)>; // f128/i128 scalars held in the XMM registers (same register set as FR32).
426
425427
426428 // FIXME: This sets up the floating point register files as though they are f64
427429 // values, though they really are f80 values. This will cause us to spill
0 ; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s
1 ; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s
2
3 ; __float128 myFP128 = 1.0L; // x86_64-linux-android
4 @myFP128 = global fp128 0xL00000000000000003FFF000000000000, align 16
5
6 ; The first few parameters are passed in registers, and the others are passed on the stack.
7
; The first fp128 argument arrives in %xmm0, which is also the fp128 return
; register, so returning %d0 should need no move instructions at all.
8 define fp128 @TestParam_FP128_0(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) {
9 entry:
10 ret fp128 %d0
11 ; CHECK-LABEL: TestParam_FP128_0:
12 ; CHECK-NOT: mov
13 ; CHECK: retq
14 }
15
16 define fp128 @TestParam_FP128_1(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) {
17 entry:
18 ret fp128 %d1
19 ; CHECK-LABEL: TestParam_FP128_1:
20 ; CHECK: movaps %xmm1, %xmm0
21 ; CHECK-NEXT: retq
22 }
23
24 define fp128 @TestParam_FP128_7(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) {
25 entry:
26 ret fp128 %d7
27 ; CHECK-LABEL: TestParam_FP128_7:
28 ; CHECK: movaps %xmm7, %xmm0
29 ; CHECK-NEXT: retq
30 }
31
; %d8 is the ninth fp128 argument: only eight XMM registers are used for
; argument passing, so this one must come from the stack (8(%rsp)).
32 define fp128 @TestParam_FP128_8(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) {
33 entry:
34 ret fp128 %d8
35 ; CHECK-LABEL: TestParam_FP128_8:
36 ; CHECK: movaps 8(%rsp), %xmm0
37 ; CHECK-NEXT: retq
38 }
39
40 define fp128 @TestParam_FP128_9(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) {
41 entry:
42 ret fp128 %d9
43 ; CHECK-LABEL: TestParam_FP128_9:
44 ; CHECK: movaps 24(%rsp), %xmm0
45 ; CHECK-NEXT: retq
46 }
0 ; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s
1 ; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s
2
3 ; Check soft floating point conversion function calls.
4
5 @vi32 = common global i32 0, align 4
6 @vi64 = common global i64 0, align 8
7 @vu32 = common global i32 0, align 4
8 @vu64 = common global i64 0, align 8
9 @vf32 = common global float 0.000000e+00, align 4
10 @vf64 = common global double 0.000000e+00, align 8
11 @vf128 = common global fp128 0xL00000000000000000000000000000000, align 16
12
13 define void @TestFPExtF32_F128() {
14 entry:
15 %0 = load float, float* @vf32, align 4
16 %conv = fpext float %0 to fp128
17 store fp128 %conv, fp128* @vf128, align 16
18 ret void
19 ; CHECK-LABEL: TestFPExtF32_F128:
20 ; CHECK: movss vf32(%rip), %xmm0
21 ; CHECK-NEXT: callq __extendsftf2
22 ; CHECK-NEXT: movaps %xmm0, vf128(%rip)
23 ; CHECK: retq
24 }
25
26 define void @TestFPExtF64_F128() {
27 entry:
28 %0 = load double, double* @vf64, align 8
29 %conv = fpext double %0 to fp128
30 store fp128 %conv, fp128* @vf128, align 16
31 ret void
32 ; CHECK-LABEL: TestFPExtF64_F128:
33 ; CHECK: movsd vf64(%rip), %xmm0
34 ; CHECK-NEXT: callq __extenddftf2
35 ; CHECK-NEXT: movapd %xmm0, vf128(%rip)
36 ; CHECK: ret
37 }
38
39 define void @TestFPToSIF128_I32() {
40 entry:
41 %0 = load fp128, fp128* @vf128, align 16
42 %conv = fptosi fp128 %0 to i32
43 store i32 %conv, i32* @vi32, align 4
44 ret void
45 ; CHECK-LABEL: TestFPToSIF128_I32:
46 ; CHECK: movaps vf128(%rip), %xmm0
47 ; CHECK-NEXT: callq __fixtfsi
48 ; CHECK-NEXT: movl %eax, vi32(%rip)
49 ; CHECK: retq
50 }
51
52 define void @TestFPToUIF128_U32() {
53 entry:
54 %0 = load fp128, fp128* @vf128, align 16
55 %conv = fptoui fp128 %0 to i32
56 store i32 %conv, i32* @vu32, align 4
57 ret void
58 ; CHECK-LABEL: TestFPToUIF128_U32:
59 ; CHECK: movaps vf128(%rip), %xmm0
60 ; CHECK-NEXT: callq __fixunstfsi
61 ; CHECK-NEXT: movl %eax, vu32(%rip)
62 ; CHECK: retq
63 }
64
65 define void @TestFPToSIF128_I64() {
66 entry:
67 %0 = load fp128, fp128* @vf128, align 16
68 %conv = fptosi fp128 %0 to i32
69 %conv1 = sext i32 %conv to i64
70 store i64 %conv1, i64* @vi64, align 8
71 ret void
72 ; CHECK-LABEL: TestFPToSIF128_I64:
73 ; CHECK: movaps vf128(%rip), %xmm0
74 ; CHECK-NEXT: callq __fixtfsi
75 ; CHECK-NEXT: cltq
76 ; CHECK-NEXT: movq %rax, vi64(%rip)
77 ; CHECK: retq
78 }
79
80 define void @TestFPToUIF128_U64() {
81 entry:
82 %0 = load fp128, fp128* @vf128, align 16
83 %conv = fptoui fp128 %0 to i32
84 %conv1 = zext i32 %conv to i64
85 store i64 %conv1, i64* @vu64, align 8
86 ret void
87 ; CHECK-LABEL: TestFPToUIF128_U64:
88 ; CHECK: movaps vf128(%rip), %xmm0
89 ; CHECK-NEXT: callq __fixunstfsi
90 ; CHECK-NEXT: movl %eax, %eax
91 ; CHECK-NEXT: movq %rax, vu64(%rip)
92 ; CHECK: retq
93 }
94
95 define void @TestFPTruncF128_F32() {
96 entry:
97 %0 = load fp128, fp128* @vf128, align 16
98 %conv = fptrunc fp128 %0 to float
99 store float %conv, float* @vf32, align 4
100 ret void
101 ; CHECK-LABEL: TestFPTruncF128_F32:
102 ; CHECK: movaps vf128(%rip), %xmm0
103 ; CHECK-NEXT: callq __trunctfsf2
104 ; CHECK-NEXT: movss %xmm0, vf32(%rip)
105 ; CHECK: retq
106 }
107
108 define void @TestFPTruncF128_F64() {
109 entry:
110 %0 = load fp128, fp128* @vf128, align 16
111 %conv = fptrunc fp128 %0 to double
112 store double %conv, double* @vf64, align 8
113 ret void
114 ; CHECK-LABEL: TestFPTruncF128_F64:
115 ; CHECK: movapd vf128(%rip), %xmm0
116 ; CHECK-NEXT: callq __trunctfdf2
117 ; CHECK-NEXT: movsd %xmm0, vf64(%rip)
118 ; CHECK: retq
119 }
120
121 define void @TestSIToFPI32_F128() {
122 entry:
123 %0 = load i32, i32* @vi32, align 4
124 %conv = sitofp i32 %0 to fp128
125 store fp128 %conv, fp128* @vf128, align 16
126 ret void
127 ; CHECK-LABEL: TestSIToFPI32_F128:
128 ; CHECK: movl vi32(%rip), %edi
129 ; CHECK-NEXT: callq __floatsitf
130 ; CHECK-NEXT: movaps %xmm0, vf128(%rip)
131 ; CHECK: retq
132 }
133
134 define void @TestUIToFPU32_F128() #2 {
135 entry:
136 %0 = load i32, i32* @vu32, align 4
137 %conv = uitofp i32 %0 to fp128
138 store fp128 %conv, fp128* @vf128, align 16
139 ret void
140 ; CHECK-LABEL: TestUIToFPU32_F128:
141 ; CHECK: movl vu32(%rip), %edi
142 ; CHECK-NEXT: callq __floatunsitf
143 ; CHECK-NEXT: movaps %xmm0, vf128(%rip)
144 ; CHECK: retq
145 }
146
147 define void @TestSIToFPI64_F128(){
148 entry:
149 %0 = load i64, i64* @vi64, align 8
150 %conv = sitofp i64 %0 to fp128
151 store fp128 %conv, fp128* @vf128, align 16
152 ret void
153 ; CHECK-LABEL: TestSIToFPI64_F128:
154 ; CHECK: movq vi64(%rip), %rdi
155 ; CHECK-NEXT: callq __floatditf
156 ; CHECK-NEXT: movaps %xmm0, vf128(%rip)
157 ; CHECK: retq
158 }
159
160 define void @TestUIToFPU64_F128() #2 {
161 entry:
162 %0 = load i64, i64* @vu64, align 8
163 %conv = uitofp i64 %0 to fp128
164 store fp128 %conv, fp128* @vf128, align 16
165 ret void
166 ; CHECK-LABEL: TestUIToFPU64_F128:
167 ; CHECK: movq vu64(%rip), %rdi
168 ; CHECK-NEXT: callq __floatunditf
169 ; CHECK-NEXT: movaps %xmm0, vf128(%rip)
170 ; CHECK: retq
171 }
172
173 define i32 @TestConst128(fp128 %v) {
174 entry:
175 %cmp = fcmp ogt fp128 %v, 0xL00000000000000003FFF000000000000
176 %conv = zext i1 %cmp to i32
177 ret i32 %conv
178 ; CHECK-LABEL: TestConst128:
179 ; CHECK: movaps {{.*}}, %xmm1
180 ; CHECK-NEXT: callq __gttf2
181 ; CHECK-NEXT: test
182 ; CHECK: retq
183 }
184
185 ; C code:
186 ; struct TestBits_ieee_ext {
187 ; unsigned v1;
188 ; unsigned v2;
189 ; };
190 ; union TestBits_LDU {
191 ; FP128 ld;
192 ; struct TestBits_ieee_ext bits;
193 ; };
194 ; int TestBits128(FP128 ld) {
195 ; union TestBits_LDU u;
196 ; u.ld = ld * ld;
197 ; return ((u.bits.v1 | u.bits.v2) == 0);
198 ; }
199 define i32 @TestBits128(fp128 %ld) {
200 entry:
201 %mul = fmul fp128 %ld, %ld
202 %0 = bitcast fp128 %mul to i128
203 %shift = lshr i128 %0, 32
204 %or5 = or i128 %shift, %0
205 %or = trunc i128 %or5 to i32
206 %cmp = icmp eq i32 %or, 0
207 %conv = zext i1 %cmp to i32
208 ret i32 %conv
209 ; CHECK-LABEL: TestBits128:
210 ; CHECK: movaps %xmm0, %xmm1
211 ; CHECK-NEXT: callq __multf3
212 ; CHECK-NEXT: movaps %xmm0, (%rsp)
213 ; CHECK-NEXT: movq (%rsp),
214 ; CHECK-NEXT: movq %
215 ; CHECK-NEXT: shrq $32,
216 ; CHECK: orl
217 ; CHECK-NEXT: sete %al
218 ; CHECK-NEXT: movzbl %al, %eax
219 ; CHECK: retq
220 ;
221 ; If TestBits128 fails due to any llvm or clang change,
222 ; please make sure the original simplified C code is still
223 ; compiled into correct IR and assembly code, not
224 ; just this TestBits128 test case. Better yet, try to
225 ; test the whole libm and its test cases.
226 }
227
228 ; C code: (compiled with -target x86_64-linux-android)
229 ; typedef long double __float128;
230 ; __float128 TestPair128(unsigned long a, unsigned long b) {
231 ; unsigned __int128 n;
232 ; unsigned __int128 v1 = ((unsigned __int128)a << 64);
233 ; unsigned __int128 v2 = (unsigned __int128)b;
234 ; n = (v1 | v2) + 3;
235 ; return *(__float128*)&n;
236 ; }
237 define fp128 @TestPair128(i64 %a, i64 %b) {
238 entry:
239 %conv = zext i64 %a to i128
240 %shl = shl nuw i128 %conv, 64
241 %conv1 = zext i64 %b to i128
242 %or = or i128 %shl, %conv1
243 %add = add i128 %or, 3
244 %0 = bitcast i128 %add to fp128
245 ret fp128 %0
246 ; CHECK-LABEL: TestPair128:
247 ; CHECK: addq $3, %rsi
248 ; CHECK: movq %rsi, -24(%rsp)
249 ; CHECK: movq %rdi, -16(%rsp)
250 ; CHECK: movaps -24(%rsp), %xmm0
251 ; CHECK-NEXT: retq
252 }
253
254 define fp128 @TestTruncCopysign(fp128 %x, i32 %n) {
255 entry:
256 %cmp = icmp sgt i32 %n, 50000
257 br i1 %cmp, label %if.then, label %cleanup
258
259 if.then: ; preds = %entry
260 %conv = fptrunc fp128 %x to double
261 %call = tail call double @copysign(double 0x7FF0000000000000, double %conv) #2
262 %conv1 = fpext double %call to fp128
263 br label %cleanup
264
265 cleanup: ; preds = %entry, %if.then
266 %retval.0 = phi fp128 [ %conv1, %if.then ], [ %x, %entry ]
267 ret fp128 %retval.0
268 ; CHECK-LABEL: TestTruncCopysign:
269 ; CHECK: callq __trunctfdf2
270 ; CHECK-NEXT: andpd {{.*}}, %xmm0
271 ; CHECK-NEXT: orpd {{.*}}, %xmm0
272 ; CHECK-NEXT: callq __extenddftf2
273 ; CHECK: retq
274 }
275
276 declare double @copysign(double, double) #1
277
278 attributes #2 = { nounwind readnone }
0 ; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s
1 ; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s
2
3 define i32 @TestComp128GT(fp128 %d1, fp128 %d2) {
4 entry:
5 %cmp = fcmp ogt fp128 %d1, %d2
6 %conv = zext i1 %cmp to i32
7 ret i32 %conv
8 ; CHECK-LABEL: TestComp128GT:
9 ; CHECK: callq __gttf2
10 ; CHECK: setg %al
11 ; CHECK: movzbl %al, %eax
12 ; CHECK: retq
13 }
14
15 define i32 @TestComp128GE(fp128 %d1, fp128 %d2) {
16 entry:
17 %cmp = fcmp oge fp128 %d1, %d2
18 %conv = zext i1 %cmp to i32
19 ret i32 %conv
20 ; CHECK-LABEL: TestComp128GE:
21 ; CHECK: callq __getf2
22 ; CHECK: testl %eax, %eax
23 ; CHECK: setns %al
24 ; CHECK: movzbl %al, %eax
25 ; CHECK: retq
26 }
27
28 define i32 @TestComp128LT(fp128 %d1, fp128 %d2) {
29 entry:
30 %cmp = fcmp olt fp128 %d1, %d2
31 %conv = zext i1 %cmp to i32
32 ret i32 %conv
33 ; CHECK-LABEL: TestComp128LT:
34 ; CHECK: callq __lttf2
35 ; CHECK-NEXT: shrl $31, %eax
36 ; CHECK: retq
37 ;
38 ; The 'shrl' is a special optimization in llvm to combine
39 ; the effect of 'fcmp olt' and 'zext'. The main purpose is
40 ; to test soften call to __lttf2.
41 }
42
43 define i32 @TestComp128LE(fp128 %d1, fp128 %d2) {
44 entry:
45 %cmp = fcmp ole fp128 %d1, %d2
46 %conv = zext i1 %cmp to i32
47 ret i32 %conv
48 ; CHECK-LABEL: TestComp128LE:
49 ; CHECK: callq __letf2
50 ; CHECK-NEXT: testl %eax, %eax
51 ; CHECK: setle %al
52 ; CHECK: movzbl %al, %eax
53 ; CHECK: retq
54 }
55
56 define i32 @TestComp128EQ(fp128 %d1, fp128 %d2) {
57 entry:
58 %cmp = fcmp oeq fp128 %d1, %d2
59 %conv = zext i1 %cmp to i32
60 ret i32 %conv
61 ; CHECK-LABEL: TestComp128EQ:
62 ; CHECK: callq __eqtf2
63 ; CHECK-NEXT: testl %eax, %eax
64 ; CHECK: sete %al
65 ; CHECK: movzbl %al, %eax
66 ; CHECK: retq
67 }
68
69 define i32 @TestComp128NE(fp128 %d1, fp128 %d2) {
70 entry:
71 %cmp = fcmp une fp128 %d1, %d2
72 %conv = zext i1 %cmp to i32
73 ret i32 %conv
74 ; CHECK-LABEL: TestComp128NE:
75 ; CHECK: callq __netf2
76 ; CHECK-NEXT: testl %eax, %eax
77 ; CHECK: setne %al
78 ; CHECK: movzbl %al, %eax
79 ; CHECK: retq
80 }
81
82 define fp128 @TestMax(fp128 %x, fp128 %y) {
83 entry:
84 %cmp = fcmp ogt fp128 %x, %y
85 %cond = select i1 %cmp, fp128 %x, fp128 %y
86 ret fp128 %cond
87 ; CHECK-LABEL: TestMax:
88 ; CHECK: movaps %xmm1
89 ; CHECK: movaps %xmm0
90 ; CHECK: callq __gttf2
91 ; CHECK: movaps {{.*}}, %xmm0
92 ; CHECK: testl %eax, %eax
93 ; CHECK: movaps {{.*}}, %xmm0
94 ; CHECK: retq
95 }
0 ; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s
1 ; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s
2
3 ; These tests were generated from simplified libm C code.
4 ; When compiled for the x86_64-linux-android target,
5 ; long double is mapped to f128 type that should be passed
6 ; in SSE registers. When the f128 type calling convention
7 ; problem was fixed, old llvm code failed to handle f128 values
8 ; in several f128/i128 type operations. These unit tests hopefully
9 ; will catch regression in any future change in this area.
10 ; To modify or enhance these test cases, please consult the libm
11 ; code patterns and compile with -target x86_64-linux-android
12 ; to generate IR. If the __float128 keyword is not accepted by
13 ; clang, just define it as "long double".
14 ;
15
16 ; typedef long double __float128;
17 ; union IEEEl2bits {
18 ; __float128 e;
19 ; struct {
20 ; unsigned long manl :64;
21 ; unsigned long manh :48;
22 ; unsigned int exp :15;
23 ; unsigned int sign :1;
24 ; } bits;
25 ; struct {
26 ; unsigned long manl :64;
27 ; unsigned long manh :48;
28 ; unsigned int expsign :16;
29 ; } xbits;
30 ; };
31
32 ; C code:
33 ; void foo(__float128 x);
34 ; void TestUnionLD1(__float128 s, unsigned long n) {
35 ; union IEEEl2bits u;
36 ; __float128 w;
37 ; u.e = s;
38 ; u.bits.manh = n;
39 ; w = u.e;
40 ; foo(w);
41 ; }
42 define void @TestUnionLD1(fp128 %s, i64 %n) #0 {
43 entry:
44 %0 = bitcast fp128 %s to i128
45 %1 = zext i64 %n to i128
46 %bf.value = shl nuw i128 %1, 64
47 %bf.shl = and i128 %bf.value, 5192296858534809181786422619668480
48 %bf.clear = and i128 %0, -5192296858534809181786422619668481
49 %bf.set = or i128 %bf.shl, %bf.clear
50 %2 = bitcast i128 %bf.set to fp128
51 tail call void @foo(fp128 %2) #2
52 ret void
53 ; CHECK-LABEL: TestUnionLD1:
54 ; CHECK: movaps %xmm0, -24(%rsp)
55 ; CHECK-NEXT: movq -24(%rsp), %rax
56 ; CHECK-NEXT: movabsq $281474976710655, %rcx
57 ; CHECK-NEXT: andq %rdi, %rcx
58 ; CHECK-NEXT: movabsq $-281474976710656, %rdx
59 ; CHECK-NEXT: andq -16(%rsp), %rdx
60 ; CHECK-NEXT: movq %rax, -40(%rsp)
61 ; CHECK-NEXT: orq %rcx, %rdx
62 ; CHECK-NEXT: movq %rdx, -32(%rsp)
63 ; CHECK-NEXT: movaps -40(%rsp), %xmm0
64 ; CHECK-NEXT: jmp foo
65 }
66
67 ; C code:
68 ; __float128 TestUnionLD2(__float128 s) {
69 ; union IEEEl2bits u;
70 ; __float128 w;
71 ; u.e = s;
72 ; u.bits.manl = 0;
73 ; w = u.e;
74 ; return w;
75 ; }
76 define fp128 @TestUnionLD2(fp128 %s) #0 {
77 entry:
78 %0 = bitcast fp128 %s to i128
79 %bf.clear = and i128 %0, -18446744073709551616
80 %1 = bitcast i128 %bf.clear to fp128
81 ret fp128 %1
82 ; CHECK-LABEL: TestUnionLD2:
83 ; CHECK: movaps %xmm0, -24(%rsp)
84 ; CHECK-NEXT: movq -16(%rsp), %rax
85 ; CHECK-NEXT: movq %rax, -32(%rsp)
86 ; CHECK-NEXT: movq $0, -40(%rsp)
87 ; CHECK-NEXT: movaps -40(%rsp), %xmm0
88 ; CHECK-NEXT: retq
89 }
90
91 ; C code:
92 ; __float128 TestI128_1(__float128 x)
93 ; {
94 ; union IEEEl2bits z;
95 ; z.e = x;
96 ; z.bits.sign = 0;
97 ; return (z.e < 0.1L) ? 1.0L : 2.0L;
98 ; }
; Checks that clearing the sign bit (z.bits.sign = 0) uses an integer mask,
; and that the fp128 comparison is lowered to the __lttf2 soft-float libcall
; whose i32 result selects between the two fp128 constants.
define fp128 @TestI128_1(fp128 %x) #0 {
entry:
  %0 = bitcast fp128 %x to i128
  ; 2^127 - 1: drops only the top (sign) bit.
  %bf.clear = and i128 %0, 170141183460469231731687303715884105727
  %1 = bitcast i128 %bf.clear to fp128
  %cmp = fcmp olt fp128 %1, 0xL999999999999999A3FFB999999999999
  %cond = select i1 %cmp, fp128 0xL00000000000000003FFF000000000000, fp128 0xL00000000000000004000000000000000
  ret fp128 %cond
; CHECK-LABEL: TestI128_1:
; CHECK: movaps %xmm0,
; CHECK: movabsq $9223372036854775807,
; CHECK: callq __lttf2
; CHECK: testl %eax, %eax
; CHECK: movaps {{.*}}, %xmm0
; CHECK: retq
}
115
116 ; C code:
117 ; __float128 TestI128_2(__float128 x, __float128 y)
118 ; {
119 ; unsigned short hx;
120 ; union IEEEl2bits ge_u;
121 ; ge_u.e = x;
122 ; hx = ge_u.xbits.expsign;
123 ; return (hx & 0x8000) == 0 ? x : y;
124 ; }
; Checks that testing the sign bit of an fp128 (hx & 0x8000 on expsign) is
; lowered to an integer compare of the spilled high quadword plus a jns, with
; both fp128 operands staying in XMM registers.
define fp128 @TestI128_2(fp128 %x, fp128 %y) #0 {
entry:
  %0 = bitcast fp128 %x to i128
  ; sgt -1 <=> sign bit clear.
  %cmp = icmp sgt i128 %0, -1
  %cond = select i1 %cmp, fp128 %x, fp128 %y
  ret fp128 %cond
; CHECK-LABEL: TestI128_2:
; CHECK: movaps %xmm0, -24(%rsp)
; CHECK-NEXT: cmpq $0, -16(%rsp)
; CHECK-NEXT: jns
; CHECK: movaps %xmm1, %xmm0
; CHECK: retq
}
138
139 ; C code:
140 ; __float128 TestI128_3(__float128 x, int *ex)
141 ; {
142 ; union IEEEl2bits u;
143 ; u.e = x;
144 ; if (u.bits.exp == 0) {
145 ; u.e *= 0x1.0p514;
146 ; u.bits.exp = 0x3ffe;
147 ; }
148 ; return (u.e);
149 ; }
; Denormal-rescaling pattern: tests the exponent field with an integer mask,
; multiplies via the __multf3 libcall on the zero-exponent path, then patches
; the exponent bits back in with 64-bit immediates.
define fp128 @TestI128_3(fp128 %x, i32* nocapture readnone %ex) #0 {
entry:
  %0 = bitcast fp128 %x to i128
  ; Mask selects the 15-bit exponent field (bits [112,127)).
  %bf.cast = and i128 %0, 170135991163610696904058773219554885632
  %cmp = icmp eq i128 %bf.cast, 0
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  ; u.e *= 0x1.0p514 — scales the denormal into the normal range.
  %mul = fmul fp128 %x, 0xL00000000000000004201000000000000
  %1 = bitcast fp128 %mul to i128
  ; Clear the exponent field, then set it to 0x3ffe.
  %bf.clear4 = and i128 %1, -170135991163610696904058773219554885633
  %bf.set = or i128 %bf.clear4, 85060207136517546210586590865283612672
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  %u.sroa.0.0 = phi i128 [ %bf.set, %if.then ], [ %0, %entry ]
  %2 = bitcast i128 %u.sroa.0.0 to fp128
  ret fp128 %2
; CHECK-LABEL: TestI128_3:
; CHECK: movaps %xmm0,
; CHECK: movabsq $9223090561878065152,
; CHECK: testq
; CHECK: callq __multf3
; CHECK-NEXT: movaps %xmm0
; CHECK: movabsq $-9223090561878065153,
; CHECK: movabsq $4611123068473966592,
; CHECK: retq
}
178
179 ; C code:
180 ; __float128 TestI128_4(__float128 x)
181 ; {
182 ; union IEEEl2bits u;
183 ; __float128 df;
184 ; u.e = x;
185 ; u.xbits.manl = 0;
186 ; df = u.e;
187 ; return x + df;
188 ; }
; Checks mixed integer/float use of one fp128 value: the low mantissa word is
; zeroed through the stack, then the original and masked values are added via
; the __addtf3 libcall with both operands in XMM registers.
define fp128 @TestI128_4(fp128 %x) #0 {
entry:
  %0 = bitcast fp128 %x to i128
  ; -2^64: keep the high quadword, zero the low mantissa word (u.xbits.manl = 0).
  %bf.clear = and i128 %0, -18446744073709551616
  %1 = bitcast i128 %bf.clear to fp128
  %add = fadd fp128 %1, %x
  ret fp128 %add
; CHECK-LABEL: TestI128_4:
; CHECK: movaps %xmm0, %xmm1
; CHECK-NEXT: movaps %xmm1, 16(%rsp)
; CHECK-NEXT: movq 24(%rsp), %rax
; CHECK-NEXT: movq %rax, 8(%rsp)
; CHECK-NEXT: movq $0, (%rsp)
; CHECK-NEXT: movaps (%rsp), %xmm0
; CHECK-NEXT: callq __addtf3
; CHECK: retq
}
206
207 @v128 = common global i128 0, align 16
208 @v128_2 = common global i128 0, align 16
209
210 ; C code:
211 ; unsigned __int128 v128, v128_2;
212 ; void TestShift128_2() {
213 ; v128 = ((v128 << 96) | v128_2);
214 ; }
; Sanity check that plain integer i128 operations (shift/or through memory)
; are unaffected by the fp128-in-XMM changes: the 128-bit shift by 96 splits
; into GPR moves plus a 32-bit shlq of the carried word.
define void @TestShift128_2() #2 {
entry:
  %0 = load i128, i128* @v128, align 16
  %shl = shl i128 %0, 96
  %1 = load i128, i128* @v128_2, align 16
  %or = or i128 %shl, %1
  store i128 %or, i128* @v128, align 16
  ret void
; CHECK-LABEL: TestShift128_2:
; CHECK: movq v128(%rip), %rax
; CHECK-NEXT: shlq $32, %rax
; CHECK-NEXT: movq v128_2(%rip), %rcx
; CHECK-NEXT: orq v128_2+8(%rip), %rax
; CHECK-NEXT: movq %rcx, v128(%rip)
; CHECK-NEXT: movq %rax, v128+8(%rip)
; CHECK-NEXT: retq
}
232
; Same body as TestI128_4, but deliberately named after the libm function
; acosl to verify the name does not collide with any builtin/libcall
; recognition; expected codegen is identical.
define fp128 @acosl(fp128 %x) #0 {
entry:
  %0 = bitcast fp128 %x to i128
  ; -2^64: keep the high quadword, zero the low mantissa word.
  %bf.clear = and i128 %0, -18446744073709551616
  %1 = bitcast i128 %bf.clear to fp128
  %add = fadd fp128 %1, %x
  ret fp128 %add
; CHECK-LABEL: acosl:
; CHECK: movaps %xmm0, %xmm1
; CHECK-NEXT: movaps %xmm1, 16(%rsp)
; CHECK-NEXT: movq 24(%rsp), %rax
; CHECK-NEXT: movq %rax, 8(%rsp)
; CHECK-NEXT: movq $0, (%rsp)
; CHECK-NEXT: movaps (%rsp), %xmm0
; CHECK-NEXT: callq __addtf3
; CHECK: retq
}
250
251 ; Compare i128 values and check i128 constants.
; Duplicate of TestI128_2 under a different name (per the header comment:
; compare i128 values and check i128 constants); expects the same
; sign-bit-test lowering.
define fp128 @TestComp(fp128 %x, fp128 %y) #0 {
entry:
  %0 = bitcast fp128 %x to i128
  ; sgt -1 <=> sign bit clear.
  %cmp = icmp sgt i128 %0, -1
  %cond = select i1 %cmp, fp128 %x, fp128 %y
  ret fp128 %cond
; CHECK-LABEL: TestComp:
; CHECK: movaps %xmm0, -24(%rsp)
; CHECK-NEXT: cmpq $0, -16(%rsp)
; CHECK-NEXT: jns
; CHECK: movaps %xmm1, %xmm0
; CHECK: retq
}
265
266 declare void @foo(fp128) #1
267
268 ; Test logical operations on fp128 values.
; Checks that fabsl on fp128 is recognized and lowered to a single andps with
; a sign-bit-clearing constant, with no libcall.
define fp128 @TestFABS_LD(fp128 %x) #0 {
entry:
  %call = tail call fp128 @fabsl(fp128 %x) #2
  ret fp128 %call
; CHECK-LABEL: TestFABS_LD
; CHECK: andps {{.*}}, %xmm0
; CHECK-NEXT: retq
}
277
278 declare fp128 @fabsl(fp128) #1
279
280 declare fp128 @copysignl(fp128, fp128) #1
281
282 ; Test more complicated logical operations generated from copysignl.
; Checks the logical-op expansion of copysignl on fp128: exactly one __subtf3
; and one __gttf2 libcall are emitted (CHECK-NOT guards against extras), and
; the copysign itself becomes register andps/orps logic rather than a call.
define void @TestCopySign({ fp128, fp128 }* noalias nocapture sret %agg.result, { fp128, fp128 }* byval nocapture readonly align 16 %z) #0 {
entry:
  %z.realp = getelementptr inbounds { fp128, fp128 }, { fp128, fp128 }* %z, i64 0, i32 0
  %z.real = load fp128, fp128* %z.realp, align 16
  %z.imagp = getelementptr inbounds { fp128, fp128 }, { fp128, fp128 }* %z, i64 0, i32 1
  %z.imag4 = load fp128, fp128* %z.imagp, align 16
  %cmp = fcmp ogt fp128 %z.real, %z.imag4
  %sub = fsub fp128 %z.imag4, %z.imag4
  br i1 %cmp, label %if.then, label %cleanup

if.then:                                          ; preds = %entry
  %call = tail call fp128 @fabsl(fp128 %sub) #2
  br label %cleanup

cleanup:                                          ; preds = %entry, %if.then
  %z.real.sink = phi fp128 [ %z.real, %if.then ], [ %sub, %entry ]
  %call.sink = phi fp128 [ %call, %if.then ], [ %z.real, %entry ]
  %call5 = tail call fp128 @copysignl(fp128 %z.real.sink, fp128 %z.imag4) #2
  %0 = getelementptr inbounds { fp128, fp128 }, { fp128, fp128 }* %agg.result, i64 0, i32 0
  %1 = getelementptr inbounds { fp128, fp128 }, { fp128, fp128 }* %agg.result, i64 0, i32 1
  store fp128 %call.sink, fp128* %0, align 16
  store fp128 %call5, fp128* %1, align 16
  ret void
; CHECK-LABEL: TestCopySign
; CHECK-NOT: call
; CHECK: callq __subtf3
; CHECK-NOT: call
; CHECK: callq __gttf2
; CHECK-NOT: call
; CHECK: andps {{.*}}, %xmm0
; CHECK: retq
}
315
316
317 attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+ssse3,+sse3,+popcnt,+sse,+sse2,+sse4.1,+sse4.2" "unsafe-fp-math"="false" "use-soft-float"="false" }
318 attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+ssse3,+sse3,+popcnt,+sse,+sse2,+sse4.1,+sse4.2" "unsafe-fp-math"="false" "use-soft-float"="false" }
319 attributes #2 = { nounwind readnone }
0 ; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s
1 ; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s
2
3 ; Check all soft floating point library function calls.
4
5 @vf64 = common global double 0.000000e+00, align 8
6 @vf128 = common global fp128 0xL00000000000000000000000000000000, align 16
7
; fp128 add: both operands already sit in %xmm0/%xmm1 per the calling
; convention, so the __addtf3 libcall needs no argument shuffling and the
; result is stored straight from %xmm0.
define void @Test128Add(fp128 %d1, fp128 %d2) {
entry:
  %add = fadd fp128 %d1, %d2
  store fp128 %add, fp128* @vf128, align 16
  ret void
; CHECK-LABEL: Test128Add:
; CHECK: callq __addtf3
; CHECK-NEXT: movaps %xmm0, vf128(%rip)
; CHECK: retq
}
18
; fp128 add with one memory operand: the incoming argument is moved to %xmm1
; and the global loaded into %xmm0 before calling __addtf3.
define void @Test128_1Add(fp128 %d1){
entry:
  %0 = load fp128, fp128* @vf128, align 16
  %add = fadd fp128 %0, %d1
  store fp128 %add, fp128* @vf128, align 16
  ret void
; CHECK-LABEL: Test128_1Add:
; CHECK: movaps %xmm0, %xmm1
; CHECK-NEXT: movaps vf128(%rip), %xmm0
; CHECK-NEXT: callq __addtf3
; CHECK-NEXT: movaps %xmm0, vf128(%rip)
; CHECK: retq
}
32
; fp128 subtract: lowered to the __subtf3 libcall with XMM argument passing.
define void @Test128Sub(fp128 %d1, fp128 %d2){
entry:
  %sub = fsub fp128 %d1, %d2
  store fp128 %sub, fp128* @vf128, align 16
  ret void
; CHECK-LABEL: Test128Sub:
; CHECK: callq __subtf3
; CHECK-NEXT: movaps %xmm0, vf128(%rip)
; CHECK: retq
}
43
; fp128 subtract with one memory operand: argument moves to %xmm1, the global
; (minuend) loads into %xmm0, then __subtf3 is called.
define void @Test128_1Sub(fp128 %d1){
entry:
  %0 = load fp128, fp128* @vf128, align 16
  %sub = fsub fp128 %0, %d1
  store fp128 %sub, fp128* @vf128, align 16
  ret void
; CHECK-LABEL: Test128_1Sub:
; CHECK: movaps %xmm0, %xmm1
; CHECK-NEXT: movaps vf128(%rip), %xmm0
; CHECK-NEXT: callq __subtf3
; CHECK-NEXT: movaps %xmm0, vf128(%rip)
; CHECK: retq
}
57
; fp128 multiply: lowered to the __multf3 libcall with XMM argument passing.
define void @Test128Mul(fp128 %d1, fp128 %d2){
entry:
  %mul = fmul fp128 %d1, %d2
  store fp128 %mul, fp128* @vf128, align 16
  ret void
; CHECK-LABEL: Test128Mul:
; CHECK: callq __multf3
; CHECK-NEXT: movaps %xmm0, vf128(%rip)
; CHECK: retq
}
68
; fp128 multiply with one memory operand: argument moves to %xmm1, the global
; loads into %xmm0, then __multf3 is called.
define void @Test128_1Mul(fp128 %d1){
entry:
  %0 = load fp128, fp128* @vf128, align 16
  %mul = fmul fp128 %0, %d1
  store fp128 %mul, fp128* @vf128, align 16
  ret void
; CHECK-LABEL: Test128_1Mul:
; CHECK: movaps %xmm0, %xmm1
; CHECK-NEXT: movaps vf128(%rip), %xmm0
; CHECK-NEXT: callq __multf3
; CHECK-NEXT: movaps %xmm0, vf128(%rip)
; CHECK: retq
}
82
; fp128 divide: lowered to the __divtf3 libcall with XMM argument passing.
define void @Test128Div(fp128 %d1, fp128 %d2){
entry:
  %div = fdiv fp128 %d1, %d2
  store fp128 %div, fp128* @vf128, align 16
  ret void
; CHECK-LABEL: Test128Div:
; CHECK: callq __divtf3
; CHECK-NEXT: movaps %xmm0, vf128(%rip)
; CHECK: retq
}
93
; fp128 divide with one memory operand: argument moves to %xmm1, the global
; (dividend) loads into %xmm0, then __divtf3 is called.
define void @Test128_1Div(fp128 %d1){
entry:
  %0 = load fp128, fp128* @vf128, align 16
  %div = fdiv fp128 %0, %d1
  store fp128 %div, fp128* @vf128, align 16
  ret void
; CHECK-LABEL: Test128_1Div:
; CHECK: movaps %xmm0, %xmm1
; CHECK-NEXT: movaps vf128(%rip), %xmm0
; CHECK-NEXT: callq __divtf3
; CHECK-NEXT: movaps %xmm0, vf128(%rip)
; CHECK: retq
}
0 ; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s
1 ; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s
2
; __float128 my_fp128 = 1.0L; // x86_64-linux-android
4 @my_fp128 = global fp128 0xL00000000000000003FFF000000000000, align 16
5
; Checks that a 16-byte-aligned fp128 global is returned with a single movaps
; into %xmm0 (the f128 return register per the updated calling convention).
define fp128 @get_fp128() {
entry:
  %0 = load fp128, fp128* @my_fp128, align 16
  ret fp128 %0
; CHECK-LABEL: get_fp128:
; CHECK: movaps my_fp128(%rip), %xmm0
; CHECK-NEXT: retq
}
14
15 @TestLoadExtend.data = internal unnamed_addr constant [2 x float] [float 0x3FB99999A0000000, float 0x3FC99999A0000000], align 4
16
; Checks float -> fp128 extension: the float is loaded with an indexed movss
; and widened via the __extendsftf2 libcall, returning in %xmm0.
define fp128 @TestLoadExtend(fp128 %x, i32 %n) {
entry:
  %idxprom = sext i32 %n to i64
  %arrayidx = getelementptr inbounds [2 x float], [2 x float]* @TestLoadExtend.data, i64 0, i64 %idxprom
  %0 = load float, float* %arrayidx, align 4
  %conv = fpext float %0 to fp128
  ret fp128 %conv
; CHECK-LABEL: TestLoadExtend:
; CHECK: movslq %edi, %rax
; CHECK-NEXT: movss TestLoadExtend.data(,%rax,4), %xmm0
; CHECK-NEXT: callq __extendsftf2
; CHECK: retq
}
30
31 ; CHECK-LABEL: my_fp128:
32 ; CHECK-NEXT: .quad 0
33 ; CHECK-NEXT: .quad 4611404543450677248
34 ; CHECK-NEXT: .size my_fp128, 16
0 ; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s
1 ; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s
2
3 ; __float128 myFP128 = 1.0L; // x86_64-linux-android
4 @myFP128 = global fp128 0xL00000000000000003FFF000000000000, align 16
5
; Checks that an fp128 argument arriving in %xmm0 is stored to an aligned
; global with a single movaps.
define void @set_FP128(fp128 %x) {
entry:
  store fp128 %x, fp128* @myFP128, align 16
  ret void
; CHECK-LABEL: set_FP128:
; CHECK: movaps %xmm0, myFP128(%rip)
; CHECK-NEXT: retq
}
None ; RUN: llc < %s -march=x86 -mattr=+sse2,+soft-float | FileCheck %s
1 ; RUN: llc < %s -march=x86-64 -mattr=+sse2,+soft-float | FileCheck %s
2 ; RUN: llc < %s -mtriple=x86_64-gnux32 -mattr=+sse2,+soft-float | FileCheck %s
0 ; RUN: llc < %s -march=x86 -mattr=+mmx,+sse,+soft-float \
1 ; RUN: | FileCheck %s --check-prefix=SOFT1 --check-prefix=CHECK
2 ; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2,+soft-float \
3 ; RUN: | FileCheck %s --check-prefix=SOFT2 --check-prefix=CHECK
4 ; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse \
5 ; RUN: | FileCheck %s --check-prefix=SSE1 --check-prefix=CHECK
6 ; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 \
7 ; RUN: | FileCheck %s --check-prefix=SSE2 --check-prefix=CHECK
8 ; RUN: llc < %s -mtriple=x86_64-gnux32 -mattr=+mmx,+sse2,+soft-float | FileCheck %s
39
4 ; CHECK-NOT: xmm{[0-9]+}
10 ; CHECK-NOT: xmm{{[0-9]+}}
511
612 %struct.__va_list_tag = type { i32, i32, i8*, i8* }
713
1420 call void @bar(%struct.__va_list_tag* %va3) nounwind
1521 call void @llvm.va_end(i8* %va12)
1622 ret i32 undef
23 ; CHECK-LABEL: t1:
24 ; CHECK: ret{{[lq]}}
1725 }
1826
1927 declare void @llvm.va_start(i8*) nounwind
2634 entry:
2735 %0 = fadd float %a, %b ; [#uses=1]
2836 ret float %0
37 ; CHECK-LABEL: t2:
38 ; SOFT1-NOT: xmm{{[0-9]+}}
39 ; SOFT2-NOT: xmm{{[0-9]+}}
40 ; SSE1: xmm{{[0-9]+}}
41 ; SSE2: xmm{{[0-9]+}}
42 ; CHECK: ret{{[lq]}}
2943 }
44
45 ; soft-float means no SSE instruction and passing fp128 as pair of i64.
; fp128 add under the four RUN configurations: with +soft-float (SOFT1/SOFT2)
; no XMM register may appear anywhere (fp128 is passed as an i64 pair); with
; SSE enabled (SSE1/SSE2) XMM registers must be used.
define fp128 @t3(fp128 %a, fp128 %b) nounwind readnone {
entry:
  %0 = fadd fp128 %b, %a
  ret fp128 %0
; CHECK-LABEL: t3:
; SOFT1-NOT: xmm{{[0-9]+}}
; SOFT2-NOT: xmm{{[0-9]+}}
; SSE1: xmm{{[0-9]+}}
; SSE2: xmm{{[0-9]+}}
; CHECK: ret{{[lq]}}
}
950950 TYPE("f128mem", TYPE_M128)
951951 TYPE("f256mem", TYPE_M256)
952952 TYPE("f512mem", TYPE_M512)
953 TYPE("FR128", TYPE_XMM128)
953954 TYPE("FR64", TYPE_XMM64)
954955 TYPE("FR64X", TYPE_XMM64)
955956 TYPE("f64mem", TYPE_M64FP)
10681069 // register IDs in 8-bit immediates nowadays.
10691070 ENCODING("FR32", ENCODING_IB)
10701071 ENCODING("FR64", ENCODING_IB)
1072 ENCODING("FR128", ENCODING_IB)
10711073 ENCODING("VR128", ENCODING_IB)
10721074 ENCODING("VR256", ENCODING_IB)
10731075 ENCODING("FR32X", ENCODING_IB)
10901092 ENCODING("GR8", ENCODING_RM)
10911093 ENCODING("VR128", ENCODING_RM)
10921094 ENCODING("VR128X", ENCODING_RM)
1095 ENCODING("FR128", ENCODING_RM)
10931096 ENCODING("FR64", ENCODING_RM)
10941097 ENCODING("FR32", ENCODING_RM)
10951098 ENCODING("FR64X", ENCODING_RM)
11191122 ENCODING("GR64", ENCODING_REG)
11201123 ENCODING("GR8", ENCODING_REG)
11211124 ENCODING("VR128", ENCODING_REG)
1125 ENCODING("FR128", ENCODING_REG)
11221126 ENCODING("FR64", ENCODING_REG)
11231127 ENCODING("FR32", ENCODING_REG)
11241128 ENCODING("VR64", ENCODING_REG)
11561160 ENCODING("GR32", ENCODING_VVVV)
11571161 ENCODING("GR64", ENCODING_VVVV)
11581162 ENCODING("FR32", ENCODING_VVVV)
1163 ENCODING("FR128", ENCODING_VVVV)
11591164 ENCODING("FR64", ENCODING_VVVV)
11601165 ENCODING("VR128", ENCODING_VVVV)
11611166 ENCODING("VR256", ENCODING_VVVV)