llvm / 707e018
Drop ISD::MEMSET, ISD::MEMMOVE, and ISD::MEMCPY, which are not Legal on any current target and aren't optimized in DAGCombiner. Instead of going through intermediate nodes, expand the operations immediately, choosing between simple loads/stores, target-specific code, and library calls. Previously, the code that emits optimized sequences for these operations was only used at initial SelectionDAG construction time; now it is used at all times.

This fixes some cases where rep;movs was being used for small copies where simple loads/stores would be better. It also cleans up the code that checks for alignments less than 4: the targets now make that decision instead of target-independent code, which allows x86 to use rep;movs in low-alignment cases.

Also, this fixes a bug that resulted in the use of rep;stos for memsets of 0 with non-constant memory size when the alignment was at least 4. It's better to use the library in this case, which can be significantly faster when the size is large.

This also preserves more SourceValue information when memory intrinsics are lowered into simple loads/stores.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@49572 91177308-0d34-0410-b5e6-96231b3b80d8
Dan Gohman
31 changed files with 700 additions and 763 deletions.
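The strategy described in the commit message is implemented in the new SelectionDAG::getMemcpy/getMemmove/getMemset further down in this diff. As a minimal sketch of the cascade (simplified signatures; emitLibCall is a hypothetical helper standing in for the ArgListTy/LowerCallTo boilerplate the real code uses):

  // Sketch only; see SelectionDAG::getMemcpy below for the actual code.
  SDOperand LowerMemcpyCascade(SelectionDAG &DAG, TargetLowering &TLI,
                               SDOperand Chain, SDOperand Dst, SDOperand Src,
                               SDOperand Size, unsigned Align,
                               bool AlwaysInline) {
    // 1. Small constant sizes: expand to simple loads and stores, subject
    //    to the target's MaxStoresPerMemcpy limit.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Size)) {
      SDOperand R = getMemcpyLoadsAndStores(DAG, Chain, Dst, Src,
                                            C->getValue(), Align, false,
                                            NULL, 0, NULL, 0);
      if (R.Val) return R;
    }
    // 2. Otherwise, give the target a chance to emit custom code
    //    (e.g. rep;movs on x86, ldm/stm sequences on ARM).
    SDOperand R = TLI.EmitTargetCodeForMemcpy(DAG, Chain, Dst, Src, Size,
                                              Align, AlwaysInline,
                                              NULL, 0, NULL, 0);
    if (R.Val) return R;
    // 3. Fall back to a library call to memcpy.
    return emitLibCall(DAG, TLI, Chain, "memcpy", Dst, Src, Size);
  }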
322322 SDOperand getNode(unsigned Opcode, SDVTList VTs,
323323 const SDOperand *Ops, unsigned NumOps);
324324
325 SDOperand getMemcpy(SDOperand Chain, SDOperand Dest, SDOperand Src,
326 SDOperand Size, SDOperand Align,
327 SDOperand AlwaysInline);
328
329 SDOperand getMemmove(SDOperand Chain, SDOperand Dest, SDOperand Src,
330 SDOperand Size, SDOperand Align,
331 SDOperand AlwaysInline);
332
333 SDOperand getMemset(SDOperand Chain, SDOperand Dest, SDOperand Src,
334 SDOperand Size, SDOperand Align,
335 SDOperand AlwaysInline);
325 SDOperand getMemcpy(SDOperand Chain, SDOperand Dst, SDOperand Src,
326 SDOperand Size, unsigned Align,
327 bool AlwaysInline,
328 Value *DstSV, uint64_t DstOff,
329 Value *SrcSV, uint64_t SrcOff);
330
331 SDOperand getMemmove(SDOperand Chain, SDOperand Dst, SDOperand Src,
332 SDOperand Size, unsigned Align,
333 Value *DstSV, uint64_t DstOff,
334 Value *SrcSV, uint64_t SrcOff);
335
336 SDOperand getMemset(SDOperand Chain, SDOperand Dst, SDOperand Src,
337 SDOperand Size, unsigned Align,
338 Value *DstSV, uint64_t DstOff);
336339
337340 /// getSetCC - Helper function to make it easier to build SetCC's if you just
338341 /// have an ISD::CondCode instead of an SDOperand.
496496 // it returns an output chain.
497497 STACKRESTORE,
498498
499 // MEMSET/MEMCPY/MEMMOVE - The first operand is the chain. The following
500 // correspond to the operands of the LLVM intrinsic functions and the last
501 // one is AlwaysInline. The only result is a token chain. The alignment
502 // argument is guaranteed to be a Constant node.
503 MEMSET,
504 MEMMOVE,
505 MEMCPY,
506
507499 // CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of
508500 // a call sequence, and carry arbitrary information that target might want
509501 // to know. The first operand is a chain, the rest are specified by the
947947 SDOperand Callee, ArgListTy &Args, SelectionDAG &DAG);
948948
949949
950 virtual SDOperand LowerMEMCPY(SDOperand Op, SelectionDAG &DAG);
951 virtual SDOperand LowerMEMCPYCall(SDOperand Chain, SDOperand Dest,
952 SDOperand Source, SDOperand Count,
953 SelectionDAG &DAG);
954 virtual SDOperand LowerMEMCPYInline(SDOperand Chain, SDOperand Dest,
955 SDOperand Source, unsigned Size,
956 unsigned Align, SelectionDAG &DAG) {
957 assert(0 && "Not Implemented");
958 return SDOperand(); // this is here to silence compiler errors
959 }
960
950 /// EmitTargetCodeForMemcpy - Emit target-specific code that performs a
951 /// memcpy. This can be used by targets to provide code sequences for cases
952 /// that don't fit the target's parameters for simple loads/stores and can be
953 /// more efficient than using a library call. This function can return a null
954 /// SDOperand if the target declines to use inline code and a different
955 /// lowering strategy should be used.
956 ///
957 /// If AlwaysInline is true, the size is constant and the target should not
958 /// emit any calls and is strongly encouraged to attempt to emit inline code
959 /// even if it is beyond the usual threshold because this intrinsic is being
960 /// expanded in a place where calls are not feasible (e.g. within the prologue
961 /// for another call). If the target chooses to decline an AlwaysInline
962 /// request here, legalize will resort to using simple loads and stores.
963 virtual SDOperand
964 EmitTargetCodeForMemcpy(SelectionDAG &DAG,
965 SDOperand Chain,
966 SDOperand Op1, SDOperand Op2,
967 SDOperand Op3, unsigned Align,
968 bool AlwaysInline,
969 Value *DstSV, uint64_t DstOff,
970 Value *SrcSV, uint64_t SrcOff) {
971 return SDOperand();
972 }
973
974 /// EmitTargetCodeForMemmove - Emit target-specific code that performs a
975 /// memmove. This can be used by targets to provide code sequences for cases
976 /// that don't fit the target's parameters for simple loads/stores and can be
977 /// more efficient than using a library call. This function can return a null
978 /// SDOperand if the target declines to use code and a different lowering
979 /// strategy should be used.
980 virtual SDOperand
981 EmitTargetCodeForMemmove(SelectionDAG &DAG,
982 SDOperand Chain,
983 SDOperand Op1, SDOperand Op2,
984 SDOperand Op3, unsigned Align,
985 Value *DstSV, uint64_t DstOff,
986 Value *SrcSV, uint64_t SrcOff) {
987 return SDOperand();
988 }
989
990 /// EmitTargetCodeForMemset - Emit target-specific code that performs a
991 /// memset. This can be used by targets to provide code sequences for cases
992 /// that don't fit the target's parameters for simple stores and can be more
993 /// efficient than using a library call. This function can return a null
994 /// SDOperand if the target declines to use code and a different lowering
995 /// strategy should be used.
996 virtual SDOperand
997 EmitTargetCodeForMemset(SelectionDAG &DAG,
998 SDOperand Chain,
999 SDOperand Op1, SDOperand Op2,
1000 SDOperand Op3, unsigned Align,
1001 Value *DstSV, uint64_t DstOff) {
1002 return SDOperand();
1003 }
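A target opts in by overriding one of these hooks; returning a null SDOperand opts out and falls back to the generic lowering. A hypothetical override, sketched under the same signature (MyTargetLowering and MyInlineThreshold are illustrative names, not part of this patch; compare the real ARM and X86 implementations later in this diff):

  SDOperand
  MyTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG,
                                            SDOperand Chain,
                                            SDOperand Op1, SDOperand Op2,
                                            SDOperand Op3, unsigned Align,
                                            bool AlwaysInline,
                                            Value *DstSV, uint64_t DstOff,
                                            Value *SrcSV, uint64_t SrcOff) {
    // Only handle constant sizes within this target's inline threshold;
    // otherwise decline so the generic code expands or emits a libcall.
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3);
    if (!C || (!AlwaysInline && C->getValue() > MyInlineThreshold))
      return SDOperand();
    // ... emit the target-specific copy sequence here (Op1 = destination,
    // Op2 = source, Op3 = size) and return its token chain ...
    return Chain;
  }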
9611004
9621005 /// LowerOperation - This callback is invoked for operations that are
9631006 /// unsupported by the target, which are registered to use 'custom' lowering,
2727 protected: // Can only create subclasses...
2828 TargetSubtarget();
2929 public:
30 /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
31 /// that still makes it profitable to inline the call.
32 virtual unsigned getMaxInlineSizeThreshold() const {return 0; }
3330 virtual ~TargetSubtarget();
3431 };
3532
2121 #include "llvm/Target/TargetData.h"
2222 #include "llvm/Target/TargetMachine.h"
2323 #include "llvm/Target/TargetOptions.h"
24 #include "llvm/Target/TargetSubtarget.h"
2425 #include "llvm/CallingConv.h"
2526 #include "llvm/Constants.h"
2627 #include "llvm/DerivedTypes.h"
28412842 break;
28422843 }
28432844 break;
2844 case ISD::MEMSET:
2845 case ISD::MEMCPY:
2846 case ISD::MEMMOVE: {
2847 Tmp1 = LegalizeOp(Node->getOperand(0)); // Chain
2848 Tmp2 = LegalizeOp(Node->getOperand(1)); // Pointer
2849
2850 if (Node->getOpcode() == ISD::MEMSET) { // memset = ubyte
2851 switch (getTypeAction(Node->getOperand(2).getValueType())) {
2852 case Expand: assert(0 && "Cannot expand a byte!");
2853 case Legal:
2854 Tmp3 = LegalizeOp(Node->getOperand(2));
2855 break;
2856 case Promote:
2857 Tmp3 = PromoteOp(Node->getOperand(2));
2858 break;
2859 }
2860 } else {
2861 Tmp3 = LegalizeOp(Node->getOperand(2)); // memcpy/move = pointer,
2862 }
2863
2864 SDOperand Tmp4;
2865 switch (getTypeAction(Node->getOperand(3).getValueType())) {
2866 case Expand: {
2867 // Length is too big, just take the lo-part of the length.
2868 SDOperand HiPart;
2869 ExpandOp(Node->getOperand(3), Tmp4, HiPart);
2870 break;
2871 }
2872 case Legal:
2873 Tmp4 = LegalizeOp(Node->getOperand(3));
2874 break;
2875 case Promote:
2876 Tmp4 = PromoteOp(Node->getOperand(3));
2877 break;
2878 }
2879
2880 SDOperand Tmp5;
2881 switch (getTypeAction(Node->getOperand(4).getValueType())) { // uint
2882 case Expand: assert(0 && "Cannot expand this yet!");
2883 case Legal:
2884 Tmp5 = LegalizeOp(Node->getOperand(4));
2885 break;
2886 case Promote:
2887 Tmp5 = PromoteOp(Node->getOperand(4));
2888 break;
2889 }
2890
2891 SDOperand Tmp6;
2892 switch (getTypeAction(Node->getOperand(5).getValueType())) { // bool
2893 case Expand: assert(0 && "Cannot expand this yet!");
2894 case Legal:
2895 Tmp6 = LegalizeOp(Node->getOperand(5));
2896 break;
2897 case Promote:
2898 Tmp6 = PromoteOp(Node->getOperand(5));
2899 break;
2900 }
2901
2902 switch (TLI.getOperationAction(Node->getOpcode(), MVT::Other)) {
2903 default: assert(0 && "This action not implemented for this operation!");
2904 case TargetLowering::Custom:
2905 isCustom = true;
2906 // FALLTHROUGH
2907 case TargetLowering::Legal: {
2908 SDOperand Ops[] = { Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6 };
2909 Result = DAG.UpdateNodeOperands(Result, Ops, 6);
2910 if (isCustom) {
2911 Tmp1 = TLI.LowerOperation(Result, DAG);
2912 if (Tmp1.Val) Result = Tmp1;
2913 }
2914 break;
2915 }
2916 case TargetLowering::Expand: {
2917 // Otherwise, the target does not support this operation. Lower the
2918 // operation to an explicit libcall as appropriate.
2919 MVT::ValueType IntPtr = TLI.getPointerTy();
2920 const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType();
2921 TargetLowering::ArgListTy Args;
2922 TargetLowering::ArgListEntry Entry;
2923
2924 const char *FnName = 0;
2925 if (Node->getOpcode() == ISD::MEMSET) {
2926 Entry.Node = Tmp2; Entry.Ty = IntPtrTy;
2927 Args.push_back(Entry);
2928 // Extend the (previously legalized) ubyte argument to be an int value
2929 // for the call.
2930 if (Tmp3.getValueType() > MVT::i32)
2931 Tmp3 = DAG.getNode(ISD::TRUNCATE, MVT::i32, Tmp3);
2932 else
2933 Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Tmp3);
2934 Entry.Node = Tmp3; Entry.Ty = Type::Int32Ty; Entry.isSExt = true;
2935 Args.push_back(Entry);
2936 Entry.Node = Tmp4; Entry.Ty = IntPtrTy; Entry.isSExt = false;
2937 Args.push_back(Entry);
2938
2939 FnName = "memset";
2940 } else if (Node->getOpcode() == ISD::MEMCPY ||
2941 Node->getOpcode() == ISD::MEMMOVE) {
2942 Entry.Ty = IntPtrTy;
2943 Entry.Node = Tmp2; Args.push_back(Entry);
2944 Entry.Node = Tmp3; Args.push_back(Entry);
2945 Entry.Node = Tmp4; Args.push_back(Entry);
2946 FnName = Node->getOpcode() == ISD::MEMMOVE ? "memmove" : "memcpy";
2947 } else {
2948 assert(0 && "Unknown op!");
2949 }
2950
2951 std::pair<SDOperand,SDOperand> CallResult =
2952 TLI.LowerCallTo(Tmp1, Type::VoidTy,
2953 false, false, false, CallingConv::C, false,
2954 DAG.getExternalSymbol(FnName, IntPtr), Args, DAG);
2955 Result = CallResult.second;
2956 break;
2957 }
2958 }
2959 break;
2960 }
29612845
29622846 case ISD::SHL_PARTS:
29632847 case ISD::SRA_PARTS:
438438 return DAG.getLoad(DestVT, Store, FIPtr, NULL, 0);
439439 }
440440
441 /// HandleMemIntrinsic - This handles memcpy/memset/memmove with invalid
442 /// operands. This promotes or expands the operands as required.
443 SDOperand DAGTypeLegalizer::HandleMemIntrinsic(SDNode *N) {
444 // The chain and pointer [operands #0 and #1] are always valid types.
445 SDOperand Chain = N->getOperand(0);
446 SDOperand Ptr = N->getOperand(1);
447 SDOperand Op2 = N->getOperand(2);
448
449 // Op #2 is either a value (memset) or a pointer. Promote it if required.
450 switch (getTypeAction(Op2.getValueType())) {
451 default: assert(0 && "Unknown action for pointer/value operand");
452 case Legal: break;
453 case Promote: Op2 = GetPromotedOp(Op2); break;
454 }
455
456 // The length could have any action required.
457 SDOperand Length = N->getOperand(3);
458 switch (getTypeAction(Length.getValueType())) {
459 default: assert(0 && "Unknown action for memop operand");
460 case Legal: break;
461 case Promote: Length = GetPromotedZExtOp(Length); break;
462 case Expand:
463 SDOperand Dummy; // discard the high part.
464 GetExpandedOp(Length, Length, Dummy);
465 break;
466 }
467
468 SDOperand Align = N->getOperand(4);
469 switch (getTypeAction(Align.getValueType())) {
470 default: assert(0 && "Unknown action for memop operand");
471 case Legal: break;
472 case Promote: Align = GetPromotedZExtOp(Align); break;
473 }
474
475 SDOperand AlwaysInline = N->getOperand(5);
476 switch (getTypeAction(AlwaysInline.getValueType())) {
477 default: assert(0 && "Unknown action for memop operand");
478 case Legal: break;
479 case Promote: AlwaysInline = GetPromotedZExtOp(AlwaysInline); break;
480 }
481
482 SDOperand Ops[] = { Chain, Ptr, Op2, Length, Align, AlwaysInline };
483 return DAG.UpdateNodeOperands(SDOperand(N, 0), Ops, 6);
484 }
485
486441 /// JoinIntegers - Build an integer with low bits Lo and high bits Hi.
487442 SDOperand DAGTypeLegalizer::JoinIntegers(SDOperand Lo, SDOperand Hi) {
488443 MVT::ValueType LVT = Lo.getValueType();
164164 // Common routines.
165165 SDOperand BitConvertToInteger(SDOperand Op);
166166 SDOperand CreateStackStoreLoad(SDOperand Op, MVT::ValueType DestVT);
167 SDOperand HandleMemIntrinsic(SDNode *N);
168167 SDOperand JoinIntegers(SDOperand Lo, SDOperand Hi);
169168 void SplitInteger(SDOperand Op, SDOperand &Lo, SDOperand &Hi);
170169 void SplitInteger(SDOperand Op, MVT::ValueType LoVT, MVT::ValueType HiVT,
945945 case ISD::STORE:
946946 Res = ExpandOperand_STORE(cast<StoreSDNode>(N), OpNo);
947947 break;
948 case ISD::MEMSET:
949 case ISD::MEMCPY:
950 case ISD::MEMMOVE: Res = HandleMemIntrinsic(N); break;
951948
952949 case ISD::BUILD_VECTOR: Res = ExpandOperand_BUILD_VECTOR(N); break;
953950 }
446446
447447 case ISD::STORE: Res = PromoteOperand_STORE(cast<StoreSDNode>(N),
448448 OpNo); break;
449 case ISD::MEMSET:
450 case ISD::MEMCPY:
451 case ISD::MEMMOVE: Res = HandleMemIntrinsic(N); break;
452449
453450 case ISD::BUILD_VECTOR: Res = PromoteOperand_BUILD_VECTOR(N); break;
454451 case ISD::INSERT_VECTOR_ELT:
1616 #include "llvm/Intrinsics.h"
1717 #include "llvm/DerivedTypes.h"
1818 #include "llvm/Assembly/Writer.h"
19 #include "llvm/CallingConv.h"
1920 #include "llvm/CodeGen/MachineBasicBlock.h"
2021 #include "llvm/CodeGen/MachineConstantPool.h"
2122 #include "llvm/CodeGen/MachineFrameInfo.h"
23842385 return getNode(Opcode, VT, Ops, 5);
23852386 }
23862387
2387 SDOperand SelectionDAG::getMemcpy(SDOperand Chain, SDOperand Dest,
2388 /// getMemsetValue - Vectorized representation of the memset value
2389 /// operand.
2390 static SDOperand getMemsetValue(SDOperand Value, MVT::ValueType VT,
2391 SelectionDAG &DAG) {
2392 MVT::ValueType CurVT = VT;
2393 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
2394 uint64_t Val = C->getValue() & 255;
2395 unsigned Shift = 8;
2396 while (CurVT != MVT::i8) {
2397 Val = (Val << Shift) | Val;
2398 Shift <<= 1;
2399 CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
2400 }
2401 return DAG.getConstant(Val, VT);
2402 } else {
2403 Value = DAG.getNode(ISD::ZERO_EXTEND, VT, Value);
2404 unsigned Shift = 8;
2405 while (CurVT != MVT::i8) {
2406 Value =
2407 DAG.getNode(ISD::OR, VT,
2408 DAG.getNode(ISD::SHL, VT, Value,
2409 DAG.getConstant(Shift, MVT::i8)), Value);
2410 Shift <<= 1;
2411 CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
2412 }
2413
2414 return Value;
2415 }
2416 }
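A worked example of the constant path above (illustrative, not from the source): for VT = MVT::i32 and a constant byte value 0xAB, the loop steps CurVT from i32 to i16 to i8, doubling Shift each time:

  start:      Val = 0xAB
  Shift = 8:  Val = (0xAB << 8) | 0xAB = 0xABAB
  Shift = 16: Val = (0xABAB << 16) | 0xABAB = 0xABABABAB  -> getConstant(Val, MVT::i32)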
2417
2418 /// getMemsetStringVal - Similar to getMemsetValue. Except this is only
2419 /// used when a memcpy is turned into a memset when the source is a constant
2420 /// string ptr.
2421 static SDOperand getMemsetStringVal(MVT::ValueType VT,
2422 SelectionDAG &DAG,
2423 const TargetLowering &TLI,
2424 std::string &Str, unsigned Offset) {
2425 uint64_t Val = 0;
2426 unsigned MSB = MVT::getSizeInBits(VT) / 8;
2427 if (TLI.isLittleEndian())
2428 Offset = Offset + MSB - 1;
2429 for (unsigned i = 0; i != MSB; ++i) {
2430 Val = (Val << 8) | (unsigned char)Str[Offset];
2431 Offset += TLI.isLittleEndian() ? -1 : 1;
2432 }
2433 return DAG.getConstant(Val, VT);
2434 }
2435
2436 /// getMemBasePlusOffset - Returns Base plus the given Offset, as an ISD::ADD node.
2437 static SDOperand getMemBasePlusOffset(SDOperand Base, unsigned Offset,
2438 SelectionDAG &DAG) {
2439 MVT::ValueType VT = Base.getValueType();
2440 return DAG.getNode(ISD::ADD, VT, Base, DAG.getConstant(Offset, VT));
2441 }
2442
2443 /// MeetsMaxMemopRequirement - Determines if the number of memory ops required
2444 /// to replace the memset / memcpy is below the threshold. It also returns the
2445 /// types of the sequence of memory ops to perform memset / memcpy.
2446 static bool MeetsMaxMemopRequirement(std::vector<MVT::ValueType> &MemOps,
2447 unsigned Limit, uint64_t Size,
2448 unsigned Align,
2449 const TargetLowering &TLI) {
2450 MVT::ValueType VT;
2451
2452 if (TLI.allowsUnalignedMemoryAccesses()) {
2453 VT = MVT::i64;
2454 } else {
2455 switch (Align & 7) {
2456 case 0:
2457 VT = MVT::i64;
2458 break;
2459 case 4:
2460 VT = MVT::i32;
2461 break;
2462 case 2:
2463 VT = MVT::i16;
2464 break;
2465 default:
2466 VT = MVT::i8;
2467 break;
2468 }
2469 }
2470
2471 MVT::ValueType LVT = MVT::i64;
2472 while (!TLI.isTypeLegal(LVT))
2473 LVT = (MVT::ValueType)((unsigned)LVT - 1);
2474 assert(MVT::isInteger(LVT));
2475
2476 if (VT > LVT)
2477 VT = LVT;
2478
2479 unsigned NumMemOps = 0;
2480 while (Size != 0) {
2481 unsigned VTSize = MVT::getSizeInBits(VT) / 8;
2482 while (VTSize > Size) {
2483 VT = (MVT::ValueType)((unsigned)VT - 1);
2484 VTSize >>= 1;
2485 }
2486 assert(MVT::isInteger(VT));
2487
2488 if (++NumMemOps > Limit)
2489 return false;
2490 MemOps.push_back(VT);
2491 Size -= VTSize;
2492 }
2493
2494 return true;
2495 }
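A worked trace (illustrative; assumes i64 through i8 are legal and unaligned accesses are disallowed): Size = 15 with Align = 4 selects VT = i32, and the inner loop then narrows VT as the remainder shrinks:

  15 bytes: i32 (11 left), i32 (7), i32 (3), i16 (1), i8 (0)
  MemOps = { i32, i32, i32, i16, i8 }, NumMemOps = 5
  The function succeeds iff Limit >= 5.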
2496
2497 static SDOperand getMemcpyLoadsAndStores(SelectionDAG &DAG,
2498 SDOperand Chain, SDOperand Dst,
2499 SDOperand Src, uint64_t Size,
2500 unsigned Align,
2501 bool AlwaysInline,
2502 Value *DstSV, uint64_t DstOff,
2503 Value *SrcSV, uint64_t SrcOff) {
2504 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2505
2506 // Expand memcpy to a series of store ops if the size operand falls below
2507 // a certain threshold.
2508 std::vector<MVT::ValueType> MemOps;
2509 uint64_t Limit = -1;
2510 if (!AlwaysInline)
2511 Limit = TLI.getMaxStoresPerMemcpy();
2512 if (!MeetsMaxMemopRequirement(MemOps, Limit, Size, Align, TLI))
2513 return SDOperand();
2514
2515 SmallVector<SDOperand, 8> OutChains;
2516
2517 unsigned NumMemOps = MemOps.size();
2518 unsigned SrcDelta = 0;
2519 GlobalAddressSDNode *G = NULL;
2520 std::string Str;
2521 bool CopyFromStr = false;
2522
2523 if (Src.getOpcode() == ISD::GlobalAddress)
2524 G = cast<GlobalAddressSDNode>(Src);
2525 else if (Src.getOpcode() == ISD::ADD &&
2526 Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
2527 Src.getOperand(1).getOpcode() == ISD::Constant) {
2528 G = cast<GlobalAddressSDNode>(Src.getOperand(0));
2529 SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getValue();
2530 }
2531 if (G) {
2532 GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
2533 if (GV && GV->isConstant()) {
2534 Str = GV->getStringValue(false);
2535 if (!Str.empty()) {
2536 CopyFromStr = true;
2537 SrcOff += SrcDelta;
2538 }
2539 }
2540 }
2541
2542 for (unsigned i = 0; i < NumMemOps; i++) {
2543 MVT::ValueType VT = MemOps[i];
2544 unsigned VTSize = MVT::getSizeInBits(VT) / 8;
2545 SDOperand Value, Store;
2546
2547 if (CopyFromStr) {
2548 Value = getMemsetStringVal(VT, DAG, TLI, Str, SrcOff);
2549 Store =
2550 DAG.getStore(Chain, Value,
2551 getMemBasePlusOffset(Dst, DstOff, DAG),
2552 DstSV, DstOff);
2553 } else {
2554 Value = DAG.getLoad(VT, Chain,
2555 getMemBasePlusOffset(Src, SrcOff, DAG),
2556 SrcSV, SrcOff, false, Align);
2557 Store =
2558 DAG.getStore(Chain, Value,
2559 getMemBasePlusOffset(Dst, DstOff, DAG),
2560 DstSV, DstOff, false, Align);
2561 }
2562 OutChains.push_back(Store);
2563 SrcOff += VTSize;
2564 DstOff += VTSize;
2565 }
2566
2567 return DAG.getNode(ISD::TokenFactor, MVT::Other,
2568 &OutChains[0], OutChains.size());
2569 }
2570
2571 static SDOperand getMemsetStores(SelectionDAG &DAG,
2572 SDOperand Chain, SDOperand Dst,
2573 SDOperand Src, uint64_t Size,
2574 unsigned Align,
2575 Value *DstSV, uint64_t DstOff) {
2576 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2577
2578 // Expand memset to a series of load/store ops if the size operand
2579 // falls below a certain threshold.
2580 std::vector<MVT::ValueType> MemOps;
2581 if (!MeetsMaxMemopRequirement(MemOps, TLI.getMaxStoresPerMemset(),
2582 Size, Align, TLI))
2583 return SDOperand();
2584
2585 SmallVector<SDOperand, 8> OutChains;
2586
2587 unsigned NumMemOps = MemOps.size();
2588 for (unsigned i = 0; i < NumMemOps; i++) {
2589 MVT::ValueType VT = MemOps[i];
2590 unsigned VTSize = MVT::getSizeInBits(VT) / 8;
2591 SDOperand Value = getMemsetValue(Src, VT, DAG);
2592 SDOperand Store = DAG.getStore(Chain, Value,
2593 getMemBasePlusOffset(Dst, DstOff, DAG),
2594 DstSV, DstOff);
2595 OutChains.push_back(Store);
2596 DstOff += VTSize;
2597 }
2598
2599 return DAG.getNode(ISD::TokenFactor, MVT::Other,
2600 &OutChains[0], OutChains.size());
2601 }
2602
2603 SDOperand SelectionDAG::getMemcpy(SDOperand Chain, SDOperand Dst,
23882604 SDOperand Src, SDOperand Size,
2389 SDOperand Align,
2390 SDOperand AlwaysInline) {
2391 SDOperand Ops[] = { Chain, Dest, Src, Size, Align, AlwaysInline };
2392 return getNode(ISD::MEMCPY, MVT::Other, Ops, 6);
2393 }
2394
2395 SDOperand SelectionDAG::getMemmove(SDOperand Chain, SDOperand Dest,
2605 unsigned Align, bool AlwaysInline,
2606 Value *DstSV, uint64_t DstOff,
2607 Value *SrcSV, uint64_t SrcOff) {
2608
2609 // Check to see if we should lower the memcpy to loads and stores first.
2610 // For cases within the target-specified limits, this is the best choice.
2611 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
2612 if (ConstantSize) {
2613 // Memcpy with size zero? Just return the original chain.
2614 if (ConstantSize->isNullValue())
2615 return Chain;
2616
2617 SDOperand Result =
2618 getMemcpyLoadsAndStores(*this, Chain, Dst, Src, ConstantSize->getValue(),
2619 Align, false, DstSV, DstOff, SrcSV, SrcOff);
2620 if (Result.Val)
2621 return Result;
2622 }
2623
2624 // Then check to see if we should lower the memcpy with target-specific
2625 // code. If the target chooses to do this, this is the next best.
2626 SDOperand Result =
2627 TLI.EmitTargetCodeForMemcpy(*this, Chain, Dst, Src, Size, Align,
2628 AlwaysInline,
2629 DstSV, DstOff, SrcSV, SrcOff);
2630 if (Result.Val)
2631 return Result;
2632
2633 // If we really need inline code and the target declined to provide it,
2634 // use a (potentially long) sequence of loads and stores.
2635 if (AlwaysInline) {
2636 assert(ConstantSize && "AlwaysInline requires a constant size!");
2637 return getMemcpyLoadsAndStores(*this, Chain, Dst, Src,
2638 ConstantSize->getValue(), Align, true,
2639 DstSV, DstOff, SrcSV, SrcOff);
2640 }
2641
2642 // Emit a library call.
2643 TargetLowering::ArgListTy Args;
2644 TargetLowering::ArgListEntry Entry;
2645 Entry.Ty = TLI.getTargetData()->getIntPtrType();
2646 Entry.Node = Dst; Args.push_back(Entry);
2647 Entry.Node = Src; Args.push_back(Entry);
2648 Entry.Node = Size; Args.push_back(Entry);
2649 std::pair<SDOperand,SDOperand> CallResult =
2650 TLI.LowerCallTo(Chain, Type::VoidTy,
2651 false, false, false, CallingConv::C, false,
2652 getExternalSymbol("memcpy", TLI.getPointerTy()),
2653 Args, *this);
2654 return CallResult.second;
2655 }
2656
2657 SDOperand SelectionDAG::getMemmove(SDOperand Chain, SDOperand Dst,
2658 SDOperand Src, SDOperand Size,
2659 unsigned Align,
2660 Value *DstSV, uint64_t DstOff,
2661 Value *SrcSV, uint64_t SrcOff) {
2662
2663 // TODO: Optimize small memmove cases with simple loads and stores,
2664 // ensuring that all loads precede all stores. This can cause severe
2665 // register pressure, so targets should be careful with the size limit.
2666
2667 // Then check to see if we should lower the memmove with target-specific
2668 // code. If the target chooses to do this, this is the next best.
2669 SDOperand Result =
2670 TLI.EmitTargetCodeForMemmove(*this, Chain, Dst, Src, Size, Align,
2671 DstSV, DstOff, SrcSV, SrcOff);
2672 if (Result.Val)
2673 return Result;
2674
2675 // Emit a library call.
2676 TargetLowering::ArgListTy Args;
2677 TargetLowering::ArgListEntry Entry;
2678 Entry.Ty = TLI.getTargetData()->getIntPtrType();
2679 Entry.Node = Dst; Args.push_back(Entry);
2680 Entry.Node = Src; Args.push_back(Entry);
2681 Entry.Node = Size; Args.push_back(Entry);
2682 std::pair<SDOperand,SDOperand> CallResult =
2683 TLI.LowerCallTo(Chain, Type::VoidTy,
2684 false, false, false, CallingConv::C, false,
2685 getExternalSymbol("memmove", TLI.getPointerTy()),
2686 Args, *this);
2687 return CallResult.second;
2688 }
2689
2690 SDOperand SelectionDAG::getMemset(SDOperand Chain, SDOperand Dst,
23962691 SDOperand Src, SDOperand Size,
2397 SDOperand Align,
2398 SDOperand AlwaysInline) {
2399 SDOperand Ops[] = { Chain, Dest, Src, Size, Align, AlwaysInline };
2400 return getNode(ISD::MEMMOVE, MVT::Other, Ops, 6);
2401 }
2402
2403 SDOperand SelectionDAG::getMemset(SDOperand Chain, SDOperand Dest,
2404 SDOperand Src, SDOperand Size,
2405 SDOperand Align,
2406 SDOperand AlwaysInline) {
2407 SDOperand Ops[] = { Chain, Dest, Src, Size, Align, AlwaysInline };
2408 return getNode(ISD::MEMSET, MVT::Other, Ops, 6);
2692 unsigned Align,
2693 Value *DstSV, uint64_t DstOff) {
2694
2695 // Check to see if we should lower the memset to stores first.
2696 // For cases within the target-specified limits, this is the best choice.
2697 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
2698 if (ConstantSize) {
2699 // Memset with size zero? Just return the original chain.
2700 if (ConstantSize->isNullValue())
2701 return Chain;
2702
2703 SDOperand Result =
2704 getMemsetStores(*this, Chain, Dst, Src, ConstantSize->getValue(), Align,
2705 DstSV, DstOff);
2706 if (Result.Val)
2707 return Result;
2708 }
2709
2710 // Then check to see if we should lower the memset with target-specific
2711 // code. If the target chooses to do this, this is the next best.
2712 SDOperand Result =
2713 TLI.EmitTargetCodeForMemset(*this, Chain, Dst, Src, Size, Align,
2714 DstSV, DstOff);
2715 if (Result.Val)
2716 return Result;
2717
2718 // Emit a library call.
2719 const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType();
2720 TargetLowering::ArgListTy Args;
2721 TargetLowering::ArgListEntry Entry;
2722 Entry.Node = Dst; Entry.Ty = IntPtrTy;
2723 Args.push_back(Entry);
2724 // Extend or truncate the argument to be an i32 value for the call.
2725 if (Src.getValueType() > MVT::i32)
2726 Src = getNode(ISD::TRUNCATE, MVT::i32, Src);
2727 else
2728 Src = getNode(ISD::ZERO_EXTEND, MVT::i32, Src);
2729 Entry.Node = Src; Entry.Ty = Type::Int32Ty; Entry.isSExt = true;
2730 Args.push_back(Entry);
2731 Entry.Node = Size; Entry.Ty = IntPtrTy; Entry.isSExt = false;
2732 Args.push_back(Entry);
2733 std::pair<SDOperand,SDOperand> CallResult =
2734 TLI.LowerCallTo(Chain, Type::VoidTy,
2735 false, false, false, CallingConv::C, false,
2736 getExternalSymbol("memset", TLI.getPointerTy()),
2737 Args, *this);
2738 return CallResult.second;
24092739 }
24102740
24112741 SDOperand SelectionDAG::getAtomic(unsigned Opcode, SDOperand Chain,
40074337 case ISD::STACKSAVE: return "stacksave";
40084338 case ISD::STACKRESTORE: return "stackrestore";
40094339 case ISD::TRAP: return "trap";
4010
4011 // Block memory operations.
4012 case ISD::MEMSET: return "memset";
4013 case ISD::MEMCPY: return "memcpy";
4014 case ISD::MEMMOVE: return "memmove";
40154340
40164341 // Bit manipulation
40174342 case ISD::BSWAP: return "bswap";
645645 void visitVAArg(VAArgInst &I);
646646 void visitVAEnd(CallInst &I);
647647 void visitVACopy(CallInst &I);
648
649 void visitMemIntrinsic(CallInst &I, unsigned Op);
650648
651649 void visitGetResult(GetResultInst &I);
652650
27362734 return "_longjmp"+!TLI.usesUnderscoreLongJmp();
27372735 break;
27382736 case Intrinsic::memcpy_i32:
2739 case Intrinsic::memcpy_i64:
2740 visitMemIntrinsic(I, ISD::MEMCPY);
2737 case Intrinsic::memcpy_i64: {
2738 SDOperand Op1 = getValue(I.getOperand(1));
2739 SDOperand Op2 = getValue(I.getOperand(2));
2740 SDOperand Op3 = getValue(I.getOperand(3));
2741 unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
2742 DAG.setRoot(DAG.getMemcpy(getRoot(), Op1, Op2, Op3, Align, false,
2743 I.getOperand(1), 0, I.getOperand(2), 0));
27412744 return 0;
2745 }
27422746 case Intrinsic::memset_i32:
2743 case Intrinsic::memset_i64:
2744 visitMemIntrinsic(I, ISD::MEMSET);
2747 case Intrinsic::memset_i64: {
2748 SDOperand Op1 = getValue(I.getOperand(1));
2749 SDOperand Op2 = getValue(I.getOperand(2));
2750 SDOperand Op3 = getValue(I.getOperand(3));
2751 unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
2752 DAG.setRoot(DAG.getMemset(getRoot(), Op1, Op2, Op3, Align,
2753 I.getOperand(1), 0));
27452754 return 0;
2755 }
27462756 case Intrinsic::memmove_i32:
2747 case Intrinsic::memmove_i64:
2748 visitMemIntrinsic(I, ISD::MEMMOVE);
2757 case Intrinsic::memmove_i64: {
2758 SDOperand Op1 = getValue(I.getOperand(1));
2759 SDOperand Op2 = getValue(I.getOperand(2));
2760 SDOperand Op3 = getValue(I.getOperand(3));
2761 unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
2762
2763 // If the source and destination are known to not be aliases, we can
2764 // lower memmove as memcpy.
2765 uint64_t Size = -1ULL;
2766 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3))
2767 Size = C->getValue();
2768 if (AA.alias(I.getOperand(1), Size, I.getOperand(2), Size) ==
2769 AliasAnalysis::NoAlias) {
2770 DAG.setRoot(DAG.getMemcpy(getRoot(), Op1, Op2, Op3, Align, false,
2771 I.getOperand(1), 0, I.getOperand(2), 0));
2772 return 0;
2773 }
2774
2775 DAG.setRoot(DAG.getMemmove(getRoot(), Op1, Op2, Op3, Align,
2776 I.getOperand(1), 0, I.getOperand(2), 0));
27492777 return 0;
2750
2778 }
27512779 case Intrinsic::dbg_stoppoint: {
27522780 MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
27532781 DbgStopPointInst &SPI = cast<DbgStopPointInst>(I);
43414369 return SDOperand();
43424370 }
43434371
4344 /// getMemsetValue - Vectorized representation of the memset value
4345 /// operand.
4346 static SDOperand getMemsetValue(SDOperand Value, MVT::ValueType VT,
4347 SelectionDAG &DAG) {
4348 MVT::ValueType CurVT = VT;
4349 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
4350 uint64_t Val = C->getValue() & 255;
4351 unsigned Shift = 8;
4352 while (CurVT != MVT::i8) {
4353 Val = (Val << Shift) | Val;
4354 Shift <<= 1;
4355 CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
4356 }
4357 return DAG.getConstant(Val, VT);
4358 } else {
4359 Value = DAG.getNode(ISD::ZERO_EXTEND, VT, Value);
4360 unsigned Shift = 8;
4361 while (CurVT != MVT::i8) {
4362 Value =
4363 DAG.getNode(ISD::OR, VT,
4364 DAG.getNode(ISD::SHL, VT, Value,
4365 DAG.getConstant(Shift, MVT::i8)), Value);
4366 Shift <<= 1;
4367 CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
4368 }
4369
4370 return Value;
4371 }
4372 }
4373
4374 /// getMemsetStringVal - Similar to getMemsetValue. Except this is only
4375 /// used when a memcpy is turned into a memset when the source is a constant
4376 /// string ptr.
4377 static SDOperand getMemsetStringVal(MVT::ValueType VT,
4378 SelectionDAG &DAG, TargetLowering &TLI,
4379 std::string &Str, unsigned Offset) {
4380 uint64_t Val = 0;
4381 unsigned MSB = MVT::getSizeInBits(VT) / 8;
4382 if (TLI.isLittleEndian())
4383 Offset = Offset + MSB - 1;
4384 for (unsigned i = 0; i != MSB; ++i) {
4385 Val = (Val << 8) | (unsigned char)Str[Offset];
4386 Offset += TLI.isLittleEndian() ? -1 : 1;
4387 }
4388 return DAG.getConstant(Val, VT);
4389 }
4390
4391 /// getMemBasePlusOffset - Returns Base plus the given Offset, as an ISD::ADD node.
4392 static SDOperand getMemBasePlusOffset(SDOperand Base, unsigned Offset,
4393 SelectionDAG &DAG, TargetLowering &TLI) {
4394 MVT::ValueType VT = Base.getValueType();
4395 return DAG.getNode(ISD::ADD, VT, Base, DAG.getConstant(Offset, VT));
4396 }
4397
4398 /// MeetsMaxMemopRequirement - Determines if the number of memory ops required
4399 /// to replace the memset / memcpy is below the threshold. It also returns the
4400 /// types of the sequence of memory ops to perform memset / memcpy.
4401 static bool MeetsMaxMemopRequirement(std::vector<MVT::ValueType> &MemOps,
4402 unsigned Limit, uint64_t Size,
4403 unsigned Align, TargetLowering &TLI) {
4404 MVT::ValueType VT;
4405
4406 if (TLI.allowsUnalignedMemoryAccesses()) {
4407 VT = MVT::i64;
4408 } else {
4409 switch (Align & 7) {
4410 case 0:
4411 VT = MVT::i64;
4412 break;
4413 case 4:
4414 VT = MVT::i32;
4415 break;
4416 case 2:
4417 VT = MVT::i16;
4418 break;
4419 default:
4420 VT = MVT::i8;
4421 break;
4422 }
4423 }
4424
4425 MVT::ValueType LVT = MVT::i64;
4426 while (!TLI.isTypeLegal(LVT))
4427 LVT = (MVT::ValueType)((unsigned)LVT - 1);
4428 assert(MVT::isInteger(LVT));
4429
4430 if (VT > LVT)
4431 VT = LVT;
4432
4433 unsigned NumMemOps = 0;
4434 while (Size != 0) {
4435 unsigned VTSize = MVT::getSizeInBits(VT) / 8;
4436 while (VTSize > Size) {
4437 VT = (MVT::ValueType)((unsigned)VT - 1);
4438 VTSize >>= 1;
4439 }
4440 assert(MVT::isInteger(VT));
4441
4442 if (++NumMemOps > Limit)
4443 return false;
4444 MemOps.push_back(VT);
4445 Size -= VTSize;
4446 }
4447
4448 return true;
4449 }
4450
4451 void SelectionDAGLowering::visitMemIntrinsic(CallInst &I, unsigned Op) {
4452 SDOperand Op1 = getValue(I.getOperand(1));
4453 SDOperand Op2 = getValue(I.getOperand(2));
4454 SDOperand Op3 = getValue(I.getOperand(3));
4455 SDOperand Op4 = getValue(I.getOperand(4));
4456 unsigned Align = (unsigned)cast<ConstantSDNode>(Op4)->getValue();
4457 if (Align == 0) Align = 1;
4458
4459 // If the source and destination are known to not be aliases, we can
4460 // lower memmove as memcpy.
4461 if (Op == ISD::MEMMOVE) {
4462 uint64_t Size = -1ULL;
4463 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3))
4464 Size = C->getValue();
4465 if (AA.alias(I.getOperand(1), Size, I.getOperand(2), Size) ==
4466 AliasAnalysis::NoAlias)
4467 Op = ISD::MEMCPY;
4468 }
4469
4470 if (ConstantSDNode *Size = dyn_cast<ConstantSDNode>(Op3)) {
4471 std::vector<MVT::ValueType> MemOps;
4472
4473 // Expand memset / memcpy to a series of load / store ops
4474 // if the size operand falls below a certain threshold.
4475 SmallVector<SDOperand, 8> OutChains;
4476 switch (Op) {
4477 default: break; // Do nothing for now.
4478 case ISD::MEMSET: {
4479 if (MeetsMaxMemopRequirement(MemOps, TLI.getMaxStoresPerMemset(),
4480 Size->getValue(), Align, TLI)) {
4481 unsigned NumMemOps = MemOps.size();
4482 unsigned Offset = 0;
4483 for (unsigned i = 0; i < NumMemOps; i++) {
4484 MVT::ValueType VT = MemOps[i];
4485 unsigned VTSize = MVT::getSizeInBits(VT) / 8;
4486 SDOperand Value = getMemsetValue(Op2, VT, DAG);
4487 SDOperand Store = DAG.getStore(getRoot(), Value,
4488 getMemBasePlusOffset(Op1, Offset, DAG, TLI),
4489 I.getOperand(1), Offset);
4490 OutChains.push_back(Store);
4491 Offset += VTSize;
4492 }
4493 }
4494 break;
4495 }
4496 case ISD::MEMCPY: {
4497 if (MeetsMaxMemopRequirement(MemOps, TLI.getMaxStoresPerMemcpy(),
4498 Size->getValue(), Align, TLI)) {
4499 unsigned NumMemOps = MemOps.size();
4500 unsigned SrcOff = 0, DstOff = 0, SrcDelta = 0;
4501 GlobalAddressSDNode *G = NULL;
4502 std::string Str;
4503 bool CopyFromStr = false;
4504
4505 if (Op2.getOpcode() == ISD::GlobalAddress)
4506 G = cast<GlobalAddressSDNode>(Op2);
4507 else if (Op2.getOpcode() == ISD::ADD &&
4508 Op2.getOperand(0).getOpcode() == ISD::GlobalAddress &&
4509 Op2.getOperand(1).getOpcode() == ISD::Constant) {
4510 G = cast<GlobalAddressSDNode>(Op2.getOperand(0));
4511 SrcDelta = cast<ConstantSDNode>(Op2.getOperand(1))->getValue();
4512 }
4513 if (G) {
4514 GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
4515 if (GV && GV->isConstant()) {
4516 Str = GV->getStringValue(false);
4517 if (!Str.empty()) {
4518 CopyFromStr = true;
4519 SrcOff += SrcDelta;
4520 }
4521 }
4522 }
4523
4524 for (unsigned i = 0; i < NumMemOps; i++) {
4525 MVT::ValueType VT = MemOps[i];
4526 unsigned VTSize = MVT::getSizeInBits(VT) / 8;
4527 SDOperand Value, Chain, Store;
4528
4529 if (CopyFromStr) {
4530 Value = getMemsetStringVal(VT, DAG, TLI, Str, SrcOff);
4531 Chain = getRoot();
4532 Store =
4533 DAG.getStore(Chain, Value,
4534 getMemBasePlusOffset(Op1, DstOff, DAG, TLI),
4535 I.getOperand(1), DstOff);
4536 } else {
4537 Value = DAG.getLoad(VT, getRoot(),
4538 getMemBasePlusOffset(Op2, SrcOff, DAG, TLI),
4539 I.getOperand(2), SrcOff, false, Align);
4540 Chain = Value.getValue(1);
4541 Store =
4542 DAG.getStore(Chain, Value,
4543 getMemBasePlusOffset(Op1, DstOff, DAG, TLI),
4544 I.getOperand(1), DstOff, false, Align);
4545 }
4546 OutChains.push_back(Store);
4547 SrcOff += VTSize;
4548 DstOff += VTSize;
4549 }
4550 }
4551 break;
4552 }
4553 }
4554
4555 if (!OutChains.empty()) {
4556 DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other,
4557 &OutChains[0], OutChains.size()));
4558 return;
4559 }
4560 }
4561
4562 SDOperand AlwaysInline = DAG.getConstant(0, MVT::i1);
4563 SDOperand Node;
4564 switch(Op) {
4565 default:
4566 assert(0 && "Unknown Op");
4567 case ISD::MEMCPY:
4568 Node = DAG.getMemcpy(getRoot(), Op1, Op2, Op3, Op4, AlwaysInline);
4569 break;
4570 case ISD::MEMMOVE:
4571 Node = DAG.getMemmove(getRoot(), Op1, Op2, Op3, Op4, AlwaysInline);
4572 break;
4573 case ISD::MEMSET:
4574 Node = DAG.getMemset(getRoot(), Op1, Op2, Op3, Op4, AlwaysInline);
4575 break;
4576 }
4577 DAG.setRoot(Node);
4578 }
4579
45804372 //===----------------------------------------------------------------------===//
45814373 // SelectionDAGISel code
45824374 //===----------------------------------------------------------------------===//
1616 #include "llvm/Target/TargetData.h"
1717 #include "llvm/Target/TargetMachine.h"
1818 #include "llvm/Target/TargetRegisterInfo.h"
19 #include "llvm/CallingConv.h"
19 #include "llvm/GlobalVariable.h"
2020 #include "llvm/DerivedTypes.h"
2121 #include "llvm/CodeGen/SelectionDAG.h"
2222 #include "llvm/ADT/StringExtras.h"
233233
234234 TargetLowering::~TargetLowering() {}
235235
236
237 SDOperand TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
238 assert(getSubtarget() && "Subtarget not defined");
239 SDOperand ChainOp = Op.getOperand(0);
240 SDOperand DestOp = Op.getOperand(1);
241 SDOperand SourceOp = Op.getOperand(2);
242 SDOperand CountOp = Op.getOperand(3);
243 SDOperand AlignOp = Op.getOperand(4);
244 SDOperand AlwaysInlineOp = Op.getOperand(5);
245
246 bool AlwaysInline = (bool)cast<ConstantSDNode>(AlwaysInlineOp)->getValue();
247 unsigned Align = (unsigned)cast<ConstantSDNode>(AlignOp)->getValue();
248 if (Align == 0) Align = 1;
249
250 // If size is unknown, call memcpy.
251 ConstantSDNode *I = dyn_cast<ConstantSDNode>(CountOp);
252 if (!I) {
253 assert(!AlwaysInline && "Cannot inline copy of unknown size");
254 return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
255 }
256
257 // If not DWORD aligned or if size is more than threshold, then call memcpy.
258 // The libc version is likely to be faster for the following cases. It can
259 // use the address value and run time information about the CPU.
260 // With glibc 2.6.1 on a Core 2, copying an array of 100M longs was 30% faster
261 unsigned Size = I->getValue();
262 if (AlwaysInline ||
263 (Size <= getSubtarget()->getMaxInlineSizeThreshold() &&
264 (Align & 3) == 0))
265 return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align, DAG);
266 return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
267 }
268
269
270 SDOperand TargetLowering::LowerMEMCPYCall(SDOperand Chain,
271 SDOperand Dest,
272 SDOperand Source,
273 SDOperand Count,
274 SelectionDAG &DAG) {
275 MVT::ValueType IntPtr = getPointerTy();
276 TargetLowering::ArgListTy Args;
277 TargetLowering::ArgListEntry Entry;
278 Entry.Ty = getTargetData()->getIntPtrType();
279 Entry.Node = Dest; Args.push_back(Entry);
280 Entry.Node = Source; Args.push_back(Entry);
281 Entry.Node = Count; Args.push_back(Entry);
282 std::pair<SDOperand,SDOperand> CallResult =
283 LowerCallTo(Chain, Type::VoidTy, false, false, false, CallingConv::C,
284 false, DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
285 return CallResult.second;
286 }
287
288
289236 /// computeRegisterProperties - Once all of the register classes are added,
290237 /// this allows us to compute derived properties we expose.
291238 void TargetLowering::computeRegisterProperties() {
195195 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
196196 setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
197197 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
198
199 // Expand mem operations generically.
200 setOperationAction(ISD::MEMSET , MVT::Other, Expand);
201 setOperationAction(ISD::MEMCPY , MVT::Other, Custom);
202 setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
203198
204199 // Use the default implementation.
205200 setOperationAction(ISD::VASTART , MVT::Other, Custom);
12451240 return DAG.getNode(ARMISD::CNEG, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
12461241 }
12471242
1248 SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain,
1249 SDOperand Dest,
1250 SDOperand Source,
1251 unsigned Size,
1252 unsigned Align,
1253 SelectionDAG &DAG) {
1243 SDOperand
1244 ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG,
1245 SDOperand Chain,
1246 SDOperand Dst, SDOperand Src,
1247 SDOperand Size, unsigned Align,
1248 bool AlwaysInline,
1249 Value *DstSV, uint64_t DstOff,
1250 Value *SrcSV, uint64_t SrcOff){
12541251 // Do repeated 4-byte loads and stores. To be improved.
1255 assert((Align & 3) == 0 && "Expected 4-byte aligned addresses!");
1256 unsigned BytesLeft = Size & 3;
1257 unsigned NumMemOps = Size >> 2;
1252 // This requires 4-byte alignment.
1253 if ((Align & 3) != 0)
1254 return SDOperand();
1255 // This requires the copy size to be a constant, preferably
1256 // within a subtarget-specific limit.
1257 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
1258 if (!ConstantSize)
1259 return SDOperand();
1260 uint64_t SizeVal = ConstantSize->getValue();
1261 if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
1262 return SDOperand();
1263
1264 unsigned BytesLeft = SizeVal & 3;
1265 unsigned NumMemOps = SizeVal >> 2;
12581266 unsigned EmittedNumMemOps = 0;
1259 unsigned SrcOff = 0, DstOff = 0;
12601267 MVT::ValueType VT = MVT::i32;
12611268 unsigned VTSize = 4;
12621269 unsigned i = 0;
12711278 for (i = 0;
12721279 i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
12731280 Loads[i] = DAG.getLoad(VT, Chain,
1274 DAG.getNode(ISD::ADD, MVT::i32, Source,
1281 DAG.getNode(ISD::ADD, MVT::i32, Src,
12751282 DAG.getConstant(SrcOff, MVT::i32)),
1276 NULL, 0);
1283 SrcSV, SrcOff);
12771284 TFOps[i] = Loads[i].getValue(1);
12781285 SrcOff += VTSize;
12791286 }
12821289 for (i = 0;
12831290 i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
12841291 TFOps[i] = DAG.getStore(Chain, Loads[i],
1285 DAG.getNode(ISD::ADD, MVT::i32, Dest,
1292 DAG.getNode(ISD::ADD, MVT::i32, Dst,
12861293 DAG.getConstant(DstOff, MVT::i32)),
1287 NULL, 0);
1294 DstSV, DstOff);
12881295 DstOff += VTSize;
12891296 }
12901297 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, &TFOps[0], i);
13081315 }
13091316
13101317 Loads[i] = DAG.getLoad(VT, Chain,
1311 DAG.getNode(ISD::ADD, MVT::i32, Source,
1318 DAG.getNode(ISD::ADD, MVT::i32, Src,
13121319 DAG.getConstant(SrcOff, MVT::i32)),
1313 NULL, 0);
1320 SrcSV, SrcOff);
13141321 TFOps[i] = Loads[i].getValue(1);
13151322 ++i;
13161323 SrcOff += VTSize;
13301337 }
13311338
13321339 TFOps[i] = DAG.getStore(Chain, Loads[i],
1333 DAG.getNode(ISD::ADD, MVT::i32, Dest,
1340 DAG.getNode(ISD::ADD, MVT::i32, Dst,
13341341 DAG.getConstant(DstOff, MVT::i32)),
1335 NULL, 0);
1342 DstSV, DstOff);
13361343 ++i;
13371344 DstOff += VTSize;
13381345 BytesLeft -= VTSize;
14081415 case ISD::RETURNADDR: break;
14091416 case ISD::FRAMEADDR: break;
14101417 case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
1411 case ISD::MEMCPY: return LowerMEMCPY(Op, DAG);
14121418 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
14131419
14141420
118118 getRegClassForInlineAsmConstraint(const std::string &Constraint,
119119 MVT::ValueType VT) const;
120120
121 virtual const TargetSubtarget* getSubtarget() {
122 return static_cast<const TargetSubtarget*>(Subtarget);
121 virtual const ARMSubtarget* getSubtarget() {
122 return Subtarget;
123123 }
124124
125125 private:
142142 SDOperand LowerGLOBAL_OFFSET_TABLE(SDOperand Op, SelectionDAG &DAG);
143143 SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG);
144144 SDOperand LowerBR_JT(SDOperand Op, SelectionDAG &DAG);
145 SDOperand LowerMEMCPYInline(SDOperand Chain, SDOperand Dest,
146 SDOperand Source, unsigned Size,
147 unsigned Align, SelectionDAG &DAG);
148145
149
146 SDOperand EmitTargetCodeForMemcpy(SelectionDAG &DAG,
147 SDOperand Chain,
148 SDOperand Dst, SDOperand Src,
149 SDOperand Size, unsigned Align,
150 bool AlwaysInline,
151 Value *DstSV, uint64_t DstOff,
152 Value *SrcSV, uint64_t SrcOff);
150153 };
151154 }
152155
6161 ///
6262 ARMSubtarget(const Module &M, const std::string &FS, bool thumb);
6363
64 /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
65 /// that still makes it profitable to inline the call.
6466 unsigned getMaxInlineSizeThreshold() const {
6567 // FIXME: For now, we don't lower memcpy's to loads / stores for Thumb.
6668 // Change this once Thumb ldmia / stmia support is added.
8686 setOperationAction(ISD::SDIV , MVT::i64, Custom);
8787 setOperationAction(ISD::UDIV , MVT::i64, Custom);
8888
89 setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
90 setOperationAction(ISD::MEMSET , MVT::Other, Expand);
91 setOperationAction(ISD::MEMCPY , MVT::Other, Expand);
92
9389 // We don't support sin/cos/sqrt/pow
9490 setOperationAction(ISD::FSIN , MVT::f64, Expand);
9591 setOperationAction(ISD::FCOS , MVT::f64, Expand);
174174 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
175175
176176 // SPU has no intrinsics for these particular operations:
177 setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
178 setOperationAction(ISD::MEMSET, MVT::Other, Expand);
179 setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
180177 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
181178
182179 // PowerPC has no SREM/UREM instructions
6464 setOperationAction(ISD::UREM , MVT::f32 , Expand);
6565 setOperationAction(ISD::UREM , MVT::f64 , Expand);
6666
67 setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
68 setOperationAction(ISD::MEMSET , MVT::Other, Expand);
69 setOperationAction(ISD::MEMCPY , MVT::Other, Expand);
7067 setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand);
7168
7269 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
7979 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
8080
8181 // Mips not supported intrinsics.
82 setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
83 setOperationAction(ISD::MEMSET, MVT::Other, Expand);
84 setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
8582 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
8683
8784 setOperationAction(ISD::CTPOP, MVT::i32, Expand);
7777 setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
7878
7979 // PowerPC has no intrinsics for these particular operations
80 setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
81 setOperationAction(ISD::MEMSET, MVT::Other, Expand);
82 setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
8380 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
8481
8582 // PowerPC has no SREM/UREM instructions
17341731 CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain,
17351732 ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
17361733 unsigned Size) {
1737 SDOperand AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32);
1738 SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
1739 SDOperand AlwaysInline = DAG.getConstant(0, MVT::i32);
1740 return DAG.getMemcpy(Chain, Dst, Src, SizeNode, AlignNode, AlwaysInline);
1734 SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
1735 return DAG.getMemcpy(Chain, Dst, Src, SizeNode, Flags.getByValAlign(), false,
1736 NULL, 0, NULL, 0);
17411737 }
17421738
17431739 SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,
569569 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
570570
571571 // SPARC has no intrinsics for these particular operations.
572 setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
573 setOperationAction(ISD::MEMSET, MVT::Other, Expand);
574 setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
575572 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
576573
577574 setOperationAction(ISD::FSIN , MVT::f64, Expand);
205205 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
206206 setOperationAction(ISD::BR_CC , MVT::Other, Expand);
207207 setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
208 setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
209208 if (Subtarget->is64Bit())
210209 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
211210 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
280279 setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom);
281280 setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
282281 }
283 // X86 wants to expand memset / memcpy itself.
284 setOperationAction(ISD::MEMSET , MVT::Other, Custom);
285 setOperationAction(ISD::MEMCPY , MVT::Other, Custom);
286282
287283 if (Subtarget->hasSSE1())
288284 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
11121108 static SDOperand
11131109 CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain,
11141110 ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
1115 SDOperand AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32);
11161111 SDOperand SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
1117 SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32);
1118 return DAG.getMemcpy(Chain, Dst, Src, SizeNode, AlignNode, AlwaysInline);
1112 return DAG.getMemcpy(Chain, Dst, Src, SizeNode, Flags.getByValAlign(),
1113 /*AlwaysInline=*/true,
1114 NULL, 0, NULL, 0);
11191115 }
11201116
11211117 SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
45564552 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2);
45574553 }
45584554
4559 SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
4560 SDOperand InFlag(0, 0);
4561 SDOperand Chain = Op.getOperand(0);
4562 unsigned Align =
4563 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
4564 if (Align == 0) Align = 1;
4565
4566 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
4567 // If not DWORD aligned or size is more than the threshold, call memset.
4568 // The libc version is likely to be faster for these cases. It can use the
4569 // address value and run time information about the CPU.
4570 if ((Align & 3) != 0 ||
4571 (I && I->getValue() > Subtarget->getMaxInlineSizeThreshold())) {
4555 SDOperand
4556 X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG,
4557 SDOperand Chain,
4558 SDOperand Dst, SDOperand Src,
4559 SDOperand Size, unsigned Align,
4560 Value *DstSV, uint64_t DstOff) {
4561 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
4562
4563 /// If not DWORD aligned or size is more than the threshold, call the library.
4564 /// The libc version is likely to be faster for these cases. It can use the
4565 /// address value and run time information about the CPU.
4566 if ((Align & 3) != 0 ||
4567 !ConstantSize ||
4568 ConstantSize->getValue() > getSubtarget()->getMaxInlineSizeThreshold()) {
4569 SDOperand InFlag(0, 0);
45724570
45734571 // Check to see if there is a specialized entry-point for memory zeroing.
4574 ConstantSDNode *V = dyn_cast<ConstantSDNode>(Op.getOperand(2));
4575 const char *bzeroEntry =
4576 V && V->isNullValue() ? Subtarget->getBZeroEntry() : 0;
4577
4578 MVT::ValueType IntPtr = getPointerTy();
4579 const Type *IntPtrTy = getTargetData()->getIntPtrType();
4580 TargetLowering::ArgListTy Args;
4581 TargetLowering::ArgListEntry Entry;
4582 Entry.Node = Op.getOperand(1);
4583 Entry.Ty = IntPtrTy;
4584 Args.push_back(Entry);
4585
4586 if (!bzeroEntry) {
4587 // Extend the unsigned i8 argument to be an int value for the call.
4588 Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
4572 ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
4573 if (const char *bzeroEntry =
4574 V && V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
4575 MVT::ValueType IntPtr = getPointerTy();
4576 const Type *IntPtrTy = getTargetData()->getIntPtrType();
4577 TargetLowering::ArgListTy Args;
4578 TargetLowering::ArgListEntry Entry;
4579 Entry.Node = Dst;
45894580 Entry.Ty = IntPtrTy;
45904581 Args.push_back(Entry);
4591 }
4592
4593 Entry.Node = Op.getOperand(3);
4594 Args.push_back(Entry);
4595 const char *Name = bzeroEntry ? bzeroEntry : "memset";
4596 std::pair<SDOperand,SDOperand> CallResult =
4597 LowerCallTo(Chain, Type::VoidTy, false, false, false, CallingConv::C,
4598 false, DAG.getExternalSymbol(Name, IntPtr), Args, DAG);
4599 return CallResult.second;
4600 }
4601
4582 Entry.Node = Size;
4583 Args.push_back(Entry);
4584 std::pair<SDOperand,SDOperand> CallResult =
4585 LowerCallTo(Chain, Type::VoidTy, false, false, false, CallingConv::C,
4586 false, DAG.getExternalSymbol(bzeroEntry, IntPtr),
4587 Args, DAG);
4588 return CallResult.second;
4589 }
4590
4591 // Otherwise have the target-independent code call memset.
4592 return SDOperand();
4593 }
4594
4595 uint64_t SizeVal = ConstantSize->getValue();
4596 SDOperand InFlag(0, 0);
46024597 MVT::ValueType AVT;
46034598 SDOperand Count;
4604 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
4599 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
46054600 unsigned BytesLeft = 0;
46064601 bool TwoRepStos = false;
46074602 if (ValC) {
46294624 default: // Byte aligned
46304625 AVT = MVT::i8;
46314626 ValReg = X86::AL;
4632 Count = Op.getOperand(3);
4627 Count = Size;
46334628 break;
46344629 }
46354630
46364631 if (AVT > MVT::i8) {
4637 if (I) {
4638 unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
4639 Count = DAG.getIntPtrConstant(I->getValue() / UBytes);
4640 BytesLeft = I->getValue() % UBytes;
4641 } else {
4642 assert(AVT >= MVT::i32 &&
4643 "Do not use rep;stos if not at least DWORD aligned");
4644 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
4645 Op.getOperand(3), DAG.getConstant(2, MVT::i8));
4646 TwoRepStos = true;
4647 }
4632 unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
4633 Count = DAG.getIntPtrConstant(SizeVal / UBytes);
4634 BytesLeft = SizeVal % UBytes;
46484635 }
46494636
46504637 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
46524639 InFlag = Chain.getValue(1);
46534640 } else {
46544641 AVT = MVT::i8;
4655 Count = Op.getOperand(3);
4656 Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
4642 Count = Size;
4643 Chain = DAG.getCopyToReg(Chain, X86::AL, Src, InFlag);
46574644 InFlag = Chain.getValue(1);
46584645 }
46594646
46614648 Count, InFlag);
46624649 InFlag = Chain.getValue(1);
46634650 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
4664 Op.getOperand(1), InFlag);
4651 Dst, InFlag);
46654652 InFlag = Chain.getValue(1);
46664653
46674654 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
46734660
46744661 if (TwoRepStos) {
46754662 InFlag = Chain.getValue(1);
4676 Count = Op.getOperand(3);
4663 Count = Size;
46774664 MVT::ValueType CVT = Count.getValueType();
46784665 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
46794666 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
46874674 Ops.push_back(InFlag);
46884675 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
46894676 } else if (BytesLeft) {
4690 // Issue stores for the last 1 - 7 bytes.
4691 SDOperand Value;
4692 unsigned Val = ValC->getValue() & 255;
4693 unsigned Offset = I->getValue() - BytesLeft;
4694 SDOperand DstAddr = Op.getOperand(1);
4695 MVT::ValueType AddrVT = DstAddr.getValueType();
4696 if (BytesLeft >= 4) {
4697 Val = (Val << 8) | Val;
4698 Val = (Val << 16) | Val;
4699 Value = DAG.getConstant(Val, MVT::i32);
4700 Chain = DAG.getStore(Chain, Value,
4701 DAG.getNode(ISD::ADD, AddrVT, DstAddr,
4702 DAG.getConstant(Offset, AddrVT)),
4703 NULL, 0);
4704 BytesLeft -= 4;
4705 Offset += 4;
4706 }
4707 if (BytesLeft >= 2) {
4708 Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
4709 Chain = DAG.getStore(Chain, Value,
4710 DAG.getNode(ISD::ADD, AddrVT, DstAddr,
4711 DAG.getConstant(Offset, AddrVT)),
4712 NULL, 0);
4713 BytesLeft -= 2;
4714 Offset += 2;
4715 }
4716 if (BytesLeft == 1) {
4717 Value = DAG.getConstant(Val, MVT::i8);
4718 Chain = DAG.getStore(Chain, Value,
4719 DAG.getNode(ISD::ADD, AddrVT, DstAddr,
4720 DAG.getConstant(Offset, AddrVT)),
4721 NULL, 0);
4722 }
4723 }
4724
4677 // Handle the last 1 - 7 bytes.
4678 unsigned Offset = SizeVal - BytesLeft;
4679 MVT::ValueType AddrVT = Dst.getValueType();
4680 MVT::ValueType SizeVT = Size.getValueType();
4681
4682 Chain = DAG.getMemset(Chain,
4683 DAG.getNode(ISD::ADD, AddrVT, Dst,
4684 DAG.getConstant(Offset, AddrVT)),
4685 Src,
4686 DAG.getConstant(BytesLeft, SizeVT),
4687 Align, DstSV, Offset);
4688 }
4689
4690 // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
47254691 return Chain;
47264692 }
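A worked example of the count/remainder split above (illustrative numbers): a DWORD-aligned 13-byte memset with AVT = MVT::i32 gives UBytes = 4, so Count = 13 / 4 = 3 and BytesLeft = 13 % 4 = 1. rep;stos stores three dwords (12 bytes), and the recursive getMemset call handles the final byte at Offset = 12.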
47274693
-SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain,
-                                               SDOperand Dest,
-                                               SDOperand Source,
-                                               unsigned Size,
-                                               unsigned Align,
-                                               SelectionDAG &DAG) {
+SDOperand
+X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG,
+                                           SDOperand Chain,
+                                           SDOperand Dst, SDOperand Src,
+                                           SDOperand Size, unsigned Align,
+                                           bool AlwaysInline,
+                                           Value *DstSV, uint64_t DstOff,
+                                           Value *SrcSV, uint64_t SrcOff) {
+
+  // This requires the copy size to be a constant, preferably
+  // within a subtarget-specific limit.
+  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+  if (!ConstantSize)
+    return SDOperand();
+  uint64_t SizeVal = ConstantSize->getValue();
+  if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
+    return SDOperand();
+
+  SmallVector<SDOperand, 2> Results;
+
   MVT::ValueType AVT;
   unsigned BytesLeft = 0;
-  switch (Align & 3) {
-    case 2:   // WORD aligned
-      AVT = MVT::i16;
-      break;
-    case 0:   // DWORD aligned
-      AVT = MVT::i32;
-      if (Subtarget->is64Bit() && ((Align & 0x7) == 0))  // QWORD aligned
-        AVT = MVT::i64;
-      break;
-    default:  // Byte aligned
-      AVT = MVT::i8;
-      break;
-  }
+  if (Align >= 8 && Subtarget->is64Bit())
+    AVT = MVT::i64;
+  else if (Align >= 4)
+    AVT = MVT::i32;
+  else if (Align >= 2)
+    AVT = MVT::i16;
+  else
+    AVT = MVT::i8;

   unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
-  SDOperand Count = DAG.getIntPtrConstant(Size / UBytes);
-  BytesLeft = Size % UBytes;
+  unsigned CountVal = SizeVal / UBytes;
+  SDOperand Count = DAG.getIntPtrConstant(CountVal);
+  BytesLeft = SizeVal % UBytes;

   SDOperand InFlag(0, 0);
   Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
                            Count, InFlag);
   InFlag = Chain.getValue(1);
   Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
-                           Dest, InFlag);
+                           Dst, InFlag);
   InFlag = Chain.getValue(1);
   Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI,
-                           Source, InFlag);
+                           Src, InFlag);
   InFlag = Chain.getValue(1);

   SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
   SmallVector<SDOperand, 8> Ops;
   Ops.push_back(Chain);
   Ops.push_back(DAG.getValueType(AVT));
   Ops.push_back(InFlag);
-  Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
+  Results.push_back(DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()));

   if (BytesLeft) {
-    // Issue loads and stores for the last 1 - 7 bytes.
-    unsigned Offset = Size - BytesLeft;
-    SDOperand DstAddr = Dest;
-    MVT::ValueType DstVT = DstAddr.getValueType();
-    SDOperand SrcAddr = Source;
-    MVT::ValueType SrcVT = SrcAddr.getValueType();
-    SDOperand Value;
-    if (BytesLeft >= 4) {
-      Value = DAG.getLoad(MVT::i32, Chain,
-                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
-                                      DAG.getConstant(Offset, SrcVT)),
-                          NULL, 0);
-      Chain = Value.getValue(1);
-      Chain = DAG.getStore(Chain, Value,
-                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
-                                       DAG.getConstant(Offset, DstVT)),
-                           NULL, 0);
-      BytesLeft -= 4;
-      Offset += 4;
-    }
-    if (BytesLeft >= 2) {
-      Value = DAG.getLoad(MVT::i16, Chain,
-                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
-                                      DAG.getConstant(Offset, SrcVT)),
-                          NULL, 0);
-      Chain = Value.getValue(1);
-      Chain = DAG.getStore(Chain, Value,
-                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
-                                       DAG.getConstant(Offset, DstVT)),
-                           NULL, 0);
-      BytesLeft -= 2;
-      Offset += 2;
-    }
-
-    if (BytesLeft == 1) {
-      Value = DAG.getLoad(MVT::i8, Chain,
-                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
-                                      DAG.getConstant(Offset, SrcVT)),
-                          NULL, 0);
-      Chain = Value.getValue(1);
-      Chain = DAG.getStore(Chain, Value,
-                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
-                                       DAG.getConstant(Offset, DstVT)),
-                           NULL, 0);
-    }
-  }
-
-  return Chain;
+    // Handle the last 1 - 7 bytes.
+    unsigned Offset = SizeVal - BytesLeft;
+    MVT::ValueType DstVT = Dst.getValueType();
+    MVT::ValueType SrcVT = Src.getValueType();
+    MVT::ValueType SizeVT = Size.getValueType();
+
+    Results.push_back(DAG.getMemcpy(Chain,
+                                    DAG.getNode(ISD::ADD, DstVT, Dst,
+                                                DAG.getConstant(Offset, DstVT)),
+                                    DAG.getNode(ISD::ADD, SrcVT, Src,
+                                                DAG.getConstant(Offset, SrcVT)),
+                                    DAG.getConstant(BytesLeft, SizeVT),
+                                    Align, AlwaysInline,
+                                    DstSV, Offset, SrcSV, Offset));
+  }
+
+  return DAG.getNode(ISD::TokenFactor, MVT::Other, &Results[0], Results.size());
 }

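The early returns at the top of this function are what let the generic expander fall back to a library call. A condensed restatement of that bail-out logic (illustrative names, arbitrary threshold; the real limit comes from X86Subtarget::getMaxInlineSizeThreshold(), and "use a libcall" corresponds to returning a null SDOperand):

#include <cstdint>
#include <cstdio>

enum Lowering { UseLibCall, UseRepMovs };

// Illustrative restatement of EmitTargetCodeForMemcpy's early exits.
Lowering chooseMemcpyLowering(bool SizeIsConstant, uint64_t SizeVal,
                              bool AlwaysInline, uint64_t MaxInlineSize) {
  if (!SizeIsConstant)
    return UseLibCall;   // rep;movs needs a known element count
  if (!AlwaysInline && SizeVal > MaxInlineSize)
    return UseLibCall;   // large copies are often faster through memcpy(3)
  return UseRepMovs;
}

int main() {
  // e.g. a 1 MiB copy goes to the library; a 136-byte byval copy is inlined.
  printf("%d\n", chooseMemcpyLowering(true, 1 << 20, false, 128));
  printf("%d\n", chooseMemcpyLowering(true, 136, true, 128));
  return 0;
}
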
 /// Expand the result of: i64,outchain = READCYCLECOUNTER inchain
 ...
   case ISD::CALL:              return LowerCALL(Op, DAG);
   case ISD::RET:               return LowerRET(Op, DAG);
   case ISD::FORMAL_ARGUMENTS:  return LowerFORMAL_ARGUMENTS(Op, DAG);
-  case ISD::MEMSET:            return LowerMEMSET(Op, DAG);
-  case ISD::MEMCPY:            return LowerMEMCPY(Op, DAG);
   case ISD::VASTART:           return LowerVASTART(Op, DAG);
   case ISD::VACOPY:            return LowerVACOPY(Op, DAG);
   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
                                          SDOperand Ret,
                                          SelectionDAG &DAG) const;

-  virtual const TargetSubtarget* getSubtarget() {
-    return static_cast<const TargetSubtarget*>(Subtarget);
+  virtual const X86Subtarget* getSubtarget() {
+    return Subtarget;
   }

   /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
 ...
   SDOperand LowerSELECT(SDOperand Op, SelectionDAG &DAG);
   SDOperand LowerBRCOND(SDOperand Op, SelectionDAG &DAG);
   SDOperand LowerMEMSET(SDOperand Op, SelectionDAG &DAG);
-  SDOperand LowerMEMCPYInline(SDOperand Dest, SDOperand Source,
-                              SDOperand Chain, unsigned Size, unsigned Align,
-                              SelectionDAG &DAG);
   SDOperand LowerJumpTable(SDOperand Op, SelectionDAG &DAG);
   SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG);
   SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG);
 ...
   SDNode *ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG);
   SDNode *ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG);
   SDNode *ExpandATOMIC_LCS(SDNode *N, SelectionDAG &DAG);
+
+  SDOperand EmitTargetCodeForMemset(SelectionDAG &DAG,
+                                    SDOperand Chain,
+                                    SDOperand Dst, SDOperand Src,
+                                    SDOperand Size, unsigned Align,
+                                    Value *DstSV, uint64_t DstOff);
+  SDOperand EmitTargetCodeForMemcpy(SelectionDAG &DAG,
+                                    SDOperand Chain,
+                                    SDOperand Dst, SDOperand Src,
+                                    SDOperand Size, unsigned Align,
+                                    bool AlwaysInline,
+                                    Value *DstSV, uint64_t DstOff,
+                                    Value *SrcSV, uint64_t SrcOff);
 };
}

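Together with the getMemcpy/getMemset entry points on SelectionDAG, these hooks give the three-way expansion described in the commit message: simple loads/stores, target-specific code, or a library call. A rough sketch of that ordering (illustrative only; none of these names or thresholds are the real API):

#include <cstdint>
#include <cstdio>

// Hypothetical stand-in for the generic expansion order.
const char *expandMemcpySketch(bool SizeIsConstant, uint64_t SizeVal,
                               bool TargetEmitsCode, bool AlwaysInline) {
  if (SizeIsConstant && SizeVal <= 16)  // threshold is an assumption
    return "simple loads/stores";       // small fixed-size copies
  if (TargetEmitsCode)
    return "EmitTargetCodeForMemcpy";   // e.g. x86 rep;movs
  if (AlwaysInline)
    return "forced loads/stores";       // no libcall allowed (byval copies)
  return "call memcpy";                 // leave it to the library
}

int main() {
  printf("%s\n", expandMemcpySketch(true, 8, true, false));
  printf("%s\n", expandMemcpySketch(false, 0, false, false));
  return 0;
}
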
- ; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep movs | count 1
+ ; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep memcpy | count 2
+ ; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep movs | count 3

  @A = global [32 x i32] zeroinitializer
  @B = global [32 x i32] zeroinitializer
  ; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2
  ; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2

- %struct.s = type { i64, i64, i64 }
+ %struct.s = type { i64, i64, i64, i64, i64, i64, i64, i64,
+                    i64, i64, i64, i64, i64, i64, i64, i64,
+                    i64 }

  define void @g(i64 %a, i64 %b, i64 %c) {
  entry:
  ; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsl | count 2
  ; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2

- %struct.s = type { i32, i32, i32, i32, i32, i32 }
+ %struct.s = type { i32, i32, i32, i32, i32, i32, i32, i32,
+                    i32, i32, i32, i32, i32, i32, i32, i32,
+                    i32, i32, i32, i32, i32, i32, i32, i32,
+                    i32, i32, i32, i32, i32, i32, i32, i32,
+                    i32 }

  define void @g(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6) {
  entry:
  ; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsw | count 2
  ; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2

- %struct.s = type { i16, i16, i16, i16, i16, i16 }
+ %struct.s = type { i16, i16, i16, i16, i16, i16, i16, i16,
+                    i16, i16, i16, i16, i16, i16, i16, i16,
+                    i16, i16, i16, i16, i16, i16, i16, i16,
+                    i16, i16, i16, i16, i16, i16, i16, i16,
+                    i16, i16, i16, i16, i16, i16, i16, i16,
+                    i16, i16, i16, i16, i16, i16, i16, i16,
+                    i16, i16, i16, i16, i16, i16, i16, i16,
+                    i16, i16, i16, i16, i16, i16, i16, i16,
+                    i16 }


  define void @g(i16 signext %a1, i16 signext %a2, i16 signext %a3,
  ; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsb | count 2
  ; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2

- %struct.s = type { i8, i8, i8, i8, i8, i8 }
+ %struct.s = type { i8, i8, i8, i8, i8, i8, i8, i8,
+                    i8, i8, i8, i8, i8, i8, i8, i8,
+                    i8, i8, i8, i8, i8, i8, i8, i8,
+                    i8, i8, i8, i8, i8, i8, i8, i8,
+                    i8, i8, i8, i8, i8, i8, i8, i8,
+                    i8, i8, i8, i8, i8, i8, i8, i8,
+                    i8, i8, i8, i8, i8, i8, i8, i8,
+                    i8, i8, i8, i8, i8, i8, i8, i8,
+                    i8, i8, i8, i8, i8, i8, i8, i8,
+                    i8, i8, i8, i8, i8, i8, i8, i8,
+                    i8, i8, i8, i8, i8, i8, i8, i8,
+                    i8, i8, i8, i8, i8, i8, i8, i8,
+                    i8, i8, i8, i8, i8, i8, i8, i8,
+                    i8, i8, i8, i8, i8, i8, i8, i8,
+                    i8, i8, i8, i8, i8, i8, i8, i8,
+                    i8, i8, i8, i8, i8, i8, i8, i8,
+                    i8 }


  define void @g(i8 signext %a1, i8 signext %a2, i8 signext %a3,
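The four byval tests above are sized so that each element width exercises a different rep;movs variant through the Align-to-AVT ladder in EmitTargetCodeForMemcpy. A compact restatement of that mapping (illustrative C++, not LLVM code):

#include <cstdio>

const char *repMovsFor(unsigned Align, bool Is64Bit) {
  if (Align >= 8 && Is64Bit) return "rep;movsq"; // MVT::i64
  if (Align >= 4)            return "rep;movsl"; // MVT::i32
  if (Align >= 2)            return "rep;movsw"; // MVT::i16
  return "rep;movsb";                            // MVT::i8
}

int main() {
  printf("%s %s %s %s\n",
         repMovsFor(8, true), repMovsFor(4, true),
         repMovsFor(2, true), repMovsFor(1, true));
  return 0;
}
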
  ; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | grep add | grep 16

- %struct.S = type { <2 x i64> }
+ %struct.S = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>,
+                    <2 x i64> }

  define i32 @main() nounwind {
  entry:
+ ; RUN: llvm-as < %s | llc | not grep movs
+
+ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+ target triple = "i386-apple-darwin8"
+
+ define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval align 4 %z) nounwind {
+ entry:
+   %iz = alloca { x86_fp80, x86_fp80 }           ; <{ x86_fp80, x86_fp80 }*> [#uses=3]
+   %tmp1 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 1    ; <x86_fp80*> [#uses=1]
+   %tmp2 = load x86_fp80* %tmp1, align 16        ; <x86_fp80> [#uses=1]
+   %tmp3 = sub x86_fp80 0xK80000000000000000000, %tmp2   ; <x86_fp80> [#uses=1]
+   %tmp4 = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 1   ; <x86_fp80*> [#uses=1]
+   %real = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 0   ; <x86_fp80*> [#uses=1]
+   %tmp6 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 0    ; <x86_fp80*> [#uses=1]
+   %tmp7 = load x86_fp80* %tmp6, align 16        ; <x86_fp80> [#uses=1]
+   store x86_fp80 %tmp3, x86_fp80* %real, align 16
+   store x86_fp80 %tmp7, x86_fp80* %tmp4, align 16
+   call void @ccoshl( { x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval align 4 %iz ) nounwind
+   ret void
+ }
+
+ declare void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret , { x86_fp80, x86_fp80 }* byval align 4 ) nounwind
+ ; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-apple-darwin10 | grep __bzero
+
+ declare void @llvm.memset.i64(i8*, i8, i64, i32)
+
+ define void @foo(i8* %p, i64 %n) {
+   call void @llvm.memset.i64(i8* %p, i8 0, i64 %n, i32 4)
+   ret void
+ }
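This last test pins down the bug fix called out in the commit message: with a non-constant size, EmitTargetCodeForMemset now bails out instead of emitting rep;stos, so the generic expansion emits a library call, and on Darwin a memset of zero can become __bzero. A sketch of that decision (illustrative names only):

#include <cstdio>

const char *lowerMemsetSketch(bool SizeIsConstant, bool ValueIsZero,
                              bool TargetIsDarwin) {
  if (!SizeIsConstant)  // rep;stos needs a known element count
    return (ValueIsZero && TargetIsDarwin) ? "call __bzero" : "call memset";
  return "rep;stos plus trailing stores";
}

int main() {
  // Matches the RUN line above: non-constant %n, value 0, Darwin triple.
  printf("%s\n", lowerMemsetSketch(false, true, true));
  return 0;
}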