llvm.org GIT mirror llvm / 255f20f
Fix sdisel memcpy, memset, memmove lowering: 1. Makes it possible to lower with floating point loads and stores. 2. Avoid unaligned loads / stores unless it's fast. 3. Fix some memcpy lowering logic bug related to when to optimize a load from constant string into a constant. 4. Adjust x86 memcpy lowering threshold to make it more sane. 5. Fix x86 target hook so it uses vector and floating point memory ops more effectively. rdar://7774704 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@100090 91177308-0d34-0410-b5e6-96231b3b80d8 Evan Cheng 9 years ago
14 changed file(s) with 314 addition(s) and 184 deletion(s). Raw diff Collapse all Expand all
521521 /// counterpart (e.g. structs), otherwise it will assert.
522522 EVT getValueType(const Type *Ty, bool AllowUnknown = false) const {
523523 EVT VT = EVT::getEVT(Ty, AllowUnknown);
524 return VT == MVT:: iPTR ? PointerTy : VT;
524 return VT == MVT::iPTR ? PointerTy : VT;
525525 }
526526
527527 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
635635 /// and store operations as a result of memset, memcpy, and memmove lowering.
636636 /// It returns MVT::Other if SelectionDAG should be responsible for
637637 /// determining it.
638 virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align,
639 bool isSrcConst, bool isSrcStr,
638 virtual EVT getOptimalMemOpType(uint64_t Size,
639 unsigned DstAlign, unsigned SrcAlign,
640640 SelectionDAG &DAG) const {
641641 return MVT::Other;
642642 }
50215021 SDValue Chain = LD->getChain();
50225022 SDValue Ptr = LD->getBasePtr();
50235023
5024 // Try to infer better alignment information than the load already has.
5025 if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
5026 if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
5027 if (Align > LD->getAlignment())
5028 return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
5029 LD->getValueType(0),
5030 Chain, Ptr, LD->getSrcValue(),
5031 LD->getSrcValueOffset(), LD->getMemoryVT(),
5032 LD->isVolatile(), LD->isNonTemporal(), Align);
5033 }
5034 }
5035
50365024 // If load is not volatile and there are no uses of the loaded value (and
50375025 // the updated indexed value in case of indexed loads), change uses of the
50385026 // chain value into uses of the chain input (i.e. delete the dead load).
50985086 }
50995087 }
51005088
5089 // Try to infer better alignment information than the load already has.
5090 if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
5091 if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
5092 if (Align > LD->getAlignment())
5093 return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
5094 LD->getValueType(0),
5095 Chain, Ptr, LD->getSrcValue(),
5096 LD->getSrcValueOffset(), LD->getMemoryVT(),
5097 LD->isVolatile(), LD->isNonTemporal(), Align);
5098 }
5099 }
5100
51015101 if (CombinerAA) {
51025102 // Walk up chain skipping non-aliasing memory nodes.
51035103 SDValue BetterChain = FindBetterChain(N, Chain);
52485248 SDValue Chain = ST->getChain();
52495249 SDValue Value = ST->getValue();
52505250 SDValue Ptr = ST->getBasePtr();
5251
5252 // Try to infer better alignment information than the store already has.
5253 if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
5254 if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
5255 if (Align > ST->getAlignment())
5256 return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
5257 Ptr, ST->getSrcValue(),
5258 ST->getSrcValueOffset(), ST->getMemoryVT(),
5259 ST->isVolatile(), ST->isNonTemporal(), Align);
5260 }
5261 }
52625251
52635252 // If this is a store of a bit convert, store the input value if the
52645253 // resultant store does not need a higher alignment than the original.
53475336
53485337 break;
53495338 }
5339 }
5340 }
5341
5342 // Try to infer better alignment information than the store already has.
5343 if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
5344 if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
5345 if (Align > ST->getAlignment())
5346 return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
5347 Ptr, ST->getSrcValue(),
5348 ST->getSrcValueOffset(), ST->getMemoryVT(),
5349 ST->isVolatile(), ST->isNonTemporal(), Align);
53505350 }
53515351 }
53525352
31313131 if (Str.empty()) {
31323132 if (VT.isInteger())
31333133 return DAG.getConstant(0, VT);
3134 unsigned NumElts = VT.getVectorNumElements();
3135 MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
3136 return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
3137 DAG.getConstant(0,
3138 EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts)));
3134 else if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
3135 VT.getSimpleVT().SimpleTy == MVT::f64)
3136 return DAG.getConstantFP(0.0, VT);
3137 else if (VT.isVector()) {
3138 unsigned NumElts = VT.getVectorNumElements();
3139 MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
3140 return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
3141 DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(),
3142 EltVT, NumElts)));
3143 } else
3144 llvm_unreachable("Expected type!");
31393145 }
31403146
31413147 assert(!VT.isVector() && "Can't handle vector type here!");
31833189 return false;
31843190 }
31853191
3186 /// MeetsMaxMemopRequirement - Determines if the number of memory ops required
3187 /// to replace the memset / memcpy is below the threshold. It also returns the
3188 /// types of the sequence of memory ops to perform memset / memcpy.
3189 static
3190 bool MeetsMaxMemopRequirement(std::vector &MemOps,
3191 SDValue Dst, SDValue Src,
3192 unsigned Limit, uint64_t Size, unsigned &Align,
3193 std::string &Str, bool &isSrcStr,
3194 SelectionDAG &DAG,
3195 const TargetLowering &TLI) {
3196 isSrcStr = isMemSrcFromString(Src, Str);
3197 bool isSrcConst = isa(Src);
3198 EVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr, DAG);
3199 bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses(VT);
3200 if (VT != MVT::Other) {
3192 /// FindOptimalMemOpLowering - Determines the optimal series of memory ops
3193 /// to replace the memset / memcpy. Return true if the number of memory ops
3194 /// is below the threshold. It returns the types of the sequence of
3195 /// memory ops to perform memset / memcpy by reference.
3196 static bool FindOptimalMemOpLowering(std::vector &MemOps,
3197 SDValue Dst, SDValue Src,
3198 unsigned Limit, uint64_t Size,
3199 unsigned DstAlign, unsigned SrcAlign,
3200 SelectionDAG &DAG,
3201 const TargetLowering &TLI) {
3202 assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
3203 "Expecting memcpy / memset source to meet alignment requirement!");
3204 // If 'SrcAlign' is zero, that means the memory operation does not need to load
3205 // the value, i.e. memset or memcpy from constant string. Otherwise, it's
3206 // the inferred alignment of the source. 'DstAlign', on the other hand, is the
3207 // specified alignment of the memory operation. If it is zero, that means
3208 // it's possible to change the alignment of the destination.
3209 EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, DAG);
3210
3211 if (VT == MVT::Other) {
3212 VT = TLI.getPointerTy();
32013213 const Type *Ty = VT.getTypeForEVT(*DAG.getContext());
3202 unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
3203 // If source is a string constant, this will require an unaligned load.
3204 if (NewAlign > Align && (isSrcConst || AllowUnalign)) {
3205 if (Dst.getOpcode() != ISD::FrameIndex) {
3206 // Can't change destination alignment. It requires a unaligned store.
3207 if (AllowUnalign)
3208 VT = MVT::Other;
3209 } else {
3210 int FI = cast(Dst)->getIndex();
3211 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
3212 if (MFI->isFixedObjectIndex(FI)) {
3213 // Can't change destination alignment. It requires a unaligned store.
3214 if (AllowUnalign)
3215 VT = MVT::Other;
3216 } else {
3217 // Give the stack frame object a larger alignment if needed.
3218 if (MFI->getObjectAlignment(FI) < NewAlign)
3219 MFI->setObjectAlignment(FI, NewAlign);
3220 Align = NewAlign;
3221 }
3222 }
3223 }
3224 }
3225
3226 if (VT == MVT::Other) {
3227 if (TLI.allowsUnalignedMemoryAccesses(MVT::i64)) {
3214 if (DstAlign >= TLI.getTargetData()->getABITypeAlignment(Ty) ||
3215 TLI.allowsUnalignedMemoryAccesses(VT)) {
32283216 VT = MVT::i64;
32293217 } else {
3230 switch (Align & 7) {
3218 switch (DstAlign & 7) {
32313219 case 0: VT = MVT::i64; break;
32323220 case 4: VT = MVT::i32; break;
32333221 case 2: VT = MVT::i16; break;
32493237 unsigned VTSize = VT.getSizeInBits() / 8;
32503238 while (VTSize > Size) {
32513239 // For now, only use non-vector load / store's for the left-over pieces.
3252 if (VT.isVector()) {
3240 if (VT.isVector() || VT.isFloatingPoint()) {
32533241 VT = MVT::i64;
32543242 while (!TLI.isTypeLegal(VT))
32553243 VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
32853273 uint64_t Limit = -1ULL;
32863274 if (!AlwaysInline)
32873275 Limit = TLI.getMaxStoresPerMemcpy();
3288 unsigned DstAlign = Align; // Destination alignment can change.
3276 bool DstAlignCanChange = false;
3277 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
3278 FrameIndexSDNode *FI = dyn_cast(Dst);
3279 if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
3280 DstAlignCanChange = true;
3281 unsigned SrcAlign = DAG.InferPtrAlignment(Src);
3282 if (Align > SrcAlign)
3283 SrcAlign = Align;
32893284 std::string Str;
3290 bool CopyFromStr;
3291 if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
3292 Str, CopyFromStr, DAG, TLI))
3285 bool CopyFromStr = isMemSrcFromString(Src, Str);
3286 bool isZeroStr = CopyFromStr && Str.empty();
3287 if (!FindOptimalMemOpLowering(MemOps, Dst, Src, Limit, Size,
3288 (DstAlignCanChange ? 0 : Align),
3289 (isZeroStr ? 0 : SrcAlign), DAG, TLI))
32933290 return SDValue();
32943291
3295
3296 bool isZeroStr = CopyFromStr && Str.empty();
3292 if (DstAlignCanChange) {
3293 const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
3294 unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
3295 if (NewAlign > Align) {
3296 // Give the stack frame object a larger alignment if needed.
3297 if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
3298 MFI->setObjectAlignment(FI->getIndex(), NewAlign);
3299 Align = NewAlign;
3300 }
3301 }
3302
32973303 SmallVector OutChains;
32983304 unsigned NumMemOps = MemOps.size();
32993305 uint64_t SrcOff = 0, DstOff = 0;
33023308 unsigned VTSize = VT.getSizeInBits() / 8;
33033309 SDValue Value, Store;
33043310
3305 if (CopyFromStr && (isZeroStr || !VT.isVector())) {
3311 if (CopyFromStr &&
3312 (isZeroStr || (VT.isInteger() && !VT.isVector()))) {
33063313 // It's unlikely a store of a vector immediate can be done in a single
33073314 // instruction. It would require a load from a constantpool first.
3308 // We also handle store a vector with all zero's.
3315 // We only handle zero vectors here.
33093316 // FIXME: Handle other cases where store of vector immediate is done in
33103317 // a single instruction.
33113318 Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);
33123319 Store = DAG.getStore(Chain, dl, Value,
33133320 getMemBasePlusOffset(Dst, DstOff, DAG),
3314 DstSV, DstSVOff + DstOff, false, false, DstAlign);
3321 DstSV, DstSVOff + DstOff, false, false, Align);
33153322 } else {
33163323 // The type might not be legal for the target. This should only happen
33173324 // if the type is smaller than a legal type, as on PPC, so the right
33223329 assert(NVT.bitsGE(VT));
33233330 Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
33243331 getMemBasePlusOffset(Src, SrcOff, DAG),
3325 SrcSV, SrcSVOff + SrcOff, VT, false, false, Align);
3332 SrcSV, SrcSVOff + SrcOff, VT, false, false,
3333 MinAlign(SrcAlign, SrcOff));
33263334 Store = DAG.getTruncStore(Chain, dl, Value,
33273335 getMemBasePlusOffset(Dst, DstOff, DAG),
33283336 DstSV, DstSVOff + DstOff, VT, false, false,
3329 DstAlign);
3337 Align);
33303338 }
33313339 OutChains.push_back(Store);
33323340 SrcOff += VTSize;
33383346 }
33393347
33403348 static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
3341 SDValue Chain, SDValue Dst,
3342 SDValue Src, uint64_t Size,
3343 unsigned Align, bool AlwaysInline,
3344 const Value *DstSV, uint64_t DstSVOff,
3345 const Value *SrcSV, uint64_t SrcSVOff){
3349 SDValue Chain, SDValue Dst,
3350 SDValue Src, uint64_t Size,
3351 unsigned Align,bool AlwaysInline,
3352 const Value *DstSV, uint64_t DstSVOff,
3353 const Value *SrcSV, uint64_t SrcSVOff) {
33463354 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
33473355
33483356 // Expand memmove to a series of load and store ops if the size operand falls
33513359 uint64_t Limit = -1ULL;
33523360 if (!AlwaysInline)
33533361 Limit = TLI.getMaxStoresPerMemmove();
3354 unsigned DstAlign = Align; // Destination alignment can change.
3355 std::string Str;
3356 bool CopyFromStr;
3357 if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
3358 Str, CopyFromStr, DAG, TLI))
3362 bool DstAlignCanChange = false;
3363 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
3364 FrameIndexSDNode *FI = dyn_cast(Dst);
3365 if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
3366 DstAlignCanChange = true;
3367 unsigned SrcAlign = DAG.InferPtrAlignment(Src);
3368 if (Align > SrcAlign)
3369 SrcAlign = Align;
3370
3371 if (!FindOptimalMemOpLowering(MemOps, Dst, Src, Limit, Size,
3372 (DstAlignCanChange ? 0 : Align),
3373 SrcAlign, DAG, TLI))
33593374 return SDValue();
33603375
3376 if (DstAlignCanChange) {
3377 const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
3378 unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
3379 if (NewAlign > Align) {
3380 // Give the stack frame object a larger alignment if needed.
3381 if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
3382 MFI->setObjectAlignment(FI->getIndex(), NewAlign);
3383 Align = NewAlign;
3384 }
3385 }
3386
33613387 uint64_t SrcOff = 0, DstOff = 0;
3362
33633388 SmallVector LoadValues;
33643389 SmallVector LoadChains;
33653390 SmallVector OutChains;
33713396
33723397 Value = DAG.getLoad(VT, dl, Chain,
33733398 getMemBasePlusOffset(Src, SrcOff, DAG),
3374 SrcSV, SrcSVOff + SrcOff, false, false, Align);
3399 SrcSV, SrcSVOff + SrcOff, false, false, SrcAlign);
33753400 LoadValues.push_back(Value);
33763401 LoadChains.push_back(Value.getValue(1));
33773402 SrcOff += VTSize;
33863411
33873412 Store = DAG.getStore(Chain, dl, LoadValues[i],
33883413 getMemBasePlusOffset(Dst, DstOff, DAG),
3389 DstSV, DstSVOff + DstOff, false, false, DstAlign);
3414 DstSV, DstSVOff + DstOff, false, false, Align);
33903415 OutChains.push_back(Store);
33913416 DstOff += VTSize;
33923417 }
33963421 }
33973422
33983423 static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
3399 SDValue Chain, SDValue Dst,
3400 SDValue Src, uint64_t Size,
3401 unsigned Align,
3402 const Value *DstSV, uint64_t DstSVOff) {
3424 SDValue Chain, SDValue Dst,
3425 SDValue Src, uint64_t Size,
3426 unsigned Align,
3427 const Value *DstSV, uint64_t DstSVOff) {
34033428 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
34043429
34053430 // Expand memset to a series of load/store ops if the size operand
34063431 // falls below a certain threshold.
34073432 std::vector MemOps;
3408 std::string Str;
3409 bool CopyFromStr;
3410 if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, TLI.getMaxStoresPerMemset(),
3411 Size, Align, Str, CopyFromStr, DAG, TLI))
3433 bool DstAlignCanChange = false;
3434 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
3435 FrameIndexSDNode *FI = dyn_cast(Dst);
3436 if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
3437 DstAlignCanChange = true;
3438 if (!FindOptimalMemOpLowering(MemOps, Dst, Src, TLI.getMaxStoresPerMemset(),
3439 Size, (DstAlignCanChange ? 0 : Align), 0,
3440 DAG, TLI))
34123441 return SDValue();
3442
3443 if (DstAlignCanChange) {
3444 const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
3445 unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
3446 if (NewAlign > Align) {
3447 // Give the stack frame object a larger alignment if needed.
3448 if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
3449 MFI->setObjectAlignment(FI->getIndex(), NewAlign);
3450 Align = NewAlign;
3451 }
3452 }
34133453
34143454 SmallVector OutChains;
34153455 uint64_t DstOff = 0;
3416
34173456 unsigned NumMemOps = MemOps.size();
34183457 for (unsigned i = 0; i < NumMemOps; i++) {
34193458 EVT VT = MemOps[i];
34443483 if (ConstantSize->isNullValue())
34453484 return Chain;
34463485
3447 SDValue Result =
3448 getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
3449 ConstantSize->getZExtValue(),
3450 Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
3486 SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
3487 ConstantSize->getZExtValue(),Align,
3488 false, DstSV, DstSVOff, SrcSV, SrcSVOff);
34513489 if (Result.getNode())
34523490 return Result;
34533491 }
61056143 // If this is a GlobalAddress + cst, return the alignment.
61066144 GlobalValue *GV;
61076145 int64_t GVOffset = 0;
6108 if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset))
6109 return MinAlign(GV->getAlignment(), GVOffset);
6146 if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
6147 // If GV has specified alignment, then use it. Otherwise, use the preferred
6148 // alignment.
6149 unsigned Align = GV->getAlignment();
6150 if (!Align) {
6151 if (GlobalVariable *GVar = dyn_cast(GV)) {
6152 const TargetData *TD = TLI.getTargetData();
6153 Align = TD->getPreferredAlignment(GVar);
6154 }
6155 }
6156 return MinAlign(Align, GVOffset);
6157 }
61106158
61116159 // If this is a direct reference to a stack slot, use information about the
61126160 // stack slot's alignment.
55385538 return false;
55395539 }
55405540
5541 EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
5542 bool isSrcConst, bool isSrcStr,
5541 EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
5542 unsigned DstAlign, unsigned SrcAlign,
55435543 SelectionDAG &DAG) const {
55445544 if (this->PPCSubTarget.isPPC64()) {
55455545 return MVT::i64;
346346
347347 virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
348348
349 virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align,
350 bool isSrcConst, bool isSrcStr,
349 virtual EVT getOptimalMemOpType(uint64_t Size,
350 unsigned DstAlign, unsigned SrcAlign,
351351 SelectionDAG &DAG) const;
352352
353353 /// getFunctionAlignment - Return the Log2 alignment of this function.
10111011 // FIXME: These should be based on subtarget info. Plus, the values should
10121012 // be smaller when we are in optimizing for size mode.
10131013 maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
1014 maxStoresPerMemcpy = 16; // For @llvm.memcpy -> sequence of stores
1014 maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
10151015 maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores
10161016 setPrefLoopAlignment(16);
10171017 benefitFromCodePlacementOpt = true;
10731073 /// lowering. It returns MVT::Other if SelectionDAG should be responsible for
10741074 /// determining it.
10751075 EVT
1076 X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
1077 bool isSrcConst, bool isSrcStr,
1076 X86TargetLowering::getOptimalMemOpType(uint64_t Size,
1077 unsigned DstAlign, unsigned SrcAlign,
10781078 SelectionDAG &DAG) const {
10791079 // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
10801080 // linux. This is because the stack realignment code can't handle certain
10811081 // cases like PR2962. This should be removed when PR2962 is fixed.
10821082 const Function *F = DAG.getMachineFunction().getFunction();
1083 bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
1084 if (!NoImplicitFloatOps && Subtarget->getStackAlignment() >= 16) {
1085 if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
1086 return MVT::v4i32;
1087 if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
1088 return MVT::v4f32;
1083 if (!F->hasFnAttr(Attribute::NoImplicitFloat)) {
1084 if (Size >= 16 &&
1085 (Subtarget->isUnalignedMemAccessFast() ||
1086 (DstAlign == 0 || DstAlign >= 16) &&
1087 (SrcAlign == 0 || SrcAlign >= 16)) &&
1088 Subtarget->getStackAlignment() >= 16) {
1089 if (Subtarget->hasSSE2())
1090 return MVT::v4i32;
1091 if (Subtarget->hasSSE1())
1092 return MVT::v4f32;
1093 } else if (Size >= 8 &&
1094 Subtarget->getStackAlignment() >= 8 &&
1095 Subtarget->hasSSE2())
1096 return MVT::f64;
10891097 }
10901098 if (Subtarget->is64Bit() && Size >= 8)
10911099 return MVT::i64;
422422 /// and store operations as a result of memset, memcpy, and memmove
423423 /// lowering. It returns MVT::Other if SelectionDAG should be responsible for
424424 /// determining it.
425 virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align,
426 bool isSrcConst, bool isSrcStr,
425 virtual EVT getOptimalMemOpType(uint64_t Size,
426 unsigned DstAlign, unsigned SrcAlign,
427427 SelectionDAG &DAG) const;
428428
429429 /// allowsUnalignedMemoryAccesses - Returns true if the target allows
None ; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
0 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck %s
11 ; rdar://7396984
22
33 @str = private constant [28 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxx\00", align 1
None ; RUN: llc < %s -march=x86 -mcpu=yonah | egrep {add|lea} | grep 16
0 ; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
11
22 %struct.S = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>,
3 <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>,
34 <2 x i64> }
45
56 define i32 @main() nounwind {
67 entry:
8 ; CHECK: main:
9 ; CHECK: movl $1, (%esp)
10 ; CHECK: leal 16(%esp), %edi
11 ; CHECK: movl $36, %ecx
12 ; CHECK: leal 160(%esp), %esi
13 ; CHECK: rep;movsl
714 %s = alloca %struct.S ; <%struct.S*> [#uses=2]
815 %tmp15 = getelementptr %struct.S* %s, i32 0, i32 0 ; <<2 x i64>*> [#uses=1]
916 store <2 x i64> < i64 8589934595, i64 1 >, <2 x i64>* %tmp15, align 16
None ; RUN: llc < %s -march=x86 -mattr=-sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 7
1 ; RUN: llc < %s -march=x86 -mattr=+sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 5
0 ; RUN: llc < %s -mattr=+sse2 -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=SSE2
1 ; RUN: llc < %s -mattr=+sse,-sse2 -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=SSE1
2 ; RUN: llc < %s -mattr=-sse -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=NOSSE
23
34 %struct.ParmT = type { [25 x i8], i8, i8* }
45 @.str12 = internal constant [25 x i8] c"image\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00" ; <[25 x i8]*> [#uses=1]
56
6 declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
7 define void @t1(i32 %argc, i8** %argv) nounwind {
8 entry:
9 ; SSE2: t1:
10 ; SSE2: movaps _.str12, %xmm0
11 ; SSE2: movaps %xmm0
12 ; SSE2: movb $0
13 ; SSE2: movl $0
14 ; SSE2: movl $0
715
8 define void @t(i32 %argc, i8** %argv) nounwind {
9 entry:
16 ; SSE1: t1:
17 ; SSE1: movaps _.str12, %xmm0
18 ; SSE1: movaps %xmm0
19 ; SSE1: movb $0
20 ; SSE1: movl $0
21 ; SSE1: movl $0
22
23 ; NOSSE: t1:
24 ; NOSSE: movb $0
25 ; NOSSE: movl $0
26 ; NOSSE: movl $0
27 ; NOSSE: movl $0
28 ; NOSSE: movl $0
29 ; NOSSE: movl $101
30 ; NOSSE: movl $1734438249
1031 %parms.i = alloca [13 x %struct.ParmT] ; <[13 x %struct.ParmT]*> [#uses=1]
1132 %parms1.i = getelementptr [13 x %struct.ParmT]* %parms.i, i32 0, i32 0, i32 0, i32 0 ; [#uses=1]
1233 call void @llvm.memcpy.i32( i8* %parms1.i, i8* getelementptr ([25 x i8]* @.str12, i32 0, i32 0), i32 25, i32 1 ) nounwind
1334 unreachable
1435 }
36
37 ;rdar://7774704
38 %struct.s0 = type { [2 x double] }
39
40 define void @t2(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp {
41 entry:
42 ; SSE2: t2:
43 ; SSE2: movaps (%eax), %xmm0
44 ; SSE2: movaps %xmm0, (%eax)
45
46 ; SSE1: t2:
47 ; SSE1: movaps (%eax), %xmm0
48 ; SSE1: movaps %xmm0, (%eax)
49
50 ; NOSSE: t2:
51 ; NOSSE: movl
52 ; NOSSE: movl
53 ; NOSSE: movl
54 ; NOSSE: movl
55 ; NOSSE: movl
56 ; NOSSE: movl
57 ; NOSSE: movl
58 ; NOSSE: movl
59 ; NOSSE: movl
60 ; NOSSE: movl
61 %tmp2 = bitcast %struct.s0* %a to i8* ; [#uses=1]
62 %tmp3 = bitcast %struct.s0* %b to i8* ; [#uses=1]
63 tail call void @llvm.memcpy.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 16)
64 ret void
65 }
66
67 define void @t3(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp {
68 entry:
69 ; SSE2: t3:
70 ; SSE2: movsd (%eax), %xmm0
71 ; SSE2: movsd 8(%eax), %xmm1
72 ; SSE2: movsd %xmm1, 8(%eax)
73 ; SSE2: movsd %xmm0, (%eax)
74
75 ; SSE1: t3:
76 ; SSE1: movl
77 ; SSE1: movl
78 ; SSE1: movl
79 ; SSE1: movl
80 ; SSE1: movl
81 ; SSE1: movl
82 ; SSE1: movl
83 ; SSE1: movl
84 ; SSE1: movl
85 ; SSE1: movl
86
87 ; NOSSE: t3:
88 ; NOSSE: movl
89 ; NOSSE: movl
90 ; NOSSE: movl
91 ; NOSSE: movl
92 ; NOSSE: movl
93 ; NOSSE: movl
94 ; NOSSE: movl
95 ; NOSSE: movl
96 ; NOSSE: movl
97 ; NOSSE: movl
98 %tmp2 = bitcast %struct.s0* %a to i8* ; [#uses=1]
99 %tmp3 = bitcast %struct.s0* %b to i8* ; [#uses=1]
100 tail call void @llvm.memcpy.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 8)
101 ret void
102 }
103
104 declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
None ; RUN: llc < %s | not grep rep
1 ; RUN: llc < %s | grep memset
0 ; RUN: llc < %s | FileCheck %s
21
32 target triple = "i386"
43
54 declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
65
7 define fastcc i32 @cli_scanzip(i32 %desc) nounwind {
6 define fastcc void @t() nounwind {
87 entry:
9 br label %bb8.i.i.i.i
10
11 bb8.i.i.i.i: ; preds = %bb8.i.i.i.i, %entry
12 icmp eq i32 0, 0 ; :0 [#uses=1]
13 br i1 %0, label %bb61.i.i.i, label %bb8.i.i.i.i
14
15 bb32.i.i.i: ; preds = %bb61.i.i.i
16 ptrtoint i8* %tail.0.i.i.i to i32 ; :1 [#uses=1]
17 sub i32 0, %1 ; :2 [#uses=1]
18 icmp sgt i32 %2, 19 ; :3 [#uses=1]
19 br i1 %3, label %bb34.i.i.i, label %bb61.i.i.i
20
21 bb34.i.i.i: ; preds = %bb32.i.i.i
22 load i32* null, align 4 ; :4 [#uses=1]
23 icmp eq i32 %4, 101010256 ; :5 [#uses=1]
24 br i1 %5, label %bb8.i11.i.i.i, label %bb61.i.i.i
25
26 bb8.i11.i.i.i: ; preds = %bb8.i11.i.i.i, %bb34.i.i.i
27 icmp eq i32 0, 0 ; :6 [#uses=1]
28 br i1 %6, label %cli_dbgmsg.exit49.i, label %bb8.i11.i.i.i
29
30 cli_dbgmsg.exit49.i: ; preds = %bb8.i11.i.i.i
31 icmp eq [32768 x i8]* null, null ; :7 [#uses=1]
32 br i1 %7, label %bb1.i28.i, label %bb8.i.i
33
34 bb61.i.i.i: ; preds = %bb61.i.i.i, %bb34.i.i.i, %bb32.i.i.i, %bb8.i.i.i.i
35 %tail.0.i.i.i = getelementptr [1024 x i8]* null, i32 0, i32 0 ; [#uses=2]
36 load i8* %tail.0.i.i.i, align 1 ; :8 [#uses=1]
37 icmp eq i8 %8, 80 ; :9 [#uses=1]
38 br i1 %9, label %bb32.i.i.i, label %bb61.i.i.i
39
40 bb1.i28.i: ; preds = %cli_dbgmsg.exit49.i
41 call void @llvm.memset.i32( i8* null, i8 0, i32 88, i32 1 ) nounwind
42 unreachable
43
44 bb8.i.i: ; preds = %bb8.i.i, %cli_dbgmsg.exit49.i
45 br label %bb8.i.i
8 ; CHECK: t:
9 ; CHECK: call memset
10 call void @llvm.memset.i32( i8* null, i8 0, i32 188, i32 1 ) nounwind
11 unreachable
4612 }
None ; RUN: llc < %s -mtriple=i386-apple-darwin | grep stosl
0 ; RUN: llc < %s -mtriple=i386-apple-darwin | grep movl | count 20
11 ; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movq | count 10
22
33 define void @bork() nounwind {
None ; RUN: llc < %s | not grep movs
0 ; RUN: llc < %s | grep movsd | count 8
11
22 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
33 target triple = "i386-apple-darwin8"
None ; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=dynamic-no-pic --asm-verbose=0 | FileCheck %s
0 ; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=core2 -relocation-model=dynamic-no-pic --asm-verbose=0 | FileCheck -check-prefix=CORE2 %s
1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=corei7 -relocation-model=dynamic-no-pic --asm-verbose=0 | FileCheck -check-prefix=COREI7 %s
12
23 @.str1 = internal constant [31 x i8] c"DHRYSTONE PROGRAM, SOME STRING\00", align 8
34 @.str3 = internal constant [31 x i8] c"DHRYSTONE PROGRAM, 2'ND STRING\00", align 8
1011 bb:
1112 %String2Loc9 = getelementptr inbounds [31 x i8]* %String2Loc, i64 0, i64 0
1213 call void @llvm.memcpy.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8]* @.str3, i64 0, i64 0), i64 31, i32 1)
13 ; CHECK: movups _.str3
14 ; CORE2: movsd _.str3+16
15 ; CORE2: movsd _.str3+8
16 ; CORE2: movsd _.str3
17
18 ; COREI7: movups _.str3
1419 br label %bb
1520
1621 return:
1924
2025 declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
2126
22 ; CHECK: .align 3
23 ; CHECK-NEXT: _.str1:
24 ; CHECK-NEXT: .asciz "DHRYSTONE PROGRAM, SOME STRING"
25 ; CHECK: .align 3
26 ; CHECK-NEXT: _.str3:
27 ; CORE2: .align 3
28 ; CORE2-NEXT: _.str1:
29 ; CORE2-NEXT: .asciz "DHRYSTONE PROGRAM, SOME STRING"
30 ; CORE2: .align 3
31 ; CORE2-NEXT: _.str3:
32
33 ; COREI7: .align 3
34 ; COREI7-NEXT: _.str1:
35 ; COREI7-NEXT: .asciz "DHRYSTONE PROGRAM, SOME STRING"
36 ; COREI7: .align 3
37 ; COREI7-NEXT: _.str3: