llvm.org GIT mirror llvm / 61db1f5
start using irbuilder to make mem intrinsics in a few passes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122572 91177308-0d34-0410-b5e6-96231b3b80d8 Chris Lattner 8 years ago
3 changed file(s) with 36 addition(s) and 108 deletion(s). Raw diff Collapse all Expand all
2424 #include "llvm/Analysis/ValueTracking.h"
2525 #include "llvm/Support/Debug.h"
2626 #include "llvm/Support/GetElementPtrTypeIterator.h"
27 #include "llvm/Support/IRBuilder.h"
2728 #include "llvm/Support/raw_ostream.h"
2829 #include "llvm/Target/TargetData.h"
2930 #include
331332 }
332333 }
333334
334 LLVMContext &Context = SI->getContext();
335
336335 // There are two cases that are interesting for this code to handle: memcpy
337336 // and memset. Right now we only handle memset.
338337
344343 return false;
345344
346345 AliasAnalysis &AA = getAnalysis();
347 Module *M = SI->getParent()->getParent()->getParent();
348346
349347 // Okay, so we now have a single store that can be splatable. Scan to find
350348 // all subsequent stores of the same value to offset from the same pointer.
430428 Alignment = TD->getABITypeAlignment(EltType);
431429 }
432430
433 // Cast the start ptr to be i8* as memset requires.
434 const PointerType* StartPTy = cast(StartPtr->getType());
435 const PointerType *i8Ptr = Type::getInt8PtrTy(Context,
436 StartPTy->getAddressSpace());
437 if (StartPTy!= i8Ptr)
438 StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(),
439 InsertPt);
440
441 Value *Ops[] = {
442 StartPtr, ByteVal, // Start, value
443 // size
444 ConstantInt::get(Type::getInt64Ty(Context), Range.End-Range.Start),
445 // align
446 ConstantInt::get(Type::getInt32Ty(Context), Alignment),
447 // volatile
448 ConstantInt::getFalse(Context),
449 };
450 const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
451
452 Function *MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
453
454 Value *C = CallInst::Create(MemSetF, Ops, Ops+5, "", InsertPt);
431 IRBuilder<> Builder(InsertPt);
432 Value *C =
433 Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
434
455435 DEBUG(dbgs() << "Replace stores:\n";
456436 for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
457437 dbgs() << *Range.TheStores[i] << '\n';
662642 // If the dest of the second might alias the source of the first, then the
663643 // source and dest might overlap. We still want to eliminate the intermediate
664644 // value, but we have to generate a memmove instead of memcpy.
665 Intrinsic::ID ResultFn = Intrinsic::memcpy;
666 if (AA.alias(AA.getLocationForDest(M), AA.getLocationForSource(MDep)) !=
667 AliasAnalysis::NoAlias)
668 ResultFn = Intrinsic::memmove;
645 bool UseMemMove = false;
646 if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(MDep)))
647 UseMemMove = true;
669648
670649 // If all checks passed, then we can transform M.
671 const Type *ArgTys[3] = {
672 M->getRawDest()->getType(),
673 MDep->getRawSource()->getType(),
674 M->getLength()->getType()
675 };
676 Function *MemCpyFun =
677 Intrinsic::getDeclaration(MDep->getParent()->getParent()->getParent(),
678 ResultFn, ArgTys, 3);
679650
680651 // Make sure to use the lesser of the alignment of the source and the dest
681652 // since we're changing where we're reading from, but don't want to increase
683654 // TODO: Is this worth it if we're creating a less aligned memcpy? For
684655 // example we could be moving from movaps -> movq on x86.
685656 unsigned Align = std::min(MDep->getAlignment(), M->getAlignment());
686 Value *Args[5] = {
687 M->getRawDest(),
688 MDep->getRawSource(),
689 M->getLength(),
690 ConstantInt::get(Type::getInt32Ty(MemCpyFun->getContext()), Align),
691 M->getVolatileCst()
692 };
693 CallInst::Create(MemCpyFun, Args, Args+5, "", M);
657
658 IRBuilder<> Builder(M);
659 if (UseMemMove)
660 Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(),
661 Align, M->isVolatile());
662 else
663 Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(),
664 Align, M->isVolatile());
694665
695666 // Remove the instruction we're replacing.
696667 MD->removeInstruction(M);
721692 if (GlobalVariable *GV = dyn_cast(M->getSource()))
722693 if (GV->isConstant() && GV->hasDefinitiveInitializer())
723694 if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) {
724 Value *Ops[] = {
725 M->getRawDest(), ByteVal, // Start, value
726 CopySize, // Size
727 M->getAlignmentCst(), // Alignment
728 ConstantInt::getFalse(M->getContext()), // volatile
729 };
730 const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
731 Module *Mod = M->getParent()->getParent()->getParent();
732 Function *MemSetF = Intrinsic::getDeclaration(Mod, Intrinsic::memset,
733 Tys, 2);
734 CallInst::Create(MemSetF, Ops, Ops+5, "", M);
695 IRBuilder<> Builder(M);
696 Builder.CreateMemSet(M->getRawDest(), ByteVal, CopySize,
697 M->getAlignment(), false);
735698 MD->removeInstruction(M);
736699 M->eraseFromParent();
737700 ++NumCpyToSet;
764727 AliasAnalysis &AA = getAnalysis();
765728
766729 // See if the pointers alias.
767 if (AA.alias(AA.getLocationForDest(M),
768 AA.getLocationForSource(M)) !=
769 AliasAnalysis::NoAlias)
730 if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M)))
770731 return false;
771732
772733 DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
424424 continue;
425425 }
426426
427 IRBuilder<> Builder(User->getParent(), User);
427 IRBuilder<> Builder(User);
428428
429429 if (LoadInst *LI = dyn_cast(User)) {
430430 // The load is a bit extract from NewAI shifted right by Offset bits.
13521352 }
13531353
13541354 // Process each element of the aggregate.
1355 Value *TheFn = MI->getCalledValue();
1356 const Type *BytePtrTy = MI->getRawDest()->getType();
13571355 bool SROADest = MI->getRawDest() == Inst;
13581356
13591357 Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
14471445 // Otherwise, if we're storing a byte variable, use a memset call for
14481446 // this element.
14491447 }
1450
1451 // Cast the element pointer to BytePtrTy.
1452 if (EltPtr->getType() != BytePtrTy)
1453 EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getName(), MI);
1454
1455 // Cast the other pointer (if we have one) to BytePtrTy.
1456 if (OtherElt && OtherElt->getType() != BytePtrTy) {
1457 // Preserve address space of OtherElt
1458 const PointerType* OtherPTy = cast(OtherElt->getType());
1459 const PointerType* PTy = cast(BytePtrTy);
1460 if (OtherPTy->getElementType() != PTy->getElementType()) {
1461 Type *NewOtherPTy = PointerType::get(PTy->getElementType(),
1462 OtherPTy->getAddressSpace());
1463 OtherElt = new BitCastInst(OtherElt, NewOtherPTy,
1464 OtherElt->getName(), MI);
1465 }
1466 }
1467
1448
14681449 unsigned EltSize = TD->getTypeAllocSize(EltTy);
14691450
1451 IRBuilder<> Builder(MI);
1452
14701453 // Finally, insert the meminst for this element.
1471 if (isa(MI)) {
1472 Value *Ops[] = {
1473 SROADest ? EltPtr : OtherElt, // Dest ptr
1474 SROADest ? OtherElt : EltPtr, // Src ptr
1475 ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
1476 // Align
1477 ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign),
1478 MI->getVolatileCst()
1479 };
1480 // In case we fold the address space overloaded memcpy of A to B
1481 // with memcpy of B to C, change the function to be a memcpy of A to C.
1482 const Type *Tys[] = { Ops[0]->getType(), Ops[1]->getType(),
1483 Ops[2]->getType() };
1484 Module *M = MI->getParent()->getParent()->getParent();
1485 TheFn = Intrinsic::getDeclaration(M, MI->getIntrinsicID(), Tys, 3);
1486 CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
1454 if (isa(MI)) {
1455 Builder.CreateMemSet(EltPtr, MI->getArgOperand(1), EltSize,
1456 MI->isVolatile());
14871457 } else {
1488 assert(isa(MI));
1489 Value *Ops[] = {
1490 EltPtr, MI->getArgOperand(1), // Dest, Value,
1491 ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
1492 Zero, // Align
1493 ConstantInt::getFalse(MI->getContext()) // isVolatile
1494 };
1495 const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
1496 Module *M = MI->getParent()->getParent()->getParent();
1497 TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
1498 CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
1458 assert(isa(MI));
1459 Value *Dst = SROADest ? EltPtr : OtherElt; // Dest ptr
1460 Value *Src = SROADest ? OtherElt : EltPtr; // Src ptr
1461
1462 if (isa(MI))
1463 Builder.CreateMemCpy(Dst, Src, EltSize, OtherEltAlign,MI->isVolatile());
1464 else
1465 Builder.CreateMemMove(Dst, Src, EltSize,OtherEltAlign,MI->isVolatile());
14991466 }
15001467 }
15011468 DeadInsts.push_back(MI);
33 ; The resulting memset is only 4-byte aligned, despite containing
44 ; a 16-byte aligned store in the middle.
55
6 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %a01, i8 0, i64 16, i32 4, i1 false)
6 ; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 16, i32 4, i1 false)
77
88 define void @foo(i32* %p) {
99 %a0 = getelementptr i32* %p, i64 0