#define DEBUG_TYPE "lower-mem-intrinsics"
void llvm::createMemCpyLoopKnownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr,
    ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile,
    bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI,
    std::optional<uint32_t> AtomicElementSize) {
  // ...
  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");

  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize;
  if (LoopEndCount != 0) {
    // ...
    LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB);
    // ...
    LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                   PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    // ...
    StoreInst *Store = LoopBuilder.CreateAlignedStore(
        Load, DstGEP, PartDstAlign, DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap the loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *NewIndex =
        LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U));
    // ...
    Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount);
    // ...
  }

  uint64_t BytesCopied = LoopEndCount * LoopOpSize;
  uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied;
  if (RemainingBytes) {
    // Copy the residue with a sequence of progressively narrower operations.
    SmallVector<Type *, 5> RemainingOps;
    TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
                                          SrcAS, DstAS, SrcAlign.value(),
                                          DstAlign.value(), AtomicElementSize);

    for (auto *OpTy : RemainingOps) {
      // ...
      unsigned OperandSize = DL.getTypeStoreSize(OpTy);
      assert(
          (!AtomicElementSize || OperandSize % *AtomicElementSize == 0) &&
          "Atomic memcpy lowering is not supported for selected operand size");

      uint64_t GepIndex = BytesCopied / OperandSize;
      assert(GepIndex * OperandSize == BytesCopied &&
             "Division should have no Remainder!");

      Value *SrcGEP = RBuilder.CreateInBoundsGEP(
          OpTy, SrcAddr, ConstantInt::get(TypeOfCopyLen, GepIndex));
      // ...
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
      // ...
      Value *DstGEP = RBuilder.CreateInBoundsGEP(
          OpTy, DstAddr, ConstantInt::get(TypeOfCopyLen, GepIndex));
      // ...
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
      if (AtomicElementSize) {
        Load->setAtomic(AtomicOrdering::Unordered);
        Store->setAtomic(AtomicOrdering::Unordered);
      }
      BytesCopied += OperandSize;
    }
  }
  assert(BytesCopied == CopyLen->getZExtValue() &&
         "Bytes copied should match size in the call!");
}
static Value *getRuntimeLoopCount(const DataLayout &DL, IRBuilderBase &B,
                                  Value *Len, Value *OpSize,
                                  unsigned OpSizeVal) {
  // For powers of 2, lshr by log2 instead of using udiv.
  if (isPowerOf2_32(OpSizeVal))
    return B.CreateLShr(Len, Log2_32(OpSizeVal));
  return B.CreateUDiv(Len, OpSize);
}

static Value *getRuntimeLoopRemainder(const DataLayout &DL, IRBuilderBase &B,
                                      Value *Len, Value *OpSize,
                                      unsigned OpSizeVal) {
  // For powers of 2, mask with (OpSizeVal - 1) instead of using urem.
  if (isPowerOf2_32(OpSizeVal))
    return B.CreateAnd(Len, OpSizeVal - 1);
  return B.CreateURem(Len, OpSize);
}
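
// The strength reduction used above, checked in plain C++ (illustration
// only): for a power-of-two operand size, a right shift and a mask reproduce
// unsigned division and remainder exactly.
#include <cassert>
#include <cstdint>
int main() {
  const uint32_t OpSizeVal = 8;  // e.g. an i64 loop operand
  const uint32_t Log2OpSize = 3; // Log2_32(8)
  for (uint64_t Len = 0; Len < 1024; ++Len) {
    assert((Len >> Log2OpSize) == Len / OpSizeVal);     // lshr == udiv
    assert((Len & (OpSizeVal - 1)) == Len % OpSizeVal); // and == urem
  }
  return 0;
}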
void llvm::createMemCpyLoopUnknownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
    Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile,
    bool CanOverlap, const TargetTransformInfo &TTI,
    std::optional<uint32_t> AtomicElementSize) {
  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB =
      PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");
  // ...
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");
  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");
  // ...
  IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType);
  assert(ILengthType &&
         "expected size argument to memcpy to be an integer type!");
  Type *Int8Type = Type::getInt8Ty(Ctx);
  bool LoopOpIsInt8 = LoopOpType == Int8Type;
  ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
  Value *RuntimeLoopCount = LoopOpIsInt8
                                ? CopyLen
                                : getRuntimeLoopCount(DL, PLBuilder, CopyLen,
                                                      CILoopOpSize, LoopOpSize);
  // ...
  PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index");
  LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);

  Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
  LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                 PartSrcAlign, SrcIsVolatile);
  if (!CanOverlap) {
    // Set alias scope for loads.
    Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope));
  }
  Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
  StoreInst *Store =
      LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
  // ...
  if (AtomicElementSize) {
    Load->setAtomic(AtomicOrdering::Unordered);
    Store->setAtomic(AtomicOrdering::Unordered);
  }
  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
  // ...
  bool requiresResidual =
      !LoopOpIsInt8 && !(AtomicElementSize && LoopOpSize == AtomicElementSize);
  if (requiresResidual) {
    Type *ResLoopOpType = AtomicElementSize
                              ? Type::getIntNTy(Ctx, *AtomicElementSize * 8)
                              : Int8Type;
    unsigned ResLoopOpSize = DL.getTypeStoreSize(ResLoopOpType);
    assert((ResLoopOpSize == (AtomicElementSize ? *AtomicElementSize : 1)) &&
           "Store size is expected to match type size");

    Value *RuntimeResidual = getRuntimeLoopRemainder(DL, PLBuilder, CopyLen,
                                                     CILoopOpSize, LoopOpSize);
    Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);
    // ...
    BasicBlock *ResHeaderBB = BasicBlock::Create(
        Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr);
    // ...
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, ResHeaderBB);
    // ...
    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        ResHeaderBB);
    // ...
    RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero),
                           ResLoopBB, PostLoopBB);
    // ...
    PHINode *ResidualIndex =
        ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index");
    // ...
    Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
    Value *SrcGEP =
        ResBuilder.CreateInBoundsGEP(ResLoopOpType, SrcAddr, FullOffset);
    LoadInst *Load = ResBuilder.CreateAlignedLoad(ResLoopOpType, SrcGEP,
                                                  PartSrcAlign, SrcIsVolatile);
    // ...
    Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope));
    // ...
    Value *DstGEP =
        ResBuilder.CreateInBoundsGEP(ResLoopOpType, DstAddr, FullOffset);
    StoreInst *Store = ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
                                                     DstIsVolatile);
    // ...
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *ResNewIndex = ResBuilder.CreateAdd(
        ResidualIndex, ConstantInt::get(CopyLenType, ResLoopOpSize));
    ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);
    // ...
    ResBuilder.CreateCondBr(
        ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB,
        PostLoopBB);
  } else {
    // ...
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, PostLoopBB);
    // ...
    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        PostLoopBB);
  }
}
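
// A C++ analogue of the control flow emitted above (an illustration assuming
// an 8-byte loop operand, not the actual emitted IR): a wide main loop with a
// run-time trip count, then a byte-wise residual loop starting at
// RuntimeBytesCopied.
#include <cstddef>
#include <cstdint>
#include <cstring>
void memcpyExpanded(unsigned char *Dst, const unsigned char *Src, size_t Len) {
  const size_t LoopOpSize = sizeof(uint64_t);        // stand-in for LoopOpType
  size_t RuntimeLoopCount = Len / LoopOpSize;        // getRuntimeLoopCount
  size_t RuntimeResidual = Len % LoopOpSize;         // getRuntimeLoopRemainder
  size_t RuntimeBytesCopied = Len - RuntimeResidual;
  for (size_t I = 0; I != RuntimeLoopCount; ++I) {   // "loop-memcpy-expansion"
    uint64_t Tmp;
    std::memcpy(&Tmp, Src + I * LoopOpSize, LoopOpSize);
    std::memcpy(Dst + I * LoopOpSize, &Tmp, LoopOpSize);
  }
  for (size_t I = 0; I != RuntimeResidual; ++I)      // "loop-memcpy-residual"
    Dst[RuntimeBytesCopied + I] = Src[RuntimeBytesCopied + I];
}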
static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
                              Value *DstAddr, Value *CopyLen, Align SrcAlign,
                              Align DstAlign, bool SrcIsVolatile,
                              bool DstIsVolatile,
                              const TargetTransformInfo &TTI) {
  // ...
  // Jump to the forward-copy part (src >= dst) or the backwards-copy part
  // (src < dst) based on a single pointer comparison.
  ICmpInst *PtrCompare = new ICmpInst(InsertBefore, ICmpInst::ICMP_ULT,
                                      SrcAddr, DstAddr, "compare_src_dst");
  // ...
  CopyBackwardsBB->setName("copy_backwards");
  CopyForwardBB->setName("copy_forward");
  ExitBB->setName("memmove_done");

  unsigned PartSize = DL.getTypeStoreSize(EltTy);
  // ...
  // Initial comparison of n == 0 that lets us skip the loops altogether.
  Value *CompareN =
      Builder.CreateICmpEQ(CopyLen, ConstantInt::get(TypeOfCopyLen, 0),
                           "compare_n_to_0");

  // Copying backwards: decrement the index first, then load and store.
  Value *IndexPtr = LoopBuilder.CreateSub(
      LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
  Value *Element = LoopBuilder.CreateAlignedLoad(
      EltTy, LoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, IndexPtr),
      PartSrcAlign, "element");
  // ...
  LoopBuilder.CreateCondBr(
      LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)),
      ExitBB, LoopBB);
  // ...
  // Copying forward: load and store, then increment the index.
  PHINode *FwdCopyPhi =
      FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
  // ...
  Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
      FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment");
  // ...
  FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB);
}
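
// A C++ analogue of the two loops above (illustration only; the pointer
// comparison mirrors the unsigned icmp in the IR): the pointers are compared
// once and exactly one loop runs. Copying backwards is what makes the
// expansion safe when the source precedes an overlapping destination.
#include <cstddef>
void memmoveExpanded(unsigned char *Dst, const unsigned char *Src, size_t N) {
  if (Src < Dst) {                  // "compare_src_dst"
    for (size_t I = N; I != 0; --I) // "copy_backwards"
      Dst[I - 1] = Src[I - 1];
  } else {
    for (size_t I = 0; I != N; ++I) // "copy_forward"
      Dst[I] = Src[I];
  }
}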
static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
                             Value *CopyLen, Value *SetValue, Align DstAlign,
                             bool IsVolatile) {
  // ...
  Builder.CreateCondBr(
      Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
      LoopBB);
  // ...
  unsigned PartSize = DL.getTypeStoreSize(SetValue->getType());
  // ...
  LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);

  LoopBuilder.CreateAlignedStore(
      SetValue,
      LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
      PartAlign, IsVolatile);

  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
  // ...
}
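
// The memset expansion in C++ terms (illustration only): one store per index.
// The IR needs the explicit zero-length guard because the loop body executes
// before its exit test; in C++ the for-condition covers the N == 0 case.
#include <cstddef>
void memsetExpanded(unsigned char *Dst, unsigned char SetValue, size_t N) {
  for (size_t I = 0; I != N; ++I)
    Dst[I] = SetValue;
}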
// canOverlap: use ScalarEvolution to prove that source and destination are
// distinct, which permits the noalias/alias.scope metadata used above.
template <typename T>
static bool canOverlap(MemTransferBase<T> *Memcpy, ScalarEvolution *SE) {
  if (SE) {
    auto *SrcSCEV = SE->getSCEV(Memcpy->getRawSource());
    auto *DestSCEV = SE->getSCEV(Memcpy->getRawDest());
    if (SE->isKnownPredicateAt(CmpInst::ICMP_NE, SrcSCEV, DestSCEV, Memcpy))
      return false;
  }
  return true;
}

// expandMemMoveAsLoop (excerpt)
  bool DstIsVolatile = SrcIsVolatile;
  // ...
  if (SrcAS != DstAS) {
    // If the address spaces cannot alias, expand as a plain memcpy instead.
    if (ConstantInt *CI = dyn_cast<ConstantInt>(CopyLen)) {
      createMemCpyLoopKnownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr, CI,
                                SrcAlign, DstAlign, SrcIsVolatile,
                                DstIsVolatile, /*CanOverlap=*/false, TTI);
    } else {
      createMemCpyLoopUnknownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr,
                                  CopyLen, SrcAlign, DstAlign, SrcIsVolatile,
                                  DstIsVolatile, /*CanOverlap=*/false, TTI);
    }
    // ... and if no valid addrspacecast is available either, give up:
    LLVM_DEBUG(dbgs() << "Do not know how to expand memmove between different "
                         "address spaces\n");
    return false;
  }
  createMemMoveLoop(/*InsertBefore=*/Memmove, SrcAddr, DstAddr, CopyLen,
                    SrcAlign, DstAlign, SrcIsVolatile, DstIsVolatile, TTI);
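
// A minimal sketch of how a pass might drive these entry points (hypothetical
// driver; the declarations come from
// llvm/Transforms/Utils/LowerMemIntrinsics.h). expandMemCpyAsLoop and
// expandMemSetAsLoop do not erase the intrinsic, so the caller must; the
// memmove expansion can fail across address spaces, hence its bool result.
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
using namespace llvm;

static bool lowerMemIntrinsicsInFunction(Function &F,
                                         const TargetTransformInfo &TTI) {
  bool Changed = false;
  for (BasicBlock &BB : F) {
    for (Instruction &I : make_early_inc_range(BB)) {
      if (auto *Memcpy = dyn_cast<MemCpyInst>(&I)) {
        expandMemCpyAsLoop(Memcpy, TTI); // ScalarEvolution is optional
        Memcpy->eraseFromParent();
        Changed = true;
      } else if (auto *Memmove = dyn_cast<MemMoveInst>(&I)) {
        if (expandMemMoveAsLoop(Memmove, TTI)) {
          Memmove->eraseFromParent();
          Changed = true;
        }
      } else if (auto *Memset = dyn_cast<MemSetInst>(&I)) {
        expandMemSetAsLoop(Memset);
        Memset->eraseFromParent();
        Changed = true;
      }
    }
  }
  return Changed;
}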