llvm.org GIT mirror: llvm / afa2e7e
Remove alignment argument from memcpy/memmove/memset in favour of alignment attributes (Step 1)

Summary:
This is a resurrection of work first proposed and discussed in Aug 2015:
http://lists.llvm.org/pipermail/llvm-dev/2015-August/089384.html
and initially landed (but then backed out) in Nov 2015:
http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151109/312083.html

The @llvm.memcpy/memmove/memset intrinsics currently have an explicit argument
which is required to be a constant integer. It represents the alignment of the
dest (and source), and so must be the minimum of the actual alignment of the
two. This change is the first in a series that allows source and dest to each
have their own alignments by using the alignment attribute on their arguments.

In this change we:
1) Remove the alignment argument.
2) Add alignment attributes to the source & dest arguments. We, temporarily,
require that the alignments for source & dest be equal.

For example, code which used to read:
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 100, i32 4, i1 false)
will now read:
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 100, i1 false)

Downstream users may have to update their lit tests that check for
@llvm.memcpy/memmove/memset call/declaration patterns. The following extended
sed script may help with updating the majority of your tests, but it does not
catch all possible patterns, so some manual checking and updating will be
required:

s~declare void @llvm\.mem(set|cpy|move)\.p([^(]*)\((.*), i32, i1\)~declare void @llvm.mem\1.p\2(\3, i1)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* \3, i8 \4, i8 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* \3, i8 \4, i16 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* \3, i8 \4, i32 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* \3, i8 \4, i64 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* \3, i8 \4, i128 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* align \6 \3, i8 \4, i8 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* align \6 \3, i8 \4, i16 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* align \6 \3, i8 \4, i32 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* align \6 \3, i8 \4, i64 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* align \6 \3, i8 \4, i128 \5, i1 \7)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* \4, i8\5* \6, i8 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* \4, i8\5* \6, i16 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* \4, i8\5* \6, i32 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* \4, i8\5* \6, i64 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* \4, i8\5* \6, i128 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* align \8 \4, i8\5* align \8 \6, i8 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* align \8 \4, i8\5* align \8 \6, i16 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* align \8 \4, i8\5* align \8 \6, i32 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* align \8 \4, i8\5* align \8 \6, i64 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* align \8 \4, i8\5* align \8 \6, i128 \7, i1 \9)~g

The remaining changes in the series will:
Step 2) Expand the IRBuilder API to allow creation of memcpy/memmove with
differing source and dest alignments.
Step 3) Update Clang to use the new IRBuilder API.
Step 4) Update Polly to use the new IRBuilder API.
Step 5) Update LLVM passes that create memcpy/memmove calls to use the new
IRBuilder API, and those that use MemIntrinsicInst::[get|set]Alignment() to
use getDestAlignment() and getSourceAlignment() instead.
Step 6) Remove the single-alignment IRBuilder API for memcpy/memmove, and the
MemIntrinsicInst::[get|set]Alignment() methods.

Reviewers: pete, hfinkel, lhames, reames, bollu

Reviewed By: reames

Subscribers: niosHD, reames, jholewinski, qcolombet, jfb, sanjoy, arsenm, dschuff, dylanmckay, mehdi_amini, sdardis, nemanjai, david2050, nhaehnle, javed.absar, sbc100, jgravelle-google, eraman, aheejin, kbarton, JDevlieghere, asb, rbar, johnrusso, simoncook, jordy.potman.lists, apazos, sabuasal, llvm-commits

Differential Revision: https://reviews.llvm.org/D41675

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@322965 91177308-0d34-0410-b5e6-96231b3b80d8

Daniel Neilson, 1 year, 7 months ago
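For orientation, here is a minimal C++ sketch, not part of this commit's diff, that uses only the accessors this commit adds to the MemIntrinsicInst hierarchy to show how a pass now queries the attribute-based alignments (the helper name is hypothetical):

#include <cassert>
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

// Sketch: alignment now lives on the pointer arguments as `align` parameter
// attributes, read through the new accessors instead of a constant-int
// operand in slot 3.
static void inspectAlignments(Instruction &I) {
  if (auto *MTI = dyn_cast<MemTransferInst>(&I)) {
    unsigned DstAlign = MTI->getDestAlignment();   // attr on arg 0 (dest)
    unsigned SrcAlign = MTI->getSourceAlignment(); // attr on arg 1 (source)
    // Interim rule that the verifier enforces in this step:
    assert(DstAlign == SrcAlign && "dest/src alignments must match for now");
    (void)DstAlign;
    (void)SrcAlign;
  }
}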
395 changed file(s) with 2503 addition(s) and 2374 deletion(s).
10631063 to trap and to be properly aligned. This is not a valid attribute
10641064 for return values.
10651065
1066 .. _attr_align:
1067
10661068 ``align <n>``
10671069 This indicates that the pointer value may be assumed by the optimizer to
10681070 have the specified alignment.
1034010342 ::
1034110343
1034210344 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* <dest>, i8* <src>,
10343 i32 <len>, i32 <align>, i1 <isvolatile>)
10345 i32 <len>, i1 <isvolatile>)
1034410346 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* <dest>, i8* <src>,
10345 i64 <len>, i32 <align>, i1 <isvolatile>)
10347 i64 <len>, i1 <isvolatile>)
1034610348
1034710349 Overview:
1034810350 """""""""
1035110353 source location to the destination location.
1035210354
1035310355 Note that, unlike the standard libc function, the ``llvm.memcpy.*``
10354 intrinsics do not return a value, takes extra alignment/isvolatile
10356 intrinsics do not return a value, takes extra isvolatile
1035510357 arguments and the pointers can be in specified address spaces.
1035610358
1035710359 Arguments:
1035910361
1036010362 The first argument is a pointer to the destination, the second is a
1036110363 pointer to the source. The third argument is an integer argument
10362 specifying the number of bytes to copy, the fourth argument is the
10363 alignment of the source and destination locations, and the fifth is a
10364 specifying the number of bytes to copy, and the fourth is a
1036410365 boolean indicating a volatile access.
1036510366
10366 If the call to this intrinsic has an alignment value that is not 0 or 1,
10367 then the caller guarantees that both the source and destination pointers
10368 are aligned to that boundary.
10367 The :ref:`align <attr_align>` parameter attribute can be provided
10368 for the first and second arguments.
1036910369
1037010370 If the ``isvolatile`` parameter is ``true``, the ``llvm.memcpy`` call is
1037110371 a :ref:`volatile operation <volatile>`. The detailed access behavior is not
1039510395 ::
1039610396
1039710397 declare void @llvm.memmove.p0i8.p0i8.i32(i8* <dest>, i8* <src>,
10398 i32 <len>, i32 <align>, i1 <isvolatile>)
10398 i32 <len>, i1 <isvolatile>)
1039910399 declare void @llvm.memmove.p0i8.p0i8.i64(i8* <dest>, i8* <src>,
10400 i64 <len>, i32 <align>, i1 <isvolatile>)
10400 i64 <len>, i1 <isvolatile>)
1040110401
1040210402 Overview:
1040310403 """""""""
1040810408 overlap.
1040910409
1041010410 Note that, unlike the standard libc function, the ``llvm.memmove.*``
10411 intrinsics do not return a value, takes extra alignment/isvolatile
10412 arguments and the pointers can be in specified address spaces.
10411 intrinsics do not return a value, takes an extra isvolatile
10412 argument and the pointers can be in specified address spaces.
1041310413
1041410414 Arguments:
1041510415 """"""""""
1041610416
1041710417 The first argument is a pointer to the destination, the second is a
1041810418 pointer to the source. The third argument is an integer argument
10419 specifying the number of bytes to copy, the fourth argument is the
10420 alignment of the source and destination locations, and the fifth is a
10419 specifying the number of bytes to copy, and the fourth is a
1042110420 boolean indicating a volatile access.
1042210421
10423 If the call to this intrinsic has an alignment value that is not 0 or 1,
10424 then the caller guarantees that the source and destination pointers are
10425 aligned to that boundary.
10422 The :ref:`align <attr_align>` parameter attribute can be provided
10423 for the first and second arguments.
1042610424
1042710425 If the ``isvolatile`` parameter is ``true``, the ``llvm.memmove`` call
1042810426 is a :ref:`volatile operation <volatile>`. The detailed access behavior is
1045210450 ::
1045310451
1045410452 declare void @llvm.memset.p0i8.i32(i8* <dest>, i8 <val>,
10455 i32 <len>, i32 <align>, i1 <isvolatile>)
10453 i32 <len>, i1 <isvolatile>)
1045610454 declare void @llvm.memset.p0i8.i64(i8* <dest>, i8 <val>,
10457 i64 <len>, i32 <align>, i1 <isvolatile>)
10455 i64 <len>, i1 <isvolatile>)
1045810456
1045910457 Overview:
1046010458 """""""""
1046310461 particular byte value.
1046410462
1046510463 Note that, unlike the standard libc function, the ``llvm.memset``
10466 intrinsic does not return a value and takes extra alignment/volatile
10467 arguments. Also, the destination can be in an arbitrary address space.
10464 intrinsic does not return a value and takes an extra volatile
10465 argument. Also, the destination can be in an arbitrary address space.
1046810466
1046910467 Arguments:
1047010468 """"""""""
1047210470 The first argument is a pointer to the destination to fill, the second
1047310471 is the byte value with which to fill it, the third argument is an
1047410472 integer argument specifying the number of bytes to fill, and the fourth
10475 argument is the known alignment of the destination location.
10476
10477 If the call to this intrinsic has an alignment value that is not 0 or 1,
10478 then the caller guarantees that the destination pointer is aligned to
10479 that boundary.
10473 is a boolean indicating a volatile access.
10474
10475 The :ref:`align <attr_align>` parameter attribute can be provided
10476 for the first argument.
1048010477
1048110478 If the ``isvolatile`` parameter is ``true``, the ``llvm.memset`` call is
1048210479 a :ref:`volatile operation <volatile>`. The detailed access behavior is not
1048610483 """"""""""
1048710484
1048810485 The '``llvm.memset.*``' intrinsics fill "len" bytes of memory starting
10489 at the destination location. If the argument is known to be aligned to
10490 some boundary, this can be specified as the fourth argument, otherwise
10491 it should be set to 0 or 1 (both meaning no alignment).
10486 at the destination location.
1049210487
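As a companion to the documentation above, here is a minimal sketch of producing the new call form through the single-alignment IRBuilder API that this change retains (the helper name is hypothetical); the builder now attaches `align` attributes rather than emitting an i32 operand:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Sketch: emits, e.g.,
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %dst, i8* align 4 %src,
//                                        i64 %len, i1 false)
static CallInst *emitAlignedCopy(IRBuilder<> &B, Value *Dst, Value *Src,
                                 Value *Len) {
  return B.CreateMemCpy(Dst, Src, Len, /*Align=*/4, /*isVolatile=*/false);
}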
1049310488 '``llvm.sqrt.*``' Intrinsic
1049410489 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
242242 return cast<PointerType>(getRawDest()->getType())->getAddressSpace();
243243 }
244244
245 unsigned getDestAlignment() const { return getParamAlignment(ARG_DEST); }
246
245247 /// Set the specified arguments of the instruction.
246248 void setDest(Value *Ptr) {
247249 assert(getRawDest()->getType() == Ptr->getType() &&
249251 setArgOperand(ARG_DEST, Ptr);
250252 }
251253
254 void setDestAlignment(unsigned Align) {
255 removeParamAttr(ARG_DEST, Attribute::Alignment);
256 if (Align > 0)
257 addParamAttr(ARG_DEST,
258 Attribute::getWithAlignment(getContext(), Align));
259 }
260
252261 void setLength(Value *L) {
253262 assert(getLength()->getType() == L->getType() &&
254263 "setLength called with value of wrong type!");
346355 return cast<PointerType>(getRawSource()->getType())->getAddressSpace();
347356 }
348357
358 unsigned getSourceAlignment() const {
359 return getParamAlignment(ARG_SOURCE);
360 }
361
349362 void setSource(Value *Ptr) {
350363 assert(getRawSource()->getType() == Ptr->getType() &&
351364 "setSource called with pointer of wrong type!");
352365 setArgOperand(ARG_SOURCE, Ptr);
353366 }
354367
368 void setSourceAlignment(unsigned Align) {
369 removeParamAttr(ARG_SOURCE, Attribute::Alignment);
370 if (Align > 0)
371 addParamAttr(ARG_SOURCE,
372 Attribute::getWithAlignment(getContext(), Align));
373 }
374
355375 static bool classof(const IntrinsicInst *I) {
356376 switch (I->getIntrinsicID()) {
357377 case Intrinsic::memcpy_element_unordered_atomic:
393413 /// This is the common base class for memset/memcpy/memmove.
394414 class MemIntrinsic : public MemIntrinsicBase<MemIntrinsic> {
395415 private:
396 enum { ARG_ALIGN = 3, ARG_VOLATILE = 4 };
397
398 public:
399 ConstantInt *getAlignmentCst() const {
400 return cast<ConstantInt>(const_cast<Value *>(getArgOperand(ARG_ALIGN)));
401 }
402
403 unsigned getAlignment() const {
404 return getAlignmentCst()->getZExtValue();
405 }
416 enum { ARG_VOLATILE = 3 };
417
418 public:
419 // TODO: Remove this method entirely.
420 // Interim, for now, during transition from having an alignment
421 // arg to using alignment attributes.
422 unsigned getAlignment() const;
406423
407424 ConstantInt *getVolatileCst() const {
408425 return cast<ConstantInt>(
413430 return !getVolatileCst()->isZero();
414431 }
415432
416 void setAlignment(unsigned Align) {
417 setArgOperand(ARG_ALIGN, ConstantInt::get(getAlignmentType(), Align));
418 }
433 // TODO: Remove this method entirely. It is here only during transition
434 // from having an explicit alignment arg to using alignment attributes.
435 // For now we always set dest & source alignment attributes to match
436 void setAlignment(unsigned Align);
419437
420438 void setVolatile(Constant *V) { setArgOperand(ARG_VOLATILE, V); }
421
422 Type *getAlignmentType() const {
423 return getArgOperand(ARG_ALIGN)->getType();
424 }
425439
426440 // Methods for support type inquiry through isa, cast, and dyn_cast:
427441 static bool classof(const IntrinsicInst *I) {
481495 return cast<PointerType>(getRawSource()->getType())->getAddressSpace();
482496 }
483497
498 unsigned getSourceAlignment() const {
499 return getParamAlignment(ARG_SOURCE);
500 }
501
484502 void setSource(Value *Ptr) {
485503 assert(getRawSource()->getType() == Ptr->getType() &&
486504 "setSource called with pointer of wrong type!");
487505 setArgOperand(ARG_SOURCE, Ptr);
488506 }
489507
508 void setSourceAlignment(unsigned Align) {
509 removeParamAttr(ARG_SOURCE, Attribute::Alignment);
510 if (Align > 0)
511 addParamAttr(ARG_SOURCE,
512 Attribute::getWithAlignment(getContext(), Align));
513 }
514
490515 // Methods for support type inquiry through isa, cast, and dyn_cast:
491516 static bool classof(const IntrinsicInst *I) {
492517 return I->getIntrinsicID() == Intrinsic::memcpy ||
496521 return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
497522 }
498523 };
524
525 inline unsigned MemIntrinsic::getAlignment() const {
526 if (const auto *MTI = dyn_cast<MemTransferInst>(this))
527 return std::min(MTI->getDestAlignment(), MTI->getSourceAlignment());
528 else
529 return getDestAlignment();
530 }
531
532 inline void MemIntrinsic::setAlignment(unsigned Align) {
533 setDestAlignment(Align);
534 if (auto *MTI = dyn_cast<MemTransferInst>(this))
535 MTI->setSourceAlignment(Align);
536 }
499537
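A short illustration of the interim semantics defined just above (hypothetical helper; assumes MTI is a freshly created memcpy/memmove):

#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

// Sketch: getAlignment() folds the two attributes back into the old
// single-value meaning, i.e. the minimum of the dest and source alignments.
static unsigned interimAlignment(MemTransferInst &MTI) {
  MTI.setDestAlignment(8);
  MTI.setSourceAlignment(4);
  return MTI.getAlignment(); // 4 == min(dest = 8, source = 4)
}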
500538 /// This class wraps the llvm.memcpy intrinsic.
501539 class MemCpyInst : public MemTransferInst {
610648 return cast<PointerType>(getRawSource()->getType())->getAddressSpace();
611649 }
612650
651 unsigned getSourceAlignment() const {
652 return getParamAlignment(ARG_SOURCE);
653 }
654
613655 void setSource(Value *Ptr) {
614656 assert(getRawSource()->getType() == Ptr->getType() &&
615657 "setSource called with pointer of wrong type!");
616658 setArgOperand(ARG_SOURCE, Ptr);
617659 }
618660
661 void setSourceAlignment(unsigned Align) {
662 removeParamAttr(ARG_SOURCE, Attribute::Alignment);
663 if (Align > 0)
664 addParamAttr(ARG_SOURCE,
665 Attribute::getWithAlignment(getContext(), Align));
666 }
667
619668 static bool classof(const IntrinsicInst *I) {
620669 switch (I->getIntrinsicID()) {
621670 case Intrinsic::memcpy:
389389
390390 def int_memcpy : Intrinsic<[],
391391 [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
392 llvm_i32_ty, llvm_i1_ty],
392 llvm_i1_ty],
393393 [IntrArgMemOnly, NoCapture<0>, NoCapture<1>,
394394 WriteOnly<0>, ReadOnly<1>]>;
395395 def int_memmove : Intrinsic<[],
396396 [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
397 llvm_i32_ty, llvm_i1_ty],
397 llvm_i1_ty],
398398 [IntrArgMemOnly, NoCapture<0>, NoCapture<1>,
399399 ReadOnly<1>]>;
400400 def int_memset : Intrinsic<[],
401401 [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty,
402 llvm_i32_ty, llvm_i1_ty],
402 llvm_i1_ty],
403403 [IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>;
404404
405405 // FIXME: Add version of these floating point intrinsics which allow non-default
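Since the i32 alignment slot is gone from the definitions above, only the remaining overloaded types participate in the intrinsic's name mangling. A small sketch (hypothetical helper) of fetching a declaration, mirroring what the IRBuilder and AutoUpgrade changes elsewhere in this patch do:

#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Sketch: the dest pointer, source pointer, and length types drive the
// mangled name, e.g. @llvm.memcpy.p0i8.p0i8.i64.
static Function *getMemCpyDecl(Module &M, Type *DstPtrTy, Type *SrcPtrTy,
                               Type *LenTy) {
  Type *Tys[] = {DstPtrTy, SrcPtrTy, LenTy};
  return Intrinsic::getDeclaration(&M, Intrinsic::memcpy, Tys);
}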
50165016 Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment.
50175017 bool isVol = MCI.isVolatile();
50185018 bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
5019 // FIXME: Support passing different dest/src alignments to the memcpy DAG
5020 // node.
50195021 SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
50205022 false, isTC,
50215023 MachinePointerInfo(I.getArgOperand(0)),
50485050 Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment.
50495051 bool isVol = MMI.isVolatile();
50505052 bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
5053 // FIXME: Support passing different dest/src alignments to the memmove DAG
5054 // node.
50515055 SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
50525056 isTC, MachinePointerInfo(I.getArgOperand(0)),
50535057 MachinePointerInfo(I.getArgOperand(1)));
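Until the DAG nodes named in the FIXMEs above learn separate alignments, lowering keeps collapsing the two attribute alignments into one conservative value. A sketch of that computation (hypothetical helper, equivalent to the interim getAlignment()):

#include <algorithm>
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

// Sketch: the minimum of the two attribute alignments, with 0 normalized to
// 1 because @llvm.memcpy defines 0 and 1 to both mean "no alignment".
static unsigned dagAlignment(const MemTransferInst &MTI) {
  unsigned Align = std::min(MTI.getDestAlignment(), MTI.getSourceAlignment());
  return Align ? Align : 1;
}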
2121 #include "llvm/IR/Function.h"
2222 #include "llvm/IR/IRBuilder.h"
2323 #include "llvm/IR/Instruction.h"
24 #include "llvm/IR/IntrinsicInst.h"
2425 #include "llvm/IR/LLVMContext.h"
2526 #include "llvm/IR/Module.h"
2627 #include "llvm/IR/Verifier.h"
521522 return true;
522523 }
523524 }
525 // Updating the memory intrinsics (memcpy/memmove/memset) that have an
526 // alignment parameter to embedding the alignment as an attribute of
527 // the pointer args.
528 if (Name.startswith("memcpy.") && F->arg_size() == 5) {
529 rename(F);
530 // Get the types of dest, src, and len
531 ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
532 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
533 ParamTypes);
534 return true;
535 }
536 if (Name.startswith("memmove.") && F->arg_size() == 5) {
537 rename(F);
538 // Get the types of dest, src, and len
539 ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
540 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
541 ParamTypes);
542 return true;
543 }
544 if (Name.startswith("memset.") && F->arg_size() == 5) {
545 rename(F);
546 // Get the types of dest, and len
547 const auto *FT = F->getFunctionType();
548 Type *ParamTypes[2] = {
549 FT->getParamType(0), // Dest
550 FT->getParamType(2) // len
551 };
552 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
553 ParamTypes);
554 return true;
555 }
524556 break;
525557 }
526558 case 'n': {
21972229 return;
21982230 }
21992231
2200 CallInst *NewCall = nullptr;
2201 switch (NewFn->getIntrinsicID()) {
2202 default: {
2232 const auto &DefaultCase = [&NewFn, &CI]() -> void {
22032233 // Handle generic mangling change, but nothing else
22042234 assert(
22052235 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
22062236 "Unknown function for CallInst upgrade and isn't just a name change");
22072237 CI->setCalledFunction(NewFn);
2238 };
2239 CallInst *NewCall = nullptr;
2240 switch (NewFn->getIntrinsicID()) {
2241 default: {
2242 DefaultCase();
22082243 return;
22092244 }
22102245
23432378 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
23442379 CI->arg_operands().end());
23452380 NewCall = Builder.CreateCall(NewFn, Args);
2381 break;
2382 }
2383
2384 case Intrinsic::memcpy:
2385 case Intrinsic::memmove:
2386 case Intrinsic::memset: {
2387 // We have to make sure that the call signature is what we're expecting.
2388 // We only want to change the old signatures by removing the alignment arg:
2389 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
2390 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
2391 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
2392 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
2393 // Note: i8*'s in the above can be any pointer type
2394 if (CI->getNumArgOperands() != 5) {
2395 DefaultCase();
2396 return;
2397 }
2398 // Remove alignment argument (3), and add alignment attributes to the
2399 // dest/src pointers.
2400 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
2401 CI->getArgOperand(2), CI->getArgOperand(4)};
2402 NewCall = Builder.CreateCall(NewFn, Args);
2403 auto *MemCI = cast<MemIntrinsic>(NewCall);
2404 // All mem intrinsics support dest alignment.
2405 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
2406 MemCI->setDestAlignment(Align->getZExtValue());
2407 // Memcpy/Memmove also support source alignment.
2408 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
2409 MTI->setSourceAlignment(Align->getZExtValue());
23462410 break;
23472411 }
23482412 }
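Readers do not invoke this switch directly; they go through the AutoUpgrade entry points. A minimal sketch of driving the upgrade for a whole module (hypothetical helper; UpgradeCallsToIntrinsic should be a no-op for functions that need no upgrade):

#include <vector>
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Sketch: rewrites every call to an old 5-arg @llvm.memcpy/memmove/memset
// declaration into the 4-arg form, moving the alignment into attributes.
static void upgradeMemIntrinsics(Module &M) {
  std::vector<Function *> Worklist;
  for (Function &F : M)
    Worklist.push_back(&F); // collect first: upgrading erases old decls
  for (Function *F : Worklist)
    UpgradeCallsToIntrinsic(F);
}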
1414 #include "llvm/IR/IRBuilder.h"
1515 #include "llvm/IR/Function.h"
1616 #include "llvm/IR/GlobalVariable.h"
17 #include "llvm/IR/IntrinsicInst.h"
1718 #include "llvm/IR/Intrinsics.h"
1819 #include "llvm/IR/LLVMContext.h"
1920 #include "llvm/IR/Statepoint.h"
8283 bool isVolatile, MDNode *TBAATag, MDNode *ScopeTag,
8384 MDNode *NoAliasTag) {
8485 Ptr = getCastedInt8PtrValue(Ptr);
85 Value *Ops[] = { Ptr, Val, Size, getInt32(Align), getInt1(isVolatile) };
86 Value *Ops[] = {Ptr, Val, Size, getInt1(isVolatile)};
8687 Type *Tys[] = { Ptr->getType(), Size->getType() };
8788 Module *M = BB->getParent()->getParent();
8889 Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys);
8990
9091 CallInst *CI = createCallHelper(TheFn, Ops, this);
91
92
93 if (Align > 0)
94 cast<MemSetInst>(CI)->setDestAlignment(Align);
95
9296 // Set the TBAA info if present.
9397 if (TBAATag)
9498 CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
98102
99103 if (NoAliasTag)
100104 CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
101
105
102106 return CI;
103107 }
104108
106110 CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align,
107111 bool isVolatile, MDNode *TBAATag, MDNode *TBAAStructTag,
108112 MDNode *ScopeTag, MDNode *NoAliasTag) {
113 assert((Align == 0 || isPowerOf2_32(Align)) && "Must be 0 or a power of 2");
109114 Dst = getCastedInt8PtrValue(Dst);
110115 Src = getCastedInt8PtrValue(Src);
111116
112 Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) };
117 Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)};
113118 Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
114119 Module *M = BB->getParent()->getParent();
115120 Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
116121
117122 CallInst *CI = createCallHelper(TheFn, Ops, this);
118
123
124 if (Align > 0)
125 cast<MemCpyInst>(CI)->setAlignment(Align);
126
119127 // Set the TBAA info if present.
120128 if (TBAATag)
121129 CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
129137
130138 if (NoAliasTag)
131139 CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
132
140
133141 return CI;
134142 }
135143
153161 CallInst *CI = createCallHelper(TheFn, Ops, this);
154162
155163 // Set the alignment of the pointer args.
156 CI->addParamAttr(0, Attribute::getWithAlignment(CI->getContext(), DstAlign));
157 CI->addParamAttr(1, Attribute::getWithAlignment(CI->getContext(), SrcAlign));
164 auto *AMCI = cast<AtomicMemCpyInst>(CI);
165 AMCI->setDestAlignment(DstAlign);
166 AMCI->setSourceAlignment(SrcAlign);
158167
159168 // Set the TBAA info if present.
160169 if (TBAATag)
177186 CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align,
178187 bool isVolatile, MDNode *TBAATag, MDNode *ScopeTag,
179188 MDNode *NoAliasTag) {
189 assert((Align == 0 || isPowerOf2_32(Align)) && "Must be 0 or a power of 2");
180190 Dst = getCastedInt8PtrValue(Dst);
181191 Src = getCastedInt8PtrValue(Src);
182
183 Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) };
192
193 Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)};
184194 Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
185195 Module *M = BB->getParent()->getParent();
186196 Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys);
187197
188198 CallInst *CI = createCallHelper(TheFn, Ops, this);
189
199
200 auto *MMI = cast<MemMoveInst>(CI);
201 if (Align > 0)
202 MMI->setAlignment(Align);
203
190204 // Set the TBAA info if present.
191205 if (TBAATag)
192206 CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
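The net effect for CreateMemSet callers, in one hedged sketch (hypothetical helper): Align > 0 becomes an `align` attribute on the dest pointer, while Align == 0 leaves the attribute off entirely:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Sketch: emits, e.g.,
//   call void @llvm.memset.p0i8.i64(i8* align 16 %dst, i8 0, i64 %len,
//                                   i1 false)
static void emitAlignedFill(IRBuilder<> &B, Value *Dst, Value *Len) {
  B.CreateMemSet(Dst, B.getInt8(0), Len, /*Align=*/16, /*isVolatile=*/false);
}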
40474047 case Intrinsic::memcpy:
40484048 case Intrinsic::memmove:
40494049 case Intrinsic::memset: {
4050 ConstantInt *AlignCI = dyn_cast<ConstantInt>(CS.getArgOperand(3));
4051 Assert(AlignCI,
4052 "alignment argument of memory intrinsics must be a constant int",
4053 CS);
40544050 const auto *MI = cast<MemIntrinsic>(CS.getInstruction());
40554051 auto IsValidAlignment = [&](unsigned Alignment) -> bool {
40564052 return Alignment == 0 || isPowerOf2_32(Alignment);
40574053 };
4058 Assert(IsValidAlignment(MI->getAlignment()),
4059 "alignment argument of memory intrinsics must be 0 or a power of 2",
4054 Assert(IsValidAlignment(MI->getDestAlignment()),
4055 "alignment of arg 0 of memory intrinsic must be 0 or a power of 2",
40604056 CS);
4061 Assert(isa<ConstantInt>(CS.getArgOperand(4)),
4057 if (const auto *MTI = dyn_cast<MemTransferInst>(MI)) {
4058 Assert(IsValidAlignment(MTI->getSourceAlignment()),
4059 "alignment of arg 1 of memory intrinsic must be 0 or a power of 2",
4060 CS);
4061 // TODO: Remove this assert when we enhance IRBuilder API to create
4062 // memcpy/memmove with separate source & dest alignments.
4063 Assert(MTI->getSourceAlignment() == MTI->getDestAlignment(),
4064 "TEMPORARY: source and dest alignments must be the same");
4065 }
4066 Assert(isa<ConstantInt>(CS.getArgOperand(3)),
40624067 "isvolatile argument of memory intrinsics must be a constant int",
40634068 CS);
40644069 break;
34753475 return false;
34763476
34773477 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3478 return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
3478 return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
34793479 }
34803480 case Intrinsic::memset: {
34813481 const MemSetInst *MSI = cast<MemSetInst>(II);
34913491 // address spaces.
34923492 return false;
34933493
3494 return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
3494 return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
34953495 }
34963496 case Intrinsic::sin:
34973497 case Intrinsic::cos:
23512351 for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
23522352 i != e; ++i) {
23532353 // If we're lowering a memory intrinsic instead of a regular call, skip the
2354 // last two arguments, which shouldn't be passed to the underlying function.
2355 if (IntrMemName && e-i <= 2)
2354 // last argument, which shouldn't be passed to the underlying function.
2355 if (IntrMemName && e - i <= 1)
23562356 break;
23572357
23582358 ISD::ArgFlagsTy Flags;
16271627 if (!MTI->getLength()->getType()->isIntegerTy(32))
16281628 return false;
16291629 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
1630 return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
1630 return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
16311631 }
16321632 case Intrinsic::memset: {
16331633 const MemSetInst *MSI = cast<MemSetInst>(II);
16361636 return false;
16371637 if (!MSI->getLength()->getType()->isIntegerTy(32))
16381638 return false;
1639 return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
1639 return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
16401640 }
16411641 }
16421642 return false;
27252725 if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
27262726 return false;
27272727
2728 return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 2);
2728 return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 1);
27292729 }
27302730 case Intrinsic::memset: {
27312731 const MemSetInst *MSI = cast<MemSetInst>(II);
27402740 if (MSI->getDestAddressSpace() > 255)
27412741 return false;
27422742
2743 return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
2743 return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
27442744 }
27452745 case Intrinsic::stackprotector: {
27462746 // Emit code to store the stack guard onto the stack.
188188 unsigned MinAlign = std::min(DstAlign, SrcAlign);
189189 unsigned CopyAlign = MI->getAlignment();
190190
191 // FIXME: Check & simplify source & dest alignments separately
191192 if (CopyAlign < MinAlign) {
192193 MI->setAlignment(MinAlign);
193194 return MI;
13811381 Value *LenShadow = IRB.CreateMul(
13821382 I.getLength(),
13831383 ConstantInt::get(I.getLength()->getType(), DFSF.DFS.ShadowWidth / 8));
1384 Value *AlignShadow;
1385 if (ClPreserveAlignment) {
1386 AlignShadow = IRB.CreateMul(I.getAlignmentCst(),
1387 ConstantInt::get(I.getAlignmentType(),
1388 DFSF.DFS.ShadowWidth / 8));
1389 } else {
1390 AlignShadow = ConstantInt::get(I.getAlignmentType(),
1391 DFSF.DFS.ShadowWidth / 8);
1392 }
13931384 Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx);
13941385 DestShadow = IRB.CreateBitCast(DestShadow, Int8Ptr);
13951386 SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr);
1396 IRB.CreateCall(I.getCalledValue(), {DestShadow, SrcShadow, LenShadow,
1397 AlignShadow, I.getVolatileCst()});
1387 auto *MTI = cast<MemTransferInst>(
1388 IRB.CreateCall(I.getCalledValue(),
1389 {DestShadow, SrcShadow, LenShadow, I.getVolatileCst()}));
1390 // FIXME: Set the source & dest alignments of MTI based on the separate
1391 // source & dest alignments of I
1392 if (ClPreserveAlignment) {
1393 MTI->setAlignment(I.getAlignment() * (DFSF.DFS.ShadowWidth / 8));
1394 } else {
1395 MTI->setAlignment(DFSF.DFS.ShadowWidth / 8);
1396 }
13981397 }
13991398
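The shadow copy above scales the application alignment by the shadow width. That arithmetic, factored out as a hedged sketch (hypothetical helper, not DFSan's actual code):

// Sketch: each application byte has ShadowWidthBits/8 bytes of shadow, so a
// pointer aligned to AppAlign maps to shadow aligned to AppAlign * factor;
// when alignment is not preserved, only the factor itself is guaranteed.
static unsigned shadowAlignment(unsigned AppAlign, unsigned ShadowWidthBits,
                                bool PreserveAlignment) {
  unsigned Factor = ShadowWidthBits / 8;
  return PreserveAlignment ? AppAlign * Factor : Factor;
}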
14001399 void DFSanVisitor::visitReturnInst(ReturnInst &RI) {
1313 %a = alloca i8, align 1
1414 %b = alloca i8, align 1
1515 store i8 1, i8* %a, align 1
16 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i32 1, i1 false)
16 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i1 false)
1717 store i8 1, i8* %b, align 1
1818 ret void
1919 }
2929 %a = alloca i8, align 1
3030 %b = alloca i8, align 1
3131 store i8 1, i8* %a, align 1
32 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i32 1, i1 true)
32 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i1 true)
3333 store i8 1, i8* %b, align 1
3434 ret void
3535 }
4545 %a = alloca i8, align 1
4646 %b = alloca i8, align 1
4747 store i8 1, i8* %a, align 1
48 call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i32 1, i1 false)
48 call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i1 false)
4949 store i8 1, i8* %b, align 1
5050 ret void
5151 }
6161 %a = alloca i8, align 1
6262 %b = alloca i8, align 1
6363 store i8 1, i8* %a, align 1
64 call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i32 1, i1 true)
64 call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i1 true)
6565 store i8 1, i8* %b, align 1
6666 ret void
6767 }
7575 %a = alloca i8, align 1
7676 %b = alloca i8, align 1
7777 store i8 1, i8* %a, align 1
78 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 1, i32 1, i1 false)
78 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 1, i1 false)
7979 store i8 1, i8* %b, align 1
8080 ret void
8181 }
8989 %a = alloca i8, align 1
9090 %b = alloca i8, align 1
9191 store i8 1, i8* %a, align 1
92 call void @llvm.memmove.p0i8.p0i8.i64(i8* %b, i8* %a, i64 1, i32 1, i1 false)
92 call void @llvm.memmove.p0i8.p0i8.i64(i8* %b, i8* %a, i64 1, i1 false)
9393 store i8 1, i8* %b, align 1
9494 ret void
9595 }
103103 %a = alloca i8, align 1
104104 %b = alloca i8, align 1
105105 store i8 1, i8* %a, align 1
106 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 1, i32 1, i1 false)
107 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 1, i32 1, i1 false)
106 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 1, i1 false)
107 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 1, i1 false)
108108 store i8 1, i8* %b, align 1
109109 ret void
110110 }
111111
112 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
113 declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
112 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
113 declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
0 ; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
11 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
22
3 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #0
3 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) #0
44 declare void @llvm.assume(i1) #0
55
66 define void @test1(i8* %P, i8* %Q) nounwind ssp {
77 tail call void @llvm.assume(i1 true)
8 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
8 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
99 ret void
1010
1111 ; CHECK-LABEL: Function: test1:
1313 ; CHECK: MayAlias: i8* %P, i8* %Q
1414 ; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.assume(i1 true)
1515 ; CHECK: NoModRef: Ptr: i8* %Q <-> tail call void @llvm.assume(i1 true)
16 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
17 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
18 ; CHECK: NoModRef: tail call void @llvm.assume(i1 true) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
19 ; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.assume(i1 true)
16 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
17 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
18 ; CHECK: NoModRef: tail call void @llvm.assume(i1 true) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
19 ; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.assume(i1 true)
2020 }
2121
2222 attributes #0 = { nounwind }
11 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
22 target triple = "arm-apple-ios"
33
4 declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) #0
5 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #0
4 declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #0
5 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #0
66
77 declare void @a_readonly_func(i8*) #1
88 declare void @a_writeonly_func(i8*) #2
99
1010 define void @test2(i8* %P, i8* %Q) #3 {
11 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
12 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
11 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
12 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
1313 ret void
1414
1515 ; CHECK-LABEL: Function: test2:
1616
1717 ; CHECK: MayAlias: i8* %P, i8* %Q
18 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
19 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
20 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
21 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
22 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
23 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
18 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
19 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
20 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
21 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
22 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
23 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
2424 }
2525
2626 define void @test2a(i8* noalias %P, i8* noalias %Q) #3 {
27 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
28 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
27 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
28 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
2929 ret void
3030
3131 ; CHECK-LABEL: Function: test2a:
3232
3333 ; CHECK: NoAlias: i8* %P, i8* %Q
34 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
35 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
36 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
37 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
38 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
39 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
34 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
35 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
36 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
37 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
38 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
39 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
4040 }
4141
4242 define void @test2b(i8* noalias %P, i8* noalias %Q) #3 {
43 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
43 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
4444 %R = getelementptr i8, i8* %P, i64 12
45 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
45 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false)
4646 ret void
4747
4848 ; CHECK-LABEL: Function: test2b:
5050 ; CHECK: NoAlias: i8* %P, i8* %Q
5151 ; CHECK: NoAlias: i8* %P, i8* %R
5252 ; CHECK: NoAlias: i8* %Q, i8* %R
53 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
54 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
55 ; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
56 ; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
57 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
58 ; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
59 ; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
60 ; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
53 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
54 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
55 ; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
56 ; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false)
57 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false)
58 ; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false)
59 ; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false)
60 ; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
6161 }
6262
6363 define void @test2c(i8* noalias %P, i8* noalias %Q) #3 {
64 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
64 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
6565 %R = getelementptr i8, i8* %P, i64 11
66 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
66 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false)
6767 ret void
6868
6969 ; CHECK-LABEL: Function: test2c:
7171 ; CHECK: NoAlias: i8* %P, i8* %Q
7272 ; CHECK: NoAlias: i8* %P, i8* %R
7373 ; CHECK: NoAlias: i8* %Q, i8* %R
74 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
75 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
76 ; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
77 ; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
78 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
79 ; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
80 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
81 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
74 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
75 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
76 ; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
77 ; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false)
78 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false)
79 ; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false)
80 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false)
81 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
8282 }
8383
8484 define void @test2d(i8* noalias %P, i8* noalias %Q) #3 {
85 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
85 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
8686 %R = getelementptr i8, i8* %P, i64 -12
87 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
87 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false)
8888 ret void
8989
9090 ; CHECK-LABEL: Function: test2d:
9292 ; CHECK: NoAlias: i8* %P, i8* %Q
9393 ; CHECK: NoAlias: i8* %P, i8* %R
9494 ; CHECK: NoAlias: i8* %Q, i8* %R
95 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
96 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
97 ; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
98 ; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
99 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
100 ; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
101 ; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
102 ; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
95 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
96 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
97 ; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
98 ; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false)
99 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false)
100 ; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false)
101 ; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false)
102 ; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
103103 }
104104
105105 define void @test2e(i8* noalias %P, i8* noalias %Q) #3 {
106 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
106 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
107107 %R = getelementptr i8, i8* %P, i64 -11
108 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
108 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false)
109109 ret void
110110
111111 ; CHECK-LABEL: Function: test2e:
113113 ; CHECK: NoAlias: i8* %P, i8* %Q
114114 ; CHECK: NoAlias: i8* %P, i8* %R
115115 ; CHECK: NoAlias: i8* %Q, i8* %R
116 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
117 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
118 ; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
119 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
120 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
121 ; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
122 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false)
123 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
116 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
117 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
118 ; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
119 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false)
120 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false)
121 ; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false)
122 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false)
123 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
124124 }
125125
126126 define void @test3(i8* %P, i8* %Q) #3 {
127 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
128 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
127 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false)
128 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
129129 ret void
130130
131131 ; CHECK-LABEL: Function: test3:
132132
133133 ; CHECK: MayAlias: i8* %P, i8* %Q
134 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
135 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
136 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
137 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
138 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
139 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
134 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false)
135 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false)
136 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
137 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
138 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
139 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false)
140140 }
141141
142142 define void @test3a(i8* noalias %P, i8* noalias %Q) #3 {
143 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
144 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
143 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false)
144 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
145145 ret void
146146
147147 ; CHECK-LABEL: Function: test3a:
148148
149149 ; CHECK: NoAlias: i8* %P, i8* %Q
150 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
151 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
152 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
153 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
154 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
155 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
150 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false)
151 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false)
152 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
153 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
154 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
155 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false)
156156 }
157157
158158 define void @test4(i8* %P, i8* noalias %Q) #3 {
159 tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false)
160 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
159 tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false)
160 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
161161 ret void
162162
163163 ; CHECK-LABEL: Function: test4:
164164
165165 ; CHECK: NoAlias: i8* %P, i8* %Q
166 ; CHECK: Just Mod (MustAlias): Ptr: i8* %P <-> tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false)
167 ; CHECK: NoModRef: Ptr: i8* %Q <-> tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false)
168 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
169 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
170 ; CHECK: Just Mod: tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
171 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false)
166 ; CHECK: Just Mod (MustAlias): Ptr: i8* %P <-> tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false)
167 ; CHECK: NoModRef: Ptr: i8* %Q <-> tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false)
168 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
169 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
170 ; CHECK: Just Mod: tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
171 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false)
172172 }
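; Reading the results above: the memset is Just Mod (MustAlias) on %P because
; it writes exactly the 8 bytes starting at %P, and it is NoModRef on %Q only
; because %Q is declared noalias; the two transfers still Mod each other,
; since both write through %P.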
173173
174174 define void @test5(i8* %P, i8* %Q, i8* %R) #3 {
175 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
176 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
175 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
176 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
177177 ret void
178178
179179 ; CHECK-LABEL: Function: test5:
180180
181181 ; CHECK: MayAlias: i8* %P, i8* %Q
182182 ; CHECK: MayAlias: i8* %P, i8* %R
183183 ; CHECK: MayAlias: i8* %Q, i8* %R
184 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
185 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
186 ; CHECK: Both ModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
187 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
188 ; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
189 ; CHECK: Just Ref: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
190 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
191 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
184 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
185 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
186 ; CHECK: Both ModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
187 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
188 ; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
189 ; CHECK: Just Ref: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
190 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
191 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
192192 }
193193
194194 define void @test5a(i8* noalias %P, i8* noalias %Q, i8* noalias %R) nounwind ssp {
195 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
196 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
195 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
196 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
197197 ret void
198198
199199 ; CHECK-LABEL: Function: test5a:
200200
201201 ; CHECK: NoAlias: i8* %P, i8* %Q
202202 ; CHECK: NoAlias: i8* %P, i8* %R
203203 ; CHECK: NoAlias: i8* %Q, i8* %R
204 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
205 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
206 ; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
207 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
208 ; CHECK: NoModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
209 ; CHECK: Just Ref: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
210 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
211 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
204 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
205 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
206 ; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
207 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
208 ; CHECK: NoModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
209 ; CHECK: Just Ref: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
210 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
211 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
212212 }
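; Contrast with test5 above: once all three pointers are noalias, each memcpy
; is NoModRef on the one pointer it does not touch, instead of the
; conservative Both ModRef answers BasicAA had to give for plain arguments.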
213213
214214 define void @test6(i8* %P) #3 {
215 call void @llvm.memset.p0i8.i64(i8* %P, i8 -51, i64 32, i32 8, i1 false)
215 call void @llvm.memset.p0i8.i64(i8* align 8 %P, i8 -51, i64 32, i1 false)
216216 call void @a_readonly_func(i8* %P)
217217 ret void
218218
219219 ; CHECK-LABEL: Function: test6:
220220
221 ; CHECK: Just Mod (MustAlias): Ptr: i8* %P <-> call void @llvm.memset.p0i8.i64(i8* %P, i8 -51, i64 32, i32 8, i1 false)
221 ; CHECK: Just Mod (MustAlias): Ptr: i8* %P <-> call void @llvm.memset.p0i8.i64(i8* align 8 %P, i8 -51, i64 32, i1 false)
222222 ; CHECK: Just Ref: Ptr: i8* %P <-> call void @a_readonly_func(i8* %P)
223 ; CHECK: Just Mod: call void @llvm.memset.p0i8.i64(i8* %P, i8 -51, i64 32, i32 8, i1 false) <-> call void @a_readonly_func(i8* %P)
224 ; CHECK: Just Ref: call void @a_readonly_func(i8* %P) <-> call void @llvm.memset.p0i8.i64(i8* %P, i8 -51, i64 32, i32 8, i1 false)
223 ; CHECK: Just Mod: call void @llvm.memset.p0i8.i64(i8* align 8 %P, i8 -51, i64 32, i1 false) <-> call void @a_readonly_func(i8* %P)
224 ; CHECK: Just Ref: call void @a_readonly_func(i8* %P) <-> call void @llvm.memset.p0i8.i64(i8* align 8 %P, i8 -51, i64 32, i1 false)
225225 }
226226
227227 define void @test7(i8* %P) #3 {
55 ; The load and store addresses in the loop body could alias, so the load
66 ; can't be hoisted above the store and out of the loop.
77
8 declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i32, i1)
8 declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1)
99
1010 define i32 @foo(i32 %x, i32 %z, i32 %n) {
1111 entry:
1212 %pool = alloca [59 x i32], align 4
1313 %tmp = bitcast [59 x i32]* %pool to i8*
14 call void @llvm.memset.p0i8.i32(i8* nonnull %tmp, i8 0, i32 236, i32 4, i1 false)
14 call void @llvm.memset.p0i8.i32(i8* align 4 nonnull %tmp, i8 0, i32 236, i1 false)
1515 %cmp3 = icmp eq i32 %n, 0
1616 br i1 %cmp3, label %for.end, label %for.body.lr.ph
1717
1111 ret void
1212 }
1313
14 ; CHECK: NoModRef: call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false) <-> call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false)
15 ; CHECK: NoModRef: call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false) <-> call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false)
14 ; CHECK: NoModRef: call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i1 false) <-> call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i1 false)
15 ; CHECK: NoModRef: call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i1 false) <-> call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i1 false)
1616
17 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
17 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
1818
1919 @A = external global i8
2020 @B = external global i8
2121 define void @test1() {
22 call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false)
23 call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false)
22 call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i1 false)
23 call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i1 false)
2424 ret void
2525 }
0 ; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
11 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
22
3 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #0
3 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) #0
44 declare void @llvm.experimental.guard(i1, ...)
55 declare void @unknown_but_readonly() readonly
66
77 define void @test1(i8* %P, i8* %Q) {
88 tail call void(i1,...) @llvm.experimental.guard(i1 true) [ "deopt"() ]
9 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
9 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
1010 ret void
1111
1212 ; CHECK-LABEL: Function: test1:
1313
1414 ; CHECK: Just Ref: Ptr: i8* %P <-> tail call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ]
1515 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ]
16 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
17 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
18 ; CHECK: Just Ref: tail call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ] <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
19 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ]
16 ; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
17 ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
18 ; CHECK: Just Ref: tail call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ] <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
19 ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ]
2020 }
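; The pattern above follows from @llvm.experimental.guard being modeled as a
; readonly call: it may only Ref %P and %Q (it can deoptimize but not write),
; while the memcpy Mods %P, so each ordered pair resolves to Ref or Mod
; accordingly.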
2121
2222 define void @test2() {
1010
1111 store i32 0, i32* %A
1212
13 call void @llvm.memset.p0i8.i32(i8* %P, i8 0, i32 42, i32 1, i1 false)
13 call void @llvm.memset.p0i8.i32(i8* %P, i8 0, i32 42, i1 false)
1414
1515 %B = load i32, i32* %A
1616 ret i32 %B
2626
2727 store i8 2, i8* %B ;; Not written to by memcpy
2828
29 call void @llvm.memcpy.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i32 0, i1 false)
29 call void @llvm.memcpy.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i1 false)
3030
3131 %C = load i8, i8* %B
3232 ret i8 %C
3737 ; CHECK-LABEL: @test2
3838 %P2 = getelementptr i8, i8* %P, i32 127
3939 store i8 1, i8* %P2 ;; Not dead across memset
40 call void @llvm.memset.p0i8.i8(i8* %P, i8 2, i8 127, i32 0, i1 false)
40 call void @llvm.memset.p0i8.i8(i8* %P, i8 2, i8 127, i1 false)
4141 %A = load i8, i8* %P2
4242 ret i8 %A
4343 ; CHECK: ret i8 1
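; The arithmetic: the memset covers bytes [%P, %P+127), i.e. offsets 0..126,
; while %P2 is byte 127, one past the fill. The store of 1 therefore survives
; and forwards straight to the load, folding the return to 1.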
5050 ;; FIXME: DSE isn't zapping this dead store.
5151 store i8 1, i8* %P2 ;; Dead, clobbered by memset.
5252
53 call void @llvm.memset.p0i8.i8(i8* %P, i8 2, i8 127, i32 0, i1 false)
53 call void @llvm.memset.p0i8.i8(i8* %P, i8 2, i8 127, i1 false)
5454 %A = load i8, i8* %P2
5555 ret i8 %A
5656 ; CHECK-NOT: load
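; Here, by contrast, the memset does clobber the earlier store (per the FIXME,
; DSE still misses it), and the load is satisfied from the memset fill byte
; instead, which is why no load instruction should remain.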
9090
9191 define i32 @test4(i8* %P) {
9292 %tmp = load i32, i32* @G1
93 call void @llvm.memset.p0i8.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8 0, i32 4000, i32 1, i1 false)
93 call void @llvm.memset.p0i8.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8 0, i32 4000, i1 false)
9494 %tmp2 = load i32, i32* @G1
9595 %sub = sub i32 %tmp2, %tmp
9696 ret i32 %sub
105105 ; write to G1.
106106 define i32 @test5(i8* %P, i32 %Len) {
107107 %tmp = load i32, i32* @G1
108 call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8* bitcast (i32* @G1 to i8*), i32 %Len, i32 1, i1 false)
108 call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8* bitcast (i32* @G1 to i8*), i32 %Len, i1 false)
109109 %tmp2 = load i32, i32* @G1
110110 %sub = sub i32 %tmp2, %tmp
111111 ret i32 %sub
226226 ; CHECK: ret i32 0
227227 }
228228
229 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
230 declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i32, i1) nounwind
231 declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i32, i1) nounwind
232 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
229 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
230 declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i1) nounwind
231 declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i1) nounwind
232 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
22
33 ; Check that intrinsics aren't added to the call graph
44
5 declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
5 declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
66
77 define void @f(i8* %out, i8* %in) {
8 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %in, i32 100, i32 4, i1 false)
8 call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %out, i8* align 4 %in, i32 100, i1 false)
99 ret void
1010 }
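; A note on what this file relies on: LLVM's CallGraph does not record edges
; for calls to intrinsics, so the memcpy above should leave @f's node with no
; callees in the printed graph.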
1111
4242 %scevgep = getelementptr [6 x [6 x [7 x i8]]], [6 x [6 x [7 x i8]]]* @j, i32 0, i32 0, i32 %5, i32 %8
4343 %9 = add i32 %f.promoted, %smax
4444 %10 = add i32 %9, 2
45 call void @llvm.memset.p0i8.i32(i8* %scevgep, i8 %conv6, i32 %10, i32 1, i1 false)
46 ; CHECK: call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([6 x [6 x [7 x i8]]], [6 x [6 x [7 x i8]]]* @j, i32 0, i{{32|64}} 5, i{{32|64}} 4, i32 1), i8 %conv6, i32 1, i32 1, i1 false)
45 call void @llvm.memset.p0i8.i32(i8* %scevgep, i8 %conv6, i32 %10, i1 false)
46 ; CHECK: call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([6 x [6 x [7 x i8]]], [6 x [6 x [7 x i8]]]* @j, i32 0, i{{32|64}} 5, i{{32|64}} 4, i32 1), i8 %conv6, i32 1, i1 false)
4747 ; CHECK-NOT: call void @llvm.memset.p0i8.i32(i8* getelementptr ([6 x [6 x [7 x i8]]], [6 x [6 x [7 x i8]]]* @j, i64 1, i64 4, i64 4, i32 1)
4848 ret i32 0
4949 }
5050 ; Function Attrs: argmemonly nounwind
51 declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i32, i1)
51 declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1)
695695 ret void
696696 }
697697
698 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
698 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
2121 %c = alloca [1 x i32], align 4
2222 store i32 0, i32* %retval, align 4
2323 %0 = bitcast [1 x i32]* %c to i8*
24 call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 4, i32 4, i1 false)
24 call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 0, i64 4, i1 false)
2525 store i32 1, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 2), align 4
2626 store i32 0, i32* @b, align 4
2727 br label %for.cond
5858 }
5959
6060 ; Function Attrs: nounwind argmemonly
61 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind argmemonly
61 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind argmemonly
6262
6363 ; Function Attrs: noreturn nounwind
6464 declare void @abort() noreturn nounwind
0 ; RUN: opt < %s -basicaa -globals-aa -gvn -S -disable-verify | FileCheck %s
11
2 declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
2 declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
33 define void @foo(i8* %x, i8* %y) {
4 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x, i8* %y, i32 1, i32 1, i1 false);
4 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x, i8* %y, i32 1, i1 false);
55 ret void
66 }
77
99 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
1010
1111 declare i32 @printf(i8* nocapture, ...) nounwind
12 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
12 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
1313
1414
1515 ; Make sure that the initial memcpy call does not go away
2020
2121 define i32 @main() nounwind uwtable ssp {
2222 main_entry:
23 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.anon* @b to i8*), i8* bitcast (%struct.anon* @a to i8*), i64 12, i32 4, i1 false)
23 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 bitcast (%struct.anon* @b to i8*), i8* align 4 bitcast (%struct.anon* @a to i8*), i64 12, i1 false)
2424 %0 = load volatile i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @b, i64 0, i32 0), align 4
2525 store i32 %0, i32* @c, align 4
26 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.anon* @b to i8*), i8* bitcast (%struct.anon* @a to i8*), i64 12, i32 4, i1 false) nounwind
26 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 bitcast (%struct.anon* @b to i8*), i8* align 4 bitcast (%struct.anon* @a to i8*), i64 12, i1 false) nounwind
2727 %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %0) nounwind
2828 ret i32 0
2929 }
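; Why the first memcpy must stay: the volatile load observes @b between the
; two copies, so the value stored to @c and printed depends on the first copy
; having already executed; eliminating it would change that observation.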
22 %s = type { i8 }
33
44 ; Function Attrs: argmemonly nounwind
5 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32, i1) #0
5 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) #0
66
77 ; Function Attrs: argmemonly nounwind
8 declare void @llvm.memset.p0i8.i8.i32(i8* nocapture writeonly, i8, i32, i32, i1) #0
8 declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1) #0
99
1010 declare void @f1(%s* noalias nocapture sret, %s* nocapture readnone)
1111
1515 %tmp = alloca %s
1616 %0 = bitcast %s* %c to i8*
1717 %1 = bitcast %s* %tmp to i8*
18 call void @llvm.memset.p0i8.i8.i32(i8* %0, i8 0, i32 1, i32 1, i1 false)
18 call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 1, i1 false)
1919 call void @f1(%s* sret %c, %s* %c)
2020 ret void
2121 }
3333 %tmp = alloca %s
3434 %0 = bitcast %s* %c to i8*
3535 %1 = bitcast %s* %tmp to i8*
36 call void @llvm.memset.p0i8.i8.i32(i8* %0, i8 0, i32 1, i32 1, i1 false)
36 call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 1, i1 false)
3737 call void @f3(%s* sret %c, %s* byval %c)
3838 ret void
3939 }
0 ; RUN: opt -disable-output -basicaa -print-memoryssa %s 2>&1 | FileCheck %s
11
2 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
2 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
33
44 define void @source_clobber(i8* %a, i8* %b) {
55 ; CHECK-LABEL: @source_clobber(
66 ; CHECK-NEXT: ; 1 = MemoryDef(liveOnEntry)
7 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 128, i32 1, i1 false)
7 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 128, i1 false)
88 ; CHECK-NEXT: ; MemoryUse(liveOnEntry)
99 ; CHECK-NEXT: [[X:%.*]] = load i8, i8* %b
1010 ; CHECK-NEXT: ret void
1111 ;
12 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 128, i32 1, i1 false)
12 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 128, i1 false)
1313 %x = load i8, i8* %b
1414 ret void
1515 }
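; The interesting check here is MemoryUse(liveOnEntry): the memcpy is a
; MemoryDef, but it only reads %b, so MemorySSA can tell it does not clobber
; the later load of %b and links that load to liveOnEntry instead.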
171171 %55 = mul i32 %y.21, %w ; [#uses=1]
172172 %.sum5 = add i32 %55, %.sum3 ; [#uses=1]
173173 %56 = getelementptr i8, i8* %j, i32 %.sum5 ; [#uses=1]
174 tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %56, i8* %54, i32 %w, i32 1, i1 false)
174 tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %56, i8* %54, i32 %w, i1 false)
175175 %57 = add i32 %y.21, 1 ; [#uses=2]
176176 br label %bb24
177177
188188 %60 = getelementptr i8, i8* %j, i32 %.sum4 ; [#uses=1]
189189 %61 = mul i32 %x, %w ; [#uses=1]
190190 %62 = sdiv i32 %61, 2 ; [#uses=1]
191 tail call void @llvm.memset.p0i8.i32(i8* %60, i8 -128, i32 %62, i32 1, i1 false)
191 tail call void @llvm.memset.p0i8.i32(i8* %60, i8 -128, i32 %62, i1 false)
192192 ret void
193193
194194 bb29: ; preds = %bb20, %entry
206206 %67 = getelementptr i8, i8* %r, i32 %66 ; [#uses=1]
207207 %68 = mul i32 %y.310, %w ; [#uses=1]
208208 %69 = getelementptr i8, i8* %j, i32 %68 ; [#uses=1]
209 tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %69, i8* %67, i32 %w, i32 1, i1 false)
209 tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %69, i8* %67, i32 %w, i1 false)
210210 %70 = add i32 %y.310, 1 ; [#uses=2]
211211 br label %bb31
212212
222222 %73 = getelementptr i8, i8* %j, i32 %72 ; [#uses=1]
223223 %74 = mul i32 %x, %w ; [#uses=1]
224224 %75 = sdiv i32 %74, 2 ; [#uses=1]
225 tail call void @llvm.memset.p0i8.i32(i8* %73, i8 -128, i32 %75, i32 1, i1 false)
225 tail call void @llvm.memset.p0i8.i32(i8* %73, i8 -128, i32 %75, i1 false)
226226 ret void
227227
228228 return: ; preds = %bb20
229229 ret void
230230 }
231231
232 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
233 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
232 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
233 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
4040 entry:
4141 %bins = alloca [16 x i64], align 16
4242 %0 = bitcast [16 x i64]* %bins to i8*
43 call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 128, i32 16, i1 false)
43 call void @llvm.memset.p0i8.i64(i8* align 16 %0, i8 0, i64 128, i1 false)
4444 br label %preheader
4545
4646 preheader: ; preds = %for.inc.1, %entry
8787 }
8888
8989 ; Function Attrs: nounwind
90 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #0
90 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #0
9191
9292 declare void @may_exit() nounwind
9393
4949 bb2.i: ; preds = %bb3.i
5050 %1 = getelementptr %struct.SHA_INFO, %struct.SHA_INFO* %sha_info, i64 0, i32 3
5151 %2 = bitcast [16 x i32]* %1 to i8*
52 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %buffer_addr.0.i, i64 64, i32 1, i1 false)
52 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %buffer_addr.0.i, i64 64, i1 false)
5353 %3 = getelementptr %struct.SHA_INFO, %struct.SHA_INFO* %sha_info, i64 0, i32 3, i64 0
5454 %4 = bitcast i32* %3 to i8*
5555 br label %codeRepl
7373
7474 declare void @sha_stream_bb3_2E_i_bb1_2E_i_2E_i(i8*) nounwind
7575
76 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
76 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
7777
78 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
78 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
7979
4444
4545 ; CHECK: define void @test2_yes(i8* nocapture %p, i8* nocapture %q, i64 %n) #4 {
4646 define void @test2_yes(i8* %p, i8* %q, i64 %n) nounwind {
47 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 %n, i32 1, i1 false), !tbaa !1
47 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 %n, i1 false), !tbaa !1
4848 ret void
4949 }
5050
5151 ; CHECK: define void @test2_no(i8* nocapture %p, i8* nocapture readonly %q, i64 %n) #3 {
5252 define void @test2_no(i8* %p, i8* %q, i64 %n) nounwind {
53 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 %n, i32 1, i1 false), !tbaa !2
53 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 %n, i1 false), !tbaa !2
5454 ret void
5555 }
5656
6969 }
7070
7171 declare void @callee(i32* %p) nounwind
72 declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) nounwind
72 declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) nounwind
7373
7474 ; CHECK: attributes #0 = { norecurse nounwind readnone }
7575 ; CHECK: attributes #1 = { norecurse nounwind }
55 ; it has a TBAA tag which declares that it is unrelated.
66
77 ; CHECK: @foo
8 ; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 16, i32 1, i1 false), !tbaa !0
8 ; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %p, i8* align 1 %q, i64 16, i1 false), !tbaa !0
99 ; CHECK-NEXT: store i8 2, i8* %s, align 1, !tbaa [[TAGA:!.*]]
1010 ; CHECK-NEXT: ret void
1111 define void @foo(i8* nocapture %p, i8* nocapture %q, i8* nocapture %s) nounwind {
12 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 16, i32 1, i1 false), !tbaa !2
12 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 16, i1 false), !tbaa !2
1313 store i8 2, i8* %s, align 1, !tbaa !1
14 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %p, i64 16, i32 1, i1 false), !tbaa !2
14 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %p, i64 16, i1 false), !tbaa !2
1515 ret void
1616 }
1717
18 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
18 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
1919
2020 ; CHECK: [[TAGA]] = !{[[TYPEA:!.*]], [[TYPEA]], i64 0}
2121 ; CHECK: [[TYPEA]] = !{!"A", !{{.*}}}
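; Net effect shown by the CHECK-NEXT sequence above: the TBAA tags prove the
; store to %s independent of the copied bytes, so the copy-back from %p to %q
; is recognized as redundant and deleted, leaving one memcpy plus the store.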
66 define void @memcpyintrinsic(i8* %dest, i8* %src, i32 %len) {
77 entry:
88
9 ; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 1, i1 true)
10 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 1, i1 true)
9 ; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 %len, i1 true)
10 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i1 true)
1111
1212 ret void
1313 }
1414
15 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 %align, i1 %isvolatile)
15 declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
0 ; RUN: opt -S < %s | FileCheck %s
1
2 ; Test to ensure that calls to the memcpy/memmove/memset intrinsics are auto-upgraded
3 ; to remove the alignment parameter in favour of align attributes on the pointer args.
4
5 ; Make sure a non-zero alignment is propagated
6 define void @test(i8* %p1, i8* %p2, i8* %p3) {
7 ; CHECK-LABEL: @test
8 ; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %p1, i8 55, i64 100, i1 false)
9 ; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %p1, i8* align 4 %p2, i64 50, i1 false)
10 ; CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 4 %p2, i8* align 4 %p3, i64 1000, i1 false)
11 call void @llvm.memset.p0i8.i64(i8* %p1, i8 55, i64 100, i32 4, i1 false)
12 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p1, i8* %p2, i64 50, i32 4, i1 false)
13 call void @llvm.memmove.p0i8.p0i8.i64(i8* %p2, i8* %p3, i64 1000, i32 4, i1 false)
14 ret void
15 }
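; As the CHECK lines verify, the single i32 alignment operand (4) is not
; dropped but re-expressed as an align 4 attribute on every pointer argument
; of the upgraded call.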
16
17 ; Make sure that a zero alignment is handled properly
18 define void @test2(i8* %p1, i8* %p2, i8* %p3) {
19 ; CHECK-LABEL: @test2
20 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %p1, i8 55, i64 100, i1 false)
21 ; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p1, i8* %p2, i64 50, i1 false)
22 ; CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* %p2, i8* %p3, i64 1000, i1 false)
23 call void @llvm.memset.p0i8.i64(i8* %p1, i8 55, i64 100, i32 0, i1 false)
24 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p1, i8* %p2, i64 50, i32 0, i1 false)
25 call void @llvm.memmove.p0i8.p0i8.i64(i8* %p2, i8* %p3, i64 1000, i32 0, i1 false)
26 ret void
27 }
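; An explicit alignment of 0 never promised anything, so the upgrader emits
; no align attribute at all rather than a meaningless one.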
28
29 ; CHECK: declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1)
30 ; CHECK: declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
31 ; CHECK: declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
32 declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
33 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
34 declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
35
11461146 ret void()* @allocai64
11471147 }
11481148
1149 declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32 %align, i1 %volatile)
1149 declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
11501150 define void @test_memcpy(i8* %dst, i8* %src, i64 %size) {
11511151 ; CHECK-LABEL: name: test_memcpy
11521152 ; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY %x0
11561156 ; CHECK: %x1 = COPY [[SRC]]
11571157 ; CHECK: %x2 = COPY [[SIZE]]
11581158 ; CHECK: BL &memcpy, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit %x1, implicit %x2
1159 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i32 1, i1 0)
1160 ret void
1161 }
1162
1163 declare void @llvm.memmove.p0i8.p0i8.i64(i8*, i8*, i64, i32 %align, i1 %volatile)
1159 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i1 0)
1160 ret void
1161 }
1162
1163 declare void @llvm.memmove.p0i8.p0i8.i64(i8*, i8*, i64, i1)
11641164 define void @test_memmove(i8* %dst, i8* %src, i64 %size) {
11651165 ; CHECK-LABEL: name: test_memmove
11661166 ; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY %x0
11701170 ; CHECK: %x1 = COPY [[SRC]]
11711171 ; CHECK: %x2 = COPY [[SIZE]]
11721172 ; CHECK: BL &memmove, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit %x1, implicit %x2
1173 call void @llvm.memmove.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i32 1, i1 0)
1174 ret void
1175 }
1176
1177 declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i32 %align, i1 %volatile)
1173 call void @llvm.memmove.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i1 0)
1174 ret void
1175 }
1176
1177 declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1)
11781178 define void @test_memset(i8* %dst, i8 %val, i64 %size) {
11791179 ; CHECK-LABEL: name: test_memset
11801180 ; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY %x0
11861186 ; CHECK: %w1 = COPY [[SRC_TMP]]
11871187 ; CHECK: %x2 = COPY [[SIZE]]
11881188 ; CHECK: BL &memset, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit %w1, implicit %x2
1189 call void @llvm.memset.p0i8.i64(i8* %dst, i8 %val, i64 %size, i32 1, i1 0)
1189 call void @llvm.memset.p0i8.i64(i8* %dst, i8 %val, i64 %size, i1 0)
11901190 ret void
11911191 }
11921192
2121 %z.i60 = getelementptr inbounds %rs, %rs* %r, i64 0, i32 9, i32 2
2222 %na = getelementptr inbounds %rs, %rs* %r, i64 0, i32 0
2323 %0 = bitcast double* %x.i to i8*
24 call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 72, i32 8, i1 false)
24 call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 72, i1 false)
2525 %1 = load i32, i32* %na, align 4
2626 %cmp70 = icmp sgt i32 %1, 0
2727 br i1 %cmp70, label %for.body.lr.ph, label %for.end
8686 }
8787
8888 ; Function Attrs: nounwind
89 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
89 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
9090
55 declare void @extern(i8*)
66
77 ; Function Attrs: argmemonly nounwind
8 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #0
8 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #0
99
1010 ; Function Attrs: nounwind
1111 define void @func(float* noalias %arg, i32* noalias %arg1, i8* noalias %arg2, i8* noalias %arg3) #1 {
1212 bb:
1313 %tmp = getelementptr inbounds i8, i8* %arg2, i64 88
14 tail call void @llvm.memset.p0i8.i64(i8* noalias %arg2, i8 0, i64 40, i32 8, i1 false)
14 tail call void @llvm.memset.p0i8.i64(i8* align 8 noalias %arg2, i8 0, i64 40, i1 false)
1515 store i8 0, i8* %arg3
1616 store i8 2, i8* %arg2
1717 store float 0.000000e+00, float* %arg
2626 define void @func2(float* noalias %arg, i32* noalias %arg1, i8* noalias %arg2, i8* noalias %arg3) #1 {
2727 bb:
2828 %tmp = getelementptr inbounds i8, i8* %arg2, i64 88
29 tail call void @llvm.memset.p0i8.i64(i8* noalias %arg2, i8 0, i64 40, i32 8, i1 false)
29 tail call void @llvm.memset.p0i8.i64(i8* align 8 noalias %arg2, i8 0, i64 40, i1 false)
3030 store i8 0, i8* %arg3
3131 store i8 2, i8* %arg2
3232 store float 0.000000e+00, float* %arg
1313 ; CHECK-NEXT: str [[VAL2]], [x0]
1414
1515 define void @foo(i8* %a) {
16 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast ([3 x i32]* @b to i8*), i64 12, i32 4, i1 false)
16 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a, i8* align 4 bitcast ([3 x i32]* @b to i8*), i64 12, i1 false)
1717 ret void
1818 }
1919
20 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
20 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
163163 %4 = bitcast i8* %ap.align to %struct.s41*
164164 %5 = bitcast %struct.s41* %vs to i8*
165165 %6 = bitcast %struct.s41* %4 to i8*
166 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %5, i8* %6, i64 16, i32 16, i1 false)
166 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %5, i8* align 16 %6, i64 16, i1 false)
167167 ret void
168168 }
169 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
169 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
170170
171171 define void @bar2(i32 %x, i128 %s41.coerce) nounwind {
172172 entry:
299299 %tmp = alloca %struct.s42, align 4
300300 %tmp1 = alloca %struct.s42, align 4
301301 %0 = bitcast %struct.s42* %tmp to i8*
302 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
302 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast (%struct.s42* @g42 to i8*), i64 24, i1 false), !tbaa.struct !4
303303 %1 = bitcast %struct.s42* %tmp1 to i8*
304 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
304 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast (%struct.s42* @g42_2 to i8*), i64 24, i1 false), !tbaa.struct !4
305305 %call = call i32 @f42(i32 3, %struct.s42* %tmp, %struct.s42* %tmp1) #5
306306 ret i32 %call
307307 }
308308
309 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #4
309 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) #4
310310
311311 declare i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
312312 i32 %i7, i32 %i8, i32 %i9, %struct.s42* nocapture %s1,
345345 %tmp = alloca %struct.s42, align 4
346346 %tmp1 = alloca %struct.s42, align 4
347347 %0 = bitcast %struct.s42* %tmp to i8*
348 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
348 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast (%struct.s42* @g42 to i8*), i64 24, i1 false), !tbaa.struct !4
349349 %1 = bitcast %struct.s42* %tmp1 to i8*
350 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
350 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast (%struct.s42* @g42_2 to i8*), i64 24, i1 false), !tbaa.struct !4
351351 %call = call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
352352 i32 8, i32 9, %struct.s42* %tmp, %struct.s42* %tmp1) #5
353353 ret i32 %call
413413 %tmp = alloca %struct.s43, align 16
414414 %tmp1 = alloca %struct.s43, align 16
415415 %0 = bitcast %struct.s43* %tmp to i8*
416 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
416 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.s43* @g43 to i8*), i64 32, i1 false), !tbaa.struct !4
417417 %1 = bitcast %struct.s43* %tmp1 to i8*
418 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
418 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 bitcast (%struct.s43* @g43_2 to i8*), i64 32, i1 false), !tbaa.struct !4
419419 %call = call i32 @f43(i32 3, %struct.s43* %tmp, %struct.s43* %tmp1) #5
420420 ret i32 %call
421421 }
464464 %tmp = alloca %struct.s43, align 16
465465 %tmp1 = alloca %struct.s43, align 16
466466 %0 = bitcast %struct.s43* %tmp to i8*
467 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
467 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.s43* @g43 to i8*), i64 32, i1 false), !tbaa.struct !4
468468 %1 = bitcast %struct.s43* %tmp1 to i8*
469 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
469 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 bitcast (%struct.s43* @g43_2 to i8*), i64 32, i1 false), !tbaa.struct !4
470470 %call = call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
471471 i32 8, i32 9, %struct.s43* %tmp, %struct.s43* %tmp1) #5
472472 ret i32 %call
1010 ; ARM64: mov x2, #80
1111 ; ARM64: uxtb w1, w9
1212 ; ARM64: bl _memset
13 call void @llvm.memset.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i8 0, i64 80, i32 16, i1 false)
13 call void @llvm.memset.p0i8.i64(i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i8 0, i64 80, i1 false)
1414 ret void
1515 }
1616
17 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
17 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
1818
1919 define void @t2() {
2020 ; ARM64-LABEL: t2
2424 ; ARM64: add x1, x8, _message@PAGEOFF
2525 ; ARM64: mov x2, #80
2626 ; ARM64: bl _memcpy
27 call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 80, i32 16, i1 false)
27 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 80, i1 false)
2828 ret void
2929 }
3030
31 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1)
31 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1)
3232
3333 define void @t3() {
3434 ; ARM64-LABEL: t3
3838 ; ARM64: add x1, x8, _message@PAGEOFF
3939 ; ARM64: mov x2, #20
4040 ; ARM64: bl _memmove
41 call void @llvm.memmove.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 20, i32 16, i1 false)
41 call void @llvm.memmove.p0i8.p0i8.i64(i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 20, i1 false)
4242 ret void
4343 }
4444
45 declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1)
45 declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1)
4646
4747 define void @t4() {
4848 ; ARM64-LABEL: t4
5757 ; ARM64: ldrb w11, [x9, #16]
5858 ; ARM64: strb w11, [x8, #16]
5959 ; ARM64: ret
60 call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 17, i32 16, i1 false)
60 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 17, i1 false)
6161 ret void
6262 }
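; Unlike t1-t3, which are large enough to become real libcalls, this copy of
; a constant 17 bytes is expanded inline: a 16-byte aligned block move plus
; the single trailing byte handled by the ldrb/strb pair at offset #16.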
6363
7474 ; ARM64: ldrb w11, [x9, #16]
7575 ; ARM64: strb w11, [x8, #16]
7676 ; ARM64: ret
77 call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 17, i32 8, i1 false)
77 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 8 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 17, i1 false)
7878 ret void
7979 }
8080
9191 ; ARM64: ldrb w10, [x9, #8]
9292 ; ARM64: strb w10, [x8, #8]
9393 ; ARM64: ret
94 call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 9, i32 4, i1 false)
94 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 4 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 9, i1 false)
9595 ret void
9696 }
9797
110110 ; ARM64: ldrb w10, [x9, #6]
111111 ; ARM64: strb w10, [x8, #6]
112112 ; ARM64: ret
113 call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 7, i32 2, i1 false)
113 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 2 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 7, i1 false)
114114 ret void
115115 }
116116
129129 ; ARM64: ldrb w10, [x9, #3]
130130 ; ARM64: strb w10, [x8, #3]
131131 ; ARM64: ret
132 call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 4, i32 1, i1 false)
132 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 1 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 4, i1 false)
133133 ret void
134134 }
135135
142142 ; ARM64: strb [[BYTE]], [x0]
143143 %array = alloca i8, i32 8192
144144 %elem = getelementptr i8, i8* %array, i32 8000
145 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %elem, i64 1, i32 1, i1 false)
145 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %elem, i64 1, i1 false)
146146 ret void
147147 }
2121 ; CHECK: strh [[REG1]], [x[[BASEREG2]], #8]
2222 ; CHECK: ldr [[REG2:x[0-9]+]],
2323 ; CHECK: str [[REG2]],
24 call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.x, %struct.x* @dst, i32 0, i32 0), i8* getelementptr inbounds (%struct.x, %struct.x* @src, i32 0, i32 0), i32 11, i32 8, i1 false)
24 call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @dst, i32 0, i32 0), i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @src, i32 0, i32 0), i32 11, i1 false)
2525 ret i32 0
2626 }
2727
3232 ; CHECK: stur [[DEST]], [x0, #15]
3333 ; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG]]]
3434 ; CHECK: str [[DEST]], [x0]
35 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i32 1, i1 false)
35 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i1 false)
3636 ret void
3737 }
3838
4444 ; CHECK: str [[REG3]], [x0, #32]
4545 ; CHECK: ldp [[DEST1:q[0-9]+]], [[DEST2:q[0-9]+]], [x{{[0-9]+}}]
4646 ; CHECK: stp [[DEST1]], [[DEST2]], [x0]
47 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false)
47 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i1 false)
4848 ret void
4949 }
5050
5555 ; CHECK: str [[REG4]], [x0, #16]
5656 ; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG]]]
5757 ; CHECK: str [[DEST]], [x0]
58 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false)
58 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i1 false)
5959 ret void
6060 }
6161
6666 ; CHECK: strh [[REG5]], [x0, #16]
6767 ; CHECK: ldr [[REG6:q[0-9]+]], [x{{[0-9]+}}]
6868 ; CHECK: str [[REG6]], [x0]
69 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false)
69 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i1 false)
7070 ret void
7171 }
7272
7979 ; CHECK: mov [[REG8:w[0-9]+]],
8080 ; CHECK: movk [[REG8]],
8181 ; CHECK: str [[REG8]], [x0]
82 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false)
82 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i1 false)
8383 ret void
8484 }
8585
9090 ; CHECK: stur [[REG9]], [x{{[0-9]+}}, #6]
9191 ; CHECK: ldr
9292 ; CHECK: str
93 call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8], [512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str6, i64 0, i64 0), i64 14, i32 1, i1 false)
93 call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8], [512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str6, i64 0, i64 0), i64 14, i1 false)
9494 ret void
9595 }
9696
103103 ; CHECK: str [[REG10]], [x0]
104104 %0 = bitcast %struct.Foo* %a to i8*
105105 %1 = bitcast %struct.Foo* %b to i8*
106 tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32 4, i1 false)
106 tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 %1, i32 16, i1 false)
107107 ret void
108108 }
109109
110 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
111 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
110 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
111 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
44 ; CHECK-LABEL: t1:
55 ; CHECK: str wzr, [x0, #8]
66 ; CHECK: str xzr, [x0]
7 call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false)
7 call void @llvm.memset.p0i8.i64(i8* align 8 %c, i8 0, i64 12, i1 false)
88 ret void
99 }
1010
1616 ; CHECK: str xzr, [sp, #8]
1717 %buf = alloca [26 x i8], align 1
1818 %0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0
19 call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false)
19 call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i1 false)
2020 call void @something(i8* %0) nounwind
2121 ret void
2222 }
2323
2424 declare void @something(i8*) nounwind
25 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
26 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
25 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
26 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
99 ; CHECK-LINUX: {{b|bl}} memset
1010 define void @fct1(i8* nocapture %ptr) {
1111 entry:
12 tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 256, i32 1, i1 false)
12 tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 256, i1 false)
1313 ret void
1414 }
1515
16 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
16 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
1717
1818 ; CHECK-LABEL: fct2:
1919 ; When the size is bigger than 256, the memset is changed into a bzero call.
2121 ; CHECK-LINUX: {{b|bl}} memset
2222 define void @fct2(i8* nocapture %ptr) {
2323 entry:
24 tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 257, i32 1, i1 false)
24 tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 257, i1 false)
2525 ret void
2626 }
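; fct2's length of 257 is the first value past the 256 threshold mentioned
; above; CHECK-LINUX still expects plain memset, so the bzero form is
; presumably checked by a Darwin RUN line outside this hunk.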
2727
3232 define void @fct3(i8* nocapture %ptr, i32 %unknown) {
3333 entry:
3434 %conv = sext i32 %unknown to i64
35 tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 %conv, i32 1, i1 false)
35 tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 %conv, i1 false)
3636 ret void
3737 }
3838
66 ; CHECK: orr w2, wzr, #0x10
77 ; CHECK-NEXT: bl _memcpy
88 entry:
9 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 16, i32 1, i1 false)
9 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 16, i1 false)
1010 ret void
1111 }
1212
13 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
13 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
2525 %yy = alloca i32, align 4
2626 store i32 0, i32* %retval
2727 %0 = bitcast [8 x i32]* %x to i8*
28 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([8 x i32]* @main.x to i8*), i64 32, i32 4, i1 false)
28 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast ([8 x i32]* @main.x to i8*), i64 32, i1 false)
2929 %1 = bitcast [8 x i32]* %y to i8*
30 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ([8 x i32]* @main.y to i8*), i64 32, i32 4, i1 false)
30 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast ([8 x i32]* @main.y to i8*), i64 32, i1 false)
3131 store i32 0, i32* %xx, align 4
3232 store i32 0, i32* %yy, align 4
3333 store i32 0, i32* %i, align 4
104104 }
105105
106106 ; Function Attrs: nounwind
107 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1
107 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #1
108108
109109 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
110110 attributes #1 = { nounwind }
3131 %yy = alloca i32, align 4
3232 store i32 0, i32* %retval
3333 %0 = bitcast [8 x i32]* %x to i8*
34 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([8 x i32]* @main.x to i8*), i64 32, i32 4, i1 false)
34 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast ([8 x i32]* @main.x to i8*), i64 32, i1 false)
3535 %1 = bitcast [8 x i32]* %y to i8*
36 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ([8 x i32]* @main.y to i8*), i64 32, i32 4, i1 false)
36 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast ([8 x i32]* @main.y to i8*), i64 32, i1 false)
3737 store i32 0, i32* %xx, align 4
3838 store i32 0, i32* %yy, align 4
3939 store i32 0, i32* %i, align 4
105105
106106
107107 ; Function Attrs: nounwind
108 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1
108 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #1
109109
110110 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
111111 attributes #1 = { nounwind }
5454 ; CHECK-NEXT: ret
5555 %B = getelementptr inbounds %struct.X, %struct.X* %p, i64 0, i32 1
5656 %val = bitcast i64* %B to i8*
57 call void @llvm.memset.p0i8.i64(i8* %val, i8 0, i64 16, i32 1, i1 false)
57 call void @llvm.memset.p0i8.i64(i8* %val, i8 0, i64 16, i1 false)
5858 ret void
5959 }
6060
61 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
61 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
6262
6363 ; Unaligned 16b stores are split into 8b stores for performance.
6464 ; radar://15424193
4242 %tmp14 = bitcast double* %arraydecay5.3.1 to i8*
4343 %arraydecay11.3.1 = getelementptr inbounds %struct.Bicubic_Patch_Struct, %struct.Bicubic_Patch_Struct* %Shape, i64 0, i32 12, i64 1, i64 3, i64 0
4444 %tmp15 = bitcast double* %arraydecay11.3.1 to i8*
45 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp15, i64 24, i32 1, i1 false)
45 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp15, i64 24, i1 false)
4646 ret void
4747 }
4848
4949 ; Function Attrs: nounwind
50 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1)
50 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1)
77 define void @test(i64 %a, i8* %b) {
88 %1 = and i64 %a, 9223372036854775807
99 %2 = inttoptr i64 %1 to i8*
10 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %b, i64 8, i32 8, i1 false)
10 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %2, i8* align 8 %b, i64 8, i1 false)
1111 ret void
1212 }
1313
14 declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
14 declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
185185 ret void
186186 }
187187
188 declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
188 declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
189189
190190 define i32 @test_extern() {
191191 ; CHECK-LABEL: test_extern:
192 call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* undef, i32 undef, i32 4, i1 0)
192 call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 undef, i8* align 4 undef, i32 undef, i1 0)
193193 ; CHECK: bl memcpy
194194 ret i32 0
195195 }
114114 %C = getelementptr inbounds [12 x i8], [12 x i8]* %a2, i64 0, i64 4
115115 %1 = bitcast i8* %C to i64*
116116 store i64 0, i64* %1, align 4
117 call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 8, i32 8, i1 false)
117 call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 8, i1 false)
118118 ret void
119119 }
120120
121 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
121 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
122122
123123
124124 attributes #0 = { nounwind }
44 declare void @f(i8*, i8*)
55 declare void @f2(i8*, i8*)
66 declare void @_Z5setupv()
7 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #3
7 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #3
88
99 define i32 @main() local_unnamed_addr #1 {
1010 ; Make sure the stores happen in the correct order (the exact instructions could change).
2323 tail call void @_Z5setupv()
2424 %x2 = getelementptr inbounds [10 x i32], [10 x i32]* %b1, i64 0, i64 6
2525 %x3 = bitcast i32* %x2 to i8*
26 call void @llvm.memset.p0i8.i64(i8* %x3, i8 0, i64 16, i32 8, i1 false)
26 call void @llvm.memset.p0i8.i64(i8* align 8 %x3, i8 0, i64 16, i1 false)
2727 %arraydecay2 = getelementptr inbounds [10 x i32], [10 x i32]* %b1, i64 0, i64 0
2828 %x4 = bitcast [10 x i32]* %b1 to <4 x i32>*
2929 store <4 x i32> , <4 x i32>* %x4, align 16
@@ ... @@
; Tests to check that zero stores which are generated as STP xzr, xzr aren't
; scheduled incorrectly due to incorrect alias information

-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
%struct.tree_common = type { i8*, i8*, i32 }

; Original test case which exhibited the bug
@@ ... @@
; CHECK-DAG: str xzr, [x0]
entry:
  %0 = bitcast %struct.tree_common* %t to i8*
-  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 24, i32 8, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 24, i1 false)
  %code1 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 2
  store i32 %code, i32* %code1, align 8
  %type2 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 1
@@ ... @@
%class.D = type { %class.basic_string.base, [4 x i8] }
%class.basic_string.base = type <{ i64, i64, i32 }>
@a = global %class.D* zeroinitializer, align 8
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
define internal void @fun() section ".text.startup" {
entry:
  %tmp.i.i = alloca %class.D, align 8
@@ ... @@
  %x = load %class.D*, %class.D** getelementptr inbounds (%class.D*, %class.D** @a, i64 0), align 8
  %arrayidx.i.i.i = getelementptr inbounds %class.D, %class.D* %x, i64 %conv11.i.i
  %d = bitcast %class.D* %arrayidx.i.i.i to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %y, i8* %d, i64 24, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 nonnull %y, i8* align 8 %d, i64 24, i1 false)
  %inc.i.i = add i64 %i, 1
  %cmp.i.i = icmp slt i64 %inc.i.i, 0
  br i1 %cmp.i.i, label %loop, label %exit
@@ ... @@
; CHECK: str q0
; CHECK: ret
entry:
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* bitcast (%structA* @stubA to i8*), i64 48, i32 8, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 undef, i8* align 8 bitcast (%structA* @stubA to i8*), i64 48, i1 false)
  ret void
}

-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
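As the hunk above shows, align is an ordinary parameter attribute, so it precedes whatever operand follows it, whether a plain SSA value, undef, a constant expression such as a bitcast, or an operand that already carries other attributes like nonnull. A sketch with illustrative %dst/%src names:

  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 nonnull %dst, i8* align 8 %src, i64 24, i1 false)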
@@ ... @@
; A53: str [[DATA]], {{.*}}

  %0 = bitcast %struct1* %fde to i8*
-  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 40, i32 8, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 40, i1 false)
  %state = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 4
  store i16 256, i16* %state, align 8
  %fd1 = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 2
@@ ... @@
  ret void
}

-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
declare i32 @fcntl(i32, i32, ...)
declare noalias i8* @foo()
@@ ... @@
; CHECK-DAG: str [[R3]], [x0, #24]

define void @pr33475(i8* %p0, i8* %p1) noimplicitfloat {
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p0, i8* %p1, i64 32, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %p0, i8* align 4 %p1, i64 32, i1 false)
  ret void
}

-declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
@@ ... @@
  ret void
}

-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
%struct.tree_common = type { i8*, i8*, i32 }

; CHECK-LABEL: test_zero
@@ ... @@
define void @test_zero(%struct.tree_common* %t, i32 %code, i8* %type) {
entry:
  %0 = bitcast %struct.tree_common* %t to i8*
-  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 24, i32 8, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 24, i1 false)
  %code1 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 2
  store i32 %code, i32* %code1, align 8
  %type2 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 1
@@ ... @@
  %wide.load8291059.4 = load i64, i64* bitcast (float* getelementptr inbounds ([200 x float], [200 x float]* @main.b, i64 0, i64 18) to i64*), align 8
  store i64 %wide.load8281058.4, i64* bitcast (float* getelementptr inbounds ([200 x float], [200 x float]* @main.x, i64 0, i64 16) to i64*), align 8
  store i64 %wide.load8291059.4, i64* bitcast (float* getelementptr inbounds ([200 x float], [200 x float]* @main.x, i64 0, i64 18) to i64*), align 8
-  tail call void @llvm.memset.p0i8.i64(i8* bitcast ([200 x float]* @main.b to i8*), i8 0, i64 undef, i32 8, i1 false) #2
+  tail call void @llvm.memset.p0i8.i64(i8* align 8 bitcast ([200 x float]* @main.b to i8*), i8 0, i64 undef, i1 false) #2
  unreachable
}

; Function Attrs: argmemonly nounwind
-declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) #1
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1

attributes #1 = { argmemonly nounwind }
attributes #2 = { nounwind }
@@ ... @@
; CHECK: b memcpy
define void @tail_memcpy(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
entry:
-  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false)
  ret void
}

@@ ... @@
; CHECK: b memmove
define void @tail_memmove(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
entry:
-  tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false)
+  tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false)
  ret void
}

@@ ... @@
; CHECK: b memset
define void @tail_memset(i8* nocapture %p, i8 %c, i32 %n) #0 {
entry:
-  tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i32 1, i1 false)
+  tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i1 false)
  ret void
}

-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0
-declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #0
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #0
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #0

attributes #0 = { nounwind }
@@ ... @@
  %tmp1 = bitcast %class.basic_string.11.42.73* %arg to %union.anon.8.39.70**
  store %union.anon.8.39.70* %tmp, %union.anon.8.39.70** %tmp1, align 8
  %tmp2 = bitcast %union.anon.8.39.70* %tmp to i8*
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* nonnull undef, i64 13, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* nonnull undef, i64 13, i1 false)
  %tmp3 = getelementptr inbounds %class.basic_string.11.42.73, %class.basic_string.11.42.73* %arg, i64 0, i32 0, i32 0, i32 1
  store i64 13, i64* %tmp3, align 8
  %tmp4 = getelementptr inbounds %class.basic_string.11.42.73, %class.basic_string.11.42.73* %arg, i64 0, i32 0, i32 0, i32 2, i32 1, i64 5
@@ ... @@
}

; Function Attrs: argmemonly nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #0

attributes #0 = { argmemonly nounwind }
@@ ... @@
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s

-declare void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* nocapture, i32 addrspace(4)* nocapture, i32, i32, i1) #0
+declare void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* nocapture, i32 addrspace(4)* nocapture, i32, i1) #0

@lds.i32 = unnamed_addr addrspace(3) global i32 undef, align 4
@lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4
@@ ... @@

; HSA: @memcpy_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #2
define amdgpu_kernel void @memcpy_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
-  call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* %out, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i32 4, i1 false)
+  call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* align 4 %out, i32 addrspace(4)* align 4 getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i1 false)
  ret void
}

@@ ... @@
@lds.missing.align.0 = internal unnamed_addr addrspace(3) global [39 x i32] undef
@lds.missing.align.1 = internal unnamed_addr addrspace(3) global [7 x i64] undef

-declare void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i32, i1) #0
-declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i32, i1) #0
+declare void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i1) #0
+declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i1) #0


; HSA-LABEL: {{^}}test_no_round_size_1:
; HSA: workgroup_group_segment_byte_size = 38
define amdgpu_kernel void @test_no_round_size_1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
  %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 4, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 4, i1 false)
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.align16.0.bc, i8 addrspace(1)* align 4 %in, i32 38, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.align16.0.bc, i32 38, i1 false)
  ret void
}

@@ ... @@
; HSA: group_segment_alignment = 4
define amdgpu_kernel void @test_round_size_2(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
  %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 4, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 4, i1 false)
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.align16.0.bc, i8 addrspace(1)* align 4 %in, i32 38, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.align16.0.bc, i32 38, i1 false)

  %lds.align16.1.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.1 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.1.bc, i8 addrspace(1)* %in, i32 38, i32 4, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.1.bc, i32 38, i32 4, i1 false)
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.align16.1.bc, i8 addrspace(1)* align 4 %in, i32 38, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.align16.1.bc, i32 38, i1 false)

  ret void
}
@@ ... @@
; HSA: group_segment_alignment = 4
define amdgpu_kernel void @test_round_size_2_align_8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
  %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false)
-
-  %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false)
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align16.0.bc, i32 38, i1 false)
+
+  %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)*
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false)

  ret void
}
@@ ... @@
; HSA: group_segment_alignment = 4
define amdgpu_kernel void @test_round_local_lds_and_arg(i8 addrspace(1)* %out, i8 addrspace(1)* %in, i8 addrspace(3)* %lds.arg) #1 {
  %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 4, i1 false)
-
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 4, i1 false)
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.arg, i8 addrspace(1)* %in, i32 38, i32 4, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.arg, i32 38, i32 4, i1 false)
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.align16.0.bc, i8 addrspace(1)* align 4 %in, i32 38, i1 false)
+
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.align16.0.bc, i32 38, i1 false)
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.arg, i8 addrspace(1)* align 4 %in, i32 38, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.arg, i32 38, i1 false)
  ret void
}

@@ ... @@
; HSA: workgroup_group_segment_byte_size = 0
; HSA: group_segment_alignment = 4
define amdgpu_kernel void @test_round_lds_arg(i8 addrspace(1)* %out, i8 addrspace(1)* %in, i8 addrspace(3)* %lds.arg) #1 {
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.arg, i8 addrspace(1)* %in, i32 38, i32 4, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.arg, i32 38, i32 4, i1 false)
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.arg, i8 addrspace(1)* align 4 %in, i32 38, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.arg, i32 38, i1 false)
  ret void
}

@@ ... @@
; HSA: workgroup_group_segment_byte_size = 0
; HSA: group_segment_alignment = 4
define amdgpu_kernel void @test_high_align_lds_arg(i8 addrspace(1)* %out, i8 addrspace(1)* %in, i8 addrspace(3)* align 64 %lds.arg) #1 {
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.arg, i8 addrspace(1)* %in, i32 38, i32 64, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.arg, i32 38, i32 64, i1 false)
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 64 %lds.arg, i8 addrspace(1)* align 64 %in, i32 38, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 64 %out, i8 addrspace(3)* align 64 %lds.arg, i32 38, i1 false)
  ret void
}

@@ ... @@
; HSA: group_segment_alignment = 4
define amdgpu_kernel void @test_missing_alignment_size_2_order0(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
  %lds.missing.align.0.bc = bitcast [39 x i32] addrspace(3)* @lds.missing.align.0 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.missing.align.0.bc, i8 addrspace(1)* %in, i32 160, i32 4, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.missing.align.0.bc, i32 160, i32 4, i1 false)
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.missing.align.0.bc, i8 addrspace(1)* align 4 %in, i32 160, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.missing.align.0.bc, i32 160, i1 false)

  %lds.missing.align.1.bc = bitcast [7 x i64] addrspace(3)* @lds.missing.align.1 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.missing.align.1.bc, i8 addrspace(1)* %in, i32 56, i32 8, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.missing.align.1.bc, i32 56, i32 8, i1 false)
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.missing.align.1.bc, i8 addrspace(1)* align 8 %in, i32 56, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.missing.align.1.bc, i32 56, i1 false)

  ret void
}
@@ ... @@
; HSA: group_segment_alignment = 4
define amdgpu_kernel void @test_missing_alignment_size_2_order1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
  %lds.missing.align.1.bc = bitcast [7 x i64] addrspace(3)* @lds.missing.align.1 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.missing.align.1.bc, i8 addrspace(1)* %in, i32 56, i32 8, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.missing.align.1.bc, i32 56, i32 8, i1 false)
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.missing.align.1.bc, i8 addrspace(1)* align 8 %in, i32 56, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.missing.align.1.bc, i32 56, i1 false)

  %lds.missing.align.0.bc = bitcast [39 x i32] addrspace(3)* @lds.missing.align.0 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.missing.align.0.bc, i8 addrspace(1)* %in, i32 160, i32 4, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.missing.align.0.bc, i32 160, i32 4, i1 false)
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.missing.align.0.bc, i8 addrspace(1)* align 4 %in, i32 160, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.missing.align.0.bc, i32 160, i1 false)

  ret void
}
@@ ... @@
; HSA: group_segment_alignment = 4
define amdgpu_kernel void @test_round_size_3_order0(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
  %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align32.0 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align32.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align32.0.bc, i32 38, i32 8, i1 false)
-
-  %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false)
-
-  %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false)
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align32.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align32.0.bc, i32 38, i1 false)
+
+  %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)*
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align16.0.bc, i32 38, i1 false)
+
+  %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)*
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false)

  ret void
}
@@ ... @@
; HSA: group_segment_alignment = 4
define amdgpu_kernel void @test_round_size_3_order1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
  %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align32.0 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align32.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align32.0.bc, i32 38, i32 8, i1 false)
-
-  %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false)
-
-  %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false)
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align32.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align32.0.bc, i32 38, i1 false)
+
+  %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)*
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false)
+
+  %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)*
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align16.0.bc, i32 38, i1 false)

  ret void
}
@@ ... @@
; HSA: group_segment_alignment = 4
define amdgpu_kernel void @test_round_size_3_order2(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
  %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false)
-
-  %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align32.0 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align32.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align32.0.bc, i32 38, i32 8, i1 false)
-
-  %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false)
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align16.0.bc, i32 38, i1 false)
+
+  %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align32.0 to i8 addrspace(3)*
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align32.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align32.0.bc, i32 38, i1 false)
+
+  %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)*
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false)

  ret void
}
@@ ... @@
; HSA: group_segment_alignment = 4
define amdgpu_kernel void @test_round_size_3_order3(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
  %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false)
-
-  %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false)
-
-  %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align32.0 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align32.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align32.0.bc, i32 38, i32 8, i1 false)
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align16.0.bc, i32 38, i1 false)
+
+  %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)*
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false)
+
+  %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align32.0 to i8 addrspace(3)*
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align32.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align32.0.bc, i32 38, i1 false)

  ret void
}
@@ ... @@
; HSA: group_segment_alignment = 4
define amdgpu_kernel void @test_round_size_3_order4(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
  %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false)
-
-  %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align32.0 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align32.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align32.0.bc, i32 38, i32 8, i1 false)
-
-  %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false)
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false)
+
+  %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align32.0 to i8 addrspace(3)*
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align32.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align32.0.bc, i32 38, i1 false)
+
+  %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)*
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align16.0.bc, i32 38, i1 false)

  ret void
}
@@ ... @@
; HSA: group_segment_alignment = 4
define amdgpu_kernel void @test_round_size_3_order5(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
  %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false)
-
-  %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false)
-
-  %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align32.0 to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align32.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false)
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align32.0.bc, i32 38, i32 8, i1 false)
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false)
+
+  %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)*
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align16.0.bc, i32 38, i1 false)
+
+  %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align32.0 to i8 addrspace(3)*
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align32.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align32.0.bc, i32 38, i1 false)

  ret void
}
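Every rewritten call above still carries the same alignment on source and dest, as this first step requires. The reason for moving the information onto per-argument attributes is that later steps in the series can lift that restriction; a sketch of the kind of call the final form is meant to express (illustrative operands; not yet valid while the equal-alignment rule is in force):

  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 16 %dst, i8 addrspace(1)* align 4 %src, i32 38, i1 false)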
@@ ... @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

-declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind
-declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind
-declare void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(2)* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(2)* nocapture, i64, i1) nounwind


; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1:
@@ ... @@
define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 1, i1 false) nounwind
+  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i1 false) nounwind
  ret void
}

@@ ... @@
define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 2, i1 false) nounwind
+  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 2 %bcout, i8 addrspace(3)* align 2 %bcin, i32 32, i1 false) nounwind
  ret void
}

@@ ... @@
define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 4, i1 false) nounwind
+  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 4 %bcout, i8 addrspace(3)* align 4 %bcin, i32 32, i1 false) nounwind
  ret void
}

@@ ... @@
define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
-  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 8, i1 false) nounwind
+  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 8 %bcout, i8 addrspace(3)* align 8 %bcin, i32 32, i1 false) nounwind
  ret void
}

@@ ... @@
define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
-  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 1, i1 false) nounwind
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i1 false) nounwind
  ret void
}

@@ ... @@
define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
-  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 2, i1 false) nounwind
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 2 %bcout, i8 addrspace(1)* align 2 %bcin, i64 32, i1 false) nounwind
  ret void
}

@@ ... @@
define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
-  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 4, i1 false) nounwind
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %bcout, i8 addrspace(1)* align 4 %bcin, i64 32, i1 false) nounwind
  ret void
}

@@ ... @@
define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
-  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 8, i1 false) nounwind
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 8 %bcout, i8 addrspace(1)* align 8 %bcin, i64 32, i1 false) nounwind
  ret void
}

@@ ... @@
define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
-  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 16, i1 false) nounwind
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 16 %bcout, i8 addrspace(1)* align 16 %bcin, i64 32, i1 false) nounwind
  ret void
}

@@ ... @@
; SI-DAG: buffer_store_dwordx4
define amdgpu_kernel void @test_memcpy_const_string_align4(i8 addrspace(1)* noalias %out) nounwind {
  %str = bitcast [16 x i8] addrspace(2)* @hello.align4 to i8 addrspace(2)*
-  call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* %out, i8 addrspace(2)* %str, i64 32, i32 4, i1 false)
+  call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* align 4 %out, i8 addrspace(2)* align 4 %str, i64 32, i1 false)
  ret void
}

@@ ... @@
; SI: buffer_store_byte
define amdgpu_kernel void @test_memcpy_const_string_align1(i8 addrspace(1)* noalias %out) nounwind {
  %str = bitcast [16 x i8] addrspace(2)* @hello.align1 to i8 addrspace(2)*
-  call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* %out, i8 addrspace(2)* %str, i64 32, i32 1, i1 false)
-  ret void
-}
+  call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* %out, i8 addrspace(2)* %str, i64 32, i1 false)
+  ret void
+}
@@ ... @@
; RUN: opt -S -amdgpu-lower-intrinsics %s | FileCheck -check-prefix=OPT %s

-declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i32, i1) #1
-declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i32, i1) #1
+declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i1) #1
+declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i1) #1

-declare void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i32, i1) #1
-declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture, i8, i64, i32, i1) #1
+declare void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i1) #1
+declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture, i8, i64, i1) #1

; Test the upper bound for sizes to leave
; OPT-LABEL: @max_size_small_static_memcpy_caller0(
-; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false)
+; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false)
define amdgpu_kernel void @max_size_small_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
-  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false)
  ret void
}

@@ ... @@
; OPT-NEXT: [[T5:%[0-9]+]] = icmp ult i64 [[T4]], 1025
; OPT-NEXT: br i1 [[T5]], label %load-store-loop, label %memcpy-split
define amdgpu_kernel void @min_size_large_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
-  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i32 1, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i1 false)
  ret void
}

; OPT-LABEL: @max_size_small_static_memmove_caller0(
-; OPT: call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false)
+; OPT: call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false)
define amdgpu_kernel void @max_size_small_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
-  call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false)
+  call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false)
  ret void
}

@@ ... @@
; OPT: getelementptr
; OPT-NEXT: store i8
define amdgpu_kernel void @min_size_large_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
-  call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i32 1, i1 false)
+  call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i1 false)
  ret void
}

; OPT-LABEL: @max_size_small_static_memset_caller0(
-; OPT: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i32 1, i1 false)
+; OPT: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i1 false)
define amdgpu_kernel void @max_size_small_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 {
-  call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i32 1, i1 false)
+  call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i1 false)
  ret void
}

@@ ... @@
; OPT: getelementptr
; OPT: store i8
define amdgpu_kernel void @min_size_large_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 {
-  call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1025, i32 1, i1 false)
+  call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1025, i1 false)
  ret void
}

@@ ... @@
; OPT-NOT: call
; OPT: phi
define amdgpu_kernel void @variable_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 {
-  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i1 false)
  ret void
}

@@ ... @@
; OPT-NOT: call
; OPT: phi
define amdgpu_kernel void @variable_memcpy_caller1(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 {
-  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i1 false)
  ret void
}

@@ ... @@
; OPT: phi
; OPT-NOT: call
define amdgpu_kernel void @memcpy_multi_use_one_function(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n, i64 %m) #0 {
-  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false)
-  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %m, i32 1, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %m, i1 false)
  ret void
}

@@ ... @@
; OPT: getelementptr inbounds i8, i8 addrspace(1)*
; OPT: store i8
define amdgpu_kernel void @memcpy_alt_type(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n) #0 {
-  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n, i32 1, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n, i1 false)
  ret void
}

@@ ... @@
; OPT: getelementptr inbounds i8, i8 addrspace(1)*
; OPT: store i8

-; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i32 1, i1 false)
+; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i1 false)
define amdgpu_kernel void @memcpy_multi_use_one_function_keep_small(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n) #0 {
-  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false)
-  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i32 1, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i1 false)
  ret void
}

@@ ... @@
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s

-declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i32, i1) #0
-declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i32, i1) #0
+declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i1) #0
+declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i1) #0

-declare void @llvm.memmove.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i32, i1) #0
-declare void @llvm.memmove.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i32, i1) #0
+declare void @llvm.memmove.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i1) #0
+declare void @llvm.memmove.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i1) #0

-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #0

declare i32 @llvm.objectsize.i32.p0i8(i8*, i1, i1) #1

; CHECK-LABEL: @promote_with_memcpy(
; CHECK: getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_memcpy.alloca, i32 0, i32 %{{[0-9]+}}
-; CHECK: call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 false)
-; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out.bc, i8 addrspace(3)* %alloca.bc, i32 68, i32 4, i1 false)
+; CHECK: call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false)
+; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out.bc, i8 addrspace(3)* align 4 %alloca.bc, i32 68, i1 false)
define amdgpu_kernel void @promote_with_memcpy(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %alloca = alloca [17 x i32], align 4
  %alloca.bc = bitcast [17 x i32]* %alloca to i8*
  %in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)*
  %out.bc = bitcast i32 addrspace(1)* %out to i8 addrspace(1)*
-  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 false)
-  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %out.bc, i8* %alloca.bc, i32 68, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false)
+  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 4 %out.bc, i8* align 4 %alloca.bc, i32 68, i1 false)
  ret void
}

; CHECK-LABEL: @promote_with_memmove(
; CHECK: getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_memmove.alloca, i32 0, i32 %{{[0-9]+}}
-; CHECK: call void @llvm.memmove.p3i8.p1i8.i32(i8 addrspace(3)* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 false)
-; CHECK: call void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* %out.bc, i8 addrspace(3)* %alloca.bc, i32 68, i32 4, i1 false)
+; CHECK: call void @llvm.memmove.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false)
+; CHECK: call void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out.bc, i8 addrspace(3)* align 4 %alloca.bc, i32 68, i1 false)
define amdgpu_kernel void @promote_with_memmove(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %alloca = alloca [17 x i32], align 4
  %alloca.bc = bitcast [17 x i32]* %alloca to i8*
  %in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)*
  %out.bc = bitcast i32 addrspace(1)* %out to i8 addrspace(1)*
-  call void @llvm.memmove.p0i8.p1i8.i32(i8* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 false)
-  call void @llvm.memmove.p1i8.p0i8.i32(i8 addrspace(1)* %out.bc, i8* %alloca.bc, i32 68, i32 4, i1 false)
+  call void @llvm.memmove.p0i8.p1i8.i32(i8* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false)
+  call void @llvm.memmove.p1i8.p0i8.i32(i8 addrspace(1)* align 4 %out.bc, i8* align 4 %alloca.bc, i32 68, i1 false)
  ret void
}

; CHECK-LABEL: @promote_with_memset(
; CHECK: getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_memset.alloca, i32 0, i32 %{{[0-9]+}}
-; CHECK: call void @llvm.memset.p3i8.i32(i8 addrspace(3)* %alloca.bc, i8 7, i32 68, i32 4, i1 false)
+; CHECK: call void @llvm.memset.p3i8.i32(i8 addrspace(3)* align 4 %alloca.bc, i8 7, i32 68, i1 false)
define amdgpu_kernel void @promote_with_memset(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %alloca = alloca [17 x i32], align 4
  %alloca.bc = bitcast [17 x i32]* %alloca to i8*
  %in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)*
  %out.bc = bitcast i32 addrspace(1)* %out to i8 addrspace(1)*
-  call void @llvm.memset.p0i8.i32(i8* %alloca.bc, i8 7, i32 68, i32 4, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* align 4 %alloca.bc, i8 7, i32 68, i1 false)
  ret void
}

@@ ... @@
; RUN: not llc -march=amdgcn < %s 2>&1 | FileCheck -check-prefix=ERROR %s
; RUN: not llc -march=amdgcn < %s | FileCheck -check-prefix=GCN %s

-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #1
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #1

; ERROR: error: stack size limit exceeded (4294967296) in stack_size_limit
; GCN: ; ScratchSize: 4294967296
@@ ... @@
entry:
  %alloca = alloca [1073741823 x i32], align 4
  %bc = bitcast [1073741823 x i32]* %alloca to i8*
-  call void @llvm.memset.p0i8.i32(i8* %bc, i8 9, i32 1073741823, i32 1, i1 true)
+  call void @llvm.memset.p0i8.i32(i8* %bc, i8 9, i32 1073741823, i1 true)
  ret void
}
@@ ... @@
  %34 = fadd double %31, 0.000000e+00
  %35 = fadd double %32, 0.000000e+00
  %36 = bitcast %struct.ggPoint3* %x to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* null, i8* %36, i32 24, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 null, i8* align 4 %36, i32 24, i1 false)
  store double %33, double* null, align 8
  br i1 false, label %_Z20ggRaySphereIntersectRK6ggRay3RK8ggSphereddRd.exit, label %bb5.i.i.i

@@ ... @@
  ret i32 0
}

-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
@@ ... @@

bb1: ; preds = %entry
  %0 = call %struct.ui* @vn_pp_to_ui(i32* undef) nounwind
-  call void @llvm.memset.p0i8.i32(i8* undef, i8 0, i32 40, i32 4, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* align 4 undef, i8 0, i32 40, i1 false)
  %1 = getelementptr inbounds %struct.ui, %struct.ui* %0, i32 0, i32 0
  store %struct.mo* undef, %struct.mo** %1, align 4
  %2 = getelementptr inbounds %struct.ui, %struct.ui* %0, i32 0, i32 5
@@ ... @@

declare %struct.ui* @vn_pp_to_ui(i32*)

-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind

declare i32 @mo_create_nnm(%struct.mo*, i64, i32**)

@@ ... @@
; CHECK-UNALIGNED: str
define void @foo(i8* nocapture %c) nounwind optsize {
entry:
-  call void @llvm.memset.p0i8.i64(i8* %c, i8 -1, i64 5, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %c, i8 -1, i64 5, i1 false)
  ret void
}

-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
@@ ... @@
; CHECK: vst1.64
define void @f_0_40(i8* nocapture %c) nounwind optsize {
entry:
-  call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 40, i32 16, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* align 16 %c, i8 0, i64 40, i1 false)
  ret void
}

-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
@@ ... @@

declare i8* @__cxa_begin_catch(i8*)

-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind

declare void @__cxa_end_catch()

@@ ... @@

@source = common global [512 x i8] zeroinitializer, align 4

-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind

define void @function() {
entry:
-  call void @llvm.memset.p0i8.i32(i8* bitcast ([512 x i8]* @source to i8*), i8 0, i32 512, i32 0, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* bitcast ([512 x i8]* @source to i8*), i8 0, i32 512, i1 false)
  unreachable
}

@@ ... @@
; RUN: llc -mtriple=thumbv7-windows-itanium -mcpu=cortex-a9 -verify-machineinstrs -o - %s | FileCheck %s

-declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind

@source = common global [512 x i8] zeroinitializer, align 4
@target = common global [512 x i8] zeroinitializer, align 4

define void @move() nounwind {
entry:
-  call void @llvm.memmove.p0i8.p0i8.i32(i8* bitcast ([512 x i8]* @target to i8*), i8* bitcast ([512 x i8]* @source to i8*), i32 512, i32 0, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* bitcast ([512 x i8]* @target to i8*), i8* bitcast ([512 x i8]* @source to i8*), i32 512, i1 false)
  unreachable
}

@@ ... @@

define void @copy() nounwind {
entry: