llvm.org GIT mirror llvm / c96acc5

Add element atomic memmove intrinsic

Summary:
Continuing the work from https://reviews.llvm.org/D33240, this change
introduces an element unordered-atomic memmove intrinsic. This intrinsic is
essentially memmove with the implementation requirement that all loads/stores
used for the copy are done with unordered-atomic loads/stores of a given
element size.

Reviewers: eli.friedman, reames, mkazantsev, skatkov

Reviewed By: reames

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D34884

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@307796 91177308-0d34-0410-b5e6-96231b3b80d8

Author: Daniel Neilson

9 changed files with 379 additions and 0 deletions.
1028110281 to be aligned to some boundary, this can be specified as the fourth
1028210282 argument, otherwise it should be set to 0 or 1 (both meaning no alignment).
1028310283
10284 .. _int_memmove:
10285
1028410286 '``llvm.memmove``' Intrinsic
1028510287 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1028610288
1417714179 lowered to a call to the symbol ``__llvm_memcpy_element_unordered_atomic_*``, where '*'
1417814180 is replaced with the actual element size.
1417914181
14182 The optimizer is allowed to inline the memory copy when it's profitable to do so.
14183
14184 '``llvm.memmove.element.unordered.atomic``' Intrinsic
14185 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
14186
14187 Syntax:
14188 """""""
14189
14190 This is an overloaded intrinsic. You can use
14191 ``llvm.memmove.element.unordered.atomic`` on any integer bit width and for
14192 different address spaces. Not all targets support all bit widths, however.
14193
14194 ::
14195
14196       declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* <dest>,
14197                                                                         i8* <src>,
14198                                                                         i32 <len>,
14199                                                                         i32 <element_size>)
14200       declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* <dest>,
14201                                                                         i8* <src>,
14202                                                                         i64 <len>,
14203                                                                         i32 <element_size>)
14204
14205 Overview:
14206 """""""""
14207
14208 The '``llvm.memmove.element.unordered.atomic.*``' intrinsic is a specialization
14209 of the '``llvm.memmove.*``' intrinsic. It differs in that the ``dest`` and
14210 ``src`` are treated as arrays with elements that are exactly ``element_size``
14211 bytes, and the copy between buffers uses a sequence of
14212 :ref:`unordered atomic <ordering>` load/store operations that are a positive
14213 integer multiple of the ``element_size`` in size.
14214
14215 Arguments:
14216 """"""""""
14217
14218 The first three arguments are the same as they are in the
14219 :ref:`@llvm.memmove <int_memmove>` intrinsic, with the added constraint that
14220 ``len`` is required to be a positive integer multiple of the ``element_size``.
14221 If ``len`` is not a positive integer multiple of ``element_size``, then the
14222 behaviour of the intrinsic is undefined.
14223
14224 ``element_size`` must be a compile-time constant positive power of two no
14225 greater than a target-specific atomic access size limit.
14226
14227 For each of the input pointers, the ``align`` parameter attribute must be
14228 specified. It must be a power of two no less than the ``element_size``. The
14229 caller guarantees that both the source and destination pointers are aligned to
14230 that boundary.
14231
14232 Semantics:
14233 """"""""""
14234
14235 The '``llvm.memmove.element.unordered.atomic.*``' intrinsic copies ``len`` bytes
14236 of memory from the source location to the destination location. These locations
14237 are allowed to overlap. The memory copy is performed as a sequence of load/store
14238 operations where each access is guaranteed to be a multiple of ``element_size``
14239 bytes wide and aligned at an ``element_size`` boundary.
14240
14241 The order of the copy is unspecified. The same value may be read from the source
14242 buffer many times, but only one write is issued to the destination buffer per
14243 element. It is well defined to have concurrent reads and writes to both source
14244 and destination provided those reads and writes are unordered atomic when
14245 specified.
14246
14247 This intrinsic does not provide any additional ordering guarantees over those
14248 provided by a set of unordered loads from the source location and stores to the
14249 destination.
14250
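As an informal illustration of these semantics, an element size of 4 can be
modelled in C++ with relaxed atomic operations (a conceptual sketch only: the
function name is made up, the Clang/GCC ``__atomic`` builtins are merely one
way to express unordered accesses, and this is not LLVM's actual lowering or
runtime implementation)::

    #include <cstddef>
    #include <cstdint>

    // Conceptual model of @llvm.memmove.element.unordered.atomic with
    // element_size == 4. Each element is copied with exactly one relaxed
    // atomic load and one relaxed atomic store; the copy direction handles
    // overlap as memmove requires, but the order in which elements are
    // copied is otherwise unspecified.
    void element_atomic_memmove_4(uint32_t *Dst, const uint32_t *Src,
                                  size_t LenBytes) {
      size_t N = LenBytes / sizeof(uint32_t); // positive multiple of 4
      if (Dst <= Src)
        for (size_t I = 0; I != N; ++I)
          __atomic_store_n(&Dst[I],
                           __atomic_load_n(&Src[I], __ATOMIC_RELAXED),
                           __ATOMIC_RELAXED);
      else
        for (size_t I = N; I != 0; --I)
          __atomic_store_n(&Dst[I - 1],
                           __atomic_load_n(&Src[I - 1], __ATOMIC_RELAXED),
                           __ATOMIC_RELAXED);
    }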
14251 Lowering:
14252 """""""""
14253
14254 In the most general case, a call to the
14255 '``llvm.memmove.element.unordered.atomic.*``' intrinsic is lowered to a call to
14256 the symbol ``__llvm_memmove_element_unordered_atomic_*``, where '*' is replaced
14257 with the actual element size.
14258
1418014259 The optimizer is allowed to inline the memory copy when it's profitable to do so.
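For reference, the out-of-line symbol receives the intrinsic's dest/src/len
operands directly; the element size is encoded in the symbol name rather than
passed as an argument. A plausible prototype for the 4-byte, 32-bit-length
variant, inferred from the lowering code in this change (the exact runtime
signature is an assumption here, not quoted from a header)::

    #include <cstdint>

    // Assumed prototype matching the i32 overload's dest, src and len
    // operands; the trailing "4" in the name carries the element size.
    extern "C" void __llvm_memmove_element_unordered_atomic_4(void *Dst,
                                                              const void *Src,
                                                              uint32_t Len);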
339339 MEMCPY_ELEMENT_UNORDERED_ATOMIC_8,
340340 MEMCPY_ELEMENT_UNORDERED_ATOMIC_16,
341341
342 MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1,
343 MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2,
344 MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4,
345 MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8,
346 MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16,
347
342348 // EXCEPTION HANDLING
343349 UNWIND_RESUME,
344350
514520 /// MEMCPY_ELEMENT_UNORDERED_ATOMIC_* value for the given element size or
515521 /// UNKNOWN_LIBCALL if there is none.
516522 Libcall getMEMCPY_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize);
523
524 /// getMEMMOVE_ELEMENT_UNORDERED_ATOMIC - Return
525 /// MEMMOVE_ELEMENT_UNORDERED_ATOMIC_* value for the given element size or
526 /// UNKNOWN_LIBCALL if there is none.
527 Libcall getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize);
517528 }
518529 }
519530
295295 }
296296 };
297297
298 class ElementUnorderedAtomicMemMoveInst : public IntrinsicInst {
299 private:
300 enum { ARG_DEST = 0, ARG_SOURCE = 1, ARG_LENGTH = 2, ARG_ELEMENTSIZE = 3 };
301
302 public:
303 Value *getRawDest() const {
304 return const_cast<Value *>(getArgOperand(ARG_DEST));
305 }
306 const Use &getRawDestUse() const { return getArgOperandUse(ARG_DEST); }
307 Use &getRawDestUse() { return getArgOperandUse(ARG_DEST); }
308
309 /// Return the arguments to the instruction.
310 Value *getRawSource() const {
311 return const_cast<Value *>(getArgOperand(ARG_SOURCE));
312 }
313 const Use &getRawSourceUse() const { return getArgOperandUse(ARG_SOURCE); }
314 Use &getRawSourceUse() { return getArgOperandUse(ARG_SOURCE); }
315
316 Value *getLength() const {
317 return const_cast<Value *>(getArgOperand(ARG_LENGTH));
318 }
319 const Use &getLengthUse() const { return getArgOperandUse(ARG_LENGTH); }
320 Use &getLengthUse() { return getArgOperandUse(ARG_LENGTH); }
321
322 bool isVolatile() const { return false; }
323
324 Value *getRawElementSizeInBytes() const {
325 return const_cast<Value *>(getArgOperand(ARG_ELEMENTSIZE));
326 }
327
328 ConstantInt *getElementSizeInBytesCst() const {
329 return cast<ConstantInt>(getRawElementSizeInBytes());
330 }
331
332 uint32_t getElementSizeInBytes() const {
333 return getElementSizeInBytesCst()->getZExtValue();
334 }
335
336 /// This is just like getRawDest, but it strips off any cast
337 /// instructions that feed it, giving the original input. The returned
338 /// value is guaranteed to be a pointer.
339 Value *getDest() const { return getRawDest()->stripPointerCasts(); }
340
341 /// This is just like getRawSource, but it strips off any cast
342 /// instructions that feed it, giving the original input. The returned
343 /// value is guaranteed to be a pointer.
344 Value *getSource() const { return getRawSource()->stripPointerCasts(); }
345
346 unsigned getDestAddressSpace() const {
347 return cast<PointerType>(getRawDest()->getType())->getAddressSpace();
348 }
349
350 unsigned getSourceAddressSpace() const {
351 return cast<PointerType>(getRawSource()->getType())->getAddressSpace();
352 }
353
354 /// Set the specified arguments of the instruction.
355 void setDest(Value *Ptr) {
356 assert(getRawDest()->getType() == Ptr->getType() &&
357 "setDest called with pointer of wrong type!");
358 setArgOperand(ARG_DEST, Ptr);
359 }
360
361 void setSource(Value *Ptr) {
362 assert(getRawSource()->getType() == Ptr->getType() &&
363 "setSource called with pointer of wrong type!");
364 setArgOperand(ARG_SOURCE, Ptr);
365 }
366
367 void setLength(Value *L) {
368 assert(getLength()->getType() == L->getType() &&
369 "setLength called with value of wrong type!");
370 setArgOperand(ARG_LENGTH, L);
371 }
372
373 void setElementSizeInBytes(Constant *V) {
374 assert(V->getType() == Type::getInt32Ty(getContext()) &&
375 "setElementSizeInBytes called with value of wrong type!");
376 setArgOperand(ARG_ELEMENTSIZE, V);
377 }
378
379 static inline bool classof(const IntrinsicInst *I) {
380 return I->getIntrinsicID() == Intrinsic::memmove_element_unordered_atomic;
381 }
382 static inline bool classof(const Value *V) {
383 return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
384 }
385 };
386
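(For illustration: the classof hooks above let passes recognize the new
intrinsic through the usual dyn_cast idiom. A minimal sketch; the helper name
and the size threshold are invented for the example.)

    #include "llvm/IR/IntrinsicInst.h"
    using namespace llvm;

    // Sketch: detect the new intrinsic and read its constant element size
    // through the accessors defined by ElementUnorderedAtomicMemMoveInst.
    static bool isSmallElementAtomicMemMove(const Instruction &I) {
      if (const auto *AMI = dyn_cast<ElementUnorderedAtomicMemMoveInst>(&I))
        return AMI->getElementSizeInBytes() <= 8; // illustrative threshold
      return false;
    }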
298387 /// This is the common base class for memset/memcpy/memmove.
299388 class MemIntrinsic : public IntrinsicInst {
300389 public:
871871 IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>,
872872 ReadOnly<1>
873873 ]>;
874
875 // @llvm.memmove.element.unordered.atomic.*(dest, src, length, elementsize)
876 def int_memmove_element_unordered_atomic
877 : Intrinsic<[],
878 [
879 llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i32_ty
880 ],
881 [
882 IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>,
883 ReadOnly<1>
884 ]>;
885
874886
875887 //===------------------------ Reduction Intrinsics ------------------------===//
876888 //
49784978 uint64_t ElementSizeConstant = MI.getElementSizeInBytes();
49794979 RTLIB::Libcall LibraryCall =
49804980 RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant);
4981 if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
4982 report_fatal_error("Unsupported element size");
4983
4984 TargetLowering::CallLoweringInfo CLI(DAG);
4985 CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
4986 TLI.getLibcallCallingConv(LibraryCall),
4987 Type::getVoidTy(*DAG.getContext()),
4988 DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall),
4989 TLI.getPointerTy(DAG.getDataLayout())),
4990 std::move(Args));
4991
4992 std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
4993 DAG.setRoot(CallResult.second);
4994 return nullptr;
4995 }
4996 case Intrinsic::memmove_element_unordered_atomic: {
4997 auto &MI = cast<ElementUnorderedAtomicMemMoveInst>(I);
4998 SDValue Dst = getValue(MI.getRawDest());
4999 SDValue Src = getValue(MI.getRawSource());
5000 SDValue Length = getValue(MI.getLength());
5001
5002 // Emit a library call.
5003 TargetLowering::ArgListTy Args;
5004 TargetLowering::ArgListEntry Entry;
5005 Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
5006 Entry.Node = Dst;
5007 Args.push_back(Entry);
5008
5009 Entry.Node = Src;
5010 Args.push_back(Entry);
5011
5012 Entry.Ty = MI.getLength()->getType();
5013 Entry.Node = Length;
5014 Args.push_back(Entry);
5015
5016 uint64_t ElementSizeConstant = MI.getElementSizeInBytes();
5017 RTLIB::Libcall LibraryCall =
5018 RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant);
49815019 if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
49825020 report_fatal_error("Unsupported element size");
49835021
383383 "__llvm_memcpy_element_unordered_atomic_8";
384384 Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_16] =
385385 "__llvm_memcpy_element_unordered_atomic_16";
386 Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1] =
387 "__llvm_memmove_element_unordered_atomic_1";
388 Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2] =
389 "__llvm_memmove_element_unordered_atomic_2";
390 Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4] =
391 "__llvm_memmove_element_unordered_atomic_4";
392 Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8] =
393 "__llvm_memmove_element_unordered_atomic_8";
394 Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16] =
395 "__llvm_memmove_element_unordered_atomic_16";
386396 Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
387397 Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
388398 Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
797807 return MEMCPY_ELEMENT_UNORDERED_ATOMIC_8;
798808 case 16:
799809 return MEMCPY_ELEMENT_UNORDERED_ATOMIC_16;
810 default:
811 return UNKNOWN_LIBCALL;
812 }
813 }
814
815 RTLIB::Libcall RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
816 switch (ElementSize) {
817 case 1:
818 return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1;
819 case 2:
820 return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2;
821 case 4:
822 return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4;
823 case 8:
824 return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8;
825 case 16:
826 return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16;
800827 default:
801828 return UNKNOWN_LIBCALL;
802829 }
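(Usage sketch: lowering code resolves an element size to the runtime symbol
through this mapping, mirroring the SelectionDAGBuilder change above; the
helper function here is hypothetical.)

    #include "llvm/CodeGen/RuntimeLibcalls.h"
    #include "llvm/Target/TargetLowering.h"

    // Sketch: element size -> runtime symbol name, or nullptr if unsupported.
    static const char *memmoveSymbolFor(const llvm::TargetLoweringBase &TLI,
                                        uint64_t ElementSize) {
      llvm::RTLIB::Libcall LC =
          llvm::RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElementSize);
      if (LC == llvm::RTLIB::UNKNOWN_LIBCALL)
        return nullptr;
      return TLI.getLibcallName(LC); // e.g. "__llvm_memmove_element_unordered_atomic_4"
    }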
40434043 "incorrect alignment of the source argument", CS);
40444044 break;
40454045 }
4046 case Intrinsic::memmove_element_unordered_atomic: {
4047 auto *MI = cast<ElementUnorderedAtomicMemMoveInst>(CS.getInstruction());
4048
4049 ConstantInt *ElementSizeCI =
4050     dyn_cast<ConstantInt>(MI->getRawElementSizeInBytes());
4051 Assert(ElementSizeCI,
4052 "element size of the element-wise unordered atomic memory "
4053 "intrinsic must be a constant int",
4054 CS);
4055 const APInt &ElementSizeVal = ElementSizeCI->getValue();
4056 Assert(ElementSizeVal.isPowerOf2(),
4057 "element size of the element-wise atomic memory intrinsic "
4058 "must be a power of 2",
4059 CS);
4060
4061 if (auto *LengthCI = dyn_cast<ConstantInt>(MI->getLength())) {
4062 uint64_t Length = LengthCI->getZExtValue();
4063 uint64_t ElementSize = MI->getElementSizeInBytes();
4064 Assert((Length % ElementSize) == 0,
4065 "constant length must be a multiple of the element size in the "
4066 "element-wise atomic memory intrinsic",
4067 CS);
4068 }
4069
4070 auto IsValidAlignment = [&](uint64_t Alignment) {
4071 return isPowerOf2_64(Alignment) && ElementSizeVal.ule(Alignment);
4072 };
4073 uint64_t DstAlignment = CS.getParamAlignment(0),
4074 SrcAlignment = CS.getParamAlignment(1);
4075 Assert(IsValidAlignment(DstAlignment),
4076 "incorrect alignment of the destination argument", CS);
4077 Assert(IsValidAlignment(SrcAlignment),
4078 "incorrect alignment of the source argument", CS);
4079
4080 break;
4081 }
40464082 case Intrinsic::gcroot:
40474083 case Intrinsic::gcwrite:
40484084 case Intrinsic::gcread:
6161   call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %Dst, i8* align 4 %Src, i32 4, i32 4)
  ret void
6262 }
6363
64 define i8* @test_memmove1(i8* %P, i8* %Q) {
65 ; CHECK: test_memmove1
66 call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1, i32 1)
67 ret i8* %P
68 ; 3rd arg (%edx) -- length
69 ; CHECK-DAG: movl $1, %edx
70 ; CHECK: __llvm_memmove_element_unordered_atomic_1
71 }
72
73 define i8* @test_memmove2(i8* %P, i8* %Q) {
74 ; CHECK: test_memmove2
75 call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 2, i32 2)
76 ret i8* %P
77 ; 3rd arg (%edx) -- length
78 ; CHECK-DAG: movl $2, %edx
79 ; CHECK: __llvm_memmove_element_unordered_atomic_2
80 }
81
82 define i8* @test_memmove4(i8* %P, i8* %Q) {
83 ; CHECK: test_memmove4
84 call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 4, i32 4)
85 ret i8* %P
86 ; 3rd arg (%edx) -- length
87 ; CHECK-DAG: movl $4, %edx
88 ; CHECK: __llvm_memmove_element_unordered_atomic_4
89 }
90
91 define i8* @test_memmove8(i8* %P, i8* %Q) {
92 ; CHECK: test_memmove8
93 call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %P, i8* align 8 %Q, i32 8, i32 8)
94 ret i8* %P
95 ; 3rd arg (%edx) -- length
96 ; CHECK-DAG: movl $8, %edx
97 ; CHECK: __llvm_memmove_element_unordered_atomic_8
98 }
99
100 define i8* @test_memmove16(i8* %P, i8* %Q) {
101 ; CHECK: test_memmove16
102 call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %P, i8* align 16 %Q, i32 16, i32 16)
103 ret i8* %P
104 ; 3rd arg (%edx) -- length
105 ; CHECK-DAG: movl $16, %edx
106 ; CHECK: __llvm_memmove_element_unordered_atomic_16
107 }
108
109 define void @test_memmove_args(i8** %Storage) {
110 ; CHECK: test_memmove_args
111 %Dst = load i8*, i8** %Storage
112 %Src.addr = getelementptr i8*, i8** %Storage, i64 1
113 %Src = load i8*, i8** %Src.addr
114
115 ; 1st arg (%rdi)
116 ; CHECK-DAG: movq (%rdi), [[REG1:%r.+]]
117 ; CHECK-DAG: movq [[REG1]], %rdi
118 ; 2nd arg (%rsi)
119 ; CHECK-DAG: movq 8(%rdi), %rsi
120 ; 3rd arg (%edx) -- length
121 ; CHECK-DAG: movl $4, %edx
122 ; CHECK: __llvm_memmove_element_unordered_atomic_4
123   call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %Dst, i8* align 4 %Src, i32 4, i32 4)
  ret void
124 }
125
64126 declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
127 declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
2121 ret void
2222 }
2323 declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
24
25 define void @test_memmove(i8* %P, i8* %Q, i32 %A, i32 %E) {
26 ; CHECK: element size of the element-wise unordered atomic memory intrinsic must be a constant int
27 call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1, i32 %E)
28 ; CHECK: element size of the element-wise atomic memory intrinsic must be a power of 2
29 call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1, i32 3)
30
31 ; CHECK: constant length must be a multiple of the element size in the element-wise atomic memory intrinsic
32 call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 7, i32 4)
33
34 ; CHECK: incorrect alignment of the destination argument
35 call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* %P, i8* align 4 %Q, i32 1, i32 1)
36 ; CHECK: incorrect alignment of the destination argument
37 call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %P, i8* align 4 %Q, i32 4, i32 4)
38
39 ; CHECK: incorrect alignment of the source argument
40 call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* %Q, i32 1, i32 1)
41 ; CHECK: incorrect alignment of the source argument
42 call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 1 %Q, i32 4, i32 4)
43
44 ret void
45 }
46 declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
47
2448 ; CHECK: input module is broken!