llvm.org GIT mirror llvm / a06b091
Add element atomic memset intrinsic Summary: Continuing the work from https://reviews.llvm.org/D33240, this change introduces an element unordered-atomic memset intrinsic. This intrinsic is essentially memset with the implementation requirement that all stores used for the assignment are done with unordered-atomic stores of a given element size. Reviewers: eli.friedman, reames, mkazantsev, skatkov Reviewed By: reames Subscribers: jfb, dschuff, sbc100, jgravelle-google, aheejin, efriedma, llvm-commits Differential Revision: https://reviews.llvm.org/D34885 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@307854 91177308-0d34-0410-b5e6-96231b3b80d8 Daniel Neilson 3 years ago
10 changed file(s) with 359 addition(s) and 1 deletion(s). Raw diff Collapse all Expand all
1033810338 aligned to some boundary, this can be specified as the fourth argument,
1033910339 otherwise it should be set to 0 or 1 (both meaning no alignment).
1034010340
10341 .. _int_memset:
10342
1034110343 '``llvm.memset.*``' Intrinsics
1034210344 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1034310345
1425714259 actual element size.
1425814260
1425914261 The optimizer is allowed to inline the memory copy when it's profitable to do so.
14262
14263 .. _int_memset_element_unordered_atomic:
14264
14265 '``llvm.memset.element.unordered.atomic``' Intrinsic
14266 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
14267
14268 Syntax:
14269 """""""
14270
14271 This is an overloaded intrinsic. You can use ``llvm.memset.element.unordered.atomic`` on
14272 any integer bit width and for different address spaces. Not all targets
14273 support all bit widths however.
14274
14275 ::
14276
14277       declare void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* <dest>,
14278                                                                   i8 <value>,
14279                                                                   i32 <len>,
14280                                                                   i32 <element_size>)
14281       declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* <dest>,
14282                                                                   i8 <value>,
14283                                                                   i64 <len>,
14284                                                                   i32 <element_size>)
14285
14286 Overview:
14287 """""""""
14288
14289 The '``llvm.memset.element.unordered.atomic.*``' intrinsic is a specialization of the
14290 '``llvm.memset.*``' intrinsic. It differs in that the ``dest`` is treated as an array
14291 with elements that are exactly ``element_size`` bytes, and the assignment to that array
14292 uses a sequence of :ref:`unordered atomic <ordering>` store operations
14293 that are a positive integer multiple of the ``element_size`` in size.
14294
14295 Arguments:
14296 """"""""""
14297
14298 The first three arguments are the same as they are in the :ref:`@llvm.memset <int_memset>`
14299 intrinsic, with the added constraint that ``len`` is required to be a positive integer
14300 multiple of the ``element_size``. If ``len`` is not a positive integer multiple of
14301 ``element_size``, then the behaviour of the intrinsic is undefined.
14302
14303 ``element_size`` must be a compile-time constant positive power of two no greater than
14304 the target-specific atomic access size limit.
14305
14306 The ``dest`` input pointer must have the ``align`` parameter attribute specified. It
14307 must be a power of two no less than the ``element_size``. Caller guarantees that
14308 the destination pointer is aligned to that boundary.
14309
14310 Semantics:
14311 """"""""""
14312
14313 The '``llvm.memset.element.unordered.atomic.*``' intrinsic sets the ``len`` bytes of
14314 memory starting at the destination location to the given ``value``. The memory is
14315 set with a sequence of store operations where each access is guaranteed to be a
14316 multiple of ``element_size`` bytes wide and aligned at an ``element_size`` boundary.
14317
14318 The order of the assignment is unspecified. Only one write is issued to the
14319 destination buffer per element. It is well defined to have concurrent reads and
14320 writes to the destination provided those reads and writes are unordered atomic
14321 when specified.
14322
14323 This intrinsic does not provide any additional ordering guarantees over those
14324 provided by a set of unordered stores to the destination.
14325
14326 Lowering:
14327 """""""""
14328
14329 In the most general case, a call to the '``llvm.memset.element.unordered.atomic.*``' is
14330 lowered to a call to the symbol ``__llvm_memset_element_unordered_atomic_*``, where '*'
14331 is replaced with the actual element size.
14332
14333 The optimizer is allowed to inline the memory assignment when it's profitable to do so.
14334
345345 MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8,
346346 MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16,
347347
348 MEMSET_ELEMENT_UNORDERED_ATOMIC_1,
349 MEMSET_ELEMENT_UNORDERED_ATOMIC_2,
350 MEMSET_ELEMENT_UNORDERED_ATOMIC_4,
351 MEMSET_ELEMENT_UNORDERED_ATOMIC_8,
352 MEMSET_ELEMENT_UNORDERED_ATOMIC_16,
353
348354 // EXCEPTION HANDLING
349355 UNWIND_RESUME,
350356
525531 /// MEMMOVE_ELEMENT_UNORDERED_ATOMIC_* value for the given element size or
526532 /// UNKNOWN_LIBCALL if there is none.
527533 Libcall getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize);
534
535 /// getMEMSET_ELEMENT_UNORDERED_ATOMIC - Return
536 /// MEMSET_ELEMENT_UNORDERED_ATOMIC_* value for the given element size or
536 /// UNKNOWN_LIBCALL if there is none.
538 Libcall getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize);
539
528540 }
529541 }
530542
384384 }
385385 };
386386
387 /// This class represents atomic memset intrinsic
388 /// TODO: Integrate this class into MemIntrinsic hierarchy; for now this is
389 /// C&P of all methods from that hierarchy
390 class ElementUnorderedAtomicMemSetInst : public IntrinsicInst {
391 private:
392 enum { ARG_DEST = 0, ARG_VALUE = 1, ARG_LENGTH = 2, ARG_ELEMENTSIZE = 3 };
393
394 public:
395 Value *getRawDest() const {
396 return const_cast(getArgOperand(ARG_DEST));
397 }
398 const Use &getRawDestUse() const { return getArgOperandUse(ARG_DEST); }
399 Use &getRawDestUse() { return getArgOperandUse(ARG_DEST); }
400
401 Value *getValue() const { return const_cast(getArgOperand(ARG_VALUE)); }
402 const Use &getValueUse() const { return getArgOperandUse(ARG_VALUE); }
403 Use &getValueUse() { return getArgOperandUse(ARG_VALUE); }
404
405 Value *getLength() const {
406 return const_cast(getArgOperand(ARG_LENGTH));
407 }
408 const Use &getLengthUse() const { return getArgOperandUse(ARG_LENGTH); }
409 Use &getLengthUse() { return getArgOperandUse(ARG_LENGTH); }
410
411 bool isVolatile() const { return false; }
412
413 Value *getRawElementSizeInBytes() const {
414 return const_cast(getArgOperand(ARG_ELEMENTSIZE));
415 }
416
417 ConstantInt *getElementSizeInBytesCst() const {
418 return cast(getRawElementSizeInBytes());
419 }
420
421 uint32_t getElementSizeInBytes() const {
422 return getElementSizeInBytesCst()->getZExtValue();
423 }
424
425 /// This is just like getRawDest, but it strips off any cast
426 /// instructions that feed it, giving the original input. The returned
427 /// value is guaranteed to be a pointer.
428 Value *getDest() const { return getRawDest()->stripPointerCasts(); }
429
430 unsigned getDestAddressSpace() const {
431 return cast(getRawDest()->getType())->getAddressSpace();
432 }
433
434 /// Set the specified arguments of the instruction.
435 void setDest(Value *Ptr) {
436 assert(getRawDest()->getType() == Ptr->getType() &&
437 "setDest called with pointer of wrong type!");
438 setArgOperand(ARG_DEST, Ptr);
439 }
440
441 void setValue(Value *Val) {
442 assert(getValue()->getType() == Val->getType() &&
443 "setValue called with value of wrong type!");
444 setArgOperand(ARG_VALUE, Val);
445 }
446
447 void setLength(Value *L) {
448 assert(getLength()->getType() == L->getType() &&
449 "setLength called with value of wrong type!");
450 setArgOperand(ARG_LENGTH, L);
451 }
452
453 void setElementSizeInBytes(Constant *V) {
454 assert(V->getType() == Type::getInt8Ty(getContext()) &&
455 "setElementSizeInBytes called with value of wrong type!");
456 setArgOperand(ARG_ELEMENTSIZE, V);
457 }
458
459 static inline bool classof(const IntrinsicInst *I) {
460 return I->getIntrinsicID() == Intrinsic::memset_element_unordered_atomic;
461 }
462 static inline bool classof(const Value *V) {
463 return isa(V) && classof(cast(V));
464 }
465 };
466
387467 /// This is the common base class for memset/memcpy/memmove.
388468 class MemIntrinsic : public IntrinsicInst {
389469 public:
883883 ReadOnly<1>
884884 ]>;
885885
886 // @llvm.memset.element.unordered.atomic.*(dest, value, length, elementsize)
887 def int_memset_element_unordered_atomic
888 : Intrinsic<[], [ llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i32_ty ],
889 [ IntrArgMemOnly, NoCapture<0>, WriteOnly<0> ]>;
886890
887891 //===------------------------ Reduction Intrinsics ------------------------===//
888892 //
50165016 uint64_t ElementSizeConstant = MI.getElementSizeInBytes();
50175017 RTLIB::Libcall LibraryCall =
50185018 RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant);
5019 if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
5020 report_fatal_error("Unsupported element size");
5021
5022 TargetLowering::CallLoweringInfo CLI(DAG);
5023 CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
5024 TLI.getLibcallCallingConv(LibraryCall),
5025 Type::getVoidTy(*DAG.getContext()),
5026 DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall),
5027 TLI.getPointerTy(DAG.getDataLayout())),
5028 std::move(Args));
5029
5030 std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
5031 DAG.setRoot(CallResult.second);
5032 return nullptr;
5033 }
5034 case Intrinsic::memset_element_unordered_atomic: {
5035 auto &MI = cast<ElementUnorderedAtomicMemSetInst>(I);
5036 SDValue Dst = getValue(MI.getRawDest());
5037 SDValue Val = getValue(MI.getValue());
5038 SDValue Length = getValue(MI.getLength());
5039
5040 // Emit a library call.
5041 TargetLowering::ArgListTy Args;
5042 TargetLowering::ArgListEntry Entry;
5043 Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
5044 Entry.Node = Dst;
5045 Args.push_back(Entry);
5046
5047 Entry.Ty = Type::getInt8Ty(*DAG.getContext());
5048 Entry.Node = Val;
5049 Args.push_back(Entry);
5050
5051 Entry.Ty = MI.getLength()->getType();
5052 Entry.Node = Length;
5053 Args.push_back(Entry);
5054
5055 uint64_t ElementSizeConstant = MI.getElementSizeInBytes();
5056 RTLIB::Libcall LibraryCall =
5057 RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant);
50195058 if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
50205059 report_fatal_error("Unsupported element size");
50215060
393393 "__llvm_memmove_element_unordered_atomic_8";
394394 Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16] =
395395 "__llvm_memmove_element_unordered_atomic_16";
396 Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_1] =
397 "__llvm_memset_element_unordered_atomic_1";
398 Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_2] =
399 "__llvm_memset_element_unordered_atomic_2";
400 Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_4] =
401 "__llvm_memset_element_unordered_atomic_4";
402 Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_8] =
403 "__llvm_memset_element_unordered_atomic_8";
404 Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_16] =
405 "__llvm_memset_element_unordered_atomic_16";
396406 Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
397407 Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
398408 Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
824834 return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8;
825835 case 16:
826836 return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16;
837 default:
838 return UNKNOWN_LIBCALL;
839 }
840 }
841
842 RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
843 switch (ElementSize) {
844 case 1:
845 return MEMSET_ELEMENT_UNORDERED_ATOMIC_1;
846 case 2:
847 return MEMSET_ELEMENT_UNORDERED_ATOMIC_2;
848 case 4:
849 return MEMSET_ELEMENT_UNORDERED_ATOMIC_4;
850 case 8:
851 return MEMSET_ELEMENT_UNORDERED_ATOMIC_8;
852 case 16:
853 return MEMSET_ELEMENT_UNORDERED_ATOMIC_16;
827854 default:
828855 return UNKNOWN_LIBCALL;
829856 }
40764076 "incorrect alignment of the destination argument", CS);
40774077 Assert(IsValidAlignment(SrcAlignment),
40784078 "incorrect alignment of the source argument", CS);
4079
4079 break;
4080 }
4081 case Intrinsic::memset_element_unordered_atomic: {
4082 auto *MI = cast<ElementUnorderedAtomicMemSetInst>(CS.getInstruction());
4083
4084 ConstantInt *ElementSizeCI =
4085 dyn_cast<ConstantInt>(MI->getRawElementSizeInBytes());
4086 Assert(ElementSizeCI,
4087 "element size of the element-wise unordered atomic memory "
4088 "intrinsic must be a constant int",
4089 CS);
4090 const APInt &ElementSizeVal = ElementSizeCI->getValue();
4091 Assert(ElementSizeVal.isPowerOf2(),
4092 "element size of the element-wise atomic memory intrinsic "
4093 "must be a power of 2",
4094 CS);
4095
4096 if (auto *LengthCI = dyn_cast<ConstantInt>(MI->getLength())) {
4097 uint64_t Length = LengthCI->getZExtValue();
4098 uint64_t ElementSize = MI->getElementSizeInBytes();
4099 Assert((Length % ElementSize) == 0,
4100 "constant length must be a multiple of the element size in the "
4101 "element-wise atomic memory intrinsic",
4102 CS);
4103 }
4104
4105 auto IsValidAlignment = [&](uint64_t Alignment) {
4106 return isPowerOf2_64(Alignment) && ElementSizeVal.ule(Alignment);
4107 };
4108 uint64_t DstAlignment = CS.getParamAlignment(0);
4109 Assert(IsValidAlignment(DstAlignment),
4110 "incorrect alignment of the destination argument", CS);
40804111 break;
40814112 }
40824113 case Intrinsic::gcroot:
403403 /* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4 */ unsupported,
404404 /* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8 */ unsupported,
405405 /* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16 */ unsupported,
406
407 /* MEMSET_ELEMENT_UNORDERED_ATOMIC_1 */ unsupported,
408 /* MEMSET_ELEMENT_UNORDERED_ATOMIC_2 */ unsupported,
409 /* MEMSET_ELEMENT_UNORDERED_ATOMIC_4 */ unsupported,
410 /* MEMSET_ELEMENT_UNORDERED_ATOMIC_8 */ unsupported,
411 /* MEMSET_ELEMENT_UNORDERED_ATOMIC_16 */ unsupported,
406412
407413 // EXCEPTION HANDLING
408414 /* UNWIND_RESUME */ unsupported,
853859 /* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4 */ nullptr,
854860 /* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8 */ nullptr,
855861 /* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16 */ nullptr,
862 /* MEMSET_ELEMENT_UNORDERED_ATOMIC_1 */ nullptr,
863 /* MEMSET_ELEMENT_UNORDERED_ATOMIC_2 */ nullptr,
864 /* MEMSET_ELEMENT_UNORDERED_ATOMIC_4 */ nullptr,
865 /* MEMSET_ELEMENT_UNORDERED_ATOMIC_8 */ nullptr,
866 /* MEMSET_ELEMENT_UNORDERED_ATOMIC_16 */ nullptr,
856867 /* UNWIND_RESUME */ "_Unwind_Resume",
857868 /* SYNC_VAL_COMPARE_AND_SWAP_1 */ "__sync_val_compare_and_swap_1",
858869 /* SYNC_VAL_COMPARE_AND_SWAP_2 */ "__sync_val_compare_and_swap_2",
123123 call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %Dst, i8* align 4 %Src, i32 4, i32 4) ret void
124124 }
125125
126 define i8* @test_memset1(i8* %P, i8 %V) {
127 ; CHECK: test_memset1
128 call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 1, i32 1)
129 ret i8* %P
130 ; 3rd arg (%edx) -- length
131 ; CHECK-DAG: movl $1, %edx
132 ; CHECK: __llvm_memset_element_unordered_atomic_1
133 }
134
135 define i8* @test_memset2(i8* %P, i8 %V) {
136 ; CHECK: test_memset2
137 call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 2, i32 2)
138 ret i8* %P
139 ; 3rd arg (%edx) -- length
140 ; CHECK-DAG: movl $2, %edx
141 ; CHECK: __llvm_memset_element_unordered_atomic_2
142 }
143
144 define i8* @test_memset4(i8* %P, i8 %V) {
145 ; CHECK: test_memset4
146 call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 4, i32 4)
147 ret i8* %P
148 ; 3rd arg (%edx) -- length
149 ; CHECK-DAG: movl $4, %edx
150 ; CHECK: __llvm_memset_element_unordered_atomic_4
151 }
152
153 define i8* @test_memset8(i8* %P, i8 %V) {
154 ; CHECK: test_memset8
155 call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %P, i8 %V, i32 8, i32 8)
156 ret i8* %P
157 ; 3rd arg (%edx) -- length
158 ; CHECK-DAG: movl $8, %edx
159 ; CHECK: __llvm_memset_element_unordered_atomic_8
160 }
161
162 define i8* @test_memset16(i8* %P, i8 %V) {
163 ; CHECK: test_memset16
164 call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %P, i8 %V, i32 16, i32 16)
165 ret i8* %P
166 ; 3rd arg (%edx) -- length
167 ; CHECK-DAG: movl $16, %edx
168 ; CHECK: __llvm_memset_element_unordered_atomic_16
169 }
170
171 define void @test_memset_args(i8** %Storage, i8* %V) {
172 ; CHECK: test_memset_args
173 %Dst = load i8*, i8** %Storage
174 %Val = load i8, i8* %V
175
176 ; 1st arg (%rdi)
177 ; CHECK-DAG: movq (%rdi), %rdi
178 ; 2nd arg (%rsi)
179 ; CHECK-DAG: movzbl (%rsi), %esi
180 ; 3rd arg (%edx) -- length
181 ; CHECK-DAG: movl $4, %edx
182 ; CHECK: __llvm_memset_element_unordered_atomic_4
183 call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %Dst, i8 %Val, i32 4, i32 4) ret void
184 }
185
126186 declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
127187 declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
188 declare void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nocapture, i8, i32, i32) nounwind
4545 }
4646 declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
4747
48 define void @test_memset(i8* %P, i8 %V, i32 %A, i32 %E) {
49 ; CHECK: element size of the element-wise unordered atomic memory intrinsic must be a constant int
50 call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 1, i32 %E)
51 ; CHECK: element size of the element-wise atomic memory intrinsic must be a power of 2
52 call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 1, i32 3)
53
54 ; CHECK: constant length must be a multiple of the element size in the element-wise atomic memory intrinsic
55 call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 7, i32 4)
56
57 ; CHECK: incorrect alignment of the destination argument
58 call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* %P, i8 %V, i32 1, i32 1)
59 ; CHECK: incorrect alignment of the destination argument
60 call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %P, i8 %V, i32 4, i32 4)
61
62 ret void
63 }
64 declare void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nocapture, i8, i32, i32) nounwind
65
4866 ; CHECK: input module is broken!