llvm.org GIT mirror llvm / f755153
Introduce element-wise atomic memcpy intrinsic This change adds a new intrinsic which is intended to provide memcpy functionality with additional atomicity guarantees. Please refer to the review thread or language reference for further details. Differential Revision: https://reviews.llvm.org/D27133 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@290708 91177308-0d34-0410-b5e6-96231b3b80d8 Igor Laevsky 3 years ago
8 changed file(s) with 275 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
1266012660 LLVM provides experimental intrinsics to support runtime patching
1266112661 mechanisms commonly desired in dynamic language JITs. These intrinsics
1266212662 are described in :doc:`StackMaps`.
12663
12664 Element Wise Atomic Memory Intrinsics
12665 -------------------------------------
12666
12667 These intrinsics are similar to the standard library memory intrinsics except
12668 that they perform memory transfer as a sequence of atomic memory accesses.
12669
12670 .. _int_memcpy_element_atomic:
12671
12672 '``llvm.memcpy.element.atomic``' Intrinsic
12673 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
12674
12675 Syntax:
12676 """""""
12677
12678 This is an overloaded intrinsic. You can use ``llvm.memcpy.element.atomic`` on
12679 any integer bit width and for different address spaces. Not all targets
12680 support all bit widths however.
12681
12682 ::
12683
12684 declare void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* <dest>, i8* <src>,
12685 i64 <num_elements>, i32 <element_size>)
12686
12687 Overview:
12688 """""""""
12689
12690 The '``llvm.memcpy.element.atomic.*``' intrinsic performs a copy of a block of
12691 memory from the source location to the destination location as a sequence of
12692 unordered atomic memory accesses, where each access is a multiple of
12693 ``element_size`` bytes wide and aligned at an element size boundary. For example,
12694 each element is accessed atomically in both the source and destination buffers.
12695
12696 Arguments:
12697 """"""""""
12698
12699 The first argument is a pointer to the destination, the second is a
12700 pointer to the source. The third argument is an integer argument
12701 specifying the number of elements to copy, and the fourth argument is the size
12702 of a single element in bytes.
12703
12704 ``element_size`` should be a power of two, greater than zero and less than
12705 a target-specific atomic access size limit.
12706
12707 For each of the input pointers, the ``align`` parameter attribute must be specified.
12708 It must be a power of two and greater than or equal to the ``element_size``.
12709 The caller guarantees that both the source and destination pointers are aligned to
12710 that boundary.
12711
12712 Semantics:
12713 """"""""""
12714
12715 The '``llvm.memcpy.element.atomic.*``' intrinsic copies
12716 '``num_elements`` * ``element_size``' bytes of memory from the source location to
12717 the destination location. These locations are not allowed to overlap. The memory
12718 copy is performed as a sequence of unordered atomic memory accesses where each
12719 access is guaranteed to be a multiple of ``element_size`` bytes wide and aligned
12720 at an element size boundary.
12721
12722 The order of the copy is unspecified. The same value may be read from the source
12723 buffer many times, but only one write is issued to the destination buffer per
12724 element. It is well defined to have concurrent reads and writes to both source
12725 and destination provided those reads and writes are at least unordered atomic.
12726
12727 This intrinsic does not provide any additional ordering guarantees over those
12728 provided by a set of unordered loads from the source location and stores to the
12729 destination.
12730
12731 Lowering:
12732 """"""""""
12733
12734 In the most general case, a call to the '``llvm.memcpy.element.atomic.*``'
12735 intrinsic is lowered to a call to the symbol ``__llvm_memcpy_element_atomic_*``,
12736 where '*' is replaced with the actual element size.
12737
12738 The optimizer is allowed to inline the memory copy when it's profitable to do so.
332332 MEMSET,
333333 MEMMOVE,
334334
335 // ELEMENT-WISE ATOMIC MEMORY
336 MEMCPY_ELEMENT_ATOMIC_1,
337 MEMCPY_ELEMENT_ATOMIC_2,
338 MEMCPY_ELEMENT_ATOMIC_4,
339 MEMCPY_ELEMENT_ATOMIC_8,
340 MEMCPY_ELEMENT_ATOMIC_16,
341
335342 // EXCEPTION HANDLING
336343 UNWIND_RESUME,
337344
502509 /// Return the SYNC_FETCH_AND_* value for the given opcode and type, or
503510 /// UNKNOWN_LIBCALL if there is none.
504511 Libcall getSYNC(unsigned Opc, MVT VT);
512
513 /// getMEMCPY_ELEMENT_ATOMIC - Return MEMCPY_ELEMENT_ATOMIC_* value for the
514 /// given element size or UNKNOWN_LIBCALL if there is none.
515 Libcall getMEMCPY_ELEMENT_ATOMIC(uint64_t ElementSize);
505516 }
506517 }
507518
758758 def int_load_relative: Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty],
759759 [IntrReadMem, IntrArgMemOnly]>;
760760
761 //===------ Memory intrinsics with element-wise atomicity guarantees ------===//
762 //
763
764 def int_memcpy_element_atomic : Intrinsic<[],
765 [llvm_anyptr_ty, llvm_anyptr_ty, // destination pointer, source pointer
766 llvm_i64_ty, llvm_i32_ty], // number of elements, element size in bytes
767 [IntrArgMemOnly, NoCapture<0>, NoCapture<1>,
768 WriteOnly<0>, ReadOnly<1>]>; // operand 0 is marked write-only, operand 1 read-only
769
761770 //===----------------------------------------------------------------------===//
762771 // Target-specific intrinsics
763772 //===----------------------------------------------------------------------===//
48934893 isTC, MachinePointerInfo(I.getArgOperand(0)),
48944894 MachinePointerInfo(I.getArgOperand(1)));
48954895 updateDAGForMaybeTailCall(MM);
4896 return nullptr;
4897 }
4898 case Intrinsic::memcpy_element_atomic: {
4899 SDValue Dst = getValue(I.getArgOperand(0));
4900 SDValue Src = getValue(I.getArgOperand(1));
4901 SDValue NumElements = getValue(I.getArgOperand(2));
4902 SDValue ElementSize = getValue(I.getArgOperand(3));
4903
4904 // Emit a library call, forwarding the four operands in intrinsic order.
4905 TargetLowering::ArgListTy Args;
4906 TargetLowering::ArgListEntry Entry;
4907 Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); // pointers are passed as pointer-sized integers
4908 Entry.Node = Dst;
4909 Args.push_back(Entry);
4910
4911 Entry.Node = Src;
4912 Args.push_back(Entry);
4913
4914 Entry.Ty = I.getArgOperand(2)->getType();
4915 Entry.Node = NumElements;
4916 Args.push_back(Entry);
4917
4918 Entry.Ty = Type::getInt32Ty(*DAG.getContext());
4919 Entry.Node = ElementSize;
4920 Args.push_back(Entry);
4921 // The element size must be a constant: it selects which libcall is used.
4922 uint64_t ElementSizeConstant =
4923 cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
4924 RTLIB::Libcall LibraryCall =
4925 RTLIB::getMEMCPY_ELEMENT_ATOMIC(ElementSizeConstant);
4926 if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
4927 report_fatal_error("Unsupported element size");
4928
4929 TargetLowering::CallLoweringInfo CLI(DAG);
4930 CLI.setDebugLoc(sdl)
4931 .setChain(getRoot())
4932 .setCallee(TLI.getLibcallCallingConv(LibraryCall),
4933 Type::getVoidTy(*DAG.getContext()),
4934 DAG.getExternalSymbol(
4935 TLI.getLibcallName(LibraryCall),
4936 TLI.getPointerTy(DAG.getDataLayout())),
4937 std::move(Args));
4938 // Lower the call and install CallResult.second as the new DAG root.
4939 std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
4940 DAG.setRoot(CallResult.second);
48964941 return nullptr;
48974942 }
48984943 case Intrinsic::dbg_declare: {
360360 Names[RTLIB::MEMCPY] = "memcpy";
361361 Names[RTLIB::MEMMOVE] = "memmove";
362362 Names[RTLIB::MEMSET] = "memset";
363 Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_1] = "__llvm_memcpy_element_atomic_1";
364 Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_2] = "__llvm_memcpy_element_atomic_2";
365 Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_4] = "__llvm_memcpy_element_atomic_4";
366 Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_8] = "__llvm_memcpy_element_atomic_8";
367 Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_16] = "__llvm_memcpy_element_atomic_16";
363368 Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
364369 Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
365370 Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
760765 #undef OP_TO_LIBCALL
761766
762767 return UNKNOWN_LIBCALL;
768 }
769
770 RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_ATOMIC(uint64_t ElementSize) {
771 // Each supported element size has its own MEMCPY_ELEMENT_ATOMIC_* entry;
772 // the supported sizes are 1, 2, 4, 8 and 16 bytes.
773 if (ElementSize == 1)
774 return MEMCPY_ELEMENT_ATOMIC_1;
775 if (ElementSize == 2)
776 return MEMCPY_ELEMENT_ATOMIC_2;
777 if (ElementSize == 4)
778 return MEMCPY_ELEMENT_ATOMIC_4;
779 if (ElementSize == 8)
780 return MEMCPY_ELEMENT_ATOMIC_8;
781 if (ElementSize == 16)
782 return MEMCPY_ELEMENT_ATOMIC_16;
783
784 // Any other size has no corresponding libcall.
785 return UNKNOWN_LIBCALL;
763786 }
764787
765788 /// InitCmpLibcallCCs - Set default comparison libcall CC.
39513951 CS);
39523952 break;
39533953 }
3954 case Intrinsic::memcpy_element_atomic: {
3955 ConstantInt *ElementSizeCI = dyn_cast<ConstantInt>(CS.getArgOperand(3));
3956 Assert(ElementSizeCI, "element size of the element-wise atomic memory "
3957 "intrinsic must be a constant int",
3958 CS);
3959 const APInt &ElementSizeVal = ElementSizeCI->getValue();
3960 Assert(ElementSizeVal.isPowerOf2(),
3961 "element size of the element-wise atomic memory intrinsic "
3962 "must be a power of 2",
3963 CS);
3964 // A pointer alignment is valid when it is a power of two and >= the element size.
3965 auto IsValidAlignment = [&](uint64_t Alignment) {
3966 return isPowerOf2_64(Alignment) && ElementSizeVal.ule(Alignment);
3967 };
3968
3969 uint64_t DstAlignment = CS.getParamAlignment(1),
3970 SrcAlignment = CS.getParamAlignment(2); // NOTE(review): indices 1 and 2 appear to denote the first (dest) and second (src) arguments — confirm
3971
3972 Assert(IsValidAlignment(DstAlignment),
3973 "incorrect alignment of the destination argument",
3974 CS);
3975 Assert(IsValidAlignment(SrcAlignment),
3976 "incorrect alignment of the source argument",
3977 CS);
3978 break;
3979 }
39543980 case Intrinsic::gcroot:
39553981 case Intrinsic::gcwrite:
39563982 case Intrinsic::gcread:
0 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
1
2 define i8* @test_memcpy1(i8* %P, i8* %Q) { ; element size 1 must select the _1 libcall
3 ; CHECK: test_memcpy1
4 call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 4 %P, i8* align 4 %Q, i64 1, i32 1) ; 1 element of 1 byte
5 ret i8* %P
6 ; CHECK-DAG: movl $1, %edx
7 ; CHECK-DAG: movl $1, %ecx
8 ; CHECK: __llvm_memcpy_element_atomic_1
9 }
10
11 define i8* @test_memcpy2(i8* %P, i8* %Q) { ; element size 2 must select the _2 libcall
12 ; CHECK: test_memcpy2
13 call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 4 %P, i8* align 4 %Q, i64 2, i32 2) ; 2 elements of 2 bytes each
14 ret i8* %P
15 ; CHECK-DAG: movl $2, %edx
16 ; CHECK-DAG: movl $2, %ecx
17 ; CHECK: __llvm_memcpy_element_atomic_2
18 }
19
20 define i8* @test_memcpy4(i8* %P, i8* %Q) { ; element size 4 must select the _4 libcall
21 ; CHECK: test_memcpy4
22 call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 4 %P, i8* align 4 %Q, i64 4, i32 4) ; 4 elements of 4 bytes each
23 ret i8* %P
24 ; CHECK-DAG: movl $4, %edx
25 ; CHECK-DAG: movl $4, %ecx
26 ; CHECK: __llvm_memcpy_element_atomic_4
27 }
28
29 define i8* @test_memcpy8(i8* %P, i8* %Q) { ; element size 8 must select the _8 libcall
30 ; CHECK: test_memcpy8
31 call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 8 %P, i8* align 8 %Q, i64 8, i32 8) ; 8 elements of 8 bytes each
32 ret i8* %P
33 ; CHECK-DAG: movl $8, %edx
34 ; CHECK-DAG: movl $8, %ecx
35 ; CHECK: __llvm_memcpy_element_atomic_8
36 }
37
38 define i8* @test_memcpy16(i8* %P, i8* %Q) { ; element size 16 must select the _16 libcall
39 ; CHECK: test_memcpy16
40 call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 16 %P, i8* align 16 %Q, i64 16, i32 16) ; 16 elements of 16 bytes each
41 ret i8* %P
42 ; CHECK-DAG: movl $16, %edx
43 ; CHECK-DAG: movl $16, %ecx
44 ; CHECK: __llvm_memcpy_element_atomic_16
45 }
46
47 define void @test_memcpy_args(i8** %Storage) { ; checks that all four operands reach the libcall
48 ; CHECK: test_memcpy_args
49 %Dst = load i8*, i8** %Storage ; destination pointer is read from Storage[0]
50 %Src.addr = getelementptr i8*, i8** %Storage, i64 1
51 %Src = load i8*, i8** %Src.addr ; source pointer is read from Storage[1]
52
53 ; First argument
54 ; CHECK-DAG: movq (%rdi), [[REG1:%r.+]]
55 ; CHECK-DAG: movq [[REG1]], %rdi
56 ; Second argument
57 ; CHECK-DAG: movq 8(%rdi), %rsi
58 ; Third argument
59 ; CHECK-DAG: movl $4, %edx
60 ; Fourth argument
61 ; CHECK-DAG: movl $4, %ecx
62 ; CHECK: __llvm_memcpy_element_atomic_4
63 call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 4 %Dst, i8* align 4 %Src, i64 4, i32 4) ; 4 elements of 4 bytes each
64 ret void
65 }
66
67 declare void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* nocapture, i8* nocapture, i64, i32) nounwind
0 ; RUN: not opt -verify < %s 2>&1 | FileCheck %s
1
2 define void @test_memcpy(i8* %P, i8* %Q) { ; each call below violates one verifier rule
3 ; CHECK: element size of the element-wise atomic memory intrinsic must be a power of 2
4 call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 2 %P, i8* align 2 %Q, i64 4, i32 3) ; element size 3 is not a power of two
5
6 ; CHECK: incorrect alignment of the destination argument
7 call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 2 %P, i8* align 4 %Q, i64 4, i32 4) ; destination align 2 is below element size 4
8
9 ; CHECK: incorrect alignment of the source argument
10 call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 4 %P, i8* align 2 %Q, i64 4, i32 4) ; source align 2 is below element size 4
11
12 ret void
13 }
14 declare void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* nocapture, i8* nocapture, i64, i32) nounwind
15
16 ; CHECK: input module is broken!