llvm.org GIT mirror llvm / d8660fa
[NVPTX] Implement __nvvm_atom_add_gen_d builtin. Summary: This just seems to have been an oversight. We already supported the f64 atomic add with an explicit scope (e.g. "cta"), but not the scopeless version. Reviewers: tra Subscribers: jholewinski, sanjoy, cfe-commits, llvm-commits, hiraditya Differential Revision: https://reviews.llvm.org/D39638 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317623 91177308-0d34-0410-b5e6-96231b3b80d8 Justin Lebar 1 year, 11 months ago
4 changed file(s) with 39 addition(s) and 1 deletion(s). Raw diff Collapse all Expand all
682682 Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
683683
684684
685 // Atomic not available as an llvm intrinsic.
685 // Atomics not available as llvm intrinsics.
686686 def int_nvvm_atomic_load_add_f32 : Intrinsic<[llvm_float_ty],
687687 [LLVMAnyPointerType, llvm_float_ty],
688688 [IntrArgMemOnly, NoCapture<0>]>;
689 // Atomic add of f64 requires sm_60.
690 def int_nvvm_atomic_load_add_f64 : Intrinsic<[llvm_double_ty],
691 [LLVMAnyPointerType, llvm_double_ty],
692 [IntrArgMemOnly, NoCapture<0>]>;
693
689694 def int_nvvm_atomic_load_inc_32 : Intrinsic<[llvm_i32_ty],
690695 [LLVMAnyPointerType, llvm_i32_ty],
691696 [IntrArgMemOnly, NoCapture<0>]>;
34483448 }
34493449
34503450 case Intrinsic::nvvm_atomic_load_add_f32:
3451 case Intrinsic::nvvm_atomic_load_add_f64:
34513452 case Intrinsic::nvvm_atomic_load_inc_32:
34523453 case Intrinsic::nvvm_atomic_load_dec_32:
34533454
10941094 (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
10951095 def atomic_load_add_f32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
10961096 (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
1097 def atomic_load_add_f64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1098 (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
1099 def atomic_load_add_f64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1100 (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
1101 def atomic_load_add_f64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1102 (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
10971103
10981104 defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2
10991105 atomic_load_add_32_g, i32imm, imm, hasAtomRedG32>;
11191125 atomic_load_add_f32_s, f32imm, fpimm, hasAtomAddF32>;
11201126 defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2
11211127 atomic_load_add_f32_gen, f32imm, fpimm, hasAtomAddF32>;
1128
1129 defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2
1130 atomic_load_add_f64_g, f64imm, fpimm, hasAtomAddF64>;
1131 defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2
1132 atomic_load_add_f64_s, f64imm, fpimm, hasAtomAddF64>;
1133 defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2
1134 atomic_load_add_f64_gen, f64imm, fpimm, hasAtomAddF64>;
11221135
11231136 // atom_sub
11241137
; Test lowering of the scopeless f64 atomic-add intrinsics (atom.add.f64),
; which require sm_60 or newer.
; RUN: llc < %s -march=nvptx -mcpu=sm_60 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 | FileCheck %s

; Note: the colon after CHECK-LABEL is required; without it FileCheck
; treats the line as a plain comment and the label check never runs.
; CHECK-LABEL: .func test(
define void @test(double* %dp0, double addrspace(1)* %dp1, double addrspace(3)* %dp3, double %d) {
; Generic address space -> plain atom.add.f64.
; CHECK: atom.add.f64
  %r1 = call double @llvm.nvvm.atomic.load.add.f64.p0f64(double* %dp0, double %d)
; Global address space (addrspace(1)) -> atom.global.add.f64.
; CHECK: atom.global.add.f64
  %r2 = call double @llvm.nvvm.atomic.load.add.f64.p1f64(double addrspace(1)* %dp1, double %d)
; Shared address space (addrspace(3)) -> atom.shared.add.f64.
; CHECK: atom.shared.add.f64
  %r3 = call double @llvm.nvvm.atomic.load.add.f64.p3f64(double addrspace(3)* %dp3, double %d)
  ret void
}

declare double @llvm.nvvm.atomic.load.add.f64.p0f64(double* nocapture, double) #1
declare double @llvm.nvvm.atomic.load.add.f64.p1f64(double addrspace(1)* nocapture, double) #1
declare double @llvm.nvvm.atomic.load.add.f64.p3f64(double addrspace(3)* nocapture, double) #1

attributes #1 = { argmemonly nounwind }