llvm.org GIT mirror llvm / 09680eb
[NVPTX] Some nvvm.read.ptx.sreg intrinsics should have IntrInaccessibleMemOnly attribute. These intrinsics may return different values every time they are called and should not be CSE'd. IntrInaccessibleMemOnly appears to be the right attribute to model this behavior. Differential Revision: https://reviews.llvm.org/D57259 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@352256 91177308-0d34-0410-b5e6-96231b3b80d8 Artem Belevich 1 year, 4 months ago
2 changed file(s) with 57 addition(s) and 8 deletion(s). Raw diff Collapse all Expand all
36723672 class PTXReadSRegIntrinsic_r32<string name>
36733673 : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
36743674 GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
3675
36763675 class PTXReadSRegIntrinsic_r64<string name>
36773676 : Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>,
3677 GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
3678
3679 // Intrinsics to read registers with non-constant values, i.e. values that
3680 // change over the kernel's lifetime. Such reads should not be CSE'd.
3681 class PTXReadNCSRegIntrinsic_r32<string name>
3682 : Intrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>,
3683 GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
3684 class PTXReadNCSRegIntrinsic_r64<string name>
3685 : Intrinsic<[llvm_i64_ty], [], [IntrInaccessibleMemOnly]>,
36783686 GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
36793687
36803688 defm int_nvvm_read_ptx_sreg_tid : PTXReadSRegIntrinsic_v4i32<"tid">;
37023710 def int_nvvm_read_ptx_sreg_lanemask_gt :
37033711 PTXReadSRegIntrinsic_r32<"lanemask_gt">;
37043712
3705 def int_nvvm_read_ptx_sreg_clock : PTXReadSRegIntrinsic_r32<"clock">;
3706 def int_nvvm_read_ptx_sreg_clock64 : PTXReadSRegIntrinsic_r64<"clock64">;
3707
3708 def int_nvvm_read_ptx_sreg_pm0 : PTXReadSRegIntrinsic_r32<"pm0">;
3709 def int_nvvm_read_ptx_sreg_pm1 : PTXReadSRegIntrinsic_r32<"pm1">;
3710 def int_nvvm_read_ptx_sreg_pm2 : PTXReadSRegIntrinsic_r32<"pm2">;
3711 def int_nvvm_read_ptx_sreg_pm3 : PTXReadSRegIntrinsic_r32<"pm3">;
3713 def int_nvvm_read_ptx_sreg_clock : PTXReadNCSRegIntrinsic_r32<"clock">;
3714 def int_nvvm_read_ptx_sreg_clock64 : PTXReadNCSRegIntrinsic_r64<"clock64">;
3715
3716 def int_nvvm_read_ptx_sreg_pm0 : PTXReadNCSRegIntrinsic_r32<"pm0">;
3717 def int_nvvm_read_ptx_sreg_pm1 : PTXReadNCSRegIntrinsic_r32<"pm1">;
3718 def int_nvvm_read_ptx_sreg_pm2 : PTXReadNCSRegIntrinsic_r32<"pm2">;
3719 def int_nvvm_read_ptx_sreg_pm3 : PTXReadNCSRegIntrinsic_r32<"pm3">;
37123720
37133721 def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">;
37143722
9393 ret i32 %zext
9494 }
9595
96 ; Most nvvm.read.ptx.sreg.* intrinsics always return the same value and may
97 ; be CSE'd.
98 ; CHECK-LABEL: test_tid
99 define i32 @test_tid() {
100 ; CHECK: mov.u32 %r{{.*}}, %tid.x;
101 %a = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
102 ; CHECK-NOT: mov.u32 %r{{.*}}, %tid.x;
103 %b = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
104 %ret = add i32 %a, %b
105 ; CHECK: ret
106 ret i32 %ret
107 }
108
109 ; Reading clock() or clock64() should not be CSE'd, as each read may return a
110 ; different value.
111 ; CHECK-LABEL: test_clock
112 define i32 @test_clock() {
113 ; CHECK: mov.u32 %r{{.*}}, %clock;
114 %a = tail call i32 @llvm.nvvm.read.ptx.sreg.clock()
115 ; CHECK: mov.u32 %r{{.*}}, %clock;
116 %b = tail call i32 @llvm.nvvm.read.ptx.sreg.clock()
117 %ret = add i32 %a, %b
118 ; CHECK: ret
119 ret i32 %ret
120 }
121
122 ; CHECK-LABEL: test_clock64
123 define i64 @test_clock64() {
124 ; CHECK: mov.u64 %r{{.*}}, %clock64;
125 %a = tail call i64 @llvm.nvvm.read.ptx.sreg.clock64()
126 ; CHECK: mov.u64 %r{{.*}}, %clock64;
127 %b = tail call i64 @llvm.nvvm.read.ptx.sreg.clock64()
128 %ret = add i64 %a, %b
129 ; CHECK: ret
130 ret i64 %ret
131 }
132
96133 declare float @llvm.fabs.f32(float)
97134 declare double @llvm.fabs.f64(double)
98135 declare float @llvm.nvvm.sqrt.f(float)
102139 declare i16 @llvm.ctpop.i16(i16)
103140 declare i32 @llvm.ctpop.i32(i32)
104141 declare i64 @llvm.ctpop.i64(i64)
142
143 declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
144 declare i32 @llvm.nvvm.read.ptx.sreg.clock()
145 declare i64 @llvm.nvvm.read.ptx.sreg.clock64()