llvm.org GIT mirror llvm / f252661
AMDGPU: Remove IntrReadMem from memtime/memrealtime intrinsics

EarlyCSE with MemorySSA was able to use this to merge multiple calls with no intervening store.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354814 91177308-0d34-0410-b5e6-96231b3b80d8

Matt Arsenault, 6 months ago
4 changed file(s) with 60 addition(s) and 4 deletion(s). Raw diff Collapse all Expand all
11171117
11181118 def int_amdgcn_s_memtime :
11191119 GCCBuiltin<"__builtin_amdgcn_s_memtime">,
1120 Intrinsic<[llvm_i64_ty], [], [IntrReadMem]>;
1120 Intrinsic<[llvm_i64_ty], []>;
11211121
11221122 def int_amdgcn_s_sleep :
11231123 GCCBuiltin<"__builtin_amdgcn_s_sleep">,
13901390
13911391 def int_amdgcn_s_memrealtime :
13921392 GCCBuiltin<"__builtin_amdgcn_s_memrealtime">,
1393 Intrinsic<[llvm_i64_ty], [], [IntrReadMem]>;
1393 Intrinsic<[llvm_i64_ty]>;
13941394
13951395 // llvm.amdgcn.ds.permute
13961396 def int_amdgcn_ds_permute :
151151 def _SGPR : SM_Discard_Pseudo ;
152152 }
153153
154 class SM_Time_Pseudo<string opName, SDPatternOperator node> : SM_Pseudo<
154 class SM_Time_Pseudo<string opName, SDPatternOperator node = null_frag> : SM_Pseudo<
155155 opName, (outs SReg_64_XEXEC:$sdst), (ins),
156156 " $sdst", [(set i64:$sdst, (node))]> {
157157 let hasSideEffects = 1;
158 let mayStore = 0;
158
159 // FIXME: This should be definitively mayStore = 0. TableGen
160 // brokenly tries to infer these based on the intrinsic properties
161 // corresponding to the IR attributes. The target intrinsics are
162 // considered as writing to memory for IR dependency purposes, but
163 // those can be modeled with hasSideEffects here. These also end up
164 // inferring differently for llvm.readcyclecounter and the amdgcn
165 // intrinsics.
166 let mayStore = ?;
159167 let mayLoad = 1;
160168 let has_sbase = 0;
161169 let has_offset = 0;
0 config.suffixes = ['.ll']
1
2 targets = set(config.root.targets_to_build.split())
3 if not 'AMDGPU' in targets:
4 config.unsupported = True
0 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -early-cse-memssa < %s | FileCheck %s
1 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
2
3 ; CHECK-LABEL: @memrealtime(
4 ; CHECK: call i64 @llvm.amdgcn.s.memrealtime()
5 ; CHECK: call i64 @llvm.amdgcn.s.memrealtime()
6 define amdgpu_kernel void @memrealtime(i64 %cycles) #0 {
7 entry:
8 %0 = tail call i64 @llvm.amdgcn.s.memrealtime()
9 %cmp3 = icmp sgt i64 %cycles, 0
10 br i1 %cmp3, label %while.body, label %while.end
11
12 while.body:
13 %1 = tail call i64 @llvm.amdgcn.s.memrealtime()
14 %sub = sub nsw i64 %1, %0
15 %cmp = icmp slt i64 %sub, %cycles
16 br i1 %cmp, label %while.body, label %while.end
17
18 while.end:
19 ret void
20 }
21
22 ; CHECK-LABEL: @memtime(
23 ; CHECK: call i64 @llvm.amdgcn.s.memtime()
24 ; CHECK: call i64 @llvm.amdgcn.s.memtime()
25 define amdgpu_kernel void @memtime(i64 %cycles) #0 {
26 entry:
27 %0 = tail call i64 @llvm.amdgcn.s.memtime()
28 %cmp3 = icmp sgt i64 %cycles, 0
29 br i1 %cmp3, label %while.body, label %while.end
30
31 while.body:
32 %1 = tail call i64 @llvm.amdgcn.s.memtime()
33 %sub = sub nsw i64 %1, %0
34 %cmp = icmp slt i64 %sub, %cycles
35 br i1 %cmp, label %while.body, label %while.end
36
37 while.end:
38 ret void
39 }
40
41 declare i64 @llvm.amdgcn.s.memrealtime()
42 declare i64 @llvm.amdgcn.s.memtime()