llvm.org GIT mirror llvm / a164276
AMDGPU: Implement readcyclecounter This matches the behavior of the HSAIL clock instruction. s_memrealtime is used if the subtarget supports it, and falls back to s_memtime if not. Also introduces new intrinsics for each of s_memtime / s_memrealtime. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@262119 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 4 years ago
12 changed file(s) with 148 addition(s) and 10 deletion(s). Raw diff Collapse all Expand all
187187 GCCBuiltin<"__builtin_amdgcn_s_dcache_inv">,
188188 Intrinsic<[], [], []>;
189189
190 def int_amdgcn_s_memtime :
191 GCCBuiltin<"__builtin_amdgcn_s_memtime">,
192 Intrinsic<[llvm_i64_ty], [], []>;
193
190194 def int_amdgcn_dispatch_ptr :
191195 GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">,
192196 Intrinsic<[LLVMQualPointerType], [], [IntrNoMem]>;
245249 GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">,
246250 Intrinsic<[], [], []>;
247251
252 def int_amdgcn_s_memrealtime :
253 GCCBuiltin<"__builtin_amdgcn_s_memrealtime">,
254 Intrinsic<[llvm_i64_ty], [], []>;
248255 }
146146 "CIInsts",
147147 "true",
148148 "Additional intstructions for CI+"
149 >;
150
151 def FeatureVIInsts : SubtargetFeature<"vi-insts",
152 "VIInsts",
153 "true",
154 "Additional intstructions for VI+"
149155 >;
150156
151157 //===------------------------------------------------------------===//
307313 def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
308314 [FeatureFP64, FeatureLocalMemorySize65536,
309315 FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
310 FeatureGCN3Encoding, FeatureCIInsts]
316 FeatureGCN3Encoding, FeatureCIInsts, FeatureVIInsts]
311317 >;
312318
313319 //===----------------------------------------------------------------------===//
8080 WavefrontSize(0), CFALUBug(false),
8181 LocalMemorySize(0), MaxPrivateElementSize(0),
8282 EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
83 GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),
83 GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), VIInsts(false),
84 LDSBankCount(0),
8485 IsaVersion(ISAVersion0_0_0), EnableHugeScratchBuffer(false),
8586 EnableSIScheduler(false), FrameLowering(nullptr),
8687 InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) {
8787 bool GCN1Encoding;
8888 bool GCN3Encoding;
8989 bool CIInsts;
90 bool VIInsts;
9091 bool FeatureDisable;
9192 int LDSBankCount;
9293 unsigned IsaVersion;
133133 setOperationAction(ISD::BR_CC, MVT::i64, Expand);
134134 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
135135 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
136
137 // This is s_memtime on SI/CI and s_memrealtime on VI.
138 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
136139
137140 for (MVT VT : MVT::integer_valuetypes()) {
138141 if (VT == MVT::i64)
10761076 }
10771077 }
10781078
1079 multiclass SMRD_Inval
1080 SDPatternOperator node> {
1081 let hasSideEffects = 1, mayStore = 1 in {
1082 def "" : SMRD_Pseudo ;
1079 multiclass SMRD_Special
1080 string opStr = "",
1081 list pattern = []> {
1082 let hasSideEffects = 1 in {
1083 def "" : SMRD_Pseudo ;
10831084
10841085 let sbase = 0, offset = 0 in {
10851086 let sdst = 0 in {
1086 def _si : SMRD_Real_si (outs), (ins), opName>;
1087 def _si : SMRD_Real_si outs, (ins), opName#opStr>;
10871088 }
10881089
10891090 let glc = 0, sdata = 0 in {
1090 def _vi : SMRD_Real_vi (outs), (ins), opName>;
1091 def _vi : SMRD_Real_vi outs, (ins), opName#opStr>;
10911092 }
10921093 }
1094 }
1095 }
1096
1097 multiclass SMRD_Inval
1098 SDPatternOperator node> {
1099 let mayStore = 1 in {
1100 defm : SMRD_Special;
10931101 }
10941102 }
10951103
10971105 SMRD_Real_vi {
10981106 let hasSideEffects = 1;
10991107 let mayStore = 1;
1108 let sbase = 0;
1109 let sdata = 0;
1110 let glc = 0;
1111 let offset = 0;
1112 }
1113
1114 class SMEM_Ret op, string opName, SDPatternOperator node> :
1115 SMRD_Real_vi
1116 opName#" $dst", [(set i64:$dst, (node))]> {
1117 let hasSideEffects = 1;
1118 let mayStore = ?;
1119 let mayLoad = ?;
11001120 let sbase = 0;
11011121 let sdata = 0;
11021122 let glc = 0;
8787 smrd<0x0c>, "s_buffer_load_dwordx16", SReg_128, SReg_512
8888 >;
8989
90 //def S_MEMTIME : SMRD_ <0x0000001e, "s_memtime", []>;
90 let mayStore = ? in {
91 // FIXME: mayStore = ? is a workaround for a TableGen bug: the instruction
92 // pattern and the standalone Pat infer different mayStore flags, and each
93 // considers the other contradictory.
94
95 defm S_MEMTIME : SMRD_Special , "s_memtime",
96 (outs SReg_64:$dst), " $dst", [(set i64:$dst, (int_amdgcn_s_memtime))]
97 >;
98 }
9199
92100 defm S_DCACHE_INV : SMRD_Inval , "s_dcache_inv",
93101 int_amdgcn_s_dcache_inv>;
31503158
31513159 def : BFEPattern ;
31523160
3161 let Predicates = [isSICI] in {
3162 def : Pat <
3163 (i64 (readcyclecounter)),
3164 (S_MEMTIME)
3165 >;
3166 }
3167
31533168 //===----------------------------------------------------------------------===//
31543169 // Fract Patterns
31553170 //===----------------------------------------------------------------------===//
102102 def S_DCACHE_WB_VOL : SMEM_Inval <0x23,
103103 "s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>;
104104
105 def S_MEMREALTIME : SMEM_Ret<0x25,
106 "s_memrealtime", int_amdgcn_s_memrealtime>;
107
105108 } // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI
106109
107110 let Predicates = [isVI] in {
113116 >;
114117
115118 //===----------------------------------------------------------------------===//
116 // DPP Paterns
119 // DPP Patterns
117120 //===----------------------------------------------------------------------===//
118121
119122 def : Pat <
123126 (as_i32imm $bank_mask), (as_i32imm $row_mask))
124127 >;
125128
129 //===----------------------------------------------------------------------===//
130 // Misc Patterns
131 //===----------------------------------------------------------------------===//
132
133 def : Pat <
134 (i64 (readcyclecounter)),
135 (S_MEMREALTIME)
136 >;
137
126138 } // End Predicates = [isVI]
0 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
1
2 declare i64 @llvm.amdgcn.s.memrealtime() #0
3
4 ; GCN-LABEL: {{^}}test_s_memrealtime:
5 ; GCN-DAG: s_memrealtime s{{\[[0-9]+:[0-9]+\]}}
6 ; GCN-DAG: s_load_dwordx2
7 ; GCN: lgkmcnt
8 ; GCN: buffer_store_dwordx2
9 ; GCN-NOT: lgkmcnt
10 ; GCN: s_memrealtime s{{\[[0-9]+:[0-9]+\]}}
11 ; GCN: buffer_store_dwordx2
12 define void @test_s_memrealtime(i64 addrspace(1)* %out) #0 {
13 %cycle0 = call i64 @llvm.amdgcn.s.memrealtime()
14 store volatile i64 %cycle0, i64 addrspace(1)* %out
15
16 %cycle1 = call i64 @llvm.amdgcn.s.memrealtime()
17 store volatile i64 %cycle1, i64 addrspace(1)* %out
18 ret void
19 }
20
21 attributes #0 = { nounwind }
0 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
2
3 declare i64 @llvm.amdgcn.s.memtime() #0
4
5 ; GCN-LABEL: {{^}}test_s_memtime:
6 ; GCN-DAG: s_memtime s{{\[[0-9]+:[0-9]+\]}}
7 ; GCN-DAG: s_load_dwordx2
8 ; GCN: lgkmcnt
9 ; GCN: buffer_store_dwordx2
10 ; GCN-NOT: lgkmcnt
11 ; GCN: s_memtime s{{\[[0-9]+:[0-9]+\]}}
12 ; GCN: buffer_store_dwordx2
13 define void @test_s_memtime(i64 addrspace(1)* %out) #0 {
14 %cycle0 = call i64 @llvm.amdgcn.s.memtime()
15 store volatile i64 %cycle0, i64 addrspace(1)* %out
16
17 %cycle1 = call i64 @llvm.amdgcn.s.memtime()
18 store volatile i64 %cycle1, i64 addrspace(1)* %out
19 ret void
20 }
21
22 attributes #0 = { nounwind }
0 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
2
3 declare i64 @llvm.readcyclecounter() #0
4
5 ; GCN-LABEL: {{^}}test_readcyclecounter:
6 ; SI-DAG: s_memtime s{{\[[0-9]+:[0-9]+\]}}
7 ; VI-DAG: s_memrealtime s{{\[[0-9]+:[0-9]+\]}}
8 ; GCN-DAG: s_load_dwordx2
9 ; GCN: lgkmcnt
10 ; GCN: buffer_store_dwordx2
11 ; GCN-NOT: lgkmcnt
12 ; SI: s_memtime s{{\[[0-9]+:[0-9]+\]}}
13 ; VI: s_memrealtime s{{\[[0-9]+:[0-9]+\]}}
14 ; GCN: buffer_store_dwordx2
15 define void @test_readcyclecounter(i64 addrspace(1)* %out) #0 {
16 %cycle0 = call i64 @llvm.readcyclecounter()
17 store volatile i64 %cycle0, i64 addrspace(1)* %out
18
19 %cycle1 = call i64 @llvm.readcyclecounter()
20 store volatile i64 %cycle1, i64 addrspace(1)* %out
21 ret void
22 }
23
24 attributes #0 = { nounwind }
6666 s_dcache_inv_vol
6767 // CI: s_dcache_inv_vol ; encoding: [0x00,0x00,0x40,0xc7]
6868 // NOSI: error: instruction not supported on this GPU
69
70 s_memtime s[0:1]
71 // GCN: s_memtime s[0:1] ; encoding: [0x00,0x00,0x80,0xc7]