llvm.org GIT mirror llvm / 464205d
[llvm-mca] Refactor some of the logic in InstrBuilder, and add a verifyOperands method. With this change, InstrBuilder emits an error if the MCInst sequence contains an instruction with a variadic opcode, and a non-zero number of variadic operands. Currently we don't know how to correctly analyze variadic opcodes. The problem with variadic operands is that there is no information for them in the opcode descriptor (i.e. MCInstrDesc). That means, we don't know which variadic operands are defs, and which are uses. In the future, we could try to conservatively assume that any extra register operand is both a register use and a register definition. This patch fixes a subtle bug in the evaluation of read/write operands for ARM VLD1 with implicit index update. Added test vld1-index-update.s git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@347503 91177308-0d34-0410-b5e6-96231b3b80d8 Andrea Di Biagio 9 months ago
3 changed file(s) with 193 addition(s) and 80 deletion(s). Raw diff Collapse all Expand all
0 # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
1 # RUN: llvm-mca -mtriple=armv7-unknown-unknown -mcpu=swift -timeline -iterations=5 < %s | FileCheck %s
2
3 # Register r1 is updated in one cycle by instruction vld1.32, so the add.w can
4 # start one cycle later.
5
6 add.w r1, r1, r12
7 vld1.32 {d16, d17}, [r1]!
8
9 # CHECK: Iterations: 5
10 # CHECK-NEXT: Instructions: 10
11 # CHECK-NEXT: Total Cycles: 16
12 # CHECK-NEXT: Total uOps: 15
13
14 # CHECK: Dispatch Width: 3
15 # CHECK-NEXT: uOps Per Cycle: 0.94
16 # CHECK-NEXT: IPC: 0.63
17 # CHECK-NEXT: Block RThroughput: 1.0
18
19 # CHECK: Instruction Info:
20 # CHECK-NEXT: [1]: #uOps
21 # CHECK-NEXT: [2]: Latency
22 # CHECK-NEXT: [3]: RThroughput
23 # CHECK-NEXT: [4]: MayLoad
24 # CHECK-NEXT: [5]: MayStore
25 # CHECK-NEXT: [6]: HasSideEffects (U)
26
27 # CHECK: [1] [2] [3] [4] [5] [6] Instructions:
28 # CHECK-NEXT: 1 1 0.50 add r1, r1, r12
29 # CHECK-NEXT: 2 4 1.00 * vld1.32 {d16, d17}, [r1]!
30
31 # CHECK: Resources:
32 # CHECK-NEXT: [0] - SwiftUnitDiv
33 # CHECK-NEXT: [1] - SwiftUnitP0
34 # CHECK-NEXT: [2] - SwiftUnitP1
35 # CHECK-NEXT: [3] - SwiftUnitP2
36 # CHECK-NEXT: [4.0] - SwiftUnitP01
37 # CHECK-NEXT: [4.1] - SwiftUnitP01
38
39 # CHECK: Resource pressure per iteration:
40 # CHECK-NEXT: [0] [1] [2] [3] [4.0] [4.1]
41 # CHECK-NEXT: - - - 1.00 1.00 1.00
42
43 # CHECK: Resource pressure by instruction:
44 # CHECK-NEXT: [0] [1] [2] [3] [4.0] [4.1] Instructions:
45 # CHECK-NEXT: - - - - - 1.00 add r1, r1, r12
46 # CHECK-NEXT: - - - 1.00 1.00 - vld1.32 {d16, d17}, [r1]!
47
48 # CHECK: Timeline view:
49 # CHECK-NEXT: 012345
50 # CHECK-NEXT: Index 0123456789
51
52 # CHECK: [0,0] DeER . . . add r1, r1, r12
53 # CHECK-NEXT: [0,1] D=eeeeER . . vld1.32 {d16, d17}, [r1]!
54 # CHECK-NEXT: [1,0] .D=eE--R . . add r1, r1, r12
55 # CHECK-NEXT: [1,1] .D==eeeeER. . vld1.32 {d16, d17}, [r1]!
56 # CHECK-NEXT: [2,0] . D==eE--R. . add r1, r1, r12
57 # CHECK-NEXT: [2,1] . D===eeeeER . vld1.32 {d16, d17}, [r1]!
58 # CHECK-NEXT: [3,0] . D===eE--R . add r1, r1, r12
59 # CHECK-NEXT: [3,1] . D====eeeeER . vld1.32 {d16, d17}, [r1]!
60 # CHECK-NEXT: [4,0] . D====eE--R . add r1, r1, r12
61 # CHECK-NEXT: [4,1] . D=====eeeeER vld1.32 {d16, d17}, [r1]!
62
63 # CHECK: Average Wait times (based on the timeline view):
64 # CHECK-NEXT: [0]: Executions
65 # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
66 # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
67 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
68
69 # CHECK: [0] [1] [2] [3]
70 # CHECK-NEXT: 0. 5 3.0 0.2 1.6 add r1, r1, r12
71 # CHECK-NEXT: 1. 5 4.0 0.0 0.0 vld1.32 {d16, d17}, [r1]!
5151 InstrBuilder(const InstrBuilder &) = delete;
5252 InstrBuilder &operator=(const InstrBuilder &) = delete;
5353
54 Error populateWrites(InstrDesc &ID, const MCInst &MCI, unsigned SchedClassID);
55 Error populateReads(InstrDesc &ID, const MCInst &MCI, unsigned SchedClassID);
54 void populateWrites(InstrDesc &ID, const MCInst &MCI, unsigned SchedClassID);
55 void populateReads(InstrDesc &ID, const MCInst &MCI, unsigned SchedClassID);
5656 Error verifyInstrDesc(const InstrDesc &ID, const MCInst &MCI) const;
5757
5858 public:
187187 ID.MaxLatency = Latency < 0 ? 100U : static_cast(Latency);
188188 }
189189
190 Error InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
191 unsigned SchedClassID) {
190 static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
191 // Variadic opcodes are not correctly supported.
192 if (MCDesc.isVariadic()) {
193 if (MCI.getNumOperands() - MCDesc.getNumOperands()) {
194 return make_error>(
195 "Don't know how to process this variadic opcode.", MCI);
196 }
197 }
198
199 // Count register definitions, and skip non register operands in the process.
200 unsigned I, E;
201 unsigned NumExplicitDefs = MCDesc.getNumDefs();
202 for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
203 const MCOperand &Op = MCI.getOperand(I);
204 if (Op.isReg())
205 --NumExplicitDefs;
206 }
207
208 if (NumExplicitDefs) {
209 return make_error>(
210 "Expected more register operand definitions.", MCI);
211 }
212
213 if (MCDesc.hasOptionalDef()) {
214 // Always assume that the optional definition is the last operand.
215 const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
216 if (I == MCI.getNumOperands() || !Op.isReg()) {
217 std::string Message =
218 "expected a register operand for an optional definition. Instruction "
219 "has not been correctly analyzed.";
220 return make_error>(Message, MCI);
221 }
222 }
223
224 return ErrorSuccess();
225 }
226
227 void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
228 unsigned SchedClassID) {
192229 const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
193230 const MCSchedModel &SM = STI.getSchedModel();
194231 const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
195232
196 // These are for now the (strong) assumptions made by this algorithm:
197 // * The number of explicit and implicit register definitions in a MCInst
198 // matches the number of explicit and implicit definitions according to
199 // the opcode descriptor (MCInstrDesc).
200 // * Register definitions take precedence over register uses in the operands
201 // list.
202 // * If an opcode specifies an optional definition, then the optional
203 // definition is always the last operand in the sequence, and it can be
204 // set to zero (i.e. "no register").
233 // Assumptions made by this algorithm:
234 // 1. The number of explicit and implicit register definitions in a MCInst
235 // matches the number of explicit and implicit definitions according to
236 // the opcode descriptor (MCInstrDesc).
237 // 2. Uses start at index #(MCDesc.getNumDefs()).
238 // 3. There can only be a single optional register definition, and it is
239 // always the last operand of the sequence (excluding extra operands
240 // contributed by variadic opcodes).
205241 //
206242 // These assumptions work quite well for most out-of-order in-tree targets
207243 // like x86. This is mainly because the vast majority of instructions is
208244 // expanded to MCInst using a straightforward lowering logic that preserves
209245 // the ordering of the operands.
246 //
247 // About assumption 1.
248 // The algorithm allows non-register operands between register operand
249 // definitions. This helps to handle some special ARM instructions with
250 // implicit operand increment (-mtriple=armv7):
251 //
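252 // vld1.32 {d18, d19}, [r1]! @ <MCInst #1463 VLD1q32wb_fixed
253 // @ <MCOperand Reg:59>
254 // @ <MCOperand Imm:0> (!!)
255 // @ <MCOperand Reg:67>
256 // @ <MCOperand Imm:0>
257 // @ <MCOperand Imm:14>
258 // @ <MCOperand Reg:0>>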
259 //
260 // MCDesc reports:
261 // 6 explicit operands.
262 // 1 optional definition
263 // 2 explicit definitions (!!)
264 //
265 // The presence of an 'Imm' operand between the two register definitions
266 // breaks the assumption that "register definitions are always at the
267 // beginning of the operand sequence".
268 //
269 // To workaround this issue, this algorithm ignores (i.e. skips) any
270 // non-register operands between register definitions. The optional
271 // definition is still at index #(NumOperands-1).
272 //
273 // According to assumption 2, register reads start at #(NumExplicitDefs).
274 // That means, register R1 from the example is both read and written.
210275 unsigned NumExplicitDefs = MCDesc.getNumDefs();
211276 unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs();
212277 unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
213278 unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
214279 if (MCDesc.hasOptionalDef())
215280 TotalDefs++;
281
216282 ID.Writes.resize(TotalDefs);
217283 // Iterate over the operands list, and skip non-register operands.
218284 // The first NumExplicitDefs register operands are expected to be register
240306 }
241307 Write.IsOptionalDef = false;
242308 LLVM_DEBUG({
243 dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
309 dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
244310 << ", Latency=" << Write.Latency
245311 << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
246312 });
247313 CurrentDef++;
248314 }
249315
250 if (CurrentDef != NumExplicitDefs) {
251 return make_error>(
252 "Expected more register operand definitions.", MCI);
253 }
254
255 CurrentDef = 0;
316 assert(CurrentDef == NumExplicitDefs &&
317 "Expected more register operand definitions.");
256318 for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
257319 unsigned Index = NumExplicitDefs + CurrentDef;
258320 WriteDescriptor &Write = ID.Writes[Index];
274336 Write.IsOptionalDef = false;
275337 assert(Write.RegisterID != 0 && "Expected a valid phys register!");
276338 LLVM_DEBUG({
277 dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
339 dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
278340 << ", PhysReg=" << MRI.getName(Write.RegisterID)
279341 << ", Latency=" << Write.Latency
280342 << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
282344 }
283345
284346 if (MCDesc.hasOptionalDef()) {
285 // Always assume that the optional definition is the last operand of the
286 // MCInst sequence.
287 const MCOperand &Op = MCI.getOperand(MCI.getNumOperands() - 1);
288 if (i == MCI.getNumOperands() || !Op.isReg()) {
289 std::string Message =
290 "expected a register operand for an optional definition. Instruction "
291 "has not been correctly analyzed.";
292 return make_error>(Message, MCI);
293 }
294
295 WriteDescriptor &Write = ID.Writes[TotalDefs - 1];
296 Write.OpIndex = MCI.getNumOperands() - 1;
347 WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
348 Write.OpIndex = MCDesc.getNumOperands() - 1;
297349 // Assign a default latency for this write.
298350 Write.Latency = ID.MaxLatency;
299351 Write.SClassOrWriteResourceID = 0;
300352 Write.IsOptionalDef = true;
301 }
302
303 return ErrorSuccess();
304 }
305
306 Error InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
307 unsigned SchedClassID) {
353 LLVM_DEBUG({
354 dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
355 << ", Latency=" << Write.Latency
356 << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
357 });
358 }
359 }
360
361 void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
362 unsigned SchedClassID) {
308363 const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
309 unsigned NumExplicitDefs = MCDesc.getNumDefs();
310
311 // Skip explicit definitions.
312 unsigned i = 0;
313 for (; i < MCI.getNumOperands() && NumExplicitDefs; ++i) {
314 const MCOperand &Op = MCI.getOperand(i);
315 if (Op.isReg())
316 NumExplicitDefs--;
317 }
318
319 if (NumExplicitDefs) {
320 return make_error>(
321 "Expected more register operand definitions.", MCI);
322 }
323
324 unsigned NumExplicitUses = MCI.getNumOperands() - i;
364 unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
325365 unsigned NumImplicitUses = MCDesc.getNumImplicitUses();
326 if (MCDesc.hasOptionalDef()) {
327 assert(NumExplicitUses);
328 NumExplicitUses--;
329 }
366 // Remove the optional definition.
367 if (MCDesc.hasOptionalDef())
368 --NumExplicitUses;
330369 unsigned TotalUses = NumExplicitUses + NumImplicitUses;
331 if (!TotalUses)
332 return ErrorSuccess();
333370
334371 ID.Reads.resize(TotalUses);
335 for (unsigned CurrentUse = 0; CurrentUse < NumExplicitUses; ++CurrentUse) {
336 ReadDescriptor &Read = ID.Reads[CurrentUse];
337 Read.OpIndex = i + CurrentUse;
338 Read.UseIndex = CurrentUse;
372 for (unsigned I = 0; I < NumExplicitUses; ++I) {
373 ReadDescriptor &Read = ID.Reads[I];
374 Read.OpIndex = MCDesc.getNumDefs() + I;
375 Read.UseIndex = I;
339376 Read.SchedClassID = SchedClassID;
340 LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
377 LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
341378 << ", UseIndex=" << Read.UseIndex << '\n');
342379 }
343380
344 for (unsigned CurrentUse = 0; CurrentUse < NumImplicitUses; ++CurrentUse) {
345 ReadDescriptor &Read = ID.Reads[NumExplicitUses + CurrentUse];
346 Read.OpIndex = ~CurrentUse;
347 Read.UseIndex = NumExplicitUses + CurrentUse;
348 Read.RegisterID = MCDesc.getImplicitUses()[CurrentUse];
381 // For the purpose of ReadAdvance, implicit uses come directly after explicit
382 // uses. The "UseIndex" must be updated according to that implicit layout.
383 for (unsigned I = 0; I < NumImplicitUses; ++I) {
384 ReadDescriptor &Read = ID.Reads[NumExplicitUses + I];
385 Read.OpIndex = ~I;
386 Read.UseIndex = NumExplicitUses + I;
387 Read.RegisterID = MCDesc.getImplicitUses()[I];
349388 Read.SchedClassID = SchedClassID;
350 LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex << ", RegisterID="
389 LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
390 << ", UseIndex=" << Read.UseIndex << ", RegisterID="
351391 << MRI.getName(Read.RegisterID) << '\n');
352392 }
353 return ErrorSuccess();
354393 }
355394
356395 Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
434473
435474 initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
436475 computeMaxLatency(*ID, MCDesc, SCDesc, STI);
437 if (auto Err = populateWrites(*ID, MCI, SchedClassID))
476
477 if (Error Err = verifyOperands(MCDesc, MCI))
438478 return std::move(Err);
439 if (auto Err = populateReads(*ID, MCI, SchedClassID))
440 return std::move(Err);
479
480 populateWrites(*ID, MCI, SchedClassID);
481 populateReads(*ID, MCI, SchedClassID);
441482
442483 LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
443484 LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');
555596 }
556597
557598 assert(RegID && "Expected a valid register ID!");
558 NewIS->getDefs().emplace_back(
559 WD, RegID, /* ClearsSuperRegs */ WriteMask[WriteIndex],
560 /* WritesZero */ IsZeroIdiom);
599 NewIS->getDefs().emplace_back(WD, RegID,
600 /* ClearsSuperRegs */ WriteMask[WriteIndex],
601 /* WritesZero */ IsZeroIdiom);
561602 ++WriteIndex;
562603 }
563604