llvm.org GIT mirror: llvm @ 8b53eb1
[llvm-mca] Add support for instructions with a variadic number of operands.

By default, llvm-mca conservatively assumes that a register operand from the variadic sequence is both a register read and a register write. That is because MCInstrDesc doesn't describe extra variadic operands; we don't have enough dataflow information to tell which register operands from the variadic sequence are definitions, and which are uses instead.

However, if a variadic instruction is flagged 'mayStore' (but not 'mayLoad'), and it has no 'unmodeledSideEffects', then llvm-mca (very) optimistically assumes that any register operand in the variadic sequence is a register read only. Conversely, if a variadic instruction is marked as 'mayLoad' (but not 'mayStore'), and it has no 'unmodeledSideEffects', then llvm-mca optimistically assumes that any extra register operand is a register definition only.

These assumptions work quite well for the variadic load/store multiple instructions defined by the ARM backend.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@347522 91177308-0d34-0410-b5e6-96231b3b80d8

Andrea Di Biagio
2 changed files with 146 additions and 18 deletions.
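For reference, here is a minimal, self-contained sketch of the heuristic described in the commit message. The InstrFlags struct and classifyVariadicRegOperand function are hypothetical names introduced only for illustration; the real patch queries MCInstrDesc::mayLoad(), MCInstrDesc::mayStore(), and MCInstrDesc::hasUnmodeledSideEffects(), as shown in the InstrBuilder changes further below.

#include <iostream>

// Hypothetical stand-in for the relevant MCInstrDesc flag queries.
struct InstrFlags {
  bool MayLoad;
  bool MayStore;
  bool UnmodeledSideEffects;
};

enum class VariadicRegKind { ReadAndWrite, ReadOnly, WriteOnly };

VariadicRegKind classifyVariadicRegOperand(const InstrFlags &F) {
  // Store-only instructions (e.g. ARM STM): extra register operands are
  // optimistically treated as reads only.
  if (F.MayStore && !F.MayLoad && !F.UnmodeledSideEffects)
    return VariadicRegKind::ReadOnly;
  // Load-only instructions (e.g. ARM LDM): extra register operands are
  // optimistically treated as writes only.
  if (F.MayLoad && !F.MayStore && !F.UnmodeledSideEffects)
    return VariadicRegKind::WriteOnly;
  // Default: conservatively assume both a register read and a write.
  return VariadicRegKind::ReadAndWrite;
}

int main() {
  InstrFlags Stm{false, true, false}; // mayStore only
  InstrFlags Ldm{true, false, false}; // mayLoad only
  std::cout << (classifyVariadicRegOperand(Stm) == VariadicRegKind::ReadOnly)
            << (classifyVariadicRegOperand(Ldm) == VariadicRegKind::WriteOnly)
            << '\n'; // prints "11"
}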
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=armv7-unknown-unknown -mcpu=swift -iterations=300 -timeline -timeline-max-iterations=3 < %s | FileCheck %s

ldm r2!, {r3, r4, r5, r6, r12, lr}
stm r0!, {r3, r4, r5, r6, r12, lr}

# CHECK: Iterations: 300
# CHECK-NEXT: Instructions: 600
# CHECK-NEXT: Total Cycles: 1295
# CHECK-NEXT: Total uOps: 2400

# CHECK: Dispatch Width: 3
# CHECK-NEXT: uOps Per Cycle: 1.85
# CHECK-NEXT: IPC: 0.46
# CHECK-NEXT: Block RThroughput: 4.0

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 3 18 2.00 * ldm r2!, {r3, r4, r5, r6, r12, lr}
# CHECK-NEXT: 5 1 2.00 * stm r0!, {r3, r4, r5, r6, r12, lr}

# CHECK: Resources:
# CHECK-NEXT: [0] - SwiftUnitDiv
# CHECK-NEXT: [1] - SwiftUnitP0
# CHECK-NEXT: [2] - SwiftUnitP1
# CHECK-NEXT: [3] - SwiftUnitP2
# CHECK-NEXT: [4.0] - SwiftUnitP01
# CHECK-NEXT: [4.1] - SwiftUnitP01

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4.0] [4.1]
# CHECK-NEXT: - - - 4.00 2.46 2.54

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4.0] [4.1] Instructions:
# CHECK-NEXT: - - - 2.00 1.09 0.91 ldm r2!, {r3, r4, r5, r6, r12, lr}
# CHECK-NEXT: - - - 2.00 1.37 1.63 stm r0!, {r3, r4, r5, r6, r12, lr}

# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 012345678

# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeER . . ldm r2!, {r3, r4, r5, r6, r12, lr}
# CHECK-NEXT: [0,1] .D=================eER . . stm r0!, {r3, r4, r5, r6, r12, lr}
# CHECK-NEXT: [1,0] . DeeeeeeeeeeeeeeeeeeER . . ldm r2!, {r3, r4, r5, r6, r12, lr}
# CHECK-NEXT: [1,1] . D=================eER. . stm r0!, {r3, r4, r5, r6, r12, lr}
# CHECK-NEXT: [2,0] . .DeeeeeeeeeeeeeeeeeeER . ldm r2!, {r3, r4, r5, r6, r12, lr}
# CHECK-NEXT: [2,1] . . D==================eER stm r0!, {r3, r4, r5, r6, r12, lr}

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 1.0 1.0 0.0 ldm r2!, {r3, r4, r5, r6, r12, lr}
# CHECK-NEXT: 1. 3 18.3 0.3 0.0 stm r0!, {r3, r4, r5, r6, r12, lr}
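The timeline shows the new dataflow assumptions at work: the register list of each stm is modeled as reads only, so the stm (the long '=' stretch) must wait for the preceding ldm, whose register list is modeled as writes only, to produce r3-r6, r12 and lr. Before this patch, llvm-mca would have rejected both instructions with "Don't know how to process this variadic opcode." (see the verifyOperands change below).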
 }

 static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
-  // Variadic opcodes are not correctly supported.
-  if (MCDesc.isVariadic()) {
-    if (MCI.getNumOperands() - MCDesc.getNumOperands()) {
-      return make_error<InstructionError<MCInst>>(
-          "Don't know how to process this variadic opcode.", MCI);
-    }
-  }
-
   // Count register definitions, and skip non-register operands in the process.
   unsigned I, E;
   unsigned NumExplicitDefs = MCDesc.getNumDefs();
...
   if (MCDesc.hasOptionalDef())
     TotalDefs++;

-  ID.Writes.resize(TotalDefs);
+  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
+  ID.Writes.resize(TotalDefs + NumVariadicOps);
   // Iterate over the operands list, and skip non-register operands.
   // The first NumExplicitDefs register operands are expected to be register
   // definitions.
...
              << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
     });
   }
+
+  if (!NumVariadicOps)
+    return;
+
+  // FIXME: If an instruction opcode is flagged 'mayStore', and it has no
+  // 'unmodeledSideEffects', then this logic optimistically assumes that any
+  // extra register operand in the variadic sequence is not a register
+  // definition.
+  //
+  // Otherwise, we conservatively assume that any register operand from the
+  // variadic sequence is both a register read and a register write.
+  bool AssumeUsesOnly = MCDesc.mayStore() && !MCDesc.mayLoad() &&
+                        !MCDesc.hasUnmodeledSideEffects();
+  CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
+  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
+       I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
+    const MCOperand &Op = MCI.getOperand(OpIndex);
+    if (!Op.isReg())
+      continue;
+
+    WriteDescriptor &Write = ID.Writes[CurrentDef];
+    Write.OpIndex = OpIndex;
+    // Assign a default latency for this write.
+    Write.Latency = ID.MaxLatency;
+    Write.SClassOrWriteResourceID = 0;
+    Write.IsOptionalDef = false;
+    ++CurrentDef;
+    LLVM_DEBUG({
+      dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
+             << ", Latency=" << Write.Latency
+             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
+    });
+  }
+
+  ID.Writes.resize(CurrentDef);
 }

 void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
...
   // Remove the optional definition.
   if (MCDesc.hasOptionalDef())
     --NumExplicitUses;
-  unsigned TotalUses = NumExplicitUses + NumImplicitUses;
-
+  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
+  unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
   ID.Reads.resize(TotalUses);
-  for (unsigned I = 0; I < NumExplicitUses; ++I) {
-    ReadDescriptor &Read = ID.Reads[I];
-    Read.OpIndex = MCDesc.getNumDefs() + I;
+  unsigned CurrentUse = 0;
+  for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
+       ++I, ++OpIndex) {
+    const MCOperand &Op = MCI.getOperand(OpIndex);
+    if (!Op.isReg())
+      continue;
+
+    ReadDescriptor &Read = ID.Reads[CurrentUse];
+    Read.OpIndex = OpIndex;
     Read.UseIndex = I;
     Read.SchedClassID = SchedClassID;
+    ++CurrentUse;
     LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
                       << ", UseIndex=" << Read.UseIndex << '\n');
   }

   // For the purpose of ReadAdvance, implicit uses come directly after explicit
   // uses. The "UseIndex" must be updated according to that implicit layout.
   for (unsigned I = 0; I < NumImplicitUses; ++I) {
-    ReadDescriptor &Read = ID.Reads[NumExplicitUses + I];
+    ReadDescriptor &Read = ID.Reads[CurrentUse + I];
     Read.OpIndex = ~I;
     Read.UseIndex = NumExplicitUses + I;
     Read.RegisterID = MCDesc.getImplicitUses()[I];
...
                       << ", UseIndex=" << Read.UseIndex << ", RegisterID="
                       << MRI.getName(Read.RegisterID) << '\n');
   }
+
+  CurrentUse += NumImplicitUses;
+
+  // FIXME: If an instruction opcode is marked as 'mayLoad', and it has no
+  // 'unmodeledSideEffects', then this logic optimistically assumes that any
+  // extra register operand in the variadic sequence is a register definition
+  // only, and not a register use.
+
+  bool AssumeDefsOnly = !MCDesc.mayStore() && MCDesc.mayLoad() &&
+                        !MCDesc.hasUnmodeledSideEffects();
+  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
+       I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
+    const MCOperand &Op = MCI.getOperand(OpIndex);
+    if (!Op.isReg())
+      continue;
+
+    ReadDescriptor &Read = ID.Reads[CurrentUse];
+    Read.OpIndex = OpIndex;
+    Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
+    Read.SchedClassID = SchedClassID;
+    ++CurrentUse;
+    LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
+                      << ", UseIndex=" << Read.UseIndex << '\n');
+  }
+
+  ID.Reads.resize(CurrentUse);
 }

 Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
...

   // Then obtain the scheduling class information from the instruction.
   unsigned SchedClassID = MCDesc.getSchedClass();
-  unsigned CPUID = SM.getProcessorID();
+  bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();

   // Try to solve variant scheduling classes.
-  if (SchedClassID) {
+  if (IsVariant) {
+    unsigned CPUID = SM.getProcessorID();
     while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
       SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, CPUID);

...

   // Now add the new descriptor.
   SchedClassID = MCDesc.getSchedClass();
-  if (!SM.getSchedClassDesc(SchedClassID)->isVariant()) {
+  bool IsVariadic = MCDesc.isVariadic();
+  if (!IsVariadic && !IsVariant) {
     Descriptors[MCI.getOpcode()] = std::move(ID);
     return *Descriptors[MCI.getOpcode()];
   }
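The final hunk also tightens the caching policy: a descriptor may be cached by opcode only when its scheduling class is not variant and the opcode is not variadic, because either property can make the descriptor differ between instances of the same opcode. Below is a hedged sketch of that policy with simplified stand-in types; Descriptors mirrors the map visible in the hunk, while VariantDescriptors and the InstrDesc body are assumptions about the surrounding code.

#include <map>
#include <memory>

// Stand-in for llvm-mca's per-instruction descriptor; the real type holds
// latency, uop count, and the read/write descriptors populated above.
struct InstrDesc {};

struct DescriptorCache {
  // Keyed by opcode: safe only for non-variant, non-variadic instructions.
  std::map<unsigned, std::unique_ptr<InstrDesc>> Descriptors;
  // Keyed by instruction instance: used for everything else (assumed name).
  std::map<const void *, std::unique_ptr<InstrDesc>> VariantDescriptors;

  const InstrDesc &add(unsigned Opcode, const void *Inst, bool IsVariant,
                       bool IsVariadic, std::unique_ptr<InstrDesc> ID) {
    if (!IsVariant && !IsVariadic)
      return *(Descriptors[Opcode] = std::move(ID)); // shared by opcode
    return *(VariantDescriptors[Inst] = std::move(ID)); // per instruction
  }
};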