llvm.org GIT mirror llvm / c46dd2b
[tblgen][disasm] Allow multiple encodings to disassemble to the same instruction Summary: Add an AdditionalEncoding class which can be used to define additional encodings for a given instruction. This causes the disassembler to add an additional encoding to its matching tables that map to the specified instruction. Usage: def ADD1 : Instruction { bits<8> Reg; bits<32> Inst; let Size = 4; let Inst{0-7} = Reg; let Inst{8-14} = 0; let Inst{15} = 1; // Continuation bit let Inst{16-31} = 0; ... } def : AdditionalEncoding<ADD1> { bits<8> Reg; bits<16> Inst; // You can also have bits<32> and it will still be a 16-bit encoding let Size = 2; let Inst{0-3} = 0; let Inst{4-7} = Reg; let Inst{8-15} = 0; ... } with those definitions, llvm-mc will successfully disassemble both of these: 0x01 0x00 0x10 0x80 0x00 0x00 to: ADD1 r1 Depends on D52366 Reviewers: bogner, charukcs Reviewed By: bogner Subscribers: nlguillemot, nhaehnle, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D52369 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363744 91177308-0d34-0410-b5e6-96231b3b80d8 Daniel Sanders a month ago
2 changed file(s) with 146 addition(s) and 80 deletion(s). Raw diff Collapse all Expand all
397397
398398 class Predicate; // Forward def
399399
400 class InstructionEncoding {
401 // Size of encoded instruction.
402 int Size;
403
404 // The "namespace" in which this instruction exists, on targets like ARM
405 // where multiple ISA namespaces exist.
406 string DecoderNamespace = "";
407
408 // List of predicates which will be turned into isel matching code.
409 list Predicates = [];
410
411 string DecoderMethod = "";
412
413 // Is the instruction decoder method able to completely determine if the
414 // given instruction is valid or not. If the TableGen definition of the
415 // instruction specifies bitpattern A??B where A and B are static bits, the
416 // hasCompleteDecoder flag says whether the decoder method fully handles the
417 // ?? space, i.e. if it is a final arbiter for the instruction validity.
418 // If not then the decoder attempts to continue decoding when the decoder
419 // method fails.
420 //
421 // This allows handling situations where the encoding is not fully
422 // orthogonal. Example:
423 // * InstA with bitpattern 0b0000????,
424 // * InstB with bitpattern 0b000000?? but the associated decoder method
425 // DecodeInstB() returns Fail when ?? is 0b00 or 0b11.
426 //
427 // The decoder tries to decode a bitpattern that matches both InstA and
428 // InstB bitpatterns first as InstB (because it is the most specific
429 // encoding). In the default case (hasCompleteDecoder = 1), when
430 // DecodeInstB() returns Fail the bitpattern gets rejected. By setting
431 // hasCompleteDecoder = 0 in InstB, the decoder is informed that
432 // DecodeInstB() is not able to determine if all possible values of ?? are
433 // valid or not. If DecodeInstB() returns Fail the decoder will attempt to
434 // decode the bitpattern as InstA too.
435 bit hasCompleteDecoder = 1;
436 }
437
400438 //===----------------------------------------------------------------------===//
401439 // Instruction set description - These classes correspond to the C++ classes in
402440 // the Target/TargetInstrInfo.h file.
403441 //
404 class Instruction {
442 class Instruction : InstructionEncoding {
405443 string Namespace = "";
406444
407445 dag OutOperandList; // A dag containing the MI def operand list.
425463 // Size - Size of encoded instruction, or zero if the size cannot be determined
426464 // from the opcode.
427465 int Size = 0;
428
429 // DecoderNamespace - The "namespace" in which this instruction exists, on
430 // targets like ARM which multiple ISA namespaces exist.
431 string DecoderNamespace = "";
432466
433467 // Code size, for instruction selection.
434468 // FIXME: What does this actually mean?
531565 string DisableEncoding = "";
532566
533567 string PostEncoderMethod = "";
534 string DecoderMethod = "";
535
536 // Is the instruction decoder method able to completely determine if the
537 // given instruction is valid or not. If the TableGen definition of the
538 // instruction specifies bitpattern A??B where A and B are static bits, the
539 // hasCompleteDecoder flag says whether the decoder method fully handles the
540 // ?? space, i.e. if it is a final arbiter for the instruction validity.
541 // If not then the decoder attempts to continue decoding when the decoder
542 // method fails.
543 //
544 // This allows to handle situations where the encoding is not fully
545 // orthogonal. Example:
546 // * InstA with bitpattern 0b0000????,
547 // * InstB with bitpattern 0b000000?? but the associated decoder method
548 // DecodeInstB() returns Fail when ?? is 0b00 or 0b11.
549 //
550 // The decoder tries to decode a bitpattern that matches both InstA and
551 // InstB bitpatterns first as InstB (because it is the most specific
552 // encoding). In the default case (hasCompleteDecoder = 1), when
553 // DecodeInstB() returns Fail the bitpattern gets rejected. By setting
554 // hasCompleteDecoder = 0 in InstB, the decoder is informed that
555 // DecodeInstB() is not able to determine if all possible values of ?? are
556 // valid or not. If DecodeInstB() returns Fail the decoder will attempt to
557 // decode the bitpattern as InstA too.
558 bit hasCompleteDecoder = 1;
559568
560569 /// Target-specific flags. This becomes the TSFlags field in TargetInstrDesc.
561570 bits<64> TSFlags = 0;
590599 /// instruction selection predicates. FastISel cannot handle such cases, but
591600 /// SelectionDAG can.
592601 bit FastISelShouldIgnore = 0;
602 }
603
604 /// Defines an additional encoding that disassembles to the given instruction.
605 /// Like Instruction, the Inst and SoftFail fields are omitted to allow targets
606 /// to specify their size.
607 class AdditionalEncoding : InstructionEncoding {
608 Instruction AliasOf = I;
593609 }
594610
595611 /// PseudoInstExpansion - Expansion information for a pseudo-instruction.
1515 #include "llvm/ADT/APInt.h"
1616 #include "llvm/ADT/ArrayRef.h"
1717 #include "llvm/ADT/CachedHashString.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SetVector.h"
1820 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/SetVector.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/Statistic.h"
2122 #include "llvm/ADT/StringExtras.h"
2223 #include "llvm/ADT/StringRef.h"
2324 #include "llvm/MC/MCFixedLenDisassembler.h"
4647
4748 namespace {
4849
50 STATISTIC(NumEncodings, "Number of encodings considered");
51 STATISTIC(NumEncodingsLackingDisasm, "Number of encodings without disassembler info");
52 STATISTIC(NumInstructions, "Number of instructions considered");
53 STATISTIC(NumEncodingsSupported, "Number of encodings supported");
54 STATISTIC(NumEncodingsOmitted, "Number of encodings omitted");
55
4956 struct EncodingField {
5057 unsigned Base, Width, Offset;
5158 EncodingField(unsigned B, unsigned W, unsigned O)
93100 : EncodingDef(EncodingDef), Inst(Inst) {}
94101 };
95102
103 struct EncodingIDAndOpcode {
104 unsigned EncodingID;
105 unsigned Opcode;
106
107 EncodingIDAndOpcode() : EncodingID(0), Opcode(0) {}
108 EncodingIDAndOpcode(unsigned EncodingID, unsigned Opcode)
109 : EncodingID(EncodingID), Opcode(Opcode) {}
110 };
111
96112 raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) {
97113 if (Value.EncodingDef != Value.Inst->TheDef)
98114 OS << Value.EncodingDef->getName() << ":";
101117 }
102118
103119 class FixedLenDecoderEmitter {
120 RecordKeeper &RK;
104121 std::vector NumberedEncodings;
105122
106123 public:
112129 std::string ROK = "MCDisassembler::Success",
113130 std::string RFail = "MCDisassembler::Fail",
114131 std::string L = "")
115 : Target(R), PredicateNamespace(std::move(PredicateNamespace)),
132 : RK(R), Target(R), PredicateNamespace(std::move(PredicateNamespace)),
116133 GuardPrefix(std::move(GPrefix)), GuardPostfix(std::move(GPostfix)),
117134 ReturnOK(std::move(ROK)), ReturnFail(std::move(RFail)),
118135 Locals(std::move(L)) {}
250267 bool Mixed; // a mixed region contains both set and unset bits
251268
252269 // Map of well-known segment value to the set of uid's with that value.
253 std::mapunsigned>> FilteredInstructions;
270 std::mapEncodingIDAndOpcode>>
271 FilteredInstructions;
254272
255273 // Set of uid's with non-constant segment values.
256 std::vector<unsigned> VariableInstructions;
274 std::vector<EncodingIDAndOpcode> VariableInstructions;
257275
258276 // Map of well-known segment value to its delegate.
259277 std::map> FilterChooserMap;
262280 unsigned NumFiltered;
263281
264282 // Keeps track of the last opcode in the filtered bucket.
265 unsigned LastOpcFiltered;
283 EncodingIDAndOpcode LastOpcFiltered;
266284
267285 public:
268286 Filter(Filter &&f);
272290
273291 unsigned getNumFiltered() const { return NumFiltered; }
274292
275 unsigned getSingletonOpc() const {
293 EncodingIDAndOpcode getSingletonOpc() const {
276294 assert(NumFiltered == 1);
277295 return LastOpcFiltered;
278296 }
336354 friend class Filter;
337355
338356 // Vector of codegen instructions to choose our filter.
339 ArrayRef<EncodingAndInst> AllInstructions;
357 ArrayRef<const EncodingAndInst> AllInstructions;
340358
341359 // Vector of uid's for this filter chooser to work on.
342 const std::vector &Opcodes;
360 // For each entry, EncodingID is the id of the encoding being decoded and
361 // Opcode is the id of the opcode that should be emitted.
362 const std::vector &Opcodes;
343363
344364 // Lookup table for the operand decoding of instructions.
345365 const std::map> &Operands;
364384 const FixedLenDecoderEmitter *Emitter;
365385
366386 public:
367 FilterChooser(ArrayRef Insts,
368 const std::vector &IDs,
387 FilterChooser(ArrayRef Insts,
388 const std::vector &IDs,
369389 const std::map> &Ops,
370390 unsigned BW, const FixedLenDecoderEmitter *E)
371391 : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
374394 doFilter();
375395 }
376396
377 FilterChooser(ArrayRef Insts,
378 const std::vector &IDs,
397 FilterChooser(ArrayRef Insts,
398 const std::vector &IDs,
379399 const std::map> &Ops,
380400 const std::vector &ParentFilterBitValues,
381401 const FilterChooser &parent)
409429 else
410430 Insn.push_back(bitFromBits(Bits, i));
411431 }
432 }
433
434 // Emit the name of the encoding/instruction pair.
435 void emitNameWithID(raw_ostream &OS, unsigned Opcode) const {
436 const Record *EncodingDef = AllInstructions[Opcode].EncodingDef;
437 const Record *InstDef = AllInstructions[Opcode].Inst->TheDef;
438 if (EncodingDef != InstDef)
439 OS << EncodingDef->getName() << ":";
440 OS << InstDef->getName();
412441 }
413442
414443 // Populates the field of the insn given the start position and the number of
461490
462491 // Emits table entries to decode the singleton.
463492 void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
464 unsigned Opc) const;
493 EncodingIDAndOpcode Opc) const;
465494
466495 // Emits code to decode the singleton, and then to decode the rest.
467496 void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
522551 assert(StartBit + NumBits - 1 < Owner->BitWidth);
523552
524553 NumFiltered = 0;
525 LastOpcFiltered = 0;
554 LastOpcFiltered = {0, 0};
526555
527556 for (unsigned i = 0, e = Owner->Opcodes.size(); i != e; ++i) {
528557 insn_t Insn;
529558
530559 // Populates the insn given the uid.
531 Owner->insnWithID(Insn, Owner->Opcodes[i]);
560 Owner->insnWithID(Insn, Owner->Opcodes[i].EncodingID);
532561
533562 uint64_t Field;
534563 // Scans the segment for possibly well-specified encoding bits.
10241053 // 1: Water (the bit value does not affect decoding)
10251054 // 2: Island (well-known bit value needed for decoding)
10261055 int State = 0;
1027 int Val = -1;
1056 int64_t Val = -1;
10281057
10291058 for (unsigned i = 0; i < BitWidth; ++i) {
10301059 Val = Value(Insn[i]);
13121341
13131342 // Emits table entries to decode the singleton.
13141343 void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
1315 unsigned Opc) const {
1344 EncodingIDAndOpcode Opc) const {
13161345 std::vector StartBits;
13171346 std::vector EndBits;
13181347 std::vector FieldVals;
13191348 insn_t Insn;
1320 insnWithID(Insn, Opc);
1349 insnWithID(Insn, Opc.EncodingID);
13211350
13221351 // Look for islands of undecoded bits of the singleton.
13231352 getIslands(StartBits, EndBits, FieldVals, Insn);
13251354 unsigned Size = StartBits.size();
13261355
13271356 // Emit the predicate table entry if one is needed.
1328 emitPredicateTableEntry(TableInfo, Opc);
1357 emitPredicateTableEntry(TableInfo, Opc.EncodingID);
13291358
13301359 // Check any additional encoding fields needed.
13311360 for (unsigned I = Size; I != 0; --I) {
13491378 }
13501379
13511380 // Check for soft failure of the match.
1352 emitSoftFailTableEntry(TableInfo, Opc);
1381 emitSoftFailTableEntry(TableInfo, Opc.EncodingID);
13531382
13541383 bool HasCompleteDecoder;
1355 unsigned DIdx = getDecoderIndex(TableInfo.Decoders, Opc, HasCompleteDecoder);
1384 unsigned DIdx =
1385 getDecoderIndex(TableInfo.Decoders, Opc.EncodingID, HasCompleteDecoder);
13561386
13571387 // Produce OPC_Decode or OPC_TryDecode opcode based on the information
13581388 // whether the instruction decoder is complete or not. If it is complete
13651395 // can decode it.
13661396 TableInfo.Table.push_back(HasCompleteDecoder ? MCD::OPC_Decode :
13671397 MCD::OPC_TryDecode);
1398 NumEncodingsSupported++;
13681399 uint8_t Buffer[16], *p;
1369 encodeULEB128(Opc, Buffer);
1400 encodeULEB128(Opc.Opcode, Buffer);
13701401 for (p = Buffer; *p >= 128 ; ++p)
13711402 TableInfo.Table.push_back(*p);
13721403 TableInfo.Table.push_back(*p);
13921423 // Emits table entries to decode the singleton, and then to decode the rest.
13931424 void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
13941425 const Filter &Best) const {
1395 unsigned Opc = Best.getSingletonOpc();
1426 EncodingIDAndOpcode Opc = Best.getSingletonOpc();
13961427
13971428 // complex singletons need predicate checks from the first singleton
13981429 // to refer forward to the variable filterchooser that follows.
14521483 std::vector FieldVals;
14531484 insn_t Insn;
14541485
1455 insnWithID(Insn, Opcodes[i]);
1486 insnWithID(Insn, Opcodes[i].EncodingID);
14561487
14571488 // Look for islands of undecoded bits of any instruction.
14581489 if (getIslands(StartBits, EndBits, FieldVals, Insn) > 0) {
14961527 for (unsigned InsnIndex = 0; InsnIndex < numInstructions; ++InsnIndex) {
14971528 insn_t insn;
14981529
1499 insnWithID(insn, Opcodes[InsnIndex]);
1530 insnWithID(insn, Opcodes[InsnIndex].EncodingID);
15001531
15011532 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) {
15021533 switch (bitAttrs[BitIndex]) {
17151746 dumpStack(errs(), "\t\t");
17161747
17171748 for (unsigned i = 0; i < Opcodes.size(); ++i) {
1718 errs() << '\t' << AllInstructions[Opcodes[i]] << " ";
1719 dumpBits(errs(),
1720 getBitsField(*AllInstructions[Opcodes[i]].EncodingDef, "Inst"));
1749 errs() << '\t';
1750 emitNameWithID(errs(), Opcodes[i].EncodingID);
1751 errs() << " ";
1752 dumpBits(
1753 errs(),
1754 getBitsField(*AllInstructions[Opcodes[i].EncodingID].EncodingDef, "Inst"));
17211755 errs() << '\n';
17221756 }
17231757 }
17491783 return Decoder;
17501784 }
17511785
1752 static bool populateInstruction(CodeGenTarget &Target,
1753 const CodeGenInstruction &CGI, unsigned Opc,
1754 std::map> &Operands){
1786 static bool
1787 populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
1788 const CodeGenInstruction &CGI, unsigned Opc,
1789 std::map> &Operands) {
17551790 const Record &Def = *CGI.TheDef;
17561791 // If all the bit positions are not specified; do not decode this instruction.
17571792 // We are bound to fail! For proper disassembly, the well-known encoding bits
17581793 // of the instruction must be fully specified.
17591794
1760 BitsInit &Bits = getBitsField(Def, "Inst");
1795 BitsInit &Bits = getBitsField(EncodingDef, "Inst");
17611796 if (Bits.allInComplete()) return false;
17621797
17631798 std::vector InsnOperands;
17641799
17651800 // If the instruction has specified a custom decoding hook, use that instead
17661801 // of trying to auto-generate the decoder.
1767 StringRef InstDecoder = Def.getValueAsString("DecoderMethod");
1802 StringRef InstDecoder = EncodingDef.getValueAsString("DecoderMethod");
17681803 if (InstDecoder != "") {
1769 bool HasCompleteInstDecoder = Def.getValueAsBit("hasCompleteDecoder");
1804 bool HasCompleteInstDecoder = EncodingDef.getValueAsBit("hasCompleteDecoder");
17701805 InsnOperands.push_back(OperandInfo(InstDecoder, HasCompleteInstDecoder));
17711806 Operands[Opc] = InsnOperands;
17721807 return true;
21422177 << " const FeatureBitset& Bits = STI.getFeatureBits();\n"
21432178 << "\n"
21442179 << " const uint8_t *Ptr = DecodeTable;\n"
2145 << " uint32_t CurFieldValue = 0;\n"
2180 << " InsnType CurFieldValue = 0;\n"
21462181 << " DecodeStatus S = MCDisassembler::Success;\n"
21472182 << " while (true) {\n"
21482183 << " ptrdiff_t Loc = Ptr - DecodeTable;\n"
21872222 << " unsigned Len = *++Ptr;\n"
21882223 << " InsnType FieldValue = fieldFromInstruction(insn, Start, Len);\n"
21892224 << " // Decode the field value.\n"
2190 << " uint32_t ExpectedValue = decodeULEB128(++Ptr, &Len);\n"
2225 << " InsnType ExpectedValue = decodeULEB128(++Ptr, &Len);\n"
21912226 << " Ptr += Len;\n"
21922227 << " // NumToSkip is a plain 24-bit integer.\n"
21932228 << " unsigned NumToSkip = *Ptr++;\n"
23342369 // Parameterize the decoders based on namespace and instruction width.
23352370 const auto &NumberedInstructions = Target.getInstructionsByEnumValue();
23362371 NumberedEncodings.reserve(NumberedInstructions.size());
2337 for (const auto &NumberedInstruction : NumberedInstructions)
2372 DenseMap IndexOfInstruction;
2373 for (const auto &NumberedInstruction : NumberedInstructions) {
2374 IndexOfInstruction[NumberedInstruction->TheDef] = NumberedEncodings.size();
23382375 NumberedEncodings.emplace_back(NumberedInstruction->TheDef, NumberedInstruction);
2339
2340 std::map,
2341 std::vector> OpcMap;
2376 }
2377 for (const auto &NumberedAlias : RK.getAllDerivedDefinitions("AdditionalEncoding"))
2378 NumberedEncodings.emplace_back(
2379 NumberedAlias,
2380 &Target.getInstruction(NumberedAlias->getValueAsDef("AliasOf")));
2381
2382 std::map, std::vector>
2383 OpcMap;
23422384 std::map> Operands;
23432385
23442386 for (unsigned i = 0; i < NumberedEncodings.size(); ++i) {
2387 const Record *EncodingDef = NumberedEncodings[i].EncodingDef;
23452388 const CodeGenInstruction *Inst = NumberedEncodings[i].Inst;
23462389 const Record *Def = Inst->TheDef;
2347 unsigned Size = Def->getValueAsInt("Size");
2390 unsigned Size = EncodingDef->getValueAsInt("Size");
23482391 if (Def->getValueAsString("Namespace") == "TargetOpcode" ||
23492392 Def->getValueAsBit("isPseudo") ||
23502393 Def->getValueAsBit("isAsmParserOnly") ||
2351 Def->getValueAsBit("isCodeGenOnly"))
2394 Def->getValueAsBit("isCodeGenOnly")) {
2395 NumEncodingsLackingDisasm++;
23522396 continue;
2353
2354 StringRef DecoderNamespace = Def->getValueAsString("DecoderNamespace");
2397 }
2398
2399 if (i < NumberedInstructions.size())
2400 NumInstructions++;
2401 NumEncodings++;
2402
2403 StringRef DecoderNamespace = EncodingDef->getValueAsString("DecoderNamespace");
23552404
23562405 if (Size) {
2357 if (populateInstruction(Target, *Inst, i, Operands)) {
2358 OpcMap[std::make_pair(DecoderNamespace, Size)].push_back(i);
2359 }
2406 if (populateInstruction(Target, *EncodingDef, *Inst, i, Operands)) {
2407 OpcMap[std::make_pair(DecoderNamespace, Size)].emplace_back(i, IndexOfInstruction.find(Def)->second);
2408 } else
2409 NumEncodingsOmitted++;
23602410 }
23612411 }
23622412
23632413 DecoderTableInfo TableInfo;
23642414 for (const auto &Opc : OpcMap) {
23652415 // Emit the decoder for this namespace+width combination.
2366 ArrayRef NumberedEncodingsRef(NumberedEncodings.data(),
2367 NumberedEncodings.size());
2416 ArrayRef NumberedEncodingsRef(
2417 NumberedEncodings.data(), NumberedEncodings.size());
23682418 FilterChooser FC(NumberedEncodingsRef, Opc.second, Operands,
23692419 8 * Opc.first.second, this);
23702420