llvm.org GIT mirror llvm / f654cda
MIR Serialization: Introduce a lexer for machine instructions. This commit adds a function that tokenizes the string containing the machine instruction. This commit also adds a struct called 'MIToken' which is used to represent the lexer's tokens. Reviewers: Sean Silva Differential Revision: http://reviews.llvm.org/D10521 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240323 91177308-0d34-0410-b5e6-96231b3b80d8 Alex Lorenz 5 years ago
7 changed file(s) with 230 addition(s) and 12 deletion(s). Raw diff Collapse all Expand all
0 add_llvm_library(LLVMMIRParser
1 MILexer.cpp
12 MIParser.cpp
23 MIRParser.cpp
34 )
0 //===- MILexer.cpp - Machine instructions lexer implementation ----------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the lexing of machine instructions.
10 //
11 //===----------------------------------------------------------------------===//
12
#include "MILexer.h"
#include "llvm/ADT/Twine.h"
#include <cctype>
16
17 using namespace llvm;
18
19 namespace {
20
21 /// This class provides a way to iterate and get characters from the source
22 /// string.
23 class Cursor {
24 const char *Ptr;
25 const char *End;
26
27 public:
28 explicit Cursor(StringRef Str) {
29 Ptr = Str.data();
30 End = Ptr + Str.size();
31 }
32
33 bool isEOF() const { return Ptr == End; }
34
35 char peek() const { return isEOF() ? 0 : *Ptr; }
36
37 void advance() { ++Ptr; }
38
39 StringRef remaining() const { return StringRef(Ptr, End - Ptr); }
40
41 StringRef upto(Cursor C) const {
42 assert(C.Ptr >= Ptr && C.Ptr <= End);
43 return StringRef(Ptr, C.Ptr - Ptr);
44 }
45
46 StringRef::iterator location() const { return Ptr; }
47 };
48
49 } // end anonymous namespace
50
51 /// Skip the leading whitespace characters and return the updated cursor.
52 static Cursor skipWhitespace(Cursor C) {
53 while (isspace(C.peek()))
54 C.advance();
55 return C;
56 }
57
/// Return true if \p C may appear inside an identifier: an alphanumeric
/// character, '_', '-' or '.'.
static bool isIdentifierChar(char C) {
  // The <cctype> classifiers are only defined for values representable as
  // unsigned char (or EOF); a plain char may be negative, so convert first.
  const auto UC = static_cast<unsigned char>(C);
  return isalnum(UC) || C == '_' || C == '-' || C == '.';
}
61
62 static Cursor lexIdentifier(Cursor C, MIToken &Token) {
63 auto Range = C;
64 while (isIdentifierChar(C.peek()))
65 C.advance();
66 Token = MIToken(MIToken::Identifier, Range.upto(C));
67 return C;
68 }
69
70 StringRef llvm::lexMIToken(
71 StringRef Source, MIToken &Token,
72 function_ref ErrorCallback) {
73 auto C = skipWhitespace(Cursor(Source));
74 if (C.isEOF()) {
75 Token = MIToken(MIToken::Eof, C.remaining());
76 return C.remaining();
77 }
78
79 auto Char = C.peek();
80 if (isalpha(Char) || Char == '_')
81 return lexIdentifier(C, Token).remaining();
82 Token = MIToken(MIToken::Error, C.remaining());
83 ErrorCallback(C.location(),
84 Twine("unexpected character '") + Twine(Char) + "'");
85 return C.remaining();
86 }
0 //===- MILexer.h - Lexer for machine instructions -------------------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file declares the function that lexes the machine instruction source
10 // string.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
15 #define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
16
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include
20
21 namespace llvm {
22
23 class Twine;
24
25 /// A token produced by the machine instruction lexer.
26 struct MIToken {
27 enum TokenKind {
28 // Markers
29 Eof,
30 Error,
31
32 // Identifier tokens
33 Identifier
34 };
35
36 private:
37 TokenKind Kind;
38 StringRef Range;
39
40 public:
41 MIToken(TokenKind Kind, StringRef Range) : Kind(Kind), Range(Range) {}
42
43 TokenKind kind() const { return Kind; }
44
45 bool isError() const { return Kind == Error; }
46
47 bool is(TokenKind K) const { return Kind == K; }
48
49 bool isNot(TokenKind K) const { return Kind != K; }
50
51 StringRef::iterator location() const { return Range.begin(); }
52
53 StringRef stringValue() const { return Range; }
54 };
55
56 /// Consume a single machine instruction token in the given source and return
57 /// the remaining source string.
58 StringRef lexMIToken(
59 StringRef Source, MIToken &Token,
60 function_ref ErrorCallback);
61
62 } // end namespace llvm
63
64 #endif
1111 //===----------------------------------------------------------------------===//
1212
1313 #include "MIParser.h"
14 #include "MILexer.h"
1415 #include "llvm/ADT/StringMap.h"
1516 #include "llvm/CodeGen/MachineBasicBlock.h"
1617 #include "llvm/CodeGen/MachineFunction.h"
2829 SourceMgr &SM;
2930 MachineFunction &MF;
3031 SMDiagnostic &Error;
31 StringRef Source;
32 StringRef Source, CurrentSource;
33 MIToken Token;
3234 /// Maps from instruction names to op codes.
3335 StringMap Names2InstrOpCodes;
3436
3638 MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
3739 StringRef Source);
3840
41 void lex();
42
3943 /// Report an error at the current location with the given message.
4044 ///
4145 /// This function always returns true.
4246 bool error(const Twine &Msg);
47
48 /// Report an error at the given location with the given message.
49 ///
50 /// This function always returns true.
51 bool error(StringRef::iterator Loc, const Twine &Msg);
4352
4453 MachineInstr *parse();
4554
4958 /// Try to convert an instruction name to an opcode. Return true if the
5059 /// instruction name is invalid.
5160 bool parseInstrName(StringRef InstrName, unsigned &OpCode);
61
62 bool parseInstruction(unsigned &OpCode);
5263 };
5364
5465 } // end anonymous namespace
5566
5667 MIParser::MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
5768 StringRef Source)
58 : SM(SM), MF(MF), Error(Error), Source(Source) {}
69 : SM(SM), MF(MF), Error(Error), Source(Source), CurrentSource(Source),
70 Token(MIToken::Error, StringRef()) {}
5971
60 bool MIParser::error(const Twine &Msg) {
72 void MIParser::lex() {
73 CurrentSource = lexMIToken(
74 CurrentSource, Token,
75 [this](StringRef::iterator Loc, const Twine &Msg) { error(Loc, Msg); });
76 }
77
78 bool MIParser::error(const Twine &Msg) { return error(Token.location(), Msg); }
79
80 bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) {
6181 // TODO: Get the proper location in the MIR file, not just a location inside
6282 // the string.
63 Error =
64 SMDiagnostic(SM, SMLoc(), SM.getMemoryBuffer(SM.getMainFileID())
65 ->getBufferIdentifier(),
66 1, 0, SourceMgr::DK_Error, Msg.str(), Source, None, None);
83 assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size()));
84 Error = SMDiagnostic(
85 SM, SMLoc(),
86 SM.getMemoryBuffer(SM.getMainFileID())->getBufferIdentifier(), 1,
87 Loc - Source.data(), SourceMgr::DK_Error, Msg.str(), Source, None, None);
6788 return true;
6889 }
6990
7091 MachineInstr *MIParser::parse() {
71 StringRef InstrName = Source;
92 lex();
93
7294 unsigned OpCode;
73 if (parseInstrName(InstrName, OpCode)) {
74 error(Twine("unknown machine instruction name '") + InstrName + "'");
95 if (Token.isError() || parseInstruction(OpCode))
7596 return nullptr;
76 }
7797
7898 // TODO: Parse the rest of instruction - machine operands, etc.
7999 const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode);
80100 auto *MI = MF.CreateMachineInstr(MCID, DebugLoc());
81101 return MI;
102 }
103
104 bool MIParser::parseInstruction(unsigned &OpCode) {
105 if (Token.isNot(MIToken::Identifier))
106 return error("expected a machine instruction");
107 StringRef InstrName = Token.stringValue();
108 if (parseInstrName(InstrName, OpCode))
109 return error(Twine("unknown machine instruction name '") + InstrName + "'");
110 return false;
82111 }
83112
84113 void MIParser::initNames2InstrOpCodes() {
1919 # CHECK: - IMUL32rri8
2020 # CHECK-NEXT: - RETQ
2121 - IMUL32rri8
22 - RETQ
22 - ' RETQ '
2323 ...
0 # RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
1
2 --- |
3
4 define void @foo() {
5 entry:
6 ret void
7 }
8
9 ...
10 ---
11 name: foo
12 body:
13 - name: entry
14 instructions:
15 # CHECK: 1:1: expected a machine instruction
16 - ''
17 ...
0 # RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
1
2 --- |
3
4 define void @foo() {
5 entry:
6 ret void
7 }
8
9 ...
10 ---
11 name: foo
12 body:
13 - name: entry
14 instructions:
15 # CHECK: 1:1: unexpected character '`'
16 - '` RETQ'
17 ...