llvm.org GIT mirror llvm / 44a4cfb
[Support][YAML] Add support for accessing tags and tag handle substitution. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193004 91177308-0d34-0410-b5e6-96231b3b80d8 Michael J. Spencer 6 years ago
6 changed file(s) with 169 addition(s) and 47 deletion(s). Raw diff Collapse all Expand all
4242 #include "llvm/ADT/StringRef.h"
4343 #include "llvm/Support/Allocator.h"
4444 #include "llvm/Support/SMLoc.h"
45
46 #include
4547 #include
4648 #include
4749
98100 OwningPtr CurrentDoc;
99101
100102 friend class Document;
101
102 /// @brief Validate a %YAML x.x directive.
103 void handleYAMLDirective(const Token &);
104103 };
105104
106105 /// @brief Abstract base class for all Nodes.
115114 NK_Alias
116115 };
117116
118 Node(unsigned int Type, OwningPtr&, StringRef Anchor);
117 Node(unsigned int Type, OwningPtr &, StringRef Anchor,
118 StringRef Tag);
119119
120120 /// @brief Get the value of the anchor attached to this node. If it does not
121121 /// have one, getAnchor().size() will be 0.
122122 StringRef getAnchor() const { return Anchor; }
123
124 /// \brief Get the tag as it was written in the document. This does not
125 /// perform tag resolution.
126 StringRef getRawTag() const { return Tag; }
127
128 /// \brief Get the verbatim tag for a given Node. This performs tag resolution
129 /// and substitution.
130 std::string getVerbatimTag() const;
123131
124132 SMRange getSourceRange() const { return SourceRange; }
125133 void setSourceRange(SMRange SR) { SourceRange = SR; }
157165 private:
158166 unsigned int TypeID;
159167 StringRef Anchor;
168 /// \brief The tag as typed in the document.
169 StringRef Tag;
160170 };
161171
162172 /// @brief A null value.
165175 /// !!null null
166176 class NullNode : public Node {
167177 public:
168 NullNode(OwningPtr &D) : Node(NK_Null, D, StringRef()) {}
178 NullNode(OwningPtr &D)
179 : Node(NK_Null, D, StringRef(), StringRef()) {}
169180
170181 static inline bool classof(const Node *N) {
171182 return N->getType() == NK_Null;
179190 /// Adena
180191 class ScalarNode : public Node {
181192 public:
182 ScalarNode(OwningPtr &D, StringRef Anchor, StringRef Val)
183 : Node(NK_Scalar, D, Anchor)
184 , Value(Val) {
193 ScalarNode(OwningPtr &D, StringRef Anchor, StringRef Tag,
194 StringRef Val)
195 : Node(NK_Scalar, D, Anchor, Tag), Value(Val) {
185196 SMLoc Start = SMLoc::getFromPointer(Val.begin());
186197 SMLoc End = SMLoc::getFromPointer(Val.end());
187198 SourceRange = SMRange(Start, End);
221232 class KeyValueNode : public Node {
222233 public:
223234 KeyValueNode(OwningPtr &D)
224 : Node(NK_KeyValue, D, StringRef())
235 : Node(NK_KeyValue, D, StringRef(), StringRef())
225236 , Key(0)
226237 , Value(0)
227238 {}
337348 MT_Inline ///< An inline mapping node is used for "[key: value]".
338349 };
339350
340 MappingNode(OwningPtr &D, StringRef Anchor, MappingType MT)
341 : Node(NK_Mapping, D, Anchor)
342 , Type(MT)
343 , IsAtBeginning(true)
344 , IsAtEnd(false)
345 , CurrentEntry(0)
346 {}
351 MappingNode(OwningPtr &D, StringRef Anchor, StringRef Tag,
352 MappingType MT)
353 : Node(NK_Mapping, D, Anchor, Tag), Type(MT), IsAtBeginning(true),
354 IsAtEnd(false), CurrentEntry(0) {}
347355
348356 friend class basic_collection_iterator;
349357 typedef basic_collection_iterator iterator;
396404 ST_Indentless
397405 };
398406
399 SequenceNode(OwningPtr &D, StringRef Anchor, SequenceType ST)
400 : Node(NK_Sequence, D, Anchor)
401 , SeqType(ST)
402 , IsAtBeginning(true)
403 , IsAtEnd(false)
404 , WasPreviousTokenFlowEntry(true) // Start with an imaginary ','.
405 , CurrentEntry(0)
406 {}
407 SequenceNode(OwningPtr &D, StringRef Anchor, StringRef Tag,
408 SequenceType ST)
409 : Node(NK_Sequence, D, Anchor, Tag), SeqType(ST), IsAtBeginning(true),
410 IsAtEnd(false),
411 WasPreviousTokenFlowEntry(true), // Start with an imaginary ','.
412 CurrentEntry(0) {}
407413
408414 friend class basic_collection_iterator;
409415 typedef basic_collection_iterator iterator;
441447 class AliasNode : public Node {
442448 public:
443449 AliasNode(OwningPtr &D, StringRef Val)
444 : Node(NK_Alias, D, StringRef()), Name(Val) {}
450 : Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {}
445451
446452 StringRef getName() const { return Name; }
447453 Node *getTarget();
474480 return Root = parseBlockNode();
475481 }
476482
483 const std::map &getTagMap() const {
484 return TagMap;
485 }
486
477487 private:
478488 friend class Node;
479489 friend class document_iterator;
488498 /// @brief The root node. Used to support skipping a partially parsed
489499 /// document.
490500 Node *Root;
501
502 /// \brief Maps tag prefixes to their expansion.
503 std::map TagMap;
491504
492505 Token &peekNext();
493506 Token getNext();
494507 void setError(const Twine &Message, Token &Location) const;
495508 bool failed() const;
496509
497 void handleTagDirective(const Token &Tag) {
498 // TODO: Track tags.
499 }
500
501510 /// @brief Parse %BLAH directives and return true if any were encountered.
502511 bool parseDirectives();
512
513 /// \brief Parse %YAML
514 void parseYAMLDirective();
515
516 /// \brief Parse %TAG
517 void parseTAGDirective();
503518
504519 /// @brief Consume the next token and error if it is not \a TK.
505520 bool expectToken(int TK);
10691069 Current = skip_while(&Scanner::skip_ns_char, Current);
10701070 StringRef Name(NameStart, Current - NameStart);
10711071 Current = skip_while(&Scanner::skip_s_white, Current);
1072
1072
1073 Token T;
10731074 if (Name == "YAML") {
10741075 Current = skip_while(&Scanner::skip_ns_char, Current);
1075 Token T;
10761076 T.Kind = Token::TK_VersionDirective;
1077 T.Range = StringRef(Start, Current - Start);
1078 TokenQueue.push_back(T);
1079 return true;
1080 } else if(Name == "TAG") {
1081 Current = skip_while(&Scanner::skip_ns_char, Current);
1082 Current = skip_while(&Scanner::skip_s_white, Current);
1083 Current = skip_while(&Scanner::skip_ns_char, Current);
1084 T.Kind = Token::TK_TagDirective;
10771085 T.Range = StringRef(Start, Current - Start);
10781086 TokenQueue.push_back(T);
10791087 return true;
15631571 , Ranges);
15641572 }
15651573
1566 void Stream::handleYAMLDirective(const Token &t) {
1567 // TODO: Ensure version is 1.x.
1568 }
1569
15701574 document_iterator Stream::begin() {
15711575 if (CurrentDoc)
15721576 report_fatal_error("Can only iterate over the stream once");
15871591 i->skip();
15881592 }
15891593
1590 Node::Node(unsigned int Type, OwningPtr &D, StringRef A)
1594 Node::Node(unsigned int Type, OwningPtr &D, StringRef A, StringRef T)
15911595 : Doc(D)
15921596 , TypeID(Type)
1593 , Anchor(A) {
1597 , Anchor(A)
1598 , Tag(T) {
15941599 SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin());
15951600 SourceRange = SMRange(Start, Start);
1601 }
1602
1603 std::string Node::getVerbatimTag() const {
1604 StringRef Raw = getRawTag();
1605 if (!Raw.empty() && Raw != "!") {
1606 std::string Ret;
1607 if (Raw.find_last_of('!') == 0) {
1608 Ret = Doc->getTagMap().find("!")->second;
1609 Ret += Raw.substr(1);
1610 return std::move(Ret);
1611 } else if (Raw.startswith("!!")) {
1612 Ret = Doc->getTagMap().find("!!")->second;
1613 Ret += Raw.substr(2);
1614 return std::move(Ret);
1615 } else {
1616 StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1);
1617 std::map::const_iterator It =
1618 Doc->getTagMap().find(TagHandle);
1619 if (It != Doc->getTagMap().end())
1620 Ret = It->second;
1621 else {
1622 Token T;
1623 T.Kind = Token::TK_Tag;
1624 T.Range = TagHandle;
1625 setError(Twine("Unknown tag handle ") + TagHandle, T);
1626 }
1627 Ret += Raw.substr(Raw.find_last_of('!') + 1);
1628 return std::move(Ret);
1629 }
1630 }
1631
1632 switch (getType()) {
1633 case NK_Null:
1634 return "tag:yaml.org,2002:null";
1635 case NK_Scalar:
1636 // TODO: Tag resolution.
1637 return "tag:yaml.org,2002:str";
1638 case NK_Mapping:
1639 return "tag:yaml.org,2002:map";
1640 case NK_Sequence:
1641 return "tag:yaml.org,2002:seq";
1642 }
1643
1644 return "";
15961645 }
15971646
15981647 Token &Node::peekNext() {
19982047 }
19992048
20002049 Document::Document(Stream &S) : stream(S), Root(0) {
2050 // The tag map starts with two default mappings.
2051 TagMap["!"] = "!";
2052 TagMap["!!"] = "tag:yaml.org,2002:";
2053
20012054 if (parseDirectives())
20022055 expectToken(Token::TK_DocumentStart);
20032056 Token &T = peekNext();
20412094 Token T = peekNext();
20422095 // Handle properties.
20432096 Token AnchorInfo;
2097 Token TagInfo;
20442098 parse_property:
20452099 switch (T.Kind) {
20462100 case Token::TK_Alias:
20552109 T = peekNext();
20562110 goto parse_property;
20572111 case Token::TK_Tag:
2058 getNext(); // Skip TK_Tag.
2112 if (TagInfo.Kind == Token::TK_Tag) {
2113 setError("Already encountered a tag for this node!", T);
2114 return 0;
2115 }
2116 TagInfo = getNext(); // Consume TK_Tag.
20592117 T = peekNext();
20602118 goto parse_property;
20612119 default:
20692127 // Don't eat the TK_BlockEntry, SequenceNode needs it.
20702128 return new (NodeAllocator) SequenceNode( stream.CurrentDoc
20712129 , AnchorInfo.Range.substr(1)
2130 , TagInfo.Range
20722131 , SequenceNode::ST_Indentless);
20732132 case Token::TK_BlockSequenceStart:
20742133 getNext();
20752134 return new (NodeAllocator)
20762135 SequenceNode( stream.CurrentDoc
20772136 , AnchorInfo.Range.substr(1)
2137 , TagInfo.Range
20782138 , SequenceNode::ST_Block);
20792139 case Token::TK_BlockMappingStart:
20802140 getNext();
20812141 return new (NodeAllocator)
20822142 MappingNode( stream.CurrentDoc
20832143 , AnchorInfo.Range.substr(1)
2144 , TagInfo.Range
20842145 , MappingNode::MT_Block);
20852146 case Token::TK_FlowSequenceStart:
20862147 getNext();
20872148 return new (NodeAllocator)
20882149 SequenceNode( stream.CurrentDoc
20892150 , AnchorInfo.Range.substr(1)
2151 , TagInfo.Range
20902152 , SequenceNode::ST_Flow);
20912153 case Token::TK_FlowMappingStart:
20922154 getNext();
20932155 return new (NodeAllocator)
20942156 MappingNode( stream.CurrentDoc
20952157 , AnchorInfo.Range.substr(1)
2158 , TagInfo.Range
20962159 , MappingNode::MT_Flow);
20972160 case Token::TK_Scalar:
20982161 getNext();
20992162 return new (NodeAllocator)
21002163 ScalarNode( stream.CurrentDoc
21012164 , AnchorInfo.Range.substr(1)
2165 , TagInfo.Range
21022166 , T.Range);
21032167 case Token::TK_Key:
21042168 // Don't eat the TK_Key, KeyValueNode expects it.
21052169 return new (NodeAllocator)
21062170 MappingNode( stream.CurrentDoc
21072171 , AnchorInfo.Range.substr(1)
2172 , TagInfo.Range
21082173 , MappingNode::MT_Inline);
21092174 case Token::TK_DocumentStart:
21102175 case Token::TK_DocumentEnd:
21252190 while (true) {
21262191 Token T = peekNext();
21272192 if (T.Kind == Token::TK_TagDirective) {
2128 handleTagDirective(getNext());
2193 parseTAGDirective();
21292194 isDirective = true;
21302195 } else if (T.Kind == Token::TK_VersionDirective) {
2131 stream.handleYAMLDirective(getNext());
2196 parseYAMLDirective();
21322197 isDirective = true;
21332198 } else
21342199 break;
21352200 }
21362201 return isDirective;
2202 }
2203
2204 void Document::parseYAMLDirective() {
2205 getNext(); // Eat %YAML
2206 }
2207
2208 void Document::parseTAGDirective() {
2209 Token Tag = getNext(); // %TAG
2210 StringRef T = Tag.Range;
2211 // Strip %TAG
2212 T = T.substr(T.find_first_of(" \t")).ltrim(" \t");
2213 std::size_t HandleEnd = T.find_first_of(" \t");
2214 StringRef TagHandle = T.substr(0, HandleEnd);
2215 StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t");
2216 TagMap[TagHandle] = TagPrefix;
21372217 }
21382218
21392219 bool Document::expectToken(int TK) {
None # RUN: yaml-bench -canonical %s
0 # RUN: yaml-bench -canonical %s | FileCheck %s
11
22 %TAG ! tag:clarkevans.com,2002:
33 --- !shape
1313 start: *ORIGIN
1414 color: 0xFFEEBB
1515 text: Pretty vector drawing.
16
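17 #CHECK: !<tag:clarkevans.com,2002:shape>
18 #CHECK: !<tag:clarkevans.com,2002:circle>
19 #CHECK: !<tag:clarkevans.com,2002:line>
20 #CHECK: !<tag:clarkevans.com,2002:label>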
None # RUN: yaml-bench -canonical %s
0 # RUN: yaml-bench -canonical %s | FileCheck %s
11
22 %TAG !yaml! tag:yaml.org,2002:
33 ---
44 !yaml!str "foo"
5
6 #CHECK: !!str "foo"
None # RUN: yaml-bench -canonical %s
0 # RUN: yaml-bench -canonical %s | FileCheck %s
11
22 - !!yaml '!'
33 - !!yaml '&'
44 - !!yaml '*'
5
6 # CHECK: !!seq [
7 # CHECK: !!yaml "!",
8 # CHECK: !!yaml "&",
9 # CHECK: !!yaml "*",
10 # CHECK: ]
6060 for (unsigned i = 0; i < in.distance; ++i)
6161 os << " ";
6262 return os;
63 }
64
65 /// \brief Pretty print a tag by replacing tag:yaml.org,2002: with !!.
66 static std::string prettyTag(yaml::Node *N) {
67 std::string Tag = N->getVerbatimTag();
68 if (StringRef(Tag).startswith("tag:yaml.org,2002:")) {
69 std::string Ret = "!!";
70 Ret += StringRef(Tag).substr(18);
71 return std::move(Ret);
72 }
73 std::string Ret = "!<";
74 Ret += Tag;
75 Ret += ">";
76 return Ret;
6377 }
6478
6579 static void dumpNode( yaml::Node *n
7589 if (yaml::ScalarNode *sn = dyn_cast(n)) {
7690 SmallString<32> Storage;
7791 StringRef Val = sn->getValue(Storage);
78 outs() << "!!str \"" << yaml::escape(Val) << "\"";
92 outs() << prettyTag(n) << " \"" << yaml::escape(Val) << "\"";
7993 } else if (yaml::SequenceNode *sn = dyn_cast(n)) {
80 outs() << "!!seq [\n";
94 outs() << prettyTag(n) << " [\n";
8195 ++Indent;
8296 for (yaml::SequenceNode::iterator i = sn->begin(), e = sn->end();
8397 i != e; ++i) {
87101 --Indent;
88102 outs() << indent(Indent) << "]";
89103 } else if (yaml::MappingNode *mn = dyn_cast(n)) {
90 outs() << "!!map {\n";
104 outs() << prettyTag(n) << " {\n";
91105 ++Indent;
92106 for (yaml::MappingNode::iterator i = mn->begin(), e = mn->end();
93107 i != e; ++i) {
103117 } else if (yaml::AliasNode *an = dyn_cast(n)){
104118 outs() << "*" << an->getName();
105119 } else if (dyn_cast(n)) {
106 outs() << "!!null null";
120 outs() << prettyTag(n) << " null";
107121 }
108122 }
109123