llvm.org GIT mirror llvm / 93210e8
Add YAML parser to Support. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@153977 91177308-0d34-0410-b5e6-96231b3b80d8 Michael J. Spencer 7 years ago
182 changed file(s) with 4586 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
396396 add_subdirectory(utils/not)
397397 add_subdirectory(utils/llvm-lit)
398398 add_subdirectory(utils/json-bench)
399 add_subdirectory(utils/yaml-bench)
399400
400401 add_subdirectory(projects)
401402
6666 CellSPU backend llvm/lib/Target/CellSPU/README.txt
6767 Google Test llvm/utils/unittest/googletest
6868 OpenBSD regex llvm/lib/Support/{reg*, COPYRIGHT.regex}
69 pyyaml tests llvm/test/YAMLParser/{*.data, LICENSE.TXT}
0 //===--- YAMLParser.h - Simple YAML parser --------------------------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This is a YAML 1.2 parser.
10 //
11 // See http://www.yaml.org/spec/1.2/spec.html for the full standard.
12 //
13 // This currently does not implement the following:
14 // * Multi-line literal folding.
15 // * Tag resolution.
16 // * UTF-16.
17 // * BOMs anywhere other than the first Unicode scalar value in the file.
18 //
19 // The most important class here is Stream. This represents a YAML stream with
20 // 0, 1, or many documents.
21 //
22 // SourceMgr sm;
23 // StringRef input = getInput();
24 // yaml::Stream stream(input, sm);
25 //
26 // for (yaml::document_iterator di = stream.begin(), de = stream.end();
27 // di != de; ++di) {
28 // yaml::Node *n = di->getRoot();
29 // if (n) {
30 // // Do something with n...
31 // } else
32 // break;
33 // }
34 //
35 //===----------------------------------------------------------------------===//
36
37 #ifndef LLVM_SUPPORT_YAML_PARSER_H
38 #define LLVM_SUPPORT_YAML_PARSER_H
39
40 #include "llvm/ADT/OwningPtr.h"
41 #include "llvm/ADT/SmallString.h"
42 #include "llvm/ADT/StringRef.h"
43 #include "llvm/Support/Allocator.h"
44 #include "llvm/Support/SMLoc.h"
45
46 #include
47 #include
48
49 namespace llvm {
50 class MemoryBuffer;
51 class SourceMgr;
52 class raw_ostream;
53 class Twine;
54
55 namespace yaml {
56
57 class document_iterator;
58 class Document;
59 class Node;
60 class Scanner;
61 struct Token;
62
63 /// @brief Dump all the tokens in this stream to OS.
64 /// @returns true if there was an error, false otherwise.
65 bool dumpTokens(StringRef Input, raw_ostream &);
66
67 /// @brief Scans all tokens in input without outputting anything. This is used
68 /// for benchmarking the tokenizer.
69 /// @returns true if there was an error, false otherwise.
70 bool scanTokens(StringRef Input);
71
72 /// @brief Escape \a Input for a double quoted scalar.
73 std::string escape(StringRef Input);
74
75 /// @brief This class represents a YAML stream potentially containing multiple
76 /// documents.
77 class Stream {
78 public:
79 Stream(StringRef Input, SourceMgr &);
80
81 document_iterator begin();
82 document_iterator end();
83 void skip();
84 bool failed();
85 bool validate() {
86 skip();
87 return !failed();
88 }
89
90 void printError(Node *N, const Twine &Msg);
91
92 private:
93 OwningPtr scanner;
94 OwningPtr CurrentDoc;
95
96 friend class Document;
97
98 /// @brief Validate a %YAML x.x directive.
99 void handleYAMLDirective(const Token &);
100 };
101
102 /// @brief Abstract base class for all Nodes.
103 class Node {
104 public:
105 enum NodeKind {
106 NK_Null,
107 NK_Scalar,
108 NK_KeyValue,
109 NK_Mapping,
110 NK_Sequence,
111 NK_Alias
112 };
113
114 Node(unsigned int Type, OwningPtr&, StringRef Anchor);
115 virtual ~Node();
116
117 /// @brief Get the value of the anchor attached to this node. If it does not
118 /// have one, getAnchor().size() will be 0.
119 StringRef getAnchor() const { return Anchor; }
120
121 SMRange getSourceRange() const { return SourceRange; }
122 void setSourceRange(SMRange SR) { SourceRange = SR; }
123
124 // These functions forward to Document and Scanner.
125 Token &peekNext();
126 Token getNext();
127 Node *parseBlockNode();
128 BumpPtrAllocator &getAllocator();
129 void setError(const Twine &Message, Token &Location) const;
130 bool failed() const;
131
132 virtual void skip() {};
133
134 unsigned int getType() const { return TypeID; }
135 static inline bool classof(const Node *) { return true; }
136
137 void *operator new ( size_t Size
138 , BumpPtrAllocator &Alloc
139 , size_t Alignment = 16) throw() {
140 return Alloc.Allocate(Size, Alignment);
141 }
142
143 void operator delete(void *Ptr, BumpPtrAllocator &Alloc, size_t) throw() {
144 Alloc.Deallocate(Ptr);
145 }
146
147 protected:
148 OwningPtr &Doc;
149 SMRange SourceRange;
150
151 private:
152 unsigned int TypeID;
153 StringRef Anchor;
154 };
155
156 /// @brief A null value.
157 ///
158 /// Example:
159 /// !!null null
160 class NullNode : public Node {
161 public:
162 NullNode(OwningPtr &D) : Node(NK_Null, D, StringRef()) {}
163
164 static inline bool classof(const NullNode *) { return true; }
165 static inline bool classof(const Node *N) {
166 return N->getType() == NK_Null;
167 }
168 };
169
170 /// @brief A scalar node is an opaque datum that can be presented as a
171 /// series of zero or more Unicode scalar values.
172 ///
173 /// Example:
174 /// Adena
175 class ScalarNode : public Node {
176 public:
177 ScalarNode(OwningPtr &D, StringRef Anchor, StringRef Val)
178 : Node(NK_Scalar, D, Anchor)
179 , Value(Val) {
180 SMLoc Start = SMLoc::getFromPointer(Val.begin());
181 SMLoc End = SMLoc::getFromPointer(Val.end() - 1);
182 SourceRange = SMRange(Start, End);
183 }
184
185 // Return Value without any escaping or folding or other fun YAML stuff. This
186 // is the exact bytes that are contained in the file (after conversion to
187 // utf8).
188 StringRef getRawValue() const { return Value; }
189
190 /// @brief Gets the value of this node as a StringRef.
191 ///
192 /// @param Storage is used to store the content of the returned StringRef iff
193 /// it requires any modification from how it appeared in the source.
194 /// This happens with escaped characters and multi-line literals.
195 StringRef getValue(SmallVectorImpl &Storage) const;
196
197 static inline bool classof(const ScalarNode *) { return true; }
198 static inline bool classof(const Node *N) {
199 return N->getType() == NK_Scalar;
200 }
201
202 private:
203 StringRef Value;
204
205 StringRef unescapeDoubleQuoted( StringRef UnquotedValue
206 , StringRef::size_type Start
207 , SmallVectorImpl &Storage) const;
208 };
209
210 static bool getAs(const ScalarNode *SN, bool &Result) {
211 SmallString<4> Storage;
212 StringRef Value = SN->getValue(Storage);
213 if (Value == "true")
214 Result = true;
215 else if (Value == "false")
216 Result = false;
217 else
218 return false;
219 return true;
220 }
221
222 template
223 typename enable_if_c::is_integer, bool>::type
224 getAs(const ScalarNode *SN, T &Result) {
225 SmallString<4> Storage;
226 return !SN->getValue(Storage).getAsInteger(0, Result);
227 }
228
229 /// @brief A key and value pair. While not technically a Node under the YAML
230 /// representation graph, it is easier to treat them this way.
231 ///
232 /// TODO: Consider making this not a child of Node.
233 ///
234 /// Example:
235 /// Section: .text
236 class KeyValueNode : public Node {
237 public:
238 KeyValueNode(OwningPtr &D)
239 : Node(NK_KeyValue, D, StringRef())
240 , Key(0)
241 , Value(0)
242 {}
243
244 /// @brief Parse and return the key.
245 ///
246 /// This may be called multiple times.
247 ///
248 /// @returns The key, or nullptr if failed() == true.
249 Node *getKey();
250
251 /// @brief Parse and return the value.
252 ///
253 /// This may be called multiple times.
254 ///
255 /// @returns The value, or nullptr if failed() == true.
256 Node *getValue();
257
258 virtual void skip() {
259 getKey()->skip();
260 getValue()->skip();
261 }
262
263 static inline bool classof(const KeyValueNode *) { return true; }
264 static inline bool classof(const Node *N) {
265 return N->getType() == NK_KeyValue;
266 }
267
268 private:
269 Node *Key;
270 Node *Value;
271 };
272
273 /// @brief This is an iterator abstraction over YAML collections shared by both
274 /// sequences and maps.
275 ///
276 /// BaseT must have a ValueT* member named CurrentEntry and a member function
277 /// increment() which must set CurrentEntry to 0 to create an end iterator.
278 template
279 class basic_collection_iterator
280 : public std::iterator {
281 public:
282 basic_collection_iterator() : Base(0) {}
283 basic_collection_iterator(BaseT *B) : Base(B) {}
284
285 ValueT *operator ->() const {
286 assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
287 return Base->CurrentEntry;
288 }
289
290 ValueT &operator *() const {
291 assert(Base && Base->CurrentEntry &&
292 "Attempted to dereference end iterator!");
293 return *Base->CurrentEntry;
294 }
295
296 operator ValueT*() const {
297 assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
298 return Base->CurrentEntry;
299 }
300
301 bool operator !=(const basic_collection_iterator &Other) const {
302 if(Base != Other.Base)
303 return true;
304 return (Base && Other.Base) && Base->CurrentEntry
305 != Other.Base->CurrentEntry;
306 }
307
308 basic_collection_iterator &operator++() {
309 assert(Base && "Attempted to advance iterator past end!");
310 Base->increment();
311 // Create an end iterator.
312 if (Base->CurrentEntry == 0)
313 Base = 0;
314 return *this;
315 }
316
317 private:
318 BaseT *Base;
319 };
320
321 // The following two templates are used for both MappingNode and Sequence Node.
322 template
323 typename CollectionType::iterator begin(CollectionType &C) {
324 assert(C.IsAtBeginning && "You may only iterate over a collection once!");
325 C.IsAtBeginning = false;
326 typename CollectionType::iterator ret(&C);
327 ++ret;
328 return ret;
329 }
330
331 template
332 void skip(CollectionType &C) {
333 // TODO: support skipping from the middle of a parsed collection ;/
334 assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!");
335 if (C.IsAtBeginning)
336 for (typename CollectionType::iterator i = begin(C), e = C.end();
337 i != e; ++i)
338 i->skip();
339 }
340
341 /// @brief Represents a YAML map created from either a block map or a flow map.
342 ///
343 /// This parses the YAML stream as increment() is called.
344 ///
345 /// Example:
346 /// Name: _main
347 /// Scope: Global
348 class MappingNode : public Node {
349 public:
350 enum MappingType {
351 MT_Block,
352 MT_Flow,
353 MT_Inline //< An inline mapping node is used for "[key: value]".
354 };
355
356 MappingNode(OwningPtr &D, StringRef Anchor, MappingType MT)
357 : Node(NK_Mapping, D, Anchor)
358 , Type(MT)
359 , IsAtBeginning(true)
360 , IsAtEnd(false)
361 , CurrentEntry(0)
362 {}
363
364 friend class basic_collection_iterator;
365 typedef basic_collection_iterator iterator;
366 template friend typename T::iterator yaml::begin(T &);
367 template friend void yaml::skip(T &);
368
369 iterator begin() {
370 return yaml::begin(*this);
371 }
372
373 iterator end() { return iterator(); }
374
375 virtual void skip() {
376 yaml::skip(*this);
377 }
378
379 static inline bool classof(const MappingNode *) { return true; }
380 static inline bool classof(const Node *N) {
381 return N->getType() == NK_Mapping;
382 }
383
384 private:
385 MappingType Type;
386 bool IsAtBeginning;
387 bool IsAtEnd;
388 KeyValueNode *CurrentEntry;
389
390 void increment();
391 };
392
393 /// @brief Represents a YAML sequence created from either a block sequence or a
394 /// flow sequence.
395 ///
396 /// This parses the YAML stream as increment() is called.
397 ///
398 /// Example:
399 /// - Hello
400 /// - World
401 class SequenceNode : public Node {
402 public:
403 enum SequenceType {
404 ST_Block,
405 ST_Flow,
406 // Use for:
407 //
408 // key:
409 // - val1
410 // - val2
411 //
412 // As a BlockMappingEntry and BlockEnd are not created in this case.
413 ST_Indentless
414 };
415
416 SequenceNode(OwningPtr &D, StringRef Anchor, SequenceType ST)
417 : Node(NK_Sequence, D, Anchor)
418 , SeqType(ST)
419 , IsAtBeginning(true)
420 , IsAtEnd(false)
421 , WasPreviousTokenFlowEntry(true) // Start with an imaginary ','.
422 , CurrentEntry(0)
423 {}
424
425 friend class basic_collection_iterator;
426 typedef basic_collection_iterator iterator;
427 template friend typename T::iterator yaml::begin(T &);
428 template friend void yaml::skip(T &);
429
430 void increment();
431
432 iterator begin() {
433 return yaml::begin(*this);
434 }
435
436 iterator end() { return iterator(); }
437
438 virtual void skip() {
439 yaml::skip(*this);
440 }
441
442 static inline bool classof(const SequenceNode *) { return true; }
443 static inline bool classof(const Node *N) {
444 return N->getType() == NK_Sequence;
445 }
446
447 private:
448 SequenceType SeqType;
449 bool IsAtBeginning;
450 bool IsAtEnd;
451 bool WasPreviousTokenFlowEntry;
452 Node *CurrentEntry;
453 };
454
455 /// @brief Represents an alias to a Node with an anchor.
456 ///
457 /// Example:
458 /// *AnchorName
459 class AliasNode : public Node {
460 public:
461 AliasNode(OwningPtr &D, StringRef Val)
462 : Node(NK_Alias, D, StringRef()), Name(Val) {}
463
464 StringRef getName() const { return Name; }
465 Node *getTarget();
466
467 static inline bool classof(const ScalarNode *) { return true; }
468 static inline bool classof(const Node *N) {
469 return N->getType() == NK_Alias;
470 }
471
472 private:
473 StringRef Name;
474 };
475
476 /// @brief A YAML Stream is a sequence of Documents. A document contains a root
477 /// node.
478 class Document {
479 public:
480 /// @brief Root for parsing a node. Returns a single node.
481 Node *parseBlockNode();
482
483 Document(Stream &ParentStream);
484
485 /// @brief Finish parsing the current document and return true if there are
486 /// more. Return false otherwise.
487 bool skip();
488
489 /// @brief Parse and return the root level node.
490 Node *getRoot() {
491 if (Root)
492 return Root;
493 return Root = parseBlockNode();
494 }
495
496 private:
497 friend class Node;
498 friend class document_iterator;
499
500 /// @brief Stream to read tokens from.
501 Stream &stream;
502
503 /// @brief Used to allocate nodes to. All are destroyed without calling their
504 /// destructor when the document is destroyed.
505 BumpPtrAllocator NodeAllocator;
506
507 /// @brief The root node. Used to support skipping a partially parsed
508 /// document.
509 Node *Root;
510
511 Token &peekNext();
512 Token getNext();
513 void setError(const Twine &Message, Token &Location) const;
514 bool failed() const;
515
516 void handleTagDirective(const Token &Tag) {
517 // TODO: Track tags.
518 }
519
520 /// @brief Parse %BLAH directives and return true if any were encountered.
521 bool parseDirectives();
522
523 /// @brief Consume the next token and error if it is not \a TK.
524 bool expectToken(int TK);
525 };
526
527 /// @brief Iterator abstraction for Documents over a Stream.
528 class document_iterator {
529 public:
530 document_iterator() : Doc(NullDoc) {}
531 document_iterator(OwningPtr &D) : Doc(D) {}
532
533 bool operator !=(const document_iterator &Other) {
534 return Doc != Other.Doc;
535 }
536
537 document_iterator operator ++() {
538 if (!Doc->skip()) {
539 Doc.reset(0);
540 } else {
541 Stream &S = Doc->stream;
542 Doc.reset(new Document(S));
543 }
544 return *this;
545 }
546
547 Document &operator *() {
548 return *Doc;
549 }
550
551 OwningPtr &operator ->() {
552 return Doc;
553 }
554
555 private:
556 static OwningPtr NullDoc;
557 OwningPtr &Doc;
558 };
559
560 }
561 }
562
563 #endif
5353 ToolOutputFile.cpp
5454 Triple.cpp
5555 Twine.cpp
56 YAMLParser.cpp
5657 raw_os_ostream.cpp
5758 raw_ostream.cpp
5859 regcomp.c
0 //===--- YAMLParser.cpp - Simple YAML parser ------------------------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a YAML parser.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "llvm/Support/YAMLParser.h"
14
15 #include "llvm/ADT/ilist.h"
16 #include "llvm/ADT/ilist_node.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/MemoryBuffer.h"
22 #include "llvm/Support/raw_ostream.h"
23 #include "llvm/Support/SourceMgr.h"
24
25 using namespace llvm;
26 using namespace yaml;
27
28 enum UnicodeEncodingForm {
29 UEF_UTF32_LE, //< UTF-32 Little Endian
30 UEF_UTF32_BE, //< UTF-32 Big Endian
31 UEF_UTF16_LE, //< UTF-16 Little Endian
32 UEF_UTF16_BE, //< UTF-16 Big Endian
33 UEF_UTF8, //< UTF-8 or ascii.
34 UEF_Unknown //< Not a valid Unicode encoding.
35 };
36
37 /// EncodingInfo - Holds the encoding type and length of the byte order mark if
38 /// it exists. Length is in {0, 2, 3, 4}.
39 typedef std::pair EncodingInfo;
40
41 /// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode
42 /// encoding form of \a Input.
43 ///
44 /// @param Input A string of length 0 or more.
45 /// @returns An EncodingInfo indicating the Unicode encoding form of the input
46 /// and how long the byte order mark is if one exists.
47 static EncodingInfo getUnicodeEncoding(StringRef Input) {
48 if (Input.size() == 0)
49 return std::make_pair(UEF_Unknown, 0);
50
51 switch (uint8_t(Input[0])) {
52 case 0x00:
53 if (Input.size() >= 4) {
54 if ( Input[1] == 0
55 && uint8_t(Input[2]) == 0xFE
56 && uint8_t(Input[3]) == 0xFF)
57 return std::make_pair(UEF_UTF32_BE, 4);
58 if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0)
59 return std::make_pair(UEF_UTF32_BE, 0);
60 }
61
62 if (Input.size() >= 2 && Input[1] != 0)
63 return std::make_pair(UEF_UTF16_BE, 0);
64 return std::make_pair(UEF_Unknown, 0);
65 case 0xFF:
66 if ( Input.size() >= 4
67 && uint8_t(Input[1]) == 0xFE
68 && Input[2] == 0
69 && Input[3] == 0)
70 return std::make_pair(UEF_UTF32_LE, 4);
71
72 if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE)
73 return std::make_pair(UEF_UTF16_LE, 2);
74 return std::make_pair(UEF_Unknown, 0);
75 case 0xFE:
76 if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF)
77 return std::make_pair(UEF_UTF16_BE, 2);
78 return std::make_pair(UEF_Unknown, 0);
79 case 0xEF:
80 if ( Input.size() >= 3
81 && uint8_t(Input[1]) == 0xBB
82 && uint8_t(Input[2]) == 0xBF)
83 return std::make_pair(UEF_UTF8, 3);
84 return std::make_pair(UEF_Unknown, 0);
85 }
86
87 // It could still be utf-32 or utf-16.
88 if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0)
89 return std::make_pair(UEF_UTF32_LE, 0);
90
91 if (Input.size() >= 2 && Input[1] == 0)
92 return std::make_pair(UEF_UTF16_LE, 0);
93
94 return std::make_pair(UEF_UTF8, 0);
95 }
96
97 namespace llvm {
98 namespace yaml {
99 /// Token - A single YAML token.
100 struct Token : ilist_node {
101 enum TokenKind {
102 TK_Error, // Uninitialized token.
103 TK_StreamStart,
104 TK_StreamEnd,
105 TK_VersionDirective,
106 TK_TagDirective,
107 TK_DocumentStart,
108 TK_DocumentEnd,
109 TK_BlockEntry,
110 TK_BlockEnd,
111 TK_BlockSequenceStart,
112 TK_BlockMappingStart,
113 TK_FlowEntry,
114 TK_FlowSequenceStart,
115 TK_FlowSequenceEnd,
116 TK_FlowMappingStart,
117 TK_FlowMappingEnd,
118 TK_Key,
119 TK_Value,
120 TK_Scalar,
121 TK_Alias,
122 TK_Anchor,
123 TK_Tag
124 } Kind;
125
126 /// A string of length 0 or more whose begin() points to the logical location
127 /// of the token in the input.
128 StringRef Range;
129
130 Token() : Kind(TK_Error) {}
131 };
132 }
133 }
134
135 template<>
136 struct ilist_sentinel_traits {
137 Token *createSentinel() const {
138 return &Sentinel;
139 }
140 static void destroySentinel(Token*) {}
141
142 Token *provideInitialHead() const { return createSentinel(); }
143 Token *ensureHead(Token*) const { return createSentinel(); }
144 static void noteHead(Token*, Token*) {}
145
146 private:
147 mutable Token Sentinel;
148 };
149
150 template<>
151 struct ilist_node_traits {
152 Token *createNode(const Token &V) {
153 return new (Alloc.Allocate()) Token(V);
154 }
155 static void deleteNode(Token *V) {}
156
157 void addNodeToList(Token *) {}
158 void removeNodeFromList(Token *) {}
159 void transferNodesFromList(ilist_node_traits & /*SrcTraits*/,
160 ilist_iterator /*first*/,
161 ilist_iterator /*last*/) {}
162
163 BumpPtrAllocator Alloc;
164 };
165
166 typedef ilist TokenQueueT;
167
168 namespace {
169 /// @brief This struct is used to track simple keys.
170 ///
171 /// Simple keys are handled by creating an entry in SimpleKeys for each Token
172 /// which could legally be the start of a simple key. When peekNext is called,
173 /// if the Token To be returned is referenced by a SimpleKey, we continue
174 /// tokenizing until that potential simple key has either been found to not be
175 /// a simple key (we moved on to the next line or went further than 1024 chars).
176 /// Or when we run into a Value, and then insert a Key token (and possibly
177 /// others) before the SimpleKey's Tok.
178 struct SimpleKey {
179 TokenQueueT::iterator Tok;
180 unsigned Column;
181 unsigned Line;
182 unsigned FlowLevel;
183 bool IsRequired;
184
185 bool operator ==(const SimpleKey &Other) {
186 return Tok == Other.Tok;
187 }
188 };
189 }
190
191 /// @brief The Unicode scalar value of a UTF-8 minimal well-formed code unit
192 /// subsequence and the subsequence's length in code units (uint8_t).
193 /// A length of 0 represents an error.
194 typedef std::pair UTF8Decoded;
195
196 static UTF8Decoded decodeUTF8(StringRef Range) {
197 StringRef::iterator Position= Range.begin();
198 StringRef::iterator End = Range.end();
199 // 1 byte: [0x00, 0x7f]
200 // Bit pattern: 0xxxxxxx
201 if ((*Position & 0x80) == 0) {
202 return std::make_pair(*Position, 1);
203 }
204 // 2 bytes: [0x80, 0x7ff]
205 // Bit pattern: 110xxxxx 10xxxxxx
206 if (Position + 1 != End &&
207 ((*Position & 0xE0) == 0xC0) &&
208 ((*(Position + 1) & 0xC0) == 0x80)) {
209 uint32_t codepoint = ((*Position & 0x1F) << 6) |
210 (*(Position + 1) & 0x3F);
211 if (codepoint >= 0x80)
212 return std::make_pair(codepoint, 2);
213 }
214 // 3 bytes: [0x8000, 0xffff]
215 // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx
216 if (Position + 2 != End &&
217 ((*Position & 0xF0) == 0xE0) &&
218 ((*(Position + 1) & 0xC0) == 0x80) &&
219 ((*(Position + 2) & 0xC0) == 0x80)) {
220 uint32_t codepoint = ((*Position & 0x0F) << 12) |
221 ((*(Position + 1) & 0x3F) << 6) |
222 (*(Position + 2) & 0x3F);
223 // Codepoints between 0xD800 and 0xDFFF are invalid, as
224 // they are high / low surrogate halves used by UTF-16.
225 if (codepoint >= 0x800 &&
226 (codepoint < 0xD800 || codepoint > 0xDFFF))
227 return std::make_pair(codepoint, 3);
228 }
229 // 4 bytes: [0x10000, 0x10FFFF]
230 // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
231 if (Position + 3 != End &&
232 ((*Position & 0xF8) == 0xF0) &&
233 ((*(Position + 1) & 0xC0) == 0x80) &&
234 ((*(Position + 2) & 0xC0) == 0x80) &&
235 ((*(Position + 3) & 0xC0) == 0x80)) {
236 uint32_t codepoint = ((*Position & 0x07) << 18) |
237 ((*(Position + 1) & 0x3F) << 12) |
238 ((*(Position + 2) & 0x3F) << 6) |
239 (*(Position + 3) & 0x3F);
240 if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
241 return std::make_pair(codepoint, 4);
242 }
243 return std::make_pair(0, 0);
244 }
245
246 namespace llvm {
247 namespace yaml {
248 /// @brief Scans YAML tokens from a MemoryBuffer.
249 class Scanner {
250 public:
251 Scanner(const StringRef Input, SourceMgr &SM);
252
253 /// @brief Parse the next token and return it without popping it.
254 Token &peekNext();
255
256 /// @brief Parse the next token and pop it from the queue.
257 Token getNext();
258
259 void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message,
260 ArrayRef Ranges = ArrayRef()) {
261 SM.PrintMessage(Loc, Kind, Message, Ranges);
262 }
263
264 void setError(const Twine &Message, StringRef::iterator Position) {
265 if (Current >= End)
266 Current = End - 1;
267
268 // Don't print out more errors after the first one we encounter. The rest
269 // are just the result of the first, and have no meaning.
270 if (!Failed)
271 printError(SMLoc::getFromPointer(Current), SourceMgr::DK_Error, Message);
272 Failed = true;
273 }
274
275 void setError(const Twine &Message) {
276 setError(Message, Current);
277 }
278
279 /// @brief Returns true if an error occurred while parsing.
280 bool failed() {
281 return Failed;
282 }
283
284 private:
285 StringRef currentInput() {
286 return StringRef(Current, End - Current);
287 }
288
289 /// @brief Decode a UTF-8 minimal well-formed code unit subsequence starting
290 /// at \a Position.
291 ///
292 /// If the UTF-8 code units starting at Position do not form a well-formed
293 /// code unit subsequence, then the Unicode scalar value is 0, and the length
294 /// is 0.
295 UTF8Decoded decodeUTF8(StringRef::iterator Position) {
296 return ::decodeUTF8(StringRef(Position, End - Position));
297 }
298
299 // The following functions are based on the gramar rules in the YAML spec. The
300 // style of the function names it meant to closely match how they are written
301 // in the spec. The number within the [] is the number of the grammar rule in
302 // the spec.
303 //
304 // See 4.2 [Production Naming Conventions] for the meaning of the prefixes.
305 //
306 // c-
307 // A production starting and ending with a special character.
308 // b-
309 // A production matching a single line break.
310 // nb-
311 // A production starting and ending with a non-break character.
312 // s-
313 // A production starting and ending with a white space character.
314 // ns-
315 // A production starting and ending with a non-space character.
316 // l-
317 // A production matching complete line(s).
318
319 /// @brief Skip a single nb-char[27] starting at Position.
320 ///
321 /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE]
322 /// | [0xFF00-0xFFFD] | [0x10000-0x10FFFF]
323 ///
324 /// @returns The code unit after the nb-char, or Position if it's not an
325 /// nb-char.
326 StringRef::iterator skip_nb_char(StringRef::iterator Position);
327
328 /// @brief Skip a single b-break[28] starting at Position.
329 ///
330 /// A b-break is 0xD 0xA | 0xD | 0xA
331 ///
332 /// @returns The code unit after the b-break, or Position if it's not a
333 /// b-break.
334 StringRef::iterator skip_b_break(StringRef::iterator Position);
335
336 /// @brief Skip a single s-white[33] starting at Position.
337 ///
338 /// A s-white is 0x20 | 0x9
339 ///
340 /// @returns The code unit after the s-white, or Position if it's not a
341 /// s-white.
342 StringRef::iterator skip_s_white(StringRef::iterator Position);
343
344 /// @brief Skip a single ns-char[34] starting at Position.
345 ///
346 /// A ns-char is nb-char - s-white
347 ///
348 /// @returns The code unit after the ns-char, or Position if it's not a
349 /// ns-char.
350 StringRef::iterator skip_ns_char(StringRef::iterator Position);
351
352 typedef StringRef::iterator (Scanner::*SkipWhileFunc)(StringRef::iterator);
353 /// @brief Skip minimal well-formed code unit subsequences until Func
354 /// returns its input.
355 ///
356 /// @returns The code unit after the last minimal well-formed code unit
357 /// subsequence that Func accepted.
358 StringRef::iterator skip_while( SkipWhileFunc Func
359 , StringRef::iterator Position);
360
361 /// @brief Scan ns-uri-char[39]s starting at Cur.
362 ///
363 /// This updates Cur and Column while scanning.
364 ///
365 /// @returns A StringRef starting at Cur which covers the longest contiguous
366 /// sequence of ns-uri-char.
367 StringRef scan_ns_uri_char();
368
369 /// @brief Scan ns-plain-one-line[133] starting at \a Cur.
370 StringRef scan_ns_plain_one_line();
371
372 /// @brief Consume a minimal well-formed code unit subsequence starting at
373 /// \a Cur. Return false if it is not the same Unicode scalar value as
374 /// \a Expected. This updates \a Column.
375 bool consume(uint32_t Expected);
376
377 /// @brief Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column.
378 void skip(uint32_t Distance);
379
380 /// @brief Return true if the minimal well-formed code unit subsequence at
381 /// Pos is whitespace or a new line
382 bool isBlankOrBreak(StringRef::iterator Position);
383
384 /// @brief If IsSimpleKeyAllowed, create and push_back a new SimpleKey.
385 void saveSimpleKeyCandidate( TokenQueueT::iterator Tok
386 , unsigned AtColumn
387 , bool IsRequired);
388
389 /// @brief Remove simple keys that can no longer be valid simple keys.
390 ///
391 /// Invalid simple keys are not on the current line or are further than 1024
392 /// columns back.
393 void removeStaleSimpleKeyCandidates();
394
395 /// @brief Remove all simple keys on FlowLevel \a Level.
396 void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level);
397
398 /// @brief Unroll indentation in \a Indents back to \a Col. Creates BlockEnd
399 /// tokens if needed.
400 bool unrollIndent(int ToColumn);
401
402 /// @brief Increase indent to \a Col. Creates \a Kind token at \a InsertPoint
403 /// if needed.
404 bool rollIndent( int ToColumn
405 , Token::TokenKind Kind
406 , TokenQueueT::iterator InsertPoint);
407
408 /// @brief Skip whitespace and comments until the start of the next token.
409 void scanToNextToken();
410
411 /// @brief Must be the first token generated.
412 bool scanStreamStart();
413
414 /// @brief Generate tokens needed to close out the stream.
415 bool scanStreamEnd();
416
417 /// @brief Scan a %BLAH directive.
418 bool scanDirective();
419
420 /// @brief Scan a ... or ---.
421 bool scanDocumentIndicator(bool IsStart);
422
423 /// @brief Scan a [ or { and generate the proper flow collection start token.
424 bool scanFlowCollectionStart(bool IsSequence);
425
426 /// @brief Scan a ] or } and generate the proper flow collection end token.
427 bool scanFlowCollectionEnd(bool IsSequence);
428
429 /// @brief Scan the , that separates entries in a flow collection.
430 bool scanFlowEntry();
431
432 /// @brief Scan the - that starts block sequence entries.
433 bool scanBlockEntry();
434
435 /// @brief Scan an explicit ? indicating a key.
436 bool scanKey();
437
438 /// @brief Scan an explicit : indicating a value.
439 bool scanValue();
440
441 /// @brief Scan a quoted scalar.
442 bool scanFlowScalar(bool IsDoubleQuoted);
443
444 /// @brief Scan an unquoted scalar.
445 bool scanPlainScalar();
446
447 /// @brief Scan an Alias or Anchor starting with * or &.
448 bool scanAliasOrAnchor(bool IsAlias);
449
450 /// @brief Scan a block scalar starting with | or >.
451 bool scanBlockScalar(bool IsLiteral);
452
453 /// @brief Scan a tag of the form !stuff.
454 bool scanTag();
455
456 /// @brief Dispatch to the next scanning function based on \a *Cur.
457 bool fetchMoreTokens();
458
459 /// @brief The SourceMgr used for diagnostics and buffer management.
460 SourceMgr &SM;
461
462 /// @brief The original input.
463 MemoryBuffer *InputBuffer;
464
465 /// @brief The current position of the scanner.
466 StringRef::iterator Current;
467
468 /// @brief The end of the input (one past the last character).
469 StringRef::iterator End;
470
471 /// @brief Current YAML indentation level in spaces.
472 int Indent;
473
474 /// @brief Current column number in Unicode code points.
475 unsigned Column;
476
477 /// @brief Current line number.
478 unsigned Line;
479
480 /// @brief How deep we are in flow style containers. 0 Means at block level.
481 unsigned FlowLevel;
482
483 /// @brief Are we at the start of the stream?
484 bool IsStartOfStream;
485
486 /// @brief Can the next token be the start of a simple key?
487 bool IsSimpleKeyAllowed;
488
489 /// @brief Is the next token required to start a simple key?
490 bool IsSimpleKeyRequired;
491
492 /// @brief True if an error has occurred.
493 bool Failed;
494
495 /// @brief Queue of tokens. This is required to queue up tokens while looking
496 /// for the end of a simple key. And for cases where a single character
497 /// can produce multiple tokens (e.g. BlockEnd).
498 TokenQueueT TokenQueue;
499
500 /// @brief Indentation levels.
501 SmallVector Indents;
502
503 /// @brief Potential simple keys.
504 SmallVector SimpleKeys;
505 };
506
507 } // end namespace yaml
508 } // end namespace llvm
509
510 /// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result.
511 static void encodeUTF8( uint32_t UnicodeScalarValue
512 , SmallVectorImpl &Result) {
513 if (UnicodeScalarValue <= 0x7F) {
514 Result.push_back(UnicodeScalarValue & 0x7F);
515 } else if (UnicodeScalarValue <= 0x7FF) {
516 uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6);
517 uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F);
518 Result.push_back(FirstByte);
519 Result.push_back(SecondByte);
520 } else if (UnicodeScalarValue <= 0xFFFF) {
521 uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12);
522 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
523 uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F);
524 Result.push_back(FirstByte);
525 Result.push_back(SecondByte);
526 Result.push_back(ThirdByte);
527 } else if (UnicodeScalarValue <= 0x10FFFF) {
528 uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18);
529 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12);
530 uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
531 uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F);
532 Result.push_back(FirstByte);
533 Result.push_back(SecondByte);
534 Result.push_back(ThirdByte);
535 Result.push_back(FourthByte);
536 }
537 }
538
539 bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) {
540 SourceMgr SM;
541 Scanner scanner(Input, SM);
542 while (true) {
543 Token T = scanner.getNext();
544 switch (T.Kind) {
545 case Token::TK_StreamStart:
546 OS << "Stream-Start: ";
547 break;
548 case Token::TK_StreamEnd:
549 OS << "Stream-End: ";
550 break;
551 case Token::TK_VersionDirective:
552 OS << "Version-Directive: ";
553 break;
554 case Token::TK_TagDirective:
555 OS << "Tag-Directive: ";
556 break;
557 case Token::TK_DocumentStart:
558 OS << "Document-Start: ";
559 break;
560 case Token::TK_DocumentEnd:
561 OS << "Document-End: ";
562 break;
563 case Token::TK_BlockEntry:
564 OS << "Block-Entry: ";
565 break;
566 case Token::TK_BlockEnd:
567 OS << "Block-End: ";
568 break;
569 case Token::TK_BlockSequenceStart:
570 OS << "Block-Sequence-Start: ";
571 break;
572 case Token::TK_BlockMappingStart:
573 OS << "Block-Mapping-Start: ";
574 break;
575 case Token::TK_FlowEntry:
576 OS << "Flow-Entry: ";
577 break;
578 case Token::TK_FlowSequenceStart:
579 OS << "Flow-Sequence-Start: ";
580 break;
581 case Token::TK_FlowSequenceEnd:
582 OS << "Flow-Sequence-End: ";
583 break;
584 case Token::TK_FlowMappingStart:
585 OS << "Flow-Mapping-Start: ";
586 break;
587 case Token::TK_FlowMappingEnd:
588 OS << "Flow-Mapping-End: ";
589 break;
590 case Token::TK_Key:
591 OS << "Key: ";
592 break;
593 case Token::TK_Value:
594 OS << "Value: ";
595 break;
596 case Token::TK_Scalar:
597 OS << "Scalar: ";
598 break;
599 case Token::TK_Alias:
600 OS << "Alias: ";
601 break;
602 case Token::TK_Anchor:
603 OS << "Anchor: ";
604 break;
605 case Token::TK_Tag:
606 OS << "Tag: ";
607 break;
608 case Token::TK_Error:
609 break;
610 }
611 OS << T.Range << "\n";
612 if (T.Kind == Token::TK_StreamEnd)
613 break;
614 else if (T.Kind == Token::TK_Error)
615 return false;
616 }
617 return true;
618 }
619
620 bool yaml::scanTokens(StringRef Input) {
621 llvm::SourceMgr SM;
622 llvm::yaml::Scanner scanner(Input, SM);
623 for (;;) {
624 llvm::yaml::Token T = scanner.getNext();
625 if (T.Kind == Token::TK_StreamEnd)
626 break;
627 else if (T.Kind == Token::TK_Error)
628 return false;
629 }
630 return true;
631 }
632
633 std::string yaml::escape(StringRef Input) {
634 std::string EscapedInput;
635 for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) {
636 if (*i == '\\')
637 EscapedInput += "\\\\";
638 else if (*i == '"')
639 EscapedInput += "\\\"";
640 else if (*i == 0)
641 EscapedInput += "\\0";
642 else if (*i == 0x07)
643 EscapedInput += "\\a";
644 else if (*i == 0x08)
645 EscapedInput += "\\b";
646 else if (*i == 0x09)
647 EscapedInput += "\\t";
648 else if (*i == 0x0A)
649 EscapedInput += "\\n";
650 else if (*i == 0x0B)
651 EscapedInput += "\\v";
652 else if (*i == 0x0C)
653 EscapedInput += "\\f";
654 else if (*i == 0x0D)
655 EscapedInput += "\\r";
656 else if (*i == 0x1B)
657 EscapedInput += "\\e";
658 else if (*i >= 0 && *i < 0x20) { // Control characters not handled above.
659 std::string HexStr = utohexstr(*i);
660 EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
661 } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence.
662 UTF8Decoded UnicodeScalarValue
663 = decodeUTF8(StringRef(i, Input.end() - i));
664 if (UnicodeScalarValue.second == 0) {
665 // Found invalid char.
666 SmallString<4> Val;
667 encodeUTF8(0xFFFD, Val);
668 EscapedInput.insert(EscapedInput.end(), Val.begin(), Val.end());
669 // FIXME: Error reporting.
670 return EscapedInput;
671 }
672 if (UnicodeScalarValue.first == 0x85)
673 EscapedInput += "\\N";
674 else if (UnicodeScalarValue.first == 0xA0)
675 EscapedInput += "\\_";
676 else if (UnicodeScalarValue.first == 0x2028)
677 EscapedInput += "\\L";
678 else if (UnicodeScalarValue.first == 0x2029)
679 EscapedInput += "\\P";
680 else {
681 std::string HexStr = utohexstr(UnicodeScalarValue.first);
682 if (HexStr.size() <= 2)
683 EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
684 else if (HexStr.size() <= 4)
685 EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr;
686 else if (HexStr.size() <= 8)
687 EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr;
688 }
689 i += UnicodeScalarValue.second - 1;
690 } else
691 EscapedInput.push_back(*i);
692 }
693 return EscapedInput;
694 }
695
696 Scanner::Scanner(StringRef Input, SourceMgr &sm)
697 : SM(sm)
698 , Indent(-1)
699 , Column(0)
700 , Line(0)
701 , FlowLevel(0)
702 , IsStartOfStream(true)
703 , IsSimpleKeyAllowed(true)
704 , IsSimpleKeyRequired(false)
705 , Failed(false) {
706 InputBuffer = MemoryBuffer::getMemBuffer(Input, "YAML");
707 SM.AddNewSourceBuffer(InputBuffer, SMLoc());
708 Current = InputBuffer->getBufferStart();
709 End = InputBuffer->getBufferEnd();
710 }
711
712 Token &Scanner::peekNext() {
713 // If the current token is a possible simple key, keep parsing until we
714 // can confirm.
715 bool NeedMore = false;
716 while (true) {
717 if (TokenQueue.empty() || NeedMore) {
718 if (!fetchMoreTokens()) {
719 TokenQueue.clear();
720 TokenQueue.push_back(Token());
721 return TokenQueue.front();
722 }
723 }
724 assert(!TokenQueue.empty() &&
725 "fetchMoreTokens lied about getting tokens!");
726
727 removeStaleSimpleKeyCandidates();
728 SimpleKey SK;
729 SK.Tok = TokenQueue.front();
730 if (std::find(SimpleKeys.begin(), SimpleKeys.end(), SK)
731 == SimpleKeys.end())
732 break;
733 else
734 NeedMore = true;
735 }
736 return TokenQueue.front();
737 }
738
739 Token Scanner::getNext() {
740 Token Ret = peekNext();
741 // TokenQueue can be empty if there was an error getting the next token.
742 if (!TokenQueue.empty())
743 TokenQueue.pop_front();
744
745 // There cannot be any referenced Token's if the TokenQueue is empty. So do a
746 // quick deallocation of them all.
747 if (TokenQueue.empty()) {
748 TokenQueue.Alloc.Reset();
749 }
750
751 return Ret;
752 }
753
754 StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) {
755 // Check 7 bit c-printable - b-char.
756 if ( *Position == 0x09
757 || (*Position >= 0x20 && *Position <= 0x7E))
758 return Position + 1;
759
760 // Check for valid UTF-8.
761 if (uint8_t(*Position) & 0x80) {
762 UTF8Decoded u8d = decodeUTF8(Position);
763 if ( u8d.second != 0
764 && u8d.first != 0xFEFF
765 && ( u8d.first == 0x85
766 || ( u8d.first >= 0xA0
767 && u8d.first <= 0xD7FF)
768 || ( u8d.first >= 0xE000
769 && u8d.first <= 0xFFFD)
770 || ( u8d.first >= 0x10000
771 && u8d.first <= 0x10FFFF)))
772 return Position + u8d.second;
773 }
774 return Position;
775 }
776
777 StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) {
778 if (*Position == 0x0D) {
779 if (Position + 1 != End && *(Position + 1) == 0x0A)
780 return Position + 2;
781 return Position + 1;
782 }
783
784 if (*Position == 0x0A)
785 return Position + 1;
786 return Position;
787 }
788
789
790 StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) {
791 if (Position == End)
792 return Position;
793 if (*Position == ' ' || *Position == '\t')
794 return Position + 1;
795 return Position;
796 }
797
798 StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) {
799 if (Position == End)
800 return Position;
801 if (*Position == ' ' || *Position == '\t')
802 return Position;
803 return skip_nb_char(Position);
804 }
805
806 StringRef::iterator Scanner::skip_while( SkipWhileFunc Func
807 , StringRef::iterator Position) {
808 while (true) {
809 StringRef::iterator i = (this->*Func)(Position);
810 if (i == Position)
811 break;
812 Position = i;
813 }
814 return Position;
815 }
816
/// Return true if \a C is a hexadecimal digit: 0-9, a-f or A-F.
static bool is_ns_hex_digit(const char C) {
  return (C >= '0' && C <= '9')
      || (C >= 'a' && C <= 'f')
      || (C >= 'A' && C <= 'F');
}
822
/// Return true if \a C is a YAML word character: a decimal digit, an ASCII
/// letter, or '-'.
static bool is_ns_word_char(const char C) {
  return C == '-'
      || (C >= '0' && C <= '9')
      || (C >= 'a' && C <= 'z')
      || (C >= 'A' && C <= 'Z');
}
828
829 StringRef Scanner::scan_ns_uri_char() {
830 StringRef::iterator Start = Current;
831 while (true) {
832 if (Current == End)
833 break;
834 if (( *Current == '%'
835 && Current + 2 < End
836 && is_ns_hex_digit(*(Current + 1))
837 && is_ns_hex_digit(*(Current + 2)))
838 || is_ns_word_char(*Current)
839 || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]")
840 != StringRef::npos) {
841 ++Current;
842 ++Column;
843 } else
844 break;
845 }
846 return StringRef(Start, Current - Start);
847 }
848
849 StringRef Scanner::scan_ns_plain_one_line() {
850 StringRef::iterator start = Current;
851 // The first character must already be verified.
852 ++Current;
853 while (true) {
854 if (Current == End) {
855 break;
856 } else if (*Current == ':') {
857 // Check if the next character is a ns-char.
858 if (Current + 1 == End)
859 break;
860 StringRef::iterator i = skip_ns_char(Current + 1);
861 if (Current + 1 != i) {
862 Current = i;
863 Column += 2; // Consume both the ':' and ns-char.
864 } else
865 break;
866 } else if (*Current == '#') {
867 // Check if the previous character was a ns-char.
868 // The & 0x80 check is to check for the trailing byte of a utf-8
869 if (*(Current - 1) & 0x80 || skip_ns_char(Current - 1) == Current) {
870 ++Current;
871 ++Column;
872 } else
873 break;
874 } else {
875 StringRef::iterator i = skip_nb_char(Current);
876 if (i == Current)
877 break;
878 Current = i;
879 ++Column;
880 }
881 }
882 return StringRef(start, Current - start);
883 }
884
885 bool Scanner::consume(uint32_t Expected) {
886 if (Expected >= 0x80)
887 report_fatal_error("Not dealing with this yet");
888 if (Current == End)
889 return false;
890 if (uint8_t(*Current) >= 0x80)
891 report_fatal_error("Not dealing with this yet");
892 if (uint8_t(*Current) == Expected) {
893 ++Current;
894 ++Column;
895 return true;
896 }
897 return false;
898 }
899
900 void Scanner::skip(uint32_t Distance) {
901 Current += Distance;
902 Column += Distance;
903 }
904
905 bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
906 if (Position == End)
907 return false;
908 if ( *Position == ' ' || *Position == '\t'
909 || *Position == '\r' || *Position == '\n')
910 return true;
911 return false;
912 }
913
914 void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok
915 , unsigned AtColumn
916 , bool IsRequired) {
917 if (IsSimpleKeyAllowed) {
918 SimpleKey SK;
919 SK.Tok = Tok;
920 SK.Line = Line;
921 SK.Column = AtColumn;
922 SK.IsRequired = IsRequired;
923 SK.FlowLevel = FlowLevel;
924 SimpleKeys.push_back(SK);
925 }
926 }
927
928 void Scanner::removeStaleSimpleKeyCandidates() {
929 for (SmallVectorImpl::iterator i = SimpleKeys.begin();
930 i != SimpleKeys.end();) {
931 if (i->Line != Line || i->Column + 1024 < Column) {
932 if (i->IsRequired)
933 setError( "Could not find expected : for simple key"
934 , i->Tok->Range.begin());
935 i = SimpleKeys.erase(i);
936 } else
937 ++i;
938 }
939 }
940
941 void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) {
942 if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level)
943 SimpleKeys.pop_back();
944 }
945
946 bool Scanner::unrollIndent(int ToColumn) {
947 Token T;
948 // Indentation is ignored in flow.
949 if (FlowLevel != 0)
950 return true;
951
952 while (Indent > ToColumn) {
953 T.Kind = Token::TK_BlockEnd;
954 T.Range = StringRef(Current, 1);
955 TokenQueue.push_back(T);
956 Indent = Indents.pop_back_val();
957 }
958
959 return true;
960 }
961
962 bool Scanner::rollIndent( int ToColumn
963 , Token::TokenKind Kind
964 , TokenQueueT::iterator InsertPoint) {
965 if (FlowLevel)
966 return true;
967 if (Indent < ToColumn) {
968 Indents.push_back(Indent);
969 Indent = ToColumn;
970
971 Token T;
972 T.Kind = Kind;
973 T.Range = StringRef(Current, 0);
974 TokenQueue.insert(InsertPoint, T);
975 }
976 return true;
977 }
978
979 void Scanner::scanToNextToken() {
980 while (true) {
981 while (*Current == ' ' || *Current == '\t') {
982 skip(1);
983 }
984
985 // Skip comment.
986 if (*Current == '#') {
987 while (true) {
988 // This may skip more than one byte, thus Column is only incremented
989 // for code points.
990 StringRef::iterator i = skip_nb_char(Current);
991 if (i == Current)
992 break;
993 Current = i;
994 ++Column;
995 }
996 }
997
998 // Skip EOL.
999 StringRef::iterator i = skip_b_break(Current);
1000 if (i == Current)
1001 break;
1002 Current = i;
1003 ++Line;
1004 Column = 0;
1005 // New lines may start a simple key.
1006 if (!FlowLevel)
1007 IsSimpleKeyAllowed = true;
1008 }
1009 }
1010
1011 bool Scanner::scanStreamStart() {
1012 IsStartOfStream = false;
1013
1014 EncodingInfo EI = getUnicodeEncoding(currentInput());
1015
1016 Token T;
1017 T.Kind = Token::TK_StreamStart;
1018 T.Range = StringRef(Current, EI.second);
1019 TokenQueue.push_back(T);
1020 Current += EI.second;
1021 return true;
1022 }
1023
1024 bool Scanner::scanStreamEnd() {
1025 // Force an ending new line if one isn't present.
1026 if (Column != 0) {
1027 Column = 0;
1028 ++Line;
1029 }
1030
1031 unrollIndent(-1);
1032 SimpleKeys.clear();
1033 IsSimpleKeyAllowed = false;
1034
1035 Token T;
1036 T.Kind = Token::TK_StreamEnd;
1037 T.Range = StringRef(Current, 0);
1038 TokenQueue.push_back(T);
1039 return true;
1040 }
1041
1042 bool Scanner::scanDirective() {
1043 // Reset the indentation level.
1044 unrollIndent(-1);
1045 SimpleKeys.clear();
1046 IsSimpleKeyAllowed = false;
1047
1048 StringRef::iterator Start = Current;
1049 consume('%');
1050 StringRef::iterator NameStart = Current;
1051 Current = skip_while(&Scanner::skip_ns_char, Current);
1052 StringRef Name(NameStart, Current - NameStart);
1053 Current = skip_while(&Scanner::skip_s_white, Current);
1054
1055 if (Name == "YAML") {
1056 Current = skip_while(&Scanner::skip_ns_char, Current);
1057 Token T;
1058 T.Kind = Token::TK_VersionDirective;
1059 T.Range = StringRef(Start, Current - Start);
1060 TokenQueue.push_back(T);
1061 return true;
1062 }
1063 return false;
1064 }
1065
1066 bool Scanner::scanDocumentIndicator(bool IsStart) {
1067 unrollIndent(-1);
1068 SimpleKeys.clear();
1069 IsSimpleKeyAllowed = false;
1070
1071 Token T;
1072 T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd;
1073 T.Range = StringRef(Current, 3);
1074 skip(3);
1075 TokenQueue.push_back(T);
1076 return true;
1077 }
1078
1079 bool Scanner::scanFlowCollectionStart(bool IsSequence) {
1080 Token T;
1081 T.Kind = IsSequence ? Token::TK_FlowSequenceStart
1082 : Token::TK_FlowMappingStart;
1083 T.Range = StringRef(Current, 1);
1084 skip(1);
1085 TokenQueue.push_back(T);
1086
1087 // [ and { may begin a simple key.
1088 saveSimpleKeyCandidate(TokenQueue.back(), Column - 1, false);
1089
1090 // And may also be followed by a simple key.
1091 IsSimpleKeyAllowed = true;
1092 ++FlowLevel;
1093 return true;
1094 }
1095
1096 bool Scanner::scanFlowCollectionEnd(bool IsSequence) {
1097 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1098 IsSimpleKeyAllowed = false;
1099 Token T;
1100 T.Kind = IsSequence ? Token::TK_FlowSequenceEnd
1101 : Token::TK_FlowMappingEnd;
1102 T.Range = StringRef(Current, 1);
1103 skip(1);
1104 TokenQueue.push_back(T);
1105 if (FlowLevel)
1106 --FlowLevel;
1107 return true;
1108 }
1109
1110 bool Scanner::scanFlowEntry() {
1111 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1112 IsSimpleKeyAllowed = true;
1113 Token T;
1114 T.Kind = Token::TK_FlowEntry;
1115 T.Range = StringRef(Current, 1);
1116 skip(1);
1117 TokenQueue.push_back(T);
1118 return true;
1119 }
1120
1121 bool Scanner::scanBlockEntry() {
1122 rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end());
1123 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1124 IsSimpleKeyAllowed = true;
1125 Token T;
1126 T.Kind = Token::TK_BlockEntry;
1127 T.Range = StringRef(Current, 1);
1128 skip(1);
1129 TokenQueue.push_back(T);
1130 return true;
1131 }
1132
1133 bool Scanner::scanKey() {
1134 if (!FlowLevel)
1135 rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
1136
1137 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1138 IsSimpleKeyAllowed = !FlowLevel;
1139
1140 Token T;
1141 T.Kind = Token::TK_Key;
1142 T.Range = StringRef(Current, 1);
1143 skip(1);
1144 TokenQueue.push_back(T);
1145 return true;
1146 }
1147
1148 bool Scanner::scanValue() {
1149 // If the previous token could have been a simple key, insert the key token
1150 // into the token queue.
1151 if (!SimpleKeys.empty()) {
1152 SimpleKey SK = SimpleKeys.pop_back_val();
1153 Token T;
1154 T.Kind = Token::TK_Key;
1155 T.Range = SK.Tok->Range;
1156 TokenQueueT::iterator i, e;
1157 for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) {
1158 if (i == SK.Tok)
1159 break;
1160 }
1161 assert(i != e && "SimpleKey not in token queue!");
1162 i = TokenQueue.insert(i, T);
1163
1164 // We may also need to add a Block-Mapping-Start token.
1165 rollIndent(SK.Column, Token::TK_BlockMappingStart, i);
1166
1167 IsSimpleKeyAllowed = false;
1168 } else {
1169 if (!FlowLevel)
1170 rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
1171 IsSimpleKeyAllowed = !FlowLevel;
1172 }
1173
1174 Token T;
1175 T.Kind = Token::TK_Value;
1176 T.Range = StringRef(Current, 1);
1177 skip(1);
1178 TokenQueue.push_back(T);
1179 return true;
1180 }
1181
1182 // Forbidding inlining improves performance by roughly 20%.
1183 // FIXME: Remove once llvm optimizes this to the faster version without hints.
1184 LLVM_ATTRIBUTE_NOINLINE static bool
1185 wasEscaped(StringRef::iterator First, StringRef::iterator Position);
1186
1187 // Returns whether a character at 'Position' was escaped with a leading '\'.
1188 // 'First' specifies the position of the first character in the string.
1189 static bool wasEscaped(StringRef::iterator First,
1190 StringRef::iterator Position) {
1191 assert(Position - 1 >= First);
1192 StringRef::iterator I = Position - 1;
1193 // We calculate the number of consecutive '\'s before the current position
1194 // by iterating backwards through our string.
1195 while (I >= First && *I == '\\') --I;
1196 // (Position - 1 - I) now contains the number of '\'s before the current
1197 // position. If it is odd, the character at 'Position' was escaped.
1198 return (Position - 1 - I) % 2 == 1;
1199 }
1200
1201 bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {
1202 StringRef::iterator Start = Current;
1203 unsigned ColStart = Column;
1204 if (IsDoubleQuoted) {
1205 do {
1206 ++Current;
1207 while (Current != End && *Current != '"')
1208 ++Current;
1209 // Repeat until the previous character was not a '\' or was an escaped
1210 // backslash.
1211 } while (*(Current - 1) == '\\' && wasEscaped(Start + 1, Current));
1212 } else {
1213 skip(1);
1214 while (true) {
1215 // Skip a ' followed by another '.
1216 if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') {
1217 skip(2);
1218 continue;
1219 } else if (*Current == '\'')
1220 break;
1221 StringRef::iterator i = skip_nb_char(Current);
1222 if (i == Current) {
1223 i = skip_b_break(Current);
1224 if (i == Current)
1225 break;
1226 Current = i;
1227 Column = 0;
1228 ++Line;
1229 } else {
1230 if (i == End)
1231 break;
1232 Current = i;
1233 ++Column;
1234 }
1235 }
1236 }
1237 skip(1); // Skip ending quote.
1238 Token T;
1239 T.Kind = Token::TK_Scalar;
1240 T.Range = StringRef(Start, Current - Start);
1241 TokenQueue.push_back(T);
1242
1243 saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
1244
1245 IsSimpleKeyAllowed = false;
1246
1247 return true;
1248 }
1249
1250 bool Scanner::scanPlainScalar() {
1251 StringRef::iterator Start = Current;
1252 unsigned ColStart = Column;
1253 unsigned LeadingBlanks = 0;
1254 assert(Indent >= -1 && "Indent must be >= -1 !");
1255 unsigned indent = static_cast(Indent + 1);
1256 while (true) {
1257 if (*Current == '#')
1258 break;
1259
1260 while (!isBlankOrBreak(Current)) {
1261 if ( FlowLevel && *Current == ':'
1262 && !(isBlankOrBreak(Current + 1) || *(Current + 1) == ',')) {
1263 setError("Found unexpected ':' while scanning a plain scalar", Current);
1264 return false;
1265 }
1266
1267 // Check for the end of the plain scalar.
1268 if ( (*Current == ':' && isBlankOrBreak(Current + 1))
1269 || ( FlowLevel
1270 && (StringRef(Current, 1).find_first_of(",:?[]{}")
1271 != StringRef::npos)))
1272 break;
1273
1274 StringRef::iterator i = skip_nb_char(Current);
1275 if (i == Current)
1276 break;
1277 Current = i;
1278 ++Column;
1279 }
1280
1281 // Are we at the end?
1282 if (!isBlankOrBreak(Current))
1283 break;
1284
1285 // Eat blanks.
1286 StringRef::iterator Tmp = Current;
1287 while (isBlankOrBreak(Tmp)) {
1288 StringRef::iterator i = skip_s_white(Tmp);
1289 if (i != Tmp) {
1290 if (LeadingBlanks && (Column < indent) && *Tmp == '\t') {
1291 setError("Found invalid tab character in indentation", Tmp);
1292 return false;
1293 }
1294 Tmp = i;
1295 ++Column;
1296 } else {
1297 i = skip_b_break(Tmp);
1298 if (!LeadingBlanks)
1299 LeadingBlanks = 1;
1300 Tmp = i;
1301 Column = 0;
1302 ++Line;
1303 }
1304 }
1305
1306 if (!FlowLevel && Column < indent)
1307 break;
1308
1309 Current = Tmp;
1310 }
1311 if (Start == Current) {
1312 setError("Got empty plain scalar", Start);
1313 return false;
1314 }
1315 Token T;
1316 T.Kind = Token::TK_Scalar;
1317 T.Range = StringRef(Start, Current - Start);
1318 TokenQueue.push_back(T);
1319
1320 // Plain scalars can be simple keys.
1321 saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
1322
1323 IsSimpleKeyAllowed = false;
1324
1325 return true;
1326 }
1327
1328 bool Scanner::scanAliasOrAnchor(bool IsAlias) {
1329 StringRef::iterator Start = Current;
1330 unsigned ColStart = Column;
1331 skip(1);
1332 while(true) {
1333 if ( *Current == '[' || *Current == ']'
1334 || *Current == '{' || *Current == '}'
1335 || *Current == ','
1336 || *Current == ':')
1337 break;
1338 StringRef::iterator i = skip_ns_char(Current);
1339 if (i == Current)
1340 break;
1341 Current = i;
1342 ++Column;
1343 }
1344
1345 if (Start == Current) {
1346 setError("Got empty alias or anchor", Start);
1347 return false;
1348 }
1349
1350 Token T;
1351 T.Kind = IsAlias ? Token::TK_Alias : Token::TK_Anchor;
1352 T.Range = StringRef(Start, Current - Start);
1353 TokenQueue.push_back(T);
1354
1355 // Alias and anchors can be simple keys.
1356 saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
1357
1358 IsSimpleKeyAllowed = false;
1359
1360 return true;
1361 }
1362
1363 bool Scanner::scanBlockScalar(bool IsLiteral) {
1364 StringRef::iterator Start = Current;
1365 skip(1); // Eat | or >
1366 while(true) {
1367 StringRef::iterator i = skip_nb_char(Current);
1368 if (i == Current) {
1369 if (Column == 0)
1370 break;
1371 i = skip_b_break(Current);
1372 if (i != Current) {
1373 // We got a line break.
1374 Column = 0;
1375 ++Line;
1376 Current = i;
1377 continue;
1378 } else {
1379 // There was an error, which should already have been printed out.
1380 return false;
1381 }
1382 }
1383 Current = i;
1384 ++Column;
1385 }
1386
1387 if (Start == Current) {
1388 setError("Got empty block scalar", Start);
1389 return false;
1390 }
1391
1392 Token T;
1393 T.Kind = Token::TK_Scalar;
1394 T.Range = StringRef(Start, Current - Start);
1395 TokenQueue.push_back(T);
1396 return true;
1397 }
1398
1399 bool Scanner::scanTag() {
1400 StringRef::iterator Start = Current;
1401 unsigned ColStart = Column;
1402 skip(1); // Eat !.
1403 if (Current == End || isBlankOrBreak(Current)); // An empty tag.
1404 else if (*Current == '<') {
1405 skip(1);
1406 scan_ns_uri_char();
1407 if (!consume('>'))
1408 return false;
1409 } else {
1410 // FIXME: Actually parse the c-ns-shorthand-tag rule.
1411 Current = skip_while(&Scanner::skip_ns_char, Current);
1412 }
1413
1414 Token T;
1415 T.Kind = Token::TK_Tag;
1416 T.Range = StringRef(Start, Current - Start);
1417 TokenQueue.push_back(T);
1418
1419 // Tags can be simple keys.
1420 saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
1421
1422 IsSimpleKeyAllowed = false;
1423
1424 return true;
1425 }
1426
1427 bool Scanner::fetchMoreTokens() {
1428 if (IsStartOfStream)
1429 return scanStreamStart();
1430
1431 scanToNextToken();
1432
1433 if (Current == End)
1434 return scanStreamEnd();
1435
1436 removeStaleSimpleKeyCandidates();
1437
1438 unrollIndent(Column);
1439
1440 if (Column == 0 && *Current == '%')
1441 return scanDirective();
1442
1443 if (Column == 0 && Current + 4 <= End
1444 && *Current == '-'
1445 && *(Current + 1) == '-'
1446 && *(Current + 2) == '-'
1447 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1448 return scanDocumentIndicator(true);
1449
1450 if (Column == 0 && Current + 4 <= End
1451 && *Current == '.'
1452 && *(Current + 1) == '.'
1453 && *(Current + 2) == '.'
1454 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1455 return scanDocumentIndicator(false);
1456
1457 if (*Current == '[')
1458 return scanFlowCollectionStart(true);
1459
1460 if (*Current == '{')
1461 return scanFlowCollectionStart(false);
1462
1463 if (*Current == ']')
1464 return scanFlowCollectionEnd(true);
1465
1466 if (*Current == '}')
1467 return scanFlowCollectionEnd(false);
1468
1469 if (*Current == ',')
1470 return scanFlowEntry();
1471
1472 if (*Current == '-' && isBlankOrBreak(Current + 1))
1473 return scanBlockEntry();
1474
1475 if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1)))
1476 return scanKey();
1477
1478 if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1)))
1479 return scanValue();
1480
1481 if (*Current == '*')
1482 return scanAliasOrAnchor(true);
1483
1484 if (*Current == '&')
1485 return scanAliasOrAnchor(false);
1486
1487 if (*Current == '!')
1488 return scanTag();
1489
1490 if (*Current == '|' && !FlowLevel)
1491 return scanBlockScalar(true);
1492
1493 if (*Current == '>' && !FlowLevel)
1494 return scanBlockScalar(false);
1495
1496 if (*Current == '\'')
1497 return scanFlowScalar(false);
1498
1499 if (*Current == '"')
1500 return scanFlowScalar(true);
1501
1502 // Get a plain scalar.
1503 StringRef FirstChar(Current, 1);
1504 if (!(isBlankOrBreak(Current)
1505 || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos)
1506 || (*Current == '-' && !isBlankOrBreak(Current + 1))
1507 || (!FlowLevel && (*Current == '?' || *Current == ':')
1508 && isBlankOrBreak(Current + 1))
1509 || (!FlowLevel && *Current == ':'
1510 && Current + 2 < End
1511 && *(Current + 1) == ':'
1512 && !isBlankOrBreak(Current + 2)))
1513 return scanPlainScalar();
1514
1515 setError("Unrecognized character while tokenizing.");
1516 return false;
1517 }
1518
1519 Stream::Stream(StringRef Input, SourceMgr &SM)
1520 : scanner(new Scanner(Input, SM))
1521 , CurrentDoc(0) {}
1522
1523 bool Stream::failed() { return scanner->failed(); }
1524
1525 void Stream::printError(Node *N, const Twine &Msg) {
1526 SmallVector Ranges;
1527 Ranges.push_back(N->getSourceRange());
1528 scanner->printError( N->getSourceRange().Start
1529 , SourceMgr::DK_Error
1530 , Msg
1531 , Ranges);
1532 }
1533
1534 void Stream::handleYAMLDirective(const Token &t) {
1535 // TODO: Ensure version is 1.x.
1536 }
1537
1538 document_iterator Stream::begin() {
1539 if (CurrentDoc)
1540 report_fatal_error("Can only iterate over the stream once");
1541
1542 // Skip Stream-Start.
1543 scanner->getNext();
1544
1545 CurrentDoc.reset(new Document(*this));
1546 return document_iterator(CurrentDoc);
1547 }
1548
1549 document_iterator Stream::end() {
1550 return document_iterator();
1551 }
1552
1553 void Stream::skip() {
1554 for (document_iterator i = begin(), e = end(); i != e; ++i)
1555 i->skip();
1556 }
1557
1558 Node::Node(unsigned int Type, OwningPtr &D, StringRef A)
1559 : Doc(D)
1560 , TypeID(Type)
1561 , Anchor(A) {
1562 SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin());
1563 SourceRange = SMRange(Start, Start);
1564 }
1565
1566 Node::~Node() {}
1567
1568 Token &Node::peekNext() {
1569 return Doc->peekNext();
1570 }
1571
1572 Token Node::getNext() {
1573 return Doc->getNext();
1574 }
1575
1576 Node *Node::parseBlockNode() {
1577 return Doc->parseBlockNode();
1578 }
1579
1580 BumpPtrAllocator &Node::getAllocator() {
1581 return Doc->NodeAllocator;
1582 }
1583
1584 void Node::setError(const Twine &Msg, Token &Tok) const {
1585 Doc->setError(Msg, Tok);
1586 }
1587
1588 bool Node::failed() const {
1589 return Doc->failed();
1590 }
1591
1592
1593
1594 StringRef ScalarNode::getValue(SmallVectorImpl &Storage) const {
1595 // TODO: Handle newlines properly. We need to remove leading whitespace.
1596 if (Value[0] == '"') { // Double quoted.
1597 // Pull off the leading and trailing "s.
1598 StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
1599 // Search for characters that would require unescaping the value.
1600 StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n");
1601 if (i != StringRef::npos)
1602 return unescapeDoubleQuoted(UnquotedValue, i, Storage);
1603 return UnquotedValue;
1604 } else if (Value[0] == '\'') { // Single quoted.
1605 // Pull off the leading and trailing 's.
1606 StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
1607 StringRef::size_type i = UnquotedValue.find('\'');
1608 if (i != StringRef::npos) {
1609 // We're going to need Storage.
1610 Storage.clear();
1611 Storage.reserve(UnquotedValue.size());
1612 for (; i != StringRef::npos; i = UnquotedValue.find('\'')) {
1613 StringRef Valid(UnquotedValue.begin(), i);
1614 Storage.insert(Storage.end(), Valid.begin(), Valid.end());
1615 Storage.push_back('\'');
1616 UnquotedValue = UnquotedValue.substr(i + 2);
1617 }
1618 Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
1619 return StringRef(Storage.begin(), Storage.size());
1620 }
1621 return UnquotedValue;
1622 }
1623 // Plain or block.
1624 size_t trimtrail = Value.rfind(' ');
1625 return Value.drop_back(
1626 trimtrail == StringRef::npos ? 0 : Value.size() - trimtrail);
1627 }
1628
1629 StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue
1630 , StringRef::size_type i
1631 , SmallVectorImpl &Storage)
1632 const {
1633 // Use Storage to build proper value.
1634 Storage.clear();
1635 Storage.reserve(UnquotedValue.size());
1636 for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) {
1637 // Insert all previous chars into Storage.
1638 StringRef Valid(UnquotedValue.begin(), i);
1639 Storage.insert(Storage.end(), Valid.begin(), Valid.end());
1640 // Chop off inserted chars.
1641 UnquotedValue = UnquotedValue.substr(i);
1642
1643 assert(!UnquotedValue.empty() && "Can't be empty!");
1644
1645 // Parse escape or line break.
1646 switch (UnquotedValue[0]) {
1647 case '\r':
1648 case '\n':
1649 Storage.push_back('\n');
1650 if ( UnquotedValue.size() > 1
1651 && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
1652 UnquotedValue = UnquotedValue.substr(1);
1653 UnquotedValue = UnquotedValue.substr(1);
1654 break;
1655 default:
1656 if (UnquotedValue.size() == 1)
1657 // TODO: Report error.
1658 break;
1659 UnquotedValue = UnquotedValue.substr(1);
1660 switch (UnquotedValue[0]) {
1661 default: {
1662 Token T;
1663 T.Range = StringRef(UnquotedValue.begin(), 1);
1664 setError("Unrecognized escape code!", T);
1665 return "";
1666 }
1667 case '\r':
1668 case '\n':
1669 // Remove the new line.
1670 if ( UnquotedValue.size() > 1
1671 && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
1672 UnquotedValue = UnquotedValue.substr(1);
1673 // If this was just a single byte newline, it will get skipped
1674 // below.
1675 break;
1676 case '0':
1677 Storage.push_back(0x00);
1678 break;
1679 case 'a':
1680 Storage.push_back(0x07);
1681 break;
1682 case 'b':
1683 Storage.push_back(0x08);
1684 break;
1685 case 't':
1686 case 0x09:
1687 Storage.push_back(0x09);
1688 break;
1689 case 'n':
1690 Storage.push_back(0x0A);
1691 break;
1692 case 'v':
1693 Storage.push_back(0x0B);
1694 break;
1695 case 'f':
1696 Storage.push_back(0x0C);
1697 break;
1698 case 'r':
1699 Storage.push_back(0x0D);
1700 break;
1701 case 'e':
1702 Storage.push_back(0x1B);
1703 break;
1704 case ' ':
1705 Storage.push_back(0x20);
1706 break;
1707 case '"':
1708 Storage.push_back(0x22);
1709 break;
1710 case '/':
1711 Storage.push_back(0x2F);
1712 break;
1713 case '\\':
1714 Storage.push_back(0x5C);
1715 break;
1716 case 'N':
1717 encodeUTF8(0x85, Storage);
1718 break;
1719 case '_':
1720 encodeUTF8(0xA0, Storage);
1721 break;
1722 case 'L':
1723 encodeUTF8(0x2028, Storage);
1724 break;
1725 case 'P':
1726 encodeUTF8(0x2029, Storage);
1727 break;
1728 case 'x': {
1729 if (UnquotedValue.size() < 3)
1730 // TODO: Report error.
1731 break;
1732 unsigned int UnicodeScalarValue;
1733 UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue);
1734 encodeUTF8(UnicodeScalarValue, Storage);
1735 UnquotedValue = UnquotedValue.substr(2);
1736 break;
1737 }
1738 case 'u': {
1739 if (UnquotedValue.size() < 5)
1740 // TODO: Report error.
1741 break;
1742 unsigned int UnicodeScalarValue;
1743 UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue);
1744 encodeUTF8(UnicodeScalarValue, Storage);
1745 UnquotedValue = UnquotedValue.substr(4);
1746 break;
1747 }
1748 case 'U': {
1749 if (UnquotedValue.size() < 9)
1750 // TODO: Report error.
1751 break;
1752 unsigned int UnicodeScalarValue;
1753 UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue);
1754 encodeUTF8(UnicodeScalarValue, Storage);
1755 UnquotedValue = UnquotedValue.substr(8);
1756 break;
1757 }
1758 }
1759 UnquotedValue = UnquotedValue.substr(1);
1760 }
1761 }
1762 Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
1763 return StringRef(Storage.begin(), Storage.size());
1764 }
1765
1766 Node *KeyValueNode::getKey() {
1767 if (Key)
1768 return Key;
1769 // Handle implicit null keys.
1770 {
1771 Token &t = peekNext();
1772 if ( t.Kind == Token::TK_BlockEnd
1773 || t.Kind == Token::TK_Value
1774 || t.Kind == Token::TK_Error) {
1775 return Key = new (getAllocator()) NullNode(Doc);
1776 }
1777 if (t.Kind == Token::TK_Key)
1778 getNext(); // skip TK_Key.
1779 }
1780
1781 // Handle explicit null keys.
1782 Token &t = peekNext();
1783 if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) {
1784 return Key = new (getAllocator()) NullNode(Doc);
1785 }
1786
1787 // We've got a normal key.
1788 return Key = parseBlockNode();
1789 }
1790
1791 Node *KeyValueNode::getValue() {
1792 if (Value)
1793 return Value;
1794 getKey()->skip();
1795 if (failed())
1796 return Value = new (getAllocator()) NullNode(Doc);
1797
1798 // Handle implicit null values.
1799 {
1800 Token &t = peekNext();
1801 if ( t.Kind == Token::TK_BlockEnd
1802 || t.Kind == Token::TK_FlowMappingEnd
1803 || t.Kind == Token::TK_Key
1804 || t.Kind == Token::TK_FlowEntry
1805 || t.Kind == Token::TK_Error) {
1806 return Value = new (getAllocator()) NullNode(Doc);
1807 }
1808
1809 if (t.Kind != Token::TK_Value) {
1810 setError("Unexpected token in Key Value.", t);
1811 return Value = new (getAllocator()) NullNode(Doc);
1812 }
1813 getNext(); // skip TK_Value.
1814 }
1815
1816 // Handle explicit null values.
1817 Token &t = peekNext();
1818 if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) {
1819 return Value = new (getAllocator()) NullNode(Doc);
1820 }
1821
1822 // We got a normal value.
1823 return Value = parseBlockNode();
1824 }
1825
1826 void MappingNode::increment() {
1827 if (failed()) {
1828 IsAtEnd = true;
1829 CurrentEntry = 0;
1830 return;
1831 }
1832 if (CurrentEntry) {
1833 CurrentEntry->skip();
1834 if (Type == MT_Inline) {
1835 IsAtEnd = true;
1836 CurrentEntry = 0;
1837 return;
1838 }
1839 }
1840 Token T = peekNext();
1841 if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) {
1842 // KeyValueNode eats the TK_Key. That way it can detect null keys.
1843 CurrentEntry = new (getAllocator()) KeyValueNode(Doc);
1844 } else if (Type == MT_Block) {
1845 switch (T.Kind) {
1846 case Token::TK_BlockEnd:
1847 getNext();
1848 IsAtEnd = true;
1849 CurrentEntry = 0;
1850 break;
1851 default:
1852 setError("Unexpected token. Expected Key or Block End", T);
1853 case Token::TK_Error:
1854 IsAtEnd = true;
1855 CurrentEntry = 0;
1856 }
1857 } else {
1858 switch (T.Kind) {
1859 case Token::TK_FlowEntry:
1860 // Eat the flow entry and recurse.
1861 getNext();
1862 return increment();
1863 case Token::TK_FlowMappingEnd:
1864 getNext();
1865 case Token::TK_Error:
1866 // Set this to end iterator.
1867 IsAtEnd = true;
1868 CurrentEntry = 0;
1869 break;
1870 default:
1871 setError( "Unexpected token. Expected Key, Flow Entry, or Flow "
1872 "Mapping End."
1873 , T);
1874 IsAtEnd = true;
1875 CurrentEntry = 0;
1876 }
1877 }
1878 }
1879
1880 void SequenceNode::increment() {
1881 if (failed()) {
1882 IsAtEnd = true;
1883 CurrentEntry = 0;
1884 return;
1885 }
1886 if (CurrentEntry)
1887 CurrentEntry->skip();
1888 Token T = peekNext();
1889 if (SeqType == ST_Block) {
1890 switch (T.Kind) {
1891 case Token::TK_BlockEntry:
1892 getNext();
1893 CurrentEntry = parseBlockNode();
1894 if (CurrentEntry == 0) { // An error occurred.
1895 IsAtEnd = true;
1896 CurrentEntry = 0;
1897 }
1898 break;
1899 case Token::TK_BlockEnd:
1900 getNext();
1901 IsAtEnd = true;
1902 CurrentEntry = 0;
1903 break;
1904 default:
1905 setError( "Unexpected token. Expected Block Entry or Block End."
1906 , T);
1907 case Token::TK_Error:
1908 IsAtEnd = true;
1909 CurrentEntry = 0;
1910 }
1911 } else if (SeqType == ST_Indentless) {
1912 switch (T.Kind) {
1913 case Token::TK_BlockEntry:
1914 getNext();
1915 CurrentEntry = parseBlockNode();
1916 if (CurrentEntry == 0) { // An error occurred.
1917 IsAtEnd = true;
1918 CurrentEntry = 0;
1919 }
1920 break;
1921 default:
1922 case Token::TK_Error:
1923 IsAtEnd = true;
1924 CurrentEntry = 0;
1925 }
1926 } else if (SeqType == ST_Flow) {
1927 switch (T.Kind) {
1928 case Token::TK_FlowEntry:
1929 // Eat the flow entry and recurse.
1930 getNext();
1931 WasPreviousTokenFlowEntry = true;
1932 return increment();
1933 case Token::TK_FlowSequenceEnd:
1934 getNext();
1935 case Token::TK_Error:
1936 // Set this to end iterator.
1937 IsAtEnd = true;
1938 CurrentEntry = 0;
1939 break;
1940 case Token::TK_StreamEnd:
1941 case Token::TK_DocumentEnd:
1942 case Token::TK_DocumentStart:
1943 setError("Could not find closing ]!", T);
1944 // Set this to end iterator.
1945 IsAtEnd = true;
1946 CurrentEntry = 0;
1947 break;
1948 default:
1949 if (!WasPreviousTokenFlowEntry) {
1950 setError("Expected , between entries!", T);
1951 IsAtEnd = true;
1952 CurrentEntry = 0;
1953 break;
1954 }
1955 // Otherwise it must be a flow entry.
1956 CurrentEntry = parseBlockNode();
1957 if (!CurrentEntry) {
1958 IsAtEnd = true;
1959 }
1960 WasPreviousTokenFlowEntry = false;
1961 break;
1962 }
1963 }
1964 }
1965
1966 Document::Document(Stream &S) : stream(S), Root(0) {
1967 if (parseDirectives())
1968 expectToken(Token::TK_DocumentStart);
1969 Token &T = peekNext();
1970 if (T.Kind == Token::TK_DocumentStart)
1971 getNext();
1972 }
1973
1974 bool Document::skip() {
1975 if (stream.scanner->failed())
1976 return false;
1977 if (!Root)
1978 getRoot();
1979 Root->skip();
1980 Token &T = peekNext();
1981 if (T.Kind == Token::TK_StreamEnd)
1982 return false;
1983 if (T.Kind == Token::TK_DocumentEnd) {
1984 getNext();
1985 return skip();
1986 }
1987 return true;
1988 }
1989
1990 Token &Document::peekNext() {
1991 return stream.scanner->peekNext();
1992 }
1993
1994 Token Document::getNext() {
1995 return stream.scanner->getNext();
1996 }
1997
1998 void Document::setError(const Twine &Message, Token &Location) const {
1999 stream.scanner->setError(Message, Location.Range.begin());
2000 }
2001
2002 bool Document::failed() const {
2003 return stream.scanner->failed();
2004 }
2005
2006 Node *Document::parseBlockNode() {
2007 Token T = peekNext();
2008 // Handle properties.
2009 Token AnchorInfo;
2010 parse_property:
2011 switch (T.Kind) {
2012 case Token::TK_Alias:
2013 getNext();
2014 return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1));
2015 case Token::TK_Anchor:
2016 if (AnchorInfo.Kind == Token::TK_Anchor) {
2017 setError("Already encountered an anchor for this node!", T);
2018 return 0;
2019 }
2020 AnchorInfo = getNext(); // Consume TK_Anchor.
2021 T = peekNext();
2022 goto parse_property;
2023 case Token::TK_Tag:
2024 getNext(); // Skip TK_Tag.
2025 T = peekNext();
2026 goto parse_property;
2027 default:
2028 break;
2029 }
2030
2031 switch (T.Kind) {
2032 case Token::TK_BlockEntry:
2033 // We got an unindented BlockEntry sequence. This is not terminated with
2034 // a BlockEnd.
2035 // Don't eat the TK_BlockEntry, SequenceNode needs it.
2036 return new (NodeAllocator) SequenceNode( stream.CurrentDoc
2037 , AnchorInfo.Range.substr(1)
2038 , SequenceNode::ST_Indentless);
2039 case Token::TK_BlockSequenceStart:
2040 getNext();
2041 return new (NodeAllocator)
2042 SequenceNode( stream.CurrentDoc
2043 , AnchorInfo.Range.substr(1)
2044 , SequenceNode::ST_Block);
2045 case Token::TK_BlockMappingStart:
2046 getNext();
2047 return new (NodeAllocator)
2048 MappingNode( stream.CurrentDoc
2049 , AnchorInfo.Range.substr(1)
2050 , MappingNode::MT_Block);
2051 case Token::TK_FlowSequenceStart:
2052 getNext();
2053 return new (NodeAllocator)
2054 SequenceNode( stream.CurrentDoc
2055 , AnchorInfo.Range.substr(1)
2056 , SequenceNode::ST_Flow);
2057 case Token::TK_FlowMappingStart:
2058 getNext();
2059 return new (NodeAllocator)
2060 MappingNode( stream.CurrentDoc
2061 , AnchorInfo.Range.substr(1)
2062 , MappingNode::MT_Flow);
2063 case Token::TK_Scalar:
2064 getNext();
2065 return new (NodeAllocator)
2066 ScalarNode( stream.CurrentDoc
2067 , AnchorInfo.Range.substr(1)
2068 , T.Range);
2069 case Token::TK_Key:
2070 // Don't eat the TK_Key, KeyValueNode expects it.
2071 return new (NodeAllocator)
2072 MappingNode( stream.CurrentDoc
2073 , AnchorInfo.Range.substr(1)
2074 , MappingNode::MT_Inline);
2075 case Token::TK_DocumentStart:
2076 case Token::TK_DocumentEnd:
2077 case Token::TK_StreamEnd:
2078 default:
2079 // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not
2080 // !!null null.
2081 return new (NodeAllocator) NullNode(stream.CurrentDoc);
2082 case Token::TK_Error:
2083 return 0;
2084 }
2085 llvm_unreachable("Control flow shouldn't reach here.");
2086 return 0;
2087 }
2088
2089 bool Document::parseDirectives() {
2090 bool isDirective = false;
2091 while (true) {
2092 Token T = peekNext();
2093 if (T.Kind == Token::TK_TagDirective) {
2094 handleTagDirective(getNext());
2095 isDirective = true;
2096 } else if (T.Kind == Token::TK_VersionDirective) {
2097 stream.handleYAMLDirective(getNext());
2098 isDirective = true;
2099 } else
2100 break;
2101 }
2102 return isDirective;
2103 }
2104
2105 bool Document::expectToken(int TK) {
2106 Token T = getNext();
2107 if (T.Kind != TK) {
2108 setError("Unexpected token", T);
2109 return false;
2110 }
2111 return true;
2112 }
2113
2114 OwningPtr document_iterator::NullDoc;
0 Copyright (c) 2006 Kirill Simonov
1
2 Permission is hereby granted, free of charge, to any person obtaining a copy of
3 this software and associated documentation files (the "Software"), to deal in
4 the Software without restriction, including without limitation the rights to
5 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
6 of the Software, and to permit persons to whom the Software is furnished to do
7 so, subject to the following conditions:
8
9 The above copyright notice and this permission notice shall be included in all
10 copies or substantial portions of the Software.
11
12 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
18 SOFTWARE.
0 # RUN: yaml-bench -canonical %s
1
2 - yes
3 - NO
4 - True
5 - on
0 # RUN: yaml-bench -canonical %s
1
2 canonical: yes
3 answer: NO
4 logical: True
5 option: on
6
7
8 but:
9 y: is a string
10 n: is a string
0 # RUN: yaml-bench -canonical %s
1
2 ---
3 - !tag1
4 x: 1
5 - !tag1
6 x: 1
7 'y': 2
8 z: 3
9 - !tag2
10 10
11 - !tag2
12 =: 10
13 'y': 20
14 z: 30
15 - !tag3
16 x: 1
17 - !tag3
18 x: 1
19 'y': 2
20 z: 3
21 - !tag3
22 =: 1
23 'y': 2
24 z: 3
25 - !foo
26 my-parameter: foo
27 my-another-parameter: [1,2,3]
0 # RUN: yaml-bench -canonical %s
1
2 canonical: 6.8523015e+5
3 exponential: 685.230_15e+03
4 fixed: 685_230.15
5 sexagesimal: 190:20:30.15
6 negative infinity: -.inf
7 not a number: .NaN
0 # RUN: yaml-bench -canonical %s
1
2 canonical: 685230
3 decimal: +685_230
4 octal: 02472256
5 hexadecimal: 0x_0A_74_AE
6 binary: 0b1010_0111_0100_1010_1110
7 sexagesimal: 190:20:30
0 # RUN: yaml-bench -canonical %s
1
2 # Unordered set of key: value pairs.
3 Block style: !!map
4 Clark : Evans
5 Brian : Ingerson
6 Oren : Ben-Kiki
7 Flow style: !!map { Clark: Evans, Brian: Ingerson, Oren: Ben-Kiki }
0 # RUN: yaml-bench -canonical %s
1
2 ---
3 - &CENTER { x: 1, 'y': 2 }
4 - &LEFT { x: 0, 'y': 2 }
5 - &BIG { r: 10 }
6 - &SMALL { r: 1 }
7
8 # All the following maps are equal:
9
10 - # Explicit keys
11 x: 1
12 'y': 2
13 r: 10
14 label: center/big
15
16 - # Merge one map
17 << : *CENTER
18 r: 10
19 label: center/big
20
21 - # Merge multiple maps
22 << : [ *CENTER, *BIG ]
23 label: center/big
24
25 - # Override
26 << : [ *BIG, *LEFT, *SMALL ]
27 x: 1
28 label: center/big
0 # RUN: yaml-bench -canonical %s
1
2 # A document may be null.
3 ---
4 ---
5 # This mapping has four keys,
6 # one has a value.
7 empty:
8 canonical: ~
9 english: null
10 ~: null key
11 ---
12 # This sequence has five
13 # entries, two have values.
14 sparse:
15 - ~
16 - 2nd entry
17 -
18 - 4th entry
19 - Null
0 # RUN: yaml-bench -canonical %s
1
2 # Explicitly typed ordered map (dictionary).
3 Bestiary: !!omap
4 - aardvark: African pig-like ant eater. Ugly.
5 - anteater: South-American ant eater. Two species.
6 - anaconda: South-American constrictor snake. Scaly.
7 # Etc.
8 # Flow style
9 Numbers: !!omap [ one: 1, two: 2, three : 3 ]
0 # RUN: yaml-bench -canonical %s
1
2 # Explicitly typed pairs.
3 Block tasks: !!pairs
4 - meeting: with team.
5 - meeting: with boss.
6 - break: lunch.
7 - meeting: with client.
8 Flow tasks: !!pairs [ meeting: with team, meeting: with boss ]
0 # RUN: yaml-bench -canonical %s
1
2 # Ordered sequence of nodes
3 Block style: !!seq
4 - Mercury # Rotates - no light/dark sides.
5 - Venus # Deadliest. Aptly named.
6 - Earth # Mostly dirt.
7 - Mars # Seems empty.
8 - Jupiter # The king.
9 - Saturn # Pretty.
10 - Uranus # Where the sun hardly shines.
11 - Neptune # Boring. No rings.
12 - Pluto # You call this a planet?
13 Flow style: !!seq [ Mercury, Venus, Earth, Mars, # Rocks
14 Jupiter, Saturn, Uranus, Neptune, # Gas
15 Pluto ] # Overrated
16
0 # RUN: yaml-bench -canonical %s
1
2 # Explicitly typed set.
3 baseball players: !!set
4 ? Mark McGwire
5 ? Sammy Sosa
6 ? Ken Griffey
7 # Flow style
8 baseball teams: !!set { Boston Red Sox, Detroit Tigers, New York Yankees }
0 # RUN: yaml-bench -canonical %s
1
2 --- !!str "ascii string"
0 # RUN: yaml-bench -canonical %s
1
2 string: abcd
0 # RUN: yaml-bench -canonical %s
1
2 canonical: 2001-12-15T02:59:43.1Z
3 valid iso8601: 2001-12-14t21:59:43.10-05:00
4 space separated: 2001-12-14 21:59:43.10 -5
5 no time zone (Z): 2001-12-15 2:59:43.10
6 date (00:00:00Z): 2002-12-14
0 # RUN: yaml-bench -canonical %s
1
2 --- # Old schema
3 link with:
4 - library1.dll
5 - library2.dll
6 --- # New schema
7 link with:
8 - = : library1.dll
9 version: 1.2
10 - = : library2.dll
11 version: 2.3
0 # RUN: yaml-bench -canonical %s
1
2 ---
3 foo: bar
4 foo: baz
0 # RUN: yaml-bench -canonical %s
1
2 ---
3 &anchor foo:
4 foo: bar
5 *anchor: duplicate key
6 baz: bat
7 *anchor: duplicate key
0 # RUN: yaml-bench -canonical %s
1
2 ---
3 <<: {x: 1, y: 2}
4 foo: bar
5 <<: {z: 3, t: 4}
0 # RUN: yaml-bench -canonical %s
1
2 ---
3 =: 1
4 foo: bar
5 =: 2
0 # RUN: yaml-bench -canonical %s
1
2 ? |-
3 foo
4 : |-
5 bar
0 # RUN: yaml-bench -canonical %s
1
0 # RUN: yaml-bench -canonical %s
1
2 - 6.8523015e+5
3 - 685.230_15e+03
4 - 685_230.15
5 - 190:20:30.15
6 - -.inf
7 - .NaN
0 # RUN: yaml-bench -canonical %s
1
2 - 685230
3 - +685_230
4 - 02472256
5 - 0x_0A_74_AE
6 - 0b1010_0111_0100_1010_1110
7 - 190:20:30
0 # RUN: yaml-bench -canonical %s
1
2 - "foo 'bar'"
3 - "foo\n'bar'"
0 # RUN: yaml-bench -canonical %s
1
2 - <<
0 # RUN: yaml-bench -canonical %s
1
2 [0.0, +1.0, -1.0, +.inf, -.inf, .nan, .nan]
0 # RUN: yaml-bench -canonical %s
1
2 -1.0
0 # RUN: yaml-bench -canonical %s
1
2 -
3 - ~
4 - null
0 # RUN: yaml-bench -canonical %s
1
2 ---
3 "this scalar should be selected"
4 ---
5 key11: !foo
6 key12:
7 is: [selected]
8 key22:
9 key13: [not, selected]
10 key23: [not, selected]
11 key32:
12 key31: [not, selected]
13 key32: [not, selected]
14 key33: {not: selected}
15 key21: !bar
16 - not selected
17 - selected
18 - not selected
19 key31: !baz
20 key12:
21 key13:
22 key14: {selected}
23 key23:
24 key14: [not, selected]
25 key33:
26 key14: {selected}
27 key24: {not: selected}
28 key22:
29 - key14: {selected}
30 key24: {not: selected}
31 - key14: {selected}
0 # RUN: yaml-bench -canonical %s
1
2 ---
3 - Harry Potter and the Prisoner of Azkaban
4 - Harry Potter and the Goblet of Fire
5 - Harry Potter and the Order of the Phoenix
6 ---
7 - Memoirs Found in a Bathtub
8 - Snow Crash
9 - Ghost World
0 # RUN: yaml-bench -canonical %s
1
2 # Ticket #4
3 ---
4 ...
0 # RUN: yaml-bench -canonical %s
1
2 foo:
3 bar
4 baz
0 # RUN: yaml-bench -canonical %s
1
2 .
0 # RUN: yaml-bench -canonical %s
1
2 ---
3 in the block context:
4 indentation should be kept: {
5 but in the flow context: [
6 it may be violated]
7 }
8 ---
9 the parser does not require scalars
10 to be indented with at least one space
11 ...
12 ---
13 "the parser does not require scalars
14 to be indented with at least one space"
15 ---
16 foo:
17 bar: 'quoted scalars
18 may not adhere indentation'
0 # RUN: yaml-bench -canonical %s
1
2 - Mark McGwire
3 - Sammy Sosa
4 - Ken Griffey
0 # RUN: yaml-bench -canonical %s
1
2 hr: 65 # Home runs
3 avg: 0.278 # Batting average
4 rbi: 147 # Runs Batted In
0 # RUN: yaml-bench -canonical %s
1
2 american:
3 - Boston Red Sox
4 - Detroit Tigers
5 - New York Yankees
6 national:
7 - New York Mets
8 - Chicago Cubs
9 - Atlanta Braves
0 # RUN: yaml-bench -canonical %s
1
2 -
3 name: Mark McGwire
4 hr: 65
5 avg: 0.278
6 -
7 name: Sammy Sosa
8 hr: 63
9 avg: 0.288
0 # RUN: yaml-bench -canonical %s
1
2 - [name , hr, avg ]
3 - [Mark McGwire, 65, 0.278]
4 - [Sammy Sosa , 63, 0.288]
0 # RUN: yaml-bench -canonical %s
1
2 Mark McGwire: {hr: 65, avg: 0.278}
3 Sammy Sosa: {
4 hr: 63,
5 avg: 0.288
6 }
0 # RUN: yaml-bench -canonical %s
1
2 # Ranking of 1998 home runs
3 ---
4 - Mark McGwire
5 - Sammy Sosa
6 - Ken Griffey
7
8 # Team ranking
9 ---
10 - Chicago Cubs
11 - St Louis Cardinals
0 # RUN: yaml-bench -canonical %s
1
2 ---
3 time: 20:03:20
4 player: Sammy Sosa
5 action: strike (miss)
6 ...
7 ---
8 time: 20:03:47
9 player: Sammy Sosa
10 action: grand slam
11 ...
0 # RUN: yaml-bench -canonical %s
1
2 ---
3 hr: # 1998 hr ranking
4 - Mark McGwire
5 - Sammy Sosa
6 rbi:
7 # 1998 rbi ranking
8 - Sammy Sosa
9 - Ken Griffey
0 # RUN: yaml-bench -canonical %s
1
2 ---
3 hr:
4 - Mark McGwire
5 # Following node labeled SS
6 - &SS Sammy Sosa
7 rbi:
8 - *SS # Subsequent occurrence
9 - Ken Griffey
0 # RUN: yaml-bench -canonical %s
1
2 ? - Detroit Tigers
3 - Chicago cubs
4 :
5 - 2001-07-23
6
7 ? [ New York Yankees,
8 Atlanta Braves ]
9 : [ 2001-07-02, 2001-08-12,
10 2001-08-14 ]
0 # RUN: yaml-bench -canonical %s
1
2 ---
3 # products purchased
4 - item : Super Hoop
5 quantity: 1
6 - item : Basketball
7 quantity: 4
8 - item : Big Shoes
9 quantity: 1
0 # RUN: yaml-bench -canonical %s
1
2 # ASCII Art
3 --- |
4 \//||\/||
5 // || ||__
0 # RUN: yaml-bench -canonical %s
1
2 ---
3 Mark McGwire's
4 year was crippled
5 by a knee injury.
0 # RUN: yaml-bench -canonical %s
1
2 >
3 Sammy Sosa completed another
4 fine season with great stats.
5
6 63 Home Runs
7 0.288 Batting Average
8
9 What a year!
0 # RUN: yaml-bench -canonical %s
1
2 name: Mark McGwire
3 accomplishment: >
4 Mark set a major league
5 home run record in 1998.
6 stats: |
7 65 Home Runs
8 0.278 Batting Average
0 # RUN: yaml-bench -canonical %s
1
2 unicode: "Sosa did fine.\u263A"
3 control: "\b1998\t1999\t2000\n"
4 hexesc: "\x13\x10 is \r\n"
5
6 single: '"Howdy!" he cried.'
7 quoted: ' # not a ''comment''.'
8 tie-fighter: '|\-*-/|'
9
10 # CHECK: !!str "Sosa did fine.\u263A"
11 # CHECK: !!str "\b1998\t1999\t2000\n"
12 # CHECK: !!str "\x13\x10 is \r\n"
13 # CHECK: !!str "\"Howdy!\" he cried."
14 # CHECK: !!str " # not a 'comment'."
15 # CHECK: !!str "|\\-*-/|"
0 # RUN: yaml-bench -canonical %s
1
2 plain:
3 This unquoted scalar
4 spans many lines.
5
6 quoted: "So does this
7 quoted scalar.\n"
0 # RUN: yaml-bench -canonical %s
1
2 canonical: 12345
3 decimal: +12,345
4 sexagesimal: 3:25:45
5 octal: 014
6 hexadecimal: 0xC
0 # RUN: yaml-bench -canonical %s
1
2 canonical: 1.23015e+3
3 exponential: 12.3015e+02
4 sexagesimal: 20:30.15
5 fixed: 1,230.15
6 negative infinity: -.inf
7 not a number: .NaN
0 # RUN: yaml-bench -canonical %s
1
2 null: ~
3 true: y
4 false: n
5 string: '12345'
0 # RUN: yaml-bench -canonical %s
1
2 canonical: 2001-12-15T02:59:43.1Z
3 iso8601: 2001-12-14t21:59:43.10-05:00
4 spaced: 2001-12-14 21:59:43.10 -5
5 date: 2002-12-14
0 # RUN: yaml-bench -canonical %s
1
2 ---
3 not-date: !!str 2002-04-28
4
5 picture: !!binary |
6 R0lGODlhDAAMAIQAAP//9/X
7 17unp5WZmZgAAAOfn515eXv
8 Pz7Y6OjuDg4J+fn5OTk6enp
9 56enmleECcgggoBADs=
10
11 application specific tag: !something |
12 The semantics of the tag
13 above may be different for
14 different documents.
0 # RUN: yaml-bench -canonical %s
1
2 %TAG ! tag:clarkevans.com,2002:
3 --- !shape
4 # Use the ! handle for presenting
5 # tag:clarkevans.com,2002:circle
6 - !circle
7 center: &ORIGIN {x: 73, y: 129}
8 radius: 7
9 - !line
10 start: *ORIGIN
11 finish: { x: 89, y: 102 }
12 - !label
13 start: *ORIGIN
14 color: 0xFFEEBB
15 text: Pretty vector drawing.
0 # RUN: yaml-bench -canonical %s
1
2 # sets are represented as a
3 # mapping where each key is
4 # associated with the empty string
5 --- !!set
6 ? Mark McGwire
7 ? Sammy Sosa
8 ? Ken Griff
0 # RUN: yaml-bench -canonical %s
1
2 # ordered maps are represented as
3 # a sequence of mappings, with
4 # each mapping having one key
5 --- !!omap
6 - Mark McGwire: 65
7 - Sammy Sosa: 63
8 - Ken Griffy: 58
0 # RUN: yaml-bench -canonical %s
1
2 --- !
3 invoice: 34843
4 date : 2001-01-23
5 bill-to: &id001
6 given : Chris
7 family : Dumars
8 address:
9 lines: |
10 458 Walkman Dr.
11 Suite #292
12 city : Royal Oak
13 state : MI
14 postal : 48046
15 ship-to: *id001
16 product:
17 - sku : BL394D
18 quantity : 4
19 description : Basketball
20 price : 450.00
21 - sku : BL4438H
22 quantity : 1
23 description : Super Hoop
24 price : 2392.00
25 tax : 251.42
26 total: 4443.52
27 comments:
28 Late afternoon is best.
29 Backup contact is Nancy
30 Billsmer @ 338-4338.
0 # RUN: yaml-bench -canonical %s
1
2 ---
3 Time: 2001-11-23 15:01:42 -5
4 User: ed
5 Warning:
6 This is an error message
7 for the log file
8 ---
9 Time: 2001-11-23 15:02:31 -5
10 User: ed
11 Warning:
12 A slightly different error
13 message.
14 ---
15 Date: 2001-11-23 15:03:17 -5
16 User: ed
17 Fatal:
18 Unknown variable "bar"
19 Stack:
20 - file: TopClass.py
21 line: 23
22 code: |
23 x = MoreObject("345\n")
24 - file: MoreClass.py
25 line: 58
26 code: |-
27 foo = bar
0 # RUN: yaml-bench -canonical %s
1
2 # Comment only.
0 # RUN: yaml-bench -canonical %s |& FileCheck %s
1
2 # Invalid use of BOM
3 # inside a
4 # document.
5
6 # CHECK: error
0 # RUN: yaml-bench -canonical %s
1
2 sequence:
3 - one
4 - two
5 mapping:
6 ? sky
7 : blue
8 ? sea : green
0 # RUN: yaml-bench -canonical %s
1
2 sequence: [ one, two, ]
3 mapping: { sky: blue, sea: green }
0 # RUN: yaml-bench -canonical %s
1
2 # Comment only.
0 # RUN: yaml-bench -canonical %s
1
2 anchored: !local &anchor value
3 alias: *anchor
0 # RUN: yaml-bench -canonical %s
1
2 literal: |
3 text
4 folded: >
5 text
0 # RUN: yaml-bench -canonical %s
1
2 single: 'text'
3 double: "text"
0 # RUN: yaml-bench -canonical %s
1
2