Mercurial > projects > orange
comparison orange/xml/PhobosXML.d @ 9:99c52d46822a
Serialization works now with D2, deserialization still doesn't work
author | Jacob Carlborg <doob@me.com> |
---|---|
date | Sat, 24 Jul 2010 18:58:18 +0200 |
parents | |
children | d17ae98330bf |
comparison
equal
deleted
inserted
replaced
8:613a0bb20207 | 9:99c52d46822a |
---|---|
1 // Written in the D programming language. | |
2 | |
3 /** | |
4 Classes and functions for creating and parsing XML | |
5 | |
6 The basic architecture of this module is that there are standalone functions, | |
7 classes for constructing an XML document from scratch (Tag, Element and | |
8 Document), and also classes for parsing a pre-existing XML file (ElementParser | |
9 and DocumentParser). The parsing classes <i>may</i> be used to build a | |
10 Document, but that is not their primary purpose. The handling capabilities of | |
11 DocumentParser and ElementParser are sufficiently customizable that you can | |
12 make them do pretty much whatever you want. | |
13 | |
14 Example: This example creates a DOM (Document Object Model) tree | |
15 from an XML file. | |
16 ------------------------------------------------------------------------------ | |
17 import std.xml; | |
18 import std.stdio; | |
19 import std.string; | |
20 | |
21 // books.xml is used in various samples throughout the Microsoft XML Core | |
22 // Services (MSXML) SDK. | |
23 // | |
24 // See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx | |
25 | |
26 void main() | |
27 { | |
28 string s = cast(string)std.file.read("books.xml"); | |
29 | |
30 // Check for well-formedness | |
31 check(s); | |
32 | |
33 // Make a DOM tree | |
34 auto doc = new Document(s); | |
35 | |
36 // Plain-print it | |
37 writefln(doc); | |
38 } | |
39 ------------------------------------------------------------------------------ | |
40 | |
41 Example: This example does much the same thing, except that the file is | |
42 deconstructed and reconstructed by hand. This is more work, but the | |
43 techniques involved offer vastly more power. | |
44 ------------------------------------------------------------------------------ | |
45 import std.xml; | |
46 import std.stdio; | |
47 import std.string; | |
48 | |
49 struct Book | |
50 { | |
51 string id; | |
52 string author; | |
53 string title; | |
54 string genre; | |
55 string price; | |
56 string pubDate; | |
57 string description; | |
58 } | |
59 | |
60 void main() | |
61 { | |
62 string s = cast(string)std.file.read("books.xml"); | |
63 | |
64 // Check for well-formedness | |
65 check(s); | |
66 | |
67 // Take it apart | |
68 Book[] books; | |
69 | |
70 auto xml = new DocumentParser(s); | |
71 xml.onStartTag["book"] = (ElementParser xml) | |
72 { | |
73 Book book; | |
74 book.id = xml.tag.attr["id"]; | |
75 | |
76 xml.onEndTag["author"] = (in Element e) { book.author = e.text; }; | |
77 xml.onEndTag["title"] = (in Element e) { book.title = e.text; }; | |
78 xml.onEndTag["genre"] = (in Element e) { book.genre = e.text; }; | |
79 xml.onEndTag["price"] = (in Element e) { book.price = e.text; }; | |
80 xml.onEndTag["publish-date"] = (in Element e) { book.pubDate = e.text; }; | |
81 xml.onEndTag["description"] = (in Element e) { book.description = e.text; }; | |
82 | |
83 xml.parse(); | |
84 | |
85 books ~= book; | |
86 }; | |
87 xml.parse(); | |
88 | |
89 // Put it back together again; | |
90 auto doc = new Document(new Tag("catalog")); | |
91 foreach(book;books) | |
92 { | |
93 auto element = new Element("book"); | |
94 element.tag.attr["id"] = book.id; | |
95 | |
96 element ~= new Element("author", book.author); | |
97 element ~= new Element("title", book.title); | |
98 element ~= new Element("genre", book.genre); | |
99 element ~= new Element("price", book.price); | |
100 element ~= new Element("publish-date",book.pubDate); | |
101 element ~= new Element("description", book.description); | |
102 | |
103 doc ~= element; | |
104 } | |
105 | |
106 // Pretty-print it | |
107 writefln(join(doc.pretty(3),"\n")); | |
108 } | |
109 ------------------------------------------------------------------------------- | |
110 Macros: | |
111 WIKI=Phobos/StdXml | |
112 | |
113 Copyright: Copyright Janice Caron 2008 - 2009. | |
114 License: <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>. | |
115 Authors: Janice Caron | |
116 | |
117 Copyright Janice Caron 2008 - 2009. | |
118 Distributed under the Boost Software License, Version 1.0. | |
119 (See accompanying file LICENSE_1_0.txt or copy at | |
120 http://www.boost.org/LICENSE_1_0.txt) | |
121 */ | |
122 module orange.xml.PhobosXML; | |
123 | |
124 version (Tango) {} | |
125 else | |
126 version = Phobos; | |
127 | |
128 version (Phobos): | |
129 | |
130 import std.array; | |
131 import std.string; | |
132 import std.encoding; | |
133 import orange.util.io; | |
134 | |
135 immutable cdata = "<![CDATA["; | |
136 | |
/**
 * A name/value pair exposed through the Element interface.
 *
 * The attribute name is also handed to the Element constructor, so the
 * underlying tag carries the same name.
 */
final class Attribute : Element
{
    private alias string tstring;
    private tstring name_;  // attribute name as passed to the constructor
    private tstring value_; // attribute value as passed to the constructor

    /**
     * Constructs an Attribute.
     *
     * Params:
     *     name = the attribute name
     *     value = the attribute value
     */
    this (tstring name, tstring value)
    {
        super(name);
        name_ = name;
        value_ = value;
    }

    /// Returns the attribute name (replaces Element.name, which reads the tag).
    override tstring name ()
    {
        return name_;
    }

    /// Returns the attribute value (replaces Element.value, which decodes text).
    override tstring value ()
    {
        return value_;
    }
}
160 | |
161 /*struct TagProxy | |
162 { | |
163 private alias string tstring; | |
164 private tstring name_; | |
165 | |
166 private static TagProxy opCall (tstring name) | |
167 { | |
168 TagProxy tp; | |
169 tp.name_ = name; | |
170 | |
171 return tp; | |
172 } | |
173 | |
174 tstring name () | |
175 { | |
176 return name_; | |
177 } | |
178 }*/ | |
179 | |
/**
 * Tests whether a code point is a legal XML character.
 *
 * Accepted are tab, line feed, carriage return, everything from U+0020 to
 * U+D7FF, and everything from U+E000 to U+10FFFF except U+FFFE and U+FFFF.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is a legal XML character, false otherwise
 */
bool isChar(dchar c) // rule 2
{
    // Control range: only the three whitespace controls are legal.
    if (c < 0x20)
        return c == 0x9 || c == 0xA || c == 0xD;

    // Everything else below the surrogate block is legal.
    if (c <= 0xD7FF)
        return true;

    // Surrogates and values beyond the last code point are rejected.
    if (c < 0xE000 || c > 0x10FFFF)
        return false;

    // The two non-characters U+FFFE and U+FFFF are excluded.
    return c != 0xFFFE && c != 0xFFFF;
}
211 | |
unittest
{
    //  const CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
    //        0x10000,0x10FFFF];

    // Code points on the accepted side of every boundary isChar checks.
    immutable uint[] accepted =
        [0x9, 0xA, 0xD, 0x20, 'J', 0xD7FF, 0xE000, 0xFFFD, 0x10000, 0x10FFFF];
    // Code points just outside those boundaries.
    immutable uint[] rejected =
        [0x8, 0xB, 0xC, 0xE, 0x1F, 0xD800, 0xDFFF, 0xFFFE, 0xFFFF, 0x110000];

    foreach (code; accepted)
        assert( isChar(cast(dchar)code));
    foreach (code; rejected)
        assert(!isChar(cast(dchar)code));

    // Optional exhaustive cross-check against the table-driven lookup.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isChar(c) == lookup(CharTable, c));
    }
}
243 | |
/**
 * Tests whether a character counts as whitespace in XML.
 *
 * XML recognises exactly four whitespace characters: space, tab, carriage
 * return and line feed.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is one of the four XML whitespace characters
 */
bool isSpace(dchar c)
{
    switch (c)
    {
        case '\u0009': // tab
        case '\u000A': // line feed
        case '\u000D': // carriage return
        case '\u0020': // space
            return true;
        default:
            return false;
    }
}
259 | |
/**
 * Tests whether a character is a digit in the XML sense.
 *
 * ASCII '0' through '9' are answered directly; every other character is
 * looked up in DigitTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is an XML digit
 */
bool isDigit(dchar c)
{
    // Fast path: the ASCII digits need no table search.
    if ('0' <= c && c <= '9')
        return true;

    return lookup(DigitTable, c);
}
275 | |
unittest
{
    // Exhaustive cross-check of the ASCII fast path against the table lookup;
    // only compiled when the stdxml_TestHardcodedChecks debug flag is set.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
        {
            const expected = lookup(DigitTable, c);
            assert(isDigit(c) == expected);
        }
    }
}
284 | |
/**
 * Tests whether a character is a letter in the XML sense, i.e. either an
 * ideographic character or a base character.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is an XML letter
 */
bool isLetter(dchar c) // rule 84
{
    // The ideographic test is tried first; base characters are the fallback.
    if (isIdeographic(c))
        return true;

    return isBaseChar(c);
}
297 | |
/**
 * Tests whether a character is ideographic in the XML sense.
 *
 * The accepted code points are U+3007, U+3021 through U+3029 and U+4E00
 * through U+9FA5.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is an XML ideographic character
 */
bool isIdeographic(dchar c)
{
    return c == 0x3007
        || (0x3021 <= c && c <= 0x3029)
        || (0x4E00 <= c && c <= 0x9FA5);
}
317 | |
unittest
{
    // The single code point plus both ends of each accepted range.
    immutable dchar[] edges = ['\u4E00', '\u9FA5', '\u3007', '\u3021', '\u3029'];
    foreach (edge; edges)
        assert(isIdeographic(edge));

    // Optional exhaustive cross-check against the table-driven lookup.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
        {
            const expected = lookup(IdeographicTable, c);
            assert(isIdeographic(c) == expected);
        }
    }
}
332 | |
/**
 * Tests whether a character is a base character in the XML sense, as
 * determined by a lookup in BaseCharTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of looking c up in BaseCharTable
 */
bool isBaseChar(dchar c)
{
    const inTable = lookup(BaseCharTable, c);
    return inTable;
}
346 | |
/**
 * Tests whether a character is a combining character in the XML sense, as
 * determined by a lookup in CombiningCharTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of looking c up in CombiningCharTable
 */
bool isCombiningChar(dchar c)
{
    const inTable = lookup(CombiningCharTable, c);
    return inTable;
}
360 | |
/**
 * Tests whether a character is an extender in the XML sense, as determined
 * by a lookup in ExtenderTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of looking c up in ExtenderTable
 */
bool isExtender(dchar c)
{
    const inTable = lookup(ExtenderTable, c);
    return inTable;
}
373 | |
/**
 * Encodes a string by replacing all characters which need to be escaped with
 * the appropriate predefined XML entities.
 *
 * encode() escapes ampersand, quote, apostrophe, less-than and greater-than;
 * decode() reverses the transformation. These functions are provided for
 * convenience only. You do not need to use them when using the XML classes of
 * this module, because then all encoding and decoding is done for you.
 *
 * If no character needs escaping, the original string is returned unchanged
 * (the very same slice, not a copy).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be encoded
 *      buffer = (optional) retained for source compatibility with existing
 *               callers; the result is always built in freshly allocated
 *               storage, so the contents of buffer are never overwritten
 *
 * Returns: The encoded string
 *
 * Examples:
 * --------------
 * writefln(encode("a > b")); // writes "a &gt; b"
 * --------------
 */
S encode(S)(S s, S buffer = null)
{
    string r;
    size_t lastI;          // index just past the last escaped character
    bool changed = false;  // set once at least one character was escaped
    auto result = appender!S();

    foreach (i, c; s)
    {
        switch (c)
        {
        case '&':  r = "&amp;";  break;
        case '"':  r = "&quot;"; break;
        case '\'': r = "&apos;"; break;
        case '<':  r = "&lt;";   break;
        case '>':  r = "&gt;";   break;
        default: continue;
        }
        // Copy the untouched run before this character, then its entity.
        result.put(s[lastI .. i]);
        result.put(r);
        lastI = i + 1;
        changed = true;
    }

    // Nothing was escaped: hand back the input itself rather than a copy.
    if (!changed) return s;

    result.put(s[lastI .. $]);
    return result.data;
}
426 | |
unittest
{
    // Unchanged input must come back as the very same slice.
    auto s = "hello";
    assert(encode(s) is s);

    // Each of the five special characters maps to its predefined entity.
    assert(encode("a > b") == "a &gt; b", encode("a > b"));
    assert(encode("a < b") == "a &lt; b");
    assert(encode("don't") == "don&apos;t");
    assert(encode("\"hi\"") == "&quot;hi&quot;", encode("\"hi\""));
    assert(encode("cat & dog") == "cat &amp; dog");
}
436 | |
/**
 * Selects how decode() treats its input.
 *
 * $(DDOC_ENUM_MEMBERS NONE) Do not decode
 * $(DDOC_ENUM_MEMBERS LOOSE) Decode, but ignore errors
 * $(DDOC_ENUM_MEMBERS STRICT) Decode, and throw exception on error
 */
enum DecodeMode
{
    NONE,   // return the input untouched
    LOOSE,  // decode what can be decoded, leave the rest as-is
    STRICT  // decode, throwing on anything malformed
}
448 | |
/**
 * Decodes a string by unescaping all predefined XML entities.
 *
 * encode() escapes ampersand, quote, apostrophe, less-than and greater-than;
 * decode() reverses the transformation. These functions are provided for
 * convenience only. You do not need to use them when using the XML classes of
 * this module, because then all encoding and decoding is done for you.
 *
 * This function decodes the entities &amp;amp;, &amp;quot;, &amp;apos;,
 * &amp;lt; and &amp;gt;, as well as decimal and hexadecimal character
 * references such as &amp;#x20AC;
 *
 * If the string does not contain an ampersand, the original will be returned.
 *
 * Note that the "mode" parameter can be one of DecodeMode.NONE (do not
 * decode), DecodeMode.LOOSE (decode, but ignore errors), or DecodeMode.STRICT
 * (decode, and throw a DecodeException in the event of an error).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be decoded
 *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
 *
 * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails
 *
 * Returns: The decoded string
 *
 * Examples:
 * --------------
 * writefln(decode("a &gt; b")); // writes "a > b"
 * --------------
 */
string decode(string s, DecodeMode mode=DecodeMode.LOOSE)
{
    if (mode == DecodeMode.NONE) return s;

    // Stays empty until the first ampersand is met, so that input without
    // any ampersand can be returned as-is without copying.
    char[] buffer;
    for (size_t i = 0; i < s.length; ++i)
    {
        char c = s[i];
        if (c != '&')
        {
            if (buffer.length != 0) buffer ~= c;
            continue;
        }

        // First ampersand: start the private copy with everything before it.
        if (buffer.length == 0)
        {
            buffer = s[0 .. i].dup;
        }

        string rest = s[i .. $];
        if (startsWith(rest, "&#"))
        {
            // Numeric character reference, decimal or hexadecimal.
            try
            {
                dchar d;
                string t = rest;
                checkCharRef(t, d);
                char[4] temp;
                buffer ~= temp[0 .. std.utf.encode(temp, d)];
                // t now starts after the reference; step back one because
                // the loop increment will advance i again.
                i = s.length - t.length - 1;
            }
            catch(Err e)
            {
                if (mode == DecodeMode.STRICT)
                    throw new DecodeException("Unescaped &");
                buffer ~= '&';
            }
        }
        // Named entities: i is advanced by (entity length - 1); the loop
        // increment accounts for the final character.
        else if (startsWith(rest, "&amp;" )) { buffer ~= '&';  i += 4; }
        else if (startsWith(rest, "&quot;")) { buffer ~= '"';  i += 5; }
        else if (startsWith(rest, "&apos;")) { buffer ~= '\''; i += 5; }
        else if (startsWith(rest, "&lt;"  )) { buffer ~= '<';  i += 3; }
        else if (startsWith(rest, "&gt;"  )) { buffer ~= '>';  i += 3; }
        else
        {
            // A bare ampersand that starts no known entity.
            if (mode == DecodeMode.STRICT)
                throw new DecodeException("Unescaped &");
            buffer ~= '&';
        }
    }
    return (buffer.length == 0) ? s : cast(string)buffer;
}
534 | |
unittest
{
    // Asserts that decoding s in STRICT mode raises a DecodeException.
    void assertNot(string s)
    {
        bool b = false;
        try { decode(s,DecodeMode.STRICT); }
        catch (DecodeException e) { b = true; }
        assert(b,s);
    }

    // Assert that things that should work, do
    auto s = "hello";
    assert(decode(s,                DecodeMode.STRICT) is s);
    assert(decode("a &gt; b",       DecodeMode.STRICT) == "a > b");
    assert(decode("a &lt; b",       DecodeMode.STRICT) == "a < b");
    assert(decode("don&apos;t",     DecodeMode.STRICT) == "don't");
    assert(decode("&quot;hi&quot;", DecodeMode.STRICT) == "\"hi\"");
    assert(decode("cat &amp; dog",  DecodeMode.STRICT) == "cat & dog");
    assert(decode("&#42;",          DecodeMode.STRICT) == "*");
    assert(decode("&#x2A;",         DecodeMode.STRICT) == "*");

    // LOOSE mode leaves malformed input untouched
    assert(decode("cat & dog",      DecodeMode.LOOSE) == "cat & dog");
    assert(decode("a &gt b",        DecodeMode.LOOSE) == "a &gt b");
    assert(decode("&#;",            DecodeMode.LOOSE) == "&#;");
    assert(decode("&#x;",           DecodeMode.LOOSE) == "&#x;");
    assert(decode("&#2G;",          DecodeMode.LOOSE) == "&#2G;");
    assert(decode("&#x2G;",         DecodeMode.LOOSE) == "&#x2G;");

    // Assert that things that shouldn't work, don't
    assertNot("cat & dog");
    assertNot("a &gt b");
    assertNot("&#;");
    assertNot("&#x;");
    assertNot("&#2G;");
    assertNot("&#x2G;");
}
569 | |
/**
 * An XML document: a root Element plus the text surrounding it.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 */
class Document : Element
{
    /**
     * All text occurring before the root element.
     * Defaults to &lt;?xml version="1.0"?&gt;
     */
    string prolog = "<?xml version=\"1.0\"?>";

    /**
     * All text occurring after the root element.
     * Defaults to the empty string
     */
    string epilog;

    /**
     * Builds a complete DOM (Document Object Model) tree from XML text.
     *
     * The input MUST be valid XML; it is handed to DocumentParser unchanged.
     * The in contract here only rejects empty input.
     *
     * Params:
     *      s = the complete XML text.
     */
    this(string s)
    in
    {
        assert(s.length != 0);
    }
    body
    {
        auto xml = new DocumentParser(s);
        string tagString = xml.tag.tagString;

        this(xml.tag);

        // The prolog is the part of s that precedes the root tag's text;
        // its length is the pointer offset of that text within s.
        prolog = s[0 .. tagString.ptr - s.ptr];

        parse(xml);

        // Whatever the parser's string pointer refers to once parsing is
        // finished is stored as the epilog.
        epilog = *xml.s;
    }

    /**
     * Builds an (initially childless) Document whose root tag is copied
     * from the given Tag.
     *
     * Params:
     *      tag = the start tag of the document.
     */
    this(const(Tag) tag)
    {
        super(tag);
    }

    const
    {
        /**
         * Tests two Documents for equality: prolog, element content and
         * epilog must all match.
         *
         * Examples:
         * --------------
         * Document d1,d2;
         * if (d1 == d2) { }
         * --------------
         */
        override bool opEquals(Object o)
        {
            const doc = toType!(const Document)(o);

            if (prolog != doc.prolog) return false;
            if (super != cast(const Element)doc) return false;
            if (epilog != doc.epilog) return false;
            return true;
        }

        /**
         * Orders two Documents by prolog, then element content, then epilog.
         *
         * You should rarely need to call this function. It exists so that
         * Documents can be used as associative array keys.
         *
         * Examples:
         * --------------
         * Document d1,d2;
         * if (d1 < d2) { }
         * --------------
         */
        override int opCmp(Object o)
        {
            const doc = toType!(const Document)(o);

            // The first component that differs decides the ordering.
            if (prolog != doc.prolog)
                return prolog < doc.prolog ? -1 : 1;

            if (super != cast(const Element)doc)
                return super < cast(const Element)doc ? -1 : 1;

            if (epilog != doc.epilog)
                return epilog < doc.epilog ? -1 : 1;

            return 0;
        }

        /**
         * Computes a hash combining prolog, epilog and the Element hash.
         *
         * You should rarely need to call this function. It exists so that
         * Documents can be used as associative array keys.
         */
        override hash_t toHash()
        {
            auto tail = hash(epilog,super.toHash);
            return hash(prolog,tail);
        }

        /**
         * Renders the complete XML of the document: prolog, root element,
         * epilog, in that order.
         */
        override string toString()
        {
            string xmlText = prolog;
            xmlText ~= super.toString;
            xmlText ~= epilog;
            return xmlText;
        }
    }
}
694 | |
/**
 * Class representing an XML element.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 */
class Element : Item
{
    Tag tag; /// The start tag of the element
    Item[] items; /// The element's items
    Text[] texts; /// The element's text items
    CData[] cdatas; /// The element's CData items
    Comment[] comments; /// The element's comments
    ProcessingInstruction[] pis; /// The element's processing instructions
    Element[] elements; /// The element's child elements

    // Backing field for parent(). None of the append operations in this
    // class assign it; it changes only through the parent(Element) setter.
    Element parent_;

    /**
     * Constructs an Element given a name and a string to be used as a Text
     * interior.
     *
     * Params:
     *      name = the name of the element.
     *      interior = (optional) the string interior.
     *
     * Examples:
     * -------------------------------------------------------
     * auto element = new Element("title","Serenity")
     *     // constructs the element <title>Serenity</title>
     * -------------------------------------------------------
     */
    this(string name, string interior=null)
    {
        this(new Tag(name));
        if (interior.length != 0) opCatAssign(new Text(interior));
    }

    /**
     * Constructs an Element from a Tag.
     *
     * The tag is copied (name, attributes and tag string); the copy starts
     * out as an EMPTY tag and becomes a START tag once non-empty content is
     * appended.
     *
     * Params:
     *      tag_ = the start or empty tag of the element.
     */
    this(const(Tag) tag_)
    {
        this.tag = new Tag(tag_.name);
        tag.type = TagType.EMPTY;
        foreach(k,v;tag_.attr) tag.attr[k] = v;
        tag.tagString = tag_.tagString;
    }

    /// Returns the parent element previously stored with parent(Element).
    Element parent ()
    {
        return parent_;
    }

    /// Stores and returns the given parent element.
    Element parent (Element parent)
    {
        return parent_ = parent;
    }

    /// Returns the tag name of this element.
    string name ()
    {
        return tag.name;
    }

    /**
     * Returns the decoded text interior of this element (see text()).
     *
     * Throws: DecodeException if the element holds anything but Text items.
     */
    string value ()
    {
        return text();
    }

    /// Alternative name for the child element array.
    alias elements children;

    /**
     * Returns the tag's attributes as freshly constructed Attribute objects,
     * in the iteration order of the attribute associative array.
     */
    Attribute[] attributes ()
    {
        // Fill the array in place instead of allocating it, discarding it
        // and re-appending every entry.
        auto attrs = new Attribute[tag.attr.length];
        size_t next = 0;

        foreach (k, v ; tag.attr)
            attrs[next++] = new Attribute(k, v);

        return attrs;
    }

    /// Returns this element itself.
    Element query ()
    {
        return this;
    }

    /**
     * Sets an attribute on this element's tag.
     *
     * Params:
     *      prefix = accepted but not used by this implementation
     *      name = the attribute name
     *      value = (optional) the attribute value
     *
     * Returns: this element, so calls can be chained
     */
    Element attribute (string prefix, string name, string value = null)
    {
        tag.attr[name] = value;

        return this;
    }

    /**
     * Append a text item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Examples:
     * --------------
     * Element element;
     * element ~= new Text("hello");
     * --------------
     */
    void opCatAssign(Text item)
    {
        texts ~= item;
        appendItem(item);
    }

    /**
     * Append a CData item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Examples:
     * --------------
     * Element element;
     * element ~= new CData("hello");
     * --------------
     */
    void opCatAssign(CData item)
    {
        cdatas ~= item;
        appendItem(item);
    }

    /**
     * Append a comment to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Examples:
     * --------------
     * Element element;
     * element ~= new Comment("hello");
     * --------------
     */
    void opCatAssign(Comment item)
    {
        comments ~= item;
        appendItem(item);
    }

    /**
     * Append a processing instruction to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Examples:
     * --------------
     * Element element;
     * element ~= new ProcessingInstruction("hello");
     * --------------
     */
    void opCatAssign(ProcessingInstruction item)
    {
        pis ~= item;
        appendItem(item);
    }

    /**
     * Append a complete element to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Examples:
     * --------------
     * Element element;
     * Element other = new Element("br");
     * element ~= other;
     *    // appends element representing <br />
     * --------------
     */
    void opCatAssign(Element item)
    {
        elements ~= item;
        appendItem(item);
    }

    // Records the item in document order and promotes an EMPTY tag to a
    // START tag as soon as the element holds non-empty content.
    private void appendItem(Item item)
    {
        items ~= item;
        if (tag.type == TagType.EMPTY && !item.isEmptyXML)
            tag.type = TagType.START;
    }

    // Populates this element from the parser: every text, CDATA, comment
    // and processing instruction is appended, and every start tag creates
    // a child element that is parsed recursively before being appended.
    private void parse(ElementParser xml)
    {
        xml.onText = (string s) { opCatAssign(new Text(s)); };
        xml.onCData = (string s) { opCatAssign(new CData(s)); };
        xml.onComment = (string s) { opCatAssign(new Comment(s)); };
        xml.onPI = (string s) { opCatAssign(new ProcessingInstruction(s)); };

        xml.onStartTag[null] = (ElementParser sub)
        {
            auto e = new Element(sub.tag);
            e.parse(sub);
            opCatAssign(e);
        };

        xml.parse();
    }

    /**
     * Compares two Elements for equality
     *
     * Two elements compare equal when they hold the same number of items
     * and all items compare equal pairwise.
     *
     * Examples:
     * --------------
     * Element e1,e2;
     * if (e1 == e2) { }
     * --------------
     */
    override bool opEquals(Object o)
    {
        // NOTE(review): the tag is not compared here although toHash()
        // includes it, so elements that compare equal can hash differently.
        // Confirm the intended contract before changing either side.
        const element = toType!(const Element)(o);
        if (items.length != element.items.length) return false;

        // size_t-ranged loop: a uint counter would truncate long item lists
        // on 64-bit targets.
        foreach (i; 0 .. items.length)
        {
            if (!items[i].opEquals(element.items[i])) return false;
        }
        return true;
    }

    /**
     * Compares two Elements
     *
     * Items are compared pairwise in order; the first differing pair decides.
     * If one item list is a prefix of the other, the shorter one sorts first.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     *
     * Examples:
     * --------------
     * Element e1,e2;
     * if (e1 < e2) { }
     * --------------
     */
    override int opCmp(Object o)
    {
        const element = toType!(const Element)(o);
        for (size_t i = 0; ; ++i)
        {
            if (i == items.length && i == element.items.length) return 0;
            if (i == items.length) return -1;
            if (i == element.items.length) return 1;
            if (items[i] != element.items[i])
                return items[i].opCmp(element.items[i]);
        }
    }

    /**
     * Returns the hash of an Element: the tag's hash plus the hashes of all
     * items.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     */
    override hash_t toHash()
    {
        hash_t hash = tag.toHash;
        foreach(item;items) hash += item.toHash();
        return hash;
    }

    const
    {
        /**
         * Returns the decoded interior of an element.
         *
         * The element is assumed to contain text <i>only</i>. So, for
         * example, given XML such as "&lt;title&gt;Good &amp;amp;
         * Bad&lt;/title&gt;", will return "Good &amp; Bad".
         *
         * Params:
         *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
         *
         * Throws: DecodeException if decode fails, or if the element holds
         *         any item that is not a Text
         */
        string text(DecodeMode mode=DecodeMode.LOOSE)
        {
            string buffer;
            foreach(item;items)
            {
                Text t = cast(Text)item;
                // Anything other than plain text makes the interior
                // undecodable as a single string.
                if (t is null) throw new DecodeException(item.toString);
                buffer ~= decode(t.toString,mode);
            }
            return buffer;
        }

        /**
         * Returns an indented string representation of this item
         *
         * Params:
         *      indent = (optional) number of spaces by which to indent this
         *          element. Defaults to 2.
         *
         * Returns: one string per output line
         */
        override string[] pretty(uint indent=2)
        {

            if (isEmptyXML || tag.isEmpty) return [ tag.toEmptyString ];

            // A lone text child is kept on the same line as its tags.
            if (items.length == 1)
            {
                Text t = cast(Text)(items[0]);
                if (t !is null)
                {
                    return [tag.toStartString ~ t.toString ~ tag.toEndString];
                }
            }

            string[] a = [ tag.toStartString ];
            foreach(item;items)
            {
                string[] b = item.pretty(indent);
                foreach(s;b)
                {
                    // Widening each line by indent pads it on the left.
                    a ~= rjustify(s,s.length + indent);
                }
            }
            a ~= tag.toEndString;
            return a;
        }

        /**
         * Returns the string representation of an Element
         *
         * Examples:
         * --------------
         * auto element = new Element("br");
         * writefln(element.toString); // writes "<br />"
         * --------------
         */
        override string toString()
        {
            if (isEmptyXML || tag.isEmpty) return tag.toEmptyString;

            string buffer = tag.toStartString;
            foreach(item;items) { buffer ~= item.toString; }
            buffer ~= tag.toEndString;
            return buffer;
        }

        override bool isEmptyXML() { return false; } /// Returns false always
    }
}
1047 | |
/**
 * The kinds of tag an XML Tag object can represent.
 *
 * $(DDOC_ENUM_MEMBERS START) Used for start tags
 * $(DDOC_ENUM_MEMBERS END) Used for end tags
 * $(DDOC_ENUM_MEMBERS EMPTY) Used for empty tags
 *
 */
enum TagType
{
    START, // opening tag
    END,   // closing tag
    EMPTY  // self-closing tag
}
1057 | |
1058 /** | |
1059 * Class representing an XML tag. | |
1060 * | |
1061 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) | |
1062 * | |
1063 * The class invariant guarantees | |
1064 * <ul> | |
1065 * <li> that $(B type) is a valid enum TagType value</li> | |
1066 * <li> that $(B name) consists of valid characters</li> | |
1067 * <li> that each attribute name consists of valid characters</li> | |
1068 * </ul> | |
1069 */ | |
class Tag
{
    TagType type = TagType.START;   /// Type of tag
    string name;                    /// Tag name
    string[string] attr;            /// Associative array of attributes

    // Source text the tag was parsed from. Only set by the private parsing
    // constructor; ElementParser uses it to locate the text between a start
    // tag and its end tag.
    private string tagString;

    // Asserts that type is one of the three TagType members and that the tag
    // name and every attribute name are accepted by checkName.
    invariant()
    {
        string s;
        string t;

        assert(type == TagType.START
            || type == TagType.END
            || type == TagType.EMPTY);

        s = name;
        try { checkName(s,t); }
        catch(Err e) { assert(false,"Invalid tag name:" ~ e.toString); }

        foreach(k,v;attr)
        {
            s = k;
            try { checkName(s,t); }
            catch(Err e)
                { assert(false,"Invalid atrribute name:" ~ e.toString); }
        }
    }

    /**
     * Constructs an instance of Tag with a specified name and type
     *
     * The constructor does not initialize the attributes. To initialize the
     * attributes, you access the $(B attr) member variable.
     *
     * Params:
     *      name = the Tag's name
     *      type = (optional) the Tag's type. If omitted, defaults to
     *          TagType.START.
     *
     * Examples:
     * --------------
     * auto tag = new Tag("img",TagType.EMPTY);
     * tag.attr["src"] = "http://example.com/example.jpg";
     * --------------
     */
    this(string name, TagType type=TagType.START)
    {
        this.name = name;
        this.type = type;
    }

    /* Private constructor (so don't ddoc this!)
     *
     * Constructs a Tag by parsing the string representation, e.g. "<html>".
     *
     * The string is passed by reference, and is advanced over all characters
     * consumed.
     *
     * The second parameter is a dummy parameter only, required solely to
     * distinguish this constructor from the public one.
     *
     * Attribute values are stored decoded (DecodeMode.LOOSE).
     *
     * Throws: TagException carrying the text consumed so far when any
     * XMLException is raised while parsing.
     */
    private this(ref string s, bool dummy)
    {
        // Remember where the tag starts; trimmed to the consumed span below.
        tagString = s;
        try
        {
            reqc(s,'<');
            if (optc(s,'/')) type = TagType.END;
            name = munch(s,"^/>"~whitespace);
            munch(s,whitespace);

            // key="value" pairs until the closing '>' or '/>'
            while(s.length > 0 && s[0] != '>' && s[0] != '/')
            {
                string key = munch(s,"^="~whitespace);
                munch(s,whitespace);
                reqc(s,'=');
                munch(s,whitespace);
                reqc(s,'"');
                string val = decode(munch(s,"^\""), DecodeMode.LOOSE);
                reqc(s,'"');
                munch(s,whitespace);
                attr[key] = val;
            }
            if (optc(s,'/'))
            {
                // "</name/>" is not a legal tag
                if (type == TagType.END) throw new TagException("");
                type = TagType.EMPTY;
            }
            reqc(s,'>');
            // s has advanced within the same buffer, so the pointer
            // difference is the number of characters this tag occupies.
            tagString.length = (s.ptr - tagString.ptr);
        }
        catch(XMLException e)
        {
            tagString.length = (s.ptr - tagString.ptr);
            throw new TagException(tagString);
        }
    }

    const
    {
        /**
         * Compares two Tags for equality
         *
         * Two tags are equal when their name, attributes and type all match.
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         *
         * Examples:
         * --------------
         * Tag tag1,tag2
         * if (tag1 == tag2) { }
         * --------------
         */
        override bool opEquals(Object o)
        {
            const tag = toType!(const Tag)(o);
            return name == tag.name
                && attr == tag.attr
                && type == tag.type;
        }

        /**
         * Compares two Tags
         *
         * Ordering is by name, then by attributes, then by type.
         *
         * Examples:
         * --------------
         * Tag tag1,tag2
         * if (tag1 < tag2) { }
         * --------------
         */
        override int opCmp(Object o)
        {
            const tag = toType!(const Tag)(o);
            return
                ((name != tag.name) ? ( name < tag.name ? -1 : 1 ) :
                ((attr != tag.attr) ? ( attr < tag.attr ? -1 : 1 ) :
                ((type != tag.type) ? ( type < tag.type ? -1 : 1 ) :
            0 )));
        }

        /**
         * Returns the hash of a Tag
         *
         * Only the name contributes to the hash.
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         */
        override hash_t toHash()
        {
            hash_t hash = 0;
            foreach(dchar c;name) hash = hash * 11 + c;
            return hash;
        }

        /**
         * Returns the string representation of a Tag
         *
         * Examples:
         * --------------
         * auto tag = new Tag("book",TagType.START);
         * writefln(tag.toString); // writes "<book>"
         * --------------
         */
        override string toString()
        {
            if (isEmpty) return toEmptyString();
            return (isEnd) ? toEndString() : toStartString();
        }

        private
        {
            // Builds "<name key="value" ..." without the closing bracket.
            string toNonEndString()
            {
                string s = "<" ~ name;
                // Attribute values are held decoded (the parsing constructor
                // decodes them), so they must be encoded again on output;
                // otherwise '&', '<' or '"' in a value yields malformed XML.
                foreach(key,val;attr)
                    s ~= format(" %s=\"%s\"",key,encode(val));
                return s;
            }

            string toStartString() { return toNonEndString() ~ ">"; }

            string toEndString() { return "</" ~ name ~ ">"; }

            string toEmptyString() { return toNonEndString() ~ " />"; }
        }

        /**
         * Returns true if the Tag is a start tag
         *
         * Examples:
         * --------------
         * if (tag.isStart) { }
         * --------------
         */
        bool isStart() { return type == TagType.START; }

        /**
         * Returns true if the Tag is an end tag
         *
         * Examples:
         * --------------
         * if (tag.isEnd) { }
         * --------------
         */
        bool isEnd() { return type == TagType.END; }

        /**
         * Returns true if the Tag is an empty tag
         *
         * Examples:
         * --------------
         * if (tag.isEmpty) { }
         * --------------
         */
        bool isEmpty() { return type == TagType.EMPTY; }
    }
}
1287 | |
1288 /** | |
1289 * Class representing a comment | |
1290 */ | |
class Comment : Item
{
    private string content; // Comment body, without the <!-- and --> delimiters

    /**
     * Construct a comment
     *
     * Params:
     *      content = the body of the comment
     *
     * Throws: CommentException if the comment body is illegal (contains "--"
     * or exactly equals "-")
     *
     * Examples:
     * --------------
     * auto item = new Comment("This is a comment");
     * // constructs <!--This is a comment-->
     * --------------
     */
    this(string content)
    {
        // "--" may not appear inside a comment, and a body of "-" would
        // serialise as "<!--->".
        if (content == "-" || content.indexOf("--") != -1)
            throw new CommentException(content);
        this.content = content;
    }

    /**
     * Compares two comments for equality
     *
     * Examples:
     * --------------
     * Comment item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(Object o)
    {
        const item = toType!(const Item)(o);
        const t = cast(Comment)item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two comments
     *
     * Returns a negative value, zero or a positive value according to the
     * ordering of the two bodies. Returns 0 when the other item is not a
     * Comment.
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     *
     * Examples:
     * --------------
     * Comment item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(Object o)
    {
        const item = toType!(const Item)(o);
        const t = cast(Comment)item;
        // The comparison must not be folded into a "&&" expression: that
        // yields a bool, so -1 would be reported as 1.
        if (t is null) return 0;
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a Comment
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     */
    override hash_t toHash() { return hash(content); }

    /**
     * Returns a string representation of this comment
     */
    override const string toString() { return "<!--" ~ content ~ "-->"; }

    override const bool isEmptyXML() { return false; } /// Returns false always
}
1368 | |
1369 /** | |
1370 * Class representing a Character Data section | |
1371 */ | |
class CData : Item
{
    private string content; // Section body, without the CDATA delimiters

    /**
     * Construct a character data section
     *
     * Params:
     *      content = the body of the character data segment
     *
     * Throws: CDataException if the segment body is illegal (contains "]]>")
     *
     * Examples:
     * --------------
     * auto item = new CData("<b>hello</b>");
     * // constructs <![CDATA[<b>hello</b>]]>
     * --------------
     */
    this(string content)
    {
        if (content.indexOf("]]>") != -1) throw new CDataException(content);
        this.content = content;
    }

    /**
     * Compares two CDatas for equality
     *
     * Examples:
     * --------------
     * CData item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(Object o)
    {
        const item = toType!(const Item)(o);
        const t = cast(CData)item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two CDatas
     *
     * Returns a negative value, zero or a positive value according to the
     * ordering of the two bodies. Returns 0 when the other item is not a
     * CData.
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     *
     * Examples:
     * --------------
     * CData item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(Object o)
    {
        const item = toType!(const Item)(o);
        const t = cast(CData)item;
        // The comparison must not be folded into a "&&" expression: that
        // yields a bool, so -1 would be reported as 1.
        if (t is null) return 0;
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a CData
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     */
    override hash_t toHash() { return hash(content); }

    /**
     * Returns a string representation of this CData section
     */
    override const string toString() { return cdata ~ content ~ "]]>"; }

    override const bool isEmptyXML() { return false; } /// Returns false always
}
1447 | |
1448 /** | |
1449 * Class representing a text (aka Parsed Character Data) section | |
1450 */ | |
class Text : Item
{
    private string content; // Text as stored after encode(), i.e. ready to emit

    /**
     * Construct a text (aka PCData) section
     *
     * Params:
     *      content = the text. This function encodes the text before
     *      insertion, so it is safe to insert any text
     *
     * Examples:
     * --------------
     * auto text = new Text("a < b");
     * // constructs a &lt; b
     * --------------
     */
    this(string content)
    {
        this.content = encode(content);
    }

    /**
     * Compares two text sections for equality
     *
     * Examples:
     * --------------
     * Text item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(Object o)
    {
        const item = toType!(const Item)(o);
        const t = cast(Text)item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two text sections
     *
     * Returns a negative value, zero or a positive value according to the
     * ordering of the two (encoded) contents. Returns 0 when the other item
     * is not a Text.
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     *
     * Examples:
     * --------------
     * Text item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(Object o)
    {
        const item = toType!(const Item)(o);
        const t = cast(Text)item;
        // The comparison must not be folded into a "&&" expression: that
        // yields a bool, so -1 would be reported as 1.
        if (t is null) return 0;
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a text section
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     */
    override hash_t toHash() { return hash(content); }

    /**
     * Returns a string representation of this Text section
     */
    override const string toString() { return content; }

    /**
     * Returns true if the content is the empty string
     */
    override const bool isEmptyXML() { return content.length == 0; }
}
1527 | |
1528 /** | |
1529 * Class representing an XML Instruction section | |
1530 */ | |
class XMLInstruction : Item
{
    private string content; // Instruction body, without the <! and > delimiters

    /**
     * Construct an XML Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: XIException if the segment body is illegal (contains ">")
     *
     * Examples:
     * --------------
     * auto item = new XMLInstruction("ATTLIST");
     * // constructs <!ATTLIST>
     * --------------
     */
    this(string content)
    {
        if (content.indexOf(">") != -1) throw new XIException(content);
        this.content = content;
    }

    /**
     * Compares two XML instructions for equality
     *
     * Examples:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(Object o)
    {
        const item = toType!(const Item)(o);
        const t = cast(XMLInstruction)item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two XML instructions
     *
     * Returns a negative value, zero or a positive value according to the
     * ordering of the two bodies. Returns 0 when the other item is not an
     * XMLInstruction.
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
     *
     * Examples:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(Object o)
    {
        const item = toType!(const Item)(o);
        const t = cast(XMLInstruction)item;
        // The comparison must not be folded into a "&&" expression: that
        // yields a bool, so -1 would be reported as 1.
        if (t is null) return 0;
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of an XMLInstruction
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
     */
    override hash_t toHash() { return hash(content); }

    /**
     * Returns a string representation of this XmlInstruction
     */
    override const string toString() { return "<!" ~ content ~ ">"; }

    override const bool isEmptyXML() { return false; } /// Returns false always
}
1606 | |
1607 /** | |
1608 * Class representing a Processing Instruction section | |
1609 */ | |
class ProcessingInstruction : Item
{
    private string content; // Instruction body, without the <? and ?> delimiters

    /**
     * Construct a Processing Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: PIException if the segment body is illegal (contains "?>")
     *
     * Examples:
     * --------------
     * auto item = new ProcessingInstruction("php");
     * // constructs <?php?>
     * --------------
     */
    this(string content)
    {
        if (content.indexOf("?>") != -1) throw new PIException(content);
        this.content = content;
    }

    /**
     * Compares two processing instructions for equality
     *
     * Examples:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(Object o)
    {
        const item = toType!(const Item)(o);
        const t = cast(ProcessingInstruction)item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two processing instructions
     *
     * Returns a negative value, zero or a positive value according to the
     * ordering of the two bodies. Returns 0 when the other item is not a
     * ProcessingInstruction.
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     *
     * Examples:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(Object o)
    {
        const item = toType!(const Item)(o);
        const t = cast(ProcessingInstruction)item;
        // The comparison must not be folded into a "&&" expression: that
        // yields a bool, so -1 would be reported as 1.
        if (t is null) return 0;
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a ProcessingInstruction
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     */
    override hash_t toHash() { return hash(content); }

    /**
     * Returns a string representation of this ProcessingInstruction
     */
    override const string toString() { return "<?" ~ content ~ "?>"; }

    override const bool isEmptyXML() { return false; } /// Returns false always
}
1685 | |
1686 /** | |
1687 * Abstract base class for XML items | |
1688 */ | |
abstract class Item
{
    /// Compares with another Item of same type for equality
    abstract override bool opEquals(Object o);

    /// Compares with another Item of same type
    abstract override int opCmp(Object o);

    /// Returns the hash of this item
    abstract override hash_t toHash();

    /// Returns a string representation of this item
    abstract override const string toString();

    /**
     * Returns an indented string representation of this item
     *
     * This base implementation yields the stripped toString() text as a
     * single line, or no lines at all when that text is empty. The indent
     * argument is not used here.
     *
     * Params:
     *      indent = number of spaces by which to indent child elements
     */
    const string[] pretty(uint indent)
    {
        string trimmed = strip(toString());
        if (trimmed.length == 0)
            return [];
        return [ trimmed ];
    }

    /// Returns true if the item represents empty XML text
    abstract const bool isEmptyXML();
}
1718 | |
1719 /** | |
1720 * Class for parsing an XML Document. | |
1721 * | |
1722 * This is a subclass of ElementParser. Most of the useful functions are | |
1723 * documented there. | |
1724 * | |
1725 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) | |
1726 * | |
1727 * Bugs: | |
1728 * Currently only supports UTF documents. | |
1729 * | |
1730 * If there is an encoding attribute in the prolog, it is ignored. | |
1731 * | |
1732 */ | |
class DocumentParser : ElementParser
{
    // Copy of the document text. The inherited cursor `s` is pointed at this
    // member, so it is consumed as parsing proceeds.
    string xmlText;

    /**
     * Constructs a DocumentParser.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by the function's in contract.
     *
     * Params:
     *      xmlText_ = the entire XML document as text
     *
     */
    this(string xmlText_)
    in
    {
        assert(xmlText_.length != 0);

        // Confirm that the input is valid XML and, if it is not, surface the
        // checker's explanation through the assertion message.
        try
        {
            check(xmlText_);
        }
        catch (CheckException e)
        {
            assert(false, "\n" ~ e.toString());
        }
    }
    body
    {
        xmlText = xmlText_;

        // The cursor must be set before super() runs, because the
        // ElementParser constructor reads *s.
        s = &xmlText;

        super();    // Initialize everything
        parse();    // Parse through the root tag (but not beyond)
    }
}
1770 | |
1771 /** | |
1772 * Class for parsing an XML element. | |
1773 * | |
1774 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) | |
1775 * | |
1776 * Note that you cannot construct instances of this class directly. You can | |
1777 * construct a DocumentParser (which is a subclass of ElementParser), but | |
1778 * otherwise, Instances of ElementParser will be created for you by the | |
1779 * library, and passed your way via onStartTag handlers. | |
1780 * | |
1781 */ | |
1782 class ElementParser | |
1783 { | |
1784 alias void delegate(string) Handler; | |
1785 alias void delegate(in Element element) ElementHandler; | |
1786 alias void delegate(ElementParser parser) ParserHandler; | |
1787 | |
1788 private | |
1789 { | |
1790 Tag tag_; | |
1791 string elementStart; | |
1792 string* s; | |
1793 | |
1794 Handler commentHandler = null; | |
1795 Handler cdataHandler = null; | |
1796 Handler xiHandler = null; | |
1797 Handler piHandler = null; | |
1798 Handler rawTextHandler = null; | |
1799 Handler textHandler = null; | |
1800 | |
1801 // Private constructor for start tags | |
1802 this(ElementParser parent) | |
1803 { | |
1804 s = parent.s; | |
1805 this(); | |
1806 tag_ = parent.tag_; | |
1807 } | |
1808 | |
1809 // Private constructor for empty tags | |
1810 this(Tag tag, string* t) | |
1811 { | |
1812 s = t; | |
1813 this(); | |
1814 tag_ = tag; | |
1815 } | |
1816 } | |
1817 | |
1818 /** | |
1819 * The Tag at the start of the element being parsed. You can read this to | |
1820 * determine the tag's name and attributes. | |
1821 */ | |
1822 const const(Tag) tag() { return tag_; } | |
1823 | |
1824 /** | |
1825 * Register a handler which will be called whenever a start tag is | |
1826 * encountered which matches the specified name. You can also pass null as | |
1827 * the name, in which case the handler will be called for any unmatched | |
1828 * start tag. | |
1829 * | |
1830 * Examples: | |
1831 * -------------- | |
1832 * // Call this function whenever a <podcast> start tag is encountered | |
1833 * onStartTag["podcast"] = (ElementParser xml) | |
1834 * { | |
1835 * // Your code here | |
1836 * // | |
1837 * // This is a a closure, so code here may reference | |
1838 * // variables which are outside of this scope | |
1839 * }; | |
1840 * | |
1841 * // call myEpisodeStartHandler (defined elsewhere) whenever an <episode> | |
1842 * // start tag is encountered | |
1843 * onStartTag["episode"] = &myEpisodeStartHandler; | |
1844 * | |
1845 * // call delegate dg for all other start tags | |
1846 * onStartTag[null] = dg; | |
1847 * -------------- | |
1848 * | |
1849 * This library will supply your function with a new instance of | |
1850 * ElementHandler, which may be used to parse inside the element whose | |
1851 * start tag was just found, or to identify the tag attributes of the | |
1852 * element, etc. | |
1853 * | |
1854 * Note that your function will be called for both start tags and empty | |
1855 * tags. That is, we make no distinction between <br></br> | |
1856 * and <br/>. | |
1857 */ | |
1858 ParserHandler[string] onStartTag; | |
1859 | |
1860 /** | |
1861 * Register a handler which will be called whenever an end tag is | |
1862 * encountered which matches the specified name. You can also pass null as | |
1863 * the name, in which case the handler will be called for any unmatched | |
1864 * end tag. | |
1865 * | |
1866 * Examples: | |
1867 * -------------- | |
1868 * // Call this function whenever a </podcast> end tag is encountered | |
1869 * onEndTag["podcast"] = (in Element e) | |
1870 * { | |
1871 * // Your code here | |
1872 * // | |
1873 * // This is a a closure, so code here may reference | |
1874 * // variables which are outside of this scope | |
1875 * }; | |
1876 * | |
1877 * // call myEpisodeEndHandler (defined elsewhere) whenever an </episode> | |
1878 * // end tag is encountered | |
1879 * onEndTag["episode"] = &myEpisodeEndHandler; | |
1880 * | |
1881 * // call delegate dg for all other end tags | |
1882 * onEndTag[null] = dg; | |
1883 * -------------- | |
1884 * | |
1885 * Note that your function will be called for both start tags and empty | |
1886 * tags. That is, we make no distinction between <br></br> | |
1887 * and <br/>. | |
1888 */ | |
1889 ElementHandler[string] onEndTag; | |
1890 | |
1891 protected this() | |
1892 { | |
1893 elementStart = *s; | |
1894 } | |
1895 | |
1896 /** | |
1897 * Register a handler which will be called whenever text is encountered. | |
1898 * | |
1899 * Examples: | |
1900 * -------------- | |
1901 * // Call this function whenever text is encountered | |
1902 * onText = (string s) | |
1903 * { | |
1904 * // Your code here | |
1905 * | |
1906 * // The passed parameter s will have been decoded by the time you see | |
1907 * // it, and so may contain any character. | |
1908 * // | |
1909 * // This is a a closure, so code here may reference | |
1910 * // variables which are outside of this scope | |
1911 * }; | |
1912 * -------------- | |
1913 */ | |
1914 void onText(Handler handler) { textHandler = handler; } | |
1915 | |
1916 /** | |
1917 * Register an alternative handler which will be called whenever text | |
1918 * is encountered. This differs from onText in that onText will decode | |
1919 * the text, wheras onTextRaw will not. This allows you to make design | |
1920 * choices, since onText will be more accurate, but slower, while | |
1921 * onTextRaw will be faster, but less accurate. Of course, you can | |
1922 * still call decode() within your handler, if you want, but you'd | |
1923 * probably want to use onTextRaw only in circumstances where you | |
1924 * know that decoding is unnecessary. | |
1925 * | |
1926 * Examples: | |
1927 * -------------- | |
1928 * // Call this function whenever text is encountered | |
1929 * onText = (string s) | |
1930 * { | |
1931 * // Your code here | |
1932 * | |
1933 * // The passed parameter s will NOT have been decoded. | |
1934 * // | |
1935 * // This is a a closure, so code here may reference | |
1936 * // variables which are outside of this scope | |
1937 * }; | |
1938 * -------------- | |
1939 */ | |
1940 void onTextRaw(Handler handler) { rawTextHandler = handler; } | |
1941 | |
1942 /** | |
1943 * Register a handler which will be called whenever a character data | |
1944 * segement is encountered. | |
1945 * | |
1946 * Examples: | |
1947 * -------------- | |
1948 * // Call this function whenever a CData section is encountered | |
1949 * onCData = (string s) | |
1950 * { | |
1951 * // Your code here | |
1952 * | |
1953 * // The passed parameter s does not include the opening <![CDATA[ | |
1954 * // nor closing ]]> | |
1955 * // | |
1956 * // This is a a closure, so code here may reference | |
1957 * // variables which are outside of this scope | |
1958 * }; | |
1959 * -------------- | |
1960 */ | |
1961 void onCData(Handler handler) { cdataHandler = handler; } | |
1962 | |
1963 /** | |
1964 * Register a handler which will be called whenever a comment is | |
1965 * encountered. | |
1966 * | |
1967 * Examples: | |
1968 * -------------- | |
1969 * // Call this function whenever a comment is encountered | |
1970 * onComment = (string s) | |
1971 * { | |
1972 * // Your code here | |
1973 * | |
1974 * // The passed parameter s does not include the opening <!-- nor | |
1975 * // closing --> | |
1976 * // | |
1977 * // This is a a closure, so code here may reference | |
1978 * // variables which are outside of this scope | |
1979 * }; | |
1980 * -------------- | |
1981 */ | |
1982 void onComment(Handler handler) { commentHandler = handler; } | |
1983 | |
1984 /** | |
1985 * Register a handler which will be called whenever a processing | |
1986 * instruction is encountered. | |
1987 * | |
1988 * Examples: | |
1989 * -------------- | |
1990 * // Call this function whenever a processing instruction is encountered | |
1991 * onPI = (string s) | |
1992 * { | |
1993 * // Your code here | |
1994 * | |
1995 * // The passed parameter s does not include the opening <? nor | |
1996 * // closing ?> | |
1997 * // | |
1998 * // This is a a closure, so code here may reference | |
1999 * // variables which are outside of this scope | |
2000 * }; | |
2001 * -------------- | |
2002 */ | |
2003 void onPI(Handler handler) { piHandler = handler; } | |
2004 | |
2005 /** | |
2006 * Register a handler which will be called whenever an XML instruction is | |
2007 * encountered. | |
2008 * | |
2009 * Examples: | |
2010 * -------------- | |
2011 * // Call this function whenever an XML instruction is encountered | |
2012 * // (Note: XML instructions may only occur preceeding the root tag of a | |
2013 * // document). | |
2014 * onPI = (string s) | |
2015 * { | |
2016 * // Your code here | |
2017 * | |
2018 * // The passed parameter s does not include the opening <! nor | |
2019 * // closing > | |
2020 * // | |
2021 * // This is a a closure, so code here may reference | |
2022 * // variables which are outside of this scope | |
2023 * }; | |
2024 * -------------- | |
2025 */ | |
2026 void onXI(Handler handler) { xiHandler = handler; } | |
2027 | |
2028 /** | |
2029 * Parse an XML element. | |
2030 * | |
2031 * Parsing will continue until the end of the current element. Any items | |
2032 * encountered for which a handler has been registered will invoke that | |
2033 * handler. | |
2034 * | |
2035 * Throws: various kinds of XMLException | |
2036 */ | |
2037 void parse() | |
2038 { | |
2039 string t; | |
2040 Tag root = tag_; | |
2041 Tag[string] startTags; | |
2042 if (tag_ !is null) startTags[tag_.name] = tag_; | |
2043 | |
2044 while(s.length != 0) | |
2045 { | |
2046 if (startsWith(*s,"<!--")) | |
2047 { | |
2048 chop(*s,4); | |
2049 t = chop(*s,indexOf(*s,"-->")); | |
2050 if (commentHandler.funcptr !is null) commentHandler(t); | |
2051 chop(*s,3); | |
2052 } | |
2053 else if (startsWith(*s,"<![CDATA[")) | |
2054 { | |
2055 chop(*s,9); | |
2056 t = chop(*s,indexOf(*s,"]]>")); | |
2057 if (cdataHandler.funcptr !is null) cdataHandler(t); | |
2058 chop(*s,3); | |
2059 } | |
2060 else if (startsWith(*s,"<!")) | |
2061 { | |
2062 chop(*s,2); | |
2063 t = chop(*s,indexOf(*s,">")); | |
2064 if (xiHandler.funcptr !is null) xiHandler(t); | |
2065 chop(*s,1); | |
2066 } | |
2067 else if (startsWith(*s,"<?")) | |
2068 { | |
2069 chop(*s,2); | |
2070 t = chop(*s,indexOf(*s,"?>")); | |
2071 if (piHandler.funcptr !is null) piHandler(t); | |
2072 chop(*s,2); | |
2073 } | |
2074 else if (startsWith(*s,"<")) | |
2075 { | |
2076 tag_ = new Tag(*s,true); | |
2077 if (root is null) | |
2078 return; // Return to constructor of derived class | |
2079 | |
2080 if (tag_.isStart) | |
2081 { | |
2082 startTags[tag_.name] = tag_; | |
2083 | |
2084 auto parser = new ElementParser(this); | |
2085 | |
2086 auto handler = tag_.name in onStartTag; | |
2087 if (handler !is null) (*handler)(parser); | |
2088 else | |
2089 { | |
2090 handler = null in onStartTag; | |
2091 if (handler !is null) (*handler)(parser); | |
2092 } | |
2093 } | |
2094 else if (tag_.isEnd) | |
2095 { | |
2096 auto startTag = startTags[tag_.name]; | |
2097 string text; | |
2098 | |
2099 immutable(char)* p = startTag.tagString.ptr | |
2100 + startTag.tagString.length; | |
2101 immutable(char)* q = tag_.tagString.ptr; | |
2102 text = decode(p[0..(q-p)], DecodeMode.LOOSE); | |
2103 | |
2104 auto element = new Element(startTag); | |
2105 if (text.length != 0) element ~= new Text(text); | |
2106 | |
2107 auto handler = tag_.name in onEndTag; | |
2108 if (handler !is null) (*handler)(element); | |
2109 else | |
2110 { | |
2111 handler = null in onEndTag; | |
2112 if (handler !is null) (*handler)(element); | |
2113 } | |
2114 | |
2115 if (tag_.name == root.name) return; | |
2116 } | |
2117 else if (tag_.isEmpty) | |
2118 { | |
2119 Tag startTag = new Tag(tag_.name); | |
2120 | |
2121 // FIX by hed010gy, for bug 2979 | |
2122 // http://d.puremagic.com/issues/show_bug.cgi?id=2979 | |
2123 if (tag_.attr.length > 0) | |
2124 foreach(tn,tv; tag_.attr) startTag.attr[tn]=tv; | |
2125 // END FIX | |
2126 | |
2127 // Handle the pretend start tag | |
2128 string s2; | |
2129 auto parser = new ElementParser(startTag,&s2); | |
2130 auto handler1 = startTag.name in onStartTag; | |
2131 if (handler1 !is null) (*handler1)(parser); | |
2132 else | |
2133 { | |
2134 handler1 = null in onStartTag; | |
2135 if (handler1 !is null) (*handler1)(parser); | |
2136 } | |
2137 | |
2138 // Handle the pretend end tag | |
2139 auto element = new Element(startTag); | |
2140 auto handler2 = tag_.name in onEndTag; | |
2141 if (handler2 !is null) (*handler2)(element); | |
2142 else | |
2143 { | |
2144 handler2 = null in onEndTag; | |
2145 if (handler2 !is null) (*handler2)(element); | |
2146 } | |
2147 } | |
2148 } | |
2149 else | |
2150 { | |
2151 t = chop(*s,indexOf(*s,"<")); | |
2152 if (rawTextHandler.funcptr !is null) | |
2153 rawTextHandler(t); | |
2154 else if (textHandler.funcptr !is null) | |
2155 textHandler(decode(t,DecodeMode.LOOSE)); | |
2156 } | |
2157 } | |
2158 } | |
2159 | |
/**
 * Returns that part of the element which has already been parsed
 *
 * The result is the leading slice of elementStart whose length is the
 * difference between elementStart's length and the length of the
 * remaining input s.
 */
const override string toString()
{
    // Array lengths are size_t; keep the difference in a size_t so no
    // narrowing conversion to int is needed (which fails on 64-bit).
    size_t n = elementStart.length - s.length;
    return elementStart[0..n];
}
2168 | |
2169 } | |
2170 | |
2171 private | |
2172 { | |
// Mixin template used by the check* functions. It must be mixed into a
// scope that has a `ref string s` in view. It snapshots s in `old` and
// provides fail() overloads that restore s to that snapshot before throwing
// an Err labelled with `msg` (the name of the production being checked).
template Check(string msg)
{
    // Input position on entry; fail() rolls s back to this.
    string old = s;

    // Fail with a specific message: the inner Err is created first so that
    // it records the current (failing) position, then it is wrapped.
    void fail(string msg2)
    {
        fail(new Err(s,msg2));
    }

    // Fail, wrapping a lower-level error as the cause.
    void fail(Err e)
    {
        s = old;
        throw new Err(s,msg,e);
    }

    // Fail with only the production name as the message.
    void fail()
    {
        s = old;
        throw new Err(s,msg);
    }
}
2194 | |
// Rule 27 (Misc): consumes one comment, processing instruction or run of
// whitespace, chosen by the leading characters of s. Any failure is
// re-thrown wrapped in a "Misc" error with s rolled back.
void checkMisc(ref string s) // rule 27
{
    mixin Check!("Misc");

    try
    {
        if (s.startsWith("<!--"))
            checkComment(s);
        else if (s.startsWith("<?"))
            checkPI(s);
        else
            checkSpace(s);
    }
    catch (Err e)
    {
        fail(e);
    }
}
2207 | |
// Rule 1 (document): a prolog, then one element, then any number of Misc
// items. Failures are wrapped in a "Document" error with s rolled back.
void checkDocument(ref string s) // rule 1
{
    mixin Check!("Document");

    try
    {
        checkProlog(s);
        checkElement(s);
        star!(checkMisc)(s);
    }
    catch (Err e)
    {
        fail(e);
    }
}
2219 | |
// Rule 2 (Char): scans the whole input and fails at the first code point
// for which isChar() is false. On success s is left untouched.
void checkChars(ref string s) // rule 2
{
    // TO DO - Fix std.utf stride and decode functions, then use those
    // instead

    mixin Check!("Chars");

    // The index of a string foreach is a size_t; using int here narrows
    // the offset and is not portable to 64-bit targets.
    foreach (size_t i, dchar d; s)
    {
        if (!isChar(d))
        {
            // Advance s to the offending character so that the inner error
            // created by fail(string) records its position; fail() then
            // restores s before throwing.
            s = s[i..$];
            fail(format("invalid character: U+%04X",d));
        }
    }
}
2244 | |
// Rule 3 (S): consumes a run of space, tab, line feed and carriage return
// characters. Fails if nothing was consumed.
void checkSpace(ref string s) // rule 3
{
    mixin Check!("Whitespace");

    munch(s," \t\n\r");
    // `is` compares the slices themselves: unchanged means no whitespace.
    if (s is old)
        fail();
}
2251 | |
// Rule 5 (Name): consumes a name from the front of s and returns it in
// `name`. The first character must be '_', ':' or a letter; subsequent
// characters may also be '-', '.', digits, combining chars or extenders.
// Fails on empty input or an illegal first character.
void checkName(ref string s, out string name) // rule 5
{
    mixin Check!("Name");

    if (s.length == 0) fail();

    // Default to the whole input: if every character is a name character
    // the loop below never breaks, and the name must still be consumed
    // (previously this case produced an empty name and consumed nothing).
    size_t n = s.length;
    foreach (size_t i, dchar c; s)
    {
        if (c == '_' || c == ':' || isLetter(c)) continue;
        if (i == 0) fail();
        if (c == '-' || c == '.' || isDigit(c)
            || isCombiningChar(c) || isExtender(c)) continue;
        n = i;
        break;
    }
    name = s[0..n];
    s = s[n..$];
}
2270 | |
// Rule 10 (AttValue): consumes a quoted attribute value. The value must
// open with ' or ", may contain references, must not contain '<', and must
// close with the same quote character that opened it.
void checkAttValue(ref string s) // rule 10
{
    mixin Check!("AttValue");

    if (s.length == 0) fail();

    char quote = s[0];
    if (quote != '"' && quote != '\'')
        fail("attribute value requires quotes");
    s = s[1..$];

    while (true)
    {
        // Skip everything that is not '<', '&' or the closing quote.
        munch(s,"^<&"~quote);
        if (s.length == 0) fail("unterminated attribute value");
        if (s[0] == '<') fail("< found in attribute value");
        if (s[0] == quote) break;
        // Only '&' remains: it must start a well-formed reference.
        try { checkReference(s); } catch(Err e) { fail(e); }
    }
    s = s[1..$]; // drop the closing quote
}
2290 | |
// Rule 14 (CharData): consumes text up to (not including) the next '&' or
// '<', or to the end of input. Fails if "]]>" occurs in the text.
void checkCharData(ref string s) // rule 14
{
    mixin Check!("CharData");

    for (; s.length != 0; s = s[1..$])
    {
        if (s[0] == '&' || s[0] == '<')
            break;
        if (s.startsWith("]]>"))
            fail("]]> found within char data");
    }
}
2303 | |
// Rule 15 (Comment): consumes "<!--", skips to the first "--" and then
// requires "-->" at that point, so a "--" inside the comment body fails.
void checkComment(ref string s) // rule 15
{
    mixin Check!("Comment");

    try { checkLiteral("<!--",s); } catch(Err e) { fail(e); }

    auto dashes = s.indexOf("--");
    if (dashes == -1)
        fail("unterminated comment");
    s = s[dashes..$];

    try { checkLiteral("-->",s); } catch(Err e) { fail(e); }
}
2314 | |
// Rule 16 (PI): consumes "<?" and everything up to and including the next
// "?>". Failures are wrapped in a "PI" error with s rolled back.
void checkPI(ref string s) // rule 16
{
    mixin Check!("PI");

    try
    {
        checkLiteral("<?",s);
        checkEnd("?>",s);
    }
    catch (Err e)
    {
        fail(e);
    }
}
2326 | |
// Rule 18 (CDSect): consumes the module's `cdata` opening literal and
// everything up to and including the next "]]>".
void checkCDSect(ref string s) // rule 18
{
    mixin Check!("CDSect");

    try
    {
        checkLiteral(cdata,s);
        checkEnd("]]>",s);
    }
    catch (Err e)
    {
        fail(e);
    }
}
2338 | |
// Rule 22 (prolog): an optional XML declaration, any number of Misc items,
// then optionally a DOCTYPE declaration followed by more Misc items.
void checkProlog(ref string s) // rule 22
{
    mixin Check!("Prolog");

    try
    {
        // The XML declaration is optional in XML 1.0
        // (prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?), so only insist
        // on it when the input really starts one. "<?xml" followed by
        // whitespace can only be a declaration; something like
        // "<?xml-stylesheet" is an ordinary PI handled by checkMisc.
        if (s.length > 5 && s.startsWith("<?xml")
            && (s[5] == ' ' || s[5] == '\t' || s[5] == '\n' || s[5] == '\r'))
        {
            checkXMLDecl(s);
        }
        star!(checkMisc)(s);
        opt!(seq!(checkDocTypeDecl,star!(checkMisc)))(s);
    }
    catch (Err e) { fail(e); }
}
2351 | |
// Rule 23 (XMLDecl): "<?xml", mandatory version info, then optional
// encoding declaration, optional standalone declaration, optional
// whitespace, and the closing "?>".
void checkXMLDecl(ref string s) // rule 23
{
    mixin Check!("XMLDecl");

    try
    {
        checkLiteral("<?xml",s);
        checkVersionInfo(s);

        opt!(checkEncodingDecl)(s);
        opt!(checkSDDecl)(s);
        opt!(checkSpace)(s);

        checkLiteral("?>",s);
    }
    catch (Err e)
    {
        fail(e);
    }
}
2367 | |
// Rule 24 (VersionInfo): whitespace, the word "version", an Eq, and a
// quoted version number.
void checkVersionInfo(ref string s) // rule 24
{
    mixin Check!("VersionInfo");

    try
    {
        checkSpace(s);
        checkLiteral("version",s);
        checkEq(s);
        quoted!(checkVersionNum)(s);
    }
    catch (Err e)
    {
        fail(e);
    }
}
2381 | |
// Rule 25 (Eq): an '=' sign with optional whitespace on either side.
void checkEq(ref string s) // rule 25
{
    mixin Check!("Eq");

    try
    {
        opt!(checkSpace)(s);
        checkLiteral("=",s);
        opt!(checkSpace)(s);
    }
    catch (Err e)
    {
        fail(e);
    }
}
2394 | |
// Rule 26 (VersionNum): consumes a non-empty run of letters, digits and
// the characters '_', '.', ':' and '-'.
void checkVersionNum(ref string s) // rule 26
{
    mixin Check!("VersionNum");

    munch(s,"a-zA-Z0-9_.:-");
    // Nothing consumed means there was no version number at all.
    if (s is old)
        fail();
}
2402 | |
// Rule 28 (doctypedecl): consumes "<!DOCTYPE" and everything up to and
// including the next '>'. The declaration's contents are not validated.
void checkDocTypeDecl(ref string s) // rule 28
{
    mixin Check!("DocTypeDecl");

    try
    {
        checkLiteral("<!DOCTYPE",s);
        //
        // TO DO -- ensure DOCTYPE is well formed
        // (But not yet. That's one of our "future directions")
        //
        checkEnd(">",s);
    }
    catch (Err e)
    {
        fail(e);
    }
}
2418 | |
// Rule 32 (SDDecl): whitespace, the word "standalone", an Eq, and then a
// quoted "yes" or "no" (single or double quotes).
void checkSDDecl(ref string s) // rule 32
{
    mixin Check!("SDDecl");

    try
    {
        checkSpace(s);
        checkLiteral("standalone",s);
        checkEq(s);
    }
    catch (Err e)
    {
        fail(e);
    }

    // Number of characters taken up by the quoted value.
    int consumed = 0;
    if (s.startsWith("'yes'") || s.startsWith("\"yes\""))
    {
        consumed = 5;
    }
    else if (s.startsWith("'no'") || s.startsWith("\"no\""))
    {
        consumed = 4;
    }
    else
    {
        fail("standalone attribute value must be 'yes', \"yes\","
            ~ " 'no' or \"no\"");
    }
    s = s[consumed..$];
}
2438 | |
// Rule 39 (element): consumes one tag via checkTag. If checkTag reports it
// as "STag" (a start tag), also consumes the content and the end tag, and
// requires the end tag name to equal the start tag name.
void checkElement(ref string s) // rule 39
{
    mixin Check!("Element");

    string startName, endName, kind;
    try { checkTag(s,kind,startName); } catch(Err e) { fail(e); }

    // Anything other than a start tag is complete on its own.
    if (kind != "STag")
        return;

    string beforeEndTag;
    try
    {
        checkContent(s);
        beforeEndTag = s; // remembered so a mismatch points at the end tag
        checkETag(s,endName);
    }
    catch (Err e)
    {
        fail(e);
    }

    if (startName != endName)
    {
        s = beforeEndTag;
        fail("end tag name \"" ~ endName
            ~ "\" differs from start tag name \""~startName~"\"");
    }
}
2464 | |
// rules 40 and 44
// Consumes '<', a name, any number of whitespace-separated attributes,
// optional whitespace, an optional '/', and '>'. `type` is set to "ETag"
// when the '/' is present (i.e. the tag closes itself) and to "STag"
// otherwise; `name` receives the tag name.
void checkTag(ref string s, out string type, out string name)
{
    mixin Check!("Tag");

    try
    {
        type = "STag";
        checkLiteral("<",s);
        checkName(s,name);
        star!(seq!(checkSpace,checkAttribute))(s);
        opt!(checkSpace)(s);

        immutable bool selfClosing = s.length != 0 && s[0] == '/';
        if (selfClosing)
        {
            s = s[1..$];
            type = "ETag";
        }
        checkLiteral(">",s);
    }
    catch (Err e)
    {
        fail(e);
    }
}
2486 | |
// Rule 41 (Attribute): a name, an Eq, and a quoted attribute value. The
// attribute name itself is discarded.
void checkAttribute(ref string s) // rule 41
{
    mixin Check!("Attribute");

    try
    {
        string ignoredName;
        checkName(s,ignoredName);
        checkEq(s);
        checkAttValue(s);
    }
    catch (Err e)
    {
        fail(e);
    }
}
2500 | |
// Rule 42 (ETag): "</", a name (returned in `name`), optional whitespace,
// and '>'.
void checkETag(ref string s, out string name) // rule 42
{
    mixin Check!("ETag");

    try
    {
        checkLiteral("</",s);
        checkName(s,name);
        opt!(checkSpace)(s);
        checkLiteral(">",s);
    }
    catch (Err e)
    {
        fail(e);
    }
}
2514 | |
// Rule 43 (content): repeatedly consumes references, comments, processing
// instructions, CDATA sections, child elements and character data until
// the input is exhausted or an end tag ("</") is reached.
void checkContent(ref string s) // rule 43
{
    mixin Check!("Content");

    try
    {
        while (s.length != 0)
        {
            // Move the rollback point forward so that a failure restores s
            // to the start of the item being checked, not of the content.
            old = s;

            if (s.startsWith("&"))
                checkReference(s);
            else if (s.startsWith("<!--"))
                checkComment(s);
            else if (s.startsWith("<?"))
                checkPI(s);
            else if (s.startsWith(cdata))
                checkCDSect(s);
            else if (s.startsWith("</"))
                break;
            else if (s.startsWith("<"))
                checkElement(s);
            else
                checkCharData(s);
        }
    }
    catch (Err e)
    {
        fail(e);
    }
}
2535 | |
// Rule 66 (CharRef): consumes a numeric character reference, "&#" followed
// by decimal digits or 'x' and hexadecimal digits, terminated by ';'.
// The referenced code point is returned in `c`. Fails if there are no
// digits, if the value is not a legal XML character, or if the ';' is
// missing.
void checkCharRef(ref string s, out dchar c) // rule 66
{
    mixin Check!("CharRef");

    c = 0;
    try { checkLiteral("&#",s); } catch(Err e) { fail(e); }
    int radix = 10;
    if (s.length != 0 && s[0] == 'x')
    {
        s = s[1..$];
        radix = 16;
    }
    if (s.length == 0) fail("unterminated character reference");
    if (s[0] == ';')
        fail("character reference must have at least one digit");

    // Once the value exceeds the largest code point it can never become
    // legal again. Stop accumulating at that point so that a long digit
    // run cannot overflow the 32-bit dchar and wrap round to a value that
    // isChar() would accept.
    bool outOfRange = false;
    while (s.length != 0)
    {
        char d = s[0];
        int n;
        if (d >= '0' && d <= '9')      n = d - '0';
        else if (d >= 'a' && d <= 'f') n = d - 'a' + 10;
        else if (d >= 'A' && d <= 'F') n = d - 'A' + 10;
        else break;                     // not a digit in any radix
        if (n >= radix) break;          // hex letter in a decimal reference
        if (!outOfRange)
        {
            c *= radix;
            c += n;
            if (c > 0x10FFFF) outOfRange = true;
        }
        s = s[1..$];
    }
    if (outOfRange || !isChar(c))
        fail(format("U+%04X is not a legal character",c));
    if (s.length == 0 || s[0] != ';') fail("expected ;");
    else s = s[1..$];
}
2584 | |
// Rule 67 (Reference): a character reference when the input starts with
// "&#", otherwise an entity reference.
void checkReference(ref string s) // rule 67
{
    mixin Check!("Reference");

    try
    {
        if (s.startsWith("&#"))
        {
            dchar ignored;
            checkCharRef(s,ignored);
        }
        else
        {
            checkEntityRef(s);
        }
    }
    catch (Err e)
    {
        fail(e);
    }
}
2597 | |
// Rule 68 (EntityRef): '&', a name, and ';'. The entity name is only
// checked for syntax; it is not looked up.
void checkEntityRef(ref string s) // rule 68
{
    mixin Check!("EntityRef");

    try
    {
        string entityName;
        checkLiteral("&",s);
        checkName(s,entityName);
        checkLiteral(";",s);
    }
    catch (Err e)
    {
        fail(e);
    }
}
2611 | |
// Rule 81 (EncName): at least one ASCII letter, followed by any run of
// ASCII letters, digits, '_', '.' and '-'.
void checkEncName(ref string s) // rule 81
{
    mixin Check!("EncName");

    // Leading letters; there must be at least one.
    munch(s,"a-zA-Z");
    if (s is old)
        fail();

    // Remainder of the name.
    munch(s,"a-zA-Z0-9_.-");
}
2620 | |
// Rule 80 (EncodingDecl): whitespace, the word "encoding", an Eq, and a
// quoted encoding name.
void checkEncodingDecl(ref string s) // rule 80
{
    mixin Check!("EncodingDecl");

    try
    {
        checkSpace(s);
        checkLiteral("encoding",s);
        checkEq(s);
        quoted!(checkEncName)(s);
    }
    catch (Err e)
    {
        fail(e);
    }
}
2634 | |
2635 // Helper functions | |
2636 | |
// Requires s to begin with `literal` and consumes it; otherwise fails with
// a message naming the expected text.
void checkLiteral(string literal,ref string s)
{
    mixin Check!("Literal");

    if (!s.startsWith(literal))
    {
        fail("Expected literal \""~literal~"\"");
    }
    s = s[literal.length..$];
}
2644 | |
// Skips forward to the first occurrence of `end` and consumes it. Throws
// an Err directly (without rolling s back) when `end` does not occur.
void checkEnd(string end,ref string s)
{
    // Deliberately no mixin Check here.

    auto pos = s.indexOf(end);
    if (pos == -1)
    {
        throw new Err(s,"Unable to find terminating \""~end~"\"");
    }
    s = s[pos..$];
    checkLiteral(end,s);
}
2654 | |
2655 // Metafunctions -- none of these use mixin Check | |
2656 | |
// Optional: runs f once and deliberately discards an Err thrown by it, so
// that a non-matching optional item is not a failure.
void opt(alias f)(ref string s)
{
    try
    {
        f(s);
    }
    catch (Err e)
    {
        // Not present: that is fine for an optional item.
    }
}
2661 | |
// One or more: f must succeed once (its Err propagates otherwise), after
// which further repetitions are consumed by star.
void plus(alias f)(ref string s)
{
    f(s);           // mandatory first occurrence
    star!(f)(s);    // zero or more further occurrences
}
2667 | |
// Zero or more: applies f repeatedly until the input is empty or f throws
// an Err, which ends the repetition without being propagated.
void star(alias f)(ref string s)
{
    try
    {
        while (s.length != 0)
            f(s);
    }
    catch (Err e)
    {
        // First non-match terminates the repetition.
    }
}
2676 | |
// Applies f between a matching pair of quotes: single quotes when s starts
// with one, double quotes otherwise.
void quoted(alias f)(ref string s)
{
    string quote = s.startsWith("'") ? "'" : "\"";

    checkLiteral(quote,s);
    f(s);
    checkLiteral(quote,s);
}
2692 | |
// Sequence: applies f and then g to the same input, in that order.
void seq(alias f,alias g)(ref string s)
{
    f(s);   // first item
    g(s);   // second item, starting where f left off
}
2698 } | |
2699 | |
/**
 * Check an entire XML document for well-formedness
 *
 * Params:
 *      s = the document to be checked, passed as a string
 *
 * Throws: CheckException if the document is not well formed
 *
 * CheckException's toString() method will yield the complete hierarchy of
 * parse failure (the XML equivalent of a stack trace), giving the line and
 * column number of every failure at every level.
 */
void check(string s)
{
    // The check functions advance s as they consume input, so keep the
    // complete document for computing line and column numbers. Without
    // this, "Junk found after document" was always reported at line 1,
    // column 1, because by then s held only the trailing junk.
    string entire = s;
    try
    {
        checkChars(s);
        checkDocument(s);
        if (s.length != 0) throw new Err(s,"Junk found after document");
    }
    catch(Err e)
    {
        e.complete(entire);
        throw e;
    }
}
2726 | |
// Checks that a mismatched end tag (</genres> closing <genre>) is reported
// by check() with a message naming both tags. Currently disabled.
unittest
{
  version (none) // WHY ARE WE NOT RUNNING THIS UNIT TEST?
  {
    string doc = q"[<?xml version="1.0"?>
    <catalog>
       <book id="bk101">
          <author>Gambardella, Matthew</author>
          <title>XML Developer's Guide</title>
          <genre>Computer</genre>
          <price>44.95</price>
          <publish_date>2000-10-01</publish_date>
          <description>An in-depth look at creating applications
          with XML.</description>
       </book>
       <book id="bk102">
          <author>Ralls, Kim</author>
          <title>Midnight Rain</title>
          <genre>Fantasy</genres>
          <price>5.95</price>
          <publish_date>2000-12-16</publish_date>
          <description>A former architect battles corporate zombies,
          an evil sorceress, and her own childhood to become queen
          of the world.</description>
       </book>
       <book id="bk103">
          <author>Corets, Eva</author>
          <title>Maeve Ascendant</title>
          <genre>Fantasy</genre>
          <price>5.95</price>
          <publish_date>2000-11-17</publish_date>
          <description>After the collapse of a nanotechnology
          society in England, the young survivors lay the
          foundation for a new society.</description>
       </book>
    </catalog>
    ]";

    try
    {
        check(doc);
        assert(false); // the document above must be rejected
    }
    catch(CheckException e)
    {
        int n = e.toString().indexOf("end tag name \"genres\" differs"
            " from start tag name \"genre\"");
        assert(n != -1);
    }
  }
}
2776 | |
// A small well-formed document containing a comment between elements must
// pass check() without throwing.
unittest
{
    string doc = q"EOS
<?xml version="1.0"?>
<set>
    <one>A</one>
    <!-- comment -->
    <two>B</two>
</set>
EOS";
    try
    {
        check(doc);
    }
    catch (CheckException failure)
    {
        // Surface the full parse-failure report as the assertion message.
        assert(0, failure.toString());
    }
}
2796 | |
// Entity references in attribute values and in element text must arrive
// at the start-tag and end-tag handlers already decoded.
unittest
{
    // The ampersands are written as &amp; so that the fixture is
    // well-formed XML and the decoding path is actually exercised.
    string s = q"EOS
<?xml version="1.0" encoding="utf-8"?> <Tests>
    <Test thing="What &amp; Up">What &amp; Up Second</Test>
</Tests>
EOS";
    auto xml = new DocumentParser(s);

    xml.onStartTag["Test"] = (ElementParser xml) {
        assert(xml.tag.attr["thing"] == "What & Up");
    };

    xml.onEndTag["Test"] = (in Element e) {
        assert(e.text == "What & Up Second");
    };
    xml.parse();
}
2815 | |
/** The base class for exceptions thrown by this module */
class XMLException : Exception
{
    /// Constructs the exception with the given message.
    this(string msg)
    {
        super(msg);
    }
}
2818 | |
2819 // Other exceptions | |
2820 | |
/// Thrown during Comment constructor
class CommentException : XMLException
{
    // Private: constructible only from within this module.
    private this(string msg)
    {
        super(msg);
    }
}
2824 | |
/// Thrown during CData constructor
class CDataException : XMLException
{
    // Private: constructible only from within this module.
    private this(string msg)
    {
        super(msg);
    }
}
2828 | |
/// Thrown during XMLInstruction constructor
class XIException : XMLException
{
    // Private: constructible only from within this module.
    private this(string msg)
    {
        super(msg);
    }
}
2832 | |
/// Thrown during ProcessingInstruction constructor
class PIException : XMLException
{
    // Private: constructible only from within this module.
    private this(string msg)
    {
        super(msg);
    }
}
2836 | |
/// Thrown during Text constructor
class TextException : XMLException
{
    // Private: constructible only from within this module.
    private this(string msg)
    {
        super(msg);
    }
}
2840 | |
/// Thrown during decode()
class DecodeException : XMLException
{
    // Private: constructible only from within this module.
    private this(string msg)
    {
        super(msg);
    }
}
2844 | |
/// Thrown if comparing with wrong type
class InvalidTypeException : XMLException
{
    // Private: constructible only from within this module.
    private this(string msg)
    {
        super(msg);
    }
}
2848 | |
/// Thrown when parsing for Tags
class TagException : XMLException
{
    // Private: constructible only from within this module.
    private this(string msg)
    {
        super(msg);
    }
}
2852 | |
/**
 * Thrown during check()
 */
class CheckException : XMLException
{
    CheckException err; /// Parent in hierarchy
    private string tail; // input remaining at the point of failure
    /**
     * Name of production rule which failed to parse,
     * or specific error message
     */
    string msg;
    uint line = 0; /// Line number at which parse failure occurred
    uint column = 0; /// Column number at which parse failure occurred

    // tail = the unconsumed input at the failure point, msg = production
    // name or message, err = optional underlying cause.
    private this(string tail,string msg,Err err=null)
    {
        super(null);
        this.tail = tail;
        this.msg = msg;
        this.err = err;
    }

    // Fills in line and column for this error and, recursively, for its
    // chain of causes. `entire` must be the complete document, of which
    // `tail` is a suffix. The column is counted in code points.
    private void complete(string entire)
    {
        string head = entire[0..$-tail.length];
        // Offset just past the last newline (0 when there is none).
        auto n = head.lastIndexOf('\n') + 1;
        // Lengths and counts are size_t; narrow explicitly to the uint
        // fields rather than relying on an implicit conversion that is
        // rejected on 64-bit targets.
        line = cast(uint)(head.count("\n") + 1);
        dstring t;
        transcode(head[n..$],t);
        column = cast(uint)(t.length + 1);
        if (err !is null) err.complete(entire);
    }

    // Renders the chain of errors, causes first, one per line. Each line
    // is prefixed by its position once complete() has been called.
    override const string toString()
    {
        string s;
        if (line != 0) s = format("Line %d, column %d: ",line,column);
        s ~= msg;
        s ~= '\n';
        if (err !is null) s = err.toString ~ s;
        return s;
    }
}
2897 | |
2898 private alias CheckException Err; | |
2899 | |
2900 // Private helper functions | |
2901 | |
2902 private | |
2903 { | |
// Downcasts o to T, throwing InvalidTypeException when o is not a T (or
// is null).
T toType(T)(Object o)
{
    T converted = cast(T)(o);
    if (converted !is null)
        return converted;

    throw new InvalidTypeException("Attempt to compare a "
        ~ T.stringof ~ " with an instance of another type");
}
2914 | |
// Splits off and returns the first n characters of s, leaving the rest in
// s. An n of -1 (the "not found" result of indexOf) takes the whole string.
// n is a ptrdiff_t so that it can hold any string offset as well as the -1
// sentinel; int arguments still convert implicitly.
string chop(ref string s, ptrdiff_t n)
{
    if (n == -1) n = s.length;
    string t = s[0..n];
    s = s[n..$];
    return t;
}
2922 | |
// If s starts with c, consumes it and returns true; otherwise leaves s
// untouched and returns false.
bool optc(ref string s, char c)
{
    if (s.length == 0 || s[0] != c)
        return false;
    s = s[1..$];
    return true;
}
2929 | |
// Requires s to start with c and consumes it; throws a TagException (with
// an empty message) when s is empty or starts with anything else.
void reqc(ref string s, char c)
{
    immutable bool matches = s.length != 0 && s[0] == c;
    if (!matches)
        throw new TagException("");
    s = s[1..$];
}
2935 | |
// Folds the code points of s into the running hash h using
// h = h * 11 + c, and returns the result. Pass a previous result as h to
// chain several strings together.
hash_t hash(string s,hash_t h=0)
{
    foreach (dchar ch; s)
    {
        h *= 11;
        h += ch;
    }
    return h;
}
2941 | |
2942 // Definitions from the XML specification | |
2943 immutable CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD, | |
2944 0x10000,0x10FFFF]; | |
2945 immutable BaseCharTable=[0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8, | |
2946 0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A, | |
2947 0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250, | |
2948 0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E, | |
2949 0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE, | |
2950 0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451, | |
2951 0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0, | |
2952 0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561, | |
2953 0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671, | |
2954 0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5, | |
2955 0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F, | |
2956 0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC, | |
2957 0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13, | |
2958 0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59, | |
2959 0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F, | |
2960 0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD, | |
2961 0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A, | |
2962 0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F, | |
2963 0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C, | |
2964 0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7, | |
2965 0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35, | |
2966 0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA, | |
2967 0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E, | |
2968 0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30, | |
2969 0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87, | |
2970 0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1, | |
2971 0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0, | |
2972 0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49, | |
2973 0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105, | |
2974 0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E, | |
2975 0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154, | |
2976 0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167, | |
2977 0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E, | |
2978 0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA, | |
2979 0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00, | |
2980 0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48, | |
2981 0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F, | |
2982 0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6, | |
2983 0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6, | |
2984 0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041, | |
2985 0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3]; | |
2986 immutable IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5]; | |
2987 immutable CombiningCharTable=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486, | |
2988 0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2, | |
2989 0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF, | |
2990 0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C, | |
2991 0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983, | |
2992 0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8, | |
2993 0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C, | |
2994 0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D, | |
2995 0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9, | |
2996 0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48, | |
2997 0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8, | |
2998 0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48, | |
2999 0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8, | |
3000 0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48, | |
3001 0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E, | |
3002 0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19, | |
3003 0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F, | |
3004 0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD, | |
3005 0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F, | |
3006 0x3099,0x3099,0x309A,0x309A]; | |
3007 immutable DigitTable=[0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966, | |
3008 0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7, | |
3009 0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0, | |
3010 0x0ED9,0x0F20,0x0F29]; | |
3011 immutable ExtenderTable=[0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387, | |
3012 0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031, | |
3013 0x3035,0x309D,0x309E,0x30FC,0x30FE]; | |
3014 | |
// Binary search for c in a range table. The table is read as consecutive
// (low, high) pairs of inclusive bounds, so it must have an even number of
// entries, and the search assumes the pairs are sorted in ascending order.
// Returns true when c falls inside one of the ranges.
bool lookup(const(int)[] table, int c)
{
    while (table.length != 0)
    {
        // Index of the low bound of the middle pair: halve the length and
        // round down to an even index. Kept as size_t, the native type of
        // array lengths, to avoid narrowing to int.
        size_t m = (table.length >> 1) & ~cast(size_t)1;
        if (c < table[m])
        {
            table = table[0..m];
        }
        else if (c > table[m+1])
        {
            table = table[m+2..$];
        }
        else return true;
    }
    return false;
}
3032 | |
// Builds a short preview of s: each char below 0x20 or above 0x7F is
// replaced by '.', and once 40 chars have been produced "___" is appended
// and the rest of s is ignored.
string startOf(string s)
{
    string preview;
    foreach (char ch; s)
    {
        immutable bool keep = ch >= 0x20 && ch <= 0x7F;
        preview ~= keep ? ch : '.';
        if (preview.length >= 40)
        {
            preview ~= "___";
            break;
        }
    }
    return preview;
}
3043 | |
// Aborts the current operation by throwing an XMLException carrying the
// given message (null by default).
void exit(string s=null)
{
    auto failure = new XMLException(s);
    throw failure;
}
3048 } | |
3049 |