annotate trunk/src/dil/Unicode.d @ 747:00f872d949ea

Added method scanCommentText() to DDocEmitter. Added method writeParams() and scanCodeSection(). Added method scanMacro() to MacroParser. Made fixes and improvements to the MacroExpander. Applied other minor fixes.
author Aziz Köksal <aziz.koeksal@gmail.com>
date Mon, 11 Feb 2008 03:15:45 +0100
parents 35184354a502
children 4579e8505d5e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
510
dd3ce87b3569 Added module dil.Unicode.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
1 /++
dd3ce87b3569 Added module dil.Unicode.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
2 Author: Aziz Köksal
dd3ce87b3569 Added module dil.Unicode.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
3 License: GPL3
dd3ce87b3569 Added module dil.Unicode.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
4 +/
dd3ce87b3569 Added module dil.Unicode.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
5 module dil.Unicode;
629
d050e211402b Moved files in src/std/ to src/util/.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents: 518
diff changeset
6 public import util.uni : isUniAlpha;
510
dd3ce87b3569 Added module dil.Unicode.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
7
dd3ce87b3569 Added module dil.Unicode.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
8 /// U+FFFD = �. Used to replace invalid Unicode characters.
dd3ce87b3569 Added module dil.Unicode.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
9 const dchar REPLACEMENT_CHAR = '\uFFFD';
739
49fe21aa387c Added sanitizeText() to dil.Converter.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents: 737
diff changeset
10 const char[3] REPLACEMENT_STR = \uFFFD; /// Ditto
510
dd3ce87b3569 Added module dil.Unicode.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
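11 /// Invalid character, returned on errors. Its value is a surrogate code point, which isValidChar() rejects, so it cannot be confused with a successfully decoded character.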
dd3ce87b3569 Added module dil.Unicode.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
12 const dchar ERROR_CHAR = 0xD800;
dd3ce87b3569 Added module dil.Unicode.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
13
dd3ce87b3569 Added module dil.Unicode.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
/++
  Tells whether d may be used as a Unicode character value.

  Returns: true if d is not a surrogate code point (0xD800..0xDFFF)
  and is not greater than 0x10FFFF.
+/
bool isValidChar(dchar d)
{
  if (d < 0xD800)
    return true; // Below the surrogate range.
  if (d <= 0xDFFF)
    return false; // Inside the surrogate range.
  return d <= 0x10FFFF; // Above the surrogates; must not exceed the upper limit.
}
dd3ce87b3569 Added module dil.Unicode.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
22
dd3ce87b3569 Added module dil.Unicode.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
/++
  Tests whether d is one of the 66 Unicode noncharacters.

  Returns: true if d is in U+FDD0..U+FDEF, or if d is not greater than
  0x10FFFF and its lower 16 bits are 0xFFFE or 0xFFFF.
  See_also: Chapter 16.7 Noncharacters in Unicode 5.0
+/
bool isNoncharacter(dchar d)
{
  // The contiguous block U+FDD0..U+FDEF contributes 32 noncharacters.
  if (d >= 0xFDD0 && d <= 0xFDEF)
    return true;
  // The remaining 34 are the values up to 0x10FFFF ending in FFFE or FFFF.
  if (d > 0x10FFFF)
    return false;
  return (d & 0xFFFF) >= 0xFFFE;
}
dd3ce87b3569 Added module dil.Unicode.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
33
747
00f872d949ea Added method scanCommentText() to DDocEmitter.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents: 741
diff changeset
/// Returns: true if b has the bit pattern 10xx_xxxx, i.e. it is a
/// trail (continuation) byte of a UTF-8 sequence.
bool isTrailByte(ubyte b)
{
  // 10xx_xxxx covers exactly the values 0x80..0xBF.
  return b >= 0x80 && b < 0xC0;
}
dd3ce87b3569 Added module dil.Unicode.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
39
747
00f872d949ea Added method scanCommentText() to DDocEmitter.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents: 741
diff changeset
/// Returns: true if b has the bit pattern 11xx_xxxx, i.e. it is a
/// lead byte of a multi-byte UTF-8 sequence.
bool isLeadByte(ubyte b)
{
  // Both top bits set is the same as being in the range 0xC0..0xFF.
  return b >= 0xC0;
}
dd3ce87b3569 Added module dil.Unicode.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
45
722
ceaac6a24258 Added isUnicodeAlpha() for DDocParser and MacroParser.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents: 629
diff changeset
/// Checks whether a non-ASCII Unicode alpha character starts at ref_p.
///
/// ASCII bytes (< 0x80) are never accepted here. Otherwise the sequence
/// is decoded with decode() and tested with isUniAlpha().
/// Params:
///   ref_p = start of the sequence; updated only when true is returned,
///           to the position decode() leaves its pointer at.
///   end   = one past the last readable byte.
/// Returns: true if an alpha character was recognized.
bool isUnicodeAlpha(ref char* ref_p, char* end)
in { assert(ref_p && ref_p < end); }
body
{
  if (*ref_p >= 0x80)
  {
    // Decode through a copy so that ref_p stays untouched on failure.
    auto cursor = ref_p;
    auto decoded = decode(cursor, end);
    if (isUniAlpha(decoded))
    {
      ref_p = cursor;
      return true;
    }
  }
  return false;
}
ceaac6a24258 Added isUnicodeAlpha() for DDocParser and MacroParser.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents: 629
diff changeset
60
ceaac6a24258 Added isUnicodeAlpha() for DDocParser and MacroParser.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents: 629
diff changeset
/// Decodes one character from a UTF-8 string.
///
/// Params:
///   str   = the UTF-8 sequence; must not be empty.
///   index = where to start decoding. On success it is set one past the
///           last byte of the decoded sequence; on error it is unchanged.
/// Returns: the decoded character, or ERROR_CHAR if the sequence is invalid.
dchar decode(char[] str, ref size_t index)
in { assert(str.length && index < str.length); }
out(c) { assert(isValidChar(c) || c == ERROR_CHAR); }
body
{
  char* p = str.ptr + index;
  char* end = str.ptr + str.length;

  // ASCII is handled here. The pointer overload moves its pointer past an
  // ASCII byte but leaves it ON the last trail byte of a multi-byte
  // sequence, so adding 1 below would skip a byte for ASCII input.
  if (*p < 0x80)
  {
    ++index;
    return *p;
  }

  dchar c = decode(p, end);
  if (c != ERROR_CHAR)
    index = p - str.ptr + 1; // p is on the last trail byte; step past it.
  return c;
}
510
dd3ce87b3569 Added module dil.Unicode.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
74
722
ceaac6a24258 Added isUnicodeAlpha() for DDocParser and MacroParser.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents: 629
diff changeset
/// Decodes one character from the UTF-8 sequence starting at ref_p.
///
/// Params:
///   ref_p = start of the sequence. For a valid multi-byte sequence it is
///           set to the last trail byte of that sequence; for an ASCII
///           byte it is incremented by one; on error it is unchanged.
///   end   = one past the last readable byte.
/// Returns: the decoded character, or ERROR_CHAR for a truncated,
///   malformed, overlong, 5/6-byte or otherwise invalid sequence.
dchar decode(ref char* ref_p, char* end)
in { assert(ref_p && ref_p < end); }
out(c) { assert(isValidChar(c) || c == ERROR_CHAR); }
body
{
  char* cur = ref_p;
  dchar c = *cur;

  // Single-byte (ASCII) sequence.
  if (c < 0x80)
  {
    ref_p++;
    return c;
  }

  // A second byte must exist and must be a trail byte.
  ++cur;
  if (cur >= end || !isTrailByte(*cur))
    return ERROR_CHAR;

  // Reject overlong sequences.
  if (c == 0xE0 || // 11100000 100xxxxx
      c == 0xF0 || // 11110000 1000xxxx
      c == 0xF8 || // 11111000 10000xxx
      c == 0xFC)   // 11111100 100000xx
  {
    if ((*cur & c) == 0x80)
      return ERROR_CHAR;
  }
  else if ((c & 0xFE) == 0xC0) // 1100000x
    return ERROR_CHAR;

  // Derive the number of trail bytes from the lead byte and strip its
  // length marker so that only the payload bits remain in c.
  uint trailBytes;
  if ((c & 0b1110_0000) == 0b1100_0000)
  { // 110xxxxx 10xxxxxx
    c &= 0b0001_1111;
    trailBytes = 1;
  }
  else if ((c & 0b1111_0000) == 0b1110_0000)
  { // 1110xxxx 10xxxxxx 10xxxxxx
    c &= 0b0000_1111;
    trailBytes = 2;
  }
  else if ((c & 0b1111_1000) == 0b1111_0000)
  { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    c &= 0b0000_0111;
    trailBytes = 3;
  }
  else
    // 5 and 6 byte UTF-8 sequences are not allowed yet.
    // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
    // 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
    return ERROR_CHAR;

  // The second byte has already been validated; append its six bits.
  c = (c << 6) | *cur & 0b0011_1111;
  // Every further trail byte is validated first, then appended.
  for (uint i = 1; i < trailBytes; i++)
  {
    if (!(++cur < end && isTrailByte(*cur)))
      return ERROR_CHAR;
    c = (c << 6) | *cur & 0b0011_1111;
  }

  assert(isTrailByte(*cur));

  if (!isValidChar(c))
    return ERROR_CHAR;
  ref_p = cur;
  return c;
}
dd3ce87b3569 Added module dil.Unicode.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
151
dd3ce87b3569 Added module dil.Unicode.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
/// Encodes a character as UTF-8 and appends it to str.
///
/// Params:
///   str = the string to append to.
///   c   = the character to encode; must satisfy isValidChar().
///
/// Exactly one sequence of 1 to 4 bytes is appended. 5 and 6 byte
/// sequences are never produced, because a valid character is not
/// greater than 0x10FFFF.
void encode(ref char[] str, dchar c)
{
  assert(isValidChar(c), "check if character is valid before calling encode().");

  char[4] b = void; // Scratch space for the longest sequence emitted.
  if (c < 0x80)
    str ~= c;
  // This must be an 'else if': otherwise an ASCII character would be
  // appended a second time as an overlong 2-byte sequence.
  else if (c < 0x800)
  {
    b[0] = 0xC0 | (c >> 6);
    b[1] = 0x80 | (c & 0x3F);
    str ~= b[0..2];
  }
  else if (c < 0x10000)
  {
    b[0] = 0xE0 | (c >> 12);
    b[1] = 0x80 | ((c >> 6) & 0x3F);
    b[2] = 0x80 | (c & 0x3F);
    str ~= b[0..3];
  }
  else if (c < 0x200000)
  {
    b[0] = 0xF0 | (c >> 18);
    b[1] = 0x80 | ((c >> 12) & 0x3F);
    b[2] = 0x80 | ((c >> 6) & 0x3F);
    b[3] = 0x80 | (c & 0x3F);
    str ~= b[0..4];
  }
  else
    assert(0);
}
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents: 510
diff changeset
205
8f86bb9ef715 Added module dil.Converter and dil.FileBOM.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents: 510
diff changeset
/// Encodes a character as UTF-16 and appends it to str.
///
/// Params:
///   str = the string to append to.
///   c   = the character to encode; must satisfy isValidChar().
///
/// Characters below 0x10000 are appended as a single code unit, all
/// others as a surrogate pair.
void encode(ref wchar[] str, dchar c)
in { assert(isValidChar(c)); }
body
{
  if (c >= 0x10000)
  {
    // Encode with surrogate pair.
    c -= 0x10000; // c'
    wchar[2] pair = void;
    // higher10bits(c') | 0b1101_10xx_xxxx_xxxx
    pair[0] = (c >> 10) | 0xD800;
    // lower10bits(c') | 0b1101_11yy_yyyy_yyyy
    pair[1] = (c & 0x3FF) | 0xDC00;
    str ~= pair;
    return;
  }
  // The character fits into a single UTF-16 code unit.
  str ~= cast(wchar)c;
}
dd3ce87b3569 Added module dil.Unicode.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
225
dd3ce87b3569 Added module dil.Unicode.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
/++
  Decodes a single code point from a UTF-16 string.

  A code unit outside the surrogate range (0xD800-0xDFFF) is returned as is
  and index is advanced by one. A high surrogate that is immediately followed
  by a low surrogate is combined with it into one code point and index is
  advanced by two. In every other case (lone low surrogate, high surrogate at
  the end of str, or high surrogate without a matching low surrogate)
  ERROR_CHAR is returned and index is left unchanged.
  Params:
    str = the UTF-16 sequence; must not be empty.
    index = where to start from; must be less than str.length.
+/
dchar decode(wchar[] str, ref size_t index)
{
  assert(str.length && index < str.length);
  dchar lead = str[index];

  // Not a surrogate: the code unit is the code point.
  if (lead < 0xD800 || 0xDFFF < lead)
  {
    index += 1;
    return lead;
  }

  // A low surrogate cannot start a pair, and a high surrogate needs
  // one more code unit after it.
  if (lead > 0xDBFF || index + 1 == str.length)
    return ERROR_CHAR;

  wchar trail = str[index + 1];
  if (trail < 0xDC00 || 0xDFFF < trail)
    return ERROR_CHAR;

  // ((lead - 0xD800) << 10) + 0x10000 == (lead - 0xD800 + 0x40) << 10
  //                                   == (lead - 0xD7C0) << 10
  dchar result = (lead - 0xD7C0) << 10;
  result |= (trail & 0x3FF);
  index += 2;
  return result;
}
dd3ce87b3569 Added module dil.Unicode.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
257
dd3ce87b3569 Added module dil.Unicode.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
/++
  Decodes a single code point from a UTF-16 sequence given as a pointer range.

  A code unit outside the surrogate range (0xD800-0xDFFF) is returned as is
  and p is advanced by one. A high surrogate that is immediately followed by
  a low surrogate is combined with it into one code point and p is advanced
  by two. In every other case ERROR_CHAR is returned and p is left unchanged.
  Params:
    p = start of the UTF-16 sequence; must be non-null and less than end.
    end = one past the end of the sequence.
+/
dchar decode(ref wchar* p, wchar* end)
{
  assert(p && p < end);
  dchar lead = *p;

  // Not a surrogate: the code unit is the code point.
  if (lead < 0xD800 || 0xDFFF < lead)
  {
    p += 1;
    return lead;
  }

  // A low surrogate cannot start a pair, and a high surrogate needs
  // one more code unit before end.
  if (lead > 0xDBFF || p + 1 == end)
    return ERROR_CHAR;

  wchar trail = p[1];
  if (trail < 0xDC00 || 0xDFFF < trail)
    return ERROR_CHAR;

  // (lead - 0xD7C0) << 10 folds the 0x10000 offset into the shift.
  dchar result = (lead - 0xD7C0) << 10;
  result |= (trail & 0x3FF);
  p += 2;
  return result;
}
dd3ce87b3569 Added module dil.Unicode.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
287
dd3ce87b3569 Added module dil.Unicode.
Aziz K?ksal <aziz.koeksal@gmail.com>
parents:
diff changeset
/++
  Decodes a single code point from a zero-terminated UTF-16 string.

  A code unit outside the surrogate range (0xD800-0xDFFF) is returned as is
  and p is advanced by one. A high surrogate that is immediately followed by
  a low surrogate is combined with it into one code point and p is advanced
  by two. In every other case ERROR_CHAR is returned and p is left unchanged.

  The terminator is not treated specially: if p points at it, 0 is returned
  and p is advanced past it.
  Params:
    p = start of the zero-terminated UTF-16 sequence; must be non-null.
+/
dchar decode(ref wchar* p)
{
  assert(p);
  dchar lead = *p;

  // Not a surrogate: the code unit is the code point.
  if (lead < 0xD800 || 0xDFFF < lead)
  {
    p += 1;
    return lead;
  }

  // A low surrogate cannot start a pair.
  if (lead > 0xDBFF)
    return ERROR_CHAR;

  // No explicit bounds check: lead is non-zero here, so in a
  // zero-terminated string p[1] is at worst the terminator, which
  // fails the low-surrogate test below.
  wchar trail = p[1];
  if (trail < 0xDC00 || 0xDFFF < trail)
    return ERROR_CHAR;

  // (lead - 0xD7C0) << 10 folds the 0x10000 offset into the shift.
  dchar result = (lead - 0xD7C0) << 10;
  result |= (trail & 0x3FF);
  p += 2;
  return result;
}