Mercurial > projects > dil
comparison trunk/src/dil/Unicode.d @ 769:5e3ef1b2011c
Added and improved documentation.
author | Aziz Köksal <aziz.koeksal@gmail.com> |
---|---|
date | Sun, 17 Feb 2008 02:21:55 +0100 |
parents | 4579e8505d5e |
children | c1d5cfd7aa44 |
comparison
equal
deleted
inserted
replaced
768:d84349a60f5c | 769:5e3ef1b2011c |
---|---|
9 const dchar REPLACEMENT_CHAR = '\uFFFD'; | 9 const dchar REPLACEMENT_CHAR = '\uFFFD'; |
10 const char[3] REPLACEMENT_STR = \uFFFD; /// Ditto | 10 const char[3] REPLACEMENT_STR = \uFFFD; /// Ditto |
11 /// Invalid character, returned on errors. | 11 /// Invalid character, returned on errors. |
12 const dchar ERROR_CHAR = 0xD800; | 12 const dchar ERROR_CHAR = 0xD800; |
13 | 13 |
14 /++ | 14 /// Returns: true if this character is not a surrogate |
15 Returns true if this character is not a surrogate | 15 /// code point and not higher than 0x10FFFF. |
16 code point and not higher than 0x10FFFF. | |
17 +/ | |
18 bool isValidChar(dchar d) | 16 bool isValidChar(dchar d) |
19 { | 17 { |
20 return d < 0xD800 || d > 0xDFFF && d <= 0x10FFFF; | 18 return d < 0xD800 || d > 0xDFFF && d <= 0x10FFFF; |
21 } | 19 } |
22 | 20 |
23 /++ | 21 /// There are a total of 66 noncharacters. |
24 There are a total of 66 noncharacters. | 22 /// Returns: true if this is one of them. |
25 Returns: true if this is one of them. | 23 /// See_also: Chapter 16.7 Noncharacters in Unicode 5.0 |
26 See_also: Chapter 16.7 Noncharacters in Unicode 5.0 | |
27 +/ | |
28 bool isNoncharacter(dchar d) | 24 bool isNoncharacter(dchar d) |
29 { | 25 { |
30 return 0xFDD0 <= d && d <= 0xFDEF || // 32 | 26 return 0xFDD0 <= d && d <= 0xFDEF || // 32 |
31 d <= 0x10FFFF && (d & 0xFFFF) >= 0xFFFE; // 34 | 27 d <= 0x10FFFF && (d & 0xFFFF) >= 0xFFFE; // 34 |
32 } | 28 } |
220 pair[1] = (c & 0x3FF) | 0xDC00; | 216 pair[1] = (c & 0x3FF) | 0xDC00; |
221 str ~= pair; | 217 str ~= pair; |
222 } | 218 } |
223 } | 219 } |
224 | 220 |
225 /++ | 221 /// Returns a decoded character from a UTF-16 sequence. |
226 Returns a decoded character from a UTF-16 sequence. | 222 /// Returns: ERROR_CHAR in case of an error in the sequence. |
227 In case of an error in the sequence ERROR_CHAR is returned. | 223 /// Params: |
228 Params: | 224 /// str = the UTF-16 sequence. |
229 str = the UTF-16 sequence. | 225 /// index = where to start from. |
230 index = where to start from. | |
231 +/ | |
232 dchar decode(wchar[] str, ref size_t index) | 226 dchar decode(wchar[] str, ref size_t index) |
233 { | 227 { |
234 assert(str.length && index < str.length); | 228 assert(str.length && index < str.length); |
235 dchar c = str[index]; | 229 dchar c = str[index]; |
236 if (0xD800 > c || c > 0xDFFF) | 230 if (0xD800 > c || c > 0xDFFF) |
252 } | 246 } |
253 } | 247 } |
254 return ERROR_CHAR; | 248 return ERROR_CHAR; |
255 } | 249 } |
256 | 250 |
257 /++ | 251 /// Returns a decoded character from a UTF-16 sequence. |
258 Returns a decoded character from a UTF-16 sequence. | 252 /// Returns: ERROR_CHAR in case of an error in the sequence. |
259 In case of an error in the sequence ERROR_CHAR is returned. | 253 /// Params: |
260 Params: | 254 /// p = start of the UTF-16 sequence. |
261 p = start of the UTF-16 sequence. | 255 /// end = one past the end of the sequence. |
262 end = one past the end of the sequence. | |
263 +/ | |
264 dchar decode(ref wchar* p, wchar* end) | 256 dchar decode(ref wchar* p, wchar* end) |
265 { | 257 { |
266 assert(p && p < end); | 258 assert(p && p < end); |
267 dchar c = *p; | 259 dchar c = *p; |
268 if (0xD800 > c || c > 0xDFFF) | 260 if (0xD800 > c || c > 0xDFFF) |