comparison trunk/src/dil/Unicode.d @ 769:5e3ef1b2011c

Added and improved documentation.
author Aziz Köksal <aziz.koeksal@gmail.com>
date Sun, 17 Feb 2008 02:21:55 +0100
parents 4579e8505d5e
children c1d5cfd7aa44
comparison
equal deleted inserted replaced
768:d84349a60f5c 769:5e3ef1b2011c
9 const dchar REPLACEMENT_CHAR = '\uFFFD'; 9 const dchar REPLACEMENT_CHAR = '\uFFFD';
10 const char[3] REPLACEMENT_STR = \uFFFD; /// Ditto 10 const char[3] REPLACEMENT_STR = \uFFFD; /// Ditto
11 /// Invalid character, returned on errors. 11 /// Invalid character, returned on errors.
12 const dchar ERROR_CHAR = 0xD800; 12 const dchar ERROR_CHAR = 0xD800;
13 13
14 /++ 14 /// Returns: true if this character is not a surrogate
15 Returns true if this character is not a surrogate 15 /// code point and not higher than 0x10FFFF.
16 code point and not higher than 0x10FFFF.
17 +/
18 bool isValidChar(dchar d) 16 bool isValidChar(dchar d)
19 { 17 {
20 return d < 0xD800 || d > 0xDFFF && d <= 0x10FFFF; 18 return d < 0xD800 || d > 0xDFFF && d <= 0x10FFFF;
21 } 19 }
22 20
23 /++ 21 /// There are a total of 66 noncharacters.
24 There are a total of 66 noncharacters. 22 /// Returns: true if this is one of them.
25 Returns: true if this is one of them. 23 /// See_also: Chapter 16.7 Noncharacters in Unicode 5.0
26 See_also: Chapter 16.7 Noncharacters in Unicode 5.0
27 +/
28 bool isNoncharacter(dchar d) 24 bool isNoncharacter(dchar d)
29 { 25 {
30 return 0xFDD0 <= d && d <= 0xFDEF || // 32 26 return 0xFDD0 <= d && d <= 0xFDEF || // 32
31 d <= 0x10FFFF && (d & 0xFFFF) >= 0xFFFE; // 34 27 d <= 0x10FFFF && (d & 0xFFFF) >= 0xFFFE; // 34
32 } 28 }
220 pair[1] = (c & 0x3FF) | 0xDC00; 216 pair[1] = (c & 0x3FF) | 0xDC00;
221 str ~= pair; 217 str ~= pair;
222 } 218 }
223 } 219 }
224 220
225 /++ 221 /// Returns a decoded character from a UTF-16 sequence.
226 Returns a decoded character from a UTF-16 sequence. 222 /// Returns: ERROR_CHAR in case of an error in the sequence.
227 In case of an error in the sequence ERROR_CHAR is returned. 223 /// Params:
228 Params: 224 /// str = the UTF-16 sequence.
229 str = the UTF-16 sequence. 225 /// index = where to start from.
230 index = where to start from.
231 +/
232 dchar decode(wchar[] str, ref size_t index) 226 dchar decode(wchar[] str, ref size_t index)
233 { 227 {
234 assert(str.length && index < str.length); 228 assert(str.length && index < str.length);
235 dchar c = str[index]; 229 dchar c = str[index];
236 if (0xD800 > c || c > 0xDFFF) 230 if (0xD800 > c || c > 0xDFFF)
252 } 246 }
253 } 247 }
254 return ERROR_CHAR; 248 return ERROR_CHAR;
255 } 249 }
256 250
257 /++ 251 /// Returns a decoded character from a UTF-16 sequence.
258 Returns a decoded character from a UTF-16 sequence. 252 /// Returns: ERROR_CHAR in case of an error in the sequence.
259 In case of an error in the sequence ERROR_CHAR is returned. 253 /// Params:
260 Params: 254 /// p = start of the UTF-16 sequence.
261 p = start of the UTF-16 sequence. 255 /// end = one past the end of the sequence.
262 end = one past the end of the sequence.
263 +/
264 dchar decode(ref wchar* p, wchar* end) 256 dchar decode(ref wchar* p, wchar* end)
265 { 257 {
266 assert(p && p < end); 258 assert(p && p < end);
267 dchar c = *p; 259 dchar c = *p;
268 if (0xD800 > c || c > 0xDFFF) 260 if (0xD800 > c || c > 0xDFFF)