Mercurial > projects > dil
comparison trunk/src/dil/Converter.d @ 739:49fe21aa387c
Added sanitizeText() to dil.Converter.
Cleaned predefined.ddoc up a bit.
Removed makeString() from dil.doc.Macro.
Added REPLACEMENT_STR to dil.Unicode.
author | Aziz Köksal <aziz.koeksal@gmail.com> |
---|---|
date | Sat, 09 Feb 2008 14:24:35 +0100 |
parents | 9e811db780a6 |
children | 90668b83ae5e |
comparison
equal
deleted
inserted
replaced
738:2afcc305831a | 739:49fe21aa387c |
---|---|
235 assert(0); | 235 assert(0); |
236 } | 236 } |
237 return text; | 237 return text; |
238 } | 238 } |
239 } | 239 } |
240 | |
241 /// Sanitizes text in place: replaces invalid UTF-8 sequences with U+FFFD (only where there is enough space), | |
242 /// and converts '\r', "\r\n" and the sequences accepted by isUnicodeNewline() to '\n'. Returns the (possibly shortened) text, or null if text is empty. | |
243 string sanitizeText(string text) | |
244 { | |
245 if (!text.length) | |
246 return null; | |
247 | |
248 char* p = text.ptr; | |
249 char* end = p + text.length; | |
250 char* q = p; | |
251 | |
252 for (; p < end; p++, q++) | |
253 { | |
254 assert(q <= p); | |
255 switch (*p) | |
256 { | |
257 case '\r': | |
258 if (p+1 < end && p[1] == '\n') | |
259 p++; | |
260 case '\n': | |
261 *q = '\n'; | |
262 continue; | |
263 default: | |
264 if (isascii(*p)) | |
265 break; | |
266 if (p+2 < end && isUnicodeNewline(p)) | |
267 { | |
268 p += 2; | |
269 goto case '\n'; | |
270 } | |
271 auto p2 = p; // Remember the beginning of the UTF-8 sequence; it is the copy source if the sequence is valid. | |
272 dchar c = decode(p, end); | |
273 if (c == ERROR_CHAR) | |
274 { // Invalid sequence: skip its trail bytes until the next non-trail byte or the end of the text. | |
275 while (++p < end && isTrailByte(*p)) | |
276 {} | |
277 alias REPLACEMENT_STR R; | |
278 if (q+2 < p) // Write the 3 code units of REPLACEMENT_STR only if they fit before p. NOTE(review): otherwise nothing is written, yet q still advances in the loop header, keeping whatever byte was at *q; confirm this is intended. | |
279 (*q = R[0]), (*++q = R[1]), (*++q = R[2]); | |
280 p--; | |
281 } | |
282 else | |
283 { // Valid sequence: copy its code units down to the write position q. | |
284 while (p2 <= p) // p is expected to point to the last trail byte after decode(); confirm against decode(). | |
285 *q++ = *p2++; // Copy code units. | |
286 q--; | |
287 } | |
288 continue; | |
289 } | |
290 assert(isascii(*p)); | |
291 *q = *p; | |
292 } | |
293 assert(p == end); | |
294 text.length = text.length - (p - q); | |
295 //text = text.ptr[0 .. q - text.ptr]; // Another way. | |
296 return text; | |
297 } |