Mercurial > projects > ldc
comparison druntime/src/compiler/ldc/aApplyR.d @ 1458:e0b2d67cfe7c
Added druntime (this should be removed once it works).
author | Robert Clipsham <robert@octarineparrot.com> |
---|---|
date | Tue, 02 Jun 2009 17:43:06 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1456:7b218ec1044f | 1458:e0b2d67cfe7c |
---|---|
1 /** | |
2 * This code handles decoding UTF strings for foreach_reverse loops. There are | |
3 * 6 combinations of conversions between char, wchar, and dchar, and 2 of each | |
4 * of those (a one-argument and a two-argument form). | |
5 * | |
6 * Copyright: Copyright Digital Mars 2004 - 2009. | |
7 * License: <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>. | |
8 * Authors: Walter Bright, Sean Kelly | |
9 * | |
10 * Copyright Digital Mars 2004 - 2009. | |
11 * Distributed under the Boost Software License, Version 1.0. | |
12 * (See accompanying file LICENSE_1_0.txt or copy at | |
13 * http://www.boost.org/LICENSE_1_0.txt) | |
14 */ | |
15 module rt.aApplyR; | |
16 | |
17 /* This code handles decoding UTF strings for foreach_reverse loops. | |
18 * There are 6 combinations of conversions between char, wchar, | |
19 * and dchar, and 2 of each of those. | |
20 */ | |
21 | |
22 private import rt.util.utf; | |
23 | |
24 /**********************************************/ | |
25 /* 1 argument versions */ | |
26 | |
27 // dg is D, but _aApplyRcd() is C | |
28 extern (D) typedef int delegate(void *) dg_t; /* loop-body delegate for the 1 argument versions: it is handed the address of the current element, and a nonzero return value makes the _aApplyR*1 functions stop iterating and return that value */ | |
29 | |
30 extern (C) int _aApplyRcd1(in char[] aa, dg_t dg) /* Walks the UTF-8 array aa from its last code unit to its first, decodes each sequence into a dchar and passes the address of that dchar to dg. Stops at the first nonzero dg result and returns it; otherwise returns the last dg result (result's initial value when aa is empty). */ | |
31 { int result; | |
32 | |
33 debug(apply) printf("_aApplyRcd1(), len = %d\n", aa.length); /* NOTE(review): %d is paired with aa.length, which is a size_t - confirm on 64-bit targets */ | |
34 for (size_t i = aa.length; i != 0; ) | |
35 { dchar d; | |
36 | |
37 i--; | |
38 d = aa[i]; | |
39 if (d & 0x80) /* high bit set: this code unit belongs to a multi-byte sequence */ | |
40 { char c = cast(char)d; | |
41 uint j; /* bit position at which the next payload bits are OR-ed into d */ | |
42 uint m = 0x3F; /* payload mask for the lead byte; loses one bit per trailing byte consumed */ | |
43 d = 0; | |
44 while ((c & 0xC0) != 0xC0) /* keep stepping backwards until a byte whose top two bits are both set (the lead byte) is found; NOTE(review): the number of trailing bytes is not bounded here - confirm behaviour for malformed input */ | |
45 { if (i == 0) | |
46 onUnicodeError("Invalid UTF-8 sequence", 0); /* reached the front of aa without finding a lead byte; onUnicodeError is defined outside this file and presumably does not return - TODO confirm */ | |
47 i--; | |
48 d |= (c & 0x3F) << j; /* merge the low 6 bits of this trailing byte */ | |
49 j += 6; | |
50 m >>= 1; | |
51 c = aa[i]; | |
52 } | |
53 d |= (c & m) << j; /* merge the payload bits of the lead byte */ | |
54 } | |
55 result = dg(cast(void *)&d); | |
56 if (result) | |
57 break; | |
58 } | |
59 return result; | |
60 } | |
61 | |
62 unittest /* Runs foreach_reverse with a dchar loop variable over char strings (presumably lowered by the compiler to _aApplyRcd1) and checks the order of the decoded characters. */ | |
63 { | |
64 debug(apply) printf("_aApplyRcd1.unittest\n"); | |
65 | |
66 auto s = "hello"c[]; | |
67 int i; /* counts how many times the loop body has run */ | |
68 | |
69 foreach_reverse(dchar d; s) | |
70 { | |
71 switch (i) | |
72 { | |
73 case 0: assert(d == 'o'); break; | |
74 case 1: assert(d == 'l'); break; | |
75 case 2: assert(d == 'l'); break; | |
76 case 3: assert(d == 'e'); break; | |
77 case 4: assert(d == 'h'); break; | |
78 default: assert(0); | |
79 } | |
80 i++; | |
81 } | |
82 assert(i == 5); | |
83 | |
84 s = "a\u1234\U00100456b"; /* mixes 1-, 3- and 4-byte UTF-8 sequences */ | |
85 i = 0; | |
86 foreach_reverse(dchar d; s) | |
87 { | |
88 //printf("i = %d, d = %x\n", i, d); | |
89 switch (i) | |
90 { | |
91 case 0: assert(d == 'b'); break; | |
92 case 1: assert(d == '\U00100456'); break; | |
93 case 2: assert(d == '\u1234'); break; | |
94 case 3: assert(d == 'a'); break; | |
95 default: assert(0); | |
96 } | |
97 i++; | |
98 } | |
99 assert(i == 4); /* one iteration per code point, not per byte */ | |
100 } | |
101 | |
102 /*****************************/ | |
103 | |
104 extern (C) int _aApplyRwd1(in wchar[] aa, dg_t dg) /* Walks the UTF-16 array aa from its last code unit to its first, combines surrogate pairs into a single dchar and passes the address of that dchar to dg. Stops at the first nonzero dg result and returns it. */ | |
105 { int result; | |
106 | |
107 debug(apply) printf("_aApplyRwd1(), len = %d\n", aa.length); /* NOTE(review): %d is paired with aa.length, which is a size_t - confirm on 64-bit targets */ | |
108 for (size_t i = aa.length; i != 0; ) | |
109 { dchar d; | |
110 | |
111 i--; | |
112 d = aa[i]; | |
113 if (d >= 0xDC00 && d <= 0xDFFF) /* second (low) half of a surrogate pair: the first half is the preceding code unit */ | |
114 { if (i == 0) | |
115 onUnicodeError("Invalid UTF-16 sequence", 0); /* a low surrogate at index 0 has nothing before it; onUnicodeError is defined outside this file and presumably does not return - TODO confirm */ | |
116 i--; | |
117 d = ((aa[i] - 0xD7C0) << 10) + (d - 0xDC00); /* 0xD7C0 is 0xD800 - 0x40, so the shift also adds the 0x10000 offset; NOTE(review): aa[i] is not checked to lie in 0xD800..0xDBFF */ | |
118 } | |
119 result = dg(cast(void *)&d); | |
120 if (result) | |
121 break; | |
122 } | |
123 return result; | |
124 } | |
125 | |
126 unittest /* Runs foreach_reverse with a dchar loop variable over wchar strings (presumably lowered by the compiler to _aApplyRwd1) and checks the order of the decoded characters. */ | |
127 { | |
128 debug(apply) printf("_aApplyRwd1.unittest\n"); | |
129 | |
130 auto s = "hello"w[]; | |
131 int i; /* counts how many times the loop body has run */ | |
132 | |
133 foreach_reverse(dchar d; s) | |
134 { | |
135 switch (i) | |
136 { | |
137 case 0: assert(d == 'o'); break; | |
138 case 1: assert(d == 'l'); break; | |
139 case 2: assert(d == 'l'); break; | |
140 case 3: assert(d == 'e'); break; | |
141 case 4: assert(d == 'h'); break; | |
142 default: assert(0); | |
143 } | |
144 i++; | |
145 } | |
146 assert(i == 5); | |
147 | |
148 s = "a\u1234\U00100456b"; /* U+100456 needs a surrogate pair in UTF-16 */ | |
149 i = 0; | |
150 foreach_reverse(dchar d; s) | |
151 { | |
152 //printf("i = %d, d = %x\n", i, d); | |
153 switch (i) | |
154 { | |
155 case 0: assert(d == 'b'); break; | |
156 case 1: assert(d == '\U00100456'); break; | |
157 case 2: assert(d == '\u1234'); break; | |
158 case 3: assert(d == 'a'); break; | |
159 default: assert(0); | |
160 } | |
161 i++; | |
162 } | |
163 assert(i == 4); /* one iteration per code point, not per code unit */ | |
164 } | |
165 | |
166 /*****************************/ | |
167 | |
168 extern (C) int _aApplyRcw1(in char[] aa, dg_t dg) /* Walks the UTF-8 array aa from its last code unit to its first, decodes each sequence and passes it to dg as one wchar, or as two wchars (a surrogate pair) when the value is above 0xFFFF. Stops at the first nonzero dg result and returns it. */ | |
169 { int result; | |
170 | |
171 debug(apply) printf("_aApplyRcw1(), len = %d\n", aa.length); /* NOTE(review): %d is paired with aa.length, which is a size_t - confirm on 64-bit targets */ | |
172 for (size_t i = aa.length; i != 0; ) | |
173 { dchar d; | |
174 wchar w; | |
175 | |
176 i--; | |
177 w = aa[i]; | |
178 if (w & 0x80) /* high bit set: this code unit belongs to a multi-byte sequence */ | |
179 { char c = cast(char)w; | |
180 uint j; /* bit position at which the next payload bits are OR-ed into d */ | |
181 uint m = 0x3F; /* payload mask for the lead byte; loses one bit per trailing byte consumed */ | |
182 d = 0; | |
183 while ((c & 0xC0) != 0xC0) /* step backwards until the lead byte (top two bits both set) is found */ | |
184 { if (i == 0) | |
185 onUnicodeError("Invalid UTF-8 sequence", 0); /* reached the front of aa without finding a lead byte */ | |
186 i--; | |
187 d |= (c & 0x3F) << j; | |
188 j += 6; | |
189 m >>= 1; | |
190 c = aa[i]; | |
191 } | |
192 d |= (c & m) << j; /* merge the payload bits of the lead byte */ | |
193 | |
194 if (d <= 0xFFFF) | |
195 w = cast(wchar) d; | |
196 else | |
197 { | |
198 w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800); /* first (high) half of the surrogate pair */ | |
199 result = dg(cast(void *)&w); /* the high half is delivered before the low half even though the string is traversed backwards */ | |
200 if (result) | |
201 break; | |
202 w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00); /* second (low) half, delivered by the common call below */ | |
203 } | |
204 } | |
205 result = dg(cast(void *)&w); | |
206 if (result) | |
207 break; | |
208 } | |
209 return result; | |
210 } | |
211 | |
212 unittest /* Runs foreach_reverse with a wchar loop variable over char strings (presumably lowered by the compiler to _aApplyRcw1) and checks the delivered UTF-16 code units. */ | |
213 { | |
214 debug(apply) printf("_aApplyRcw1.unittest\n"); | |
215 | |
216 auto s = "hello"c[]; | |
217 int i; /* counts how many times the loop body has run */ | |
218 | |
219 foreach_reverse(wchar d; s) | |
220 { | |
221 switch (i) | |
222 { | |
223 case 0: assert(d == 'o'); break; | |
224 case 1: assert(d == 'l'); break; | |
225 case 2: assert(d == 'l'); break; | |
226 case 3: assert(d == 'e'); break; | |
227 case 4: assert(d == 'h'); break; | |
228 default: assert(0); | |
229 } | |
230 i++; | |
231 } | |
232 assert(i == 5); | |
233 | |
234 s = "a\u1234\U00100456b"; | |
235 i = 0; | |
236 foreach_reverse(wchar d; s) | |
237 { | |
238 //printf("i = %d, d = %x\n", i, d); | |
239 switch (i) | |
240 { | |
241 case 0: assert(d == 'b'); break; | |
242 case 1: assert(d == 0xDBC1); break; /* high surrogate of U+100456 arrives first */ | |
243 case 2: assert(d == 0xDC56); break; /* then its low surrogate */ | |
244 case 3: assert(d == 0x1234); break; | |
245 case 4: assert(d == 'a'); break; | |
246 default: assert(0); | |
247 } | |
248 i++; | |
249 } | |
250 assert(i == 5); /* four code points, one of which yields two wchars */ | |
251 } | |
252 | |
253 /*****************************/ | |
254 | |
255 extern (C) int _aApplyRwc1(in wchar[] aa, dg_t dg) /* Walks the UTF-16 array aa from its last code unit to its first, combines surrogate pairs, and passes each resulting character to dg as one or more chars (its UTF-8 encoding). Stops at the first nonzero dg result and returns it. */ | |
256 { int result; | |
257 | |
258 debug(apply) printf("_aApplyRwc1(), len = %d\n", aa.length); /* NOTE(review): %d is paired with aa.length, which is a size_t - confirm on 64-bit targets */ | |
259 for (size_t i = aa.length; i != 0; ) | |
260 { dchar d; | |
261 char c; | |
262 | |
263 i--; | |
264 d = aa[i]; | |
265 if (d >= 0xDC00 && d <= 0xDFFF) /* second (low) half of a surrogate pair: the first half is the preceding code unit */ | |
266 { if (i == 0) | |
267 onUnicodeError("Invalid UTF-16 sequence", 0); /* a low surrogate at index 0 has nothing before it */ | |
268 i--; | |
269 d = ((aa[i] - 0xD7C0) << 10) + (d - 0xDC00); /* NOTE(review): aa[i] is not checked to lie in 0xD800..0xDBFF */ | |
270 } | |
271 | |
272 if (d & ~0x7F) /* any bit above the low 7 set: not a single-byte character */ | |
273 { | |
274 char[4] buf; | |
275 | |
276 auto b = toUTF8(buf, d); /* toUTF8 comes from rt.util.utf; presumably returns the encoded slice of buf - TODO confirm */ | |
277 foreach (char c2; b) /* the encoded bytes of one character are delivered in forward order */ | |
278 { | |
279 result = dg(cast(void *)&c2); | |
280 if (result) | |
281 return result; | |
282 } | |
283 continue; | |
284 } | |
285 c = cast(char)d; | |
286 result = dg(cast(void *)&c); | |
287 if (result) | |
288 break; | |
289 } | |
290 return result; | |
291 } | |
292 | |
293 unittest /* Runs foreach_reverse with a char loop variable over wchar strings (presumably lowered by the compiler to _aApplyRwc1) and checks the delivered UTF-8 bytes. */ | |
294 { | |
295 debug(apply) printf("_aApplyRwc1.unittest\n"); | |
296 | |
297 auto s = "hello"w[]; | |
298 int i; /* counts how many times the loop body has run */ | |
299 | |
300 foreach_reverse(char d; s) | |
301 { | |
302 switch (i) | |
303 { | |
304 case 0: assert(d == 'o'); break; | |
305 case 1: assert(d == 'l'); break; | |
306 case 2: assert(d == 'l'); break; | |
307 case 3: assert(d == 'e'); break; | |
308 case 4: assert(d == 'h'); break; | |
309 default: assert(0); | |
310 } | |
311 i++; | |
312 } | |
313 assert(i == 5); | |
314 | |
315 s = "a\u1234\U00100456b"; | |
316 i = 0; | |
317 foreach_reverse(char d; s) | |
318 { | |
319 //printf("i = %d, d = %x\n", i, d); | |
320 switch (i) | |
321 { | |
322 case 0: assert(d == 'b'); break; | |
323 case 1: assert(d == 0xF4); break; /* cases 1-4: the UTF-8 bytes of U+100456, in forward order */ | |
324 case 2: assert(d == 0x80); break; | |
325 case 3: assert(d == 0x91); break; | |
326 case 4: assert(d == 0x96); break; | |
327 case 5: assert(d == 0xE1); break; /* cases 5-7: the UTF-8 bytes of U+1234, in forward order */ | |
328 case 6: assert(d == 0x88); break; | |
329 case 7: assert(d == 0xB4); break; | |
330 case 8: assert(d == 'a'); break; | |
331 default: assert(0); | |
332 } | |
333 i++; | |
334 } | |
335 assert(i == 9); | |
336 } | |
337 | |
338 /*****************************/ | |
339 | |
340 extern (C) int _aApplyRdc1(in dchar[] aa, dg_t dg) /* Walks the dchar array aa from its last element to its first and passes each character to dg as one or more chars (its UTF-8 encoding). Stops at the first nonzero dg result and returns it. */ | |
341 { int result; | |
342 | |
343 debug(apply) printf("_aApplyRdc1(), len = %d\n", aa.length); /* NOTE(review): %d is paired with aa.length, which is a size_t - confirm on 64-bit targets */ | |
344 for (size_t i = aa.length; i != 0;) | |
345 { dchar d = aa[--i]; | |
346 char c; | |
347 | |
348 if (d & ~0x7F) /* any bit above the low 7 set: not a single-byte character */ | |
349 { | |
350 char[4] buf; | |
351 | |
352 auto b = toUTF8(buf, d); /* toUTF8 comes from rt.util.utf; presumably returns the encoded slice of buf - TODO confirm */ | |
353 foreach (char c2; b) /* the encoded bytes of one character are delivered in forward order */ | |
354 { | |
355 result = dg(cast(void *)&c2); | |
356 if (result) | |
357 return result; | |
358 } | |
359 continue; | |
360 } | |
361 else | |
362 { | |
363 c = cast(char)d; | |
364 } | |
365 result = dg(cast(void *)&c); | |
366 if (result) | |
367 break; | |
368 } | |
369 return result; | |
370 } | |
371 | |
372 unittest /* Runs foreach_reverse with a char loop variable over dchar strings (presumably lowered by the compiler to _aApplyRdc1) and checks the delivered UTF-8 bytes. */ | |
373 { | |
374 debug(apply) printf("_aApplyRdc1.unittest\n"); | |
375 | |
376 auto s = "hello"d[]; | |
377 int i; /* counts how many times the loop body has run */ | |
378 | |
379 foreach_reverse(char d; s) | |
380 { | |
381 switch (i) | |
382 { | |
383 case 0: assert(d == 'o'); break; | |
384 case 1: assert(d == 'l'); break; | |
385 case 2: assert(d == 'l'); break; | |
386 case 3: assert(d == 'e'); break; | |
387 case 4: assert(d == 'h'); break; | |
388 default: assert(0); | |
389 } | |
390 i++; | |
391 } | |
392 assert(i == 5); | |
393 | |
394 s = "a\u1234\U00100456b"; | |
395 i = 0; | |
396 foreach_reverse(char d; s) | |
397 { | |
398 //printf("i = %d, d = %x\n", i, d); | |
399 switch (i) | |
400 { | |
401 case 0: assert(d == 'b'); break; | |
402 case 1: assert(d == 0xF4); break; /* cases 1-4: the UTF-8 bytes of U+100456, in forward order */ | |
403 case 2: assert(d == 0x80); break; | |
404 case 3: assert(d == 0x91); break; | |
405 case 4: assert(d == 0x96); break; | |
406 case 5: assert(d == 0xE1); break; /* cases 5-7: the UTF-8 bytes of U+1234, in forward order */ | |
407 case 6: assert(d == 0x88); break; | |
408 case 7: assert(d == 0xB4); break; | |
409 case 8: assert(d == 'a'); break; | |
410 default: assert(0); | |
411 } | |
412 i++; | |
413 } | |
414 assert(i == 9); | |
415 } | |
416 | |
417 /*****************************/ | |
418 | |
419 extern (C) int _aApplyRdw1(in dchar[] aa, dg_t dg) /* Walks the dchar array aa from its last element to its first and passes each character to dg as one wchar, or as two wchars (a surrogate pair) when the value is above 0xFFFF. Stops at the first nonzero dg result and returns it. */ | |
420 { int result; | |
421 | |
422 debug(apply) printf("_aApplyRdw1(), len = %d\n", aa.length); /* NOTE(review): %d is paired with aa.length, which is a size_t - confirm on 64-bit targets */ | |
423 for (size_t i = aa.length; i != 0; ) | |
424 { dchar d = aa[--i]; | |
425 wchar w; | |
426 | |
427 if (d <= 0xFFFF) | |
428 w = cast(wchar) d; | |
429 else | |
430 { | |
431 w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800); /* first (high) half of the surrogate pair */ | |
432 result = dg(cast(void *)&w); /* the high half is delivered before the low half */ | |
433 if (result) | |
434 break; | |
435 w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00); /* second (low) half, delivered by the common call below */ | |
436 } | |
437 result = dg(cast(void *)&w); | |
438 if (result) | |
439 break; | |
440 } | |
441 return result; | |
442 } | |
443 | |
444 unittest /* Runs foreach_reverse with a wchar loop variable over dchar strings (presumably lowered by the compiler to _aApplyRdw1) and checks the delivered UTF-16 code units. */ | |
445 { | |
446 debug(apply) printf("_aApplyRdw1.unittest\n"); | |
447 | |
448 auto s = "hello"d[]; | |
449 int i; /* counts how many times the loop body has run */ | |
450 | |
451 foreach_reverse(wchar d; s) | |
452 { | |
453 switch (i) | |
454 { | |
455 case 0: assert(d == 'o'); break; | |
456 case 1: assert(d == 'l'); break; | |
457 case 2: assert(d == 'l'); break; | |
458 case 3: assert(d == 'e'); break; | |
459 case 4: assert(d == 'h'); break; | |
460 default: assert(0); | |
461 } | |
462 i++; | |
463 } | |
464 assert(i == 5); | |
465 | |
466 s = "a\u1234\U00100456b"; | |
467 i = 0; | |
468 foreach_reverse(wchar d; s) | |
469 { | |
470 //printf("i = %d, d = %x\n", i, d); | |
471 switch (i) | |
472 { | |
473 case 0: assert(d == 'b'); break; | |
474 case 1: assert(d == 0xDBC1); break; /* high surrogate of U+100456 arrives first */ | |
475 case 2: assert(d == 0xDC56); break; /* then its low surrogate */ | |
476 case 3: assert(d == 0x1234); break; | |
477 case 4: assert(d == 'a'); break; | |
478 default: assert(0); | |
479 } | |
480 i++; | |
481 } | |
482 assert(i == 5); /* four code points, one of which yields two wchars */ | |
483 } | |
484 | |
485 | |
486 /****************************************************************************/ | |
487 /* 2 argument versions */ | |
488 | |
489 // dg is D, but _aApplyRcd2() is C | |
490 extern (D) typedef int delegate(void *, void *) dg2_t; /* loop-body delegate for the 2 argument versions: the _aApplyR*2 functions pass the address of their index variable first and the address of the current element second; a nonzero return value makes them stop iterating and return that value */ | |
491 | |
492 extern (C) int _aApplyRcd2(in char[] aa, dg2_t dg) /* Indexed form of the char[] to dchar reverse loop: walks the UTF-8 array aa backwards, decodes each sequence into a dchar and calls dg with the address of the index and the address of the dchar. Stops at the first nonzero dg result and returns it. */ | |
493 { int result; | |
494 size_t i; | |
495 size_t len = aa.length; | |
496 | |
497 debug(apply) printf("_aApplyRcd2(), len = %d\n", len); /* NOTE(review): %d is paired with len, which is a size_t - confirm on 64-bit targets */ | |
498 for (i = len; i != 0; ) | |
499 { dchar d; | |
500 | |
501 i--; | |
502 d = aa[i]; | |
503 if (d & 0x80) /* high bit set: this code unit belongs to a multi-byte sequence */ | |
504 { char c = cast(char)d; | |
505 uint j; /* bit position at which the next payload bits are OR-ed into d */ | |
506 uint m = 0x3F; /* payload mask for the lead byte; loses one bit per trailing byte consumed */ | |
507 d = 0; | |
508 while ((c & 0xC0) != 0xC0) /* step backwards until the lead byte (top two bits both set) is found */ | |
509 { if (i == 0) | |
510 onUnicodeError("Invalid UTF-8 sequence", 0); /* reached the front of aa without finding a lead byte */ | |
511 i--; | |
512 d |= (c & 0x3F) << j; | |
513 j += 6; | |
514 m >>= 1; | |
515 c = aa[i]; | |
516 } | |
517 d |= (c & m) << j; /* merge the payload bits of the lead byte */ | |
518 } | |
519 result = dg(&i, cast(void *)&d); /* i now indexes the first code unit of the sequence; dg gets the address of the loop variable itself */ | |
520 if (result) | |
521 break; | |
522 } | |
523 return result; | |
524 } | |
525 | |
526 unittest /* Runs the indexed foreach_reverse with a dchar loop variable over char strings (presumably lowered by the compiler to _aApplyRcd2) and checks both the characters and the reported indices. */ | |
527 { | |
528 debug(apply) printf("_aApplyRcd2.unittest\n"); | |
529 | |
530 auto s = "hello"c[]; | |
531 int i; /* counts how many times the loop body has run */ | |
532 | |
533 foreach_reverse(k, dchar d; s) | |
534 { | |
535 assert(k == 4 - i); | |
536 switch (i) | |
537 { | |
538 case 0: assert(d == 'o'); break; | |
539 case 1: assert(d == 'l'); break; | |
540 case 2: assert(d == 'l'); break; | |
541 case 3: assert(d == 'e'); break; | |
542 case 4: assert(d == 'h'); break; | |
543 default: assert(0); | |
544 } | |
545 i++; | |
546 } | |
547 assert(i == 5); | |
548 | |
549 s = "a\u1234\U00100456b"; /* in UTF-8: 'a' at index 0, U+1234 at 1..3, U+100456 at 4..7, 'b' at 8 */ | |
550 i = 0; | |
551 foreach_reverse(k, dchar d; s) | |
552 { | |
553 //printf("i = %d, k = %d, d = %x\n", i, k, d); | |
554 switch (i) | |
555 { | |
556 case 0: assert(d == 'b'); assert(k == 8); break; | |
557 case 1: assert(d == '\U00100456'); assert(k == 4); break; /* k is the index of the first byte of the sequence */ | |
558 case 2: assert(d == '\u1234'); assert(k == 1); break; | |
559 case 3: assert(d == 'a'); assert(k == 0); break; | |
560 default: assert(0); | |
561 } | |
562 i++; | |
563 } | |
564 assert(i == 4); | |
565 } | |
566 | |
567 /*****************************/ | |
568 | |
569 extern (C) int _aApplyRwd2(in wchar[] aa, dg2_t dg) /* Indexed form of the wchar[] to dchar reverse loop: walks the UTF-16 array aa backwards, combines surrogate pairs into a single dchar and calls dg with the address of the index and the address of the dchar. Stops at the first nonzero dg result and returns it. */ | |
570 { int result; | |
571 | |
572 debug(apply) printf("_aApplyRwd2(), len = %d\n", aa.length); /* NOTE(review): %d is paired with aa.length, which is a size_t - confirm on 64-bit targets */ | |
573 for (size_t i = aa.length; i != 0; ) | |
574 { dchar d; | |
575 | |
576 i--; | |
577 d = aa[i]; | |
578 if (d >= 0xDC00 && d <= 0xDFFF) /* second (low) half of a surrogate pair: the first half is the preceding code unit */ | |
579 { if (i == 0) | |
580 onUnicodeError("Invalid UTF-16 sequence", 0); /* a low surrogate at index 0 has nothing before it */ | |
581 i--; | |
582 d = ((aa[i] - 0xD7C0) << 10) + (d - 0xDC00); /* 0xD7C0 is 0xD800 - 0x40, so the shift also adds the 0x10000 offset; NOTE(review): aa[i] is not checked to lie in 0xD800..0xDBFF */ | |
583 } | |
584 result = dg(&i, cast(void *)&d); /* i now indexes the first code unit of the character; dg gets the address of the loop variable itself */ | |
585 if (result) | |
586 break; | |
587 } | |
588 return result; | |
589 } | |
590 | |
591 unittest /* Runs the indexed foreach_reverse with a dchar loop variable over wchar strings (presumably lowered by the compiler to _aApplyRwd2) and checks both the characters and the reported indices. */ | |
592 { | |
593 debug(apply) printf("_aApplyRwd2.unittest\n"); | |
594 | |
595 auto s = "hello"w[]; | |
596 int i; /* counts how many times the loop body has run */ | |
597 | |
598 foreach_reverse(k, dchar d; s) | |
599 { | |
600 //printf("i = %d, k = %d, d = %x\n", i, k, d); | |
601 assert(k == 4 - i); | |
602 switch (i) | |
603 { | |
604 case 0: assert(d == 'o'); break; | |
605 case 1: assert(d == 'l'); break; | |
606 case 2: assert(d == 'l'); break; | |
607 case 3: assert(d == 'e'); break; | |
608 case 4: assert(d == 'h'); break; | |
609 default: assert(0); | |
610 } | |
611 i++; | |
612 } | |
613 assert(i == 5); | |
614 | |
615 s = "a\u1234\U00100456b"; /* in UTF-16: 'a' at index 0, U+1234 at 1, U+100456 at 2..3, 'b' at 4 */ | |
616 i = 0; | |
617 foreach_reverse(k, dchar d; s) | |
618 { | |
619 //printf("i = %d, k = %d, d = %x\n", i, k, d); | |
620 switch (i) | |
621 { | |
622 case 0: assert(k == 4); assert(d == 'b'); break; | |
623 case 1: assert(k == 2); assert(d == '\U00100456'); break; /* k is the index of the first half of the surrogate pair */ | |
624 case 2: assert(k == 1); assert(d == '\u1234'); break; | |
625 case 3: assert(k == 0); assert(d == 'a'); break; | |
626 default: assert(0); | |
627 } | |
628 i++; | |
629 } | |
630 assert(i == 4); | |
631 } | |
632 | |
633 /*****************************/ | |
634 | |
635 extern (C) int _aApplyRcw2(in char[] aa, dg2_t dg) /* Indexed form of the char[] to wchar reverse loop: walks the UTF-8 array aa backwards, decodes each sequence and calls dg with the address of the index and the address of one wchar, or twice (a surrogate pair) when the value is above 0xFFFF. Stops at the first nonzero dg result and returns it. */ | |
636 { int result; | |
637 | |
638 debug(apply) printf("_aApplyRcw2(), len = %d\n", aa.length); /* NOTE(review): %d is paired with aa.length, which is a size_t - confirm on 64-bit targets */ | |
639 for (size_t i = aa.length; i != 0; ) | |
640 { dchar d; | |
641 wchar w; | |
642 | |
643 i--; | |
644 w = aa[i]; | |
645 if (w & 0x80) /* high bit set: this code unit belongs to a multi-byte sequence */ | |
646 { char c = cast(char)w; | |
647 uint j; /* bit position at which the next payload bits are OR-ed into d */ | |
648 uint m = 0x3F; /* payload mask for the lead byte; loses one bit per trailing byte consumed */ | |
649 d = 0; | |
650 while ((c & 0xC0) != 0xC0) /* step backwards until the lead byte (top two bits both set) is found */ | |
651 { if (i == 0) | |
652 onUnicodeError("Invalid UTF-8 sequence", 0); /* reached the front of aa without finding a lead byte */ | |
653 i--; | |
654 d |= (c & 0x3F) << j; | |
655 j += 6; | |
656 m >>= 1; | |
657 c = aa[i]; | |
658 } | |
659 d |= (c & m) << j; /* merge the payload bits of the lead byte */ | |
660 | |
661 if (d <= 0xFFFF) | |
662 w = cast(wchar) d; | |
663 else | |
664 { | |
665 w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800); /* first (high) half of the surrogate pair */ | |
666 result = dg(&i, cast(void *)&w); /* both halves are reported with the same index */ | |
667 if (result) | |
668 break; | |
669 w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00); /* second (low) half, delivered by the common call below */ | |
670 } | |
671 } | |
672 result = dg(&i, cast(void *)&w); /* dg gets the address of the loop variable itself */ | |
673 if (result) | |
674 break; | |
675 } | |
676 return result; | |
677 } | |
678 | |
679 unittest /* Runs the indexed foreach_reverse with a wchar loop variable over char strings (presumably lowered by the compiler to _aApplyRcw2) and checks both the code units and the reported indices. */ | |
680 { | |
681 debug(apply) printf("_aApplyRcw2.unittest\n"); | |
682 | |
683 auto s = "hello"c[]; | |
684 int i; /* counts how many times the loop body has run */ | |
685 | |
686 foreach_reverse(k, wchar d; s) | |
687 { | |
688 //printf("i = %d, k = %d, d = %x\n", i, k, d); | |
689 assert(k == 4 - i); | |
690 switch (i) | |
691 { | |
692 case 0: assert(d == 'o'); break; | |
693 case 1: assert(d == 'l'); break; | |
694 case 2: assert(d == 'l'); break; | |
695 case 3: assert(d == 'e'); break; | |
696 case 4: assert(d == 'h'); break; | |
697 default: assert(0); | |
698 } | |
699 i++; | |
700 } | |
701 assert(i == 5); | |
702 | |
703 s = "a\u1234\U00100456b"; /* in UTF-8: 'a' at index 0, U+1234 at 1..3, U+100456 at 4..7, 'b' at 8 */ | |
704 i = 0; | |
705 foreach_reverse(k, wchar d; s) | |
706 { | |
707 //printf("i = %d, k = %d, d = %x\n", i, k, d); | |
708 switch (i) | |
709 { | |
710 case 0: assert(k == 8); assert(d == 'b'); break; | |
711 case 1: assert(k == 4); assert(d == 0xDBC1); break; /* both surrogate halves report the index of the source sequence */ | |
712 case 2: assert(k == 4); assert(d == 0xDC56); break; | |
713 case 3: assert(k == 1); assert(d == 0x1234); break; | |
714 case 4: assert(k == 0); assert(d == 'a'); break; | |
715 default: assert(0); | |
716 } | |
717 i++; | |
718 } | |
719 assert(i == 5); | |
720 } | |
721 | |
722 /*****************************/ | |
723 | |
724 extern (C) int _aApplyRwc2(in wchar[] aa, dg2_t dg) /* Indexed form of the wchar[] to char reverse loop: walks the UTF-16 array aa backwards, combines surrogate pairs, and calls dg with the address of the index and the address of each char of the character's UTF-8 encoding. Stops at the first nonzero dg result and returns it. */ | |
725 { int result; | |
726 | |
727 debug(apply) printf("_aApplyRwc2(), len = %d\n", aa.length); /* NOTE(review): %d is paired with aa.length, which is a size_t - confirm on 64-bit targets */ | |
728 for (size_t i = aa.length; i != 0; ) | |
729 { dchar d; | |
730 char c; | |
731 | |
732 i--; | |
733 d = aa[i]; | |
734 if (d >= 0xDC00 && d <= 0xDFFF) /* second (low) half of a surrogate pair: the first half is the preceding code unit */ | |
735 { if (i == 0) | |
736 onUnicodeError("Invalid UTF-16 sequence", 0); /* a low surrogate at index 0 has nothing before it */ | |
737 i--; | |
738 d = ((aa[i] - 0xD7C0) << 10) + (d - 0xDC00); /* NOTE(review): aa[i] is not checked to lie in 0xD800..0xDBFF */ | |
739 } | |
740 | |
741 if (d & ~0x7F) /* any bit above the low 7 set: not a single-byte character */ | |
742 { | |
743 char[4] buf; | |
744 | |
745 auto b = toUTF8(buf, d); /* toUTF8 comes from rt.util.utf; presumably returns the encoded slice of buf - TODO confirm */ | |
746 foreach (char c2; b) /* the encoded bytes are delivered in forward order */ | |
747 { | |
748 result = dg(&i, cast(void *)&c2); /* dg gets the address of the loop variable itself, once per encoded byte */ | |
749 if (result) | |
750 return result; | |
751 } | |
752 continue; | |
753 } | |
754 c = cast(char)d; | |
755 result = dg(&i, cast(void *)&c); | |
756 if (result) | |
757 break; | |
758 } | |
759 return result; | |
760 } | |
761 | |
762 unittest /* Runs the indexed foreach_reverse with a char loop variable over wchar strings (presumably lowered by the compiler to _aApplyRwc2) and checks both the bytes and the reported indices. */ | |
763 { | |
764 debug(apply) printf("_aApplyRwc2.unittest\n"); | |
765 | |
766 auto s = "hello"w[]; | |
767 int i; /* counts how many times the loop body has run */ | |
768 | |
769 foreach_reverse(k, char d; s) | |
770 { | |
771 //printf("i = %d, k = %d, d = %x\n", i, k, d); | |
772 assert(k == 4 - i); | |
773 switch (i) | |
774 { | |
775 case 0: assert(d == 'o'); break; | |
776 case 1: assert(d == 'l'); break; | |
777 case 2: assert(d == 'l'); break; | |
778 case 3: assert(d == 'e'); break; | |
779 case 4: assert(d == 'h'); break; | |
780 default: assert(0); | |
781 } | |
782 i++; | |
783 } | |
784 assert(i == 5); | |
785 | |
786 s = "a\u1234\U00100456b"; /* in UTF-16: 'a' at index 0, U+1234 at 1, U+100456 at 2..3, 'b' at 4 */ | |
787 i = 0; | |
788 foreach_reverse(k, char d; s) | |
789 { | |
790 //printf("i = %d, k = %d, d = %x\n", i, k, d); | |
791 switch (i) | |
792 { | |
793 case 0: assert(k == 4); assert(d == 'b'); break; | |
794 case 1: assert(k == 2); assert(d == 0xF4); break; /* cases 1-4: bytes of U+100456, all reported at index 2 */ | |
795 case 2: assert(k == 2); assert(d == 0x80); break; | |
796 case 3: assert(k == 2); assert(d == 0x91); break; | |
797 case 4: assert(k == 2); assert(d == 0x96); break; | |
798 case 5: assert(k == 1); assert(d == 0xE1); break; /* cases 5-7: bytes of U+1234, all reported at index 1 */ | |
799 case 6: assert(k == 1); assert(d == 0x88); break; | |
800 case 7: assert(k == 1); assert(d == 0xB4); break; | |
801 case 8: assert(k == 0); assert(d == 'a'); break; | |
802 default: assert(0); | |
803 } | |
804 i++; | |
805 } | |
806 assert(i == 9); | |
807 } | |
808 | |
809 /*****************************/ | |
810 | |
811 extern (C) int _aApplyRdc2(in dchar[] aa, dg2_t dg) /* Indexed form of the dchar[] to char reverse loop: walks aa from its last element to its first and calls dg with the address of the index and the address of each char of the element's UTF-8 encoding. Stops at the first nonzero dg result and returns it. */ | |
812 { int result; | |
813 | |
814 debug(apply) printf("_aApplyRdc2(), len = %d\n", aa.length); /* NOTE(review): %d is paired with aa.length, which is a size_t - confirm on 64-bit targets */ | |
815 for (size_t i = aa.length; i != 0; ) | |
816 { dchar d = aa[--i]; | |
817 char c; | |
818 | |
819 if (d & ~0x7F) /* any bit above the low 7 set: not a single-byte character */ | |
820 { | |
821 char[4] buf; | |
822 | |
823 auto b = toUTF8(buf, d); /* toUTF8 comes from rt.util.utf; presumably returns the encoded slice of buf - TODO confirm */ | |
824 foreach (char c2; b) /* the encoded bytes are delivered in forward order */ | |
825 { | |
826 result = dg(&i, cast(void *)&c2); /* dg gets the address of the loop variable itself, once per encoded byte */ | |
827 if (result) | |
828 return result; | |
829 } | |
830 continue; | |
831 } | |
832 else | |
833 { c = cast(char)d; | |
834 } | |
835 result = dg(&i, cast(void *)&c); | |
836 if (result) | |
837 break; | |
838 } | |
839 return result; | |
840 } | |
841 | |
842 unittest /* Runs the indexed foreach_reverse with a char loop variable over dchar strings (presumably lowered by the compiler to _aApplyRdc2) and checks both the bytes and the reported indices. */ | |
843 { | |
844 debug(apply) printf("_aApplyRdc2.unittest\n"); | |
845 | |
846 auto s = "hello"d[]; | |
847 int i; /* counts how many times the loop body has run */ | |
848 | |
849 foreach_reverse(k, char d; s) | |
850 { | |
851 //printf("i = %d, k = %d, d = %x\n", i, k, d); | |
852 assert(k == 4 - i); | |
853 switch (i) | |
854 { | |
855 case 0: assert(d == 'o'); break; | |
856 case 1: assert(d == 'l'); break; | |
857 case 2: assert(d == 'l'); break; | |
858 case 3: assert(d == 'e'); break; | |
859 case 4: assert(d == 'h'); break; | |
860 default: assert(0); | |
861 } | |
862 i++; | |
863 } | |
864 assert(i == 5); | |
865 | |
866 s = "a\u1234\U00100456b"; /* four dchar elements, at indices 0..3 */ | |
867 i = 0; | |
868 foreach_reverse(k, char d; s) | |
869 { | |
870 //printf("i = %d, k = %d, d = %x\n", i, k, d); | |
871 switch (i) | |
872 { | |
873 case 0: assert(k == 3); assert(d == 'b'); break; | |
874 case 1: assert(k == 2); assert(d == 0xF4); break; /* cases 1-4: bytes of U+100456, all reported at index 2 */ | |
875 case 2: assert(k == 2); assert(d == 0x80); break; | |
876 case 3: assert(k == 2); assert(d == 0x91); break; | |
877 case 4: assert(k == 2); assert(d == 0x96); break; | |
878 case 5: assert(k == 1); assert(d == 0xE1); break; /* cases 5-7: bytes of U+1234, all reported at index 1 */ | |
879 case 6: assert(k == 1); assert(d == 0x88); break; | |
880 case 7: assert(k == 1); assert(d == 0xB4); break; | |
881 case 8: assert(k == 0); assert(d == 'a'); break; | |
882 default: assert(0); | |
883 } | |
884 i++; | |
885 } | |
886 assert(i == 9); | |
887 } | |
888 | |
889 /*****************************/ | |
890 | |
891 extern (C) int _aApplyRdw2(in dchar[] aa, dg2_t dg) /* Indexed form of the dchar[] to wchar reverse loop: walks aa from its last element to its first and calls dg with the address of the index and the address of one wchar, or twice (a surrogate pair) when the value is above 0xFFFF. Stops at the first nonzero dg result and returns it. */ | |
892 { int result; | |
893 | |
894 debug(apply) printf("_aApplyRdw2(), len = %d\n", aa.length); /* NOTE(review): %d is paired with aa.length, which is a size_t - confirm on 64-bit targets */ | |
895 for (size_t i = aa.length; i != 0; ) | |
896 { dchar d = aa[--i]; | |
897 wchar w; | |
898 | |
899 if (d <= 0xFFFF) | |
900 w = cast(wchar) d; | |
901 else | |
902 { | |
903 w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800); /* first (high) half of the surrogate pair */ | |
904 result = dg(&i, cast(void *)&w); /* both halves are reported with the same index */ | |
905 if (result) | |
906 break; | |
907 w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00); /* second (low) half, delivered by the common call below */ | |
908 } | |
909 result = dg(&i, cast(void *)&w); /* dg gets the address of the loop variable itself */ | |
910 if (result) | |
911 break; | |
912 } | |
913 return result; | |
914 } | |
915 | |
916 unittest /* Runs the indexed foreach_reverse with a wchar loop variable over dchar strings (presumably lowered by the compiler to _aApplyRdw2) and checks both the code units and the reported indices. */ | |
917 { | |
918 debug(apply) printf("_aApplyRdw2.unittest\n"); | |
919 | |
920 auto s = "hello"d[]; | |
921 int i; /* counts how many times the loop body has run */ | |
922 | |
923 foreach_reverse(k, wchar d; s) | |
924 { | |
925 //printf("i = %d, k = %d, d = %x\n", i, k, d); | |
926 assert(k == 4 - i); | |
927 switch (i) | |
928 { | |
929 case 0: assert(d == 'o'); break; | |
930 case 1: assert(d == 'l'); break; | |
931 case 2: assert(d == 'l'); break; | |
932 case 3: assert(d == 'e'); break; | |
933 case 4: assert(d == 'h'); break; | |
934 default: assert(0); | |
935 } | |
936 i++; | |
937 } | |
938 assert(i == 5); | |
939 | |
940 s = "a\u1234\U00100456b"; /* four dchar elements, at indices 0..3 */ | |
941 i = 0; | |
942 foreach_reverse(k, wchar d; s) | |
943 { | |
944 //printf("i = %d, k = %d, d = %x\n", i, k, d); | |
945 switch (i) | |
946 { | |
947 case 0: assert(k == 3); assert(d == 'b'); break; | |
948 case 1: assert(k == 2); assert(d == 0xDBC1); break; /* both surrogate halves report the index of the source element */ | |
949 case 2: assert(k == 2); assert(d == 0xDC56); break; | |
950 case 3: assert(k == 1); assert(d == 0x1234); break; | |
951 case 4: assert(k == 0); assert(d == 'a'); break; | |
952 default: assert(0); | |
953 } | |
954 i++; | |
955 } | |
956 assert(i == 5); | |
957 } | |