Mercurial > projects > ldc
comparison lphobos/internal/aApplyR.d @ 86:fd32135dca3e trunk
[svn r90] Major updates to the gen directory. Redesigned the 'elem' struct. Much more... !!!
Lots of bugfixes.
Added support for special foreach on strings.
Added std.array, std.utf, std.ctype and std.uni to phobos.
Changed all the .c files in the gen dir to .cpp (it *is* C++ after all)
author | lindquist |
---|---|
date | Sat, 03 Nov 2007 14:44:58 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
85:f869c636a113 | 86:fd32135dca3e |
---|---|
1 | |
2 /** | |
3 * Part of the D programming language runtime library. | |
4 */ | |
5 | |
6 /* | |
7 * Copyright (C) 2004-2006 by Digital Mars, www.digitalmars.com | |
8 * Written by Walter Bright | |
9 * | |
10 * This software is provided 'as-is', without any express or implied | |
11 * warranty. In no event will the authors be held liable for any damages | |
12 * arising from the use of this software. | |
13 * | |
14 * Permission is granted to anyone to use this software for any purpose, | |
15 * including commercial applications, and to alter it and redistribute it | |
16 * freely, in both source and binary form, subject to the following | |
17 * restrictions: | |
18 * | |
19 * o The origin of this software must not be misrepresented; you must not | |
20 * claim that you wrote the original software. If you use this software | |
21 * in a product, an acknowledgment in the product documentation would be | |
22 * appreciated but is not required. | |
23 * o Altered source versions must be plainly marked as such, and must not | |
24 * be misrepresented as being the original software. | |
25 * o This notice may not be removed or altered from any source | |
26 * distribution. | |
27 */ | |
28 | |
29 /* This code handles decoding UTF strings for foreach_reverse loops. | |
30 * There are 6 combinations of conversions between char, wchar, | |
31 * and dchar, and 2 versions of each: one passing only the element to the loop body, and one passing its index as well. | |
32 */ | |
33 | |
34 import std.utf; | |
35 | |
36 //debug=apply; | |
37 | |
38 /**********************************************/ | |
39 /* 1 argument versions */ | |
40 | |
// Loop-body callback used by the one-argument routines below. The delegate
// keeps D linkage even though the _aApplyR*1() functions that receive it
// are declared extern (C).
extern (D)
{
    typedef int delegate(void *) dg_t;
}
43 | |
/**
 * Runtime support for foreach_reverse over a char[] with a dchar loop
 * variable: walks aa from its last code unit towards its first, decodes
 * each UTF-8 sequence into one dchar and hands a pointer to it to dg.
 *
 * Params:
 *  aa = UTF-8 text to iterate over
 *  dg = loop body; a non-zero return value stops the iteration
 *
 * Returns: the first non-zero value returned by dg, or 0 when aa has been
 *          traversed completely.
 *
 * Throws: std.utf.UtfException when a multi-byte sequence is malformed:
 *         it runs off the start of aa, contains a byte that is not a
 *         continuation byte, has more than three continuation bytes, or
 *         its lead byte announces a different length.
 */
extern (C) int _aApplyRcd1(char[] aa, dg_t dg)
{   int result;

    debug(apply) printf("_aApplyRcd1(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {   dchar d;

        i--;
        d = aa[i];
        if (d & 0x80)
        {   char c = cast(char)d;
            uint j;             // bit position of the next 6-bit group
            uint m = 0x3F;      // payload mask the lead byte will need
            d = 0;

            // Scan backwards over continuation bytes until the lead byte.
            while ((c & 0xC0) != 0xC0)
            {
                // Only 10xxxxxx bytes may be skipped, at most three of
                // them, and a lead byte must still be in front of them.
                if ((c & 0xC0) != 0x80 || j == 18 || i == 0)
                    throw new std.utf.UtfException("Invalid UTF-8 sequence", i);
                i--;
                d |= (c & 0x3F) << j;
                j += 6;
                m >>= 1;
                c = aa[i];
            }

            // The tag bits of the lead byte must announce exactly the
            // number of continuation bytes that were consumed
            // (110xxxxx -> 1, 1110xxxx -> 2, 11110xxx -> 3).
            uint tag = ~m & 0xFF;
            if ((c & tag) != ((tag << 1) & 0xFF))
                throw new std.utf.UtfException("Invalid UTF-8 sequence", i);

            d |= (c & m) << j;
        }
        result = dg(cast(void *)&d);
        if (result)
            break;
    }
    return result;
}
75 | |
// foreach_reverse over char[] yielding dchar: ASCII text first, then text
// that mixes 1-, 3- and 4-byte UTF-8 sequences.
unittest
{
    debug(apply) printf("_aApplyRcd1.unittest\n");

    char[] text = "hello";
    dchar[] want = "olleh"d;
    int seen;

    foreach_reverse (dchar got; text)
    {
        if (seen >= want.length)
            assert(0);
        assert(got == want[seen]);
        seen++;
    }
    assert(seen == 5);

    text = "a\u1234\U00100456b";
    want = "b\U00100456\u1234a"d;
    seen = 0;
    foreach_reverse (dchar got; text)
    {
        //printf("i = %d, d = %x\n", i, d);
        if (seen >= want.length)
            assert(0);
        assert(got == want[seen]);
        seen++;
    }
    assert(seen == 4);
}
115 | |
116 /*****************************/ | |
117 | |
/**
 * Runtime support for foreach_reverse over a wchar[] with a dchar loop
 * variable: walks aa from its last code unit towards its first, combines
 * surrogate pairs into one dchar and hands a pointer to it to dg.
 *
 * Params:
 *  aa = UTF-16 text to iterate over
 *  dg = loop body; a non-zero return value stops the iteration
 *
 * Returns: the first non-zero value returned by dg, or 0 when aa has been
 *          traversed completely.
 *
 * Throws: std.utf.UtfException when a low surrogate is the first unit of
 *         aa or is not preceded by a high surrogate.
 */
extern (C) int _aApplyRwd1(wchar[] aa, dg_t dg)
{   int result;

    debug(apply) printf("_aApplyRwd1(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {   dchar d;

        i--;
        d = aa[i];
        if (d >= 0xDC00 && d <= 0xDFFF)
        {   if (i == 0)
                throw new std.utf.UtfException("Invalid UTF-16 sequence", 0);
            i--;

            // A low surrogate is only meaningful after a high surrogate;
            // anything else would turn the arithmetic below into garbage.
            wchar hi = aa[i];
            if (hi < 0xD800 || hi > 0xDBFF)
                throw new std.utf.UtfException("Invalid UTF-16 sequence", i);

            d = ((hi - 0xD7C0) << 10) + (d - 0xDC00);
        }
        result = dg(cast(void *)&d);
        if (result)
            break;
    }
    return result;
}
139 | |
// foreach_reverse over wchar[] yielding dchar: ASCII text first, then text
// that contains a BMP character and a surrogate pair.
unittest
{
    debug(apply) printf("_aApplyRwd1.unittest\n");

    wchar[] text = "hello";
    dchar[] want = "olleh"d;
    int seen;

    foreach_reverse (dchar got; text)
    {
        if (seen >= want.length)
            assert(0);
        assert(got == want[seen]);
        seen++;
    }
    assert(seen == 5);

    text = "a\u1234\U00100456b";
    want = "b\U00100456\u1234a"d;
    seen = 0;
    foreach_reverse (dchar got; text)
    {
        //printf("i = %d, d = %x\n", i, d);
        if (seen >= want.length)
            assert(0);
        assert(got == want[seen]);
        seen++;
    }
    assert(seen == 4);
}
179 | |
180 /*****************************/ | |
181 | |
/**
 * Runtime support for foreach_reverse over a char[] with a wchar loop
 * variable: walks aa from its last code unit towards its first, decodes
 * each UTF-8 sequence and hands the resulting UTF-16 code unit(s) to dg.
 * A code point above 0xFFFF produces two calls, high surrogate first.
 *
 * Params:
 *  aa = UTF-8 text to iterate over
 *  dg = loop body; a non-zero return value stops the iteration
 *
 * Returns: the first non-zero value returned by dg, or 0 when aa has been
 *          traversed completely.
 *
 * Throws: std.utf.UtfException when a multi-byte sequence is malformed:
 *         it runs off the start of aa, contains a byte that is not a
 *         continuation byte, has more than three continuation bytes, or
 *         its lead byte announces a different length.
 */
extern (C) int _aApplyRcw1(char[] aa, dg_t dg)
{   int result;

    debug(apply) printf("_aApplyRcw1(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {   dchar d;
        wchar w;

        i--;
        w = aa[i];
        if (w & 0x80)
        {   char c = cast(char)w;
            uint j;             // bit position of the next 6-bit group
            uint m = 0x3F;      // payload mask the lead byte will need
            d = 0;

            // Scan backwards over continuation bytes until the lead byte.
            while ((c & 0xC0) != 0xC0)
            {
                // Only 10xxxxxx bytes may be skipped, at most three of
                // them, and a lead byte must still be in front of them.
                if ((c & 0xC0) != 0x80 || j == 18 || i == 0)
                    throw new std.utf.UtfException("Invalid UTF-8 sequence", i);
                i--;
                d |= (c & 0x3F) << j;
                j += 6;
                m >>= 1;
                c = aa[i];
            }

            // The tag bits of the lead byte must announce exactly the
            // number of continuation bytes that were consumed.
            uint tag = ~m & 0xFF;
            if ((c & tag) != ((tag << 1) & 0xFF))
                throw new std.utf.UtfException("Invalid UTF-8 sequence", i);

            d |= (c & m) << j;

            if (d <= 0xFFFF)
                w = cast(wchar) d;
            else
            {
                // Outside the BMP: deliver the high surrogate here, the
                // low surrogate through the common call below.
                w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
                result = dg(cast(void *)&w);
                if (result)
                    break;
                w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
            }
        }
        result = dg(cast(void *)&w);
        if (result)
            break;
    }
    return result;
}
225 | |
// foreach_reverse over char[] yielding wchar: ASCII text first, then text
// whose 4-byte sequence must come back as a surrogate pair.
unittest
{
    debug(apply) printf("_aApplyRcw1.unittest\n");

    char[] text = "hello";
    wchar[] want = "olleh"w;
    int seen;

    foreach_reverse (wchar got; text)
    {
        if (seen >= want.length)
            assert(0);
        assert(got == want[seen]);
        seen++;
    }
    assert(seen == 5);

    text = "a\u1234\U00100456b";
    want = [cast(wchar)'b', cast(wchar)0xDBC1, cast(wchar)0xDC56,
            cast(wchar)0x1234, cast(wchar)'a'];
    seen = 0;
    foreach_reverse (wchar got; text)
    {
        //printf("i = %d, d = %x\n", i, d);
        if (seen >= want.length)
            assert(0);
        assert(got == want[seen]);
        seen++;
    }
    assert(seen == 5);
}
266 | |
267 /*****************************/ | |
268 | |
/**
 * Runtime support for foreach_reverse over a wchar[] with a char loop
 * variable: walks aa from its last code unit towards its first, combines
 * surrogate pairs, and hands the UTF-8 code units of each code point to
 * dg one at a time (in encoding order within a code point).
 *
 * Params:
 *  aa = UTF-16 text to iterate over
 *  dg = loop body; a non-zero return value stops the iteration
 *
 * Returns: the first non-zero value returned by dg, or 0 when aa has been
 *          traversed completely.
 *
 * Throws: std.utf.UtfException when a low surrogate is the first unit of
 *         aa or is not preceded by a high surrogate.
 */
extern (C) int _aApplyRwc1(wchar[] aa, dg_t dg)
{   int result;

    debug(apply) printf("_aApplyRwc1(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {   dchar d;
        char c;

        i--;
        d = aa[i];
        if (d >= 0xDC00 && d <= 0xDFFF)
        {   if (i == 0)
                throw new std.utf.UtfException("Invalid UTF-16 sequence", 0);
            i--;

            // A low surrogate is only meaningful after a high surrogate;
            // anything else would turn the arithmetic below into garbage.
            wchar hi = aa[i];
            if (hi < 0xD800 || hi > 0xDBFF)
                throw new std.utf.UtfException("Invalid UTF-16 sequence", i);

            d = ((hi - 0xD7C0) << 10) + (d - 0xDC00);
        }

        if (d & ~0x7F)
        {
            // Non-ASCII: encode into a scratch buffer and feed each byte.
            char[4] buf;
            char[] b;

            b = std.utf.toUTF8(buf, d);
            foreach (char c2; b)
            {
                result = dg(cast(void *)&c2);
                if (result)
                    return result;
            }
            continue;
        }
        c = cast(char)d;
        result = dg(cast(void *)&c);
        if (result)
            break;
    }
    return result;
}
307 | |
// foreach_reverse over wchar[] yielding char: ASCII text first, then text
// whose non-ASCII characters come back as their UTF-8 bytes.
unittest
{
    debug(apply) printf("_aApplyRwc1.unittest\n");

    wchar[] text = "hello";
    char[] want = "olleh";
    int seen;

    foreach_reverse (char got; text)
    {
        if (seen >= want.length)
            assert(0);
        assert(got == want[seen]);
        seen++;
    }
    assert(seen == 5);

    text = "a\u1234\U00100456b";
    want = "b\xF4\x80\x91\x96\xE1\x88\xB4a";
    seen = 0;
    foreach_reverse (char got; text)
    {
        //printf("i = %d, d = %x\n", i, d);
        if (seen >= want.length)
            assert(0);
        assert(got == want[seen]);
        seen++;
    }
    assert(seen == 9);
}
352 | |
353 /*****************************/ | |
354 | |
/**
 * Runtime support for foreach_reverse over a dchar[] with a char loop
 * variable: visits aa from its last element to its first and hands the
 * UTF-8 code units of each element to dg one at a time.
 *
 * Params:
 *  aa = UTF-32 text to iterate over
 *  dg = loop body; a non-zero return value stops the iteration
 *
 * Returns: the first non-zero value returned by dg, otherwise 0.
 */
extern (C) int _aApplyRdc1(dchar[] aa, dg_t dg)
{
    debug(apply) printf("_aApplyRdc1(), len = %d\n", aa.length);

    size_t pos = aa.length;
    while (pos != 0)
    {
        dchar ch = aa[--pos];

        if ((ch & ~0x7F) == 0)
        {
            // ASCII maps to a single code unit.
            char unit = cast(char)ch;
            int rc = dg(cast(void *)&unit);
            if (rc)
                return rc;
        }
        else
        {
            // Everything else is encoded into a scratch buffer first.
            char[4] storage;
            char[] encoded = std.utf.toUTF8(storage, ch);

            foreach (char unit; encoded)
            {
                int rc = dg(cast(void *)&unit);
                if (rc)
                    return rc;
            }
        }
    }
    return 0;
}
387 | |
// foreach_reverse over dchar[] yielding char: ASCII text first, then text
// whose non-ASCII characters come back as their UTF-8 bytes.
unittest
{
    debug(apply) printf("_aApplyRdc1.unittest\n");

    dchar[] text = "hello";
    char[] want = "olleh";
    int seen;

    foreach_reverse (char got; text)
    {
        if (seen >= want.length)
            assert(0);
        assert(got == want[seen]);
        seen++;
    }
    assert(seen == 5);

    text = "a\u1234\U00100456b";
    want = "b\xF4\x80\x91\x96\xE1\x88\xB4a";
    seen = 0;
    foreach_reverse (char got; text)
    {
        //printf("i = %d, d = %x\n", i, d);
        if (seen >= want.length)
            assert(0);
        assert(got == want[seen]);
        seen++;
    }
    assert(seen == 9);
}
432 | |
433 /*****************************/ | |
434 | |
/**
 * Runtime support for foreach_reverse over a dchar[] with a wchar loop
 * variable: visits aa from its last element to its first and hands the
 * UTF-16 code unit(s) of each element to dg. An element above 0xFFFF
 * produces two calls, high surrogate first.
 *
 * Params:
 *  aa = UTF-32 text to iterate over
 *  dg = loop body; a non-zero return value stops the iteration
 *
 * Returns: the first non-zero value returned by dg, otherwise 0.
 */
extern (C) int _aApplyRdw1(dchar[] aa, dg_t dg)
{
    debug(apply) printf("_aApplyRdw1(), len = %d\n", aa.length);

    size_t pos = aa.length;
    while (pos != 0)
    {
        dchar ch = aa[--pos];
        wchar unit;
        int rc;

        if (ch > 0xFFFF)
        {
            // Split into a surrogate pair; the high half goes out here,
            // the low half through the shared call below.
            unit = cast(wchar) ((((ch - 0x10000) >> 10) & 0x3FF) + 0xD800);
            rc = dg(cast(void *)&unit);
            if (rc)
                return rc;
            unit = cast(wchar) (((ch - 0x10000) & 0x3FF) + 0xDC00);
        }
        else
        {
            unit = cast(wchar) ch;
        }

        rc = dg(cast(void *)&unit);
        if (rc)
            return rc;
    }
    return 0;
}
459 | |
// foreach_reverse over dchar[] yielding wchar: ASCII text first, then text
// whose supplementary character must come back as a surrogate pair.
unittest
{
    debug(apply) printf("_aApplyRdw1.unittest\n");

    dchar[] text = "hello";
    wchar[] want = "olleh"w;
    int seen;

    foreach_reverse (wchar got; text)
    {
        if (seen >= want.length)
            assert(0);
        assert(got == want[seen]);
        seen++;
    }
    assert(seen == 5);

    text = "a\u1234\U00100456b";
    want = [cast(wchar)'b', cast(wchar)0xDBC1, cast(wchar)0xDC56,
            cast(wchar)0x1234, cast(wchar)'a'];
    seen = 0;
    foreach_reverse (wchar got; text)
    {
        //printf("i = %d, d = %x\n", i, d);
        if (seen >= want.length)
            assert(0);
        assert(got == want[seen]);
        seen++;
    }
    assert(seen == 5);
}
500 | |
501 | |
502 /****************************************************************************/ | |
503 /* 2 argument versions */ | |
504 | |
// Loop-body callback used by the two-argument routines below (index pointer
// first, element pointer second). The delegate keeps D linkage even though
// the _aApplyR*2() functions that receive it are declared extern (C).
extern (D)
{
    typedef int delegate(void *, void *) dg2_t;
}
507 | |
/**
 * Runtime support for foreach_reverse over a char[] with an index and a
 * dchar loop variable: walks aa from its last code unit towards its
 * first, decodes each UTF-8 sequence into one dchar and calls dg with a
 * pointer to the index of the sequence's first byte and a pointer to the
 * decoded value.
 *
 * Params:
 *  aa = UTF-8 text to iterate over
 *  dg = loop body; a non-zero return value stops the iteration
 *
 * Returns: the first non-zero value returned by dg, or 0 when aa has been
 *          traversed completely.
 *
 * Throws: std.utf.UtfException when a multi-byte sequence is malformed:
 *         it runs off the start of aa, contains a byte that is not a
 *         continuation byte, has more than three continuation bytes, or
 *         its lead byte announces a different length.
 */
extern (C) int _aApplyRcd2(char[] aa, dg2_t dg)
{   int result;
    size_t i;
    size_t len = aa.length;

    debug(apply) printf("_aApplyRcd2(), len = %d\n", len);
    for (i = len; i != 0; )
    {   dchar d;

        i--;
        d = aa[i];
        if (d & 0x80)
        {   char c = cast(char)d;
            uint j;             // bit position of the next 6-bit group
            uint m = 0x3F;      // payload mask the lead byte will need
            d = 0;

            // Scan backwards over continuation bytes until the lead byte.
            while ((c & 0xC0) != 0xC0)
            {
                // Only 10xxxxxx bytes may be skipped, at most three of
                // them, and a lead byte must still be in front of them.
                if ((c & 0xC0) != 0x80 || j == 18 || i == 0)
                    throw new std.utf.UtfException("Invalid UTF-8 sequence", i);
                i--;
                d |= (c & 0x3F) << j;
                j += 6;
                m >>= 1;
                c = aa[i];
            }

            // The tag bits of the lead byte must announce exactly the
            // number of continuation bytes that were consumed.
            uint tag = ~m & 0xFF;
            if ((c & tag) != ((tag << 1) & 0xFF))
                throw new std.utf.UtfException("Invalid UTF-8 sequence", i);

            d |= (c & m) << j;
        }
        result = dg(&i, cast(void *)&d);
        if (result)
            break;
    }
    return result;
}
541 | |
// foreach_reverse over char[] yielding (index, dchar): the index must be
// that of the first byte of each decoded sequence.
unittest
{
    debug(apply) printf("_aApplyRcd2.unittest\n");

    char[] text = "hello";
    dchar[] want = "olleh"d;
    int seen;

    foreach_reverse (pos, dchar got; text)
    {
        assert(pos == 4 - seen);
        if (seen >= want.length)
            assert(0);
        assert(got == want[seen]);
        seen++;
    }
    assert(seen == 5);

    text = "a\u1234\U00100456b";
    want = "b\U00100456\u1234a"d;
    size_t[] wantPos = [cast(size_t)8, cast(size_t)4, cast(size_t)1, cast(size_t)0];
    seen = 0;
    foreach_reverse (pos, dchar got; text)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        if (seen >= want.length)
            assert(0);
        assert(got == want[seen]);
        assert(pos == wantPos[seen]);
        seen++;
    }
    assert(seen == 4);
}
582 | |
583 /*****************************/ | |
584 | |
/**
 * Runtime support for foreach_reverse over a wchar[] with an index and a
 * dchar loop variable: walks aa from its last code unit towards its
 * first, combines surrogate pairs into one dchar and calls dg with a
 * pointer to the index of the code point's first unit and a pointer to
 * the decoded value.
 *
 * Params:
 *  aa = UTF-16 text to iterate over
 *  dg = loop body; a non-zero return value stops the iteration
 *
 * Returns: the first non-zero value returned by dg, or 0 when aa has been
 *          traversed completely.
 *
 * Throws: std.utf.UtfException when a low surrogate is the first unit of
 *         aa or is not preceded by a high surrogate.
 */
extern (C) int _aApplyRwd2(wchar[] aa, dg2_t dg)
{   int result;

    debug(apply) printf("_aApplyRwd2(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {   dchar d;

        i--;
        d = aa[i];
        if (d >= 0xDC00 && d <= 0xDFFF)
        {   if (i == 0)
                throw new std.utf.UtfException("Invalid UTF-16 sequence", 0);
            i--;

            // A low surrogate is only meaningful after a high surrogate;
            // anything else would turn the arithmetic below into garbage.
            wchar hi = aa[i];
            if (hi < 0xD800 || hi > 0xDBFF)
                throw new std.utf.UtfException("Invalid UTF-16 sequence", i);

            d = ((hi - 0xD7C0) << 10) + (d - 0xDC00);
        }
        result = dg(&i, cast(void *)&d);
        if (result)
            break;
    }
    return result;
}
606 | |
// foreach_reverse over wchar[] yielding (index, dchar): the index must be
// that of the first UTF-16 unit of each decoded code point.
unittest
{
    debug(apply) printf("_aApplyRwd2.unittest\n");

    wchar[] text = "hello";
    dchar[] want = "olleh"d;
    int seen;

    foreach_reverse (pos, dchar got; text)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        assert(pos == 4 - seen);
        if (seen >= want.length)
            assert(0);
        assert(got == want[seen]);
        seen++;
    }
    assert(seen == 5);

    text = "a\u1234\U00100456b";
    want = "b\U00100456\u1234a"d;
    size_t[] wantPos = [cast(size_t)4, cast(size_t)2, cast(size_t)1, cast(size_t)0];
    seen = 0;
    foreach_reverse (pos, dchar got; text)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        if (seen >= want.length)
            assert(0);
        assert(pos == wantPos[seen]);
        assert(got == want[seen]);
        seen++;
    }
    assert(seen == 4);
}
648 | |
649 /*****************************/ | |
650 | |
/**
 * Runtime support for foreach_reverse over a char[] with an index and a
 * wchar loop variable: walks aa from its last code unit towards its
 * first, decodes each UTF-8 sequence and calls dg with a pointer to the
 * index of the sequence's first byte and a pointer to a UTF-16 code unit.
 * A code point above 0xFFFF produces two calls with the same index, high
 * surrogate first.
 *
 * Params:
 *  aa = UTF-8 text to iterate over
 *  dg = loop body; a non-zero return value stops the iteration
 *
 * Returns: the first non-zero value returned by dg, or 0 when aa has been
 *          traversed completely.
 *
 * Throws: std.utf.UtfException when a multi-byte sequence is malformed:
 *         it runs off the start of aa, contains a byte that is not a
 *         continuation byte, has more than three continuation bytes, or
 *         its lead byte announces a different length.
 */
extern (C) int _aApplyRcw2(char[] aa, dg2_t dg)
{   int result;

    debug(apply) printf("_aApplyRcw2(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {   dchar d;
        wchar w;

        i--;
        w = aa[i];
        if (w & 0x80)
        {   char c = cast(char)w;
            uint j;             // bit position of the next 6-bit group
            uint m = 0x3F;      // payload mask the lead byte will need
            d = 0;

            // Scan backwards over continuation bytes until the lead byte.
            while ((c & 0xC0) != 0xC0)
            {
                // Only 10xxxxxx bytes may be skipped, at most three of
                // them, and a lead byte must still be in front of them.
                if ((c & 0xC0) != 0x80 || j == 18 || i == 0)
                    throw new std.utf.UtfException("Invalid UTF-8 sequence", i);
                i--;
                d |= (c & 0x3F) << j;
                j += 6;
                m >>= 1;
                c = aa[i];
            }

            // The tag bits of the lead byte must announce exactly the
            // number of continuation bytes that were consumed.
            uint tag = ~m & 0xFF;
            if ((c & tag) != ((tag << 1) & 0xFF))
                throw new std.utf.UtfException("Invalid UTF-8 sequence", i);

            d |= (c & m) << j;

            if (d <= 0xFFFF)
                w = cast(wchar) d;
            else
            {
                // Outside the BMP: deliver the high surrogate here, the
                // low surrogate through the common call below.
                w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
                result = dg(&i, cast(void *)&w);
                if (result)
                    break;
                w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
            }
        }
        result = dg(&i, cast(void *)&w);
        if (result)
            break;
    }
    return result;
}
694 | |
// foreach_reverse over char[] yielding (index, wchar): both halves of a
// surrogate pair must report the index of the 4-byte sequence they came
// from.
unittest
{
    debug(apply) printf("_aApplyRcw2.unittest\n");

    char[] text = "hello";
    wchar[] want = "olleh"w;
    int seen;

    foreach_reverse (pos, wchar got; text)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        assert(pos == 4 - seen);
        if (seen >= want.length)
            assert(0);
        assert(got == want[seen]);
        seen++;
    }
    assert(seen == 5);

    text = "a\u1234\U00100456b";
    want = [cast(wchar)'b', cast(wchar)0xDBC1, cast(wchar)0xDC56,
            cast(wchar)0x1234, cast(wchar)'a'];
    size_t[] wantPos = [cast(size_t)8, cast(size_t)4, cast(size_t)4,
                        cast(size_t)1, cast(size_t)0];
    seen = 0;
    foreach_reverse (pos, wchar got; text)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        if (seen >= want.length)
            assert(0);
        assert(pos == wantPos[seen]);
        assert(got == want[seen]);
        seen++;
    }
    assert(seen == 5);
}
737 | |
738 /*****************************/ | |
739 | |
/**
 * Runtime support for foreach_reverse over a wchar[] with an index and a
 * char loop variable: walks aa from its last code unit towards its first,
 * combines surrogate pairs, and calls dg once per UTF-8 code unit of each
 * code point, passing a pointer to the index of the code point's first
 * UTF-16 unit and a pointer to the byte.
 *
 * Params:
 *  aa = UTF-16 text to iterate over
 *  dg = loop body; a non-zero return value stops the iteration
 *
 * Returns: the first non-zero value returned by dg, or 0 when aa has been
 *          traversed completely.
 *
 * Throws: std.utf.UtfException when a low surrogate is the first unit of
 *         aa or is not preceded by a high surrogate.
 */
extern (C) int _aApplyRwc2(wchar[] aa, dg2_t dg)
{   int result;

    debug(apply) printf("_aApplyRwc2(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {   dchar d;
        char c;

        i--;
        d = aa[i];
        if (d >= 0xDC00 && d <= 0xDFFF)
        {   if (i == 0)
                throw new std.utf.UtfException("Invalid UTF-16 sequence", 0);
            i--;

            // A low surrogate is only meaningful after a high surrogate;
            // anything else would turn the arithmetic below into garbage.
            wchar hi = aa[i];
            if (hi < 0xD800 || hi > 0xDBFF)
                throw new std.utf.UtfException("Invalid UTF-16 sequence", i);

            d = ((hi - 0xD7C0) << 10) + (d - 0xDC00);
        }

        if (d & ~0x7F)
        {
            // Non-ASCII: encode into a scratch buffer and feed each byte.
            char[4] buf;
            char[] b;

            b = std.utf.toUTF8(buf, d);
            foreach (char c2; b)
            {
                result = dg(&i, cast(void *)&c2);
                if (result)
                    return result;
            }
            continue;
        }
        c = cast(char)d;
        result = dg(&i, cast(void *)&c);
        if (result)
            break;
    }
    return result;
}
778 | |
// foreach_reverse over wchar[] yielding (index, char): every UTF-8 byte of
// a code point must report the index of that code point's first UTF-16
// unit.
unittest
{
    debug(apply) printf("_aApplyRwc2.unittest\n");

    wchar[] text = "hello";
    char[] want = "olleh";
    int seen;

    foreach_reverse (pos, char got; text)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        assert(pos == 4 - seen);
        if (seen >= want.length)
            assert(0);
        assert(got == want[seen]);
        seen++;
    }
    assert(seen == 5);

    text = "a\u1234\U00100456b";
    want = "b\xF4\x80\x91\x96\xE1\x88\xB4a";
    size_t[] wantPos = [cast(size_t)4,
                        cast(size_t)2, cast(size_t)2, cast(size_t)2, cast(size_t)2,
                        cast(size_t)1, cast(size_t)1, cast(size_t)1,
                        cast(size_t)0];
    seen = 0;
    foreach_reverse (pos, char got; text)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        if (seen >= want.length)
            assert(0);
        assert(pos == wantPos[seen]);
        assert(got == want[seen]);
        seen++;
    }
    assert(seen == 9);
}
825 | |
826 /*****************************/ | |
827 | |
/**
 * Runtime support for foreach_reverse over a dchar[] with an index and a
 * char loop variable: visits aa from its last element to its first and
 * calls dg once per UTF-8 code unit of each element, passing a pointer to
 * the element's index and a pointer to the byte.
 *
 * Params:
 *  aa = UTF-32 text to iterate over
 *  dg = loop body; a non-zero return value stops the iteration
 *
 * Returns: the first non-zero value returned by dg, otherwise 0.
 */
extern (C) int _aApplyRdc2(dchar[] aa, dg2_t dg)
{
    debug(apply) printf("_aApplyRdc2(), len = %d\n", aa.length);

    size_t pos = aa.length;
    while (pos != 0)
    {
        dchar ch = aa[--pos];

        if ((ch & ~0x7F) == 0)
        {
            // ASCII maps to a single code unit.
            char unit = cast(char)ch;
            int rc = dg(&pos, cast(void *)&unit);
            if (rc)
                return rc;
        }
        else
        {
            // Everything else is encoded into a scratch buffer first.
            char[4] storage;
            char[] encoded = std.utf.toUTF8(storage, ch);

            foreach (char unit; encoded)
            {
                int rc = dg(&pos, cast(void *)&unit);
                if (rc)
                    return rc;
            }
        }
    }
    return 0;
}
859 | |
// foreach_reverse over dchar[] yielding (index, char): every UTF-8 byte of
// an element must report that element's index.
unittest
{
    debug(apply) printf("_aApplyRdc2.unittest\n");

    dchar[] text = "hello";
    char[] want = "olleh";
    int seen;

    foreach_reverse (pos, char got; text)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        assert(pos == 4 - seen);
        if (seen >= want.length)
            assert(0);
        assert(got == want[seen]);
        seen++;
    }
    assert(seen == 5);

    text = "a\u1234\U00100456b";
    want = "b\xF4\x80\x91\x96\xE1\x88\xB4a";
    size_t[] wantPos = [cast(size_t)3,
                        cast(size_t)2, cast(size_t)2, cast(size_t)2, cast(size_t)2,
                        cast(size_t)1, cast(size_t)1, cast(size_t)1,
                        cast(size_t)0];
    seen = 0;
    foreach_reverse (pos, char got; text)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        if (seen >= want.length)
            assert(0);
        assert(pos == wantPos[seen]);
        assert(got == want[seen]);
        seen++;
    }
    assert(seen == 9);
}
906 | |
907 /*****************************/ | |
908 | |
/**
 * Runtime support for foreach_reverse over a dchar[] with an index and a
 * wchar loop variable: visits aa from its last element to its first and
 * calls dg with a pointer to the element's index and a pointer to a
 * UTF-16 code unit. An element above 0xFFFF produces two calls with the
 * same index, high surrogate first.
 *
 * Params:
 *  aa = UTF-32 text to iterate over
 *  dg = loop body; a non-zero return value stops the iteration
 *
 * Returns: the first non-zero value returned by dg, otherwise 0.
 */
extern (C) int _aApplyRdw2(dchar[] aa, dg2_t dg)
{
    debug(apply) printf("_aApplyRdw2(), len = %d\n", aa.length);

    size_t pos = aa.length;
    while (pos != 0)
    {
        dchar ch = aa[--pos];
        wchar unit;
        int rc;

        if (ch > 0xFFFF)
        {
            // Split into a surrogate pair; the high half goes out here,
            // the low half through the shared call below.
            unit = cast(wchar) ((((ch - 0x10000) >> 10) & 0x3FF) + 0xD800);
            rc = dg(&pos, cast(void *)&unit);
            if (rc)
                return rc;
            unit = cast(wchar) (((ch - 0x10000) & 0x3FF) + 0xDC00);
        }
        else
        {
            unit = cast(wchar) ch;
        }

        rc = dg(&pos, cast(void *)&unit);
        if (rc)
            return rc;
    }
    return 0;
}
933 | |
// foreach_reverse over dchar[] yielding (index, wchar): both halves of a
// surrogate pair must report the index of the element they came from.
unittest
{
    debug(apply) printf("_aApplyRdw2.unittest\n");

    dchar[] text = "hello";
    wchar[] want = "olleh"w;
    int seen;

    foreach_reverse (pos, wchar got; text)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        assert(pos == 4 - seen);
        if (seen >= want.length)
            assert(0);
        assert(got == want[seen]);
        seen++;
    }
    assert(seen == 5);

    text = "a\u1234\U00100456b";
    want = [cast(wchar)'b', cast(wchar)0xDBC1, cast(wchar)0xDC56,
            cast(wchar)0x1234, cast(wchar)'a'];
    size_t[] wantPos = [cast(size_t)3, cast(size_t)2, cast(size_t)2,
                        cast(size_t)1, cast(size_t)0];
    seen = 0;
    foreach_reverse (pos, wchar got; text)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        if (seen >= want.length)
            assert(0);
        assert(pos == wantPos[seen]);
        assert(got == want[seen]);
        seen++;
    }
    assert(seen == 5);
}
976 | |
977 |