132
|
1
|
|
2 /**
|
|
3 * Part of the D programming language runtime library.
|
|
4 */
|
|
5
|
|
6 /*
|
|
7 * Copyright (C) 2004-2006 by Digital Mars, www.digitalmars.com
|
|
8 * Written by Walter Bright
|
|
9 *
|
|
10 * This software is provided 'as-is', without any express or implied
|
|
11 * warranty. In no event will the authors be held liable for any damages
|
|
12 * arising from the use of this software.
|
|
13 *
|
|
14 * Permission is granted to anyone to use this software for any purpose,
|
|
15 * including commercial applications, and to alter it and redistribute it
|
|
16 * freely, in both source and binary form, subject to the following
|
|
17 * restrictions:
|
|
18 *
|
|
19 * o The origin of this software must not be misrepresented; you must not
|
|
20 * claim that you wrote the original software. If you use this software
|
|
21 * in a product, an acknowledgment in the product documentation would be
|
|
22 * appreciated but is not required.
|
|
23 * o Altered source versions must be plainly marked as such, and must not
|
|
24 * be misrepresented as being the original software.
|
|
25 * o This notice may not be removed or altered from any source
|
|
26 * distribution.
|
|
27 */
|
|
28
|
|
29 /*
|
|
30 * Modified by Sean Kelly <sean@f4.ca> for use with Tango.
|
|
31 */
|
|
32
|
|
33 /* This code handles decoding UTF strings for foreach_reverse loops.
|
|
34 * There are 6 combinations of conversions between char, wchar,
|
|
35 * and dchar, and 2 of each of those.
|
|
36 */
|
|
37
|
|
38 private import util.utf;
|
|
39
|
|
40 /**********************************************/
|
|
41 /* 1 argument versions */
|
|
42
|
|
// The loop-body delegate has D linkage, while the _aApplyR*1() entry points
// that receive it are declared extern (C).
// Its single argument is the address of the current element: every
// one-argument function below passes a pointer to a local char, wchar or dchar.
extern (D) typedef int delegate(void *) dg_t;
|
|
45
|
|
/**
 * Walks the UTF-8 string aa from its last code point to its first, decoding
 * each code point to a dchar and passing the address of that dchar to dg.
 * The accompanying unittest exercises it through
 * foreach_reverse (dchar d; char[]).
 *
 * Params:
 *  aa = UTF-8 code units to traverse backwards
 *  dg = loop body; called once per decoded code point
 *
 * Returns:
 *  The first non-zero value returned by dg (traversal stops there), or 0
 *  when every code point was visited.
 *
 * onUnicodeError("Invalid UTF-8 sequence", 0) is invoked when the backward
 * scan for a lead byte reaches the start of the string, runs past three
 * trailing bytes, or meets an ASCII byte where a trailing byte was expected.
 * The lead byte's announced length is not cross-checked against the number
 * of trailing bytes actually consumed.
 */
extern (C) int _aApplyRcd1(in char[] aa, dg_t dg)
{
    int result;

    debug(apply) printf("_aApplyRcd1(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        dchar d = aa[--i];

        if (d & 0x80)
        {
            // Last byte of a multi-byte sequence: collect 6-bit payloads
            // while stepping back towards the lead byte (11xxxxxx).
            char c = cast(char)d;
            uint j = 0;       // bit offset of the next payload group
            uint m = 0x3F;    // lead-byte payload mask; narrows by one bit per trailing byte
            d = 0;
            while ((c & 0xC0) != 0xC0)
            {
                // j == 18 means three trailing bytes are already folded in;
                // a fourth cannot belong to any UTF-8 sequence and would
                // eventually push the shift count past the width of a dchar.
                // A byte without the high bit is ASCII, not a trailing byte.
                if (i == 0 || j == 18 || (c & 0x80) == 0)
                    onUnicodeError("Invalid UTF-8 sequence", 0);
                i--;
                d |= (c & 0x3F) << j;
                j += 6;
                m >>= 1;
                c = aa[i];
            }
            d |= (c & m) << j;
        }
        result = dg(cast(void *)&d);
        if (result)
            break;
    }
    return result;
}
|
|
77
|
|
unittest
{
    debug(apply) printf("_aApplyRcd1.unittest\n");

    // Literals are sliced so both variables are dynamic arrays and can be
    // re-pointed at strings of a different length further down.
    auto text = "hello"c[];
    auto expected = "olleh"d[];
    size_t seen = 0;

    // ASCII only: one dchar per byte, last byte first.
    foreach_reverse (dchar ch; text)
    {
        assert(seen < expected.length);
        assert(ch == expected[seen]);
        seen++;
    }
    assert(seen == 5);

    // 1-, 3- and 4-byte sequences must come back as whole code points.
    text = "a\u1234\U00100456b";
    expected = "b\U00100456\u1234a"d;
    seen = 0;
    foreach_reverse (dchar ch; text)
    {
        assert(seen < expected.length);
        assert(ch == expected[seen]);
        seen++;
    }
    assert(seen == 4);
}
|
|
117
|
|
118 /*****************************/
|
|
119
|
|
/**
 * Walks the UTF-16 string aa from its last code point to its first,
 * combining surrogate pairs into a single dchar and passing the address of
 * that dchar to dg. The accompanying unittest exercises it through
 * foreach_reverse (dchar d; wchar[]).
 *
 * Params:
 *  aa = UTF-16 code units to traverse backwards
 *  dg = loop body; called once per decoded code point
 *
 * Returns:
 *  The first non-zero value returned by dg (traversal stops there), or 0
 *  when every code point was visited.
 *
 * onUnicodeError("Invalid UTF-16 sequence", 0) is invoked when a low
 * surrogate is the first unit of the string or is not preceded by a high
 * surrogate. A high surrogate with no following low surrogate is passed to
 * dg unchanged.
 */
extern (C) int _aApplyRwd1(in wchar[] aa, dg_t dg)
{
    int result;

    debug(apply) printf("_aApplyRwd1(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        dchar d = aa[--i];

        if (d >= 0xDC00 && d <= 0xDFFF)
        {
            // Low surrogate: the unit before it must be the high half.
            if (i == 0)
                onUnicodeError("Invalid UTF-16 sequence", 0);
            i--;
            wchar hi = aa[i];
            // Without this check any preceding unit would be swallowed and
            // folded into a meaningless code point.
            if (hi < 0xD800 || hi > 0xDBFF)
                onUnicodeError("Invalid UTF-16 sequence", 0);
            // 0xD7C0 == 0xD800 - (0x10000 >> 10): removes the surrogate
            // bias and adds the 0x10000 plane offset in one step.
            d = ((hi - 0xD7C0) << 10) + (d - 0xDC00);
        }
        result = dg(cast(void *)&d);
        if (result)
            break;
    }
    return result;
}
|
|
141
|
|
unittest
{
    debug(apply) printf("_aApplyRwd1.unittest\n");

    // Literals are sliced so both variables are dynamic arrays and can be
    // re-pointed at strings of a different length further down.
    auto text = "hello"w[];
    auto expected = "olleh"d[];
    size_t seen = 0;

    // BMP-only text: one dchar per UTF-16 unit, last unit first.
    foreach_reverse (dchar ch; text)
    {
        assert(seen < expected.length);
        assert(ch == expected[seen]);
        seen++;
    }
    assert(seen == 5);

    // U+100456 is stored as a surrogate pair and must be delivered as one dchar.
    text = "a\u1234\U00100456b";
    expected = "b\U00100456\u1234a"d;
    seen = 0;
    foreach_reverse (dchar ch; text)
    {
        assert(seen < expected.length);
        assert(ch == expected[seen]);
        seen++;
    }
    assert(seen == 4);
}
|
|
181
|
|
182 /*****************************/
|
|
183
|
|
/**
 * Walks the UTF-8 string aa from its last code point to its first and hands
 * each code point to dg as UTF-16: one wchar for values up to 0xFFFF,
 * otherwise the high surrogate followed by the low surrogate (two calls).
 * The accompanying unittest exercises it through
 * foreach_reverse (wchar d; char[]).
 *
 * Params:
 *  aa = UTF-8 code units to traverse backwards
 *  dg = loop body; receives the address of a local wchar
 *
 * Returns:
 *  The first non-zero value returned by dg (traversal stops there), or 0
 *  when the whole string was visited.
 *
 * onUnicodeError("Invalid UTF-8 sequence", 0) is invoked when the backward
 * scan for a lead byte reaches the start of the string, runs past three
 * trailing bytes, or meets an ASCII byte where a trailing byte was expected.
 * The lead byte's announced length is not cross-checked against the number
 * of trailing bytes actually consumed.
 */
extern (C) int _aApplyRcw1(in char[] aa, dg_t dg)
{
    int result;

    debug(apply) printf("_aApplyRcw1(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        dchar d;
        wchar w;

        i--;
        w = aa[i];
        if (w & 0x80)
        {
            // Last byte of a multi-byte sequence: collect 6-bit payloads
            // while stepping back towards the lead byte (11xxxxxx).
            char c = cast(char)w;
            uint j = 0;       // bit offset of the next payload group
            uint m = 0x3F;    // lead-byte payload mask; narrows by one bit per trailing byte
            d = 0;
            while ((c & 0xC0) != 0xC0)
            {
                // j == 18 means three trailing bytes are already folded in;
                // a fourth cannot belong to any UTF-8 sequence and would
                // eventually push the shift count past the width of a dchar.
                // A byte without the high bit is ASCII, not a trailing byte.
                if (i == 0 || j == 18 || (c & 0x80) == 0)
                    onUnicodeError("Invalid UTF-8 sequence", 0);
                i--;
                d |= (c & 0x3F) << j;
                j += 6;
                m >>= 1;
                c = aa[i];
            }
            d |= (c & m) << j;

            if (d <= 0xFFFF)
                w = cast(wchar) d;
            else
            {
                // Outside the BMP: emit the high surrogate now, the low
                // surrogate through the common call below.
                w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
                result = dg(cast(void *)&w);
                if (result)
                    break;
                w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
            }
        }
        result = dg(cast(void *)&w);
        if (result)
            break;
    }
    return result;
}
|
|
227
|
|
unittest
{
    debug(apply) printf("_aApplyRcw1.unittest\n");

    // Literals are sliced so both variables are dynamic arrays and can be
    // re-pointed at strings of a different length further down.
    auto text = "hello"c[];
    auto expected = "olleh"w[];
    size_t seen = 0;

    // ASCII only: one wchar per byte, last byte first.
    foreach_reverse (wchar ch; text)
    {
        assert(seen < expected.length);
        assert(ch == expected[seen]);
        seen++;
    }
    assert(seen == 5);

    // Code points arrive in reverse order, but the units of one code point
    // keep their forward order: 'b', 0xDBC1, 0xDC56, 0x1234, 'a'.
    text = "a\u1234\U00100456b";
    expected = "b\U00100456\u1234a"w;
    seen = 0;
    foreach_reverse (wchar ch; text)
    {
        assert(seen < expected.length);
        assert(ch == expected[seen]);
        seen++;
    }
    assert(seen == 5);
}
|
|
268
|
|
269 /*****************************/
|
|
270
|
|
/**
 * Walks the UTF-16 string aa from its last code point to its first and
 * hands each code point to dg as UTF-8, one char per call. Values above
 * 0x7F are encoded with toUTF8 and their bytes are delivered in forward
 * order. The accompanying unittest exercises it through
 * foreach_reverse (char d; wchar[]).
 *
 * Params:
 *  aa = UTF-16 code units to traverse backwards
 *  dg = loop body; receives the address of a local char
 *
 * Returns:
 *  The first non-zero value returned by dg (traversal stops there), or 0
 *  when the whole string was visited.
 *
 * onUnicodeError("Invalid UTF-16 sequence", 0) is invoked when a low
 * surrogate is the first unit of the string or is not preceded by a high
 * surrogate.
 */
extern (C) int _aApplyRwc1(in wchar[] aa, dg_t dg)
{
    int result;

    debug(apply) printf("_aApplyRwc1(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        dchar d = aa[--i];
        char c;

        if (d >= 0xDC00 && d <= 0xDFFF)
        {
            // Low surrogate: the unit before it must be the high half.
            if (i == 0)
                onUnicodeError("Invalid UTF-16 sequence", 0);
            i--;
            wchar hi = aa[i];
            // Without this check any preceding unit would be swallowed and
            // folded into a meaningless code point.
            if (hi < 0xD800 || hi > 0xDBFF)
                onUnicodeError("Invalid UTF-16 sequence", 0);
            // 0xD7C0 == 0xD800 - (0x10000 >> 10)
            d = ((hi - 0xD7C0) << 10) + (d - 0xDC00);
        }

        if (d & ~0x7F)
        {
            // Non-ASCII: re-encode and feed the bytes to dg one at a time.
            char[4] buf;

            auto b = toUTF8(buf, d);
            foreach (char c2; b)
            {
                result = dg(cast(void *)&c2);
                if (result)
                    return result;
            }
            continue;
        }
        c = cast(char)d;
        result = dg(cast(void *)&c);
        if (result)
            break;
    }
    return result;
}
|
|
308
|
|
unittest
{
    debug(apply) printf("_aApplyRwc1.unittest\n");

    // Literals are sliced so both variables are dynamic arrays and can be
    // re-pointed at strings of a different length further down.
    auto text = "hello"w[];
    auto expected = "olleh"c[];
    size_t seen = 0;

    // BMP ASCII text: one char per UTF-16 unit, last unit first.
    foreach_reverse (char ch; text)
    {
        assert(seen < expected.length);
        assert(ch == expected[seen]);
        seen++;
    }
    assert(seen == 5);

    // Code points arrive in reverse order, bytes of one code point in
    // forward order: 'b', F4 80 91 96, E1 88 B4, 'a'.
    text = "a\u1234\U00100456b";
    expected = "b\U00100456\u1234a"c;
    seen = 0;
    foreach_reverse (char ch; text)
    {
        assert(seen < expected.length);
        assert(ch == expected[seen]);
        seen++;
    }
    assert(seen == 9);
}
|
|
353
|
|
354 /*****************************/
|
|
355
|
|
/**
 * Visits the UTF-32 string aa from its last element to its first and hands
 * each code point to dg as UTF-8, one char per call. Values that fit in
 * seven bits are passed directly; anything else is encoded with toUTF8 and
 * its bytes are delivered in forward order.
 *
 * Params:
 *  aa = code points to traverse backwards
 *  dg = loop body; receives the address of a local char
 *
 * Returns:
 *  The first non-zero value returned by dg (traversal stops there), or 0
 *  when the whole string was visited.
 */
extern (C) int _aApplyRdc1(in dchar[] aa, dg_t dg)
{
    debug(apply) printf("_aApplyRdc1(), len = %d\n", aa.length);

    int rc = 0;
    size_t pos = aa.length;

    while (pos != 0)
    {
        dchar codePoint = aa[--pos];

        if ((codePoint & ~0x7F) == 0)
        {
            // Plain ASCII: a single code unit.
            char unit = cast(char)codePoint;
            rc = dg(cast(void *)&unit);
            if (rc)
                break;
            continue;
        }

        // Multi-byte encoding: feed every byte to the loop body in turn.
        char[4] storage;
        foreach (char unit; toUTF8(storage, codePoint))
        {
            rc = dg(cast(void *)&unit);
            if (rc)
                return rc;
        }
    }
    return rc;
}
|
|
387
|
|
unittest
{
    debug(apply) printf("_aApplyRdc1.unittest\n");

    // Literals are sliced so both variables are dynamic arrays and can be
    // re-pointed at strings of a different length further down.
    auto text = "hello"d[];
    auto expected = "olleh"c[];
    size_t seen = 0;

    // ASCII code points: one char each, last element first.
    foreach_reverse (char ch; text)
    {
        assert(seen < expected.length);
        assert(ch == expected[seen]);
        seen++;
    }
    assert(seen == 5);

    // Code points arrive in reverse order, bytes of one code point in
    // forward order: 'b', F4 80 91 96, E1 88 B4, 'a'.
    text = "a\u1234\U00100456b";
    expected = "b\U00100456\u1234a"c;
    seen = 0;
    foreach_reverse (char ch; text)
    {
        assert(seen < expected.length);
        assert(ch == expected[seen]);
        seen++;
    }
    assert(seen == 9);
}
|
|
432
|
|
433 /*****************************/
|
|
434
|
|
/**
 * Visits the UTF-32 string aa from its last element to its first and hands
 * each code point to dg as UTF-16: one wchar for values up to 0xFFFF,
 * otherwise the high surrogate followed by the low surrogate (two calls).
 *
 * Params:
 *  aa = code points to traverse backwards
 *  dg = loop body; receives the address of a local wchar
 *
 * Returns:
 *  The first non-zero value returned by dg (traversal stops there), or 0
 *  when the whole string was visited.
 */
extern (C) int _aApplyRdw1(in dchar[] aa, dg_t dg)
{
    debug(apply) printf("_aApplyRdw1(), len = %d\n", aa.length);

    int rc = 0;
    size_t pos = aa.length;

    while (pos != 0)
    {
        dchar codePoint = aa[--pos];
        wchar unit;

        if (codePoint > 0xFFFF)
        {
            // Needs a surrogate pair: high half first, low half via the
            // shared call after the branch.
            uint offset = codePoint - 0x10000;
            unit = cast(wchar) (((offset >> 10) & 0x3FF) + 0xD800);
            rc = dg(cast(void *)&unit);
            if (rc)
                break;
            unit = cast(wchar) ((offset & 0x3FF) + 0xDC00);
        }
        else
        {
            unit = cast(wchar) codePoint;
        }

        rc = dg(cast(void *)&unit);
        if (rc)
            break;
    }
    return rc;
}
|
|
459
|
|
unittest
{
    debug(apply) printf("_aApplyRdw1.unittest\n");

    // Literals are sliced so both variables are dynamic arrays and can be
    // re-pointed at strings of a different length further down.
    auto text = "hello"d[];
    auto expected = "olleh"w[];
    size_t seen = 0;

    // BMP code points: one wchar each, last element first.
    foreach_reverse (wchar ch; text)
    {
        assert(seen < expected.length);
        assert(ch == expected[seen]);
        seen++;
    }
    assert(seen == 5);

    // U+100456 is split into 0xDBC1 then 0xDC56; the rest are single units.
    text = "a\u1234\U00100456b";
    expected = "b\U00100456\u1234a"w;
    seen = 0;
    foreach_reverse (wchar ch; text)
    {
        assert(seen < expected.length);
        assert(ch == expected[seen]);
        seen++;
    }
    assert(seen == 5);
}
|
|
500
|
|
501
|
|
502 /****************************************************************************/
|
|
503 /* 2 argument versions */
|
|
504
|
|
// Two-argument loop-body delegate: it has D linkage, while the
// _aApplyR*2() entry points that receive it are declared extern (C).
// Every two-argument function below passes the address of its index
// variable first and the address of the current element second.
extern (D) typedef int delegate(void *, void *) dg2_t;
|
|
507
|
|
/**
 * Index-and-value variant of the reverse UTF-8 to UTF-32 traversal: walks aa
 * from its last code point to its first, decoding each one to a dchar.
 * dg receives the address of the index variable (positioned on the first
 * byte of the code point) and the address of the decoded dchar. The
 * accompanying unittest exercises it through
 * foreach_reverse (k, dchar d; char[]).
 *
 * Params:
 *  aa = UTF-8 code units to traverse backwards
 *  dg = loop body; called once per decoded code point
 *
 * Returns:
 *  The first non-zero value returned by dg (traversal stops there), or 0
 *  when every code point was visited.
 *
 * onUnicodeError("Invalid UTF-8 sequence", 0) is invoked when the backward
 * scan for a lead byte reaches the start of the string, runs past three
 * trailing bytes, or meets an ASCII byte where a trailing byte was expected.
 * The lead byte's announced length is not cross-checked against the number
 * of trailing bytes actually consumed.
 */
extern (C) int _aApplyRcd2(in char[] aa, dg2_t dg)
{
    int result;
    size_t i;
    size_t len = aa.length;

    debug(apply) printf("_aApplyRcd2(), len = %d\n", len);
    for (i = len; i != 0; )
    {
        dchar d = aa[--i];

        if (d & 0x80)
        {
            // Last byte of a multi-byte sequence: collect 6-bit payloads
            // while stepping back towards the lead byte (11xxxxxx).
            char c = cast(char)d;
            uint j = 0;       // bit offset of the next payload group
            uint m = 0x3F;    // lead-byte payload mask; narrows by one bit per trailing byte
            d = 0;
            while ((c & 0xC0) != 0xC0)
            {
                // j == 18 means three trailing bytes are already folded in;
                // a fourth cannot belong to any UTF-8 sequence and would
                // eventually push the shift count past the width of a dchar.
                // A byte without the high bit is ASCII, not a trailing byte.
                if (i == 0 || j == 18 || (c & 0x80) == 0)
                    onUnicodeError("Invalid UTF-8 sequence", 0);
                i--;
                d |= (c & 0x3F) << j;
                j += 6;
                m >>= 1;
                c = aa[i];
            }
            d |= (c & m) << j;
        }
        // i now indexes the first byte of the code point just decoded.
        result = dg(&i, cast(void *)&d);
        if (result)
            break;
    }
    return result;
}
|
|
541
|
|
unittest
{
    debug(apply) printf("_aApplyRcd2.unittest\n");

    // Literals are sliced so both variables are dynamic arrays and can be
    // re-pointed at strings of a different length further down.
    auto text = "hello"c[];
    auto expected = "olleh"d[];
    size_t seen = 0;

    // ASCII only: the key counts down from the last byte index.
    foreach_reverse (k, dchar ch; text)
    {
        assert(seen < expected.length);
        assert(k == 4 - seen);
        assert(ch == expected[seen]);
        seen++;
    }
    assert(seen == 5);

    // The key is the byte offset at which each code point starts.
    static size_t[] starts = [8, 4, 1, 0];
    text = "a\u1234\U00100456b";
    expected = "b\U00100456\u1234a"d;
    seen = 0;
    foreach_reverse (k, dchar ch; text)
    {
        assert(seen < expected.length);
        assert(ch == expected[seen]);
        assert(k == starts[seen]);
        seen++;
    }
    assert(seen == 4);
}
|
|
582
|
|
583 /*****************************/
|
|
584
|
|
/**
 * Index-and-value variant of the reverse UTF-16 to UTF-32 traversal: walks
 * aa from its last code point to its first, combining surrogate pairs into
 * one dchar. dg receives the address of the index variable (positioned on
 * the first unit of the code point) and the address of the decoded dchar.
 * The accompanying unittest exercises it through
 * foreach_reverse (k, dchar d; wchar[]).
 *
 * Params:
 *  aa = UTF-16 code units to traverse backwards
 *  dg = loop body; called once per decoded code point
 *
 * Returns:
 *  The first non-zero value returned by dg (traversal stops there), or 0
 *  when every code point was visited.
 *
 * onUnicodeError("Invalid UTF-16 sequence", 0) is invoked when a low
 * surrogate is the first unit of the string or is not preceded by a high
 * surrogate. A high surrogate with no following low surrogate is passed to
 * dg unchanged.
 */
extern (C) int _aApplyRwd2(in wchar[] aa, dg2_t dg)
{
    int result;

    debug(apply) printf("_aApplyRwd2(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        dchar d = aa[--i];

        if (d >= 0xDC00 && d <= 0xDFFF)
        {
            // Low surrogate: the unit before it must be the high half.
            if (i == 0)
                onUnicodeError("Invalid UTF-16 sequence", 0);
            i--;
            wchar hi = aa[i];
            // Without this check any preceding unit would be swallowed and
            // folded into a meaningless code point.
            if (hi < 0xD800 || hi > 0xDBFF)
                onUnicodeError("Invalid UTF-16 sequence", 0);
            // 0xD7C0 == 0xD800 - (0x10000 >> 10): removes the surrogate
            // bias and adds the 0x10000 plane offset in one step.
            d = ((hi - 0xD7C0) << 10) + (d - 0xDC00);
        }
        result = dg(&i, cast(void *)&d);
        if (result)
            break;
    }
    return result;
}
|
|
606
|
|
unittest
{
    debug(apply) printf("_aApplyRwd2.unittest\n");

    // Literals are sliced so both variables are dynamic arrays and can be
    // re-pointed at strings of a different length further down.
    auto text = "hello"w[];
    auto expected = "olleh"d[];
    size_t seen = 0;

    // BMP-only text: the key counts down from the last unit index.
    foreach_reverse (k, dchar ch; text)
    {
        assert(seen < expected.length);
        assert(k == 4 - seen);
        assert(ch == expected[seen]);
        seen++;
    }
    assert(seen == 5);

    // The key is the unit offset at which each code point starts; the
    // surrogate pair for U+100456 occupies units 2 and 3.
    static size_t[] starts = [4, 2, 1, 0];
    text = "a\u1234\U00100456b";
    expected = "b\U00100456\u1234a"d;
    seen = 0;
    foreach_reverse (k, dchar ch; text)
    {
        assert(seen < expected.length);
        assert(k == starts[seen]);
        assert(ch == expected[seen]);
        seen++;
    }
    assert(seen == 4);
}
|
|
648
|
|
649 /*****************************/
|
|
650
|
|
/**
 * Index-and-value variant of the reverse UTF-8 to UTF-16 traversal: walks aa
 * from its last code point to its first and hands each code point to dg as
 * one wchar (values up to 0xFFFF) or as a high surrogate followed by a low
 * surrogate (two calls). Every call also receives the address of the index
 * variable; for a multi-byte sequence it is positioned on the lead byte.
 * The accompanying unittest exercises it through
 * foreach_reverse (k, wchar d; char[]).
 *
 * Params:
 *  aa = UTF-8 code units to traverse backwards
 *  dg = loop body; receives the index address and the address of a local wchar
 *
 * Returns:
 *  The first non-zero value returned by dg (traversal stops there), or 0
 *  when the whole string was visited.
 *
 * onUnicodeError("Invalid UTF-8 sequence", 0) is invoked when the backward
 * scan for a lead byte reaches the start of the string, runs past three
 * trailing bytes, or meets an ASCII byte where a trailing byte was expected.
 * The lead byte's announced length is not cross-checked against the number
 * of trailing bytes actually consumed.
 */
extern (C) int _aApplyRcw2(in char[] aa, dg2_t dg)
{
    int result;

    debug(apply) printf("_aApplyRcw2(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        dchar d;
        wchar w;

        i--;
        w = aa[i];
        if (w & 0x80)
        {
            // Last byte of a multi-byte sequence: collect 6-bit payloads
            // while stepping back towards the lead byte (11xxxxxx).
            char c = cast(char)w;
            uint j = 0;       // bit offset of the next payload group
            uint m = 0x3F;    // lead-byte payload mask; narrows by one bit per trailing byte
            d = 0;
            while ((c & 0xC0) != 0xC0)
            {
                // j == 18 means three trailing bytes are already folded in;
                // a fourth cannot belong to any UTF-8 sequence and would
                // eventually push the shift count past the width of a dchar.
                // A byte without the high bit is ASCII, not a trailing byte.
                if (i == 0 || j == 18 || (c & 0x80) == 0)
                    onUnicodeError("Invalid UTF-8 sequence", 0);
                i--;
                d |= (c & 0x3F) << j;
                j += 6;
                m >>= 1;
                c = aa[i];
            }
            d |= (c & m) << j;

            if (d <= 0xFFFF)
                w = cast(wchar) d;
            else
            {
                // Outside the BMP: emit the high surrogate now, the low
                // surrogate through the common call below.
                w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
                result = dg(&i, cast(void *)&w);
                if (result)
                    break;
                w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
            }
        }
        result = dg(&i, cast(void *)&w);
        if (result)
            break;
    }
    return result;
}
|
|
694
|
|
unittest
{
    debug(apply) printf("_aApplyRcw2.unittest\n");

    // Literals are sliced so both variables are dynamic arrays and can be
    // re-pointed at strings of a different length further down.
    auto text = "hello"c[];
    auto expected = "olleh"w[];
    size_t seen = 0;

    // ASCII only: the key counts down from the last byte index.
    foreach_reverse (k, wchar ch; text)
    {
        assert(seen < expected.length);
        assert(k == 4 - seen);
        assert(ch == expected[seen]);
        seen++;
    }
    assert(seen == 5);

    // Units: 'b', 0xDBC1, 0xDC56, 0x1234, 'a'. Both halves of the
    // surrogate pair report the byte offset of the 4-byte sequence.
    static size_t[] starts = [8, 4, 4, 1, 0];
    text = "a\u1234\U00100456b";
    expected = "b\U00100456\u1234a"w;
    seen = 0;
    foreach_reverse (k, wchar ch; text)
    {
        assert(seen < expected.length);
        assert(k == starts[seen]);
        assert(ch == expected[seen]);
        seen++;
    }
    assert(seen == 5);
}
|
|
737
|
|
738 /*****************************/
|
|
739
|
|
/**
 * Index-and-value variant of the reverse UTF-16 to UTF-8 traversal: walks aa
 * from its last code point to its first and hands each code point to dg as
 * UTF-8, one char per call. Values above 0x7F are encoded with toUTF8 and
 * their bytes are delivered in forward order. Every call also receives the
 * address of the index variable, positioned on the first UTF-16 unit of the
 * code point. The accompanying unittest exercises it through
 * foreach_reverse (k, char d; wchar[]).
 *
 * Params:
 *  aa = UTF-16 code units to traverse backwards
 *  dg = loop body; receives the index address and the address of a local char
 *
 * Returns:
 *  The first non-zero value returned by dg (traversal stops there), or 0
 *  when the whole string was visited.
 *
 * onUnicodeError("Invalid UTF-16 sequence", 0) is invoked when a low
 * surrogate is the first unit of the string or is not preceded by a high
 * surrogate.
 */
extern (C) int _aApplyRwc2(in wchar[] aa, dg2_t dg)
{
    int result;

    debug(apply) printf("_aApplyRwc2(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        dchar d = aa[--i];
        char c;

        if (d >= 0xDC00 && d <= 0xDFFF)
        {
            // Low surrogate: the unit before it must be the high half.
            if (i == 0)
                onUnicodeError("Invalid UTF-16 sequence", 0);
            i--;
            wchar hi = aa[i];
            // Without this check any preceding unit would be swallowed and
            // folded into a meaningless code point.
            if (hi < 0xD800 || hi > 0xDBFF)
                onUnicodeError("Invalid UTF-16 sequence", 0);
            // 0xD7C0 == 0xD800 - (0x10000 >> 10)
            d = ((hi - 0xD7C0) << 10) + (d - 0xDC00);
        }

        if (d & ~0x7F)
        {
            // Non-ASCII: re-encode and feed the bytes to dg one at a time.
            char[4] buf;

            auto b = toUTF8(buf, d);
            foreach (char c2; b)
            {
                result = dg(&i, cast(void *)&c2);
                if (result)
                    return result;
            }
            continue;
        }
        c = cast(char)d;
        result = dg(&i, cast(void *)&c);
        if (result)
            break;
    }
    return result;
}
|
|
777
|
|
unittest
{
    debug(apply) printf("_aApplyRwc2.unittest\n");

    // Literals are sliced so both variables are dynamic arrays and can be
    // re-pointed at strings of a different length further down.
    auto text = "hello"w[];
    auto expected = "olleh"c[];
    size_t seen = 0;

    // BMP ASCII text: the key counts down from the last unit index.
    foreach_reverse (k, char ch; text)
    {
        assert(seen < expected.length);
        assert(k == 4 - seen);
        assert(ch == expected[seen]);
        seen++;
    }
    assert(seen == 5);

    // Bytes: 'b', F4 80 91 96, E1 88 B4, 'a'. Every byte of one code
    // point reports the UTF-16 offset at which that code point starts.
    static size_t[] starts = [4, 2, 2, 2, 2, 1, 1, 1, 0];
    text = "a\u1234\U00100456b";
    expected = "b\U00100456\u1234a"c;
    seen = 0;
    foreach_reverse (k, char ch; text)
    {
        assert(seen < expected.length);
        assert(k == starts[seen]);
        assert(ch == expected[seen]);
        seen++;
    }
    assert(seen == 9);
}
|
|
824
|
|
825 /*****************************/
|
|
826
|
|
/**
 * Index-and-value variant of the reverse UTF-32 to UTF-8 traversal: visits
 * aa from its last element to its first and hands each code point to dg as
 * UTF-8, one char per call. Values that fit in seven bits are passed
 * directly; anything else is encoded with toUTF8 and its bytes are delivered
 * in forward order. Every call also receives the address of the index
 * variable, which holds the position of the current element in aa.
 *
 * Params:
 *  aa = code points to traverse backwards
 *  dg = loop body; receives the index address and the address of a local char
 *
 * Returns:
 *  The first non-zero value returned by dg (traversal stops there), or 0
 *  when the whole string was visited.
 */
extern (C) int _aApplyRdc2(in dchar[] aa, dg2_t dg)
{
    debug(apply) printf("_aApplyRdc2(), len = %d\n", aa.length);

    int rc = 0;
    size_t pos = aa.length;

    while (pos != 0)
    {
        dchar codePoint = aa[--pos];

        if ((codePoint & ~0x7F) == 0)
        {
            // Plain ASCII: a single code unit.
            char unit = cast(char)codePoint;
            rc = dg(&pos, cast(void *)&unit);
            if (rc)
                break;
            continue;
        }

        // Multi-byte encoding: feed every byte to the loop body in turn.
        char[4] storage;
        foreach (char unit; toUTF8(storage, codePoint))
        {
            rc = dg(&pos, cast(void *)&unit);
            if (rc)
                return rc;
        }
    }
    return rc;
}
|
|
857
|
|
unittest
{
    debug(apply) printf("_aApplyRdc2.unittest\n");

    // Literals are sliced so both variables are dynamic arrays and can be
    // re-pointed at strings of a different length further down.
    auto text = "hello"d[];
    auto expected = "olleh"c[];
    size_t seen = 0;

    // ASCII code points: the key counts down from the last element index.
    foreach_reverse (k, char ch; text)
    {
        assert(seen < expected.length);
        assert(k == 4 - seen);
        assert(ch == expected[seen]);
        seen++;
    }
    assert(seen == 5);

    // Bytes: 'b', F4 80 91 96, E1 88 B4, 'a'. Every byte of one code
    // point reports the index of that code point in the dchar array.
    static size_t[] starts = [3, 2, 2, 2, 2, 1, 1, 1, 0];
    text = "a\u1234\U00100456b";
    expected = "b\U00100456\u1234a"c;
    seen = 0;
    foreach_reverse (k, char ch; text)
    {
        assert(seen < expected.length);
        assert(k == starts[seen]);
        assert(ch == expected[seen]);
        seen++;
    }
    assert(seen == 9);
}
|
|
904
|
|
905 /*****************************/
|
|
906
|
|
/**
 * Index-and-value variant of the reverse UTF-32 to UTF-16 traversal: visits
 * aa from its last element to its first and hands each code point to dg as
 * one wchar (values up to 0xFFFF) or as a high surrogate followed by a low
 * surrogate (two calls). Every call also receives the address of the index
 * variable, which holds the position of the current element in aa.
 *
 * Params:
 *  aa = code points to traverse backwards
 *  dg = loop body; receives the index address and the address of a local wchar
 *
 * Returns:
 *  The first non-zero value returned by dg (traversal stops there), or 0
 *  when the whole string was visited.
 */
extern (C) int _aApplyRdw2(in dchar[] aa, dg2_t dg)
{
    debug(apply) printf("_aApplyRdw2(), len = %d\n", aa.length);

    int rc = 0;
    size_t pos = aa.length;

    while (pos != 0)
    {
        dchar codePoint = aa[--pos];
        wchar unit;

        if (codePoint > 0xFFFF)
        {
            // Needs a surrogate pair: high half first, low half via the
            // shared call after the branch.
            uint offset = codePoint - 0x10000;
            unit = cast(wchar) (((offset >> 10) & 0x3FF) + 0xD800);
            rc = dg(&pos, cast(void *)&unit);
            if (rc)
                break;
            unit = cast(wchar) ((offset & 0x3FF) + 0xDC00);
        }
        else
        {
            unit = cast(wchar) codePoint;
        }

        rc = dg(&pos, cast(void *)&unit);
        if (rc)
            break;
    }
    return rc;
}
|
|
931
|
|
unittest
{
    debug(apply) printf("_aApplyRdw2.unittest\n");

    // Literals are sliced so both variables are dynamic arrays and can be
    // re-pointed at strings of a different length further down.
    auto text = "hello"d[];
    auto expected = "olleh"w[];
    size_t seen = 0;

    // BMP code points: the key counts down from the last element index.
    foreach_reverse (k, wchar ch; text)
    {
        assert(seen < expected.length);
        assert(k == 4 - seen);
        assert(ch == expected[seen]);
        seen++;
    }
    assert(seen == 5);

    // Units: 'b', 0xDBC1, 0xDC56, 0x1234, 'a'. Both surrogate halves
    // report the index of U+100456 in the dchar array.
    static size_t[] starts = [3, 2, 2, 1, 0];
    text = "a\u1234\U00100456b";
    expected = "b\U00100456\u1234a"w;
    seen = 0;
    foreach_reverse (k, wchar ch; text)
    {
        assert(seen < expected.length);
        assert(k == starts[seen]);
        assert(ch == expected[seen]);
        seen++;
    }
    assert(seen == 5);
}
|
|
974
|
|
975
|