Mercurial > projects > ldc
comparison lphobos/std/string.d @ 108:288fe1029e1f trunk
[svn r112] Fixed 'case 1,2,3:' style case statements.
Fixed a bunch of bugs with return/break/continue in loops.
Fixed support for the DMDFE hidden implicit return value variable. This can be needed for some foreach statements where the loop body is converted to a nested delegate, but also possibly returns from the function.
Added std.math to phobos.
Added AA runtime support code, done ground work for implementing AAs.
Several other bugfixes.
author | lindquist |
---|---|
date | Tue, 20 Nov 2007 05:29:20 +0100 |
parents | |
children | 373489eeaf90 |
comparison
equal
deleted
inserted
replaced
107:3efbcc81ba45 | 108:288fe1029e1f |
---|---|
1 | |
2 // Written in the D programming language. | |
3 | |
4 /** | |
5 * String handling functions. | |
6 * | |
7 * To copy or not to copy? | |
8 * When a function takes a string as a parameter, and returns a string, | |
9 * is that string the same as the input string, modified in place, or | |
10 * is it a modified copy of the input string? The D array convention is | |
11 * "copy-on-write". This means that if no modifications are done, the | |
12 * original string (or slices of it) can be returned. If any modifications | |
13 * are done, the returned string is a copy. | |
14 * | |
15 * Macros: | |
16 * WIKI = Phobos/StdString | |
17 * Copyright: | |
18 * Public Domain | |
19 */ | |
20 | |
21 /* Author: | |
22 * Walter Bright, Digital Mars, www.digitalmars.com | |
23 */ | |
24 | |
25 // The code is not optimized for speed, that will have to wait | |
26 // until the design is solidified. | |
27 | |
28 module std.string; | |
29 | |
30 //debug=string; // uncomment to turn on debugging printf's | |
31 | |
32 //private import std.stdio; | |
33 private import std.c.stdio; | |
34 private import std.c.stdlib; | |
35 private import std.c.string; | |
36 private import std.utf; | |
37 private import std.uni; | |
38 private import std.array; | |
39 private import std.format; | |
40 private import std.ctype; | |
41 private import std.stdarg; | |
42 | |
// C library wide-string routines, declared here with C linkage.
// NOTE(review): the parameters are typed with D's wchar; the width of C's
// wchar_t is platform-dependent — confirm the two agree on every target.
extern (C)
{

    size_t wcslen(wchar *);         // length of a 0-terminated wide string
    int wcscmp(wchar *, wchar *);   // ordering of two 0-terminated wide strings
}
49 | |
50 /* ************* Exceptions *************** */ | |
51 | |
52 /// Thrown on errors in string functions. | |
// Adds nothing to Exception beyond forwarding the message to it.
class StringException : Exception
{
    this(char[] msg)    /// Constructor; msg is handed to Exception unchanged
    {
        super(msg);
    }
}
60 | |
61 /* ************* Constants *************** */ | |
62 | |
// Fixed-size character tables; each array length equals the number of
// characters in its initializer.
const char[16] hexdigits = "0123456789ABCDEF";          /// 0..9A..F
const char[10] digits    = "0123456789";                /// 0..9
const char[8]  octdigits = "01234567";                  /// 0..7
const char[26] lowercase = "abcdefghijklmnopqrstuvwxyz";  /// a..z
const char[26] uppercase = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";  /// A..Z
const char[52] letters   = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                           "abcdefghijklmnopqrstuvwxyz";  /// A..Za..z
const char[6] whitespace = " \t\v\r\n\f";               /// ASCII whitespace

const dchar LS = '\u2028';      /// UTF line separator
const dchar PS = '\u2029';      /// UTF paragraph separator
74 | |
/// Newline sequence for this system
version (Windows)
    const char[2] newline = "\r\n";
else
    // Every non-Windows target gets a single line feed.  The second arm
    // used to be guarded by version (linux), which left the symbol
    // undefined on any other platform.
    const char[1] newline = "\n";
80 | |
81 /********************************** | |
82 * Returns true if c is whitespace | |
83 */ | |
84 | |
bool iswhite(dchar c)
{
    // Beyond ASCII only the two Unicode separators count as whitespace.
    if (c > 0x7F)
        return c == PS || c == LS;

    // ASCII: the character is whitespace iff it appears in the table.
    return find(whitespace, c) != -1;
}
91 | |
92 /********************************* | |
93 * Convert string to integer. | |
94 */ | |
95 | |
long atoi(char[] s)
{
    // The C routine needs a 0-terminated string, which toStringz supplies;
    // whatever the C function returns is passed back as a long.
    return std.c.stdlib.atoi(toStringz(s));
}
100 | |
101 /************************************* | |
102 * Convert string to real. | |
103 */ | |
104 | |
real atof(char[] s)
{
    // strtold is given an end-pointer slot, but the position it stores
    // there is never looked at.
    char* ignoredEnd;

    return strtold(toStringz(s), &ignoredEnd);
}
111 | |
112 /********************************** | |
113 * Compare two strings. cmp is case sensitive, icmp is case insensitive. | |
114 * Returns: | |
115 * <table border=1 cellpadding=4 cellspacing=0> | |
116 * $(TR $(TD < 0) $(TD s1 < s2)) | |
117 * $(TR $(TD = 0) $(TD s1 == s2)) | |
118 * $(TR $(TD > 0) $(TD s1 > s2)) | |
119 * </table> | |
120 */ | |
121 | |
int cmp(char[] s1, char[] s2)
{
    // Only the prefix both strings share can be compared bytewise.
    size_t common = (s2.length < s1.length) ? s2.length : s1.length;

    int order = memcmp(s1.ptr, s2.ptr, common);
    if (order != 0)
        return order;

    // Identical prefixes: the length difference decides.
    return cast(int)s1.length - cast(int)s2.length;
}
135 | |
136 /********************************* | |
137 * ditto | |
138 */ | |
139 | |
int icmp(char[] s1, char[] s2)
{
    // Only the prefix common to both strings is compared character by
    // character; a length difference breaks a tie afterwards.
    auto len = s1.length;
    int result;

    if (s2.length < len)
        len = s2.length;
    version (Win32)
    {
        result = memicmp(s1.ptr, s2.ptr, len);
    }
    else
    {
        // Portable path for every non-Win32 target.  It was previously
        // restricted to version (linux); on any other platform neither
        // branch was compiled, result stayed 0, and only the lengths
        // were ever compared.
        for (size_t i = 0; i < len; i++)
        {
            if (s1[i] != s2[i])
            {
                char c1 = s1[i];
                char c2 = s2[i];

                // Fold ASCII upper case to lower case before comparing.
                if (c1 >= 'A' && c1 <= 'Z')
                    c1 += cast(int)'a' - cast(int)'A';
                if (c2 >= 'A' && c2 <= 'Z')
                    c2 += cast(int)'a' - cast(int)'A';
                result = cast(int)c1 - cast(int)c2;
                if (result)
                    break;
            }
        }
    }
    if (result == 0)
        result = cast(int)s1.length - cast(int)s2.length;
    return result;
}
174 | |
// Exercises icmp: equal, empty and null inputs, a length tie-break, and
// inputs that differ in case.
unittest
{
    int result;

    debug(string) printf("string.icmp.unittest\n");
    result = icmp("abc", "abc");
    assert(result == 0);
    result = icmp(null, null);
    assert(result == 0);
    result = icmp("", "");
    assert(result == 0);
    result = icmp("abc", "abcd");
    assert(result < 0);
    result = icmp("abcd", "abc");
    assert(result > 0);
    result = icmp("abc", "abd");
    assert(result < 0);
    result = icmp("bbc", "abc");
    assert(result > 0);

    // Case must not influence the ordering.
    result = icmp("ABC", "abc");
    assert(result == 0);
    result = icmp("aBc", "AbD");
    assert(result < 0);
    result = icmp("Bbc", "aBC");
    assert(result > 0);
}
195 | |
196 /* ******************************** | |
197 * Converts a D array of chars to a C-style 0 terminated string. | |
198 * Deprecated: replaced with toStringz(). | |
199 */ | |
200 | |
deprecated char* toCharz(char[] s)
{
    // Kept only as an alias for the old name; all work is done by toStringz.
    return toStringz(s);
}
205 | |
206 /********************************* | |
207 * Convert array of chars s[] to a C-style 0 terminated string. | |
208 */ | |
209 | |
char* toStringz(char[] s)
out (result)
{
    // A non-null result must have the length and the bytes of s.
    if (result)
    {
        assert(strlen(result) == s.length);
        assert(memcmp(result, s.ptr, s.length) == 0);
    }
}
body
{
    // Nothing to copy for an empty array: hand back the empty literal.
    if (s.length == 0)
        return "";

    // Peeking one byte past the end of s[] for an existing 0 would avoid
    // the copy, but that is not reliable, so a buffer with room for the
    // terminator is always allocated.
    char[] buf = new char[s.length + 1];
    buf[0 .. s.length] = s;
    buf[s.length] = 0;
    return buf.ptr;
}
249 | |
// Exercises toStringz on a literal, on a slice taken from the middle of a
// longer array, and on an empty string.
unittest
{
    debug(string) printf("string.toStringz.unittest\n");

    char* p = toStringz("foo");
    assert(strlen(p) == 3);
    char foo[] = "abbzxyzzy";
    p = toStringz(foo[3..5]);       // the result must stop after the 2-char slice
    assert(strlen(p) == 2);

    char[] test = "";
    p = toStringz(test);
    assert(*p == 0);                // empty input still yields a terminator
}
264 | |
265 /****************************************** | |
266 * find, ifind _find first occurrence of c in string s. | |
267 * rfind, irfind _find last occurrence of c in string s. | |
268 * | |
269 * find, rfind are case sensitive; ifind, irfind are case insensitive. | |
270 * Returns: | |
271 * Index in s where c is found, -1 if not found. | |
272 */ | |
273 | |
int find(char[] s, dchar c)
{
    if (c > 0x7F)
    {
        // c is a universal character: walk s one decoded character at a
        // time and report the index the foreach supplies for the match.
        foreach (int i, dchar c2; s)
        {
            if (c2 == c)
                return i;
        }
        return -1;
    }

    // Plain old ASCII: a byte search is enough.
    auto hit = cast(char*)memchr(s.ptr, c, s.length);
    if (!hit)
        return -1;
    return hit - cast(char *)s;
}
293 | |
// Exercises the single-character find: null input, a miss, a hit at the
// start and a hit at the end.
unittest
{
    debug(string) printf("string.find.unittest\n");

    int i;

    i = find(null, cast(dchar)'a');
    assert(i == -1);
    i = find("def", cast(dchar)'a');
    assert(i == -1);
    i = find("abba", cast(dchar)'a');   // first of two occurrences
    assert(i == 0);
    i = find("def", cast(dchar)'f');
    assert(i == 2);
}
309 | |
310 | |
311 /****************************************** | |
312 * ditto | |
313 */ | |
314 | |
int ifind(char[] s, dchar c)
{
    // (An unused local pointer that used to be declared here was removed.)
    if (c <= 0x7F)
    {   // Plain old ASCII: lower-case the target once, then lower-case
        // each byte of s and compare.
        char c1 = cast(char) std.ctype.tolower(c);

        foreach (int i, char c2; s)
        {
            c2 = cast(char)std.ctype.tolower(c2);
            if (c1 == c2)
                return i;
        }
    }
    else
    {   // c is a universal character: compare decoded characters after
        // mapping both sides through toUniLower.
        dchar c1 = std.uni.toUniLower(c);

        foreach (int i, dchar c2; s)
        {
            c2 = std.uni.toUniLower(c2);
            if (c1 == c2)
                return i;
        }
    }
    return -1;
}
343 | |
// Exercises the single-character ifind, with the target given both as a
// dchar and as a char.
unittest
{
    debug(string) printf("string.ifind.unittest\n");

    int i;

    i = ifind(null, cast(dchar)'a');
    assert(i == -1);
    i = ifind("def", cast(dchar)'a');
    assert(i == -1);
    i = ifind("Abba", cast(dchar)'a');  // 'A' matches 'a'
    assert(i == 0);
    i = ifind("def", cast(dchar)'F');
    assert(i == 2);

    char[] sPlts = "Mars: the fourth Rock (Planet) from the Sun.";

    i = ifind("def", cast(char)'f');
    assert(i == 2);

    i = ifind(sPlts, cast(char)'P');
    assert(i == 23);
    i = ifind(sPlts, cast(char)'R');    // the lower-case 'r' in "Mars" comes first
    assert(i == 2);
}
369 | |
370 | |
371 /****************************************** | |
372 * ditto | |
373 */ | |
374 | |
int rfind(char[] s, dchar c)
{
    if (c > 0x7F)
    {
        // c is a universal character: encode it as UTF-8 and let the
        // substring overload do the search.
        char[4] utf8;
        char[] encoded = std.utf.toUTF8(utf8, c);
        return rfind(s, encoded);
    }

    // Plain old ASCII: scan from the last byte towards the first.
    size_t pos = s.length;
    while (pos-- != 0)
    {
        if (s[pos] == c)
            break;
    }
    // When nothing matched, pos has wrapped below zero; converted to the
    // int return type that is the -1 "not found" value.
    return pos;
}
395 | |
// Exercises the single-character rfind: null input, a miss, and hits that
// must be reported at the last occurrence.
unittest
{
    debug(string) printf("string.rfind.unittest\n");

    int i;

    i = rfind(null, cast(dchar)'a');
    assert(i == -1);
    i = rfind("def", cast(dchar)'a');
    assert(i == -1);
    i = rfind("abba", cast(dchar)'a');  // last of two occurrences
    assert(i == 3);
    i = rfind("def", cast(dchar)'f');
    assert(i == 2);
}
411 | |
412 /****************************************** | |
413 * ditto | |
414 */ | |
415 | |
int irfind(char[] s, dchar c)
{
    size_t pos = s.length;

    if (c > 0x7F)
    {
        // c is a universal character: decode only at sequence-start bytes.
        dchar wanted = std.uni.toUniLower(c);

        while (pos-- != 0)
        {
            char lead = s[pos];

            // ASCII bytes cannot match a non-ASCII target, and bytes of
            // the form 10xxxxxx do not start a UTF-8 sequence.
            if (lead <= 0x7F || (lead & 0xC0) == 0x80)
                continue;

            size_t at = pos;    // decode advances its index; keep pos intact
            dchar found = std.uni.toUniLower(std.utf.decode(s, at));
            if (found == wanted)
                break;
        }
    }
    else
    {
        // Plain old ASCII: compare lower-cased bytes, back to front.
        char wanted = cast(char) std.ctype.tolower(c);

        while (pos-- != 0)
        {
            char folded = cast(char) std.ctype.tolower(s[pos]);
            if (folded == wanted)
                break;
        }
    }
    // On a miss pos has wrapped below zero; as an int that is -1.
    return pos;
}
453 | |
// Exercises the single-character irfind, with the target given both as a
// dchar and as a char.
unittest
{
    debug(string) printf("string.irfind.unittest\n");

    int i;

    i = irfind(null, cast(dchar)'a');
    assert(i == -1);
    i = irfind("def", cast(dchar)'a');
    assert(i == -1);
    i = irfind("AbbA", cast(dchar)'a'); // the trailing 'A' matches 'a'
    assert(i == 3);
    i = irfind("def", cast(dchar)'F');
    assert(i == 2);

    char[] sPlts = "Mars: the fourth Rock (Planet) from the Sun.";

    i = irfind("def", cast(char)'f');
    assert(i == 2);

    i = irfind(sPlts, cast(char)'M');   // last 'm' is the one in "from"
    assert(i == 34);
    i = irfind(sPlts, cast(char)'S');
    assert(i == 40);
}
479 | |
480 | |
481 /****************************************** | |
482 * find, ifind _find first occurrence of sub[] in string s[]. | |
483 * rfind, irfind _find last occurrence of sub[] in string s[]. | |
484 * | |
485 * find, rfind are case sensitive; ifind, irfind are case insensitive. | |
486 * Returns: | |
487 * Index in s where sub is found, -1 if not found. | |
488 */ | |
489 | |
int find(char[] s, char[] sub)
out (result)
{
    if (result == -1)
    {
    }
    else
    {
        assert(0 <= result && result < s.length - sub.length + 1);
        // Verify the match through a slice rather than &s[result]: taking
        // the address of element 0 of an empty s fails the bounds check
        // when an empty sub is reported at index 0.
        assert(s[result .. result + sub.length] == sub);
    }
}
body
{
    auto sublength = sub.length;

    // An empty pattern is found at the very start.
    if (sublength == 0)
        return 0;

    if (s.length >= sublength)
    {
        auto c = sub[0];
        if (sublength == 1)
        {
            // One-byte pattern: a plain byte search suffices.
            auto p = cast(char*)memchr(s.ptr, c, s.length);
            if (p)
                return p - &s[0];
        }
        else
        {
            // A match can start no later than index imax - 1.
            size_t imax = s.length - sublength + 1;

            // Remainder of sub[]
            char *q = &sub[1];
            sublength--;

            for (size_t i = 0; i < imax; i++)
            {
                // Jump to the next occurrence of the first byte, then
                // compare what follows it with the rest of the pattern.
                char *p = cast(char*)memchr(&s[i], c, imax - i);
                if (!p)
                    break;
                i = p - &s[0];
                if (memcmp(p + 1, q, sublength) == 0)
                    return i;
            }
        }
    }
    return -1;
}
539 | |
540 | |
// Exercises the substring find with one-character and multi-character
// patterns, including a partial match that precedes the real one.
unittest
{
    debug(string) printf("string.find.unittest\n");

    int i;

    i = find(null, "a");
    assert(i == -1);
    i = find("def", "a");
    assert(i == -1);
    i = find("abba", "a");
    assert(i == 0);
    i = find("def", "f");
    assert(i == 2);
    i = find("dfefffg", "fff");
    assert(i == 3);
    i = find("dfeffgfff", "fff");   // "ff" at index 3 is only a partial match
    assert(i == 6);
}
560 | |
561 /****************************************** | |
562 * ditto | |
563 */ | |
564 | |
int ifind(char[] s, char[] sub)
out (result)
{
    if (result == -1)
    {
    }
    else
    {
        // A reported index must leave room for sub and must compare
        // equal to it under icmp.
        assert(0 <= result && result < s.length - sub.length + 1);
        assert(icmp(s[result .. result + sub.length], sub) == 0);
    }
}
body
{
    auto sublength = sub.length;
    int i;

    // An empty pattern is found at the very start.
    if (sublength == 0)
        return 0;

    if (s.length < sublength)
        return -1;

    auto c = sub[0];
    if (sublength == 1)
    {
        // One-byte pattern: defer to the single-character overload.
        i = ifind(s, c);
    }
    else if (c <= 0x7F)
    {
        // Pattern starts with an ASCII byte: locate that byte first, then
        // compare the remainder with icmp.
        size_t imax = s.length - sublength + 1;     // one past the last possible start

        // Remainder of sub[]
        char[] subn = sub[1 .. sublength];

        for (i = 0; i < imax; i++)
        {
            auto j = ifind(s[i .. imax], c);    // j is relative to i
            if (j == -1)
                return -1;
            i += j;
            if (icmp(s[i + 1 .. i + sublength], subn) == 0)
                return i;
        }
        i = -1;
    }
    else
    {
        // Pattern starts with a non-ASCII byte: try icmp at every start
        // position in turn.
        size_t imax = s.length - sublength;     // last possible start

        for (i = 0; i <= imax; i++)
        {
            if (icmp(s[i .. i + sublength], sub) == 0)
                return i;
        }
        i = -1;
    }
    return i;
}
624 | |
625 | |
// Exercises the substring ifind: basic hits and misses, mixed-case
// patterns, a whole-string match, a non-ASCII pattern and a long miss.
unittest
{
    debug(string) printf("string.ifind.unittest\n");

    int i;

    i = ifind(null, "a");
    assert(i == -1);
    i = ifind("def", "a");
    assert(i == -1);
    i = ifind("abba", "a");
    assert(i == 0);
    i = ifind("def", "f");
    assert(i == 2);
    i = ifind("dfefffg", "fff");
    assert(i == 3);
    i = ifind("dfeffgfff", "fff");
    assert(i == 6);

    char[] sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
    char[] sMars = "Who\'s \'My Favorite Maritian?\'";

    i = ifind(sMars, "MY fAVe");        // diverges at the last character
    assert(i == -1);
    i = ifind(sMars, "mY fAVOriTe");
    assert(i == 7);
    i = ifind(sPlts, "mArS:");
    assert(i == 0);
    i = ifind(sPlts, "rOcK");
    assert(i == 17);
    i = ifind(sPlts, "Un.");            // match ending at the last character
    assert(i == 41);
    i = ifind(sPlts, sPlts);            // pattern as long as the text
    assert(i == 0);

    i = ifind("\u0100", "\u0100");
    assert(i == 0);

    // Thanks to Carlos Santander B. and zwang
    i = ifind("sus mejores cortesanos. Se embarcaron en el puerto de Dubai y",
        "page-break-before");
    assert(i == -1);
}
669 | |
670 /****************************************** | |
671 * ditto | |
672 */ | |
673 | |
int rfind(char[] s, char[] sub)
out (result)
{
    // A reported index must leave room for sub and the bytes there must
    // equal sub.
    if (result != -1)
    {
        assert(0 <= result && result < s.length - sub.length + 1);
        assert(memcmp(&s[0] + result, sub.ptr, sub.length) == 0);
    }
}
body
{
    // An empty pattern is reported just past the end of s.
    if (sub.length == 0)
        return s.length;

    char lead = sub[0];

    // One-byte pattern: the single-character overload handles it.
    if (sub.length == 1)
        return rfind(s, lead);

    // Try each start position from the last feasible one down to 0.  If
    // sub is longer than s the starting value is already negative and
    // the loop body never runs.
    int i = s.length - sub.length;
    while (i >= 0)
    {
        if (s[i] == lead &&
            memcmp(&s[i + 1], &sub[1], sub.length - 1) == 0)
            return i;
        i--;
    }
    return -1;
}
705 | |
// Exercises the substring rfind: one- and two-character patterns, misses,
// and the empty pattern.
unittest
{
    int i;

    debug(string) printf("string.rfind.unittest\n");
    i = rfind("abcdefcdef", "c");
    assert(i == 6);
    i = rfind("abcdefcdef", "cd");  // second of two occurrences
    assert(i == 6);
    i = rfind("abcdefcdef", "x");
    assert(i == -1);
    i = rfind("abcdefcdef", "xy");
    assert(i == -1);
    i = rfind("abcdefcdef", "");    // empty pattern: index equals the length
    assert(i == 10);
}
722 | |
723 | |
724 /****************************************** | |
725 * ditto | |
726 */ | |
727 | |
int irfind(char[] s, char[] sub)
out (result)
{
    // A reported index must leave room for sub and must compare equal to
    // it under icmp.
    if (result != -1)
    {
        assert(0 <= result && result < s.length - sub.length + 1);
        assert(icmp(s[result .. result + sub.length], sub) == 0);
    }
}
body
{
    // An empty pattern is reported just past the end of s.
    if (sub.length == 0)
        return s.length;

    dchar first = sub[0];

    // One-byte pattern: the single-character overload handles it.
    if (sub.length == 1)
        return irfind(s, first);

    // Last feasible start; already negative when sub is longer than s.
    int i = s.length - sub.length;

    if (first > 0x7F)
    {
        // Non-ASCII first byte: run icmp on every window, back to front.
        for (; i >= 0; i--)
        {
            if (icmp(s[i .. i + sub.length], sub) == 0)
                return i;
        }
        return -1;
    }

    // ASCII first byte: check it cheaply before comparing the remainder.
    first = std.ctype.tolower(first);
    for (; i >= 0; i--)
    {
        if (std.ctype.tolower(s[i]) == first &&
            icmp(s[i + 1 .. i + sub.length], sub[1 .. sub.length]) == 0)
            return i;
    }
    return -1;
}
771 | |
// Exercises the substring irfind: mixed-case patterns, misses, the empty
// pattern, and patterns that cover a prefix or the whole text.
unittest
{
    int i;

    debug(string) printf("string.irfind.unittest\n");
    i = irfind("abcdefCdef", "c");      // upper-case 'C' in the text
    assert(i == 6);
    i = irfind("abcdefCdef", "cD");
    assert(i == 6);
    i = irfind("abcdefcdef", "x");
    assert(i == -1);
    i = irfind("abcdefcdef", "xy");
    assert(i == -1);
    i = irfind("abcdefcdef", "");       // empty pattern: index equals the length
    assert(i == 10);

    char[] sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
    char[] sMars = "Who\'s \'My Favorite Maritian?\'";

    i = irfind("abcdefcdef", "c");
    assert(i == 6);
    i = irfind("abcdefcdef", "cd");
    assert(i == 6);
    i = irfind( "abcdefcdef", "def" );  // match ending at the last character
    assert(i == 7);

    i = irfind(sMars, "RiTE maR");
    assert(i == 14);
    i = irfind(sPlts, "FOuRTh");
    assert(i == 10);
    i = irfind(sMars, "whO\'s \'MY");
    assert(i == 0);
    i = irfind(sMars, sMars);           // pattern as long as the text
    assert(i == 0);
}
807 | |
808 | |
809 /************************************ | |
810 * Convert string s[] to lower case. | |
811 */ | |
812 | |
string tolower(string s)
{
    // changed tracks how the result r is being produced:
    //   0 - nothing modified so far, r is unused
    //   1 - r is a full-length copy of s that is patched in place
    //   2 - r holds a prefix and is being extended by appending
    int changed;
    char[] r;

    for (size_t i = 0; i < s.length; i++)
    {
        auto c = s[i];
        if ('A' <= c && c <= 'Z')
        {
            // ASCII upper case: copy on first change, then overwrite the byte.
            if (!changed)
            {
                r = s.dup;
                changed = 1;
            }
            r[i] = cast(char) (c + (cast(char)'a' - 'A'));
        }
        else if (c > 0x7F)
        {
            // First non-ASCII byte: the rest of the string is handled here
            // one decoded character at a time, and the outer loop ends.
            foreach (size_t j, dchar dc; s[i .. length])
            {
                if (std.uni.isUniUpper(dc))
                {
                    dc = std.uni.toUniLower(dc);
                    if (!changed)
                    {
                        // First change of all: keep everything before this character.
                        r = s[0 .. i + j].dup;
                        changed = 2;
                    }
                }
                if (changed)
                {
                    if (changed == 1)
                    {   // Switch from patch-in-place to append: cut the
                        // copy back to the part already processed.
                        r = r[0 .. i + j];
                        changed = 2;
                    }
                    std.utf.encode(r, dc);
                }
            }
            break;
        }
    }
    // Without any change the input itself is returned, not a copy.
    return changed ? r : s;
}
857 | |
// Exercises tolower on ASCII input and on strings that mix ASCII with
// two-byte UTF-8 characters.
unittest
{
    debug(string) printf("string.tolower.unittest\n");

    char[] s1 = "FoL";
    char[] s2;

    s2 = tolower(s1);
    assert(cmp(s2, "fol") == 0);
    assert(s2 != s1);

    s1 = "A\u0100B\u0101d";
    s2 = tolower(s1);
    assert(cmp(s2, "a\u0101b\u0101d") == 0);
    assert(s2 !is s1);                  // a changed result must be a new array

    s1 = "A\u0460B\u0461d";
    s2 = tolower(s1);
    assert(cmp(s2, "a\u0461b\u0461d") == 0);
    assert(s2 !is s1);

    s1 = "\u0130";
    s2 = tolower(s1);
    assert(s2 == "i");                  // the result is shorter than the input here
    assert(s2 !is s1);
}
884 | |
885 /************************************ | |
886 * Convert string s[] to upper case. | |
887 */ | |
888 | |
string toupper(string s)
{
    // changed tracks how the result r is being produced:
    //   0 - nothing modified so far, r is unused
    //   1 - r is a full-length copy of s that is patched in place
    //   2 - r holds a prefix and is being extended by appending
    int changed;
    char[] r;

    for (size_t i = 0; i < s.length; i++)
    {
        auto c = s[i];
        if ('a' <= c && c <= 'z')
        {
            // ASCII lower case: copy on first change, then overwrite the byte.
            if (!changed)
            {
                r = s.dup;
                changed = 1;
            }
            r[i] = cast(char) (c - (cast(char)'a' - 'A'));
        }
        else if (c > 0x7F)
        {
            // First non-ASCII byte: the rest of the string is handled here
            // one decoded character at a time, and the outer loop ends.
            foreach (size_t j, dchar dc; s[i .. length])
            {
                if (std.uni.isUniLower(dc))
                {
                    dc = std.uni.toUniUpper(dc);
                    if (!changed)
                    {
                        // First change of all: keep everything before this character.
                        r = s[0 .. i + j].dup;
                        changed = 2;
                    }
                }
                if (changed)
                {
                    if (changed == 1)
                    {   // Switch from patch-in-place to append: cut the
                        // copy back to the part already processed.
                        r = r[0 .. i + j];
                        changed = 2;
                    }
                    std.utf.encode(r, dc);
                }
            }
            break;
        }
    }
    // Without any change the input itself is returned, not a copy.
    return changed ? r : s;
}
933 | |
// Exercises toupper on ASCII input and on strings that mix ASCII with
// two-byte UTF-8 characters.
unittest
{
    debug(string) printf("string.toupper.unittest\n");

    char[] s1 = "FoL";
    char[] s2;

    s2 = toupper(s1);
    assert(cmp(s2, "FOL") == 0);
    assert(s2 !is s1);                  // a changed result must be a new array

    s1 = "a\u0100B\u0101d";
    s2 = toupper(s1);
    assert(cmp(s2, "A\u0100B\u0100D") == 0);
    assert(s2 !is s1);

    s1 = "a\u0460B\u0461d";
    s2 = toupper(s1);
    assert(cmp(s2, "A\u0460B\u0460D") == 0);
    assert(s2 !is s1);
}
955 | |
956 | |
957 /******************************************** | |
958 * Capitalize first character of string s[], convert rest of string s[] | |
959 * to lower case. | |
960 */ | |
961 | |
char[] capitalize(char[] s)
{
    // Until a character actually changes, r is s itself, so unchanged
    // input is returned without copying.  (A function-level "int i" that
    // the foreach index shadowed, and a redundant second initialisation
    // of the flag, were removed.)
    bool changed = false;
    char[] r = s;

    foreach (size_t i, dchar c; s)
    {
        dchar c2;

        if (i == 0)
        {
            // First character: upper-case it.  If that alters it, the
            // result is rebuilt from scratch.
            c2 = std.uni.toUniUpper(c);
            if (c != c2)
            {
                changed = true;
                r = null;
            }
        }
        else
        {
            // Any later character: lower-case it.  On the first
            // alteration, copy the untouched prefix.
            c2 = std.uni.toUniLower(c);
            if (c != c2 && !changed)
            {
                changed = true;
                r = s[0 .. i].dup;
            }
        }
        // Once copying has started, every character is appended.
        if (changed)
            std.utf.encode(r, c2);
    }
    return r;
}
998 | |
999 | |
// Exercises capitalize: a change in the tail, input that needs no change
// (the same storage must be returned), and a change in the first character.
unittest
{
    // The debug label used to read "string.toupper.capitalize".
    debug(string) printf("string.capitalize.unittest\n");

    char[] s1 = "FoL";
    char[] s2;

    s2 = capitalize(s1);
    assert(cmp(s2, "Fol") == 0);
    assert(s2 !is s1);

    s2 = capitalize(s1[0 .. 2]);
    assert(cmp(s2, "Fo") == 0);
    assert(s2.ptr == s1.ptr);           // already capitalized: no copy made

    s1 = "fOl";
    s2 = capitalize(s1);
    assert(cmp(s2, "Fol") == 0);
    assert(s2 !is s1);
}
1020 | |
1021 | |
1022 /******************************************** | |
1023 * Capitalize all words in string s[]. | |
1024 * Remove leading and trailing whitespace. | |
1025 * Replace all sequences of whitespace with a single space. | |
1026 */ | |
1027 | |
char[] capwords(char[] s)
{
    char[] r;
    size_t wordStart = 0;
    bool inside = false;    // true while the scan is inside a word

    foreach (size_t pos, char ch; s)
    {
        bool blank = ch == ' '  || ch == '\t' || ch == '\f' ||
                     ch == '\r' || ch == '\n' || ch == '\v';

        if (blank)
        {
            // A word just ended: append it, capitalized.
            if (inside)
            {
                r ~= capitalize(s[wordStart .. pos]);
                inside = false;
            }
        }
        else if (!inside)
        {
            // A word begins: separate it from the previous one with a
            // single space.
            if (r.length)
                r ~= ' ';
            wordStart = pos;
            inside = true;
        }
    }

    // A word that runs up to the end of s has not been flushed yet.
    if (inside)
        r ~= capitalize(s[wordStart .. s.length]);

    return r;
}
1070 | |
1071 | |
// Exercises capwords on input with leading, trailing and repeated
// whitespace and with words that contain punctuation.
unittest
{
    debug(string) printf("string.capwords.unittest\n");

    char[] s1 = "\tfoo abc(aD)* \t (q PTT ";
    char[] s2;

    s2 = capwords(s1);
    //writefln("s2 = '%s'", s2);
    assert(cmp(s2, "Foo Abc(ad)* (q Ptt") == 0);
}
1083 | |
1084 /******************************************** | |
1085 * Return a string that consists of s[] repeated n times. | |
1086 */ | |
1087 | |
char[] repeat(char[] s, size_t n)
{
    // Zero copies is null; one copy is s itself, not a duplicate.
    if (n == 0)
        return null;
    if (n == 1)
        return s;

    size_t unit = s.length;
    char[] r = new char[n * unit];

    if (unit == 1)
    {
        // A single character: fill the whole buffer with it.
        r[] = s[0];
        return r;
    }

    // General case: copy s into each unit-sized window in turn.
    for (size_t at = 0; at < r.length; at += unit)
        r[at .. at + unit] = s[];
    return r;
}
1107 | |
1108 | |
// Exercises repeat with counts of 0, 1 and 2, a one-character string, and
// a null string.
unittest
{
    debug(string) printf("string.repeat.unittest\n");

    char[] s;

    s = repeat("1234", 0);
    assert(s is null);
    s = repeat("1234", 1);
    assert(cmp(s, "1234") == 0);
    s = repeat("1234", 2);
    assert(cmp(s, "12341234") == 0);
    s = repeat("1", 4);                 // one-character fill path
    assert(cmp(s, "1111") == 0);
    s = repeat(null, 4);
    assert(s is null);
}
1126 | |
1127 | |
1128 /******************************************** | |
1129 * Concatenate all the strings in words[] together into one | |
1130 * string; use sep[] as the separator. | |
1131 */ | |
1132 | |
char[] join(char[][] words, char[] sep)
{
    char[] result;

    // With no words the result stays null.
    if (words.length)
    {
        // Size the result exactly: every word plus one separator between
        // each adjacent pair.
        size_t seplen = sep.length;
        size_t len = (words.length - 1) * seplen;
        foreach (char[] word; words)
            len += word.length;

        result = new char[len];

        size_t j = 0;       // write position in result
        foreach (size_t i, char[] word; words)
        {
            // A separator goes before every word except the first.
            if (i != 0)
            {
                result[j .. j + seplen] = sep;
                j += seplen;
            }
            // The word length is used as size_t throughout; it was
            // previously stored in a uint, which narrows the length on
            // targets where size_t is wider.
            result[j .. j + word.length] = word;
            j += word.length;
        }
        assert(j == len);
    }
    return result;
}
1168 | |
// Exercises join on three words with a one-character separator.
unittest
{
    debug(string) printf("string.join.unittest\n");

    char[] word1 = "peter";
    char[] word2 = "paul";
    char[] word3 = "jerry";
    char[][3] words;
    char[] r;
    int i;

    words[0] = word1;
    words[1] = word2;
    words[2] = word3;
    r = join(words, ",");               // no separator after the last word
    i = cmp(r, "peter,paul,jerry");
    assert(i == 0);
}
1187 | |
1188 | |
1189 /************************************** | |
1190 * Split s[] into an array of words, | |
1191 * using whitespace as the delimiter. | |
1192 */ | |
1193 | |
char[][] split(char[] s)
{
    char[][] words;
    size_t wordStart = 0;
    bool inside = false;    // true while the scan is inside a word

    foreach (size_t pos, char ch; s)
    {
        bool blank = ch == ' '  || ch == '\t' || ch == '\f' ||
                     ch == '\r' || ch == '\n' || ch == '\v';

        if (blank)
        {
            // A word just ended: record it as a slice of s.
            if (inside)
            {
                words ~= s[wordStart .. pos];
                inside = false;
            }
        }
        else if (!inside)
        {
            wordStart = pos;
            inside = true;
        }
    }

    // A word that runs up to the end of s has not been recorded yet.
    if (inside)
        words ~= s[wordStart .. s.length];
    return words;
}
1230 | |
// Exercises the whitespace split on input with leading and trailing blanks
// and a tab between words.
unittest
{
    debug(string) printf("string.split1\n");

    char[] s = " peter paul\tjerry ";
    char[][] words;
    int i;

    words = split(s);
    assert(words.length == 3);          // surrounding blanks yield no empty words
    i = cmp(words[0], "peter");
    assert(i == 0);
    i = cmp(words[1], "paul");
    assert(i == 0);
    i = cmp(words[2], "jerry");
    assert(i == 0);
}
1248 | |
1249 | |
1250 /************************************** | |
1251 * Split s[] into an array of words, | |
1252 * using delim[] as the delimiter. | |
1253 */ | |
1254 | |
// Works in two passes for either delimiter kind: the first pass counts the
// pieces so words[] can be sized once, the second fills in the slices.
// A delimiter at the very end of s produces a final empty piece.
char[][] split(char[] s, char[] delim)
in
{
    assert(delim.length > 0);
}
body
{
    size_t i;       // start of the piece currently being examined
    size_t j;       // position of the next delimiter
    char[][] words;

    i = 0;
    // An empty s yields an empty words[].
    if (s.length)
    {
        if (delim.length == 1)
        {   // One-byte delimiter: search with memchr.
            char c = delim[0];
            size_t nwords = 0;
            char* p = &s[0];
            char* pend = p + s.length;

            // Pass 1: count the pieces.
            while (true)
            {
                nwords++;
                p = cast(char*)memchr(p, c, pend - p);
                if (!p)
                    break;
                p++;
                if (p == pend)
                {   // Delimiter was the last byte: count the empty tail piece.
                    nwords++;
                    break;
                }
            }
            words.length = nwords;

            // Pass 2: store the slices.
            int wordi = 0;
            i = 0;
            while (true)
            {
                p = cast(char*)memchr(&s[i], c, s.length - i);
                if (!p)
                {
                    // No further delimiter: the rest of s is the last piece.
                    words[wordi] = s[i .. s.length];
                    break;
                }
                j = p - &s[0];
                words[wordi] = s[i .. j];
                wordi++;
                i = j + 1;
                if (i == s.length)
                {
                    words[wordi] = "";
                    break;
                }
            }
            // Both passes must agree on the number of pieces.
            assert(wordi + 1 == nwords);
        }
        else
        {   // Multi-byte delimiter: search with find().
            size_t nwords = 0;

            // Pass 1: count the pieces.
            while (true)
            {
                nwords++;
                j = find(s[i .. s.length], delim);  // j is relative to i
                if (j == -1)
                    break;
                i += j + delim.length;
                if (i == s.length)
                {   // Delimiter ended the string: count the empty tail piece.
                    nwords++;
                    break;
                }
                assert(i < s.length);
            }
            words.length = nwords;

            // Pass 2: store the slices.
            int wordi = 0;
            i = 0;
            while (true)
            {
                j = find(s[i .. s.length], delim);
                if (j == -1)
                {
                    // No further delimiter: the rest of s is the last piece.
                    words[wordi] = s[i .. s.length];
                    break;
                }
                words[wordi] = s[i .. i + j];
                wordi++;
                i += j + delim.length;
                if (i == s.length)
                {
                    words[wordi] = "";
                    break;
                }
                assert(i < s.length);
            }
            // Both passes must agree on the number of pieces.
            assert(wordi + 1 == nwords);
        }
    }
    return words;
}
1354 | |
unittest
{
    debug(string) printf("string.split2\n");

    char[][] words;

    // Single-character delimiter present at both ends.
    char[] s = ",peter,paul,jerry,";
    words = split(s, ",");
    assert(words.length == 5);
    assert(cmp(words[0], "") == 0);
    assert(cmp(words[1], "peter") == 0);
    assert(cmp(words[2], "paul") == 0);
    assert(cmp(words[3], "jerry") == 0);
    assert(cmp(words[4], "") == 0);

    s = s[0 .. s.length - 1];   // lop off trailing ','
    words = split(s, ",");
    assert(words.length == 4);
    assert(cmp(words[3], "jerry") == 0);

    s = s[1 .. s.length];       // lop off leading ','
    words = split(s, ",");
    assert(words.length == 3);
    assert(cmp(words[0], "peter") == 0);

    // Same sequence again with a two-character delimiter.
    char[] s2 = ",,peter,,paul,,jerry,,";
    words = split(s2, ",,");
    //printf("words.length = %d\n", words.length);
    assert(words.length == 5);
    assert(cmp(words[0], "") == 0);
    assert(cmp(words[1], "peter") == 0);
    assert(cmp(words[2], "paul") == 0);
    assert(cmp(words[3], "jerry") == 0);
    assert(cmp(words[4], "") == 0);

    s2 = s2[0 .. s2.length - 2];    // lop off trailing ',,'
    words = split(s2, ",,");
    assert(words.length == 4);
    assert(cmp(words[3], "jerry") == 0);

    s2 = s2[2 .. s2.length];        // lop off leading ',,'
    words = split(s2, ",,");
    assert(words.length == 3);
    assert(cmp(words[0], "peter") == 0);
}
1416 | |
1417 | |
/**************************************
 * Split s[] into an array of lines,
 * using CR, LF, or CR-LF as the delimiter.
 * The delimiter is not included in the line.
 *
 * A final line without a terminator is still returned; a terminator at
 * the very end of s[] does not add an extra empty line.
 * The returned lines are slices of s[].
 */

char[][] splitlines(char[] s)
{
    // size_t rather than uint: these are compared with and derived from
    // s.length, and must not be narrower than it.
    size_t i;
    size_t istart = 0;
    size_t nlines = 0;
    char[][] lines;

    // Pass 1: count the lines so the result is allocated exactly once.
    for (i = 0; i < s.length; i++)
    {
        char c = s[i];

        if (c == '\r' || c == '\n')
        {
            nlines++;
            istart = i + 1;
            if (c == '\r' && i + 1 < s.length && s[i + 1] == '\n')
            {
                // CR-LF is a single terminator
                i++;
                istart++;
            }
        }
    }
    if (istart != i)            // unterminated last line
        nlines++;

    lines = new char[][nlines];

    // Pass 2: same scan, this time storing the slices.
    nlines = 0;
    istart = 0;
    for (i = 0; i < s.length; i++)
    {
        char c = s[i];

        if (c == '\r' || c == '\n')
        {
            lines[nlines] = s[istart .. i];
            nlines++;
            istart = i + 1;
            if (c == '\r' && i + 1 < s.length && s[i + 1] == '\n')
            {
                i++;
                istart++;
            }
        }
    }
    if (istart != i)
    {
        lines[nlines] = s[istart .. i];
        nlines++;
    }

    assert(nlines == lines.length);
    return lines;
}
1477 | |
unittest
{
    debug(string) printf("string.splitlines\n");

    // CR, LF-CR (two terminators), CR-LF (one terminator) and a final LF.
    char[] s = "\rpeter\n\rpaul\r\njerry\n";
    char[][] lines = splitlines(s);

    //printf("lines.length = %d\n", lines.length);
    assert(lines.length == 5);
    //printf("lines[0] = %llx, '%.*s'\n", lines[0], lines[0]);
    assert(lines[0].length == 0);
    assert(cmp(lines[1], "peter") == 0);
    assert(lines[2].length == 0);
    assert(cmp(lines[3], "paul") == 0);
    assert(cmp(lines[4], "jerry") == 0);

    // Without the trailing \n the line count must not change.
    s = s[0 .. s.length - 1];   // lop off trailing \n
    lines = splitlines(s);
    //printf("lines.length = %d\n", lines.length);
    assert(lines.length == 5);
    assert(cmp(lines[4], "jerry") == 0);
}
1506 | |
1507 | |
/*****************************************
 * Strips leading or trailing whitespace, or both.
 *
 * stripl() removes leading whitespace, as classified by
 * std.ctype.isspace. The result is a slice of s[], not a copy.
 */

char[] stripl(char[] s)
{
    // size_t rather than uint: the index is compared with s.length.
    size_t i;

    for (i = 0; i < s.length; i++)
    {
        if (!std.ctype.isspace(s[i]))
            break;
    }
    return s[i .. s.length];
}
1523 | |
/// ditto
char[] stripr(char[] s)
{
    // size_t rather than uint: the index starts at s.length and must be
    // able to hold it without narrowing.
    size_t i;

    // Walk back from the end; i is one past the last character kept.
    for (i = s.length; i > 0; i--)
    {
        if (!std.ctype.isspace(s[i - 1]))
            break;
    }
    return s[0 .. i];
}
1535 | |
/// ditto
char[] strip(char[] s)
{
    // Trim the front first, then the back of what is left.
    char[] headless = stripl(s);
    return stripr(headless);
}
1540 | |
unittest
{
    debug(string) printf("string.strip.unittest\n");

    // Whitespace on both sides, including a tab, is removed.
    char[] s = strip(" foo\t ");
    assert(cmp(s, "foo") == 0);
}
1551 | |
/*******************************************
 * Returns s[] sans trailing delimiter[], if any.
 * If delimiter[] is null, removes trailing CR, LF, or CRLF, if any.
 *
 * At most one terminator is removed. The result is a slice of s[].
 */

char[] chomp(char[] s, char[] delimiter = null)
{
    if (delimiter !is null)
    {
        // Explicit delimiter: drop it only if s[] ends with it.
        if (s.length >= delimiter.length)
        {
            size_t cut = s.length - delimiter.length;

            if (s[cut .. s.length] == delimiter)
                return s[0 .. cut];
        }
        return s;
    }

    // Default: drop one trailing CR, LF, or CR-LF.
    auto keep = s.length;

    if (keep)
    {
        switch (s[keep - 1])
        {
            case '\r':                  // ends in CR
                keep--;
                break;

            case '\n':                  // ends in LF
                keep--;
                if (keep && s[keep - 1] == '\r')
                    keep--;             // it was CR-LF
                break;

            default:
                break;
        }
    }
    return s[0 .. keep];
}
1583 | |
unittest
{
    debug(string) printf("string.chomp.unittest\n");

    // Default delimiter: a single CR, LF or CR-LF is removed.
    assert(chomp(null) is null);
    assert(chomp("hello") == "hello");
    assert(chomp("hello\n") == "hello");
    assert(chomp("hello\r") == "hello");
    assert(chomp("hello\r\n") == "hello");
    assert(chomp("hello\n\r") == "hello\n");
    assert(chomp("hello\n\n") == "hello\n");
    assert(chomp("hello\r\r") == "hello\r");
    assert(chomp("hello\nxxx\n") == "hello\nxxx");

    // Explicit delimiter.
    assert(chomp(null, null) is null);
    assert(chomp("hello", "o") == "hell");
    assert(chomp("hello", "p") == "hello");
    assert(chomp("hello", null) == "hello");
    assert(chomp("hello", "llo") == "he");
}
1619 | |
1620 | |
/***********************************************
 * Returns s[] sans trailing character, if there is one.
 * If last two characters are CR-LF, then both are removed.
 *
 * A trailing multi-byte UTF-8 sequence is removed as a whole.
 * Throws: std.utf.UtfException if s[] consists only of UTF-8
 * continuation bytes.
 */

char[] chop(char[] s)
{
    auto n = s.length;

    if (n == 0)
        return s;

    // CR-LF counts as one character.
    if (n >= 2 && s[n - 2] == '\r' && s[n - 1] == '\n')
        return s[0 .. n - 2];

    // Step back over UTF-8 continuation bytes (10xxxxxx) so that the
    // lead byte of the final sequence is the one dropped below.
    while ((s[n - 1] & 0xC0) == 0x80)
    {
        n--;
        if (n == 0)
            throw new std.utf.UtfException("invalid UTF sequence", 0);
    }

    return s[0 .. n - 1];
}
1646 | |
1647 | |
unittest
{
    debug(string) printf("string.chop.unittest\n");

    assert(chop(null) is null);
    assert(chop("hello") == "hell");
    // CR-LF goes as a unit, LF-CR does not.
    assert(chop("hello\r\n") == "hello");
    assert(chop("hello\n\r") == "hello\n");
}
1662 | |
1663 | |
/*******************************************
 * Left justify, right justify, or center string s[]
 * in field width chars wide.
 *
 * If s[] already has at least width chars, or width is negative,
 * s[] itself is returned; otherwise a new space-padded string is
 * returned.
 */

char[] ljustify(char[] s, int width)
{
    // A negative width would turn into a huge unsigned value in the
    // comparison with s.length and then reach the allocation below.
    if (width < 0 || s.length >= width)
        return s;
    char[] r = new char[width];
    r[0 .. s.length] = s;
    r[s.length .. width] = cast(char)' ';
    return r;
}
1678 | |
/// ditto
char[] rjustify(char[] s, int width)
{
    // A negative width would turn into a huge unsigned value in the
    // comparison with s.length and then reach the allocation below.
    if (width < 0 || s.length >= width)
        return s;
    char[] r = new char[width];
    size_t pad = width - s.length;      // number of leading spaces
    r[0 .. pad] = cast(char)' ';
    r[pad .. width] = s;
    return r;
}
1689 | |
/// ditto
char[] center(char[] s, int width)
{
    // A negative width would turn into a huge unsigned value in the
    // comparison with s.length and then reach the allocation below.
    if (width < 0 || s.length >= width)
        return s;
    char[] r = new char[width];
    // When the padding is odd, the extra space goes on the right.
    size_t left = (width - s.length) / 2;
    r[0 .. left] = cast(char)' ';
    r[left .. left + s.length] = s;
    r[left + s.length .. width] = cast(char)' ';
    return r;
}
1702 | |
unittest
{
    debug(string) printf("string.justify.unittest\n");

    char[] s = "hello";
    char[] r;
    int i;

    // Expected values are 8 characters wide: 5 for "hello" plus 3 of fill.
    r = ljustify(s, 8);
    i = cmp(r, "hello   ");
    assert(i == 0);

    r = rjustify(s, 8);
    i = cmp(r, "   hello");
    assert(i == 0);

    // Odd padding: one space on the left, two on the right.
    r = center(s, 8);
    i = cmp(r, " hello  ");
    assert(i == 0);

    r = zfill(s, 8);
    i = cmp(r, "000hello");
    assert(i == 0);
}
1727 | |
1728 | |
/*****************************************
 * Same as rjustify(), but fill with '0's.
 *
 * If s[] already has at least width chars, or width is negative,
 * s[] itself is returned.
 */

char[] zfill(char[] s, int width)
{
    // A negative width would turn into a huge unsigned value in the
    // comparison with s.length and then reach the allocation below.
    if (width < 0 || s.length >= width)
        return s;
    char[] r = new char[width];
    size_t pad = width - s.length;      // number of leading '0's
    r[0 .. pad] = cast(char)'0';
    r[pad .. width] = s;
    return r;
}
1742 | |
/********************************************
 * Replace occurrences of from[] with to[] in s[].
 *
 * Occurrences are located left to right with find() and do not overlap.
 * If from[] is empty, s[] is returned unchanged.
 */

char[] replace(char[] s, char[] from, char[] to)
{
    //printf("replace('%.*s','%.*s','%.*s')\n", s, from, to);
    if (from.length == 0)
        return s;

    char[] result;
    size_t pos = 0;

    while (pos < s.length)
    {
        char[] rest = s[pos .. s.length];
        int hit = find(rest, from);

        if (hit == -1)
        {
            // No further match: keep the remainder as is.
            result ~= rest;
            break;
        }
        result ~= rest[0 .. hit];   // text in front of the match
        result ~= to;
        pos += hit + from.length;
    }
    return result;
}
1771 | |
unittest
{
    debug(string) printf("string.replace.unittest\n");

    char[] s = "This is a foo foo list";
    char[] from = "foo";
    char[] to = "silly";

    // Every occurrence is replaced.
    char[] r = replace(s, from, to);
    assert(cmp(r, "This is a silly silly list") == 0);

    // An empty pattern leaves the string alone.
    r = replace(s, "", to);
    assert(cmp(r, "This is a foo foo list") == 0);
}
1790 | |
/*****************************
 * Return a _string that is string[] with slice[] replaced by replacement[].
 *
 * slice[] must be a slice of string[] (checked by the in contract).
 * The result is always newly allocated.
 */

char[] replaceSlice(char[] string, char[] slice, char[] replacement)
in
{
    // Verify that slice[] really is a slice of string[]
    // ptrdiff_t rather than int, so the pointer difference is not narrowed.
    ptrdiff_t so = cast(char*)slice - cast(char*)string;
    assert(so >= 0);
    //printf("string.length = %d, so = %d, slice.length = %d\n", string.length, so, slice.length);
    assert(string.length >= so + slice.length);
}
body
{
    char[] result;
    // Offset of slice[] within string[].
    ptrdiff_t so = cast(char*)slice - cast(char*)string;

    result.length = string.length - slice.length + replacement.length;

    result[0 .. so] = string[0 .. so];
    result[so .. so + replacement.length] = replacement;
    result[so + replacement.length .. result.length] = string[so + slice.length .. string.length];

    return result;
}
1817 | |
unittest
{
    debug(string) printf("string.replaceSlice.unittest\n");

    char[] string = "hello";
    char[] slice = string[2 .. 4];

    // "ll" is swapped for a longer replacement.
    char[] r = replaceSlice(string, slice, "bar");
    assert(cmp(r, "hebaro") == 0);
}
1830 | |
/**********************************************
 * Insert sub[] into s[] at location index.
 *
 * index must be in 0 .. s.length inclusive. If sub[] is empty, s[]
 * itself is returned; if s[] is empty, sub[] itself is returned.
 * Otherwise a newly allocated string is returned.
 */

char[] insert(char[] s, size_t index, char[] sub)
in
{
    // index is unsigned, so only the upper bound needs checking.
    assert(index <= s.length);
}
body
{
    if (sub.length == 0)
        return s;

    if (s.length == 0)
        return sub;

    // size_t rather than int: this is a sum of two array lengths.
    size_t newlength = s.length + sub.length;
    char[] result = new char[newlength];

    result[0 .. index] = s[0 .. index];
    result[index .. index + sub.length] = sub;
    result[index + sub.length .. newlength] = s[index .. s.length];
    return result;
}
1856 | |
unittest
{
    debug(string) printf("string.insert.unittest\n");

    // Front, back and middle.
    assert(cmp(insert("abcd", 0, "e"), "eabcd") == 0);
    assert(cmp(insert("abcd", 4, "e"), "abcde") == 0);
    assert(cmp(insert("abcd", 2, "ef"), "abefcd") == 0);

    // Empty target, empty insertion.
    assert(cmp(insert(null, 0, "e"), "e") == 0);
    assert(cmp(insert("abcd", 0, null), "abcd") == 0);
}
1884 | |
/***********************************************
 * Count up all instances of sub[] in s[].
 *
 * Matches are counted left to right and do not overlap.
 * An empty sub[] is counted as zero instances.
 */

size_t count(char[] s, char[] sub)
{
    // With an empty pattern the scan below advances by j + 0. If find()
    // reports a match at offset 0 for an empty pattern (NOTE(review):
    // confirm against find), the loop would never terminate.
    if (sub.length == 0)
        return 0;

    size_t total = 0;       // not named "count": that would shadow this function
    size_t i = 0;

    while (i < s.length)
    {
        int j = find(s[i .. s.length], sub);
        if (j == -1)
            break;
        total++;
        i += j + sub.length;    // resume just past this match
    }
    return total;
}
1904 | |
unittest
{
    debug(string) printf("string.count.unittest\n");

    char[] s = "This is a fofofof list";
    char[] sub = "fof";

    // Matches do not overlap, so "fofofof" holds two of them.
    int n = count(s, sub);
    assert(n == 2);
}
1916 | |
1917 | |
/************************************************
 * Replace tabs with the appropriate number of spaces.
 * tabsize is the distance between tab stops.
 *
 * The column count restarts after '\r', '\n', PS and LS.
 * If string[] contains no tab, string[] itself is returned.
 */

char[] expandtabs(char[] string, int tabsize = 8)
{
    char[] result = string;     // handed back as is when no tab is seen
    bool copied = false;        // true once result is a private buffer
    int column = 0;

    foreach (size_t i, dchar c; string)
    {
        if (c == '\t')
        {
            // Spaces needed to reach the next tab stop.
            int pad = tabsize - (column % tabsize);

            if (copied)
            {
                int old = result.length;
                result.length = old + pad;
                result[old .. old + pad] = ' ';
            }
            else
            {
                // First tab: start a private copy holding everything
                // before it, followed by the padding.
                copied = true;
                result = null;
                result.length = string.length + pad - 1;
                result.length = i + pad;
                result[0 .. i] = string[0 .. i];
                result[i .. i + pad] = ' ';
            }
            column += pad;
            continue;
        }

        if (c == '\r' || c == '\n' || c == PS || c == LS)
            column = 0;
        else
            column++;

        // Once copying has started every character must be carried over.
        if (copied)
        {
            if (c <= 0x7F)
                result ~= cast(char)c;
            else
                std.utf.encode(result, c);
        }
    }

    return result;
}
1976 | |
unittest
{
    debug(string) printf("string.expandtabs.unittest\n");

    char[] s = "This \tis\t a fofof\tof list";
    char[] r;
    int i;

    // Tabs at columns 5, 10 and 24 expand to 3, 6 and 8 spaces.
    r = expandtabs(s, 8);
    i = cmp(r, "This    is       a fofof        of list");
    assert(i == 0);

    r = expandtabs(null);
    assert(r == null);
    r = expandtabs("");
    assert(r.length == 0);
    r = expandtabs("a");
    assert(r == "a");
    // A tab in column 0 fills a whole tab stop.
    r = expandtabs("\t");
    assert(r == "        ");
    // Tab at column 4 expands to 4 spaces.
    r = expandtabs("  ab\tasdf ");
    //writefln("r = '%s'", r);
    assert(r == "  ab    asdf ");
    // TODO: need UTF test case
}
2002 | |
2003 | |
/*******************************************
 * Replace spaces in string with the optimal number of tabs.
 * Trailing spaces or tabs in a line are removed.
 * Params:
 *	string = String to convert.
 *	tabsize = Tab columns are tabsize spaces apart. tabsize defaults to 8.
 * Returns:
 *	string[] itself (or a slice of it) if nothing had to be rewritten,
 *	otherwise a new string.
 */

char[] entab(char[] string, int tabsize = 8)
{
    bool changes = false;       // true once result is a private buffer
    char[] result = string;

    int nspaces = 0;            // length of the current run of spaces
    int nwhite = 0;             // trailing blanks/tabs currently at the end of result
    int column = 0;             // column number

    foreach (size_t i, dchar c; string)
    {

        // Switch from aliasing string[] to a private copy of string[0 .. i].
        void change()
        {
            changes = true;
            result = null;
            result.length = string.length;
            result.length = i;
            result[0 .. i] = string[0 .. i];
        }

        switch (c)
        {
            case '\t':
                nwhite++;
                if (nspaces)
                {
                    // Spaces directly in front of a tab: replace them by
                    // however many tabs they span.
                    if (!changes)
                        change();

                    int j = result.length - nspaces;
                    int ntabs = (((column - nspaces) % tabsize) + nspaces) / tabsize;
                    result.length = j + ntabs;
                    result[j .. j + ntabs] = '\t';
                    nwhite += ntabs - nspaces;
                    nspaces = 0;
                }
                column = (column + tabsize) / tabsize * tabsize;
                break;

            case '\r':
            case '\n':
            case PS:
            case LS:
                // Truncate any trailing spaces or tabs
                if (nwhite)
                {
                    if (!changes)
                        change();
                    result = result[0 .. result.length - nwhite];
                }
                // A new line starts here. Without this reset the stale
                // nwhite would be truncated again at the next terminator
                // or at the end (removing the terminator itself), and tab
                // stops on the next line would be taken from the wrong
                // column.
                nwhite = 0;
                nspaces = 0;
                column = 0;
                break;

            default:
                if (nspaces >= 2 && (column % tabsize) == 0)
                {
                    // A run of spaces has reached a tab stop.
                    if (!changes)
                        change();

                    int j = result.length - nspaces;
                    int ntabs = (nspaces + tabsize - 1) / tabsize;
                    result.length = j + ntabs;
                    result[j .. j + ntabs] = '\t';
                    nwhite += ntabs - nspaces;
                    nspaces = 0;
                }
                if (c == ' ')
                {
                    nwhite++;
                    nspaces++;
                }
                else
                {
                    nwhite = 0;
                    nspaces = 0;
                }
                column++;
                break;
        }
        if (changes)
        {
            if (c <= 0x7F)
                result ~= cast(char)c;
            else
                std.utf.encode(result, c);
        }
    }

    // Truncate any trailing spaces or tabs
    if (nwhite)
        result = result[0 .. result.length - nwhite];

    return result;
}
2104 | |
unittest
{
    debug(string) printf("string.entab.unittest\n");

    char[] r;

    r = entab(null);
    assert(r == null);
    r = entab("");
    assert(r.length == 0);
    r = entab("a");
    assert(r == "a");
    // Only blanks: everything is trailing whitespace.
    r = entab("        ");
    assert(r == "");
    // Eight leading spaces reach the first tab stop.
    r = entab("        x");
    assert(r == "\tx");
    // Four spaces ending at column 8 become one tab.
    r = entab("  ab    asdf ");
    assert(r == "  ab\tasdf");
    // The fifth space lies past the tab stop and is kept.
    r = entab("  ab     asdf ");
    assert(r == "  ab\t asdf");
    // A space in front of a tab is absorbed by that tab.
    r = entab("  ab \t   asdf ");
    assert(r == "  ab\t   asdf");
    // One to eight spaces after column 7, then a tab: two tabs in total.
    r = entab("1234567 \ta");
    assert(r == "1234567\t\ta");
    r = entab("1234567  \ta");
    assert(r == "1234567\t\ta");
    r = entab("1234567   \ta");
    assert(r == "1234567\t\ta");
    r = entab("1234567    \ta");
    assert(r == "1234567\t\ta");
    r = entab("1234567     \ta");
    assert(r == "1234567\t\ta");
    r = entab("1234567      \ta");
    assert(r == "1234567\t\ta");
    r = entab("1234567       \ta");
    assert(r == "1234567\t\ta");
    r = entab("1234567        \ta");
    assert(r == "1234567\t\ta");
    // Nine spaces cross a second tab stop: three tabs in total.
    r = entab("1234567         \ta");
    assert(r == "1234567\t\t\ta");
    // TODO: need UTF test case
}
2147 | |
2148 | |
2149 | |
/************************************
 * Construct translation table for translate().
 * BUG: only works with ASCII
 *
 * The table has 256 entries. Entry from[k] holds to[k]; every other
 * entry holds its own index.
 */

char[] maketrans(char[] from, char[] to)
in
{
    assert(from.length == to.length);
    assert(from.length <= 128);
    foreach (char c; from)
    {
        assert(c <= 0x7F);
    }
    foreach (char c; to)
    {
        assert(c <= 0x7F);
    }
}
body
{
    char[] table = new char[256];

    // Start from the identity mapping ...
    for (int code = 0; code < table.length; code++)
        table[code] = cast(char)code;

    // ... then overwrite the entries named in from[].
    for (int k = 0; k < from.length; k++)
        table[from[k]] = to[k];

    return table;
}
2182 | |
/******************************************
 * Translate characters in s[] using table created by maketrans().
 * Delete chars in delchars[].
 * BUG: only works with ASCII
 *
 * transtab[] must have 256 entries. The result is newly allocated.
 */

char[] translate(char[] s, char[] transtab, char[] delchars)
in
{
    assert(transtab.length == 256);
}
body
{
    // drop[c] is true for every character that is to be deleted.
    bool[256] drop;

    drop[] = false;
    foreach (char d; delchars)
    {
        drop[d] = true;
    }

    // Pass 1: number of characters that survive.
    int kept = 0;
    foreach (char ch; s)
    {
        if (!drop[ch])
            kept++;
        //printf("s[%d] = '%c', count = %d\n", i, s[i], count);
    }

    // Pass 2: translate the survivors into an exactly sized buffer.
    char[] result = new char[kept];
    int next = 0;
    foreach (char ch; s)
    {
        if (drop[ch])
            continue;
        result[next] = transtab[ch];
        next++;
    }

    return result;
}
2227 | |
unittest
{
    debug(string) printf("string.translate.unittest\n");

    char[] from = "abcdef";
    char[] to = "ABCDEF";
    char[] s = "The quick dog fox";

    // a-f are upper-cased, 'k' and 'g' are deleted.
    char[] t = maketrans(from, to);
    char[] r = translate(s, t, "kg");
    //printf("r = '%.*s'\n", r);
    assert(cmp(r, "ThE quiC Do Fox") == 0);
}
2245 | |
/***********************************************
 * Convert to char[].
 *
 * For bool the result is the literal "true" or "false".
 */

char[] toString(bool b)
{
    if (b)
        return "true";
    return "false";
}
2254 | |
/// ditto
char[] toString(char c)
{
    // Two bytes are allocated: the character, and a 0 stored right
    // behind the one-character slice that is returned.
    char[] storage = new char[2];
    storage[1] = 0;
    storage[0] = c;
    return storage[0 .. 1];
}
2263 | |
unittest
{
    debug(string) printf("string.toString(char).unittest\n");

    // Rebuild a string one converted character at a time.
    char[] source = "foo";
    char[] rebuilt;
    foreach (char c; source)
    {
        rebuilt ~= std.string.toString(c);
    }
    //printf("%.*s", s2);
    assert(rebuilt == "foo");
}
2277 | |
/// ditto
char[] toString(ubyte ub)
{
    // Widen and reuse the uint conversion.
    return toString(cast(uint) ub);
}

/// ditto
char[] toString(ushort us)
{
    // Widen and reuse the uint conversion.
    return toString(cast(uint) us);
}
2280 | |
/// ditto
char[] toString(uint u)
{
    // Avoid storage allocation for simple stuff: a single digit is
    // returned as a slice of the digits table.
    if (u < 10)
        return digits[u .. u + 1];

    // Produce the digits right to left at the end of a scratch buffer,
    // then copy out the part that was filled.
    char[uint.sizeof * 3] scratch = void;
    size_t pos = scratch.length;

    do
    {
        uint digit = (u % 10) + '0';
        u /= 10;
        pos--;
        scratch[pos] = cast(char)digit;
    } while (u);

    return scratch[pos .. scratch.length].dup;
}
2305 | |
unittest
{
    debug(string) printf("string.toString(uint).unittest\n");

    // Single-digit values and a multi-digit value.
    assert(cmp(toString(0u), "0") == 0);
    assert(cmp(toString(9u), "9") == 0);
    assert(cmp(toString(123u), "123") == 0);
}
2325 | |
/// ditto
char[] toString(ulong u)
{
    // Anything that fits in 32 bits goes through the uint overload.
    if (u < 0x1_0000_0000)
        return toString(cast(uint)u);

    // Produce the digits right to left at the end of a scratch buffer,
    // then copy out the part that was filled.
    char[ulong.sizeof * 3] scratch;
    size_t pos = scratch.length;

    while (u)
    {
        char digit = cast(char)((u % 10) + '0');
        u /= 10;
        pos--;
        scratch[pos] = digit;
    }

    return scratch[pos .. scratch.length].dup;
}
2346 | |
unittest
{
    debug(string) printf("string.toString(ulong).unittest\n");

    // Single-digit values and a multi-digit value.
    assert(cmp(toString(0uL), "0") == 0);
    assert(cmp(toString(9uL), "9") == 0);
    assert(cmp(toString(123uL), "123") == 0);
}
2366 | |
/// ditto
char[] toString(byte b)
{
    // Widen and reuse the int conversion.
    return toString(cast(int) b);
}

/// ditto
char[] toString(short s)
{
    // Widen and reuse the int conversion.
    return toString(cast(int) s);
}
2369 | |
/// ditto
char[] toString(int i)
{
    // Non-negative values need no sign handling.
    if (i >= 0)
        return toString(cast(uint)i);

    // Negative: write the digits of the magnitude right to left, then
    // put the '-' in front of them.
    char[1 + int.sizeof * 3] scratch;
    size_t pos = scratch.length;
    uint magnitude = -i;

    while (magnitude)
    {
        char digit = cast(char)((magnitude % 10) + '0');
        magnitude /= 10;
        pos--;
        scratch[pos] = digit;
    }
    pos--;
    scratch[pos] = '-';

    return scratch[pos .. scratch.length].dup;
}
2392 | |
unittest
{
    debug(string) printf("string.toString(int).unittest\n");

    // Non-negative values.
    assert(cmp(toString(0), "0") == 0);
    assert(cmp(toString(9), "9") == 0);
    assert(cmp(toString(123), "123") == 0);

    // Negated values; -0 is plain zero.
    assert(cmp(toString(-0), "0") == 0);
    assert(cmp(toString(-9), "-9") == 0);
    assert(cmp(toString(-123), "-123") == 0);
}
2424 | |
/// ditto
char[] toString(long i)
{
    // Non-negative values go through the ulong overload, negative values
    // that fit in an int through the int overload.
    if (i >= 0)
        return toString(cast(ulong)i);
    if (cast(int)i == i)
        return toString(cast(int)i);

    // Remaining negatives: write the digits of the magnitude right to
    // left, then put the '-' in front of them.
    char[1 + long.sizeof * 3] scratch;
    size_t pos = scratch.length;
    ulong magnitude = cast(ulong)(-i);

    while (magnitude)
    {
        char digit = cast(char)((magnitude % 10) + '0');
        magnitude /= 10;
        pos--;
        scratch[pos] = digit;
    }
    pos--;
    scratch[pos] = '-';

    return scratch[pos .. scratch.length].dup;
}
2449 | |
unittest
{
    debug(string) printf("string.toString(long).unittest\n");

    // Non-negative values.
    assert(cmp(toString(0L), "0") == 0);
    assert(cmp(toString(9L), "9") == 0);
    assert(cmp(toString(123L), "123") == 0);

    // Negated values; -0L is plain zero.
    assert(cmp(toString(-0L), "0") == 0);
    assert(cmp(toString(-9L), "-9") == 0);
    assert(cmp(toString(-123L), "-123") == 0);
}
2481 | |
/// ditto
char[] toString(float f)
{
    // Widen and reuse the double conversion.
    return toString(cast(double) f);
}

/// ditto
char[] toString(double d)
{
    // Format with C's "%g" into a stack buffer, then copy the text out.
    char[20] text;
    int written = sprintf(text.ptr, "%g", d);
    char[] formatted = text[0 .. written];

    return formatted.dup;
}
2493 | |
/// ditto
char[] toString(real r)
{
    // Format with C's "%Lg" into a stack buffer, then copy the text out.
    char[20] text;
    int written = sprintf(text.ptr, "%Lg", r);
    char[] formatted = text[0 .. written];

    return formatted.dup;
}
2502 | |
/// ditto
char[] toString(ifloat f)
{
    // Widen and reuse the idouble conversion.
    return toString(cast(idouble) f);
}

/// ditto
char[] toString(idouble d)
{
    // "%g" followed by a literal 'i' suffix.
    char[21] text;
    int written = sprintf(text.ptr, "%gi", d);
    char[] formatted = text[0 .. written];

    return formatted.dup;
}
2514 | |
/// ditto
char[] toString(ireal r)
{
    // "%Lg" followed by a literal 'i' suffix.
    char[21] text;
    int written = sprintf(text.ptr, "%Lgi", r);
    char[] formatted = text[0 .. written];

    return formatted.dup;
}
2523 | |
/// ditto
char[] toString(cfloat f)
{
    // Widen and reuse the cdouble conversion.
    return toString(cast(cdouble) f);
}

/// ditto
char[] toString(cdouble d)
{
    // Real part, '+', imaginary part, 'i'.
    char[20 + 1 + 20 + 1] text;
    int written = sprintf(text.ptr, "%g+%gi", d.re, d.im);
    char[] formatted = text[0 .. written];

    return formatted.dup;
}
2535 | |
/// ditto
char[] toString(creal r)
{
    // Real part, '+', imaginary part, 'i'.
    char[20 + 1 + 20 + 1] text;
    int written = sprintf(text.ptr, "%Lg+%Lgi", r.re, r.im);
    char[] formatted = text[0 .. written];

    return formatted.dup;
}
2544 | |
2545 | |
2546 /****************************************** | |
2547 * Convert value to string in _radix radix. | |
2548 * | |
2549 * radix must be a value from 2 to 36. | |
2550 * value is treated as a signed value only if radix is 10. | |
2551 * The characters A through Z are used to represent values 10 through 35. | |
2552 */ | |
char[] toString(long value, uint radix)
in
{
    assert(radix >= 2 && radix <= 36);
}
body
{
    // Only radix 10 goes through the signed formatter; for every other radix
    // the value is cast to ulong and formatted as unsigned.
    return (radix == 10) ? toString(value)
                         : toString(cast(ulong) value, radix);
}
2564 | |
2565 /// ditto | |
char[] toString(ulong value, uint radix)
in
{
    assert(radix >= 2 && radix <= 36);
}
body
{
    // A single digit that the hexdigits table can represent is returned as a
    // one-character slice of that table instead of a fresh copy.
    if (value < radix && value < hexdigits.length)
    {
        size_t digit = cast(size_t) value;
        return hexdigits[digit .. digit + 1];
    }

    // One slot per bit of value, which covers the longest case (radix 2).
    char[value.sizeof * 8] digits;
    uint pos = digits.length;

    // Produce digits least-significant first, filling the buffer from its end.
    do
    {
        ubyte d = cast(ubyte)(value % radix);

        value /= radix;
        --pos;
        digits[pos] = cast(char)((d < 10) ? d + '0' : d + 'A' - 10);
    } while (value);

    return digits[pos .. digits.length].dup;
}
2589 | |
unittest
{
    debug(string) printf("string.toString(ulong, uint).unittest\n");

    // Radix 10 keeps the sign.
    assert(toString(-10L, 10u) == "-10");

    // Binary.
    assert(toString(15L, 2u) == "1111");
    assert(toString(1L, 2u) == "1");

    // Hexadecimal uses upper-case letters.
    assert(toString(0x1234AFL, 16u) == "1234AF");
}
2612 | |
2613 /************************************************* | |
2614 * Convert C-style 0 terminated string s to char[] string. | |
2615 */ | |
2616 | |
char[] toString(char *s)
{
    // A null pointer maps to a null array.
    if (!s)
        return cast(char[]) null;

    // Otherwise return a slice over the C string itself, up to (not including)
    // its terminating 0; nothing is copied.
    return s[0 .. strlen(s)];
}
2621 | |
unittest
{
    debug(string) printf("string.toString(char*).unittest\n");

    // A null pointer compares equal to the empty string.
    assert(cmp(toString(null), "") == 0);

    // The terminating 0 is not part of the result.
    assert(cmp(toString("foo\0"), "foo") == 0);
}
2637 | |
2638 | |
2639 /***************************************************** | |
2640 * Format arguments into a string. | |
2641 */ | |
2642 | |
2643 | |
char[] format(...)
{
    char[] output;

    // Sink handed to doFormat: appends each character to output as UTF-8.
    void sink(dchar ch)
    {
        std.utf.encode(output, ch);
    }

    std.format.doFormat(&sink, _arguments, _argptr);
    return output;
}
2656 | |
2657 | |
2658 /***************************************************** | |
2659 * Format arguments into string <i>s</i> which must be large | |
2660 * enough to hold the result. Throws ArrayBoundsError if it is not. | |
2661 * Returns: s | |
2662 */ | |
char[] sformat(char[] s, ...)
{
    size_t used;    // number of bytes of s filled so far

    // Sink handed to doFormat: stores each character into s as UTF-8 and
    // throws ArrayBoundsError as soon as the character would not fit.
    void sink(dchar ch)
    {
        if (ch > 0x7F)
        {
            // Multi-byte character: encode into a scratch buffer first so the
            // bounds check covers the whole sequence.
            char[4] scratch;
            char[] encoded = std.utf.toUTF8(scratch, ch);

            if (used + encoded.length > s.length)
                throw new ArrayBoundsError("std.string.sformat", 0);
            s[used .. used + encoded.length] = encoded[];
            used += encoded.length;
            return;
        }

        // Single-byte (ASCII) character.
        if (used >= s.length)
            throw new ArrayBoundsError("std.string.sformat", 0);
        s[used] = cast(char) ch;
        ++used;
    }

    std.format.doFormat(&sink, _arguments, _argptr);
    return s[0 .. used];
}
2690 | |
2691 | |
unittest
{
    debug(string) printf("std.string.format.unittest\n");

    // format(null) is deliberately not exercised; that case was already
    // disabled in this test.

    // Plain text and the "%%" escape.
    assert(cmp(format("foo"), "foo") == 0);
    assert(cmp(format("foo%%"), "foo%") == 0);

    // "%s" with a character and with strings.
    assert(cmp(format("foo%s", 'C'), "fooC") == 0);
    assert(cmp(format("%s foo", "bar"), "bar foo") == 0);
    assert(cmp(format("%s foo %s", "bar", "abc"), "bar foo abc") == 0);

    // "%d" with negative and positive integers.
    assert(cmp(format("foo %d", -123), "foo -123") == 0);
    assert(cmp(format("foo %d", 123), "foo 123") == 0);
}
2731 | |
2732 | |
2733 /*********************************************** | |
2734 * See if character c is in the pattern. | |
2735 * Patterns: | |
2736 * | |
2737 * A <i>pattern</i> is an array of characters much like a <i>character | |
2738 * class</i> in regular expressions. A sequence of characters | |
2739 * can be given, such as "abcde". The '-' can represent a range | |
2740 * of characters, as "a-e" represents the same pattern as "abcde". | |
2741 * "a-fA-F0-9" represents all the hex characters. | |
2742 * If the first character of a pattern is '^', then the pattern | |
2743 * is negated, i.e. "^0-9" means any character except a digit. | |
2744 * The functions inPattern, <b>countchars</b>, <b>removechars</b>, | |
2745 * and <b>squeeze</b> | |
2746 * use patterns. | |
2747 * | |
2748 * Note: In the future, the pattern syntax may be improved | |
2749 * to be more like regular expression character classes. | |
2750 */ | |
2751 | |
bool inPattern(dchar c, char[] pattern)
{
    bool negated = false;       // set when the pattern starts with '^'
    bool rangePending = false;  // set after a '-' that introduces a range
    dchar prev;                 // previous pattern character (low end of a range)

    foreach (size_t idx, dchar p; pattern)
    {
        if (idx == 0 && p == '^')
        {
            negated = true;
            // A pattern consisting only of "^" matches the '^' character itself.
            if (idx + 1 == pattern.length)
                return (c == p);    // or should this be an error?
            prev = p;
            continue;
        }

        if (rangePending)
        {
            // p is the upper end of the range opened by the preceding '-'.
            rangePending = false;
            if ((prev <= c && c <= p) || c == p)
                return !negated;
            prev = p;
            continue;
        }

        // A '-' opens a range only when it is neither the first pattern
        // character (after an optional '^') nor the last one.
        if (p == '-' && idx > negated && idx + 1 < pattern.length)
        {
            rangePending = true;
            continue;               // prev stays as the range's low end
        }

        if (c == p)
            return !negated;
        prev = p;
    }
    return negated;
}
2782 | |
2783 | |
unittest
{
    debug(string) printf("std.string.inPattern.unittest\n");

    // Literal characters and the null pattern.
    assert(inPattern('x', "x") == 1);
    assert(inPattern('x', "y") == 0);
    assert(inPattern('x', cast(char[])null) == 0);

    // Negation with a leading '^'.
    assert(inPattern('x', "^y") == 1);
    assert(inPattern('x', "yxxy") == 1);
    assert(inPattern('x', "^yxxy") == 0);
    assert(inPattern('x', "^abcd") == 1);

    // '^' as an ordinary character.
    assert(inPattern('^', "^^") == 0);
    assert(inPattern('^', "^") == 1);
    assert(inPattern('^', "a^") == 1);

    // Ranges, plain and negated.
    assert(inPattern('x', "a-z") == 1);
    assert(inPattern('x', "A-Z") == 0);
    assert(inPattern('x', "^a-z") == 0);
    assert(inPattern('x', "^A-Z") == 1);

    // A trailing '-' is an ordinary character.
    assert(inPattern('-', "a-") == 1);
    assert(inPattern('-', "^A-") == 0);

    // A reversed range matches only its two end points.
    assert(inPattern('a', "z-a") == 1);
    assert(inPattern('z', "z-a") == 1);
    assert(inPattern('x', "z-a") == 0);
}
2829 | |
2830 | |
2831 /*********************************************** | |
2832 * See if character c is in the intersection of the patterns. | |
2833 */ | |
2834 | |
int inPattern(dchar c, char[][] patterns)
{
    int matched;    // stays 0 when patterns is empty

    foreach (char[] pat; patterns)
    {
        // One failing pattern is enough to reject c.
        if (!inPattern(c, pat))
            return 0;
        matched = 1;
    }
    return matched;
}
2848 | |
2849 | |
2850 /******************************************** | |
2851 * Count characters in s that match pattern. | |
2852 */ | |
2853 | |
size_t countchars(char[] s, char[] pattern)
{
    size_t total;

    // s is walked one decoded character (dchar) at a time.
    foreach (dchar ch; s)
    {
        if (inPattern(ch, pattern))
            ++total;
    }
    return total;
}
2864 | |
2865 | |
unittest
{
    debug(string) printf("std.string.count.unittest\n");

    // Every character falls inside the range.
    assert(countchars("abc", "a-c") == 3);

    // Two 'o's and one 'r'.
    assert(countchars("hello world", "or") == 3);
}
2877 | |
2878 | |
2879 /******************************************** | |
2880 * Return string that is s with all characters removed that match pattern. | |
2881 */ | |
2882 | |
char[] removechars(char[] s, char[] pattern)
{
    char[] kept = s;        // remains s itself until something must be dropped
    bool sawMatch = false;  // true once the first matching character is found
    size_t prefixEnd;       // index in s of that first matching character

    foreach (size_t idx, dchar ch; s)
    {
        if (inPattern(ch, pattern))
        {
            // Only the first match matters: it marks where the untouched
            // prefix of s ends.
            if (!sawMatch)
            {
                sawMatch = true;
                prefixEnd = idx;
            }
            continue;
        }

        // Characters before the first match are already covered by the prefix.
        if (!sawMatch)
            continue;

        // First kept character after a match: start the copy from the prefix.
        if (kept is s)
            kept = s[0 .. prefixEnd].dup;
        std.utf.encode(kept, ch);
    }

    // Matches were found but nothing followed them: the result is the prefix.
    if (sawMatch && kept is s)
        kept = s[0 .. prefixEnd].dup;
    return kept;
}
2909 | |
2910 | |
unittest
{
    debug(string) printf("std.string.remove.unittest\n");

    // Removing every character yields a null array.
    assert(removechars("abc", "a-c") is null);

    // Characters removed from the middle and from the end.
    assert(removechars("hello world", "or") == "hell wld");
    assert(removechars("hello world", "d") == "hello worl");
}
2924 | |
2925 | |
2926 /*************************************************** | |
2927 * Return string where sequences of a character in s[] from pattern[] | |
2928 * are replaced with a single instance of that character. | |
2929 * If pattern is null, it defaults to all characters. | |
2930 */ | |
2931 | |
char[] squeeze(char[] s, char[] pattern = null)
{
    char[] r = s;       // remains s until a duplicate has to be dropped
    dchar prevChar;     // most recent character that matched the pattern
    size_t keepEnd;     // end (in s) of the prefix known to need no change
    bool inRun;         // true when the previous character matched the pattern
    bool changed;       // true once at least one duplicate has been skipped

    foreach (size_t i, dchar c; s)
    {
        // Repeat of the character that started the current run: skip it.
        if (inRun && prevChar == c)
        {
            changed = true;
            continue;
        }

        bool matches = (pattern is null) || inPattern(c, pattern);
        inRun = matches;

        if (changed)
        {
            // Copy the untouched prefix on first use, then append c.
            if (r is s)
                r = s[0 .. keepEnd].dup;
            std.utf.encode(r, c);
        }
        else if (matches)
        {
            // Nothing dropped yet: extend the untouched prefix past c.
            keepEnd = i + std.utf.stride(s, i);
        }

        if (matches)
            prevChar = c;
    }

    // Duplicates were only at the very end: a slice of s is enough.
    if (changed && r is s)
        r = s[0 .. keepEnd];
    return r;
}
2975 | |
2976 | |
unittest
{
    debug(string) printf("std.string.squeeze.unittest\n");
    char[] s;

    // Default (null) pattern squeezes any repeated character.
    assert(squeeze("hello") == "helo");

    // Nothing to squeeze: the input itself is returned.
    s = "abcd";
    assert(squeeze(s) is s);

    // Duplicates only at the end: the result is a slice of the input.
    s = "xyzz";
    assert(squeeze(s).ptr == s.ptr);

    // Only characters in the pattern are squeezed.
    assert(squeeze("hello goodbyee", "oe") == "hello godbye");
}
2994 | |
2995 | |
2996 /********************************************** | |
2997 * Return string that is the 'successor' to s[]. | |
2998 * If the rightmost character is a-zA-Z0-9, it is incremented within | |
2999 * its case or digits. If it generates a carry, the process is | |
3000 * repeated with the one to its immediate left. | |
3001 */ | |
3002 | |
char[] succ(char[] s)
{
    if (s.length && isalnum(s[s.length - 1]))
    {
        char[] r = s.dup;           // work on a copy; s is never modified
        size_t i = r.length - 1;    // position being incremented, right to left

        for (;;)
        {
            dchar c = s[i];
            dchar carry;

            if (c == '9')
            {
                // Digit wraps to '0'; a new leading digit would be '1'.
                c = '0';
                carry = '1';
            }
            else if (c == 'z' || c == 'Z')
            {
                // Letter wraps to 'a'/'A' in its own case; a new leading
                // character would be that same letter.
                c -= 'Z' - 'A';
                carry = c;
            }
            else
            {
                // No wrap-around: bump an alphanumeric in place and stop.
                // A non-alphanumeric is left unchanged and also ends the carry.
                if (std.ctype.isalnum(c))
                    r[i]++;
                return r;
            }

            r[i] = cast(char)c;
            if (i == 0)
            {
                // Carried out of the leftmost position: grow by one character.
                char[] t = new char[r.length + 1];
                t[0] = cast(char)carry;
                t[1 .. t.length] = r[];
                return t;
            }
            i--;
        }
    }
    return s;
}
3045 | |
unittest
{
    debug(string) printf("std.string.succ.unittest\n");

    // Inputs that do not end in an alphanumeric come back unchanged.
    assert(succ(null) is null);
    assert(succ("!@#$%") == "!@#$%");

    // Simple increment and carries that lengthen the string.
    assert(succ("1") == "2");
    assert(succ("9") == "10");
    assert(succ("999") == "1000");

    // Carry crossing from digits into letters.
    assert(succ("zz99") == "aaa00");
}
3065 | |
3066 | |
3067 /*********************************************** | |
3068 * Replaces characters in str[] that are in from[] | |
3069 * with corresponding characters in to[] and returns the resulting | |
3070 * string. | |
3071 * Params: | |
3072 * modifiers = a string of modifier characters | |
3073 * Modifiers: | |
3074 <table border=1 cellspacing=0 cellpadding=5> | |
3075 <tr> <th>Modifier <th>Description | |
3076 <tr> <td><b>c</b> <td>Complement the list of characters in from[] | |
3077 <tr> <td><b>d</b> <td>Removes matching characters with no corresponding replacement in to[] | |
3078 <tr> <td><b>s</b> <td>Removes adjacent duplicates in the replaced characters | |
3079 </table> | |
3080 | |
3081 If modifier <b>d</b> is present, then the number of characters | |
3082 in to[] may be only 0 or 1. | |
3083 | |
3084 If modifier <b>d</b> is not present and to[] is null, | |
3085 then to[] is taken _to be the same as from[]. | |
3086 | |
3087 If modifier <b>d</b> is not present and to[] is shorter | |
3088 than from[], then to[] is extended by replicating the | |
3089 last character in to[]. | |
3090 | |
3091 Both from[] and to[] may contain ranges using the <b>-</b> | |
3092 character, for example <b>a-d</b> is synonymous with <b>abcd</b>. | |
3093 Neither accept a leading <b>^</b> as meaning the complement of | |
3094 the string (use the <b>c</b> modifier for that). | |
3095 */ | |
3096 | |
// Translates str character by character: each character found in from[] is
// replaced by the character at the same position in to[]; everything else is
// copied through. Ranges written as "x-y" are expanded on the fly in both
// from[] and to[]. The c/d/s modifiers alter this as described per step below.
3097 char[] tr(char[] str, char[] from, char[] to, char[] modifiers = null) | |
3098 { | |
3099 int mod_c; | |
3100 int mod_d; | |
3101 int mod_s; | |
3102 | |
// Parse the modifier string; any character other than c, d or s hits assert(0).
3103 foreach (char c; modifiers) | |
3104 { | |
3105 switch (c) | |
3106 { | |
3107 case 'c': mod_c = 1; break; // complement | |
3108 case 'd': mod_d = 1; break; // delete unreplaced chars | |
3109 case 's': mod_s = 1; break; // squeeze duplicated replaced chars | |
3110 default: assert(0); | |
3111 } | |
3112 } | |
3113 | |
// Without 'd', a null to[] means "same as from[]".
3114 if (to is null && !mod_d) | |
3115 to = from; | |
3116 | |
// Allocate str.length chars, then reset the length to 0 so the result can be
// built by appending (presumably to reserve capacity up front — TODO confirm).
3117 char[] result = new char[str.length]; | |
3118 result.length = 0; | |
// m is 1 when the last character appended was a replacement, 0 when it was
// copied through unchanged; lastc is the last character appended.
3119 int m; | |
3120 dchar lastc; | |
3121 | |
3122 foreach (dchar c; str) | |
3123 { dchar lastf; | |
3124 dchar lastt; | |
3125 dchar newc; | |
// n counts how many positions of the expanded from[] list have been passed;
// when c is found, n is its index in that list.
3126 int n = 0; | |
3127 | |
// Step 1: look for c in from[].
3128 for (size_t i = 0; i < from.length; ) | |
3129 { | |
3130 dchar f = std.utf.decode(from, i); | |
3131 //writefln("\tf = '%s', c = '%s', lastf = '%x', '%x', i = %d, %d", f, c, lastf, dchar.init, i, from.length); | |
// A '-' with a character before it and one after it denotes the range
// lastf..nextf.
3132 if (f == '-' && lastf != dchar.init && i < from.length) | |
3133 { | |
3134 dchar nextf = std.utf.decode(from, i); | |
3135 //writefln("\tlastf = '%s', c = '%s', nextf = '%s'", lastf, c, nextf); | |
3136 if (lastf <= c && c <= nextf) | |
3137 { | |
// lastf itself was already counted, hence the -1.
3138 n += c - lastf - 1; | |
3139 if (mod_c) | |
3140 goto Lnotfound; | |
3141 goto Lfound; | |
3142 } | |
// c is outside the range: skip the rest of the range's positions.
3143 n += nextf - lastf; | |
3144 lastf = lastf.init; | |
3145 continue; | |
3146 } | |
3147 | |
3148 if (c == f) | |
3149 { if (mod_c) | |
3150 goto Lnotfound; | |
3151 goto Lfound; | |
3152 } | |
3153 lastf = f; | |
3154 n++; | |
3155 } | |
// c is not in from[]: without 'c' it is copied through; with 'c' it is the
// case that gets replaced.
3156 if (!mod_c) | |
3157 goto Lnotfound; | |
3158 n = 0; // consider it 'found' at position 0 | |
3159 | |
3160 Lfound: | |
3161 | |
// Step 2: pick the replacement, expanding ranges in to[] the same way.
3162 // Find the nth character in to[] | |
3163 //writefln("\tc = '%s', n = %d", c, n); | |
3164 dchar nextt; | |
3165 for (size_t i = 0; i < to.length; ) | |
3166 { dchar t = std.utf.decode(to, i); | |
3167 if (t == '-' && lastt != dchar.init && i < to.length) | |
3168 { | |
3169 nextt = std.utf.decode(to, i); | |
3170 //writefln("\tlastt = '%s', c = '%s', nextt = '%s', n = %d", lastt, c, nextt, n); | |
3171 n -= nextt - lastt; | |
// A negative n means the wanted position lies inside this range.
3172 if (n < 0) | |
3173 { | |
3174 newc = nextt + n + 1; | |
3175 goto Lnewc; | |
3176 } | |
3177 lastt = dchar.init; | |
3178 continue; | |
3179 } | |
3180 if (n == 0) | |
3181 { newc = t; | |
3182 goto Lnewc; | |
3183 } | |
3184 lastt = t; | |
3185 nextt = t; | |
3186 n--; | |
3187 } | |
// to[] ran out before position n: with 'd' the character is dropped,
// otherwise the last character taken from to[] is reused.
3188 if (mod_d) | |
3189 continue; | |
3190 newc = nextt; | |
3191 | |
3192 Lnewc: | |
// With 's', a replacement equal to the immediately preceding replacement is
// not appended again.
3193 if (mod_s && m && newc == lastc) | |
3194 continue; | |
3195 std.utf.encode(result, newc); | |
3196 m = 1; | |
3197 lastc = newc; | |
3198 continue; | |
3199 | |
3200 Lnotfound: | |
// Copy c through unchanged and note that it was not a replacement.
3201 std.utf.encode(result, c); | |
3202 lastc = c; | |
3203 m = 0; | |
3204 } | |
3205 return result; | |
3206 } | |
3207 | |
unittest
{
    debug(string) printf("std.string.tr.unittest\n");

    // Literal lists and ranges.
    assert(tr("abcdef", "cd", "CD") == "abCDef");
    assert(tr("abcdef", "b-d", "B-D") == "aBCDef");
    assert(tr("abcdefgh", "b-dh", "B-Dx") == "aBCDefgx");

    // Equivalent spellings of the same to[] list.
    assert(tr("abcdefgh", "b-dh", "B-CDx") == "aBCDefgx");
    assert(tr("abcdefgh", "b-dh", "B-BCDx") == "aBCDefgx");

    // 'c': complement from[].
    assert(tr("abcdef", "ef", "*", "c") == "****ef");

    // 'd': delete matches that have no replacement.
    assert(tr("abcdef", "ef", "", "d") == "abcd");

    // 's': squeeze runs of replaced characters.
    assert(tr("hello goodbye", "lo", null, "s") == "helo godbye");
    assert(tr("hello goodbye", "lo", "x", "s") == "hex gxdbye");

    // Modifiers combined.
    assert(tr("14-Jul-87", "a-zA-Z", " ", "cs") == " Jul ");

    // Duplicate entries in from[]: the first one wins.
    assert(tr("Abc", "AAA", "XYZ") == "Xbc");
}
3248 | |
3249 | |
3250 /* ************************************************ | |
3251 * Version : v0.3 | |
3252 * Author : David L. 'SpottedTiger' Davis | |
3253 * Date Created : 31.May.05 Compiled and Tested with dmd v0.125 | |
3254 * Date Modified : 01.Jun.05 Modified the function to handle the | |
3255 * : imaginary and complex float-point | |
3256 * : datatypes. | |
3257 * : | |
3258 * Licence : Public Domain / Contributed to Digital Mars | |
3259 */ | |
3260 | |
3261 /** | |
3262 * [in] char[] s can be formatted in the following ways: | |
3263 * | |
3264 * Integer Whole Number: | |
3265 * (for byte, ubyte, short, ushort, int, uint, long, and ulong) | |
3266 * ['+'|'-']digit(s)[U|L|UL] | |
3267 * | |
3268 * examples: 123, 123UL, 123L, +123U, -123L | |
3269 * | |
3270 * Floating-Point Number: | |
3271 * (for float, double, real, ifloat, idouble, and ireal) | |
3272 * ['+'|'-']digit(s)[.][digit(s)][[e-|e+]digit(s)][i|f|L|Li|fi]] | |
3273 * or [nan|nani|inf|-inf] | |
3274 * | |
3275 * examples: +123., -123.01, 123.3e-10f, 123.3e-10fi, 123.3e-10L | |
3276 * | |
3277 * (for cfloat, cdouble, and creal) | |
3278 * ['+'|'-']digit(s)[.][digit(s)][[e-|e+]digit(s)][+] | |
3279 * [digit(s)[.][digit(s)][[e-|e+]digit(s)][i|f|L|Li|fi]] | |
3280 * or [nan|nani|nan+nani|inf|-inf] | |
3281 * | |
3282 * examples: nan, -123e-1+456.9e-10Li, +123e+10+456i, 123+456 | |
3283 * | |
3284 * [in] bool bAllowSep | |
3285 * False by default, but when set to true it will accept the | |
3286 * separator characters "," and "_" within the string, but these | |
3287 * characters should be stripped from the string before using any | |
3288 * of the conversion functions such as toInt() and toFloat(); | |
3289 * otherwise an error will occur. | |
3290 * | |
3291 * Also please note, that no spaces are allowed within the string | |
3292 * anywhere whether it's a leading, trailing, or embedded space(s), | |
3293 * thus they too must be stripped from the string before using this | |
3294 * function, or any of the conversion functions. | |
3295 */ | |
3296 | |
// Returns true when s is spelled like a D numeric literal (integer,
// floating-point, imaginary or complex), or is one of the special spellings
// nan, nani, nan+nani, inf, -inf. Matching is case-insensitive.
//
// When bAllowSep is true the separators '_' and ',' are accepted as well.
//
// At least one decimal digit is required. Previously strings such as "+",
// "-", ".", "f" or "ul" were reported as numeric because nothing checked
// that a digit had actually been seen.
final bool isNumeric(in char[] s, in bool bAllowSep = false)
{
    int    iLen = s.length;
    bool   bDecimalPoint = false;
    bool   bExponent = false;
    bool   bComplex = false;
    bool   bDigit = false;      // set once any decimal digit has been seen
    char[] sx = std.string.tolower(s);
    int    j = 0;
    char   c;

    // Empty string, return false
    if (iLen == 0)
        return false;

    // Check for NaN (Not a Number)
    if (sx == "nan" || sx == "nani" || sx == "nan+nani")
        return true;

    // Check for Infinity
    if (sx == "inf" || sx == "-inf")
        return true;

    // A sign is allowed only in the 1st character
    if (sx[0] == '-' || sx[0] == '+')
        j++;

    for (int i = j; i < iLen; i++)
    {
        c = sx[i];

        // Digits are good, continue checking with the next character.
        if (c >= '0' && c <= '9')
        {
            bDigit = true;
            continue;
        }
        // A '+' past the first character starts the second (imaginary) part
        // of a complex number: reset the per-number flags.
        else if (c == '+')
        {
            if (i > 0)
            {
                bDecimalPoint = false;
                bExponent = false;
                bComplex = true;
                continue;
            }
            else
                return false;
        }
        // Allow only one exponent per number
        else if (c == 'e')
        {
            // A 2nd exponent found, return not a number
            if (bExponent)
                return false;

            if (i + 1 < iLen)
            {
                // The exponent must carry an explicit sign; without one
                // this is not a number.
                if (sx[i + 1] != '-' && sx[i + 1] != '+')
                    return false;
                else
                {
                    bExponent = true;
                    i++;    // step over the sign
                }
            }
            else
                // Ending in "E", return not a number
                return false;
        }
        // Allow only one decimal point per number
        else if (c == '.')
        {
            // A 2nd decimal point found, return not a number
            if (bDecimalPoint)
                return false;

            bDecimalPoint = true;
            continue;
        }
        // Check for two-character ending suffixes "ul", "fi", "li",
        // and whether they are being used with the correct datatype.
        else if (i == iLen - 2)
        {
            // Integer Whole Number
            if (sx[i..iLen] == "ul" &&
                (!bDecimalPoint && !bExponent && !bComplex))
                return bDigit;
            // Floating-Point Number
            else if ((sx[i..iLen] == "fi" || sx[i..iLen] == "li") &&
                     (bDecimalPoint || bExponent || bComplex))
                return bDigit;
            else if (sx[i..iLen] == "ul" &&
                     (bDecimalPoint || bExponent || bComplex))
                return false;
            // Could be an Integer or a Float, thus
            // all these suffixes are valid for both
            else if (sx[i..iLen] == "ul" ||
                     sx[i..iLen] == "fi" ||
                     sx[i..iLen] == "li")
                return bDigit;
            else
                return false;
        }
        // Check for one-character ending suffixes "u", "l", "f", "i".
        else if (i == iLen - 1)
        {
            // Integer Whole Number
            if ((c == 'u' || c == 'l') &&
                (!bDecimalPoint && !bExponent && !bComplex))
                return bDigit;
            // A complex number must end in the required 'i' character
            else if (bComplex)
            {
                if (c == 'i')
                    return bDigit;
                else
                    return false;
            }
            // Floating-Point Number
            else if ((c == 'l' || c == 'f' || c == 'i') &&
                     (bDecimalPoint || bExponent))
                return bDigit;
            // Could be an Integer or a Float, thus
            // all these suffixes are valid for both
            else if (c == 'l' || c == 'f' || c == 'i')
                return bDigit;
            else
                return false;
        }
        else
        {
            // Check if separators are allowed
            // to be in the numeric string
            if (bAllowSep == true && (c == '_' || c == ','))
                continue;
            else
                return false;
        }
    }

    return bDigit;
}
3438 | |
3439 /// Allow any object as a parameter | |
bool isNumeric(...)
{
    // Pass the variadic type list and argument pointer on to the
    // (TypeInfo[], va_list) overload, which does the actual work.
    bool verdict = isNumeric(_arguments, _argptr);
    return verdict;
}
3444 | |
3445 /// Check only the first parameter, all others will be ignored. | |
bool isNumeric(TypeInfo[] _arguments, va_list _argptr)
{
    if (_arguments.length == 0)
        return false;

    // Only the first argument's type is ever inspected.
    TypeInfo first = _arguments[0];

    // Strings: test the text itself (wide strings are converted to UTF-8).
    if (first == typeid(char[]))
        return isNumeric(va_arg!(char[])(_argptr));
    if (first == typeid(wchar[]))
        return isNumeric(std.utf.toUTF8(va_arg!(wchar[])(_argptr)));
    if (first == typeid(dchar[]))
        return isNumeric(std.utf.toUTF8(va_arg!(dchar[])(_argptr)));

    // These types are numeric by definition; the value is not examined.
    if (first == typeid(real)    || first == typeid(double)  ||
        first == typeid(float)   || first == typeid(ulong)   ||
        first == typeid(long)    || first == typeid(uint)    ||
        first == typeid(int)     || first == typeid(ushort)  ||
        first == typeid(short)   || first == typeid(ireal)   ||
        first == typeid(idouble) || first == typeid(ifloat)  ||
        first == typeid(creal)   || first == typeid(cdouble) ||
        first == typeid(cfloat))
        return true;

    // ubyte, byte and char are treated as a single character and tested as a
    // one-character string.
    if (first == typeid(ubyte))
    {
        char[] one = new char[1];
        one[0] = va_arg!(ubyte)(_argptr);
        return isNumeric(cast(char[])one);
    }
    if (first == typeid(byte))
    {
        char[] one = new char[1];
        one[0] = va_arg!(byte)(_argptr);
        return isNumeric(cast(char[])one);
    }
    if (first == typeid(char))
    {
        char[] one = new char[1];
        one[0] = va_arg!(char)(_argptr);
        return isNumeric(one);
    }

    // Wide characters: same idea, via a one-element wide string.
    if (first == typeid(wchar))
    {
        wchar[] wone = new wchar[1];
        wone[0] = va_arg!(wchar)(_argptr);
        return isNumeric(std.utf.toUTF8(wone));
    }
    if (first == typeid(dchar))
    {
        dchar[] done = new dchar[1];
        done[0] = va_arg!(dchar)(_argptr);
        return isNumeric(std.utf.toUTF8(done));
    }

    // cent and ucent are not handled; any other type is not numeric.
    return false;
}
3529 | |
unittest
{
    debug (string) printf("isNumeric(in char[], bool = false).unittest\n");
    char[] s;

    // Test the isNumeric(in char[]) function
    assert(isNumeric("1"));
    assert(isNumeric("1.0"));
    assert(isNumeric("1e-1"));
    assert(!isNumeric("12345xxxx890"));
    assert(isNumeric("567L"));
    assert(isNumeric("23UL"));
    assert(!isNumeric("-123..56f"));
    assert(!isNumeric("12.3.5.6"));
    assert(!isNumeric(" 12.356"));
    assert(!isNumeric("123 5.6"));
    assert(isNumeric("1233E-1+1.0e-1i"));

    // Complex literals and suffix ordering.
    assert(isNumeric("123.00E-5+1234.45E-12Li"));
    assert(!isNumeric("123.00e-5+1234.45E-12iL"));
    assert(!isNumeric("123.00e-5+1234.45e-12uL"));
    assert(!isNumeric("123.00E-5+1234.45e-12lu"));

    // Suffixes on plain numbers.
    assert(isNumeric("123fi"));
    assert(isNumeric("123li"));
    assert(!isNumeric("--123L"));
    assert(!isNumeric("+123.5UL"));
    assert(isNumeric("123f"));
    assert(!isNumeric("123.u"));

    // Round trips through toString.
    assert(isNumeric(std.string.toString(real.nan)));
    assert(isNumeric(std.string.toString(-real.infinity)));
    assert(isNumeric(std.string.toString(123e+2+1234.78Li)));

    // Slices of a decorated amount.
    s = "$250.99-";
    assert(isNumeric(s[1..s.length - 2]));
    assert(!isNumeric(s));
    assert(!isNumeric(s[0..s.length - 1]));

    // These test calling the isNumeric(...) function
    assert(isNumeric(1,123UL));
    assert(isNumeric('2'));
    assert(!isNumeric('x'));
    assert(!isNumeric(cast(byte)0x57));     // 'W'
    assert(isNumeric(cast(byte)0x37));      // '7'
    assert(isNumeric(cast(wchar[])"145.67"));
    assert(!isNumeric(cast(dchar[])"145.67U"));
    assert(isNumeric(123_000.23fi));
    assert(isNumeric(123.00E-5+1234.45E-12Li));
    assert(isNumeric(real.nan));
    assert(isNumeric(-real.infinity));
}
3582 | |
3583 | |
3584 /***************************** | |
3585 * Soundex algorithm. | |
3586 * | |
3587 * The Soundex algorithm converts a word into 4 characters | |
3588 * based on how the word sounds phonetically. The idea is that | |
3589 * two spellings that sound alike will have the same Soundex | |
3590 * value, which means that Soundex can be used for fuzzy matching | |
3591 * of names. | |
3592 * | |
3593 * Params: | |
3594 * string = String to convert to Soundex representation. | |
3595 * buffer = Optional 4 char array to put the resulting Soundex | |
3596 * characters into. If null, the return value | |
3597 * buffer will be allocated on the heap. | |
3598 * Returns: | |
3599 * The four character array with the Soundex result in it. | |
3600 * Returns null if there is no Soundex representation for the string. | |
3601 * | |
3602 * See_Also: | |
3603 * $(LINK2 http://en.wikipedia.org/wiki/Soundex, Wikipedia), | |
3604 * $(LINK2 http://www.archives.gov/publications/general-info-leaflets/55.html, The Soundex Indexing System) | |
3605 * | |
3606 * Bugs: | |
3607 * Only works well with English names. | |
3608 * There are other arguably better Soundex algorithms, | |
3609 * but this one is the standard one. | |
3610 */ | |
3611 | |
char[] soundex(char[] string, char[] buffer = null)
in
{
    assert(!buffer || buffer.length >= 4);
}
out (result)
{
    if (result)
    {
        assert(result.length == 4);
        assert(result[0] >= 'A' && result[0] <= 'Z');
        foreach (char c; result[1 .. 4])
            assert(c >= '0' && c <= '6');
    }
}
body
{
    // Soundex digit for each letter; '0' marks letters that produce no code.
    static char[26] dex =
    // ABCDEFGHIJKLMNOPQRSTUVWXYZ
      "01230120022455012623010202";

    int b = 0;      // number of result characters written so far
    char lastc;     // code of the previous coded letter, used to drop repeats
    foreach (char c; string)
    {
        if (c >= 'a' && c <= 'z')
            c -= 'a' - 'A';             // fold to upper case
        else if (c >= 'A' && c <= 'Z')
        {
            // already upper case
        }
        else
        {
            // A non-letter is skipped and breaks any run of equal codes.
            lastc = lastc.init;
            continue;
        }

        if (b == 0)
        {
            // The first letter is stored as-is; its code is remembered so an
            // immediately following letter with the same code is dropped.
            if (!buffer)
                buffer = new char[4];
            buffer[0] = c;
            b++;
            lastc = dex[c - 'A'];
        }
        else
        {
            // H and W are ignored without breaking a run of equal codes.
            if (c == 'H' || c == 'W')
                continue;
            // A vowel produces no digit but does break a run.
            if (c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 'U')
                lastc = lastc.init;
            c = dex[c - 'A'];
            if (c != '0' && c != lastc)
            {
                buffer[b] = c;
                b++;
                lastc = c;
            }
        }
        if (b == 4)
            break;
    }

    // No letters at all: there is no Soundex representation.
    if (b == 0)
        return null;

    // Pad with '0' up to four characters.
    buffer[b .. 4] = '0';

    // Return exactly four characters even when the caller's buffer is longer.
    // Returning the whole buffer would violate the out contract
    // (result.length == 4) and hand back stale trailing bytes.
    return buffer[0 .. 4];
}
3679 | |
unittest
{
    char[4] buffer;

    // Inputs without any ASCII letter have no Soundex code.
    assert(soundex(null) == null);
    assert(soundex("") == null);
    assert(soundex("0123^&^^**&^") == null);

    // Pairs of (input, expected code); the result is heap allocated.
    static char[][] heapCases =
    [
        "Euler",           "E460",
        " Ellery ",        "E460",
        "Gauss",           "G200",
        "Ghosh",           "G200",
        "Hilbert",         "H416",
        "Heilbronn",       "H416",
        "Knuth",           "K530",
        "Lloyd",           "L300",
        "Ladd",            "L300",
        "Lissajous",       "L222",
        "Robert",          "R163",
        "Rupert",          "R163",
        "Rubin",           "R150",
        "Washington",      "W252",
        "Lee",             "L000",
        "Gutierrez",       "G362",
        "Pfister",         "P236",
        "Jackson",         "J250",
        "Tymczak",         "T522",
        "Ashcraft",        "A261",
        "Woo",             "W000",
        "Pilgrim",         "P426",
        "Flingjingwaller", "F452",
        "PEARSE",          "P620",
        "PIERCE",          "P620",
        "Price",           "P620",
        "CATHY",           "C300",
        "KATHY",           "K300",
        "Jones",           "J520",
        "johnsons",        "J525",
        "Hardin",          "H635",
        "Martinez",        "M635",
    ];
    for (size_t k = 0; k < heapCases.length; k += 2)
        assert(soundex(heapCases[k]) == heapCases[k + 1]);

    // Pairs of (input, expected code); the result goes into buffer.
    static char[][] bufferCases =
    [
        "Kant",        "K530",
        "Lukasiewicz", "L222",
    ];
    for (size_t k = 0; k < bufferCases.length; k += 2)
        assert(soundex(bufferCases[k], buffer) == bufferCases[k + 1]);
}
3722 | |
3723 | |
/***************************************************
 * Build an associative array that maps every unambiguous
 * abbreviation (leading part) of the strings in values to the
 * full string it abbreviates. Each string also maps to itself.
 *
 * This is handy when the user is expected to type one of a known
 * set of strings and the program should complete the input as soon
 * as enough characters have been entered to identify it.
 *
 * The array passed in is not modified; a sorted copy is used
 * internally, and duplicate entries are treated as one.
 *
 * Example:
 * ---
 * import std.stdio;
 * import std.string;
 *
 * void main()
 * {
 *    static char[][] list = [ "food", "foxy" ];
 *
 *    auto abbrevs = std.string.abbrev(list);
 *
 *    foreach (key, value; abbrevs)
 *    {
 *       writefln("%s => %s", key, value);
 *    }
 * }
 * ---
 * produces the output:
 * <pre>
 * fox => foxy
 * food => food
 * foxy => foxy
 * foo => food
 * </pre>
 */

char[][char[]] abbrev(char[][] values)
{
    char[][char[]] table;

    // Work on a sorted copy so equal and similar strings are adjacent
    // and the caller's array is left alone.
    char[][] sorted = values.dup.sort;
    size_t count = sorted.length;

    bool havePrev = false;      // is there a distinct string before word?
    char[] prev;                // that string, when havePrev is set

    size_t i = 0;
    while (i < count)
    {
        char[] word = sorted[i];

        // Step over any repeats of word to reach the next distinct string.
        size_t next = i + 1;
        while (next < count && sorted[next] == word)
            next++;

        bool haveNext = next < count;
        char[] succ;
        if (haveNext)
            succ = sorted[next];

        // Try every proper prefix of word, one UTF-8 sequence at a time.
        // Because the list is sorted, a prefix is unambiguous when it is
        // not also a prefix of either neighbour.
        for (size_t j = 0; j < word.length; j += std.utf.stride(word, j))
        {
            char[] prefix = word[0 .. j];

            bool sharedWithNext = haveNext && j <= succ.length &&
                                  prefix == succ[0 .. j];
            bool sharedWithPrev = havePrev && j <= prev.length &&
                                  prefix == prev[0 .. j];

            if (!sharedWithNext && !sharedWithPrev)
                table[prefix] = word;
        }

        // The full string always maps to itself.
        table[word] = word;

        havePrev = true;
        prev = word;
        i = next;
    }

    return table;
}
3796 | |
unittest
{
    debug(string) printf("string.abbrev.unittest\n");

    // A duplicated long string plus a string that is a prefix of it.
    char[][] input;
    input ~= "hello";
    input ~= "hello";
    input ~= "he";

    char[][char[]] table = abbrev(input);

    // Sort the keys so they can be compared positionally.
    char[][] sortedKeys = table.keys.dup;
    sortedKeys.sort;

    static char[][] wantKeys   = [ "he", "hel",   "hell",  "hello" ];
    static char[][] wantValues = [ "he", "hello", "hello", "hello" ];

    assert(sortedKeys.length == 4);
    for (size_t k = 0; k < 4; k++)
        assert(sortedKeys[k] == wantKeys[k]);
    for (size_t k = 0; k < 4; k++)
        assert(table[sortedKeys[k]] == wantValues[k]);
}
3823 | |
3824 | |
/******************************************
 * Compute the column reached after string has been output, assuming
 * the string begins in the leftmost column, which is numbered 0.
 *
 * A tab advances to the next multiple of tabsize; '\r', '\n' and the
 * PS and LS characters reset the column to 0; every other character
 * advances the column by one.
 */

size_t column(char[] string, int tabsize = 8)
{
    size_t col = 0;

    foreach (dchar ch; string)
    {
        if (ch == '\t')
        {
            // Round up to the next tab stop.
            col = (col + tabsize) / tabsize * tabsize;
        }
        else if (ch == '\r' || ch == '\n' || ch == PS || ch == LS)
        {
            // Any line terminator starts a new line.
            col = 0;
        }
        else
        {
            col++;
        }
    }
    return col;
}
3856 | |
unittest
{
    debug(string) printf("string.column.unittest\n");

    assert(column(null) == 0);

    // Inputs and the column each one ends in with the default tab size.
    static char[][] inputs   = [ "", "\t", "abc\t", "12345678\t" ];
    static size_t[] expected = [ 0,  8,    8,       16 ];

    for (size_t k = 0; k < inputs.length; k++)
        assert(column(inputs[k]) == expected[k]);
}
3867 | |
/******************************************
 * Wrap text into a paragraph.
 *
 * The input text string s is formed into a paragraph
 * by breaking it up into a sequence of lines, delineated
 * by \n, such that the number of columns is not exceeded
 * on each line.
 * Words are separated by a single space regardless of how much
 * white space separated them in s. A word is never split: a word
 * that is too long for a line is put on a line of its own.
 * The first word always directly follows firstindent.
 * The last line is terminated with a \n.
 * Params:
 *	s = text string to be wrapped
 *	columns = maximum number of _columns in the paragraph
 *	firstindent = string used to _indent first line of the paragraph
 *	indent = string to use to _indent following lines of the paragraph
 *	tabsize = column spacing of tabs
 * Returns:
 *	The resulting paragraph.
 * Bugs:
 *	The width of a word is taken to be its length in chars
 *	(UTF-8 code units), so words containing multibyte sequences
 *	are counted as wider than they are.
 */

char[] wrap(char[] s, int columns = 80, char[] firstindent = null,
        char[] indent = null, int tabsize = 8)
{
    char[] result;
    size_t col;                 // column reached on the current output line
    bool inword;                // currently scanning inside a word
    bool first = true;          // no word has been written yet
    size_t wordstart;           // index in s where the current word began

    // Append the word s[wordstart .. end] to result, preceded by either
    // a line break plus indent or a single space. Used both for words
    // ended by white space and for a word ended by the end of s, so
    // the two cases follow the same rule.
    void putWord(size_t end)
    {
        size_t wordlen = end - wordstart;

        if (first)
        {
            // The first word goes right after firstindent.
        }
        else if (col + 1 + wordlen > columns)
        {
            // Space plus word does not fit: start a new line.
            result ~= '\n';
            result ~= indent;
            col = column(indent, tabsize);
        }
        else
        {
            result ~= ' ';
            col += 1;
        }
        result ~= s[wordstart .. end];
        col += wordlen;
        inword = false;
        first = false;
    }

    // Grow to the expected size and shrink again before copying in
    // firstindent -- presumably to reserve space for the appends below.
    result.length = firstindent.length + s.length;
    result.length = firstindent.length;
    result[] = firstindent[];
    col = column(result, tabsize);

    foreach (size_t i, dchar c; s)
    {
        if (iswhite(c))
        {
            if (inword)
                putWord(i);
        }
        else if (!inword)
        {
            wordstart = i;
            inword = true;
        }
    }

    // A word that runs up to the end of s has not been written yet.
    if (inword)
        putWord(s.length);

    result ~= '\n';

    return result;
}
3951 | |
unittest
{
    debug(string) printf("string.wrap.unittest\n");

    assert(wrap(null) == "\n");

    // Pairs of (input, expected) using the default width of 80 columns.
    static char[][] wide =
    [
        " a b   df ", "a b df\n",
        "x",          "x\n",
        "u u",        "u u\n",
    ];
    for (size_t k = 0; k < wide.length; k += 2)
        assert(wrap(wide[k]) == wide[k + 1]);

    // Pairs of (input, expected) when wrapping at 3 columns.
    static char[][] narrow =
    [
        " a b   df ",  "a b\ndf\n",
        " a bc   df ", "a\nbc\ndf\n",
        " abcd   df ", "abcd\ndf\n",
    ];
    for (size_t k = 0; k < narrow.length; k += 2)
        assert(wrap(narrow[k], 3) == narrow[k + 1]);
}
3966 | |
3967 | |
/***************************
 * Does string s[] start with an email address?
 *
 * The check is a simple approximation of an address:
 * a letter, followed by letters, digits, '-', '_' or '.', then '@',
 * then a run of letters, digits, '-', '_' or '.' in which the part
 * after the last '.' is 2 or 3 characters long.
 *
 * Params:
 *	s = string to examine; may be empty or null
 * Returns:
 *	null	it does not
 *	char[]	it does, and this is the slice of s[] that is that email address
 * References:
 *	RFC2822
 */
char[] isEmail(char[] s)
{
    // An empty string holds no address; this also guards the s[0] access.
    if (s.length == 0 || !isalpha(s[0]))
        return null;

    /* Scan the part before the '@'; running out of input before an
     * '@' is found means there is no address.
     */
    size_t i;
    for (i = 1; ; i++)
    {
        if (i == s.length)
            return null;
        auto c = s[i];
        if (isalnum(c))
            continue;
        if (c == '-' || c == '_' || c == '.')
            continue;
        if (c != '@')
            return null;
        i++;                    // step over the '@'
        break;
    }

    /* Now do the part past the '@', stopping at the first character
     * that cannot belong to it and remembering the last '.' seen.
     */
    size_t lastdot;             // stays 0 when there is no '.'
    for (; i < s.length; i++)
    {
        auto c = s[i];
        if (isalnum(c))
            continue;
        if (c == '-' || c == '_')
            continue;
        if (c == '.')
        {
            lastdot = i;
            continue;
        }
        break;
    }

    // Need a '.' followed by exactly 2 or 3 more characters.
    if (!lastdot || (i - lastdot != 3 && i - lastdot != 4))
        return null;

    return s[0 .. i];
}
4023 | |
4024 | |
/***************************
 * Does string s[] start with a URL?
 *
 * To be recognized, s must begin with "http://" or "https://"
 * (compared without regard to case), be followed by at least one more
 * character, and the run of URL characters that follows must contain
 * a '.'. A leading "www." alone is not accepted.
 *
 * Returns:
 *	null	it does not
 *	char[]	it does, and this is the slice of s[] that is that URL
 */

char[] isURL(char[] s)
{
    if (s.length <= 4)
        return null;

    // Find where the scheme prefix ends.
    size_t pos;
    if (s.length > 7 && std.string.icmp(s[0 .. 7], "http://") == 0)
        pos = 7;
    else if (s.length > 8 && std.string.icmp(s[0 .. 8], "https://") == 0)
        pos = 8;
    else
        return null;

    // Consume characters that may appear in a URL, noting whether
    // a '.' is among them.
    bool sawDot = false;
    while (pos < s.length)
    {
        auto ch = s[pos];

        if (ch == '.')
            sawDot = true;
        else
        {
            bool punct = ch == '-' || ch == '_' || ch == '?' ||
                         ch == '=' || ch == '%' || ch == '&' ||
                         ch == '/' || ch == '+' || ch == '#' ||
                         ch == '~';
            if (!isalnum(ch) && !punct)
                break;
        }
        pos++;
    }

    if (!sawDot)
        return null;

    return s[0 .. pos];
}
4082 | |
4083 |