comparison tango/tango/text/Util.d @ 132:1700239cab2e trunk

[svn r136] MAJOR UNSTABLE UPDATE!!! Initial commit after moving to Tango instead of Phobos. Lots of bugfixes... This build is not suitable for most things.
author lindquist
date Fri, 11 Jan 2008 17:57:40 +0100
parents
children
comparison
equal deleted inserted replaced
131:5825d48b27d1 132:1700239cab2e
1 /*******************************************************************************
2
3 copyright: Copyright (c) 2004 Kris Bell. All rights reserved
4
5 license: BSD style: $(LICENSE)
6
7 version: Apr 2004: Initial release
8 Dec 2006: South Seas version
9
10 author: Kris
11
12
13 Placeholder for a variety of wee functions. These functions are all
14 templated with the intent of being used for arrays of char, wchar,
15 and dchar. However, they operate correctly with other array types
16 also.
17
18 Several of these functions return an index value, representing where
19 some criteria was identified. When said criteria is not matched, the
20 functions return a value representing the array length provided to
21 them. That is, for those scenarios where C functions might typically
22 return -1 these functions return length instead. This operate nicely
23 with D slices:
24 ---
25 auto text = "happy:faces";
26
27 assert (text[0 .. locate (text, ':')] == "happy");
28
29 assert (text[0 .. locate (text, '!')] == "happy:faces");
30 ---
31
32 The contains() function is more convenient for trivial lookup
33 cases:
34 ---
35 if (contains ("fubar", '!'))
36 ...
37 ---
38
39 Note that where some functions expect a uint as an argument, the
40 D template-matching algorithm will fail where an int is provided
41 instead. This is the typically the cause of "template not found"
42 errors. Also note that name overloading is not supported cleanly
43 by IFTI at this time, so is not applied here.
44
45
46 Applying the D "import alias" mechanism to this module is highly
47 recommended, in order to limit namespace pollution:
48 ---
49 import Util = tango.text.Util;
50
51 auto s = Util.trim (" foo ");
52 ---
53
54
55 Function templates:
56 ---
57 trim (source) // trim whitespace
58 triml (source) // trim whitespace
59 trimr (source) // trim whitespace
60 strip (source, match) // trim elements
61 stripl (source, match) // trim left elements
62 stripr (source, match) // trim right elements
63 delimit (src, set) // split on delims
64 split (source, pattern) // split on pattern
65 splitLines (source); // split on lines
66 head (source, pattern, tail) // split to head & tail
67 join (source, postfix, output) // join text segments
68 repeat (source, count, output) // repeat source
69 replace (source, match, replacement) // replace chars
70 substitute (source, match, replacement) // replace patterns
71 contains (source, match) // has char?
72 containsPattern (source, match) // has pattern?
73 locate (source, match, start) // find char
74 locatePrior (source, match, start) // find prior char
75 locatePattern (source, match, start); // find pattern
76 locatePatternPrior (source, match, start); // find prior pattern
77 indexOf (s*, match, length) // low-level lookup
78 mismatch (s1*, s2*, length) // low-level compare
79 matching (s1*, s2*, length) // low-level compare
80 isSpace (match) // is whitespace?
81 layout (destination, format ...) // featherweight printf
82 lines (str) // foreach lines
83 quotes (str, set) // foreach quotes
84 delimiters (str, set) // foreach delimiters
85 patterns (str, pattern) // foreach patterns
86 ---
87
88 *******************************************************************************/
89
90 module tango.text.Util;
91
92 /******************************************************************************
93
94 Trim the provided array by stripping whitespace from both
95 ends. Returns a slice of the original content
96
97 ******************************************************************************/
98
99 T[] trim(T) (T[] source)
100 {
101 T* head = source.ptr,
102 tail = head + source.length;
103
104 while (head < tail && isSpace(*head))
105 ++head;
106
107 while (tail > head && isSpace(*(tail-1)))
108 --tail;
109
110 return head [0 .. tail - head];
111 }
112
113 /******************************************************************************
114
115 Trim the provided array by stripping whitespace from the left.
116 Returns a slice of the original content
117
118 ******************************************************************************/
119
120 T[] triml(T) (T[] source)
121 {
122 T* head = source.ptr,
123 tail = head + source.length;
124
125 while (head < tail && isSpace(*head))
126 ++head;
127
128 return head [0 .. tail - head];
129 }
130
131 /******************************************************************************
132
133 Trim the provided array by stripping whitespace from the right.
134 Returns a slice of the original content
135
136 ******************************************************************************/
137
138 T[] trimr(T) (T[] source)
139 {
140 T* head = source.ptr,
141 tail = head + source.length;
142
143 while (tail > head && isSpace(*(tail-1)))
144 --tail;
145
146 return head [0 .. tail - head];
147 }
148
149 /******************************************************************************
150
151 Trim the given array by stripping the provided match from
152 both ends. Returns a slice of the original content
153
154 ******************************************************************************/
155
156 T[] strip(T) (T[] source, T match)
157 {
158 T* head = source.ptr,
159 tail = head + source.length;
160
161 while (head < tail && *head is match)
162 ++head;
163
164 while (tail > head && *(tail-1) is match)
165 --tail;
166
167 return head [0 .. tail - head];
168 }
169
170 /******************************************************************************
171
172 Trim the given array by stripping the provided match from
173 the left hand side. Returns a slice of the original content
174
175 ******************************************************************************/
176
177 T[] stripl(T) (T[] source, T match)
178 {
179 T* head = source.ptr,
180 tail = head + source.length;
181
182 while (head < tail && *head is match)
183 ++head;
184
185 return head [0 .. tail - head];
186 }
187
188 /******************************************************************************
189
190 Chop the given source by stripping the provided match from
191 the left hand side. Returns a slice of the original content
192
193 ******************************************************************************/
194
195 T[] chopl(T) (T[] source, T[] match)
196 {
197 if (match.length <= source.length)
198 if (source[0 .. match.length] == match)
199 source = source [match.length .. $];
200
201 return source;
202 }
203
204 /******************************************************************************
205
206 Chop the given source by stripping the provided match from
207 the right hand side. Returns a slice of the original content
208
209 ******************************************************************************/
210
211 T[] chopr(T) (T[] source, T[] match)
212 {
213 if (match.length <= source.length)
214 if (source[$-match.length .. $] == match)
215 source = source [0 .. $-match.length];
216
217 return source;
218 }
219
220 /******************************************************************************
221
222 Trim the given array by stripping the provided match from
223 the right hand side. Returns a slice of the original content
224
225 ******************************************************************************/
226
227 T[] stripr(T) (T[] source, T match)
228 {
229 T* head = source.ptr,
230 tail = head + source.length;
231
232 while (tail > head && *(tail-1) is match)
233 --tail;
234
235 return head [0 .. tail - head];
236 }
237
238 /******************************************************************************
239
240 Replace all instances of one element with another (in place)
241
242 ******************************************************************************/
243
244 T[] replace(T) (T[] source, T match, T replacement)
245 {
246 foreach (inout c; source)
247 if (c is match)
248 c = replacement;
249 return source;
250 }
251
252 /******************************************************************************
253
254 Replace all instances of one array with another
255
256 ******************************************************************************/
257
258 T[] substitute(T) (T[] source, T[] match, T[] replacement)
259 {
260 T[] output;
261
262 foreach (s; patterns (source, match, replacement))
263 output ~= s;
264 return output;
265 }
266
267 /******************************************************************************
268
269 Returns whether or not the provided array contains an instance
270 of the given match
271
272 ******************************************************************************/
273
274 bool contains(T) (T[] source, T match)
275 {
276 return indexOf (source.ptr, match, source.length) != source.length;
277 }
278
279 /******************************************************************************
280
281 Returns whether or not the provided array contains an instance
282 of the given match
283
284 ******************************************************************************/
285
286 bool containsPattern(T) (T[] source, T[] match)
287 {
288 return locatePattern (source, match) != source.length;
289 }
290
291 /******************************************************************************
292
293 Return the index of the next instance of 'match' starting at
294 position 'start', or source.length where there is no match.
295
296 Parameter 'start' defaults to 0
297
298 ******************************************************************************/
299
300 uint locate(T, U=uint) (T[] source, T match, U start=0)
301 {return locate!(T) (source, match, start);}
302
303 uint locate(T) (T[] source, T match, uint start=0)
304 {
305 if (start > source.length)
306 start = source.length;
307
308 return indexOf (source.ptr+start, match, source.length - start) + start;
309 }
310
311 /******************************************************************************
312
313 Return the index of the prior instance of 'match' starting
314 just before 'start', or source.length where there is no match.
315
316 Parameter 'start' defaults to source.length
317
318 ******************************************************************************/
319
320 uint locatePrior(T, U=uint) (T[] source, T match, U start=uint.max)
321 {return locatePrior!(T)(source, match, start);}
322
323 uint locatePrior(T) (T[] source, T match, uint start=uint.max)
324 {
325 if (start > source.length)
326 start = source.length;
327
328 while (start > 0)
329 if (source[--start] is match)
330 return start;
331 return source.length;
332 }
333
334 /******************************************************************************
335
336 Return the index of the next instance of 'match' starting at
337 position 'start', or source.length where there is no match.
338
339 Parameter 'start' defaults to 0
340
341 ******************************************************************************/
342
343 uint locatePattern(T, U=uint) (T[] source, T[] match, U start=0)
344 {return locatePattern!(T) (source, match, start);}
345
346 uint locatePattern(T) (T[] source, T[] match, uint start=0)
347 {
348 uint idx;
349 T* p = source.ptr + start;
350 uint extent = source.length - start - match.length + 1;
351
352 if (match.length && extent <= source.length)
353 while (extent)
354 if ((idx = indexOf (p, match[0], extent)) is extent)
355 break;
356 else
357 if (matching (p+=idx, match.ptr, match.length))
358 return p - source.ptr;
359 else
360 {
361 extent -= (idx+1);
362 ++p;
363 }
364
365 return source.length;
366 }
367
368 /******************************************************************************
369
370 Return the index of the prior instance of 'match' starting
371 just before 'start', or source.length where there is no match.
372
373 Parameter 'start' defaults to source.length
374
375 ******************************************************************************/
376
377 uint locatePatternPrior(T, U=uint) (T[] source, T[] match, U start=uint.max)
378 {return locatePatternPrior!(T)(source, match, start);}
379
380 uint locatePatternPrior(T) (T[] source, T[] match, uint start=uint.max)
381 {
382 auto len = source.length;
383
384 if (start > len)
385 start = len;
386
387 if (match.length && match.length <= len)
388 while (start)
389 {
390 start = locatePrior (source, match[0], start);
391 if (start is len)
392 break;
393 else
394 if ((start + match.length) <= len)
395 if (matching (source.ptr+start, match.ptr, match.length))
396 return start;
397 }
398
399 return len;
400 }
401
402 /******************************************************************************
403
404 Split the provided array on the first pattern instance, and
405 return the resultant head and tail. The pattern is excluded
406 from the two segments.
407
408 Where a segment is not found, tail will be null and the return
409 value will be the original array.
410
411 ******************************************************************************/
412
413 T[] head(T) (T[] src, T[] pattern, out T[] tail)
414 {
415 auto i = locatePattern (src, pattern);
416 if (i != src.length)
417 {
418 tail = src [i + pattern.length .. $];
419 src = src [0 .. i];
420 }
421 return src;
422 }
423
424 /******************************************************************************
425
426 Split the provided array on the last pattern instance, and
427 return the resultant head and tail. The pattern is excluded
428 from the two segments.
429
430 Where a segment is not found, head will be null and the return
431 value will be the original array.
432
433 ******************************************************************************/
434
435 T[] tail(T) (T[] src, T[] pattern, out T[] head)
436 {
437 auto i = locatePatternPrior (src, pattern);
438 if (i != src.length)
439 {
440 head = src [0 .. i];
441 src = src [i + pattern.length .. $];
442 }
443 return src;
444 }
445
446 /******************************************************************************
447
448 Split the provided array wherever a delimiter-set instance is
449 found, and return the resultant segments. The delimiters are
450 excluded from each of the segments. Note that delimiters are
451 matched as a set of alternates rather than as a pattern.
452
453 Splitting on a single delimiter is considerably faster than
454 splitting upon a set of alternatives
455
456 ******************************************************************************/
457
458 T[][] delimit(T) (T[] src, T[] set)
459 {
460 T[][] result;
461
462 foreach (segment; delimiters (src, set))
463 result ~= segment;
464 return result;
465 }
466
467 /******************************************************************************
468
469 Split the provided array wherever a pattern instance is
470 found, and return the resultant segments. The pattern is
471 excluded from each of the segments.
472
473 ******************************************************************************/
474
475 T[][] split(T) (T[] src, T[] pattern)
476 {
477 T[][] result;
478
479 foreach (segment; patterns (src, pattern))
480 result ~= segment;
481 return result;
482 }
483
484 /******************************************************************************
485
486 Convert text into a set of lines, where each line is identified
487 by a \n or \r\n combination. The line terminator is stripped from
488 each resultant array
489
490 ******************************************************************************/
491
492 T[][] splitLines(T) (T[] src)
493 {
494 int count;
495
496 foreach (line; lines (src))
497 ++count;
498
499 T[][] result = new T[][count];
500
501 count = 0;
502 foreach (line; lines (src))
503 result [count++] = line;
504
505 return result;
506 }
507
508 /******************************************************************************
509
510 Combine a series of text segments together, each appended with an
511 optional postfix pattern. An optional output buffer can be provided
512 to avoid heap activity - it should be large enough to contain the
513 entire output, otherwise the heap will be used instead.
514
515 Returns a valid slice of the output, containing the concatenated
516 text.
517
518 ******************************************************************************/
519
520 T[] join(T) (T[][] src, T[] postfix=null, T[] dst=null)
521 {
522 uint len = src.length * postfix.length;
523
524 foreach (segment; src)
525 len += segment.length;
526
527 if (dst.length < len)
528 dst.length = len;
529
530 T* p = dst.ptr;
531 foreach (segment; src)
532 {
533 p[0 .. segment.length] = segment;
534 p += segment.length;
535 p[0 .. postfix.length] = postfix;
536 p += postfix.length;
537 }
538
539 // remove trailing seperator
540 if (len)
541 len -= postfix.length;
542 return dst [0 .. len];
543 }
544
545 /******************************************************************************
546
547 Combine a series of text segments together, each appended with an
548 optional postfix pattern. An optional output buffer can be provided
549 to avoid heap activity - it should be large enough to contain the
550 entire output, otherwise the heap will be used instead.
551
552 Returns a valid slice of the output, containing the concatenated
553 text.
554
555 ******************************************************************************/
556
557 T[] repeat(T, U=uint) (T[] src, U count, T[] dst=null)
558 {return repeat!(T)(src, count, dst);}
559
560 T[] repeat(T) (T[] src, uint count, T[] dst=null)
561 {
562 uint len = src.length * count;
563 if (len is 0)
564 return null;
565
566 if (dst.length < len)
567 dst.length = len;
568
569 for (auto p = dst.ptr; count--; p += src.length)
570 p[0 .. src.length] = src;
571
572 return dst [0 .. len];
573 }
574
575 /******************************************************************************
576
577 Is the argument a whitespace character?
578
579 ******************************************************************************/
580
581 bool isSpace(T) (T c)
582 {
583 static if (T.sizeof is 1)
584 return (c <= 32 && (c is ' ' | c is '\t' | c is '\r' | c is '\n' | c is '\f' | c is '\v'));
585 else
586 return (c <= 32 && (c is ' ' | c is '\t' | c is '\r' | c is '\n' | c is '\f' | c is '\v')) || (c is '\u2028' | c is '\u2029');
587 }
588
589 /******************************************************************************
590
591 Return whether or not the two arrays have matching content
592
593 ******************************************************************************/
594
595 bool matching(T, U=uint) (T* s1, T* s2, U length)
596 {return matching!(T) (s1, s2, length);}
597
598 bool matching(T) (T* s1, T* s2, uint length)
599 {
600 return mismatch(s1, s2, length) is length;
601 }
602
603 /******************************************************************************
604
605 Returns the index of the first match in str, failing once
606 length is reached. Note that we return 'length' for failure
607 and a 0-based index on success
608
609 ******************************************************************************/
610
611 uint indexOf(T, U=uint) (T* str, T match, U length)
612 {return indexOf!(T) (str, match, length);}
613
614 uint indexOf(T) (T* str, T match, uint length)
615 {
616 version (D_InlineAsm_X86)
617 {
618 static if (T.sizeof == 1)
619 {
620 asm {
621 mov EDI, str;
622 mov ECX, length;
623 movzx EAX, match;
624 mov ESI, ECX;
625 and ESI, ESI;
626 jz end;
627
628 cld;
629 repnz;
630 scasb;
631 jnz end;
632 sub ESI, ECX;
633 dec ESI;
634 end:;
635 mov EAX, ESI;
636 }
637 }
638 else static if (T.sizeof == 2)
639 {
640 asm {
641 mov EDI, str;
642 mov ECX, length;
643 movzx EAX, match;
644 mov ESI, ECX;
645 and ESI, ESI;
646 jz end;
647
648 cld;
649 repnz;
650 scasw;
651 jnz end;
652 sub ESI, ECX;
653 dec ESI;
654 end:;
655 mov EAX, ESI;
656 }
657 }
658 else static if (T.sizeof == 4)
659 {
660 asm {
661 mov EDI, str;
662 mov ECX, length;
663 mov EAX, match;
664 mov ESI, ECX;
665 and ESI, ESI;
666 jz end;
667
668 cld;
669 repnz;
670 scasd;
671 jnz end;
672 sub ESI, ECX;
673 dec ESI;
674 end:;
675 mov EAX, ESI;
676 }
677 }
678 else
679 {
680 auto len = length;
681 for (auto p=str-1; len--;)
682 if (*++p is match)
683 return p - str;
684 return length;
685 }
686 }
687 else
688 {
689 auto len = length;
690 for (auto p=str-1; len--;)
691 if (*++p is match)
692 return p - str;
693 return length;
694 }
695 }
696
697 /******************************************************************************
698
699 Returns the index of a mismatch between s1 & s2, failing when
700 length is reached. Note that we return 'length' upon failure
701 (array content matches) and a 0-based index upon success.
702
703 Use this as a faster opEquals (the assembler version). Also
704 provides the basis for a much faster opCmp, since the index
705 of the first mismatched character can be used to determine
706 the (greater or less than zero) return value
707
708 ******************************************************************************/
709
710 uint mismatch(T, U=uint) (T* s1, T* s2, U length)
711 {return mismatch!(T)(s1, s2, length);}
712
713 uint mismatch(T) (T* s1, T* s2, uint length)
714 {
715 version (D_InlineAsm_X86)
716 {
717 static if (T.sizeof == 1)
718 {
719 asm {
720 mov EDI, s1;
721 mov ESI, s2;
722 mov ECX, length;
723 mov EAX, ECX;
724 and EAX, EAX;
725 jz end;
726
727 cld;
728 repz;
729 cmpsb;
730 jz end;
731 sub EAX, ECX;
732 dec EAX;
733 end:;
734 }
735 }
736 else static if (T.sizeof == 2)
737 {
738 asm {
739 mov EDI, s1;
740 mov ESI, s2;
741 mov ECX, length;
742 mov EAX, ECX;
743 and EAX, EAX;
744 jz end;
745
746 cld;
747 repz;
748 cmpsw;
749 jz end;
750 sub EAX, ECX;
751 dec EAX;
752 sar ESI, 1;
753 end:;
754 }
755 }
756 else static if (T.sizeof == 4)
757 {
758 asm {
759 mov EDI, s1;
760 mov ESI, s2;
761 mov ECX, length;
762 mov EAX, ECX;
763 and EAX, EAX;
764 jz end;
765
766 cld;
767 repz;
768 cmpsd;
769 jz end;
770 sub EAX, ECX;
771 dec EAX;
772 sar ESI, 2;
773 end:;
774 }
775 }
776 else
777 {
778 auto len = length;
779 for (auto p=s1-1; len--;)
780 if (*++p != *s2++)
781 return p - s1;
782 return length;
783 }
784 }
785 else
786 {
787 auto len = length;
788 for (auto p=s1-1; len--;)
789 if (*++p != *s2++)
790 return p - s1;
791 return length;
792 }
793 }
794
795 /******************************************************************************
796
797 Freachable iterator to isolate lines.
798
799 Converts text into a set of lines, where each line is identified
800 by a \n or \r\n combination. The line terminator is stripped from
801 each resultant array.
802
803 ---
804 foreach (line; lines ("one\ntwo\nthree"))
805 ...
806 ---
807
808 ******************************************************************************/
809
810 LineFreach!(T) lines(T) (T[] src)
811 {
812 LineFreach!(T) lines;
813 lines.src = src;
814 return lines;
815 }
816
817 /******************************************************************************
818
819 Freachable iterator to isolate text elements.
820
821 Splits the provided array wherever a delimiter-set instance is
822 found, and return the resultant segments. The delimiters are
823 excluded from each of the segments. Note that delimiters are
824 matched as a set of alternates rather than as a pattern.
825
826 Splitting on a single delimiter is considerably faster than
827 splitting upon a set of alternatives.
828
829 ---
830 foreach (segment; delimiters ("one,two;three", ",;"))
831 ...
832 ---
833
834 ******************************************************************************/
835
836 DelimFreach!(T) delimiters(T) (T[] src, T[] set)
837 {
838 DelimFreach!(T) elements;
839 elements.set = set;
840 elements.src = src;
841 return elements;
842 }
843
844 /******************************************************************************
845
846 Freachable iterator to isolate text elements.
847
848 Split the provided array wherever a pattern instance is found,
849 and return the resultant segments. Pattern are excluded from
850 each of the segments, and an optional sub argument enables
851 replacement.
852
853 ---
854 foreach (segment; patterns ("one, two, three", ", "))
855 ...
856 ---
857
858 ******************************************************************************/
859
860 PatternFreach!(T) patterns(T) (T[] src, T[] pattern, T[] sub=null)
861 {
862 PatternFreach!(T) elements;
863 elements.pattern = pattern;
864 elements.sub = sub;
865 elements.src = src;
866 return elements;
867 }
868
869 /******************************************************************************
870
871 Freachable iterator to isolate optionally quoted text elements.
872
873 As per elements(), but with the extension of being quote-aware;
874 the set of delimiters is ignored inside a pair of quotes. Note
875 that an unterminated quote will consume remaining content.
876
877 ---
878 foreach (quote; quotes ("one two 'three four' five", " "))
879 ...
880 ---
881
882 ******************************************************************************/
883
884 QuoteFreach!(T) quotes(T) (T[] src, T[] set)
885 {
886 QuoteFreach!(T) quotes;
887 quotes.set = set;
888 quotes.src = src;
889 return quotes;
890 }
891
892 /*******************************************************************************
893
894 Arranges text strings in order, using indices to specify where
895 each particular argument should be positioned within the text.
896 This is handy for collating I18N components, or as a simplistic
897 and lightweight formatter. Indices range from zero through nine.
898
899 ---
900 // write ordered text to the console
901 char[64] tmp;
902
903 Cout (layout (tmp, "%1 is after %0", "zero", "one")).newline;
904 ---
905
906 *******************************************************************************/
907
908 T[] layout(T) (T[] output, T[][] layout ...)
909 {
910 static T[] badarg = "{index out of range}";
911 static T[] toosmall = "{output buffer too small}";
912
913 int pos,
914 args;
915 bool state;
916
917 args = layout.length - 1;
918 foreach (c; layout[0])
919 {
920 if (state)
921 {
922 state = false;
923 if (c >= '0' && c <= '9')
924 {
925 uint index = c - '0';
926 if (index < args)
927 {
928 T[] x = layout[index+1];
929
930 int limit = pos + x.length;
931 if (limit < output.length)
932 {
933 output [pos .. limit] = x;
934 pos = limit;
935 continue;
936 }
937 else
938 return toosmall;
939 }
940 else
941 return badarg;
942 }
943 }
944 else
945 if (c is '%')
946 {
947 state = true;
948 continue;
949 }
950
951 if (pos < output.length)
952 {
953 output[pos] = c;
954 ++pos;
955 }
956 else
957 return toosmall;
958 }
959
960 return output [0..pos];
961 }
962
963 /******************************************************************************
964
965 jhash() -- hash a variable-length key into a 32-bit value
966
967 k : the key (the unaligned variable-length array of bytes)
968 len : the length of the key, counting by bytes
969 level : can be any 4-byte value
970
971 Returns a 32-bit value. Every bit of the key affects every bit of
972 the return value. Every 1-bit and 2-bit delta achieves avalanche.
973
974 About 4.3*len + 80 X86 instructions, with excellent pipelining
975
976 The best hash table sizes are powers of 2. There is no need to do
977 mod a prime (mod is sooo slow!). If you need less than 32 bits,
978 use a bitmask. For example, if you need only 10 bits, do
979
980 h = (h & hashmask(10));
981
982 In which case, the hash table should have hashsize(10) elements.
983 If you are hashing n strings (ub1 **)k, do it like this:
984
985 for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
986
987 By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use
988 this code any way you wish, private, educational, or commercial.
989 It's free.
990
991 See http://burlteburtle.net/bob/hash/evahash.html
992 Use for hash table lookup, or anything where one collision in 2^32
993 is acceptable. Do NOT use for cryptographic purposes.
994
995 ******************************************************************************/
996
997 uint jhash (ubyte* k, uint len, uint c = 0)
998 {
999 uint a = 0x9e3779b9,
1000 b = 0x9e3779b9,
1001 i = len;
1002
1003 // handle most of the key
1004 while (i >= 12)
1005 {
1006 a += *cast(uint *)(k+0);
1007 b += *cast(uint *)(k+4);
1008 c += *cast(uint *)(k+8);
1009
1010 a -= b; a -= c; a ^= (c>>13);
1011 b -= c; b -= a; b ^= (a<<8);
1012 c -= a; c -= b; c ^= (b>>13);
1013 a -= b; a -= c; a ^= (c>>12);
1014 b -= c; b -= a; b ^= (a<<16);
1015 c -= a; c -= b; c ^= (b>>5);
1016 a -= b; a -= c; a ^= (c>>3);
1017 b -= c; b -= a; b ^= (a<<10);
1018 c -= a; c -= b; c ^= (b>>15);
1019 k += 12; i -= 12;
1020 }
1021
1022 // handle the last 11 bytes
1023 c += len;
1024 switch (i)
1025 {
1026 case 11: c+=(cast(uint)k[10]<<24);
1027 case 10: c+=(cast(uint)k[9]<<16);
1028 case 9 : c+=(cast(uint)k[8]<<8);
1029 case 8 : b+=(cast(uint)k[7]<<24);
1030 case 7 : b+=(cast(uint)k[6]<<16);
1031 case 6 : b+=(cast(uint)k[5]<<8);
1032 case 5 : b+=(cast(uint)k[4]);
1033 case 4 : a+=(cast(uint)k[3]<<24);
1034 case 3 : a+=(cast(uint)k[2]<<16);
1035 case 2 : a+=(cast(uint)k[1]<<8);
1036 case 1 : a+=(cast(uint)k[0]);
1037 default:
1038 }
1039
1040 a -= b; a -= c; a ^= (c>>13);
1041 b -= c; b -= a; b ^= (a<<8);
1042 c -= a; c -= b; c ^= (b>>13);
1043 a -= b; a -= c; a ^= (c>>12);
1044 b -= c; b -= a; b ^= (a<<16);
1045 c -= a; c -= b; c ^= (b>>5);
1046 a -= b; a -= c; a ^= (c>>3);
1047 b -= c; b -= a; b ^= (a<<10);
1048 c -= a; c -= b; c ^= (b>>15);
1049
1050 return c;
1051 }
1052
1053 /// ditto
1054 uint jhash (void[] x, uint c = 0)
1055 {
1056 return jhash (cast(ubyte*) x.ptr, x.length, c);
1057 }
1058
1059
1060 /******************************************************************************
1061
1062 Helper struct for iterator lines()
1063
1064 ******************************************************************************/
1065
1066 private struct LineFreach(T)
1067 {
1068 private T[] src;
1069
1070 int opApply (int delegate (inout T[] line) dg)
1071 {
1072 uint ret,
1073 pos,
1074 mark;
1075 T[] line;
1076
1077 const T nl = '\n';
1078 const T cr = '\r';
1079
1080 while ((pos = locate (src, nl, mark)) < src.length)
1081 {
1082 auto end = pos;
1083 if (end && src[end-1] is cr)
1084 --end;
1085
1086 line = src [mark .. end];
1087 if ((ret = dg (line)) != 0)
1088 return ret;
1089 mark = pos + 1;
1090 }
1091
1092 line = src [mark .. $];
1093 if (mark < src.length)
1094 ret = dg (line);
1095
1096 return ret;
1097 }
1098 }
1099
1100 /******************************************************************************
1101
1102 Helper struct for iterator delimiters()
1103
1104 ******************************************************************************/
1105
1106 private struct DelimFreach(T)
1107 {
1108 private T[] src;
1109 private T[] set;
1110
1111 int opApply (int delegate (inout T[] token) dg)
1112 {
1113 uint ret,
1114 pos,
1115 mark;
1116 T[] token;
1117
1118 // optimize for single delimiter case
1119 if (set.length is 1)
1120 while ((pos = locate (src, set[0], mark)) < src.length)
1121 {
1122 token = src [mark .. pos];
1123 if ((ret = dg (token)) != 0)
1124 return ret;
1125 mark = pos + 1;
1126 }
1127 else
1128 if (set.length > 1)
1129 foreach (i, elem; src)
1130 if (contains (set, elem))
1131 {
1132 token = src [mark .. i];
1133 if ((ret = dg (token)) != 0)
1134 return ret;
1135 mark = i + 1;
1136 }
1137
1138 token = src [mark .. $];
1139 if (mark < src.length)
1140 ret = dg (token);
1141
1142 return ret;
1143 }
1144 }
1145
1146 /******************************************************************************
1147
1148 Helper struct for iterator patterns()
1149
1150 ******************************************************************************/
1151
1152 private struct PatternFreach(T)
1153 {
1154 private T[] src,
1155 sub,
1156 pattern;
1157
1158 int opApply (int delegate (inout T[] token) dg)
1159 {
1160 uint ret,
1161 pos,
1162 mark;
1163 T[] token;
1164
1165 // optimize for single-element pattern
1166 if (pattern.length is 1)
1167 while ((pos = locate (src, pattern[0], mark)) < src.length)
1168 {
1169 token = src [mark .. pos];
1170 if ((ret = dg(token)) != 0)
1171 return ret;
1172 if (sub.ptr)
1173 if ((ret = dg(sub)) != 0)
1174 return ret;
1175 mark = pos + 1;
1176 }
1177 else
1178 if (pattern.length > 1)
1179 while ((pos = locatePattern (src, pattern, mark)) < src.length)
1180 {
1181 token = src [mark .. pos];
1182 if ((ret = dg(token)) != 0)
1183 return ret;
1184 if (sub.ptr)
1185 if ((ret = dg(sub)) != 0)
1186 return ret;
1187 mark = pos + pattern.length;
1188 }
1189
1190 token = src [mark .. $];
1191 if (mark < src.length)
1192 ret = dg (token);
1193
1194 return ret;
1195 }
1196 }
1197
1198 /******************************************************************************
1199
1200 Helper struct for iterator quotes()
1201
1202 ******************************************************************************/
1203
1204 private struct QuoteFreach(T)
1205 {
1206 private T[] src;
1207 private T[] set;
1208
1209 int opApply (int delegate (inout T[] token) dg)
1210 {
1211 int ret,
1212 mark;
1213 T[] token;
1214
1215 if (set.length)
1216 for (uint i=0; i < src.length; ++i)
1217 {
1218 T c = src[i];
1219 if (c is '"' || c is '\'')
1220 i = locate (src, c, i+1);
1221 else
1222 if (contains (set, c))
1223 {
1224 token = src [mark .. i];
1225 if ((ret = dg (token)) != 0)
1226 return ret;
1227 mark = i + 1;
1228 }
1229 }
1230
1231 token = src [mark .. $];
1232 if (mark < src.length)
1233 ret = dg (token);
1234
1235 return ret;
1236 }
1237 }
1238
1239
1240 /******************************************************************************
1241
1242 ******************************************************************************/
1243
1244 debug (UnitTest)
1245 {
1246 //void main() {}
1247
1248 unittest
1249 {
1250 char[64] tmp;
1251
1252 assert (isSpace (' ') && !isSpace ('d'));
1253
1254 assert (indexOf ("abc".ptr, 'a', 3) is 0);
1255 assert (indexOf ("abc".ptr, 'b', 3) is 1);
1256 assert (indexOf ("abc".ptr, 'c', 3) is 2);
1257 assert (indexOf ("abc".ptr, 'd', 3) is 3);
1258
1259 assert (indexOf ("abc"d.ptr, cast(dchar)'c', 3) is 2);
1260 assert (indexOf ("abc"d.ptr, cast(dchar)'d', 3) is 3);
1261
1262 assert (indexOf ("abc"w.ptr, cast(wchar)'c', 3) is 2);
1263 assert (indexOf ("abc"w.ptr, cast(wchar)'d', 3) is 3);
1264
1265 assert (mismatch ("abc".ptr, "abc".ptr, 3) is 3);
1266 assert (mismatch ("abc".ptr, "abd".ptr, 3) is 2);
1267 assert (mismatch ("abc".ptr, "acc".ptr, 3) is 1);
1268 assert (mismatch ("abc".ptr, "ccc".ptr, 3) is 0);
1269
1270 assert (mismatch ("abc"w.ptr, "abc"w.ptr, 3) is 3);
1271 assert (mismatch ("abc"w.ptr, "acc"w.ptr, 3) is 1);
1272
1273 assert (mismatch ("abc"d.ptr, "abc"d.ptr, 3) is 3);
1274 assert (mismatch ("abc"d.ptr, "acc"d.ptr, 3) is 1);
1275
1276 assert (matching ("abc".ptr, "abc".ptr, 3));
1277 assert (matching ("abc".ptr, "abb".ptr, 3) is false);
1278
1279 assert (contains ("abc", 'a'));
1280 assert (contains ("abc", 'b'));
1281 assert (contains ("abc", 'c'));
1282 assert (contains ("abc", 'd') is false);
1283
1284 assert (containsPattern ("abc", "ab"));
1285 assert (containsPattern ("abc", "bc"));
1286 assert (containsPattern ("abc", "abc"));
1287 assert (containsPattern ("abc", "zabc") is false);
1288 assert (containsPattern ("abc", "abcd") is false);
1289 assert (containsPattern ("abc", "za") is false);
1290 assert (containsPattern ("abc", "cd") is false);
1291
1292 assert (trim ("") == "");
1293 assert (trim (" abc ") == "abc");
1294 assert (trim (" ") == "");
1295
1296 assert (strip ("", '%') == "");
1297 assert (strip ("%abc%%%", '%') == "abc");
1298 assert (strip ("#####", '#') == "");
1299 assert (stripl ("#####", '#') == "");
1300 assert (stripl (" ###", ' ') == "###");
1301 assert (stripl ("#####", 's') == "#####");
1302 assert (stripr ("#####", '#') == "");
1303 assert (stripr ("### ", ' ') == "###");
1304 assert (stripr ("#####", 's') == "#####");
1305
1306 assert (replace ("abc".dup, 'b', ':') == "a:c");
1307 assert (substitute ("abc".dup, "bc", "x") == "ax");
1308
1309 assert (locate ("abc", 'c', 1) is 2);
1310
1311 assert (locate ("abc", 'c') is 2);
1312 assert (locate ("abc", 'a') is 0);
1313 assert (locate ("abc", 'd') is 3);
1314 assert (locate ("", 'c') is 0);
1315
1316 assert (locatePrior ("abce", 'c') is 2);
1317 assert (locatePrior ("abce", 'a') is 0);
1318 assert (locatePrior ("abce", 'd') is 4);
1319 assert (locatePrior ("abce", 'c', 3) is 2);
1320 assert (locatePrior ("abce", 'c', 2) is 4);
1321 assert (locatePrior ("", 'c') is 0);
1322
1323 auto x = delimit ("::b", ":");
1324 assert (x.length is 3 && x[0] == "" && x[1] == "" && x[2] == "b");
1325 x = delimit ("a:bc:d", ":");
1326 assert (x.length is 3 && x[0] == "a" && x[1] == "bc" && x[2] == "d");
1327 x = delimit ("abcd", ":");
1328 assert (x.length is 1 && x[0] == "abcd");
1329 x = delimit ("abcd:", ":");
1330 assert (x.length is 1 && x[0] == "abcd");
1331 x = delimit ("a;b$c#d:e@f", ";:$#@");
1332 assert (x.length is 6 && x[0]=="a" && x[1]=="b" && x[2]=="c" &&
1333 x[3]=="d" && x[4]=="e" && x[5]=="f");
1334
1335 assert (locatePattern ("abcdefg", "") is 7);
1336 assert (locatePattern ("abcdefg", "g") is 6);
1337 assert (locatePattern ("abcdefg", "abcdefg") is 0);
1338 assert (locatePattern ("abcdefg", "abcdefgx") is 7);
1339 assert (locatePattern ("abcdefg", "cce") is 7);
1340 assert (locatePattern ("abcdefg", "cde") is 2);
1341 assert (locatePattern ("abcdefgcde", "cde", 3) is 7);
1342
1343 assert (locatePatternPrior ("abcdefg", "") is 7);
1344 assert (locatePatternPrior ("abcdefg", "cce") is 7);
1345 assert (locatePatternPrior ("abcdefg", "cde") is 2);
1346 assert (locatePatternPrior ("abcdefgcde", "cde", 6) is 2);
1347 assert (locatePatternPrior ("abcdefgcde", "cde", 4) is 2);
1348 assert (locatePatternPrior ("abcdefg", "abcdefgx") is 7);
1349
1350 x = splitLines ("a\nb\n");
1351 assert (x.length is 2 && x[0] == "a" && x[1] == "b");
1352 x = splitLines ("a\r\n");
1353 assert (x.length is 1 && x[0] == "a");
1354 x = splitLines ("a");
1355 assert (x.length is 1 && x[0] == "a");
1356 x = splitLines ("");
1357 assert (x.length is 0);
1358
1359 char[][] q;
1360 foreach (element; quotes ("1 'avcc cc ' 3", " "))
1361 q ~= element;
1362 assert (q.length is 3 && q[0] == "1" && q[1] == "'avcc cc '" && q[2] == "3");
1363
1364 assert (layout (tmp, "%1,%%%c %0", "abc", "efg") == "efg,%c abc");
1365
1366 x = split ("one, two, three", ",");
1367 assert (x.length is 3 && x[0] == "one" && x[1] == " two" && x[2] == " three");
1368 x = split ("one, two, three", ", ");
1369 assert (x.length is 3 && x[0] == "one" && x[1] == "two" && x[2] == "three");
1370 x = split ("one, two, three", ",,");
1371 assert (x.length is 1 && x[0] == "one, two, three");
1372
1373 char[] h, t;
1374 h = head ("one:two:three", ":", t);
1375 assert (h == "one" && t == "two:three");
1376 h = head ("one:::two:three", ":::", t);
1377 assert (h == "one" && t == "two:three");
1378 h = head ("one:two:three", "*", t);
1379 assert (h == "one:two:three" && t is null);
1380
1381 t = tail ("one:two:three", ":", h);
1382 assert (h == "one:two" && t == "three");
1383 t = tail ("one:::two:three", ":::", h);
1384 assert (h == "one" && t == "two:three");
1385 t = tail ("one:two:three", "*", h);
1386 assert (t == "one:two:three" && h is null);
1387
1388 assert (chopl("hello world", "hello ") == "world");
1389 assert (chopl("hello", "hello") == "");
1390 assert (chopl("hello world", " ") == "hello world");
1391 assert (chopl("hello world", "") == "hello world");
1392
1393 assert (chopr("hello world", " world") == "hello");
1394 assert (chopr("hello", "hello") == "");
1395 assert (chopr("hello world", " ") == "hello world");
1396 assert (chopr("hello world", "") == "hello world");
1397
1398 char[][] foo = ["one", "two", "three"];
1399 auto j = join (foo);
1400 assert (j == "onetwothree");
1401 j = join (foo, ", ");
1402 assert (j == "one, two, three");
1403 j = join (foo, " ", tmp);
1404 assert (j == "one two three");
1405 assert (j.ptr is tmp.ptr);
1406
1407 assert (repeat ("abc", 0) == "");
1408 assert (repeat ("abc", 1) == "abc");
1409 assert (repeat ("abc", 2) == "abcabc");
1410 assert (repeat ("abc", 4) == "abcabcabcabc");
1411 assert (repeat ("", 4) == "");
1412 char[10] rep;
1413 assert (repeat ("abc", 0, rep) == "");
1414 assert (repeat ("abc", 1, rep) == "abc");
1415 assert (repeat ("abc", 2, rep) == "abcabc");
1416 assert (repeat ("", 4, rep) == "");
1417 }
1418 }
1419
1420
1421