Mercurial > projects > ldc

diff tango/tango/text/Util.d @ 132:1700239cab2e trunk
[svn r136] MAJOR UNSTABLE UPDATE!!! Initial commit after moving to Tango instead of Phobos. Lots of bugfixes... This build is not suitable for most things.
author: lindquist
date: Fri, 11 Jan 2008 17:57:40 +0100
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tango/tango/text/Util.d	Fri Jan 11 17:57:40 2008 +0100
@@ -0,0 +1,1421 @@
+/*******************************************************************************
+
+        copyright:      Copyright (c) 2004 Kris Bell. All rights reserved
+
+        license:        BSD style: $(LICENSE)
+
+        version:        Apr 2004: Initial release
+                        Dec 2006: South Seas version
+
+        author:         Kris
+
+
+        Placeholder for a variety of wee functions. These functions are all
+        templated with the intent of being used for arrays of char, wchar,
+        and dchar. However, they operate correctly with other array types
+        also.
+
+        Several of these functions return an index value, representing where
+        some criteria was identified. When said criteria is not matched, the
+        functions return a value representing the array length provided to
+        them. That is, for those scenarios where C functions might typically
+        return -1 these functions return length instead. This operate nicely
+        with D slices:
+        ---
+        auto text = "happy:faces";
+        
+        assert (text[0 .. locate (text, ':')] == "happy");
+        
+        assert (text[0 .. locate (text, '!')] == "happy:faces");
+        ---
+
+        The contains() function is more convenient for trivial lookup
+        cases:
+        ---
+        if (contains ("fubar", '!'))
+            ...
+        ---
+
+        Note that where some functions expect a uint as an argument, the
+        D template-matching algorithm will fail where an int is provided
+        instead. This is the typically the cause of "template not found"
+        errors. Also note that name overloading is not supported cleanly
+        by IFTI at this time, so is not applied here.
+
+
+        Applying the D "import alias" mechanism to this module is highly
+        recommended, in order to limit namespace pollution:
+        ---
+        import Util = tango.text.Util;
+
+        auto s = Util.trim ("  foo ");
+        ---
+                
+
+        Function templates:
+        ---
+        trim (source)                               // trim whitespace
+        triml (source)                              // trim whitespace
+        trimr (source)                              // trim whitespace
+        strip (source, match)                       // trim elements
+        stripl (source, match)                      // trim left elements
+        stripr (source, match)                      // trim right elements
+        delimit (src, set)                          // split on delims
+        split (source, pattern)                     // split on pattern
+        splitLines (source);                        // split on lines
+        head (source, pattern, tail)                // split to head & tail
+        join (source, postfix, output)              // join text segments
+        repeat (source, count, output)              // repeat source 
+        replace (source, match, replacement)        // replace chars
+        substitute (source, match, replacement)     // replace patterns
+        contains (source, match)                    // has char?
+        containsPattern (source, match)             // has pattern?
+        locate (source, match, start)               // find char
+        locatePrior (source, match, start)          // find prior char
+        locatePattern (source, match, start);       // find pattern
+        locatePatternPrior (source, match, start);  // find prior pattern
+        indexOf (s*, match, length)                 // low-level lookup
+        mismatch (s1*, s2*, length)                 // low-level compare
+        matching (s1*, s2*, length)                 // low-level compare
+        isSpace (match)                             // is whitespace?
+        layout (destination, format ...)            // featherweight printf
+        lines (str)                                 // foreach lines
+        quotes (str, set)                           // foreach quotes
+        delimiters (str, set)                       // foreach delimiters
+        patterns (str, pattern)                     // foreach patterns
+        ---
+
+*******************************************************************************/
+
+module tango.text.Util;
+
+/******************************************************************************
+
+        Trim the provided array by stripping whitespace from both
+        ends. Returns a slice of the original content
+
+******************************************************************************/
+
+T[] trim(T) (T[] source)
+{
+        T*   head = source.ptr,
+             tail = head + source.length;
+
+        while (head < tail && isSpace(*head))
+               ++head;
+
+        while (tail > head && isSpace(*(tail-1)))
+               --tail;
+
+        return head [0 .. tail - head];
+}
+
+/******************************************************************************
+
+        Trim the provided array by stripping whitespace from the left.
+        Returns a slice of the original content
+
+******************************************************************************/
+
+T[] triml(T) (T[] source)
+{
+        T*   head = source.ptr,
+             tail = head + source.length;
+
+        while (head < tail && isSpace(*head))
+               ++head;
+
+        return head [0 .. tail - head];
+}
+
+/******************************************************************************
+
+        Trim the provided array by stripping whitespace from the right.
+        Returns a slice of the original content
+
+******************************************************************************/
+
+T[] trimr(T) (T[] source)
+{
+        T*   head = source.ptr,
+             tail = head + source.length;
+
+        while (tail > head && isSpace(*(tail-1)))
+               --tail;
+
+        return head [0 .. tail - head];
+}
+
+/******************************************************************************
+
+        Trim the given array by stripping the provided match from
+        both ends. Returns a slice of the original content
+
+******************************************************************************/
+
+T[] strip(T) (T[] source, T match)
+{
+        T*   head = source.ptr,
+             tail = head + source.length;
+
+        while (head < tail && *head is match)
+               ++head;
+
+        while (tail > head && *(tail-1) is match)
+               --tail;
+
+        return head [0 .. tail - head];
+}
+
+/******************************************************************************
+
+        Trim the given array by stripping the provided match from
+        the left hand side. Returns a slice of the original content
+
+******************************************************************************/
+
+T[] stripl(T) (T[] source, T match)
+{
+        T*   head = source.ptr,
+             tail = head + source.length;
+
+        while (head < tail && *head is match)
+               ++head;
+
+        return head [0 .. tail - head];
+}
+
+/******************************************************************************
+
+        Chop the given source by stripping the provided match from
+        the left hand side. Returns a slice of the original content
+
+******************************************************************************/
+
+T[] chopl(T) (T[] source, T[] match)
+{
+        if (match.length <= source.length)
+            if (source[0 .. match.length] == match)
+                source = source [match.length .. $];
+
+        return source;
+}
+
+/******************************************************************************
+
+        Chop the given source by stripping the provided match from
+        the right hand side. Returns a slice of the original content
+
+******************************************************************************/
+
+T[] chopr(T) (T[] source, T[] match)
+{
+        if (match.length <= source.length)
+            if (source[$-match.length .. $] == match)
+                source = source [0 .. $-match.length];
+
+        return source;
+}
+
+/******************************************************************************
+
+        Trim the given array by stripping the provided match from
+        the right hand side. Returns a slice of the original content
+
+******************************************************************************/
+
+T[] stripr(T) (T[] source, T match)
+{
+        T*   head = source.ptr,
+             tail = head + source.length;
+
+        while (tail > head && *(tail-1) is match)
+               --tail;
+
+        return head [0 .. tail - head];
+}
+
+/******************************************************************************
+
+        Replace all instances of one element with another (in place)
+
+******************************************************************************/
+
+T[] replace(T) (T[] source, T match, T replacement)
+{
+        foreach (inout c; source)
+                 if (c is match)
+                     c = replacement;
+        return source;
+}
+
+/******************************************************************************
+
+        Replace all instances of one array with another 
+
+******************************************************************************/
+
+T[] substitute(T) (T[] source, T[] match, T[] replacement)
+{
+        T[] output;
+
+        foreach (s; patterns (source, match, replacement))
+                    output ~= s;
+        return output;
+}
+
+/******************************************************************************
+
+        Returns whether or not the provided array contains an instance
+        of the given match
+        
+******************************************************************************/
+
+bool contains(T) (T[] source, T match)
+{
+        return indexOf (source.ptr, match, source.length) != source.length;
+}
+
+/******************************************************************************
+
+        Returns whether or not the provided array contains an instance
+        of the given match
+        
+******************************************************************************/
+
+bool containsPattern(T) (T[] source, T[] match)
+{
+        return locatePattern (source, match) != source.length;
+}
+
+/******************************************************************************
+
+        Return the index of the next instance of 'match' starting at
+        position 'start', or source.length where there is no match.
+
+        Parameter 'start' defaults to 0
+
+******************************************************************************/
+
+uint locate(T, U=uint) (T[] source, T match, U start=0)
+{return locate!(T) (source, match, start);}
+
+uint locate(T) (T[] source, T match, uint start=0)
+{
+        if (start > source.length)
+            start = source.length;
+        
+        return indexOf (source.ptr+start, match, source.length - start) + start;
+}
+
+/******************************************************************************
+
+        Return the index of the prior instance of 'match' starting
+        just before 'start', or source.length where there is no match.
+
+        Parameter 'start' defaults to source.length
+
+******************************************************************************/
+
+uint locatePrior(T, U=uint) (T[] source, T match, U start=uint.max)
+{return locatePrior!(T)(source, match, start);}
+
+uint locatePrior(T) (T[] source, T match, uint start=uint.max)
+{
+        if (start > source.length)
+            start = source.length;
+
+        while (start > 0)
+               if (source[--start] is match)
+                   return start;
+        return source.length;
+}
+
+/******************************************************************************
+
+        Return the index of the next instance of 'match' starting at
+        position 'start', or source.length where there is no match. 
+
+        Parameter 'start' defaults to 0
+
+******************************************************************************/
+
+uint locatePattern(T, U=uint) (T[] source, T[] match, U start=0)
+{return locatePattern!(T) (source, match, start);}
+
+uint locatePattern(T) (T[] source, T[] match, uint start=0)
+{
+        uint    idx;
+        T*      p = source.ptr + start;
+        uint    extent = source.length - start - match.length + 1;
+
+        if (match.length && extent <= source.length)
+            while (extent)
+                   if ((idx = indexOf (p, match[0], extent)) is extent)
+                        break;
+                   else
+                      if (matching (p+=idx, match.ptr, match.length))
+                          return p - source.ptr;
+                      else
+                         {
+                         extent -= (idx+1);
+                         ++p;
+                         }
+
+        return source.length;
+}
+   
+/******************************************************************************
+
+        Return the index of the prior instance of 'match' starting
+        just before 'start', or source.length where there is no match.
+
+        Parameter 'start' defaults to source.length
+
+******************************************************************************/
+
+uint locatePatternPrior(T, U=uint) (T[] source, T[] match, U start=uint.max)
+{return locatePatternPrior!(T)(source, match, start);}
+
+uint locatePatternPrior(T) (T[] source, T[] match, uint start=uint.max)
+{
+        auto len = source.length;
+        
+        if (start > len)
+            start = len;
+
+        if (match.length && match.length <= len)
+            while (start)
+                  {
+                  start = locatePrior (source, match[0], start);
+                  if (start is len)
+                      break;
+                  else
+                     if ((start + match.length) <= len)
+                          if (matching (source.ptr+start, match.ptr, match.length))
+                              return start;
+                  }
+
+        return len;
+}
+
+/******************************************************************************
+
+        Split the provided array on the first pattern instance, and 
+        return the resultant head and tail. The pattern is excluded 
+        from the two segments. 
+
+        Where a segment is not found, tail will be null and the return
+        value will be the original array.
+        
+******************************************************************************/
+
+T[] head(T) (T[] src, T[] pattern, out T[] tail)
+{
+        auto i = locatePattern (src, pattern);
+        if (i != src.length)
+           {
+           tail = src [i + pattern.length .. $];
+           src = src [0 .. i];
+           }
+        return src;
+}
+
+/******************************************************************************
+
+        Split the provided array on the last pattern instance, and 
+        return the resultant head and tail. The pattern is excluded 
+        from the two segments. 
+
+        Where a segment is not found, head will be null and the return
+        value will be the original array.
+        
+******************************************************************************/
+
+T[] tail(T) (T[] src, T[] pattern, out T[] head)
+{
+        auto i = locatePatternPrior (src, pattern);
+        if (i != src.length)
+           {
+           head = src [0 .. i];
+           src = src [i + pattern.length .. $];
+           }
+        return src;
+}
+
+/******************************************************************************
+
+        Split the provided array wherever a delimiter-set instance is
+        found, and return the resultant segments. The delimiters are
+        excluded from each of the segments. Note that delimiters are
+        matched as a set of alternates rather than as a pattern.
+
+        Splitting on a single delimiter is considerably faster than
+        splitting upon a set of alternatives
+
+******************************************************************************/
+
+T[][] delimit(T) (T[] src, T[] set)
+{
+        T[][] result;
+
+        foreach (segment; delimiters (src, set))
+                 result ~= segment;
+        return result;
+}
+
+/******************************************************************************
+
+        Split the provided array wherever a pattern instance is
+        found, and return the resultant segments. The pattern is
+        excluded from each of the segments.
+        
+******************************************************************************/
+
+T[][] split(T) (T[] src, T[] pattern)
+{
+        T[][] result;
+
+        foreach (segment; patterns (src, pattern))
+                 result ~= segment;
+        return result;
+}
+
+/******************************************************************************
+
+        Convert text into a set of lines, where each line is identified
+        by a \n or \r\n combination. The line terminator is stripped from
+        each resultant array
+
+******************************************************************************/
+
+T[][] splitLines(T) (T[] src)
+{
+        int count;
+        
+        foreach (line; lines (src))
+                 ++count;
+        
+        T[][] result = new T[][count];
+
+        count = 0;
+        foreach (line; lines (src))
+                 result [count++] = line;
+
+        return result;
+}
+
+/******************************************************************************
+
+        Combine a series of text segments together, each appended with an 
+        optional postfix pattern. An optional output buffer can be provided
+        to avoid heap activity - it should be large enough to contain the 
+        entire output, otherwise the heap will be used instead.
+
+        Returns a valid slice of the output, containing the concatenated
+        text.
+
+******************************************************************************/
+
+T[] join(T) (T[][] src, T[] postfix=null, T[] dst=null)
+{
+        uint len = src.length * postfix.length;
+
+        foreach (segment; src)
+                 len += segment.length;
+               
+        if (dst.length < len)
+            dst.length = len;
+            
+        T* p = dst.ptr;
+        foreach (segment; src)
+                {
+                p[0 .. segment.length] = segment;
+                p += segment.length;
+                p[0 .. postfix.length] = postfix;
+                p += postfix.length;
+                }
+
+        // remove trailing seperator
+        if (len)
+            len -= postfix.length;
+        return dst [0 .. len];       
+}
+
+/******************************************************************************
+
+        Combine a series of text segments together, each appended with an 
+        optional postfix pattern. An optional output buffer can be provided
+        to avoid heap activity - it should be large enough to contain the 
+        entire output, otherwise the heap will be used instead.
+
+        Returns a valid slice of the output, containing the concatenated
+        text.
+
+******************************************************************************/
+
+T[] repeat(T, U=uint) (T[] src, U count, T[] dst=null)
+{return repeat!(T)(src, count, dst);}
+
+T[] repeat(T) (T[] src, uint count, T[] dst=null)
+{
+        uint len = src.length * count;
+        if (len is 0)
+            return null;
+
+        if (dst.length < len)
+            dst.length = len;
+            
+        for (auto p = dst.ptr; count--; p += src.length)
+             p[0 .. src.length] = src;
+
+        return dst [0 .. len];
+}
+
+/******************************************************************************
+
+        Is the argument a whitespace character?
+
+******************************************************************************/
+
+bool isSpace(T) (T c)
+{
+        static if (T.sizeof is 1)
+                   return (c <= 32 && (c is ' ' | c is '\t' | c is '\r' | c is '\n' | c is '\f' | c is '\v'));
+        else
+           return (c <= 32 && (c is ' ' | c is '\t' | c is '\r' | c is '\n' | c is '\f' | c is '\v')) || (c is '\u2028' | c is '\u2029');
+}
+
+/******************************************************************************
+
+        Return whether or not the two arrays have matching content
+        
+******************************************************************************/
+
+bool matching(T, U=uint) (T* s1, T* s2, U length)
+{return matching!(T) (s1, s2, length);}
+
+bool matching(T) (T* s1, T* s2, uint length)
+{
+        return mismatch(s1, s2, length) is length;
+}
+
+/******************************************************************************
+
+        Returns the index of the first match in str, failing once
+        length is reached. Note that we return 'length' for failure
+        and a 0-based index on success
+
+******************************************************************************/
+
+uint indexOf(T, U=uint) (T* str, T match, U length)
+{return indexOf!(T) (str, match, length);}
+
+uint indexOf(T) (T* str, T match, uint length)
+{
+        version (D_InlineAsm_X86)
+        {       
+                static if (T.sizeof == 1)
+                {
+                        asm {
+                            mov   EDI, str;
+                            mov   ECX, length;
+                            movzx EAX, match;
+                            mov   ESI, ECX;
+                            and   ESI, ESI;            
+                            jz    end;        
+
+                            cld;
+                            repnz;
+                            scasb;
+                            jnz   end;
+                            sub   ESI, ECX;
+                            dec   ESI;
+                        end:;
+                            mov   EAX, ESI;
+                            }
+                }
+                else static if (T.sizeof == 2)
+                {
+                        asm {
+                            mov   EDI, str;
+                            mov   ECX, length;
+                            movzx EAX, match;
+                            mov   ESI, ECX;
+                            and   ESI, ESI;            
+                            jz    end;        
+
+                            cld;
+                            repnz;
+                            scasw;
+                            jnz   end;
+                            sub   ESI, ECX;
+                            dec   ESI;
+                        end:;
+                            mov   EAX, ESI;
+                            }
+                }
+                else static if (T.sizeof == 4)
+                {
+                        asm {
+                            mov   EDI, str;
+                            mov   ECX, length;
+                            mov   EAX, match;
+                            mov   ESI, ECX;
+                            and   ESI, ESI;            
+                            jz    end;        
+
+                            cld;
+                            repnz;
+                            scasd;
+                            jnz   end;
+                            sub   ESI, ECX;
+                            dec   ESI;
+                        end:;
+                            mov   EAX, ESI;
+                            }
+                }
+                else
+                {
+                        auto len = length;
+                        for (auto p=str-1; len--;)
+                             if (*++p is match)
+                                 return p - str;
+                        return length;
+                }
+        }
+        else
+        {
+                auto len = length;
+                for (auto p=str-1; len--;)
+                     if (*++p is match)
+                         return p - str;
+                return length;
+        }
+}
+
+/******************************************************************************
+
+        Returns the index of a mismatch between s1 & s2, failing when
+        length is reached. Note that we return 'length' upon failure
+        (array content matches) and a 0-based index upon success.
+
+        Use this as a faster opEquals (the assembler version). Also
+        provides the basis for a much faster opCmp, since the index
+        of the first mismatched character can be used to determine
+        the (greater or less than zero) return value
+
+******************************************************************************/
+
+uint mismatch(T, U=uint) (T* s1, T* s2, U length)
+{return mismatch!(T)(s1, s2, length);}
+
+uint mismatch(T) (T* s1, T* s2, uint length)
+{
+        version (D_InlineAsm_X86)
+        {
+                static if (T.sizeof == 1)
+                {
+                        asm {
+                            mov   EDI, s1;
+                            mov   ESI, s2;
+                            mov   ECX, length;
+                            mov   EAX, ECX;
+                            and   EAX, EAX;
+                            jz    end;
+
+                            cld;
+                            repz;
+                            cmpsb;
+                            jz    end;
+                            sub   EAX, ECX;
+                            dec   EAX;
+                        end:;
+                            }
+                }
+                else static if (T.sizeof == 2)
+                {
+                        asm {
+                            mov   EDI, s1;
+                            mov   ESI, s2;
+                            mov   ECX, length;
+                            mov   EAX, ECX;
+                            and   EAX, EAX;
+                            jz    end;
+
+                            cld;
+                            repz;
+                            cmpsw;
+                            jz    end;
+                            sub   EAX, ECX;
+                            dec   EAX;
+                            sar   ESI, 1;
+                        end:;
+                            }
+                }
+                else static if (T.sizeof == 4)
+                {
+                        asm {
+                            mov   EDI, s1;
+                            mov   ESI, s2;
+                            mov   ECX, length;
+                            mov   EAX, ECX;
+                            and   EAX, EAX;
+                            jz    end;
+
+                            cld;
+                            repz;
+                            cmpsd;
+                            jz    end;
+                            sub   EAX, ECX;
+                            dec   EAX;
+                            sar   ESI, 2;
+                        end:;
+                            }
+                }
+                else
+                {
+                        auto len = length;
+                        for (auto p=s1-1; len--;)
+                             if (*++p != *s2++)
+                                 return p - s1;
+                        return length;
+                }
+        }
+        else
+        {
+                auto len = length;
+                for (auto p=s1-1; len--;)
+                     if (*++p != *s2++)
+                         return p - s1;
+                return length;
+        }
+}
+
+/******************************************************************************
+
+        Freachable iterator to isolate lines.
+
+        Converts text into a set of lines, where each line is identified
+        by a \n or \r\n combination. The line terminator is stripped from
+        each resultant array.
+
+        ---
+        foreach (line; lines ("one\ntwo\nthree"))
+                 ...
+        ---
+        
+******************************************************************************/
+
+LineFreach!(T) lines(T) (T[] src)
+{
+        LineFreach!(T) lines;
+        lines.src = src;
+        return lines;
+}
+
+/******************************************************************************
+
+        Freachable iterator to isolate text elements.
+
+        Splits the provided array wherever a delimiter-set instance is
+        found, and return the resultant segments. The delimiters are
+        excluded from each of the segments. Note that delimiters are
+        matched as a set of alternates rather than as a pattern.
+
+        Splitting on a single delimiter is considerably faster than
+        splitting upon a set of alternatives.
+
+        ---
+        foreach (segment; delimiters ("one,two;three", ",;"))
+                 ...
+        ---
+        
+******************************************************************************/
+
+DelimFreach!(T) delimiters(T) (T[] src, T[] set)
+{
+        DelimFreach!(T) elements;
+        elements.set = set;
+        elements.src = src;
+        return elements;
+}
+
+/******************************************************************************
+
+        Freachable iterator to isolate text elements.
+
+        Split the provided array wherever a pattern instance is found, 
+        and return the resultant segments. Pattern are excluded from
+        each of the segments, and an optional sub argument enables 
+        replacement.
+        
+        ---
+        foreach (segment; patterns ("one, two, three", ", "))
+                 ...
+        ---
+        
+******************************************************************************/
+
+PatternFreach!(T) patterns(T) (T[] src, T[] pattern, T[] sub=null)
+{
+        PatternFreach!(T) elements;
+        elements.pattern = pattern;
+        elements.sub = sub;
+        elements.src = src;
+        return elements;
+}
+
+/******************************************************************************
+
+        Freachable iterator to isolate optionally quoted text elements.
+
+        As per elements(), but with the extension of being quote-aware;
+        the set of delimiters is ignored inside a pair of quotes. Note
+        that an unterminated quote will consume remaining content.
+        
+        ---
+        foreach (quote; quotes ("one two 'three four' five", " "))
+                 ...
+        ---
+        
+******************************************************************************/
+
+QuoteFreach!(T) quotes(T) (T[] src, T[] set)
+{
+        QuoteFreach!(T) quotes;
+        quotes.set = set;
+        quotes.src = src;
+        return quotes;
+}
+
+/*******************************************************************************
+
+        Arranges text strings in order, using indices to specify where
+        each particular argument should be positioned within the text. 
+        This is handy for collating I18N components, or as a simplistic
+        and lightweight formatter. Indices range from zero through nine. 
+        
+        ---
+        // write ordered text to the console
+        char[64] tmp;
+
+        Cout (layout (tmp, "%1 is after %0", "zero", "one")).newline;
+        ---
+
+*******************************************************************************/
+
+T[] layout(T) (T[] output, T[][] layout ...)
+{
+        static T[] badarg   = "{index out of range}";
+        static T[] toosmall = "{output buffer too small}";
+        
+        int     pos,
+                args;
+        bool    state;
+
+        args = layout.length - 1;
+        foreach (c; layout[0])
+                {
+                if (state)
+                   {
+                   state = false;
+                   if (c >= '0' && c <= '9')
+                      {
+                      uint index = c - '0';
+                      if (index < args)
+                         {
+                         T[] x = layout[index+1];
+
+                         int limit = pos + x.length;
+                         if (limit < output.length)
+                            {
+                            output [pos .. limit] = x;
+                            pos = limit;
+                            continue;
+                            } 
+                         else
+                            return toosmall;
+                         }
+                      else
+                         return badarg;
+                      }
+                   }
+                else
+                   if (c is '%')
+                      {
+                      state = true;
+                      continue;
+                      }
+
+                if (pos < output.length)
+                   {
+                   output[pos] = c;
+                   ++pos;
+                   }
+                else     
+                   return toosmall;
+                }
+
+        return output [0..pos];
+}
+
+/******************************************************************************
+
+        jhash() -- hash a variable-length key into a 32-bit value
+
+          k     : the key (the unaligned variable-length array of bytes)
+          len   : the length of the key, counting by bytes
+          level : can be any 4-byte value
+
+        Returns a 32-bit value.  Every bit of the key affects every bit of
+        the return value.  Every 1-bit and 2-bit delta achieves avalanche.
+
+        About 4.3*len + 80 X86 instructions, with excellent pipelining
+
+        The best hash table sizes are powers of 2.  There is no need to do
+        mod a prime (mod is sooo slow!).  If you need less than 32 bits,
+        use a bitmask.  For example, if you need only 10 bits, do
+
+                    h = (h & hashmask(10));
+
+        In which case, the hash table should have hashsize(10) elements.
+        If you are hashing n strings (ub1 **)k, do it like this:
+
+                    for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
+
+        By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use 
+        this code any way you wish, private, educational, or commercial.  
+        It's free.
+
+        See http://burlteburtle.net/bob/hash/evahash.html
+        Use for hash table lookup, or anything where one collision in 2^32 
+        is acceptable. Do NOT use for cryptographic purposes.
+
+******************************************************************************/
+
+uint jhash (ubyte* k, uint len, uint c = 0)
+{
+        uint a = 0x9e3779b9,
+             b = 0x9e3779b9,
+             i = len;
+
+        // handle most of the key 
+        while (i >= 12) 
+              {
+              a += *cast(uint *)(k+0);
+              b += *cast(uint *)(k+4);
+              c += *cast(uint *)(k+8);
+
+              a -= b; a -= c; a ^= (c>>13); 
+              b -= c; b -= a; b ^= (a<<8); 
+              c -= a; c -= b; c ^= (b>>13); 
+              a -= b; a -= c; a ^= (c>>12);  
+              b -= c; b -= a; b ^= (a<<16); 
+              c -= a; c -= b; c ^= (b>>5); 
+              a -= b; a -= c; a ^= (c>>3);  
+              b -= c; b -= a; b ^= (a<<10); 
+              c -= a; c -= b; c ^= (b>>15); 
+              k += 12; i -= 12;
+              }
+
+        // handle the last 11 bytes 
+        c += len;
+        switch (i)
+               {
+               case 11: c+=(cast(uint)k[10]<<24);
+               case 10: c+=(cast(uint)k[9]<<16);
+               case 9 : c+=(cast(uint)k[8]<<8);
+               case 8 : b+=(cast(uint)k[7]<<24);
+               case 7 : b+=(cast(uint)k[6]<<16);
+               case 6 : b+=(cast(uint)k[5]<<8);
+               case 5 : b+=(cast(uint)k[4]);
+               case 4 : a+=(cast(uint)k[3]<<24);
+               case 3 : a+=(cast(uint)k[2]<<16);
+               case 2 : a+=(cast(uint)k[1]<<8);
+               case 1 : a+=(cast(uint)k[0]);
+               default:
+               }
+
+        a -= b; a -= c; a ^= (c>>13); 
+        b -= c; b -= a; b ^= (a<<8); 
+        c -= a; c -= b; c ^= (b>>13); 
+        a -= b; a -= c; a ^= (c>>12);  
+        b -= c; b -= a; b ^= (a<<16); 
+        c -= a; c -= b; c ^= (b>>5); 
+        a -= b; a -= c; a ^= (c>>3);  
+        b -= c; b -= a; b ^= (a<<10); 
+        c -= a; c -= b; c ^= (b>>15); 
+
+        return c;
+}
+
+/// ditto
+uint jhash (void[] x, uint c = 0)
+{
+        return jhash (cast(ubyte*) x.ptr, x.length, c);
+}
+
+
+/******************************************************************************
+
+        Helper struct for iterator lines()
+         
+******************************************************************************/
+
+private struct LineFreach(T)
+{
+        private T[] src;
+
+        int opApply (int delegate (inout T[] line) dg)
+        {
+                uint    ret,
+                        pos,
+                        mark;
+                T[]     line;
+
+                const T nl = '\n';
+                const T cr = '\r';
+
+                while ((pos = locate (src, nl, mark)) < src.length)
+                      {
+                      auto end = pos;
+                      if (end && src[end-1] is cr)
+                          --end;
+
+                      line = src [mark .. end];
+                      if ((ret = dg (line)) != 0)
+                           return ret;
+                      mark = pos + 1;
+                      }
+
+                line = src [mark .. $];
+                if (mark < src.length)
+                    ret = dg (line);
+
+                return ret;
+        }
+}
+
+/******************************************************************************
+
+        Helper struct for iterator delimiters()
+        
+******************************************************************************/
+
+private struct DelimFreach(T)
+{
+        private T[] src;
+        private T[] set;
+
+        int opApply (int delegate (inout T[] token) dg)
+        {
+                uint    ret,
+                        pos,
+                        mark;
+                T[]     token;
+
+                // optimize for single delimiter case
+                if (set.length is 1)
+                    while ((pos = locate (src, set[0], mark)) < src.length)
+                          {
+                          token = src [mark .. pos];
+                          if ((ret = dg (token)) != 0)
+                               return ret;
+                          mark = pos + 1;
+                          }
+                else
+                   if (set.length > 1)
+                       foreach (i, elem; src)
+                                if (contains (set, elem))
+                                   {
+                                   token = src [mark .. i];
+                                   if ((ret = dg (token)) != 0)
+                                        return ret;
+                                   mark = i + 1;
+                                   }
+
+                token = src [mark .. $];
+                if (mark < src.length)
+                    ret = dg (token);
+
+                return ret;
+        }
+}
+
+/******************************************************************************
+
+        Helper struct for iterator patterns()
+        
+******************************************************************************/
+
+private struct PatternFreach(T)
+{
+        private T[] src,
+                    sub,
+                    pattern;
+
+        int opApply (int delegate (inout T[] token) dg)
+        {
+                uint    ret,
+                        pos,
+                        mark;
+                T[]     token;
+
+                // optimize for single-element pattern
+                if (pattern.length is 1)
+                    while ((pos = locate (src, pattern[0], mark)) < src.length)
+                          {
+                          token = src [mark .. pos];
+                          if ((ret = dg(token)) != 0)
+                               return ret;
+                          if (sub.ptr)
+                              if ((ret = dg(sub)) != 0)
+                                   return ret;
+                          mark = pos + 1;
+                          }
+                else
+                   if (pattern.length > 1)
+                       while ((pos = locatePattern (src, pattern, mark)) < src.length)
+                             {
+                             token = src [mark .. pos];
+                             if ((ret = dg(token)) != 0)
+                                  return ret;
+                             if (sub.ptr)
+                                 if ((ret = dg(sub)) != 0)
+                                      return ret;
+                             mark = pos + pattern.length;
+                             }
+
+                token = src [mark .. $];
+                if (mark < src.length)
+                    ret = dg (token);
+
+                return ret;
+        }
+}
+
+/******************************************************************************
+
+        Helper struct for iterator quotes()
+        
+******************************************************************************/
+
+private struct QuoteFreach(T)
+{
+        private T[] src;
+        private T[] set;
+        
+        int opApply (int delegate (inout T[] token) dg)
+        {
+                int ret,
+                    mark;
+                T[] token;
+
+                if (set.length)
+                    for (uint i=0; i < src.length; ++i)
+                        {
+                        T c = src[i];
+                        if (c is '"' || c is '\'')
+                            i = locate (src, c, i+1);
+                        else
+                           if (contains (set, c))
+                              {
+                              token = src [mark .. i];
+                              if ((ret = dg (token)) != 0)
+                                   return ret;
+                              mark = i + 1;
+                              }
+                        }
+                
+                token = src [mark .. $];
+                if (mark < src.length)
+                    ret = dg (token);
+
+                return ret;
+        }
+}
+
+
+/******************************************************************************
+
+******************************************************************************/
+
+debug (UnitTest)
+{
+        //void main() {}
+        
+        unittest
+        {
+        char[64] tmp;
+
+        assert (isSpace (' ') && !isSpace ('d'));
+
+        assert (indexOf ("abc".ptr, 'a', 3) is 0);
+        assert (indexOf ("abc".ptr, 'b', 3) is 1);
+        assert (indexOf ("abc".ptr, 'c', 3) is 2);
+        assert (indexOf ("abc".ptr, 'd', 3) is 3);
+
+        assert (indexOf ("abc"d.ptr, cast(dchar)'c', 3) is 2);
+        assert (indexOf ("abc"d.ptr, cast(dchar)'d', 3) is 3);
+
+        assert (indexOf ("abc"w.ptr, cast(wchar)'c', 3) is 2);
+        assert (indexOf ("abc"w.ptr, cast(wchar)'d', 3) is 3);
+
+        assert (mismatch ("abc".ptr, "abc".ptr, 3) is 3);
+        assert (mismatch ("abc".ptr, "abd".ptr, 3) is 2);
+        assert (mismatch ("abc".ptr, "acc".ptr, 3) is 1);
+        assert (mismatch ("abc".ptr, "ccc".ptr, 3) is 0);
+
+        assert (mismatch ("abc"w.ptr, "abc"w.ptr, 3) is 3);
+        assert (mismatch ("abc"w.ptr, "acc"w.ptr, 3) is 1);
+
+        assert (mismatch ("abc"d.ptr, "abc"d.ptr, 3) is 3);
+        assert (mismatch ("abc"d.ptr, "acc"d.ptr, 3) is 1);
+
+        assert (matching ("abc".ptr, "abc".ptr, 3));
+        assert (matching ("abc".ptr, "abb".ptr, 3) is false);
+        
+        assert (contains ("abc", 'a'));
+        assert (contains ("abc", 'b'));
+        assert (contains ("abc", 'c'));
+        assert (contains ("abc", 'd') is false);
+
+        assert (containsPattern ("abc", "ab"));
+        assert (containsPattern ("abc", "bc"));
+        assert (containsPattern ("abc", "abc"));
+        assert (containsPattern ("abc", "zabc") is false);
+        assert (containsPattern ("abc", "abcd") is false);
+        assert (containsPattern ("abc", "za") is false);
+        assert (containsPattern ("abc", "cd") is false);
+
+        assert (trim ("") == "");
+        assert (trim (" abc  ") == "abc");
+        assert (trim ("   ") == "");
+
+        assert (strip ("", '%') == "");
+        assert (strip ("%abc%%%", '%') == "abc");
+        assert (strip ("#####", '#') == "");
+        assert (stripl ("#####", '#') == "");
+        assert (stripl (" ###", ' ') == "###");
+        assert (stripl ("#####", 's') == "#####");
+        assert (stripr ("#####", '#') == "");
+        assert (stripr ("### ", ' ') == "###");
+        assert (stripr ("#####", 's') == "#####");
+
+        assert (replace ("abc".dup, 'b', ':') == "a:c");
+        assert (substitute ("abc".dup, "bc", "x") == "ax");
+
+        assert (locate ("abc", 'c', 1) is 2);
+
+        assert (locate ("abc", 'c') is 2);
+        assert (locate ("abc", 'a') is 0);
+        assert (locate ("abc", 'd') is 3);
+        assert (locate ("", 'c') is 0);
+
+        assert (locatePrior ("abce", 'c') is 2);
+        assert (locatePrior ("abce", 'a') is 0);
+        assert (locatePrior ("abce", 'd') is 4);
+        assert (locatePrior ("abce", 'c', 3) is 2);
+        assert (locatePrior ("abce", 'c', 2) is 4);
+        assert (locatePrior ("", 'c') is 0);
+
+        auto x = delimit ("::b", ":");
+        assert (x.length is 3 && x[0] == "" && x[1] == "" && x[2] == "b");
+        x = delimit ("a:bc:d", ":");
+        assert (x.length is 3 && x[0] == "a" && x[1] == "bc" && x[2] == "d");
+        x = delimit ("abcd", ":");
+        assert (x.length is 1 && x[0] == "abcd");
+        x = delimit ("abcd:", ":");
+        assert (x.length is 1 && x[0] == "abcd");
+        x = delimit ("a;b$c#d:e@f", ";:$#@");
+        assert (x.length is 6 && x[0]=="a" && x[1]=="b" && x[2]=="c" &&
+                                 x[3]=="d" && x[4]=="e" && x[5]=="f");
+
+        assert (locatePattern ("abcdefg", "") is 7);
+        assert (locatePattern ("abcdefg", "g") is 6);
+        assert (locatePattern ("abcdefg", "abcdefg") is 0);
+        assert (locatePattern ("abcdefg", "abcdefgx") is 7);
+        assert (locatePattern ("abcdefg", "cce") is 7);
+        assert (locatePattern ("abcdefg", "cde") is 2);
+        assert (locatePattern ("abcdefgcde", "cde", 3) is 7);
+
+        assert (locatePatternPrior ("abcdefg", "") is 7);
+        assert (locatePatternPrior ("abcdefg", "cce") is 7);
+        assert (locatePatternPrior ("abcdefg", "cde") is 2);
+        assert (locatePatternPrior ("abcdefgcde", "cde", 6) is 2);
+        assert (locatePatternPrior ("abcdefgcde", "cde", 4) is 2);
+        assert (locatePatternPrior ("abcdefg", "abcdefgx") is 7);
+
+        x = splitLines ("a\nb\n");
+        assert (x.length is 2 && x[0] == "a" && x[1] == "b");
+        x = splitLines ("a\r\n");
+        assert (x.length is 1 && x[0] == "a");
+        x = splitLines ("a");
+        assert (x.length is 1 && x[0] == "a");
+        x = splitLines ("");
+        assert (x.length is 0);
+
+        char[][] q;
+        foreach (element; quotes ("1 'avcc   cc ' 3", " "))
+                 q ~= element;
+        assert (q.length is 3 && q[0] == "1" && q[1] == "'avcc   cc '" && q[2] == "3");
+
+        assert (layout (tmp, "%1,%%%c %0", "abc", "efg") == "efg,%c abc");
+
+        x = split ("one, two, three", ",");
+        assert (x.length is 3 && x[0] == "one" && x[1] == " two" && x[2] == " three");
+        x = split ("one, two, three", ", ");
+        assert (x.length is 3 && x[0] == "one" && x[1] == "two" && x[2] == "three");
+        x = split ("one, two, three", ",,");
+        assert (x.length is 1 && x[0] == "one, two, three");
+
+        char[] h, t;
+        h =  head ("one:two:three", ":", t);
+        assert (h == "one" && t == "two:three");
+        h = head ("one:::two:three", ":::", t);
+        assert (h == "one" && t == "two:three");
+        h = head ("one:two:three", "*", t);
+        assert (h == "one:two:three" && t is null);
+
+        t =  tail ("one:two:three", ":", h);
+        assert (h == "one:two" && t == "three");
+        t = tail ("one:::two:three", ":::", h);
+        assert (h == "one" && t == "two:three");
+        t = tail ("one:two:three", "*", h);
+        assert (t == "one:two:three" && h is null);
+
+        assert (chopl("hello world", "hello ") == "world");
+        assert (chopl("hello", "hello") == "");
+        assert (chopl("hello world", " ") == "hello world");
+        assert (chopl("hello world", "") == "hello world");
+
+        assert (chopr("hello world", " world") == "hello");
+        assert (chopr("hello", "hello") == "");
+        assert (chopr("hello world", " ") == "hello world");
+        assert (chopr("hello world", "") == "hello world");
+
+        char[][] foo = ["one", "two", "three"];
+        auto j = join (foo);
+        assert (j == "onetwothree");
+        j = join (foo, ", ");
+        assert (j == "one, two, three");
+        j = join (foo, " ", tmp);
+        assert (j == "one two three");
+        assert (j.ptr is tmp.ptr);
+
+        assert (repeat ("abc", 0) == "");
+        assert (repeat ("abc", 1) == "abc");
+        assert (repeat ("abc", 2) == "abcabc");
+        assert (repeat ("abc", 4) == "abcabcabcabc");
+        assert (repeat ("", 4) == "");
+        char[10] rep;
+        assert (repeat ("abc", 0, rep) == "");
+        assert (repeat ("abc", 1, rep) == "abc");
+        assert (repeat ("abc", 2, rep) == "abcabc");
+        assert (repeat ("", 4, rep) == "");
+        }
+}
+
+
+
author	lindquist
date	Fri, 11 Jan 2008 17:57:40 +0100
parents
children