Mercurial > projects > ldc

diff tango/tango/text/convert/Integer.d @ 132:1700239cab2e trunk
[svn r136] MAJOR UNSTABLE UPDATE!!! Initial commit after moving to Tango instead of Phobos. Lots of bugfixes... This build is not suitable for most things.
author: lindquist
date: Fri, 11 Jan 2008 17:57:40 +0100
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tango/tango/text/convert/Integer.d	Fri Jan 11 17:57:40 2008 +0100
@@ -0,0 +1,598 @@
+/*******************************************************************************
+
+        copyright:      Copyright (c) 2004 Kris Bell. All rights reserved
+
+        license:        BSD style: $(LICENSE)
+        
+        version:        Initial release: Nov 2005
+        
+        author:         Kris
+
+        A set of functions for converting between string and integer 
+        values. 
+
+        Applying the D "import alias" mechanism to this module is highly
+        recommended, in order to limit namespace pollution:
+        ---
+        import Integer = tango.text.convert.Integer;
+
+        auto i = Integer.parse ("32767");
+        ---
+        
+*******************************************************************************/
+
+module tango.text.convert.Integer;
+
+private import tango.core.Exception;
+
+/******************************************************************************
+
+        Style identifiers 
+
+******************************************************************************/
+
+enum Style
+{
+        Signed = 'd',                   /// signed decimal
+        Binary = 'b',                   /// binary output
+        Octal = 'o',                    /// octal output
+        Hex = 'x',                      /// lowercase hexadecimal
+        HexUpper = 'X',                 /// uppercase hexadecimal
+        Unsigned = 'u',                 /// unsigned integer
+}
+
+/******************************************************************************
+
+        Style flags 
+
+******************************************************************************/
+
+enum Flags
+{
+        None    = 0x00,                    /// no flags
+        Prefix  = 0x01,                    /// prefix value with type
+        Zero    = 0x02,                    /// prefix value with zeroes
+        Plus    = 0x04,                    /// prefix decimal with '+'
+        Space   = 0x08,                    /// prefix decimal with space
+        Throw   = 0x10,                    /// throw on output truncation
+}
+
+/******************************************************************************
+
+        Parse an integer value from the provided 'digits' string. 
+
+        The string is inspected for a sign and an optional radix 
+        prefix. A radix may be provided as an argument instead, 
+        whereupon it must match the prefix (where present). When
+        radix is set to zero, conversion will default to decimal.
+
+        Throws an exception where the input text is not parsable
+        in its entirety.
+        
+******************************************************************************/
+
+int toInt(T, U=uint) (T[] digits, U radix=0)
+{return toInt!(T)(digits, radix);}
+
+int toInt(T) (T[] digits, uint radix=0)
+{
+        auto x = toLong (digits, radix);
+        if (x > int.max)
+            throw new IllegalArgumentException ("Integer.toInt :: integer overflow");
+        return cast(int) x;
+}
+
+/******************************************************************************
+
+        Parse an integer value from the provided 'digits' string.       
+        
+        The string is inspected for a sign and an optional radix 
+        prefix. A radix may be provided as an argument instead, 
+        whereupon it must match the prefix (where present). When
+        radix is set to zero, conversion will default to decimal.
+
+        Throws an exception where the input text is not parsable
+        in its entirety.
+        
+******************************************************************************/
+
+long toLong(T, U=uint) (T[] digits, U radix=0)
+{return toLong!(T)(digits, radix);}
+
+long toLong(T) (T[] digits, uint radix=0)
+{
+        uint len;
+
+        auto x = parse (digits, radix, &len);
+        if (len < digits.length)
+            throw new IllegalArgumentException ("Integer.toLong :: invalid literal");
+        return x;
+}
+
+/******************************************************************************
+
+        Template wrapper to make life simpler. Returns a text version
+        of the provided value.
+
+        See format() for details
+
+******************************************************************************/
+
+char[] toString (long i, Style t=Style.Signed, Flags f=Flags.None)
+{
+        char[66] tmp = void;
+        
+        return format (tmp, i, t, f).dup;
+}
+               
+/******************************************************************************
+
+        Template wrapper to make life simpler. Returns a text version
+        of the provided value.
+
+        See format() for details
+
+******************************************************************************/
+
+wchar[] toString16 (long i, Style t=Style.Signed, Flags f=Flags.None)
+{
+        wchar[66] tmp = void;
+        
+        return format (tmp, i, t, f).dup;
+}
+               
+/******************************************************************************
+
+        Template wrapper to make life simpler. Returns a text version
+        of the provided value.
+
+        See format() for details
+
+******************************************************************************/
+
+dchar[] toString32 (long i, Style t=Style.Signed, Flags f=Flags.None)
+{
+        dchar[66] tmp = void;
+        
+        return format (tmp, i, t, f).dup;
+}
+               
+/*******************************************************************************
+
+        Style numeric values into the provided output buffer. The
+        following types are supported:
+
+        Unsigned        - unsigned decimal
+        Signed          - signed decimal
+        Octal           - octal
+        Hex             - lowercase hexadecimal
+        HexUpper        - uppercase hexadecimal
+        Binary          - binary
+
+        Modifiers supported include:
+
+        Prefix          - prefix the conversion with a type identifier
+        Plus            - prefix positive decimals with a '+'
+        Space           - prefix positive decimals with one space
+        Zero            - left-pad the number with zeros
+        Throw           - throw an exception when output would be truncated
+
+        The provided 'dst' buffer should be sufficiently large
+        enough to house the output. A 64-element array is often
+        the maximum required (for a padded binary 64-bit string)
+
+*******************************************************************************/
+
+T[] format(T, U=long) (T[] dst, U i, Style fmt=Style.Signed, Flags flags=Flags.None)
+{return format!(T)(dst, i, fmt, flags);}
+
+T[] format(T) (T[] dst, long i, Style fmt=Style.Signed, Flags flags=Flags.None)
+{
+        T[]     prefix;
+        auto    len = dst.length;
+        
+        static T[] error (T[] msg)
+        {
+                if (1 & Flags.Throw)
+                    throw new IllegalArgumentException ("Integer.format :: invalid arguments");
+                 return msg;
+        }
+
+        // must have some buffer space to operate within! 
+        if (len)
+           {
+           uint radix;
+           T[]  numbers = "0123456789abcdef";
+
+           // pre-conversion setup
+           switch (cast(byte) fmt)
+                  {
+                  case 'd':
+                  case 'D':
+                       if (i < 0)
+                          {
+                          prefix = "-";
+                          i = -i;
+                          }
+                       else
+                          if (flags & Flags.Space)
+                              prefix = " ";
+                          else
+                             if (flags & Flags.Plus)
+                                 prefix = "+";
+                       // fall through!
+                  case 'u':
+                  case 'U':
+                       radix = 10;
+                       break;
+
+                  case 'b':
+                  case 'B':
+                       radix = 2;
+                       if (flags & Flags.Prefix)
+                           prefix = "0b";
+                       break;
+
+                  case 'o':
+                  case 'O':
+                       radix = 8;
+                       if (flags & Flags.Prefix)
+                           prefix = "0o";
+                       break;
+
+                  case 'x':
+                       radix = 16;
+                       if (flags & Flags.Prefix)
+                           prefix = "0x";
+                       break;
+
+                  case 'X':
+                       radix = 16;
+                       numbers = "0123456789ABCDEF";
+                       if (flags & Flags.Prefix)
+                           prefix = "0X";
+                       break;
+
+                  default:
+                        return error (cast(T[])"{unknown format '"~cast(T)fmt~"'}");
+                  }
+
+           // convert number to text
+           T* p = dst.ptr + len;
+           if (uint.max >= cast(ulong) i)
+              {
+              uint v = cast (uint) i;
+              do {
+                 *--p = numbers[v % radix];
+                 } while ((v /= radix) && --len);
+              }
+           else
+              {
+              ulong v = cast (ulong) i;
+              do {
+                 *--p = numbers[cast(uint) (v % radix)];
+                 } while ((v /= radix) && --len);
+              }
+           }
+        
+        // are we about to overflow?
+        if (len > prefix.length)
+           {
+           len -= prefix.length + 1;
+
+           // prefix number with zeros? 
+           if (flags & Flags.Zero)
+              {
+              dst [prefix.length .. len + prefix.length] = '0';
+              len = 0;
+              }
+
+           // write optional prefix string ...
+           dst [len .. len + prefix.length] = prefix[];
+           }
+        else
+           return error ("{output width too small}");
+
+        // return slice of provided output buffer
+        return dst [len .. $];                               
+} 
+
+
+/******************************************************************************
+
+        Parse an integer value from the provided 'digits' string. 
+
+        The string is inspected for a sign and an optional radix 
+        prefix. A radix may be provided as an argument instead, 
+        whereupon it must match the prefix (where present). When
+        radix is set to zero, conversion will default to decimal.
+
+        A non-null 'ate' will return the number of characters used
+        to construct the returned value.
+
+******************************************************************************/
+
+long parse(T, U=uint) (T[] digits, U radix=0, uint* ate=null)
+{return parse!(T)(digits, radix, ate);}
+
+long parse(T) (T[] digits, uint radix=0, uint* ate=null)
+{
+        bool sign;
+
+        auto eaten = trim (digits, sign, radix);
+        auto value = convert (digits[eaten..$], radix, ate);
+
+        if (ate)
+            *ate += eaten;
+
+        return cast(long) (sign ? -value : value);
+}
+
+/******************************************************************************
+
+        Convert the provided 'digits' into an integer value,
+        without checking for a sign or radix. The radix defaults
+        to decimal (10).
+
+        Returns the value and updates 'ate' with the number of
+        characters consumed.
+
+******************************************************************************/
+
+ulong convert(T, U=uint) (T[] digits, U radix=10, uint* ate=null)
+{return convert!(T)(digits, radix, ate);}
+
+ulong convert(T) (T[] digits, uint radix=10, uint* ate=null)
+{
+        uint  eaten;
+        ulong value;
+
+        foreach (c; digits)
+                {
+                if (c >= '0' && c <= '9')
+                   {}
+                else
+                   if (c >= 'a' && c <= 'f')
+                       c -= 39;
+                   else
+                      if (c >= 'A' && c <= 'F')
+                          c -= 7;
+                      else
+                         break;
+
+                if ((c -= '0') < radix)
+                   {
+                   value = value * radix + c;
+                   ++eaten;
+                   }
+                else
+                   break;
+                }
+
+        if (ate)
+            *ate = eaten;
+
+        return value;
+}
+
+
+/******************************************************************************
+
+        Strip leading whitespace, extract an optional +/- sign,
+        and an optional radix prefix. If the radix value matches
+        an optional prefix, or the radix is zero, the prefix will
+        be consumed and assigned. Where the radix is non zero and
+        does not match an explicit prefix, the latter will remain 
+        unconsumed. Otherwise, radix will default to 10.
+
+        Returns the number of characters consumed.
+
+******************************************************************************/
+
+uint trim(T, U=uint) (T[] digits, inout bool sign, inout U radix)
+{return trim!(T)(digits, sign, radix);}
+
+uint trim(T) (T[] digits, inout bool sign, inout uint radix)
+{
+        T       c;
+        T*      p = digits.ptr;
+        int     len = digits.length;
+
+        if (len)
+           {
+           // strip off whitespace and sign characters
+           for (c = *p; len; c = *++p, --len)
+                if (c is ' ' || c is '\t')
+                   {}
+                else
+                   if (c is '-')
+                       sign = true;
+                   else
+                      if (c is '+')
+                          sign = false;
+                   else
+                      break;
+
+           // strip off a radix specifier also?
+           auto r = radix;
+           if (c is '0' && len > 1)
+               switch (*++p)
+                      {
+                      case 'x':
+                      case 'X':
+                           r = 16, ++p;
+                           break;
+ 
+                      case 'b':
+                      case 'B':
+                           r = 2, ++p;
+                           break;
+ 
+                      case 'o':
+                      case 'O':
+                           r = 8, ++p;
+                           break;
+ 
+                      default: 
+                           break;
+                      } 
+
+           // default the radix to 10
+           if (r is 0)
+               radix = 10;
+           else
+              // explicit radix must match (optional) prefix
+              if (radix != r)
+                  if (radix)
+                      --p;
+                  else
+                     radix = r;
+           }
+
+        // return number of characters eaten
+        return (p - digits.ptr);
+}
+
+
+/******************************************************************************
+
+        quick & dirty text-to-unsigned int converter. Use only when you
+        know what the content is, or use parse() or convert() instead.
+
+        Return the parsed uint
+        
+******************************************************************************/
+
+uint atoi(T) (T[] s)
+{
+        uint value;
+
+        foreach (c; s)
+                 if (c >= '0' && c <= '9')
+                     value = value * 10 + (c - '0');
+                 else
+                    break;
+        return value;
+}
+
+
+/******************************************************************************
+
+        quick & dirty unsigned to text converter, where the provided output
+        must be large enough to house the result (10 digits in the largest
+        case). For mainstream use, consider utilizing format() instead.
+
+        Returns a populated slice of the provided output
+        
+******************************************************************************/
+
+T[] itoa(T, U=uint) (T[] output, U value)
+{return itoa!(T)(output, value);}
+
+T[] itoa(T) (T[] output, uint value)
+{
+        T* p = output.ptr + output.length;
+
+        do {
+           *--p = value % 10 + '0';
+           } while (value /= 10);
+        return output[p-output.ptr .. $];
+}
+
+
+/******************************************************************************
+
+******************************************************************************/
+
+debug (UnitTest)
+{
+        unittest
+        {
+        char[64] tmp;
+        
+        assert (toInt("1") is 1);
+        assert (toLong("1") is 1);
+        assert (toInt("1", 10) is 1);
+        assert (toLong("1", 10) is 1);
+
+        assert (atoi ("12345") is 12345);
+        assert (itoa (tmp, 12345) == "12345");
+
+        assert(parse( "0"w ) ==  0 );
+        assert(parse( "1"w ) ==  1 );
+        assert(parse( "-1"w ) ==  -1 );
+        assert(parse( "+1"w ) ==  1 );
+
+        // numerical limits
+        assert(parse( "-2147483648" ) == int.min );
+        assert(parse(  "2147483647" ) == int.max );
+        assert(parse(  "4294967295" ) == uint.max );
+
+        assert(parse( "-9223372036854775808" ) == long.min );
+        assert(parse( "9223372036854775807" ) == long.max );
+        assert(parse( "18446744073709551615" ) == ulong.max );
+
+        // hex
+        assert(parse( "a", 16) == 0x0A );
+        assert(parse( "b", 16) == 0x0B );
+        assert(parse( "c", 16) == 0x0C );
+        assert(parse( "d", 16) == 0x0D );
+        assert(parse( "e", 16) == 0x0E );
+        assert(parse( "f", 16) == 0x0F );
+        assert(parse( "A", 16) == 0x0A );
+        assert(parse( "B", 16) == 0x0B );
+        assert(parse( "C", 16) == 0x0C );
+        assert(parse( "D", 16) == 0x0D );
+        assert(parse( "E", 16) == 0x0E );
+        assert(parse( "F", 16) == 0x0F );
+        assert(parse( "FFFF", 16) == ushort.max );
+        assert(parse( "ffffFFFF", 16) == uint.max );
+        assert(parse( "ffffFFFFffffFFFF", 16u ) == ulong.max );
+        // oct
+        assert(parse( "55", 8) == 055 );
+        assert(parse( "100", 8) == 0100 );
+        // bin
+        assert(parse( "10000", 2) == 0x10 );
+        // trim
+        assert(parse( "    \t20") == 20 );
+        assert(parse( "    \t-20") == -20 );
+        assert(parse( "-    \t 20") == -20 );
+        // recognise radix prefix
+        assert(parse( "0xFFFF" ) == ushort.max );
+        assert(parse( "0XffffFFFF" ) == uint.max );
+        assert(parse( "0o55") == 055 );
+        assert(parse( "0O55" ) == 055 );
+        assert(parse( "0b10000") == 0x10 );
+        assert(parse( "0B10000") == 0x10 );
+
+        // prefix tests
+        char[] str = "0x";
+        assert(parse( str[0..1] ) ==  0 );
+        assert(parse("0x10", 10) == 0);
+        assert(parse("0b10", 10) == 0);
+        assert(parse("0o10", 10) == 0);
+        assert(parse("0b10") == 0b10);
+        assert(parse("0o10") == 010);
+        assert(parse("0b10", 2) == 0b10);
+        assert(parse("0o10", 8) == 010);
+
+        // format tests
+        assert (format (tmp, 12345L) == "12345");
+        assert (format (tmp, 0) == "0");
+        assert (format (tmp, 0x10101L, Style.Hex) == "10101");
+        assert (format (tmp, 0xfafaL, Style.Hex) == "fafa");
+        assert (format (tmp, 0xfafaL, Style.HexUpper, Flags.Prefix) == "0XFAFA");
+        assert (format (tmp, -1L, Style.HexUpper, Flags.Prefix) == "0XFFFFFFFFFFFFFFFF");
+        assert (format (tmp, -101L) == "-101");
+        assert (format (tmp, 101L, Style.Signed, Flags.Plus) == "+101");
+        assert (format (tmp, 101L, Style.Signed, Flags.Space) == " 101");
+        assert (format (tmp[0..8], 0x5L, Style.Binary, Flags.Prefix | Flags.Zero) == "0b000101");
+
+        assert (format (tmp[0..8], -1, Style.Binary, Flags.Prefix | Flags.Zero) == "{output width too small}");
+        assert (format (tmp[0..2], 0x3, Style.Binary, Flags.Throw) == "11");
+        assert (format (tmp[0..4], 0x3, Style.Binary, Flags.Prefix | Flags.Zero | Flags.Throw) == "0b11");
+        assert (format (tmp[0..5], 0x3, Style.Binary, Flags.Prefix | Flags.Zero | Flags.Throw) == "0b011");
+        assert (format (tmp[0..5], 0x3, Style.Binary, Flags.Zero | Flags.Throw) == "00011");
+        }
+}
+
author	lindquist
date	Fri, 11 Jan 2008 17:57:40 +0100
parents
children