Mercurial > projects > ldc
diff tango/tango/text/convert/Integer.d @ 132:1700239cab2e trunk
[svn r136] MAJOR UNSTABLE UPDATE!!!
Initial commit after moving to Tango instead of Phobos.
Lots of bugfixes...
This build is not suitable for most things.
author | lindquist |
---|---|
date | Fri, 11 Jan 2008 17:57:40 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tango/tango/text/convert/Integer.d Fri Jan 11 17:57:40 2008 +0100 @@ -0,0 +1,598 @@ +/******************************************************************************* + + copyright: Copyright (c) 2004 Kris Bell. All rights reserved + + license: BSD style: $(LICENSE) + + version: Initial release: Nov 2005 + + author: Kris + + A set of functions for converting between string and integer + values. + + Applying the D "import alias" mechanism to this module is highly + recommended, in order to limit namespace pollution: + --- + import Integer = tango.text.convert.Integer; + + auto i = Integer.parse ("32767"); + --- + +*******************************************************************************/ + +module tango.text.convert.Integer; + +private import tango.core.Exception; + +/****************************************************************************** + + Output style identifiers, one per conversion supported by format() + +******************************************************************************/ + +enum Style +{ + Signed = 'd', /// signed decimal + Binary = 'b', /// binary output + Octal = 'o', /// octal output + Hex = 'x', /// lowercase hexadecimal + HexUpper = 'X', /// uppercase hexadecimal + Unsigned = 'u', /// unsigned decimal +} + +/****************************************************************************** + + Formatting flags for format(); combine them with bitwise OR + +******************************************************************************/ + +enum Flags +{ + None = 0x00, /// no flags + Prefix = 0x01, /// prefix value with a radix identifier (0b, 0o, 0x or 0X) + Zero = 0x02, /// left-pad value with zeroes + Plus = 0x04, /// prefix non-negative decimal with '+' + Space = 0x08, /// prefix non-negative decimal with a space + Throw = 0x10, /// throw on output truncation +} + +/****************************************************************************** + + Parse an integer value from the provided 'digits' string. + + The string is inspected for a sign and an optional radix + prefix. 
A radix may be provided as an argument instead,
        whereupon it must match the prefix (where present). When
        radix is set to zero, conversion will default to decimal.

        Throws an exception where the input text is not parsable
        in its entirety, or where the parsed value does not fit
        within the range of an int.

******************************************************************************/

// accepts any radix type U and forwards to the uint-radix overload
int toInt(T, U=uint) (T[] digits, U radix=0)
{return toInt!(T)(digits, radix);}

int toInt(T) (T[] digits, uint radix=0)
{
        auto x = toLong (digits, radix);

        // check both ends of the int range: testing int.max alone lets
        // a value below int.min through, to be silently truncated by
        // the cast below
        if (x > int.max || x < int.min)
            throw new IllegalArgumentException ("Integer.toInt :: integer overflow");
        return cast(int) x;
}

/******************************************************************************

        Parse an integer value from the provided 'digits' string.

        The string is inspected for a sign and an optional radix
        prefix. A radix may be provided as an argument instead,
        whereupon it must match the prefix (where present). When
        radix is set to zero, conversion will default to decimal.

        Throws an exception where the input text is not parsable
        in its entirety.

******************************************************************************/

// accepts any radix type U and forwards to the uint-radix overload
long toLong(T, U=uint) (T[] digits, U radix=0)
{return toLong!(T)(digits, radix);}

long toLong(T) (T[] digits, uint radix=0)
{
        uint len;

        // 'len' receives the number of characters parse() consumed
        auto x = parse (digits, radix, &len);

        // trailing characters left unconsumed make the literal invalid
        if (len < digits.length)
            throw new IllegalArgumentException ("Integer.toLong :: invalid literal");
        return x;
}

/******************************************************************************

        Template wrapper to make life simpler. Returns a text version
        of the provided value.
+ + See format() for details + +******************************************************************************/ + +char[] toString (long i, Style t=Style.Signed, Flags f=Flags.None) +{ + char[66] tmp = void; + + return format (tmp, i, t, f).dup; +} + +/****************************************************************************** + + Template wrapper to make life simpler. Returns a text version + of the provided value. + + See format() for details + +******************************************************************************/ + +wchar[] toString16 (long i, Style t=Style.Signed, Flags f=Flags.None) +{ + wchar[66] tmp = void; + + return format (tmp, i, t, f).dup; +} + +/****************************************************************************** + + Template wrapper to make life simpler. Returns a text version + of the provided value. + + See format() for details + +******************************************************************************/ + +dchar[] toString32 (long i, Style t=Style.Signed, Flags f=Flags.None) +{ + dchar[66] tmp = void; + + return format (tmp, i, t, f).dup; +} + +/******************************************************************************* + + Style numeric values into the provided output buffer. The + following types are supported: + + Unsigned - unsigned decimal + Signed - signed decimal + Octal - octal + Hex - lowercase hexadecimal + HexUpper - uppercase hexadecimal + Binary - binary + + Modifiers supported include: + + Prefix - prefix the conversion with a type identifier + Plus - prefix positive decimals with a '+' + Space - prefix positive decimals with one space + Zero - left-pad the number with zeros + Throw - throw an exception when output would be truncated + + The provided 'dst' buffer should be sufficiently large + enough to house the output. 
A 64-element array is often
        the maximum required (for a padded binary 64-bit string);
        allow for 66 elements where a radix prefix is requested too.

        An unknown style, or a 'dst' too small for the output, yields
        a "{...}" diagnostic string in place of the formatted number.
        Where Flags.Throw is set, an IllegalArgumentException is
        thrown instead.

        Returns a slice of 'dst' holding the formatted text, which is
        right-aligned within the buffer.

*******************************************************************************/

// accepts any value type U and forwards to the long-valued overload
T[] format(T, U=long) (T[] dst, U i, Style fmt=Style.Signed, Flags flags=Flags.None)
{return format!(T)(dst, i, fmt, flags);}

T[] format(T) (T[] dst, long i, Style fmt=Style.Signed, Flags flags=Flags.None)
{
        T[]     prefix;
        auto    len = dst.length;

        // report a failure: throw where the caller asked for Flags.Throw,
        // otherwise hand back the diagnostic text. This has to be a
        // non-static nested function so that it can see 'flags'; testing
        // the constant (1 & Flags.Throw) is always zero and never throws
        T[] error (T[] msg)
        {
                if (flags & Flags.Throw)
                    throw new IllegalArgumentException ("Integer.format :: invalid arguments");
                return msg;
        }

        // must have some buffer space to operate within!
        if (len)
           {
           uint radix;
           T[]  numbers = "0123456789abcdef";

           // pre-conversion setup
           switch (cast(byte) fmt)
                  {
                  case 'd':
                  case 'D':
                       if (i < 0)
                          {
                          prefix = "-";
                          i = -i;
                          }
                       else
                          if (flags & Flags.Space)
                              prefix = " ";
                          else
                             if (flags & Flags.Plus)
                                 prefix = "+";
                       // fall through!
                  case 'u':
                  case 'U':
                       radix = 10;
                       break;

                  case 'b':
                  case 'B':
                       radix = 2;
                       if (flags & Flags.Prefix)
                           prefix = "0b";
                       break;

                  case 'o':
                  case 'O':
                       radix = 8;
                       if (flags & Flags.Prefix)
                           prefix = "0o";
                       break;

                  case 'x':
                       radix = 16;
                       if (flags & Flags.Prefix)
                           prefix = "0x";
                       break;

                  case 'X':
                       radix = 16;
                       numbers = "0123456789ABCDEF";
                       if (flags & Flags.Prefix)
                           prefix = "0X";
                       break;

                  default:
                        return error (cast(T[])"{unknown format '"~cast(T)fmt~"'}");
                  }

           // convert number to text, writing digits backwards from the
           // end of the buffer. 'len' is decremented per additional digit
           // and reaches zero where the buffer runs out
           T* p = dst.ptr + len;
           if (uint.max >= cast(ulong) i)
              {
              // value fits in 32 bits: use uint arithmetic
              uint v = cast (uint) i;
              do {
                 *--p = numbers[v % radix];
                 } while ((v /= radix) && --len);
              }
           else
              {
              ulong v = cast (ulong) i;
              do {
                 *--p = numbers[cast(uint) (v % radix)];
                 } while ((v /= radix) && --len);
              }
           }

        // are we about to overflow?
        if (len > prefix.length)
           {
           // 'len' becomes the index where the prefix is to be written
           len -= prefix.length + 1;

           // prefix number with zeros?
           if (flags & Flags.Zero)
              {
              // fill the gap between prefix and digits; the prefix then
              // sits at the very start of the buffer
              dst [prefix.length .. len + prefix.length] = '0';
              len = 0;
              }

           // write optional prefix string ...
           dst [len .. len + prefix.length] = prefix[];
           }
        else
           return error ("{output width too small}");

        // return slice of provided output buffer
        return dst [len .. $];
}


/******************************************************************************

        Parse an integer value from the provided 'digits' string.

        The string is inspected for a sign and an optional radix
        prefix. A radix may be provided as an argument instead,
        whereupon it must match the prefix (where present). When
        radix is set to zero, conversion will default to decimal.

        A non-null 'ate' will return the number of characters used
        to construct the returned value.

******************************************************************************/

// accepts any radix type U and forwards to the uint-radix overload
long parse(T, U=uint) (T[] digits, U radix=0, uint* ate=null)
{return parse!(T)(digits, radix, ate);}

long parse(T) (T[] digits, uint radix=0, uint* ate=null)
{
        bool sign;

        // trim() consumes whitespace, sign and radix prefix, and settles
        // the radix; convert() then handles the digits which follow
        auto eaten = trim (digits, sign, radix);
        auto value = convert (digits[eaten..$], radix, ate);

        // convert() stored its own count in 'ate': add what trim() consumed
        if (ate)
            *ate += eaten;

        return cast(long) (sign ? -value : value);
}

/******************************************************************************

        Convert the provided 'digits' into an integer value,
        without checking for a sign or radix. The radix defaults
        to decimal (10).

        Returns the value and updates 'ate' with the number of
        characters consumed.
+ +******************************************************************************/ + +ulong convert(T, U=uint) (T[] digits, U radix=10, uint* ate=null) +{return convert!(T)(digits, radix, ate);} + +ulong convert(T) (T[] digits, uint radix=10, uint* ate=null) +{ + uint eaten; + ulong value; + + foreach (c; digits) + { + if (c >= '0' && c <= '9') + {} + else + if (c >= 'a' && c <= 'f') + c -= 39; + else + if (c >= 'A' && c <= 'F') + c -= 7; + else + break; + + if ((c -= '0') < radix) + { + value = value * radix + c; + ++eaten; + } + else + break; + } + + if (ate) + *ate = eaten; + + return value; +} + + +/****************************************************************************** + + Strip leading whitespace, extract an optional +/- sign, + and an optional radix prefix. If the radix value matches + an optional prefix, or the radix is zero, the prefix will + be consumed and assigned. Where the radix is non zero and + does not match an explicit prefix, the latter will remain + unconsumed. Otherwise, radix will default to 10. + + Returns the number of characters consumed. + +******************************************************************************/ + +uint trim(T, U=uint) (T[] digits, inout bool sign, inout U radix) +{return trim!(T)(digits, sign, radix);} + +uint trim(T) (T[] digits, inout bool sign, inout uint radix) +{ + T c; + T* p = digits.ptr; + int len = digits.length; + + if (len) + { + // strip off whitespace and sign characters + for (c = *p; len; c = *++p, --len) + if (c is ' ' || c is '\t') + {} + else + if (c is '-') + sign = true; + else + if (c is '+') + sign = false; + else + break; + + // strip off a radix specifier also? 
+ auto r = radix; + if (c is '0' && len > 1) + switch (*++p) + { + case 'x': + case 'X': + r = 16, ++p; + break; + + case 'b': + case 'B': + r = 2, ++p; + break; + + case 'o': + case 'O': + r = 8, ++p; + break; + + default: + break; + } + + // default the radix to 10 + if (r is 0) + radix = 10; + else + // explicit radix must match (optional) prefix + if (radix != r) + if (radix) + --p; + else + radix = r; + } + + // return number of characters eaten + return (p - digits.ptr); +} + + +/****************************************************************************** + + quick & dirty text-to-unsigned int converter. Use only when you + know what the content is, or use parse() or convert() instead. + + Return the parsed uint + +******************************************************************************/ + +uint atoi(T) (T[] s) +{ + uint value; + + foreach (c; s) + if (c >= '0' && c <= '9') + value = value * 10 + (c - '0'); + else + break; + return value; +} + + +/****************************************************************************** + + quick & dirty unsigned to text converter, where the provided output + must be large enough to house the result (10 digits in the largest + case). For mainstream use, consider utilizing format() instead. + + Returns a populated slice of the provided output + +******************************************************************************/ + +T[] itoa(T, U=uint) (T[] output, U value) +{return itoa!(T)(output, value);} + +T[] itoa(T) (T[] output, uint value) +{ + T* p = output.ptr + output.length; + + do { + *--p = value % 10 + '0'; + } while (value /= 10); + return output[p-output.ptr .. 
$]; +} + + +/****************************************************************************** + +******************************************************************************/ + +debug (UnitTest) +{ + unittest + { + char[64] tmp; + + assert (toInt("1") is 1); + assert (toLong("1") is 1); + assert (toInt("1", 10) is 1); + assert (toLong("1", 10) is 1); + + assert (atoi ("12345") is 12345); + assert (itoa (tmp, 12345) == "12345"); + + assert(parse( "0"w ) == 0 ); + assert(parse( "1"w ) == 1 ); + assert(parse( "-1"w ) == -1 ); + assert(parse( "+1"w ) == 1 ); + + // numerical limits + assert(parse( "-2147483648" ) == int.min ); + assert(parse( "2147483647" ) == int.max ); + assert(parse( "4294967295" ) == uint.max ); + + assert(parse( "-9223372036854775808" ) == long.min ); + assert(parse( "9223372036854775807" ) == long.max ); + assert(parse( "18446744073709551615" ) == ulong.max ); + + // hex + assert(parse( "a", 16) == 0x0A ); + assert(parse( "b", 16) == 0x0B ); + assert(parse( "c", 16) == 0x0C ); + assert(parse( "d", 16) == 0x0D ); + assert(parse( "e", 16) == 0x0E ); + assert(parse( "f", 16) == 0x0F ); + assert(parse( "A", 16) == 0x0A ); + assert(parse( "B", 16) == 0x0B ); + assert(parse( "C", 16) == 0x0C ); + assert(parse( "D", 16) == 0x0D ); + assert(parse( "E", 16) == 0x0E ); + assert(parse( "F", 16) == 0x0F ); + assert(parse( "FFFF", 16) == ushort.max ); + assert(parse( "ffffFFFF", 16) == uint.max ); + assert(parse( "ffffFFFFffffFFFF", 16u ) == ulong.max ); + // oct + assert(parse( "55", 8) == 055 ); + assert(parse( "100", 8) == 0100 ); + // bin + assert(parse( "10000", 2) == 0x10 ); + // trim + assert(parse( " \t20") == 20 ); + assert(parse( " \t-20") == -20 ); + assert(parse( "- \t 20") == -20 ); + // recognise radix prefix + assert(parse( "0xFFFF" ) == ushort.max ); + assert(parse( "0XffffFFFF" ) == uint.max ); + assert(parse( "0o55") == 055 ); + assert(parse( "0O55" ) == 055 ); + assert(parse( "0b10000") == 0x10 ); + assert(parse( "0B10000") == 0x10 ); + + 
// prefix tests + char[] str = "0x"; + assert(parse( str[0..1] ) == 0 ); + assert(parse("0x10", 10) == 0); + assert(parse("0b10", 10) == 0); + assert(parse("0o10", 10) == 0); + assert(parse("0b10") == 0b10); + assert(parse("0o10") == 010); + assert(parse("0b10", 2) == 0b10); + assert(parse("0o10", 8) == 010); + + // format tests + assert (format (tmp, 12345L) == "12345"); + assert (format (tmp, 0) == "0"); + assert (format (tmp, 0x10101L, Style.Hex) == "10101"); + assert (format (tmp, 0xfafaL, Style.Hex) == "fafa"); + assert (format (tmp, 0xfafaL, Style.HexUpper, Flags.Prefix) == "0XFAFA"); + assert (format (tmp, -1L, Style.HexUpper, Flags.Prefix) == "0XFFFFFFFFFFFFFFFF"); + assert (format (tmp, -101L) == "-101"); + assert (format (tmp, 101L, Style.Signed, Flags.Plus) == "+101"); + assert (format (tmp, 101L, Style.Signed, Flags.Space) == " 101"); + assert (format (tmp[0..8], 0x5L, Style.Binary, Flags.Prefix | Flags.Zero) == "0b000101"); + + assert (format (tmp[0..8], -1, Style.Binary, Flags.Prefix | Flags.Zero) == "{output width too small}"); + assert (format (tmp[0..2], 0x3, Style.Binary, Flags.Throw) == "11"); + assert (format (tmp[0..4], 0x3, Style.Binary, Flags.Prefix | Flags.Zero | Flags.Throw) == "0b11"); + assert (format (tmp[0..5], 0x3, Style.Binary, Flags.Prefix | Flags.Zero | Flags.Throw) == "0b011"); + assert (format (tmp[0..5], 0x3, Style.Binary, Flags.Zero | Flags.Throw) == "00011"); + } +} +