# HG changeset patch
# User Diggory Hardy <diggory.hardy@gmail.com>
# Date 1215194656 -3600
# Node ID 7fc0a8295c83dde40d2ff04150c7487a352cc0e8
# Parent  ead4afc6d0b885f99476a935c8bc7bfffe7fd9f4
Moved my parseTo and parseFrom modules from tango.scrapple to mde in order to reduce dependencies.

diff -r ead4afc6d0b8 -r 7fc0a8295c83 doc/Readme.txt
--- a/doc/Readme.txt	Fri Jul 04 18:53:28 2008 +0100
+++ b/doc/Readme.txt	Fri Jul 04 19:04:16 2008 +0100
@@ -10,7 +10,6 @@
 -- Dependencies --
 Compile-time:
 tango (at least r3697)
-tango.scrapple (newer than current r58!)
 derelict (at least r300)
 
 Run-time:
diff -r ead4afc6d0b8 -r 7fc0a8295c83 mde/font/font.d
--- a/mde/font/font.d	Fri Jul 04 18:53:28 2008 +0100
+++ b/mde/font/font.d	Fri Jul 04 19:04:16 2008 +0100
@@ -29,7 +29,7 @@
 import derelict.freetype.ft;
 import derelict.opengl.gl;
 
-import tango.scrapple.text.convert.parseTo : parseTo;
+import mde.mergetag.parse.parseTo : parseTo;
 import tango.stdc.stringz;
 import Util = tango.text.Util;
 import tango.util.log.Log : Log, Logger;
diff -r ead4afc6d0b8 -r 7fc0a8295c83 mde/gui/widget/Window.d
--- a/mde/gui/widget/Window.d	Fri Jul 04 18:53:28 2008 +0100
+++ b/mde/gui/widget/Window.d	Fri Jul 04 19:04:16 2008 +0100
@@ -25,8 +25,8 @@
 import mde.gui.renderer.createRenderer;
 
 import mt = mde.mergetag.DataSet;
-import tango.scrapple.text.convert.parseTo : parseTo;
-import tango.scrapple.text.convert.parseFrom : parseFrom;
+import mde.mergetag.parse.parseTo : parseTo;
+import mde.mergetag.parse.parseFrom : parseFrom;
 
 import tango.util.log.Log : Log, Logger;
 
diff -r ead4afc6d0b8 -r 7fc0a8295c83 mde/input/Config.d
--- a/mde/input/Config.d	Fri Jul 04 18:53:28 2008 +0100
+++ b/mde/input/Config.d	Fri Jul 04 19:04:16 2008 +0100
@@ -16,13 +16,12 @@
 /// This module contains a class for holding configs and handles saving, loading and editing.
 module mde.input.Config;
 
-debug import tango.scrapple.text.convert.parseFrom : parseFrom;
-
 import mde.input.exception;
 
 import MT = mde.mergetag.Reader;
 import mde.setup.paths;
-import tango.scrapple.text.convert.parseTo : parseTo;
+import mde.mergetag.parse.parseTo : parseTo;
+debug import mde.mergetag.parse.parseFrom : parseFrom;
 
 import tango.util.log.Log : Log, Logger;
 import tango.util.collection.TreeBag : TreeBag;
diff -r ead4afc6d0b8 -r 7fc0a8295c83 mde/lookup/Options.d
--- a/mde/lookup/Options.d	Fri Jul 04 18:53:28 2008 +0100
+++ b/mde/lookup/Options.d	Fri Jul 04 19:04:16 2008 +0100
@@ -21,16 +21,15 @@
 */
 module mde.lookup.Options;
 
+import mde.setup.paths;
 import mde.exception;
 
 import mde.mergetag.Reader;
 import mde.mergetag.Writer;
 import mde.mergetag.DataSet;
 import mde.mergetag.exception;
-import mde.setup.paths;
-
-import tango.scrapple.text.convert.parseTo : parseTo;
-import tango.scrapple.text.convert.parseFrom : parseFrom;
+import mde.mergetag.parse.parseTo : parseTo;
+import mde.mergetag.parse.parseFrom : parseFrom;
 
 import tango.core.Exception : ArrayBoundsException;
 import tango.util.log.Log : Log, Logger;
diff -r ead4afc6d0b8 -r 7fc0a8295c83 mde/lookup/Translation.d
--- a/mde/lookup/Translation.d	Fri Jul 04 18:53:28 2008 +0100
+++ b/mde/lookup/Translation.d	Fri Jul 04 19:04:16 2008 +0100
@@ -38,15 +38,15 @@
 module mde.lookup.Translation;
 
 import mde.options;
+import mde.resource.paths;
 import mde.exception;
 
 import mde.mergetag.DataSet;
 import mde.mergetag.Reader;
 import mde.mergetag.exception;
-import mde.resource.paths;
+import mde.mergetag.parse.parseTo;
 
 import tango.util.log.Log : Log, Logger;
-import tango.scrapple.text.convert.parseTo;
 
 /** The translation class
 *
diff -r ead4afc6d0b8 -r 7fc0a8295c83 mde/mergetag/DefaultData.d
--- a/mde/mergetag/DefaultData.d	Fri Jul 04 18:53:28 2008 +0100
+++ b/mde/mergetag/DefaultData.d	Fri Jul 04 19:04:16 2008 +0100
@@ -21,8 +21,8 @@
 public import mde.mergetag.iface.IDataSection;
 import mde.mergetag.exception;
 
-import tango.scrapple.text.convert.parseTo : parseTo;
-import tango.scrapple.text.convert.parseFrom : parseFrom;
+import mde.mergetag.parse.parseTo : parseTo;
+import mde.mergetag.parse.parseFrom : parseFrom;
 
 
 /**
diff -r ead4afc6d0b8 -r 7fc0a8295c83 mde/mergetag/iface/IDataSection.d
--- a/mde/mergetag/iface/IDataSection.d	Fri Jul 04 18:53:28 2008 +0100
+++ b/mde/mergetag/iface/IDataSection.d	Fri Jul 04 19:04:16 2008 +0100
@@ -37,7 +37,7 @@
  * tag was last loaded), while ignoring unsupported types. A different
  * implementation could filter out the tags desired and use them directly, and ignore the rest.
  *
- * The tango.scrapple.text.convert.parseTo module provides a useful set of templated functions to
+ * The mde.mergetag.parse.parseTo module provides a useful set of templated functions to
  * convert the data accordingly. It is advised to keep the type definitions as defined in the file-
  * format except for user-defined types, although this isn't necessary for library operation
  * (addTag and writeAll are solely responsible for using and setting the type, ID and data fields).
diff -r ead4afc6d0b8 -r 7fc0a8295c83 mde/mergetag/mtunittest.d
--- a/mde/mergetag/mtunittest.d	Fri Jul 04 18:53:28 2008 +0100
+++ b/mde/mergetag/mtunittest.d	Fri Jul 04 19:04:16 2008 +0100
@@ -21,9 +21,8 @@
     import mde.mergetag.Writer;
     import mde.mergetag.DataSet;
     import mde.mergetag.DefaultData;
-    
-    import tango.scrapple.text.convert.parseTo : parseTo;
-    import tango.scrapple.text.convert.parseFrom : parseFrom;
+    import mde.mergetag.parse.parseTo : parseTo;
+    import mde.mergetag.parse.parseFrom : parseFrom;
     
     import tango.io.FilePath;
     import tango.util.log.Log : Log, Logger;
diff -r ead4afc6d0b8 -r 7fc0a8295c83 mde/mergetag/parse/parseFrom.d
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/mergetag/parse/parseFrom.d	Fri Jul 04 19:04:16 2008 +0100
@@ -0,0 +1,373 @@
+/**************************************************************************************************
+ * copyright: Copyright (c) 2007-2008 Diggory Hardy.
+ *
+ * author: Diggory Hardy, diggory.hardy@gmail.com
+ *
+ * license: BSD style: $(LICENSE)
+ *
+ * This contains templates for converting various data-types to a char[].
+ *
+ * parseFrom is roughly the inverse of $(B parseTo).
+ * It is also available in tango.scrapple.
+ *
+ * This module basically implements the following templated function for most basic D types:
+ * bool, byte, short, int, long, ubyte, ushort, uint, ulong, float, double, real, char, wchar,
+ * dchar.
+ * It also supports arrays of any supported type (including of other arrays) and has special
+ * handling for strings (char[]) and binary (ubyte[]) data-types.
+ * -----------------------------
+ * char[] parseFrom(T) (T value);
+ * -----------------------------
+ *
+ * $(I value) is the value to convert; it is converted to a string and returned.
+ *
+ * Syntax:
+ * The syntax is the same as parseTo; but since this module only generates formatted output
+ * knowing the syntax shouldn't be necessary. There is currently no way to specify options like
+ * output base for ints, precision of floats, or
+ * whether to write char[] or ubyte[] types as arrays or in their more compact forms.
+ *
+ * Throws:
+ * On errors, an exception is thrown (UnicodeException or IllegalArgumentException). No other
+ * exceptions should be thrown.
+ *
+ * Remarks:
+ * There is currently no support for outputting wchar/dchar strings. There are, however, unicode
+ * conversions for converting UTF-16/32 to UTF-8. Be warned though that many wchar/dchar characters
+ * (any that are non-ascii) will not fit in a single char and an exception will be thrown.
+ *
+ * The code does involve some heap activity; this is necessary anyway for returning dynamic arrays.
+ * (Slices of a pre-allocated array could be returned instead, but for many uses would have to be
+ * duplicated before storage, leading to less efficient operation.)
+ * Most memory allocation has been kept to a minimum.
+ *
+ * Unlike the parseTo!() module, the parseFrom templates could be re-written to use static-ifs
+ * instead of type specialisation, thus allowing type inference. However I likely won't bother
+ * implementing this myself.
+ *
+ * Examples:
+ * ------------------------------------------------------------------------------------------------
+ * // Examples are printed via Cout.
+ *
+ * // Basic examples:
+ * Cout (parseFrom!(byte) (-13)).newline;                       // -13
+ * Cout (parseFrom!(real) (2.56e11)).newline;                   // 2.55999999999999990000e+11
+ * Cout (parseFrom!(double[]) ([0.0, 1.0, 2.0, 3.0])).newline;  // [0.00000000000000000,1.00000000000000000,2.00000000000000000,3.00000000000000000]
+ * Cout (parseFrom!(bool[]) ([true,false,false])).newline;      // [true,false,false]
+ *
+ * // String and ubyte[] special syntaxes (always used):
+ * Cout (parseFrom!(char[]) ("A string.")).newline;             // "A string." (including quotes)
+ * Cout (parseFrom!(ubyte[]) (cast(ubyte[]) [5u, 0xF1u, 0x10u])).newline;   // 0x05f110
+ *
+ * // Associative arrays:
+ * Cout (parseFrom!(char[][byte]) ([-1:"negative one"[], 0:"zero", 1:"one"])).newline;  // [0:"zero",1:"one",-1:"negative one"]
+ *
+ * // No limit on complexity...
+ * char[] somethingComplicated = parseFrom!(real[][][bool[int[][]]]) (...);
+ * ------------------------------------------------------------------------------------------------
+ *************************************************************************************************/
+
+module mde.mergetag.parse.parseFrom;
+
+// tango imports
+import tango.core.Exception : UnicodeException, IllegalArgumentException;
+import cInt = tango.text.convert.Integer;
+import cFloat = tango.text.convert.Float;
+import Utf = tango.text.convert.Utf;
+import Util = tango.text.Util;
+
+//BEGIN parseFrom templates
+/* Idea: could extend parseFrom with a second parameter, containing flags for things like base to output.
+ * Unnecessary for mergetag though.
+*/
+
+// Associative arrays
+
+char[] parseFrom(T : T[S], S) (T[S] val) {	// Formats an AA as [key:value,...]; "[]" when empty.
+    char[] ret;
+    // Initial size is only a guess: values plus [,:] punctuation (must be at least 2 for "[]").
+    ret.length = val.length * (defLength!(T) + defLength!(S) + 2) + 2;
+    ret[0] = '[';
+    uint i = 1;	// next free index in ret
+    foreach (S k, T v; val) {
+        char[] s = parseFrom!(S) (k) ~ ":" ~ parseFrom!(T) (v);
+        i += s.length;
+        while (i+1 >= ret.length) ret.length = ret.length * 2;	// loop: one doubling may not fit a long entry
+        ret[i-s.length .. i] = s;
+        ret[i++] = ',';
+    }
+    if (i == 1) ++i;	// empty AA: there is no trailing comma to overwrite
+    ret[i-1] = ']';	// replaces last comma (or fills index 1 when empty)
+    return ret[0..i];
+}
+debug (UnitTest) unittest {
+    char[] X = parseFrom!(char[][char]) (['a':cast(char[])"animal", 'b':['b','u','s']]);
+    char[] Y = `['a':"animal",'b':"bus"]`;
+    assert (X == Y);
+}
+
+
+// Arrays
+
+char[] parseFrom(T : T[]) (T[] val) {	// Formats an array as [a,b,...]; "[]" when empty.
+    char[] ret;
+    // Initial size is only a guess: elements plus commas and brackets (must be at least 2 for "[]").
+    ret.length = val.length * (defLength!(T) + 1) + 2;
+    ret[0] = '[';
+    uint i = 1;	// next free index in ret
+    foreach (T x; val) {
+        char[] s = parseFrom!(T) (x);
+        i += s.length;
+        while (i+1 >= ret.length) ret.length = ret.length * 2;	// loop: one doubling may not fit a long element
+        ret[i-s.length .. i] = s;
+        ret[i++] = ',';
+    }
+    if (i == 1) ++i;	// empty array: there is no trailing comma to overwrite
+    ret[i-1] = ']';	// replaces last comma (or fills index 1 when empty)
+    return ret[0..i];
+}
+
+// Strings (array special case): written as a double-quoted literal with backslash escapes.
+char[] parseFrom(T : char[]) (T val) {
+    char[] ret = new char[val.length * 2 + 2];	// Worst case: every char escaped (2 each) plus 2 quotes, so ALWAYS enough.
+    ret[0] = '"';
+    uint i = 1;	// write position in ret
+    for (uint t = 0; t < val.length;) {	// t is the read position in val; advanced inside the loop
+        // process a block of non-escapable characters
+        uint s = t;
+        while (t < val.length && !isEscapableChar(val[t]))
+            ++t;	// skip all non-escapable chars
+        uint j = i + t - s;
+        ret[i..j] = val[s..t];	// copy a block
+        i = j;
+        // process a block of escapable characters
+        while (t < val.length && isEscapableChar(val[t])) {
+            ret[i++] = '\\';				// backslash; increment i
+            ret[i++] = replaceEscapableChar(val[t++]);	// character; increment i and t
+        }
+    }
+    ret[i++] = '"';
+    return ret[0..i];	// slice off the unused tail of the buffer
+}
+// Unicode conversions for strings:
+char[] parseFrom(T : dchar[]) (T val) {
+    // May throw a UnicodeException; don't bother catching and rethrowing:
+    return parseFrom!(char[]) (Utf.toString (val));
+}
+char[] parseFrom(T : wchar[]) (T val) {
+    // May throw a UnicodeException; don't bother catching and rethrowing:
+    return parseFrom!(char[]) (Utf.toString (val));
+}
+
+// Binary (array special case)
+char[] parseFrom(T : ubyte[]) (T val) {	// Writes binary data as "0x" followed by two lower-case hex digits per byte.
+    static const char[16] hexDigits = "0123456789abcdef";
+    
+    char[] text = new char[2 + 2 * val.length];	// prefix plus one digit pair per byte; exact length
+    text[0] = '0';
+    text[1] = 'x';
+    
+    foreach (size_t n, ubyte octet; val) {
+        text[2 + 2*n] = hexDigits[octet >> 4];	// high nibble
+        text[3 + 2*n] = hexDigits[octet & 0x0F];	// low nibble
+    }
+    return text;
+}
+
+debug (UnitTest) unittest {
+    // generic array stuff:
+    assert (parseFrom!(double[]) ([1.0, 1.0e-10]) == `[1.00000000000000000,0.10000000000000000e-09]`);
+    assert (parseFrom!(double[]) (cast(double[]) []) == `[]`);		// empty array
+    
+    // char[] conversions, with commas, escape sequences and multichar UTF8 characters:
+    assert (parseFrom!(char[][]) ([ ".\""[], [',','\''] ,"!\b€" ]) == `[".\"",",\'","!\b€"]`);
+    
+    // wchar[] and dchar[] conversions:
+    // The characters were pretty-much pulled at random from unicode tables.
+    // The last few cause some weird (display only) effects in my editor.
+    assert (parseFrom!(wchar[]) ("Test string: ¶α؟अกሀ搀"w) == "\"Test string: ¶α؟अกሀ搀\"");
+    assert (parseFrom!(dchar[]) ("Test string: ¶α؟अกሀ搀"d) == "\"Test string: ¶α؟अกሀ搀\"");
+    
+    assert (parseFrom!(ubyte[]) (cast(ubyte[]) [0x01, 0xF2, 0xAC]) == `0x01f2ac`);	// ubyte[] special notation
+}
+
+
+// Basic types
+
+// Char: written as a single-quoted character, backslash-escaped when isEscapableChar says so.
+char[] parseFrom(T : char) (T val) {
+    // NOTE: if (val > 127) "is invalid UTF-8 single char"
+    // However we don't know what this is for, in particular if it will be recombined with other chars later
+    
+    // Can't return reference to static array; making dynamic is cheaper than copying.
+    char[] ret = new char[4];	// max length for an escaped char
+    ret[0] = '\'';
+    
+    if (!isEscapableChar (val)) {
+        ret[1] = val;
+        ret[2] = '\'';
+        return ret[0..3];	// plain char: only three of the four chars are used
+    } else {
+        ret[1] = '\\';
+        ret[2] = replaceEscapableChar (val);	// letter/symbol that follows the backslash
+        ret[3] = '\'';
+        return ret;
+    }
+    assert (false);	// not reached: both branches above return
+}
+// Basic unicode conversions for wide-chars.
+// NOTE: any other wide-chars will not fit in a single UTF-8 encoded char.
+const char[] WIDE_CHAR_ERROR = "Error: unicode non-ascii character cannot be converted to a single UTF-8 char";
+char[] parseFrom(T : wchar) (T val) {
+    if (val <= 127u) return parseFrom!(char) (cast(char) val);	// this char can be converted
+    else throw new UnicodeException (WIDE_CHAR_ERROR, 0);
+}
+char[] parseFrom(T : dchar) (T val) {
+    if (val <= 127u) return parseFrom!(char) (cast(char) val);	// this char can be converted
+    else throw new UnicodeException (WIDE_CHAR_ERROR, 0);
+}
+debug (UnitTest) unittest {
+    assert (parseFrom!(char) ('\'') == "\'\\\'\'");
+    assert (parseFrom!(wchar) ('X') == "'X'");
+    assert (parseFrom!(dchar) ('X') == "'X'");
+}
+
+// Bool
+char[] parseFrom(T : bool) (T val) {
+    if (val) return "true";
+    else return "false";
+}
+// too simple to need a unittest
+
+// Signed ints
+char[] parseFrom(T : byte) (T val) {
+    return formatLong (val);
+}
+char[] parseFrom(T : short) (T val) {
+    return formatLong (val);
+}
+char[] parseFrom(T : int) (T val) {
+    return formatLong (val);
+}
+char[] parseFrom(T : long) (T val) {
+    return formatLong (val);
+}
+// Unsigned ints
+char[] parseFrom(T : ubyte) (T val) {
+    return formatLong (val);
+}
+char[] parseFrom(T : ushort) (T val) {
+    return formatLong (val);
+}
+char[] parseFrom(T : uint) (T val) {
+    return formatLong (val);
+}
+char[] parseFrom(T : ulong) (T val) {
+    if (val > cast(ulong) long.max)
+        throw new IllegalArgumentException ("No handling available for ulong where value > long.max");
+    return formatLong (val);
+}
+debug (UnitTest) unittest {
+    assert (parseFrom!(byte) (cast(byte) -5) == "-5");
+    // annoyingly, octal syntax differs from D (blame tango):
+    assert (parseFrom!(uint[]) ([0b0100u,0724,0xFa59c,0xFFFFFFFF,0]) == "[4,468,1025436,4294967295,0]");
+}
+
+// Floats
+/* Old calculation (not used):
+t.dig+2+4+3	// should be sufficient length (mant + (neg, dot, e, exp neg) + exp (3,4,5 for float,double,real resp.)) */
+char[] parseFrom(T : float) (T val) {
+    char[] ret = new char[32];	// minimum allowed by assert in format
+    return cFloat.format (ret, val, T.dig+2, 1);	// from old C++ tests, T.dig+2 gives best(?) accuracy
+}
+char[] parseFrom(T : double) (T val) {
+    char[] ret = new char[32];
+    return cFloat.format (ret, val, T.dig+2, 1);
+}
+char[] parseFrom(T : real) (T val) {
+    char[] ret = new char[32];
+    return cFloat.format (ret, val, T.dig+2, 1);
+}
+debug (UnitTest) unittest {
+    // NOTE: these numbers are not particularly meaningful.
+    assert (parseFrom!(float) (0.0f) == "0.00000000");
+    assert (parseFrom!(double) (-1e25) == "-1.00000000000000000e+25");
+    assert (parseFrom!(real) (cast(real) 4.918e300) == "4.91800000000000000000e+300");
+}
+//END parseFrom templates
+
+//BEGIN Length templates
+/* This template provides the initial length for strings for formatting various types. These strings
+ * can be expanded; this value is intended to cover 90% of cases or so.
+ *
+ * NOTE: This template was intended to provide specialisations for different types.
+ * This one value should do reasonably well for most types.
+ */
+private {
+    template defLength(T)        { const uint defLength = 20; }
+    template defLength(T : char) { const uint defLength = 4;  }
+    template defLength(T : bool) { const uint defLength = 5;  }
+}
+//END Length templates
+
+//BEGIN Utility funcs
+private char[] formatLong (long val) {
+    // May throw an IllegalArgumentException; don't bother catching and rethrowing:
+    return cInt.toString (val);
+}
+private bool isEscapableChar (char c) {	// true for \a..\r (7..13) and for the quote and backslash characters
+    return (('\a' <= c && c <= '\r') || c == '\\' || c == '\'' || c == '\"');
+}
+// Throws on unsupported escape sequences; however this should never actually happen within parseFrom.
+private char replaceEscapableChar (char c) {
+    // This code was generated:
+    if (c <= '\v') {
+        if (c <= '\b') {
+            if (c == '\a') {
+                return 'a';
+            } else if (c == '\b') {
+                return 'b';
+            }
+        } else {
+            if (c == '\t') {
+                return 't';
+            } else if (c == '\n') {
+                return 'n';
+            } else if (c == '\v') {
+                return 'v';
+            }
+        }
+    } else {
+        if (c <= '\r') {
+            if (c == '\f') {
+                return 'f';
+            } else if (c == '\r') {
+                return 'r';
+            }
+        } else {
+            if (c == '\"') {
+                return '\"';
+            } else if (c == '\'') {
+                return '\'';
+            } else if (c == '\\') {
+                return '\\';
+            }
+        }
+    }
+    
+    // if we haven't returned:
+    throw new IllegalArgumentException ("Character is not escapable (internal parseFrom error)");
+}
+
+debug (UnitTest) {
+    import tango.io.Console;
+    
+    unittest {
+        Cout ("Running unittest: parseFrom ...").flush;
+        
+        assert (parseFrom!(char[]) ("\a\b\t\n\v\f\r\"\'\\") == "\"\\a\\b\\t\\n\\v\\f\\r\\\"\\\'\\\\\"");
+        
+        Cout (" complete").newline;
+    }
+}
+//END Utility funcs
diff -r ead4afc6d0b8 -r 7fc0a8295c83 mde/mergetag/parse/parseTo.d
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/mergetag/parse/parseTo.d	Fri Jul 04 19:04:16 2008 +0100
@@ -0,0 +1,515 @@
+/**************************************************************************************************
+ * copyright: Copyright (c) 2007-2008 Diggory Hardy.
+ *
+ * author: Diggory Hardy, diggory.hardy@gmail.com
+ *
+ * license: BSD style: $(LICENSE)
+ *
+ * This contains templates for converting a char[] to various data-types.
+ *
+ * parseTo is roughly the inverse of $(B parseFrom) and should read any data output by $(B parseFrom).
+ * It is also available in tango.scrapple.
+ *
+ * This module basically implements the following templated function for most basic D types:
+ * bool, byte, short, int, long, ubyte, ushort, uint, ulong, float, double, real, char.
+ * It also supports arrays and associative arrays of any supported type (including of other arrays)
+ * and has special handling for strings (char[]) and binary (ubyte[]) data-types.
+ * -----------------------------
+ * T parseTo(T) (char[] source);
+ * -----------------------------
+ *
+ * $(I source) is the string to parse, and data of the templated type that is read from the string
+ * is returned. See the examples to get a better idea of its use.
+ *
+ * Syntax:
+ * The syntax for parsing $(I source) is mostly the same used by D without any prefixes/suffixes
+ * (except 0x, 0b & 0o base specifiers). Also a special ubyte[] syntax is supported; see examples.
+ * The following escape sequences are supported for strings and characters: \' \" \\
+ * \a \b \f \n \r \t \v . Associative array literals use the same syntax as D, described here:
+ * $(LINK http://www.digitalmars.com/d/2.0/expression.html#AssocArrayLiteral). All whitespace is
+ * ignored (except of course within strings).
+ *
+ * There are also some public utility functions with their own documentation.
+ *
+ * Throws:
+ * On errors, a ParseException or a UnicodeException (both extend TextException) is thrown with a
+ * suitable message. No other exceptions should be thrown.
+ *
+ * Remarks:
+ * There is currently no support for reading wchar/dchar strings. There are, however, unicode
+ * conversions for converting UTF-8 to UTF-16/32. Be careful if converting on a char-by-char basis;
+ * such conversions cannot be used for non-ascii characters.
+ *
+ * Examples:
+ * ------------------------------------------------------------------------------------------------
+ * // Basic examples:
+ * ulong        a = parseTo!(ulong) ("20350");
+ * float        d = parseTo!(float) ("  1.2e-9 ");
+ * int[]        b = parseTo!(int[]) ("[0,1,2,3]");
+ *
+ * // String and char[] syntax:
+ * char[]       c = parseTo!(char[]) ("\"A string\"");
+ * char[]       e = parseTo!(char[]) ("['a','n','o','t','h','e','r', ' ' ,'s','t','r','i','n','g']");
+ *
+ * // These can be used interchangeably; here's a more complex example of an associative array:
+ * bool[char[]] f = parseTo!(bool[char[]]) ("[ \"one\":true, ['t','w','o']:false, \"three\":1, \"four\":000 ]");
+ *
+ * // There is also a special notation for ubyte[] types:
+ * // The digits following 0x must be in pairs and each specify one ubyte.
+ * assert ( parseTo!(ubyte[]) (`0x01F2AC`) == parseTo!(ubyte[]) (`[01 ,0xF2, 0xAC]`) );
+ *
+ * // There's no limit to the complexity!
+ * char[char[][][][char]][bool] z = ...; // don't expect me to write this!
+ * ------------------------------------------------------------------------------------------------
+ *************************************************************************************************/
+
+module mde.mergetag.parse.parseTo;
+
+// tango imports
+import tango.core.Exception : TextException, UnicodeException;
+import cInt = tango.text.convert.Integer;
+import cFloat = tango.text.convert.Float;
+import Utf = tango.text.convert.Utf;
+import Util = tango.text.Util;
+
+/**
+ * Base class for parseTo exceptions.
+ */
+class ParseException : TextException
+{
+    this( char[] msg )
+    {
+        super( msg );
+    }
+}
+
+
+//BEGIN parseTo templates
+
+// Associative arrays
+
+const char[] AA_ERR = "Invalid associative array: ";
+T[S] parseTo(T : T[S], S) (char[] src) {	// Parses "[key:value, ...]" into an associative array.
+    src = Util.trim(src);
+    if (src.length < 2 || src[0] != '[' || src[$-1] != ']')
+        throw new ParseException (AA_ERR ~ "not [ ... ]");	// bad braces.
+    
+    T[S] ret;
+    foreach (char[] pair; split (src[1..$-1])) {
+        uint i = 0;
+        while (i < pair.length) {	// advance to the first ':' outside any quoted string/char
+            char c = pair[i];
+            if (c == ':') break;
+            if (c == '\'' || c == '"') {	// string or character: skip to the matching quote
+                ++i;
+                while (i < pair.length && pair[i] != c) {
+                    if (pair[i] == '\\') {
+                        if (i+2 >= pair.length) throw new ParseException (AA_ERR ~ "unfinished escape sequence within string/char");
+                        ++i;	// escape seq.
+                    }
+                    ++i;
+                }
+                if (i == pair.length) {	// quote never closed, so no ':' can follow
+                    throw new ParseException (AA_ERR ~ "encountered [ ... KEY] (missing :DATA)");
+                }
+            }
+            ++i;
+        }
+        if (i == pair.length) {	// scanned the whole pair without finding ':'
+            throw new ParseException (AA_ERR ~ "encountered [ ... KEY] (missing :DATA)");
+        }
+        ret[parseTo!(S) (pair[0..i])] = parseTo!(T) (pair[i+1..$]);	// key is before the ':', data after it
+    }
+    return ret;
+}
+debug (UnitTest) unittest {
+    char[][char] X = parseTo!(char[][char]) (`['a':"animal", 'b':['b','u','s']]`);
+    char[][char] Y = ['a':cast(char[])"animal", 'b':['b','u','s']];
+    
+    //FIXME: when the compiler's fixed: http://d.puremagic.com/issues/show_bug.cgi?id=1671
+    // just assert (X == Y)
+    assert (X.length == Y.length);
+    assert (X.keys == Y.keys);
+    assert (X.values == Y.values);
+    //X.rehash; Y.rehash;	// doesn't make a difference
+    //assert (X == Y);		// fails (compiler bug)
+}
+
+
+// Arrays
+
+T[] parseTo(T : T[]) (char[] src) {
+    src = Util.trim(src);
+    if (src.length >= 2 && src[0] == '[' && src[$-1] == ']') return toArray!(T[]) (src);
+    throw new ParseException ("Invalid array: not [x, ..., z]");
+}
+
+// String (array special case)
+T parseTo(T : char[]) (char[] src) {
+    src = Util.trim(src);
+    if (src.length >= 2 && src[0] == '"' && src[$-1] == '"') {
+        src = src[1..$-1];
+        T ret;
+        ret.length = src.length;	// maximum length; retract to actual length later
+        uint i = 0;
+        for (uint t = 0; t < src.length;) {
+            // process a block of non-escaped characters
+            uint s = t;
+            while (t < src.length && src[t] != '\\') ++t;	// non-escaped characters
+            uint j = i + t - s;
+            ret[i..j] = src[s..t];	// copy a block
+            i = j;
+            
+            // process a block of escaped characters
+            while (t < src.length && src[t] == '\\') {
+                t++;
+                if (t == src.length) throw new ParseException ("Invalid string: ends \\\" !");	// next char is "
+                ret[i++] = replaceEscapedChar (src[t++]);	// throws if it's invalid
+            }
+        }
+        return ret[0..i];
+    }
+    else if (src.length >= 2 && src[0] == '[' && src[$-1] == ']') return toArray!(T) (src);
+    throw new ParseException ("Invalid string: not quoted (\"*\") or char array (['a',...,'c'])");
+}
+// Unicode conversions for strings:
+T parseTo(T : wchar[]) (char[] src) {
+    // May throw a UnicodeException; don't bother catching and rethrowing:
+    return Utf.toString16 (parseTo!(char[]) (src));
+}
+T parseTo(T : dchar[]) (char[] src) {
+    // May throw a UnicodeException; don't bother catching and rethrowing:
+    return Utf.toString32 (parseTo!(char[]) (src));
+}
+
+// Binary (array special case): accepts either a normal array literal or the compact 0x... hex form.
+T parseTo(T : ubyte[]) (char[] src) {
+    src = Util.trim(src);
+    // Standard case:
+    if (src.length >= 2 && src[0] == '[' && src[$-1] == ']') return toArray!(T) (src);
+    // Special case: sequence of hex digits, each pair of which is a ubyte
+    if (src.length >= 2 && src[0..2] == "0x") {
+        src = src[2..$];    // strip down to actual digits
+        
+        // Must be in pairs:
+        if (src.length % 2 == 1) throw new ParseException ("Invalid binary: odd number of chars");
+        
+        T ret;
+        ret.length = src.length / 2;	// exact
+        
+        // NOTE(review): pos is never advanced here, so readHexChar presumably takes it by reference - confirm.
+        for (uint i, pos; pos + 1 < src.length; ++i) {
+            ubyte x = readHexChar(src, pos) << 4;	// first digit of the pair: high nibble
+            x |= readHexChar(src, pos);	// second digit of the pair: low nibble
+            ret[i] = x;
+        }
+        return ret;
+    }
+    else throw new ParseException ("Invalid ubyte[]: not an array and doesn't start 0x");
+}
+
+debug (UnitTest) unittest {
+    assert (parseTo!(double[]) (`[1.0,1.0e-10]`) == [1.0, 1.0e-10]);   // generic array stuff
+    assert (parseTo!(double[]) (`[     ]`) == cast(double[]) []);      // empty array
+    
+    // char[] and char conversions, with commas, escape sequences and multichar UTF8 characters:
+    assert (parseTo!(char[][]) (`[ ".\"", [',','\''] ,"!\b€" ]`) == [ ".\"".dup, [',','\''] ,"!\b€" ]);
+    
+    // wchar[] and dchar[] conversions:
+    // The characters were pretty-much pulled at random from unicode tables.
+    // The last few cause some weird (display only) effects in my editor.
+    assert (parseTo!(wchar[]) ("\"Test string: ¶α؟अกሀ搀\"") == "Test string: ¶α؟अกሀ搀"w);
+    assert (parseTo!(dchar[]) ("\"Test string: ¶α؟अกሀ搀\"") == "Test string: ¶α؟अกሀ搀"d);
+    
+    assert (parseTo!(ubyte[]) (`0x01F2AC`) == cast(ubyte[]) [0x01, 0xF2, 0xAC]);	// ubyte[] special notation
+    assert (parseTo!(ubyte[]) (`[01 ,0xF2, 0xAC]`) == cast(ubyte[]) [0x01, 0xF2, 0xAC]);	// ubyte[] std notation
+}
+
+
+// Basic types
+
+// Char
+T parseTo(T : char) (char[] src) {	// Parses a single-quoted char: 'c', or an escape sequence such as '\n'.
+    src = Util.trim(src);
+    if (src.length < 3 || src[0] != '\'' || src[$-1] != '\'')
+        throw new ParseException ("Invalid char: not quoted (e.g. 'c')");
+    if (src[1] != '\\' && src.length == 3) return src[1];	// Either non escaped
+    if (src[1] == '\\' && src.length == 4) return replaceEscapedChar (src[2]);	// Or escaped (backslash required)
+    // A lone backslash between the quotes is an unfinished escape sequence, not an over-long char:
+    if (src.length == 3) throw new ParseException ("Invalid char: unfinished escape sequence");
+    // Bytes with the high bit set only occur in multibyte UTF-8 sequences; report those specifically:
+    if (src[1] & 0x80u) throw new UnicodeException ("Invalid char: too long (non-ASCII UTF-8 characters cannot be read as a single character)", 1);
+    throw new ParseException ("Invalid char: too long");
+}
+/* Basic unicode conversions for wide-chars.
+* NOTE: c > 127 signals the start of a multibyte UTF-8 sequence which must be converted for
+* UTF-16/32. But since we don't know what the next bytes are we can't do the conversion. */
+const char[] WIDE_CHAR_ERROR = "Error: unicode non-ascii character cannot be converted from a single UTF-8 char";
+T parseTo(T : wchar) (char[] src) {
+    char c = parseTo!(char) (src);
+    if (c <= 127u) return cast(wchar) c;	// this char can be converted
+    else throw new UnicodeException (WIDE_CHAR_ERROR, 1);
+}
+T parseTo(T : dchar) (char[] src) {
+    char c = parseTo!(char) (src);
+    if (c <= 127u) return cast(dchar) c;	// this char can be converted
+    else throw new UnicodeException (WIDE_CHAR_ERROR, 1);
+}
+debug (UnitTest) unittest {
+    assert (parseTo!(char) ("\'\\\'\'") == '\'');
+    assert (parseTo!(wchar) ("'X'") == 'X');
+    assert (parseTo!(dchar) ("'X'") == 'X');
+}
+
+// Bool: accepts the words true/false, or a run of digits that evaluates to 0 or 1 (e.g. 0, 00, 1, 01).
+T parseTo(T : bool) (char[] src) {
+    src = Util.trim(src);
+    if (src == "true") return true;
+    if (src == "false") return false;
+    uint pos;	// index of the first character that is not a leading zero
+    while (src.length > pos && src[pos] == '0') ++pos;	// skip leading zeros
+    if (src.length == pos && pos > 0) return false;	// one or more zeros and nothing else
+    if (src.length == pos + 1 && src[pos] == '1') return true;	// optional zeros followed by a single 1
+    throw new ParseException ("Invalid bool: not true or false and doesn't evaluate to 0 or 1");
+}
+debug (UnitTest) unittest {
+    assert (parseTo!(bool[]) (`[true,false,01,00]`) == cast(bool[]) [1,0,1,0]);	// words and zero-padded 0/1 are both accepted
+}
+
+// Ints: one overload per integral type, each forwarding to toTInt instantiated with that type.
+T parseTo(T : byte) (char[] src) {
+    return toTInt!(T) (src);
+}
+T parseTo(T : short) (char[] src) {
+    return toTInt!(T) (src);
+}
+T parseTo(T : int) (char[] src) {
+    return toTInt!(T) (src);
+}
+T parseTo(T : long) (char[] src) {
+    return toTInt!(T) (src);
+}
+T parseTo(T : ubyte) (char[] src) {
+    return toTInt!(T) (src);
+}
+T parseTo(T : ushort) (char[] src) {
+    return toTInt!(T) (src);
+}
+T parseTo(T : uint) (char[] src) {
+    return toTInt!(T) (src);
+}
+T parseTo(T : ulong) (char[] src) {
+    return toTInt!(T) (src);
+}
+debug (UnitTest) unittest {
+    assert (parseTo!(byte) ("-5") == cast(byte) -5);
+    // NOTE: octal is written with a 0o prefix here, unlike D's leading-0 literals (this comes from tango):
+    assert (parseTo!(uint[]) ("[0b0100,0o724,0xFa59c,0xFFFFFFFF,0]") == [0b0100u,0724,0xFa59c,0xFFFFFFFF,0]);
+}
+
+// Floats: one overload per floating point type, each forwarding to toTFloat instantiated with that type.
+T parseTo(T : float) (char[] src) {
+    return toTFloat!(T) (src);
+}
+T parseTo(T : double) (char[] src) {
+    return toTFloat!(T) (src);
+}
+T parseTo(T : real) (char[] src) {
+    return toTFloat!(T) (src);
+}
+debug (UnitTest) unittest {
+    assert (parseTo!(float) ("0.0") == 0.0f);	// plain decimal
+    assert (parseTo!(double) ("-1e25") == -1e25);	// sign and exponent
+    assert (parseTo!(real) ("5.24e-269") == cast(real) 5.24e-269);	// negative exponent
+}
+//END parseTo templates
+
+//BEGIN Utility funcs
+/** Trims whitespace from both ends of src, then checks for and removes array brackets: []
+*
+* Throws:
+*   ParseException unless the first and last non-whitespace characters are '[' and ']'.
+*
+* Returns: The text between the brackets (outer whitespace gone too). Useful for passing to split.
+*/
+char[] stripBrackets (char[] src) {
+    char[] trimmed = Util.trim(src);
+    bool bracketed = trimmed.length >= 2 && trimmed[0] == '[' && trimmed[$-1] == ']';
+    if (!bracketed) throw new ParseException ("Invalid bracketed string: not [...]");
+    return trimmed[1..$-1];
+}
+
+/** Splits a string into substrings separated by '$(B ,)' with support for characters and strings
+ * containing escape sequences and for embedded arrays ($(B [...])).
+ *
+ * Params:
+ *     src = A string to separate on commas. Where used for parsing arrays, the brackets enclosing
+ *     the array should be removed before calling this function (stripBrackets can do this).
+ *
+ * Returns:
+ *     An array of substrings within src, excluding commas. Only the ends of src are trimmed, so
+ *     pieces keep their own whitespace and empty strings may get returned.
+ *
+ * Throws:
+ *     ParseException if a ']' is met with no matching '[' before it.
+ *
+ * Remarks: Mainly a helper for the array parsing templates, hence no brackets are stripped from src.
+ */
+char[][] split (char[] src) {
+    src = Util.trim (src);
+    if (src == "") return [];		// empty array: no elements when no data
+    
+    uint depth = 0;			// nesting depth of embedded arrays
+    char[][] ret;
+    ret.length = src.length / 3 + 1;	// a guess; never zero, so doubling below always grows it
+    uint k = 0;				// current split piece
+    uint i = 0, j = 0;			// current read location, start of current piece
+    
+    while (i < src.length) {
+        char c = src[i];
+        if (c == '\'' || c == '"') {	// string or character
+            ++i;
+            while (i < src.length && src[i] != c) {
+                if (src[i] == '\\') ++i;	// escape seq.
+                ++i;
+            }	// Doesn't throw if no terminal quote at end of src, but this should be caught later.
+        }
+        else if (c == '[') ++depth;
+        else if (c == ']') {
+            if (depth) --depth;
+            else throw new ParseException ("Invalid array literal: closes before end of data item.");
+        }
+        else if (c == ',' && depth == 0) {		// only if not an embedded array
+            if (ret.length <= k) ret.length = ret.length * 2;
+            ret[k++] = src[j..i];	// add this piece and increment k
+            j = i + 1;
+        }
+        ++i;
+    }
+    if (ret.length <= k) ret.length = k + 1;
+    ret[k] = src[j..$];		// final piece; not src[j..i] since i overshoots after an unterminated quote
+    return ret[0..k+1];
+}
+
+/* Templated read-int function to read (un)signed 1-8 byte integers.
+ *
+ * Trims whitespace, lets cInt.trim take the sign and radix prefix, then converts the digits. Throws
+ * ParseException if there are no digits, if any of src is left over or if the value doesn't fit TInt.
+ * Actually a reimplementation of tango.text.convert.Integer toLong and parse functions.
+ */
+private TInt toTInt(TInt) (char[] src) {
+    const char[] INT_OUT_OF_RANGE = "Integer out of range";
+    bool sign;
+    uint radix, ate, ate2;
+    
+    // Trim off whitespace.
+    // NOTE: Cannot use tango.text.convert.Integer.trim to trim leading whitespace since it doesn't
+    // treat new-lines, etc. as whitespace which for our purposes is whitespace.
+    src = Util.trim (src);
+    
+    ate = cInt.trim (src, sign, radix);
+    if (ate == src.length) throw new ParseException ("Invalid integer: no digits");
+    ulong val = cInt.convert (src[ate..$], radix, &ate2);
+    ate += ate2;
+    
+    if (ate < src.length)
+        throw new ParseException ("Invalid integer at marked character: \"" ~ src[0..ate] ~ "'" ~ src[ate] ~ "'" ~ src[ate+1..$] ~ "\"");
+    
+    // Check the magnitude against the limit for its sign, so that TInt.min itself is accepted.
+    // -TInt.min wraps for long but the cast still gives the right magnitude; it is 0 for unsigned types.
+    ulong limit = sign ? cast(ulong) -(cast(long) TInt.min) : cast(ulong) TInt.max;
+    if (val > limit) throw new ParseException (INT_OUT_OF_RANGE);
+    return sign ? cast(TInt) -val : cast(TInt) val;
+}
+
+/* Basically a reimplementation of tango.text.convert.Float.toFloat which strips whitespace
+ * before throwing a ParseException for empty input or for input with characters left unparsed. */
+private TFloat toTFloat(TFloat) (char[] src) {
+    // NOTE: As for toTInt(), this needs to strip leading as well as trailing whitespace.
+    src = Util.trim (src);
+    if (src == "") throw new ParseException ("Invalid float: no digits");
+    uint ate;
+    TFloat x = cFloat.parse (src, &ate);	// ate receives the number of characters parse() used
+    if (ate < src.length) throw new ParseException ("Invalid float: characters remaining after number");
+    return x;
+}
+
+/* Translates the character following a backslash into the character its escape sequence denotes.
+ *
+ * Supported escape sequences are the following subset of those supported by D:
+ * \" \' \\ \a \b \f \n \r \t \v
+ *
+ * Params:
+ *     c = The character immediately following the backslash.
+ *
+ * Throws:
+ *     ParseException on any other character, i.e. an invalid escape sequence.
+ */
+private char replaceEscapedChar (char c)
+{
+    switch (c) {
+        // Characters which stand for themselves:
+        case '\"':
+            return '\"';
+        case '\'':
+            return '\'';
+        case '\\':
+            return '\\';
+        // Control characters:
+        case 'a':
+            return '\a';
+        case 'b':
+            return '\b';
+        case 'f':
+            return '\f';
+        case 'n':
+            return '\n';
+        case 'r':
+            return '\r';
+        case 't':
+            return '\t';
+        case 'v':
+            return '\v';
+        default:
+            break;
+    }
+    
+    // Anything else is not a supported escape sequence:
+    throw new ParseException ("Invalid escape sequence: \\"~c);
+}
+
+// Decodes the hex digit [0-9A-Fa-f] at src[pos] and steps pos past it; else throws (pos untouched). Doesn't check src.length.
+private ubyte readHexChar (char[] src, inout uint pos) {
+    ubyte v = cast(ubyte) src[pos];
+    if (v >= '0' && v <= '9') v -= '0';
+    else if (v >= 'A' && v <= 'F') v -= 'A' - 10;
+    else if (v >= 'a' && v <= 'f') v -= 'a' - 10;
+    else throw new ParseException ("Invalid hex digit.");
+    ++pos;
+    return v;
+}
+
+// Generic array reader: parses each comma-separated element of src with parseTo!(T).
+// Assumes input is of form "[xxxxx]" (i.e. first and last chars are '[', ']' and length >= 2).
+// The result buffer starts at 16 elements and doubles when full, to avoid unnecessary allocations.
+private T[] toArray(T : T[]) (char[] src) {
+    char[][] pieces = split(src[1..$-1]);
+    T[] ret = new T[16];
+    foreach (idx, piece; pieces) {
+        if (idx == ret.length) ret.length = ret.length * 2;
+        ret[idx] = parseTo!(T) (piece);
+    }
+    return ret[0..pieces.length];
+}
+
+debug (UnitTest) {
+    import tango.io.Console;	// only needed for the progress messages below
+    
+    unittest {
+        Cout ("Running unittest: parseTo ...").flush;
+        
+        assert (parseTo!(char[]) ("\"\\a\\b\\t\\n\\v\\f\\r\\\"\\\'\\\\\"") == "\a\b\t\n\v\f\r\"\'\\");	// all ten supported escape sequences in one string
+        
+        Cout (" complete").newline;
+    }
+}
+//END Utility funcs