changeset 79:61ea26abe4dd

Moved mde/mergetag/parse/parse(To/From) to mde/mergetag/(de)serialize. Implemented (de)serialization of structs.
author Diggory Hardy <diggory.hardy@gmail.com>
date Tue, 05 Aug 2008 11:51:51 +0100
parents 79a1809421aa
children ea58f277f487
files codeDoc/jobs.txt data/L10n/i18nUnitTest.mtt mde/font/font.d mde/gui/WidgetData.d mde/gui/content/options.d mde/input/Config.d mde/lookup/Options.d mde/lookup/Translation.d mde/mergetag/DataSet.d mde/mergetag/DefaultData.d mde/mergetag/deserialize.d mde/mergetag/parse/parseFrom.d mde/mergetag/parse/parseTo.d mde/mergetag/serialize.d unittest/Translation.mtt
diffstat 15 files changed, 1024 insertions(+), 946 deletions(-) [+]
line wrap: on
line diff
--- a/codeDoc/jobs.txt	Tue Jul 29 18:14:53 2008 +0100
+++ b/codeDoc/jobs.txt	Tue Aug 05 11:51:51 2008 +0100
@@ -3,7 +3,6 @@
 
 
 In progress:
-Redesigning how widgets are created and receive their data.
 
 
 
@@ -18,7 +17,7 @@
 3   on-event draw support (mde.events and GUI need to tell mde.mde)
 3   Scheduler for drawing only windows which need redrawing.
 3   Update scheduler as outlined in FIXME.
-3   Windows building/compatibility (currently partial)
+3   Windows building/compatibility (currently partial) - tango/sys/win32/SpecialPath.d
 2   Remove ability to scan, then load, mergetag sections. Not so necessary with section creator callback and allows "sliding window" type partial buffering.
 2   Options need a "level": simple options, for advanced users, for debugging only, etc.
 2   Command-line options for paths to by-pass normal path finding functionality.
--- a/data/L10n/i18nUnitTest.mtt	Tue Jul 29 18:14:53 2008 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-{MT01}
-{test-1}
-<entry|Str1=["Test 1"]>
-<char[][]|depends=["test-2"]>
-{test-2}
-<entry|Str1=["Test 2"]>
-<entry|Str2=["Test 3","Description",bogus,"entries",56]>
--- a/mde/font/font.d	Tue Jul 29 18:14:53 2008 +0100
+++ b/mde/font/font.d	Tue Aug 05 11:51:51 2008 +0100
@@ -29,7 +29,7 @@
 import derelict.freetype.ft;
 import derelict.opengl.gl;
 
-import mde.mergetag.parse.parseTo : parseTo;
+import mde.mergetag.deserialize;
 import tango.stdc.stringz;
 import Util = tango.text.Util;
 import tango.util.log.Log : Log, Logger;
@@ -294,6 +294,11 @@
     int		size;	// font size
     
     FT_Face	face;
+    
+    debug(mdeUnitTest) unittest {
+        // Don't do a unittest since font relies on loading the freetype library dynamically,
+        // normally done by Init. Also font is mostly visual and many problems will be obvious.
+    }
 }
 
 /+class OptionsFont : Options {
--- a/mde/gui/WidgetData.d	Tue Jul 29 18:14:53 2008 +0100
+++ b/mde/gui/WidgetData.d	Tue Aug 05 11:51:51 2008 +0100
@@ -38,8 +38,7 @@
 import mde.mergetag.Reader;
 import mde.mergetag.Writer;
 import mde.setup.paths;
-import mde.mergetag.parse.parseTo;
-import mde.mergetag.parse.parseFrom : parseFrom;
+import mde.mergetag.serialize;
 
 import tango.core.sync.Mutex;
 import tango.util.log.Log : Log, Logger;
--- a/mde/gui/content/options.d	Tue Jul 29 18:14:53 2008 +0100
+++ b/mde/gui/content/options.d	Tue Aug 05 11:51:51 2008 +0100
@@ -37,7 +37,7 @@
         textOpts.length = list.length;
         foreach (i,s; list) {
             Translation.Entry transled = trans.getStruct (s);
-            textOpts[i] = new ContentOptionText(opts, s, transled.str, transled.desc);
+            textOpts[i] = new ContentOptionText(opts, s, transled.name, transled.desc);
         }
     }
     
@@ -46,7 +46,7 @@
     }
     
     static OptionList trial () {
-        return new OptionList (miscOpts, "OptionsMisc");
+        return new OptionList (miscOpts, "L10n/OptionsMisc");
     }
     
 protected:
--- a/mde/input/Config.d	Tue Jul 29 18:14:53 2008 +0100
+++ b/mde/input/Config.d	Tue Aug 05 11:51:51 2008 +0100
@@ -20,8 +20,8 @@
 
 import MT = mde.mergetag.Reader;
 import mde.setup.paths;
-import mde.mergetag.parse.parseTo : parseTo;
-debug import mde.mergetag.parse.parseFrom : parseFrom;
+import mde.mergetag.deserialize;
+debug import mde.mergetag.serialize;
 
 import tango.util.log.Log : Log, Logger;
 import tango.util.collection.TreeBag : TreeBag;
--- a/mde/lookup/Options.d	Tue Jul 29 18:14:53 2008 +0100
+++ b/mde/lookup/Options.d	Tue Aug 05 11:51:51 2008 +0100
@@ -28,8 +28,7 @@
 import mde.mergetag.Writer;
 import mde.mergetag.DataSet;
 import mde.mergetag.exception;
-import mde.mergetag.parse.parseTo : parseTo;
-import mde.mergetag.parse.parseFrom : parseFrom;
+import mde.mergetag.serialize;
 
 import tango.core.Exception : ArrayBoundsException;
 import tango.util.log.Log : Log, Logger;
@@ -202,8 +201,7 @@
      * via hash-maps, which is a little slower than direct access but necessary since the option
      * must be changed in two separate places. */
     void set(T) (char[] symbol, T val) {
-        static if (!TIsIn!(T,TYPES))
-            static assert (false, "Options.set does not currently support type "~T.stringof);
+        static assert (TIsIn!(T,TYPES), "Options does not support type "~T.stringof);
         
         mixin (`alias opts`~TName!(T)~` optsVars;`);
         
@@ -222,8 +220,7 @@
      *
      * Using this method to read an option is not necessary, but allows for generic use.  */
     T get(T) (char[] symbol) {
-        static if (!TIsIn!(T,TYPES))
-            static assert (false, "Options.get does not currently support type "~T.stringof);
+        static assert (TIsIn!(T,TYPES), "Options does not support type "~T.stringof);
         
         mixin (`alias opts`~TName!(T)~` optsVars;`);
         
@@ -237,8 +234,7 @@
     
     /** List the names of all options of a specific type. */
     char[][] list(T) () {
-        static if (!TIsIn!(T,TYPES))
-            static assert (false, "Options.list does not currently support type "~T.stringof);
+        static assert (TIsIn!(T,TYPES), "Options does not support type "~T.stringof);
         
         mixin (`alias opts`~TName!(T)~` optsVars;`);
         
@@ -248,7 +244,7 @@
     protected {
         OptionChanges optionChanges;	// all changes to options (for saving)
     	
-    	// The "pointer lists":
+        // The "pointer lists", e.g. char[]*[ID] optscharA;
         mixin (PLists!(TYPES));
     }
     
@@ -408,8 +404,7 @@
     this () {}
     
     void set(T) (ID id, T x) {
-        static if (!TIsIn!(T,TYPES))
-            static assert (false, "OptionChanges.set does not currently support type "~T.stringof);
+        static assert (TIsIn!(T,TYPES), "Options does not support type "~T.stringof);
         
         mixin (`alias opts`~TName!(T)~` optsVars;`);
         mixin (`alias `~TName!(T)~`s vars;`);
--- a/mde/lookup/Translation.d	Tue Jul 29 18:14:53 2008 +0100
+++ b/mde/lookup/Translation.d	Tue Aug 05 11:51:51 2008 +0100
@@ -44,7 +44,7 @@
 import mde.mergetag.DataSet;
 import mde.mergetag.Reader;
 import mde.mergetag.exception;
-import mde.mergetag.parse.parseTo;
+import mde.mergetag.deserialize;
 
 import tango.util.log.Log : Log, Logger;
 
@@ -68,7 +68,7 @@
     char[] entry (char[] id) {
         Entry* p = id in entries;
         if (p) {
-            return p.str;
+            return p.name;
         } else {
             return id;
         }
@@ -78,7 +78,7 @@
         Entry* p = id in entries;
         if (p) {
             description = p.desc;
-            return p.str;
+            return p.name;
         } else {
             return id;
         }
@@ -91,7 +91,7 @@
             return *p;
         } else {
             Entry ret;
-            ret.str  = id;
+            ret.name = id;
             return ret;
         }
     }
@@ -117,7 +117,7 @@
         
         IReader reader;
         try {
-            reader = dataDir.makeMTReader ("L10n/"~name, PRIORITY.HIGH_LOW);
+            reader = dataDir.makeMTReader (name, PRIORITY.HIGH_LOW);
             /* Note: we don't want to load every translation section depended on to its own class
             * instance, since we want to merge them. So make every mergetag section use the same
             * instance. */
@@ -167,22 +167,14 @@
     */
     void addTag (char[] tp, ID id, char[] dt) {
         if (tp == "entry") {
-            char[][] fields = split (stripBrackets (dt));
+            // If the tag already exists, don't replace it
+            if (cast(char[]) id in entries) return;
             
-            if (fields.length < 1) {
-                // This tag is invalid, but this fact doesn't need to be reported elsewhere:
+            Entry entry = deserialize!(Entry) (dt);
+            if (entry.name is null) {   // This tag is invalid; ignore it
                 logger.error ("For name "~name~", L10n "~L10n~": tag with ID "~cast(char[])id~" has no data");
                 return;
             }
-            // If the tag already exists, don't replace it
-            if (cast(char[]) id in entries) return;
-            
-            Entry entry;
-            entry.str = parseTo!(char[]) (fields[0]);
-            
-            if (fields.length >= 2)
-                entry.desc = parseTo!(char[]) (fields[1]);
-            
             entries[cast(char[]) id] = entry;
         } else if (tp == "char[][]") {
             if (id == cast(ID)"depends") depends = cast(ID[]) parseTo!(char[][]) (dt);
@@ -197,7 +189,7 @@
      * Note that although each entry also has a version field, this is not loaded for general use.
      */
     struct Entry {
-        char[] str;         // The translated string
+        char[] name;        // The translated string
         char[] desc;        // An optional description
     }
     
@@ -236,7 +228,7 @@
         char[] currentL10n = miscOpts.L10n;
         miscOpts.L10n = "test-1";
         
-        Translation transl = load ("i18nUnitTest");
+        Translation transl = load ("unittest/Translation");
         
         // Simple get-string, check dependancy's entry doesn't override
         assert (transl.entry ("Str1") == "Test 1");
--- a/mde/mergetag/DataSet.d	Tue Jul 29 18:14:53 2008 +0100
+++ b/mde/mergetag/DataSet.d	Tue Aug 05 11:51:51 2008 +0100
@@ -63,8 +63,8 @@
         DataSet ds = new DataSet;
         ds.sec[cast(ID)"test"] = new DefaultData;
         assert (ds.getSections!(DefaultData)().length == 1);
-        ds.sec[cast(ID)"test"].addTag ("int",cast(ID)"T"," -543 ");
-        assert (ds.getSections!(DefaultData)()[cast(ID)"test"]._int[cast(ID)"T"] == -543);
+        ds.sec[cast(ID)"test"].addTag ("char[]",cast(ID)"T"," \"ut tag 1 \" ");
+        assert (ds.getSections!(DefaultData)()[cast(ID)"test"].Arg!(char[])[cast(ID)"T"] == "ut tag 1 ");
     
         logger.info ("Unittest complete.");
     }
--- a/mde/mergetag/DefaultData.d	Tue Jul 29 18:14:53 2008 +0100
+++ b/mde/mergetag/DefaultData.d	Tue Aug 05 11:51:51 2008 +0100
@@ -21,23 +21,17 @@
 public import mde.mergetag.iface.IDataSection;
 import mde.mergetag.exception;
 
-import mde.mergetag.parse.parseTo : parseTo;
-import mde.mergetag.parse.parseFrom : parseFrom;
+import mde.mergetag.serialize;
 
 
 /*************************************************************************************************
  * Default DataSection class.
  * 
- * Currently this is only used for headers, and thus the list of supported types has been
+ * Supported types are given by dataTypes.
+ * 
+ * Currently DefaultData is only used for headers, and thus the list of supported types has been
  * reduced to just those used in headers. Load order is HIGH_LOW, i.e. existing entries aren't
  * overwritten.
- * 
- * It did supports most of the basic types supported by D (excluding cent/ucent and
- * imaginary/complex types) and array versions of each of these types, plus arrays of strings.
- *
- * Extending the class to support more types, even custom types, shouldn't be particularly
- * difficult provided mde.text.parseTo and mde.text.parseFrom are extended to support the new
- * types.
  *************************************************************************************************/
 /* The implementation now uses a fair bit of generic programming. Adjusting the types supported
 * should be as simple as adjusting the list dataTypes, and possibly implemting new conversions in
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/mergetag/deserialize.d	Tue Aug 05 11:51:51 2008 +0100
@@ -0,0 +1,605 @@
+/**************************************************************************************************
+ * Generic deserialization templated function.
+ *
+ * copyright: Copyright (c) 2007-2008 Diggory Hardy.
+ *
+ * author: Diggory Hardy, diggory.hardy@gmail.com
+ *
+ * Supports:
+ *  Associative arrays, arrays (inc. strings), structs, char types, bool, int types, float types.
+ *
+ * There are also some public utility functions with their own documentation.
+ *
+ * Throws:
+ * On errors, a ParseException or a UnicodeException (both extend TextException) is thrown with a
+ * suitable message. No other exceptions should be thrown.
+ *
+ * Examples:
+ * ------------------------------------------------------------------------------------------------
+ * // Basic examples:
+ * ulong        a = deserialize!(ulong) ("20350");
+ * float        d = deserialize!(float) ("  1.2e-9 ");
+ * int[]        b = deserialize!(int[]) ("[0,1,2,3]");
+ *
+ * // String and char[] syntax:
+ * char[]       c = deserialize!(char[]) ("\"A string\"");
+ * char[]       e = deserialize!(char[]) ("['a','n','o','t','h','e','r', ' ' ,'s','t','r','i','n','g']");
+ *
+ * // These can be used interchangeably; here's a more complex example of an associative array:
+ * bool[char[]] f = deserialize!(bool[char[]]) ("[ \"one\":true, ['t','w','o']:false, \"three\":1, \"four\":000 ]");
+ *
+ * // There is also a special notation for ubyte[] types:
+ * // The digits following 0x must be in pairs and each specify one ubyte.
+ * assert ( deserialize!(ubyte[]) (`0x01F2AC`) == deserialize!(ubyte[]) (`[01 ,0xF2, 0xAC]`) );
+ *
+ * // There's no limit to the complexity!
+ * char[char[][][][char]][bool] z = ...; // don't expect me to write this!
+ * ------------------------------------------------------------------------------------------------
+ *
+ * TODO: Optimize memory allocation (if possible?). Test best sizes for initial allocations
+ * instead of merely guessing?
+ *************************************************************************************************/
+//NOTE: in case of multiple formats, make this a dummy module importing both serialize modules,
+// or put all the code here.
+module mde.mergetag.deserialize;
+
+// tango imports
+import tango.core.Exception : TextException, UnicodeException;
+import cInt = tango.text.convert.Integer;
+import cFloat = tango.text.convert.Float;
+import Utf = tango.text.convert.Utf;
+import Util = tango.text.Util;
+
+/**
+ * Base class for deserialize exceptions.
+ */
+class ParseException : TextException
+{
+    this( char[] msg )
+    {
+        super( msg );
+    }
+}
+
+alias deserialize parseTo;      // support the old name
+
+//BEGIN deserialize templates
+
+// Associative arrays
+
+T[S] deserialize(T : T[S], S) (char[] src) {
+    src = Util.trim(src);
+    if (src.length < 2 || src[0] != '[' || src[$-1] != ']')
+        throw new ParseException ("Invalid associative array: not [ ... ]");  // bad braces.
+    
+    T[S] ret;
+    foreach (char[] pair; split (src[1..$-1])) {
+        uint i = 0;
+        while (i < pair.length) {   // advance to the ':'
+            char c = pair[i];
+            if (c == ':') break;
+            if (c == '\'' || c == '"') {    // string or character
+                ++i;
+                while (i < pair.length && pair[i] != c) {
+                    if (pair[i] == '\\')
+                        ++i;    // escape seq.
+                    ++i;
+                }
+                // Could have an unterminated ' or " causing i >= pair.length, but:
+                // 1. Impossible: split would have thrown
+                // 2. In any case this would be caught below.
+            }
+            ++i;
+        }
+        if (i >= pair.length)
+            throw new ParseException ("Invalid associative array: encountered [ ... KEY] (missing :DATA)");
+        ret[deserialize!(S) (pair[0..i])] = deserialize!(T) (pair[i+1..$]);
+    }
+    return ret;
+}
+
+
+// Arrays
+
+T[] deserialize(T : T[]) (char[] src) {
+    src = Util.trim(src);
+    if (src.length >= 2 && src[0] == '[' && src[$-1] == ']')
+        return toArray!(T[]) (src);
+    throw new ParseException ("Invalid array: not [ ... ]");
+}
+
+// String (array special case)
+T deserialize(T : char[]) (char[] src) {
+    src = Util.trim(src);
+    if (src.length >= 2 && src[0] == '"' && src[$-1] == '"') {
+        src = src[1..$-1];
+        T ret;
+        ret.length = src.length;    // maximum length; retract to actual length later
+        uint i = 0;
+        for (uint t = 0; t < src.length;) {
+            // process a block of non-escaped characters
+            uint s = t;
+            while (t < src.length && src[t] != '\\') ++t;   // non-escaped characters
+            uint j = i + t - s;
+            ret[i..j] = src[s..t];  // copy a block
+            i = j;
+            
+            // process a block of escaped characters
+            while (t < src.length && src[t] == '\\') {
+                t++;
+                if (t == src.length)
+                    throw new ParseException ("Invalid string: ends \\\" !");  // next char is "
+                ret[i++] = unEscapeChar (src[t++]);   // throws if it's invalid
+            }
+        }
+        return ret[0..i];
+    }
+    else if (src.length >= 2 && src[0] == '[' && src[$-1] == ']')
+        return toArray!(T) (src);
+    throw new ParseException ("Invalid string: not quoted (\"*\") or char array (['a',...,'c'])");
+}
+// Unicode conversions for strings:
+T deserialize(T : wchar[]) (char[] src) {
+    // May throw a UnicodeException; don't bother catching and rethrowing:
+    return Utf.toString16 (deserialize!(char[]) (src));
+}
+T deserialize(T : dchar[]) (char[] src) {
+    // May throw a UnicodeException; don't bother catching and rethrowing:
+    return Utf.toString32 (deserialize!(char[]) (src));
+}
+
+// Binary (array special case)
+T deserialize(T : ubyte[]) (char[] src) {
+    src = Util.trim(src);
+    // Standard case:
+    if (src.length >= 2 && src[0] == '[' && src[$-1] == ']') return toArray!(T) (src);
+    // Special case: sequence of hex digits, each pair of which is a ubyte
+    if (src.length >= 2 && src[0..2] == "0x") {
+        src = src[2..$];    // strip down to actual digits
+        
+        // Must be in pairs:
+        if (src.length % 2 == 1)
+            throw new ParseException ("Invalid binary: odd number of chars");
+        
+        T ret;
+        ret.length = src.length / 2;    // exact
+        
+        for (uint i, pos; pos + 1 < src.length; ++i) {
+            ubyte x = readHexChar(src, pos) << 4;
+            x |= readHexChar(src, pos);
+            ret[i] = x;
+        }
+        return ret;
+    }
+    else throw new ParseException ("Invalid ubyte[]: not an array and doesn't start 0x");
+}
+
+
+// Basic types
+
+// Char
+// Assumes value is <= 127 (for valid UTF-8), since input would be invalid UTF-8 if not anyway.
+// (And we're not really interested in checking for valid unicode; char[] conversions don't either.)
+T deserialize(T : char) (char[] src) {
+    src = Util.trim(src);
+    if (src.length < 3 || src[0] != '\'' || src[$-1] != '\'')
+        throw new ParseException ("Invalid char: not 'x' or '\\x'");
+    if (src[1] != '\\') {
+        if (src.length == 3)
+            return src[1];              // Either non escaped
+        throw new ParseException ("Invalid char: too long (or non-ASCII)");
+    } else if (src.length == 4)
+        return unEscapeChar (src[2]);   // Or escaped
+    
+    throw new ParseException ("Invalid char: '\\'");
+}
+// Basic unicode conversions for wide-chars.
+// Assumes value is <= 127 as does deserialize!(char).
+T deserialize(T : wchar) (char[] src) {
+    return cast(T) deserialize!(char) (src);
+}
+T deserialize(T : dchar) (char[] src) {
+    return cast(T) deserialize!(char) (src);
+}
+
+// Bool
+T deserialize(T : bool) (char[] src) {
+    src = Util.trim(src);
+    if (src == "true")
+        return true;
+    if (src == "false")
+        return false;
+    uint pos;
+    while (src.length > pos && src[pos] == '0') ++pos;  // skip leading zeros
+    if (src.length == pos && pos > 0)
+        return false;
+    if (src.length == pos + 1 && src[pos] == '1')
+        return true;
+    throw new ParseException ("Invalid bool: not true or false and doesn't evaluate to 0 or 1");
+}
+
+// Ints
+T deserialize(T : byte) (char[] src) {
+    return toTInt!(T) (src);
+}
+T deserialize(T : short) (char[] src) {
+    return toTInt!(T) (src);
+}
+T deserialize(T : int) (char[] src) {
+    return toTInt!(T) (src);
+}
+T deserialize(T : long) (char[] src) {
+    return toTInt!(T) (src);
+}
+T deserialize(T : ubyte) (char[] src) {
+    return toTInt!(T) (src);
+}
+T deserialize(T : ushort) (char[] src) {
+    return toTInt!(T) (src);
+}
+T deserialize(T : uint) (char[] src) {
+    return toTInt!(T) (src);
+}
+T deserialize(T : ulong) (char[] src) {
+    return toTInt!(T) (src);
+}
+debug (UnitTest) unittest {
+    assert (deserialize!(byte) ("-5") == cast(byte) -5);
+    // annoyingly, octal syntax differs from D (blame tango):
+    assert (deserialize!(uint[]) ("[0b0100,0o724,0xFa59c,0xFFFFFFFF,0]") == [0b0100u,0724,0xFa59c,0xFFFFFFFF,0]);
+}
+
+// Floats
+T deserialize(T : float) (char[] src) {
+    return toTFloat!(T) (src);
+}
+T deserialize(T : double) (char[] src) {
+    return toTFloat!(T) (src);
+}
+T deserialize(T : real) (char[] src) {
+    return toTFloat!(T) (src);
+}
+
+
+// Structs: syntax is { INDEX : DATA, ... } where INDEX is the member's position in T.tupleof; members not listed keep their default value.
+T deserialize(T) (char[] src) {
+    static assert (is(T == struct), "Unsupported type: "~T.stringof);
+    
+    src = Util.trim(src);
+    if (src.length < 2 || src[0] != '{' || src[$-1] != '}')
+        throw new ParseException ("Invalid struct: not { ... }");
+    
+    // cannot access elements of T.tupleof with non-const key, so use a type which can be
+    // accessed with a non-const key to store slices:
+    char[][T.tupleof.length] temp;
+    foreach (char[] pair; split (src[1..$-1])) {
+        uint i = 0;
+        while (i < pair.length) {   // advance to the ':'
+            char c = pair[i];
+            if (c == ':')
+                break;
+            // key must be an int so no need for string checks
+            ++i;
+        }
+        if (i >= pair.length)
+            throw new ParseException ("Invalid struct: encountered { ... KEY} (missing :DATA)");
+        
+        size_t k = deserialize!(size_t) (pair[0..i]);
+        if (k >= temp.length) throw new ParseException ("Invalid struct: member index out of range");
+        temp[k] = pair[i+1..$];     // Note: could check no entry was already stored in temp.
+    }
+    T ret;
+    setStruct (ret, temp);
+    return ret;
+}
+//END deserialize templates
+
+//BEGIN Utility funcs
+/** Splits a string into substrings separated by '$(B ,)' with support for characters and strings
+ * containing escape sequences and for embedded arrays ($(B [...])).
+ *
+ * Params:
+ *     src A string to separate on commas. It shouldn't have enclosing brackets.
+ *
+ * Returns:
+ *     An array of substrings within src, excluding commas. Whitespace is not stripped and
+ *     empty strings may get returned.
+ *
+ * Remarks:
+ *     This function is primarily intended as a utility function for use by the templates
+ *     parsing arrays and associative arrays, but it may be useful in other cases too. Hence the
+ *     fact no brackets are stripped from src.
+ */
+//FIXME foreach struct is more efficient
+char[][] split (char[] src) {
+    src = Util.trim (src);
+    if (src == "")
+        return [];       // empty array: no elements when no data
+    
+    uint depth = 0;         // nesting depth (embedded arrays [...] and structs {...})
+    char[][] ret;
+    ret.length = src.length / 3 + 1;    // initial guess; at least 1 so that doubling below really grows it
+    uint k = 0;             // current split piece
+    uint i = 0, j = 0;          // current read location, start of current piece
+    
+    while (i < src.length) {
+        char c = src[i];
+        if (c == '\'' || c == '"') {    // string or character
+            ++i;
+            while (i < src.length && src[i] != c) {
+                if (src[i] == '\\')
+                    ++i;    // escape seq.
+                ++i;
+            }   // Doesn't throw if no terminal quote at end of src, but this should be caught later.
+        }
+        else if (c == '[' || c == '{') ++depth;     // commas inside nested arrays/structs don't split
+        else if (c == ']' || c == '}') {
+            if (depth)
+                --depth;
+            else throw new ParseException ("Invalid array literal: closes before end of data item.");
+        }
+        else if (c == ',' && depth == 0) {      // only if not an embedded array
+            if (ret.length <= k)
+                ret.length = ret.length * 2;
+            ret[k++] = src[j..i];   // add this piece and increment k
+            j = i + 1;
+        }
+        ++i;
+    }
+    if (i > src.length)     // the quote-skipping loop ran off the end of src
+        throw new ParseException ("Unterminated quote (\' or \")");
+    
+    if (ret.length <= k)
+        ret.length = k + 1;
+    ret[k] = src[j..i];     // add final piece (i >= j)
+    return ret[0..k+1];
+}
+
+/* Templated read-int function to read (un)signed 1-8 byte integers. Throws a ParseException if
+ * src has no digits, has trailing non-digit characters or its value does not fit in TInt.
+ * Actually a reimplementation of tango.text.convert.Integer toLong and parse functions.
+ */
+private TInt toTInt(TInt) (char[] src) {
+    const char[] INT_OUT_OF_RANGE = "Integer out of range";
+    bool sign;
+    uint radix, ate, ate2;
+    
+    // Trim off whitespace.
+    // NOTE: Cannot use tango.text.convert.Integer.trim to trim leading whitespace since it doesn't
+    // treat new-lines, etc. as whitespace which for our purposes is whitespace.
+    src = Util.trim (src);
+    
+    ate = cInt.trim (src, sign, radix);
+    if (ate == src.length)
+        throw new ParseException ("Invalid integer: no digits");
+    ulong val = cInt.convert (src[ate..$], radix, &ate2);
+    ate += ate2;
+    
+    if (ate < src.length)
+        throw new ParseException ("Invalid integer at marked character: \"" ~ src[0..ate] ~ "'" ~ src[ate] ~ "'" ~ src[ate+1..$] ~ "\"");
+    
+    if (sign) {
+        // Negative: magnitude may reach |TInt.min|; unsigned types therefore accept only "-0".
+        const ulong LIMIT = (TInt.min < 0) ? cast(ulong) TInt.max + 1 : 0;
+        if (val > LIMIT) throw new ParseException (INT_OUT_OF_RANGE);
+        return cast(TInt) -val;     // two's complement negation, truncated to TInt
+    }
+    if (val > TInt.max)
+        throw new ParseException (INT_OUT_OF_RANGE);
+    return cast(TInt) val;
+}
+
+/* Basically a reimplementation of tango.text.convert.Float.toFloat which trims surrounding
+ * whitespace before throwing a ParseException for empty or overlong input. */
+private TFloat toTFloat(TFloat) (char[] src) {
+    // NOTE: As for toTInt(), this needs to strip leading as well as trailing whitespace.
+    src = Util.trim (src);
+    if (src == "")
+        throw new ParseException ("Invalid float: no digits");
+    uint ate;   // number of characters consumed by the parser
+    TFloat x = cFloat.parse (src, &ate);
+    if (ate < src.length) throw new ParseException ("Invalid float: unexpected trailing characters");
+    return x;
+}
+
+/* Throws an exception on invalid escape sequences. Supported escape sequences are the following
+ * subset of those supported by D: \" \' \\ \a \b \f \n \r \t \v
+ */
+private char unEscapeChar (char c)
+{
+    // This code was generated:
+    if (c <= 'b') {
+        if (c <= '\'') {
+            if (c == '\"') {
+                return '\"';
+            } else if (c == '\'') {
+                return '\'';
+            }
+        } else {
+            if (c == '\\') {
+                return '\\';
+            } else if (c == 'a') {
+                return '\a';
+            } else if (c == 'b') {
+                return '\b';
+            }
+        }
+    } else {
+        if (c <= 'n') {
+            if (c == 'f') {
+                return '\f';
+            } else if (c == 'n') {
+                return '\n';
+            }
+        } else {
+            if (c == 'r') {
+                return '\r';
+            } else if (c == 't') {
+                return '\t';
+            } else if (c == 'v') {
+                return '\v';
+            }
+        }
+    }
+    
+    // if we haven't returned:
+    throw new ParseException ("Bad escape sequence: \\"~c);
+}
+
+// Reads one hex char: [0-9A-Fa-f]. Otherwise throws an exception. Doesn't check src.length.
+private ubyte readHexChar (char[] src, inout uint pos) {
+    ubyte x;
+    if (src[pos] >= '0' && src[pos] <= '9') x = src[pos] - '0';
+    else if (src[pos] >= 'A' && src[pos] <= 'F') x = src[pos] - 'A' + 10;
+    else if (src[pos] >= 'a' && src[pos] <= 'f') x = src[pos] - 'a' + 10;
+    else throw new ParseException ("Invalid hex digit.");
+    ++pos;
+    return x;
+}
+
+// Generic array reader
+// Assumes input is of form "[xxxxx]" (i.e. first and last chars are '[', ']' and length >= 2).
+private T[] toArray(T : T[]) (char[] src) {
+    T[] ret = new T[16];    // avoid unnecessary allocations
+    uint i = 0;
+    foreach (char[] element; split(src[1..$-1])) {
+        if (i == ret.length) ret.length = ret.length * 2;
+        ret[i] = deserialize!(T) (element);
+        ++i;
+    }
+    return ret[0..i];
+}
+
+/** Set a struct's elements from an array.
+*
+* For a more generic version, see http://www.dsource.org/projects/tutorials/wiki/StructTupleof
+*/
+// NOTE: Efficiency? Do recursive calls get inlined?
+private void setStruct(S, size_t N, size_t i = 0) (ref S s, char[][N] src) {
+    static assert (is(S == struct), "Only to be used with structs.");
+    static assert (N == S.tupleof.length, "src.length != S.tupleof.length");
+    static if (i < N) {
+        if (src[i])
+            s.tupleof[i] = deserialize!(typeof(s.tupleof[i])) (src[i]);
+        setStruct!(S, N, i+1) (s, src);
+    }
+}
+//END Utility funcs
+
+debug (UnitTest) {
+    import tango.util.log.Log : Log, Logger;
+    
+    private Logger logger;
+    static this() {
+        logger = Log.getLogger ("text.deserialize");
+    }
+unittest {
+    // Utility
+    bool throws (void delegate() dg) {
+        bool r = false;
+        try {
+            dg();
+        } catch (Exception e) {
+            r = true;
+            logger.info ("Exception caught: "~e.msg);
+        }
+        return r;
+    }
+    assert (!throws ({ int i = 5; }));
+    assert (throws ({ throw new Exception ("Test - this exception should be caught"); }));
+    
+    
+    // Associative arrays
+    char[][char] X = deserialize!(char[][char]) (`['a':"animal\n", 'b':['b','u','s','\n']]`);
+    char[][char] Y = ['a':cast(char[])"animal\n", 'b':['b','u','s','\n']];
+    
+    //FIXME: when the compiler's fixed: http://d.puremagic.com/issues/show_bug.cgi?id=1671
+    // just assert (X == Y)
+    assert (X.length == Y.length);
+    assert (X.keys == Y.keys);
+    assert (X.values == Y.values);
+    //X.rehash; Y.rehash;   // doesn't make a difference
+    //assert (X == Y);      // fails (compiler bug)
+    
+    assert (throws ({ deserialize!(int[int]) (`[1:1`); }));             // bad brackets
+    assert (throws ({ deserialize!(int[char[]]) (`["ab\":1]`); }));     // unterminated quote
+    assert (throws ({ deserialize!(int[char[]]) (`["abc,\a\b\c":1]`); }));    // bad escape seq.
+    assert (throws ({ deserialize!(int[char[]]) (`["abc"]`); }));       // no data
+    
+    
+    // Arrays
+    assert (deserialize!(double[]) (`[1.0,1.0e-10]`) == [1.0, 1.0e-10]);// generic array stuff
+    assert (deserialize!(double[]) (`[     ]`) == cast(double[]) []);   // empty array
+    assert (deserialize!(int[][]) (`[[1],[2,3],[]]`) == [[1],[2,3],[]]);// sub-array
+    assert (throws ({ deserialize!(int[]) (`[1,2`); }));                // bad brackets
+    assert (throws ({ deserialize!(int[][]) (`[[1]]]`); }));            // bad brackets
+    
+    // char[] and char conversions, with commas, escape sequences and multichar UTF8 characters:
+    assert (deserialize!(char[][]) (`[ ".\"", [',','\''] ,"!\b€" ]`) == [ ".\"".dup, [',','\''] ,"!\b€" ]);
+    assert (throws ({ deserialize!(char[]) ("\"\\\""); }));
+    assert (throws ({ deserialize!(char[]) (`['a'`); }));               // bad brackets
+    
+    // wchar[] and dchar[] conversions:
+    // The characters were pretty-much pulled at random from unicode tables.
+    // The last few cause some weird (display only) effects in my editor.
+    assert (deserialize!(wchar[]) ("\"Test string: ¶α؟अกሀ搀\"") == "Test string: ¶α؟अกሀ搀"w);
+    assert (deserialize!(dchar[]) ("\"Test string: ¶α؟अกሀ搀\"") == "Test string: ¶α؟अกሀ搀"d);
+    
+    assert (deserialize!(ubyte[]) (`0x01F2aC`) == cast(ubyte[]) [0x01, 0xF2, 0xAC]);    // ubyte[] special notation
+    assert (deserialize!(ubyte[]) (`[01 ,0xF2, 0xAC]`) == cast(ubyte[]) [0x01, 0xF2, 0xAC]);    // ubyte[] std notation
+    assert (throws ({ deserialize!(ubyte[]) (`0x123`); }));             // digits not in pairs
+    assert (throws ({ deserialize!(ubyte[]) (`[2,5`); }));              // not [...] or 0x..
+    assert (throws ({ deserialize!(ubyte[]) (`0x123j`); }));
+    
+    
+    // char types
+    assert (deserialize!(char) ("'\\\''") == '\'');
+    assert (deserialize!(wchar) ("'X'") == 'X');
+    assert (deserialize!(dchar) ("'X'") == 'X');
+    assert (throws ({ deserialize!(char) ("'\\'"); }));
+    assert (throws ({ deserialize!(char) ("'£'"); }));        // non-ascii
+    assert (throws ({ deserialize!(char) ("''"); }));
+    assert (throws ({ deserialize!(char) ("'ab'"); }));
+    assert (throws ({ deserialize!(wchar) ("''"); }));
+    
+    
+    // bool
+    assert (deserialize!(bool[]) (`[true,false,01,00]`) == cast(bool[]) [1,0,1,0]);
+    assert (throws ({ deserialize!(bool) ("011"); }));
+    
+    
+    // ints
+    assert (deserialize!(byte) ("-5") == cast(byte) -5);
+    assert (deserialize!(int) ("-0x7FFFFFFF") == cast(int) -0x7FFF_FFFF);
+    // annoyingly, octal syntax differs from D (blame tango):
+    assert (deserialize!(uint[]) ("[0b0100,0o724,0xFa59c,0xFFFFFFFF,0]") == [0b0100u,0724,0xFa59c,0xFFFFFFFF,0]);
+    assert (throws ({ deserialize!(int) (""); }));
+    assert (throws ({ deserialize!(int) ("0x8FFFFFFF"); }));
+    assert (throws ({ deserialize!(uint) ("-1"); }));
+    assert (throws ({ deserialize!(uint) ("1a"); }));
+    
+    
+    // floats
+    assert (deserialize!(float) ("0.0") == 0.0f);
+    assert (deserialize!(double) ("-1e25") == -1e25);
+    assert (deserialize!(real) ("5.24e-269") == cast(real) 5.24e-269);
+    assert (throws ({ deserialize!(float) (""); }));
+    
+    
+    // structs
+    struct A {  int x = 5;  char y; }
+    struct B {  A a;    float b;   }
+    A a;    a.y = 'y';
+    assert (deserialize!(A) ("{ 1 : 'y' }") == a);
+    B b;    b.a = a;    b.b = 1.0f;
+    assert (deserialize!(B) (" {1:1.0,0: { 1 : 'y' } } ") == b);
+    assert (throws ({ deserialize!(A) (" 1:'x'}"); })); // bad braces
+    assert (throws ({ deserialize!(A) ("{ 1 }"); }));     // no :DATA
+    
+    
+    // unEscapeChar
+    assert (deserialize!(char[]) ("\"\\a\\b\\t\\n\\v\\f\\r\\\"\\\'\\\\\"") == "\a\b\t\n\v\f\r\"\'\\");
+    
+    logger.info ("Unittest complete.");
+}
+}
--- a/mde/mergetag/parse/parseFrom.d	Tue Jul 29 18:14:53 2008 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,373 +0,0 @@
-/**************************************************************************************************
- * copyright: Copyright (c) 2007-2008 Diggory Hardy.
- *
- * author: Diggory Hardy, diggory.hardy@gmail.com
- *
- * license: BSD style: $(LICENSE)
- *
- * This contains templates for converting various data-types to a char[].
- *
- * parseFrom is roughly the inverse of $(B parseTo).
- * It is also available in tango.scrapple.
- *
- * This module basically implements the following templated function for most basic D types:
- * bool, byte, short, int, long, ubyte, ushort, uint, ulong, float, double, real, char, wchar,
- * dchar.
- * It also supports arrays of any supported type (including of other arrays) and has special
- * handling for strings (char[]) and binary (ubyte[]) data-types.
- * -----------------------------
- * char[] parseFrom(T) (T value);
- * -----------------------------
- *
- * $(I value) is the value to convert; it is converted to a string and returned.
- *
- * Syntax:
- * The syntax is the same as parseTo; but since this module only generates formatted output
- * knowing the syntax shouldn't be necessary. There is currently no way to specify options like
- * output base for ints, precision of floats, or
- * whether to write char[] or ubyte[] types as arrays or in their more compact forms.
- *
- * Throws:
- * On errors, an exception is thrown (UnicodeException or IllegalArgumentException). No other
- * exceptions should be thrown.
- *
- * Remarks:
- * There is currently no support for outputting wchar/dchar strings. There are, however, unicode
- * conversions for converting UTF-16/32 to UTF-8. Be warned though that many wchar/dchar characters
- * (any that are non-ascii) will not fit in a single char and an exception will be thrown.
- *
- * The code does involve some heap activity; this is necessary anyway for returning dynamic arrays.
- * (Slices of a pre-allocated array could be returned instead, but for many uses would have to be
- * duplicated before storage, leading to less efficient operation.)
- * Most memory allocation has been kept to a minimum.
- *
- * Unlike the parseTo!() module, the parseFrom templates could be re-written to use static-ifs
- * instead of type specialisation, thus allowing type inference. However I likely won't bother
- * implementing this myself.
- *
- * Examples:
- * ------------------------------------------------------------------------------------------------
- * // Examples are printed via Cout.
- *
- * // Basic examples:
- * Cout (parseFrom!(byte) (-13)).newline;                       // -13
- * Cout (parseFrom!(real) (2.56e11)).newline;                   // 2.55999999999999990000e+11
- * Cout (parseFrom!(double[]) ([0.0, 1.0, 2.0, 3.0])).newline;  // [0.00000000000000000,1.00000000000000000,2.00000000000000000,3.00000000000000000]
- * Cout (parseFrom!(bool[]) ([true,false,false])).newline;      // [true,false,false]
- *
- * // String and ubyte[] special syntaxes (always used):
- * Cout (parseFrom!(char[]) ("A string.")).newline;             // "A string." (including quotes)
- * Cout (parseFrom!(ubyte[]) (cast(ubyte[]) [5u, 0xF1u, 0x10u])).newline;   // 0x05f110
- *
- * // Associative arrays:
- * Cout (parseFrom!(char[][byte]) ([-1:"negative one"[], 0:"zero", 1:"one"])).newline;  // [0:"zero",1:"one",-1:"negative one"]
- *
- * // No limit on complexity...
- * char[] somethingComplicated = parseFrom!(real[][][bool[int[][]]]) (...);
- * ------------------------------------------------------------------------------------------------
- *************************************************************************************************/
-
-module mde.mergetag.parse.parseFrom;
-
-// tango imports
-import tango.core.Exception : UnicodeException, IllegalArgumentException;
-import cInt = tango.text.convert.Integer;
-import cFloat = tango.text.convert.Float;
-import Utf = tango.text.convert.Utf;
-import Util = tango.text.Util;
-
-//BEGIN parseFrom templates
-/* Idea: could extend parseFrom with a second parameter, containing flags for things like base to output.
- * Unnecessary for mergetag though.
-*/
-
-// Associative arrays
-
-/** Serializes an associative array as $(B [key:value,key:value,...]).
- *
- * Params:
- *  val = The associative array to convert. Keys and values are converted recursively with
- *      parseFrom!(S) and parseFrom!(T).
- *
- * Returns:
- *  The formatted text; an empty associative array yields "[]".
- */
-char[] parseFrom(T : T[S], S) (T[S] val) {
-    char[] ret;
-    // A guess, including values themselves and [,:] elements (must be at least 2).
-    ret.length = val.length * (defLength!(T) + defLength!(S) + 2) + 2;
-    ret[0] = '[';
-    uint i = 1;
-    foreach (S k, T v; val) {
-        char[] s = parseFrom!(S) (k) ~ ":" ~ parseFrom!(T) (v);
-        i += s.length;
-        // Keep doubling until both s and the following separator fit: a single entry may be
-        // far longer than the initial guess, so doubling once is not always enough.
-        while (i+1 >= ret.length) ret.length = ret.length * 2;
-        ret[i-s.length .. i] = s;
-        ret[i++] = ',';
-    }
-    if (i == 1) ++i;	// special case - not overwriting a comma
-    ret[i-1] = ']';	// replaces last comma
-    return ret[0..i];
-}
-debug (UnitTest) unittest {
-    char[] X = parseFrom!(char[][char]) (['a':cast(char[])"animal", 'b':['b','u','s']]);
-    char[] Y = `['a':"animal",'b':"bus"]`;
-    assert (X == Y);
-    
-    // Regression: one entry much longer than twice the initial buffer guess.
-    char[] longVal = "a value that is considerably longer than twice the initial per-entry size guess, so that doubling the buffer only once cannot make it fit";
-    assert (parseFrom!(char[][int]) ([0:longVal]) == "[0:\"" ~ longVal ~ "\"]");
-}
-
-
-// Arrays
-
-/** Serializes a dynamic array as $(B [x,y,...,z]).
- *
- * Params:
- *  val = The array to convert; each element is converted with parseFrom!(T).
- *
- * Returns:
- *  The formatted text; an empty array yields "[]".
- */
-char[] parseFrom(T : T[]) (T[] val) {
-    char[] ret;
-    // A guess, including commas and brackets (must be at least 2)
-    ret.length = val.length * (defLength!(T) + 1) + 2;
-    ret[0] = '[';
-    uint i = 1;
-    foreach (T x; val) {
-        char[] s = parseFrom!(T) (x);
-        i += s.length;
-        // Keep doubling until both s and the following comma fit: one element (e.g. a long
-        // string) may be far longer than the guess, so doubling once is not always enough.
-        while (i+1 >= ret.length) ret.length = ret.length * 2;
-        ret[i-s.length .. i] = s;
-        ret[i++] = ',';
-    }
-    if (i == 1) ++i;	// special case - not overwriting a comma
-    ret[i-1] = ']';	// replaces last comma
-    return ret[0..i];
-}
-
-// Strings (array special case)
-/* Serializes a string as a double-quoted literal, writing each escapable character as a
- * backslash followed by its escape letter. */
-char[] parseFrom(T : char[]) (T val) {
-    // Worst case: every character is escaped (two chars each), plus the two quotes.
-    char[] buf = new char[val.length * 2 + 2];
-    uint pos = 0;
-    buf[pos++] = '"';
-    foreach (char c; val) {
-        if (isEscapableChar (c)) {
-            buf[pos++] = '\\';
-            buf[pos++] = replaceEscapableChar (c);
-        } else {
-            buf[pos++] = c;
-        }
-    }
-    buf[pos++] = '"';
-    return buf[0..pos];
-}
-// Unicode conversions for strings:
-// Both overloads convert the wide string to UTF-8 with Utf.toString and then format the result
-// with the char[] overload, so the output is a quoted, escaped UTF-8 string.
-char[] parseFrom(T : dchar[]) (T val) {
-    // May throw a UnicodeException; don't bother catching and rethrowing:
-    return parseFrom!(char[]) (Utf.toString (val));
-}
-char[] parseFrom(T : wchar[]) (T val) {
-    // May throw a UnicodeException; don't bother catching and rethrowing:
-    return parseFrom!(char[]) (Utf.toString (val));
-}
-
-// Binary (array special case)
-/* Serializes binary data in the compact form 0xHHHH...: the prefix "0x" followed by two
- * lower-case hex digits per byte. */
-char[] parseFrom(T : ubyte[]) (T val) {
-    static const char[16] digits = "0123456789abcdef";
-    
-    // Two digits per byte plus the two-character prefix: the length is exact.
-    char[] hex = new char[val.length * 2 + 2];
-    hex[0] = '0';
-    hex[1] = 'x';
-    
-    foreach (size_t n, ubyte b; val) {
-        hex[2*n + 2] = digits[b >> 4];		// high nibble
-        hex[2*n + 3] = digits[b & 0x0F];	// low nibble
-    }
-    return hex;
-}
-
-// Checks the array, string, wide-string and ubyte[] overloads of parseFrom against literal
-// expected output.
-debug (UnitTest) unittest {
-    // generic array stuff:
-    assert (parseFrom!(double[]) ([1.0, 1.0e-10]) == `[1.00000000000000000,0.10000000000000000e-09]`);
-    assert (parseFrom!(double[]) (cast(double[]) []) == `[]`);		// empty array
-    
-    // char[] conversions, with commas, escape sequences and multichar UTF8 characters:
-    assert (parseFrom!(char[][]) ([ ".\""[], [',','\''] ,"!\b€" ]) == `[".\"",",\'","!\b€"]`);
-    
-    // wchar[] and dchar[] conversions:
-    // The characters were pretty-much pulled at random from unicode tables.
-    // The last few cause some weird (display only) effects in my editor.
-    assert (parseFrom!(wchar[]) ("Test string: ¶α؟अกሀ搀"w) == "\"Test string: ¶α؟अกሀ搀\"");
-    assert (parseFrom!(dchar[]) ("Test string: ¶α؟अกሀ搀"d) == "\"Test string: ¶α؟अกሀ搀\"");
-    
-    assert (parseFrom!(ubyte[]) (cast(ubyte[]) [0x01, 0xF2, 0xAC]) == `0x01f2ac`);	// ubyte[] special notation
-}
-
-
-// Basic types
-
-// Char
-/* Serializes one char as a single-quoted literal: 'c', or '\c' when the character is one of
- * the escapable characters. */
-char[] parseFrom(T : char) (T val) {
-    // NOTE: if (val > 127) "is invalid UTF-8 single char"
-    // However we don't know what this is for, in particular if it will be recombined with other chars later
-    
-    // A static array cannot be returned by reference, so allocate the longest possible
-    // result (an escaped char, four characters) and slice it when less is needed.
-    char[] quoted = new char[4];
-    quoted[0] = '\'';
-    
-    if (isEscapableChar (val)) {
-        quoted[1] = '\\';
-        quoted[2] = replaceEscapableChar (val);
-        quoted[3] = '\'';
-        return quoted;
-    }
-    
-    quoted[1] = val;
-    quoted[2] = '\'';
-    return quoted[0..3];
-}
-// Basic unicode conversions for wide-chars.
-// NOTE: any other wide-chars will not fit in a single UTF-8 encoded char.
-// Only code points up to 127 are accepted; they are formatted by the char overload. Anything
-// larger throws a UnicodeException carrying WIDE_CHAR_ERROR.
-const char[] WIDE_CHAR_ERROR = "Error: unicode non-ascii character cannot be converted to a single UTF-8 char";
-char[] parseFrom(T : wchar) (T val) {
-    if (val <= 127u) return parseFrom!(char) (cast(char) val);	// this char can be converted
-    else throw new UnicodeException (WIDE_CHAR_ERROR, 0);
-}
-char[] parseFrom(T : dchar) (T val) {
-    if (val <= 127u) return parseFrom!(char) (cast(char) val);	// this char can be converted
-    else throw new UnicodeException (WIDE_CHAR_ERROR, 0);
-}
-// Checks an escaped char and the ASCII path of the wide-char overloads.
-debug (UnitTest) unittest {
-    assert (parseFrom!(char) ('\'') == "\'\\\'\'");
-    assert (parseFrom!(wchar) ('X') == "'X'");
-    assert (parseFrom!(dchar) ('X') == "'X'");
-}
-
-// Bool
-// Serializes a bool as the word "true" or "false".
-char[] parseFrom(T : bool) (T val) {
-    char[] word = "false";
-    if (val) word = "true";
-    return word;
-}
-// too simple to need a unittest
-
-// Signed ints
-// Every integer overload widens its value to long and formats it through formatLong.
-char[] parseFrom(T : byte) (T val) {
-    return formatLong (val);
-}
-char[] parseFrom(T : short) (T val) {
-    return formatLong (val);
-}
-char[] parseFrom(T : int) (T val) {
-    return formatLong (val);
-}
-char[] parseFrom(T : long) (T val) {
-    return formatLong (val);
-}
-// Unsigned ints
-char[] parseFrom(T : ubyte) (T val) {
-    return formatLong (val);
-}
-char[] parseFrom(T : ushort) (T val) {
-    return formatLong (val);
-}
-char[] parseFrom(T : uint) (T val) {
-    return formatLong (val);
-}
-// formatLong takes a long, so ulong values above long.max cannot be represented and are
-// rejected with an IllegalArgumentException rather than being printed as negative numbers.
-char[] parseFrom(T : ulong) (T val) {
-    if (val > cast(ulong) long.max)
-        throw new IllegalArgumentException ("No handling available for ulong where value > long.max");
-    return formatLong (val);
-}
-// Integers are written in decimal with a leading '-' for negative values.
-debug (UnitTest) unittest {
-    assert (parseFrom!(byte) (cast(byte) -5) == "-5");
-    // annoyingly, octal syntax differs from D (blame tango):
-    assert (parseFrom!(uint[]) ([0b0100u,0724,0xFa59c,0xFFFFFFFF,0]) == "[4,468,1025436,4294967295,0]");
-}
-
-// Floats
-/* Old calculation (not used):
-t.dig+2+4+3	// should be sufficient length (mant + (neg, dot, e, exp neg) + exp (3,4,5 for float,double,real resp.)) */
-// Each overload formats into a freshly allocated 32-char buffer with cFloat.format, asking for
-// T.dig+2 decimals; the returned value is whatever cFloat.format returns.
-char[] parseFrom(T : float) (T val) {
-    char[] ret = new char[32];	// minimum allowed by assert in format
-    return cFloat.format (ret, val, T.dig+2, 1);	// from old C++ tests, T.dig+2 gives best(?) accuracy
-}
-char[] parseFrom(T : double) (T val) {
-    char[] ret = new char[32];
-    return cFloat.format (ret, val, T.dig+2, 1);
-}
-char[] parseFrom(T : real) (T val) {
-    char[] ret = new char[32];
-    return cFloat.format (ret, val, T.dig+2, 1);
-}
-// Pins the exact text currently produced for one value of each float type.
-debug (UnitTest) unittest {
-    // NOTE: these numbers are not particularly meaningful.
-    assert (parseFrom!(float) (0.0f) == "0.00000000");
-    assert (parseFrom!(double) (-1e25) == "-1.00000000000000000e+25");
-    assert (parseFrom!(real) (cast(real) 4.918e300) == "4.91800000000000000000e+300");
-}
-//END parseFrom templates
-
-//BEGIN Length templates
-/* This template provides the initial length for strings for formatting various types. These strings
- * can be expanded; this value is intended to cover 90% of cases or so.
- *
- * NOTE: This template was intended to provide specialisations for different types.
- * This one value should do reasonably well for most types.
- */
-private {
-    // Initial per-element buffer-size guess, in characters, used by the array and associative
-    // array overloads of parseFrom when sizing their output buffer.
-    template defLength(T)        { const uint defLength = 20; }
-    template defLength(T : char) { const uint defLength = 4;  }	// quoted, escaped char: '\c'
-    template defLength(T : bool) { const uint defLength = 5;  }	// length of "false"
-}
-//END Length templates
-
-//BEGIN Utility funcs
-// Formats a long as text using tango's Integer.toString; shared by all integer overloads of
-// parseFrom.
-private char[] formatLong (long val) {
-    // May throw an IllegalArgumentException; don't bother catching and rethrowing:
-    return cInt.toString (val);
-}
-// True for the characters which are written as escape sequences: the control characters
-// \a \b \t \n \v \f \r (the contiguous range '\a'..'\r'), both quote characters and the backslash.
-private bool isEscapableChar (char c) {
-    switch (c) {
-        case '\a', '\b', '\t', '\n', '\v', '\f', '\r':
-        case '\"', '\'', '\\':
-            return true;
-        default:
-            return false;
-    }
-}
-// Maps an escapable character to the character which follows the backslash in its escape
-// sequence (e.g. a newline maps to 'n'). Throws an IllegalArgumentException for any other
-// character; this should never actually happen within parseFrom.
-private char replaceEscapableChar (char c) {
-    switch (c) {
-        case '\a':  return 'a';
-        case '\b':  return 'b';
-        case '\t':  return 't';
-        case '\n':  return 'n';
-        case '\v':  return 'v';
-        case '\f':  return 'f';
-        case '\r':  return 'r';
-        case '\"':  return '\"';
-        case '\'':  return '\'';
-        case '\\':  return '\\';
-        default:
-            throw new IllegalArgumentException ("Character is not escapable (internal parseFrom error)");
-    }
-}
-
-// Module-level unittest: prints progress to the console and checks that every supported escape
-// sequence is written correctly by the string overload.
-debug (UnitTest) {
-    import tango.io.Console;
-    
-    unittest {
-        Cout ("Running unittest: parseFrom ...").flush;
-        
-        assert (parseFrom!(char[]) ("\a\b\t\n\v\f\r\"\'\\") == "\"\\a\\b\\t\\n\\v\\f\\r\\\"\\\'\\\\\"");
-        
-        Cout (" complete").newline;
-    }
-}
-//END Utility funcs
--- a/mde/mergetag/parse/parseTo.d	Tue Jul 29 18:14:53 2008 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,515 +0,0 @@
-/**************************************************************************************************
- * copyright: Copyright (c) 2007-2008 Diggory Hardy.
- *
- * author: Diggory Hardy, diggory.hardy@gmail.com
- *
- * license: BSD style: $(LICENSE)
- *
- * This contains templates for converting a char[] to various data-types.
- *
- * parseTo is roughly the inverse of $(B parseFrom) and should read any data output by $(B parseFrom).
- * It is also available in tango.scrapple.
- *
- * This module basically implements the following templated function for most basic D types:
- * bool, byte, short, int, long, ubyte, ushort, uint, ulong, float, double, real, char.
- * It also supports arrays and associative arrays of any supported type (including of other arrays)
- * and has special handling for strings (char[]) and binary (ubyte[]) data-types.
- * -----------------------------
- * T parseTo(T) (char[] source);
- * -----------------------------
- *
- * $(I source) is the string to parse, and data of the templated type that is read from the string
- * is returned. See the examples to get a better idea of its use.
- *
- * Syntax:
- * The syntax for parsing $(I source) is mostly the same used by D without any prefixes/suffixes
- * (except 0x, 0b & 0o base specifiers). Also a special ubyte[] syntax is supported; see examples.
- * The following escape sequences are supported for strings and characters: \' \" \\
- * \a \b \f \n \r \t \v . Associative array literals use the same syntax as D, described here:
- * $(LINK http://www.digitalmars.com/d/2.0/expression.html#AssocArrayLiteral). All whitespace is
- * ignored (except of course within strings).
- *
- * There are also some public utility functions with their own documentation.
- *
- * Throws:
- * On errors, a ParseException or a UnicodeException (both extend TextException) is thrown with a
- * suitable message. No other exceptions should be thrown.
- *
- * Remarks:
- * There is currently no support for reading wchar/dchar strings. There are, however, unicode
- * conversions for converting UTF-8 to UTF-16/32. Be careful if converting on a char-by-char basis;
- * such conversions cannot be used for non-ascii characters.
- *
- * Examples:
- * ------------------------------------------------------------------------------------------------
- * // Basic examples:
- * ulong        a = parseTo!(ulong) ("20350");
- * float        d = parseTo!(float) ("  1.2e-9 ");
- * int[]        b = parseTo!(int[]) ("[0,1,2,3]");
- *
- * // String and char[] syntax:
- * char[]       c = parseTo!(char[]) ("\"A string\"");
- * char[]       e = parseTo!(char[]) ("['a','n','o','t','h','e','r', ' ' ,'s','t','r','i','n','g']");
- *
- * // These can be used interchangeably; here's a more complex example of an associative array:
- * bool[char[]] f = parseTo!(bool[char[]]) ("[ \"one\":true, ['t','w','o']:false, \"three\":1, \"four\":000 ]");
- *
- * // There is also a special notation for ubyte[] types:
- * // The digits following 0x must be in pairs and each specify one ubyte.
- * assert ( parseTo!(ubyte[]) (`0x01F2AC`) == parseTo!(ubyte[]) (`[01 ,0xF2, 0xAC]`) );
- *
- * // There's no limit to the complexity!
- * char[char[][][][char]][bool] z = ...; // don't expect me to write this!
- * ------------------------------------------------------------------------------------------------
- *************************************************************************************************/
-
-module mde.mergetag.parse.parseTo;
-
-// tango imports
-import tango.core.Exception : TextException, UnicodeException;
-import cInt = tango.text.convert.Integer;
-import cFloat = tango.text.convert.Float;
-import Utf = tango.text.convert.Utf;
-import Util = tango.text.Util;
-
-/**
- * Exception type thrown by the parseTo templates and their helpers when the source text is
- * malformed. Extends tango's TextException.
- */
-class ParseException : TextException
-{
-    this (char[] msg) { super (msg); }
-}
-
-
-//BEGIN parseTo templates
-
-// Associative arrays
-
-const char[] AA_ERR = "Invalid associative array: ";
-/* Parses text of the form [key:value, key:value, ...] into an associative array.
- *
- * The text between the brackets is divided into pairs by split(); each pair is then scanned for
- * the first ':' lying outside a quoted string or character, and the two halves are parsed with
- * parseTo!(S) and parseTo!(T).
- *
- * Throws:
- *  ParseException if src is not bracketed, a quote is left open, or a pair contains no ':'.
- *  Whatever the key and value parsers throw is passed on.
- */
-T[S] parseTo(T : T[S], S) (char[] src) {
-    src = Util.trim(src);
-    if (src.length < 2 || src[0] != '[' || src[$-1] != ']')
-        throw new ParseException (AA_ERR ~ "not [ ... ]");	// bad braces.
-    
-    T[S] ret;
-    foreach (char[] pair; split (src[1..$-1])) {
-        uint i = 0;
-        while (i < pair.length) {	// advance to the ':'
-            char c = pair[i];
-            if (c == ':') break;
-            if (c == '\'' || c == '"') {	// string or character
-                // skip to the matching closing quote, stepping over escape sequences
-                ++i;
-                while (i < pair.length && pair[i] != c) {
-                    if (pair[i] == '\\') {
-                        if (i+2 >= pair.length) throw new ParseException (AA_ERR ~ "unfinished escape sequence within string/char");
-                        ++i;	// escape seq.
-                    }
-                    ++i;
-                }
-                // reached when the closing quote was never found
-                if (i == pair.length) {
-                    throw new ParseException (AA_ERR ~ "encountered [ ... KEY] (missing :DATA)");
-                }
-            }
-            ++i;
-        }
-        // reached when the whole pair was scanned without finding a ':'
-        if (i == pair.length) {
-            throw new ParseException (AA_ERR ~ "encountered [ ... KEY:] (missing DATA)");
-        }
-        ret[parseTo!(S) (pair[0..i])] = parseTo!(T) (pair[i+1..$]);
-    }
-    return ret;
-}
-// Compares a parsed associative array with a literal one (member-wise, see FIXME).
-debug (UnitTest) unittest {
-    char[][char] X = parseTo!(char[][char]) (`['a':"animal", 'b':['b','u','s']]`);
-    char[][char] Y = ['a':cast(char[])"animal", 'b':['b','u','s']];
-    
-    //FIXME: when the compiler's fixed: http://d.puremagic.com/issues/show_bug.cgi?id=1671
-    // just assert (X == Y)
-    assert (X.length == Y.length);
-    assert (X.keys == Y.keys);
-    assert (X.values == Y.values);
-    //X.rehash; Y.rehash;	// doesn't make a difference
-    //assert (X == Y);		// fails (compiler bug)
-}
-
-
-// Arrays
-
-/* Parses a bracketed, comma-separated list ([x, ..., z]) into an array by handing the trimmed
- * text to toArray. Throws a ParseException if the text is not enclosed in [ ]. */
-T[] parseTo(T : T[]) (char[] src) {
-    src = Util.trim(src);
-    bool bracketed = src.length >= 2 && src[0] == '[' && src[$-1] == ']';
-    if (!bracketed)
-        throw new ParseException ("Invalid array: not [x, ..., z]");
-    return toArray!(T[]) (src);
-}
-
-// String (array special case)
-/* Accepts either a double-quoted string with escape sequences ("...") or an ordinary array of
- * chars (['a', ..., 'c']). Throws a ParseException for anything else, for a backslash with
- * nothing after it and for unsupported escape sequences. */
-T parseTo(T : char[]) (char[] src) {
-    src = Util.trim(src);
-    bool longEnough = src.length >= 2;
-    
-    if (longEnough && src[0] == '[' && src[$-1] == ']' && !(src[0] == '"' && src[$-1] == '"'))
-        return toArray!(T) (src);
-    if (!(longEnough && src[0] == '"' && src[$-1] == '"'))
-        throw new ParseException ("Invalid string: not quoted (\"*\") or char array (['a',...,'c'])");
-    
-    char[] inner = src[1..$-1];
-    T ret;
-    ret.length = inner.length;	// upper bound: unescaping can only shorten the text
-    uint n = 0;			// number of characters written to ret
-    for (uint t = 0; t < inner.length; ++t) {
-        char c = inner[t];
-        if (c == '\\') {
-            ++t;
-            if (t == inner.length) throw new ParseException ("Invalid string: ends \\\" !");	// next char is "
-            c = replaceEscapedChar (inner[t]);	// throws if it's invalid
-        }
-        ret[n++] = c;
-    }
-    return ret[0..n];
-}
-// Unicode conversions for strings:
-// Both overloads parse the source as a UTF-8 string with the char[] overload and then convert
-// the result with Utf.toString16 / Utf.toString32.
-T parseTo(T : wchar[]) (char[] src) {
-    // May throw a UnicodeException; don't bother catching and rethrowing:
-    return Utf.toString16 (parseTo!(char[]) (src));
-}
-T parseTo(T : dchar[]) (char[] src) {
-    // May throw a UnicodeException; don't bother catching and rethrowing:
-    return Utf.toString32 (parseTo!(char[]) (src));
-}
-
-// Binary (array special case)
-/* Accepts either an ordinary array of ubytes ([1, 0xF2, ...]) or the compact form 0xHHHH...
- * in which each pair of hex digits following the prefix is one ubyte. Throws a ParseException
- * for any other input, for an odd number of digits and for non-hex digits. */
-T parseTo(T : ubyte[]) (char[] src) {
-    src = Util.trim(src);
-    
-    // Standard case:
-    bool bracketed = src.length >= 2 && src[0] == '[' && src[$-1] == ']';
-    if (bracketed) return toArray!(T) (src);
-    
-    // Special case: sequence of hex digits, each pair of which is a ubyte
-    bool hexForm = src.length >= 2 && src[0..2] == "0x";
-    if (!hexForm)
-        throw new ParseException ("Invalid ubyte[]: not an array and doesn't start 0x");
-    
-    char[] hex = src[2..$];    // the digits alone
-    if (hex.length % 2 == 1) throw new ParseException ("Invalid binary: odd number of chars");
-    
-    T ret;
-    ret.length = hex.length / 2;	// exact
-    
-    uint pos = 0;		// advanced by readHexChar
-    for (uint n = 0; n < ret.length; ++n) {
-        ubyte x = readHexChar(hex, pos) << 4;
-        x |= readHexChar(hex, pos);
-        ret[n] = x;
-    }
-    return ret;
-}
-
-// Checks the array, string, wide-string and ubyte[] overloads of parseTo against literal
-// expected values.
-debug (UnitTest) unittest {
-    assert (parseTo!(double[]) (`[1.0,1.0e-10]`) == [1.0, 1.0e-10]);   // generic array stuff
-    assert (parseTo!(double[]) (`[     ]`) == cast(double[]) []);      // empty array
-    
-    // char[] and char conversions, with commas, escape sequences and multichar UTF8 characters:
-    assert (parseTo!(char[][]) (`[ ".\"", [',','\''] ,"!\b€" ]`) == [ ".\"".dup, [',','\''] ,"!\b€" ]);
-    
-    // wchar[] and dchar[] conversions:
-    // The characters were pretty-much pulled at random from unicode tables.
-    // The last few cause some weird (display only) effects in my editor.
-    assert (parseTo!(wchar[]) ("\"Test string: ¶α؟अกሀ搀\"") == "Test string: ¶α؟अกሀ搀"w);
-    assert (parseTo!(dchar[]) ("\"Test string: ¶α؟अกሀ搀\"") == "Test string: ¶α؟अกሀ搀"d);
-    
-    assert (parseTo!(ubyte[]) (`0x01F2AC`) == cast(ubyte[]) [0x01, 0xF2, 0xAC]);	// ubyte[] special notation
-    assert (parseTo!(ubyte[]) (`[01 ,0xF2, 0xAC]`) == cast(ubyte[]) [0x01, 0xF2, 0xAC]);	// ubyte[] std notation
-}
-
-
-// Basic types
-
-// Char
-/* Parses a single-quoted character literal: either a plain character ('c') or one of the
- * supported escape sequences ('\n', '\'', ...).
- *
- * Throws:
- *  ParseException if src is not quoted, holds a lone backslash, holds more than one character
- *  or holds an unsupported escape sequence; UnicodeException if the quoted content starts with
- *  a non-ASCII byte (i.e. is presumably a multibyte UTF-8 character).
- */
-T parseTo(T : char) (char[] src) {
-    src = Util.trim(src);
-    if (src.length < 3 || src[0] != '\'' || src[$-1] != '\'')
-        throw new ParseException ("Invalid char: not quoted (e.g. 'c')");
-    if (src.length == 3) {
-        if (src[1] != '\\') return src[1];	// plain, non-escaped character
-        // '\' : a backslash on its own does not escape anything
-        throw new ParseException ("Invalid char: unfinished escape sequence");
-    }
-    // An escape sequence is a backslash plus exactly one character; without this check a
-    // two-character literal such as 'ab' would be misread as the escape sequence '\b'.
-    if (src.length == 4 && src[1] == '\\') return replaceEscapedChar (src[2]);
-    
-    // Report various errors; warnings for likely and difficult to tell cases:
-    // Warn in case it's a multibyte UTF-8 character (every byte of which has the high bit set):
-    if (src[1] & 0x80u) throw new UnicodeException ("Invalid char: too long (non-ASCII UTF-8 characters cannot be read as a single character)", 1);
-    throw new ParseException ("Invalid char: too long");
-}
-/* Basic unicode conversions for wide-chars.
-* NOTE: c > 127 signals the start of a multibyte UTF-8 sequence which must be converted for
-* UTF-16/32. But since we don't know what the next bytes are we can't do the conversion.
-*
-* Both overloads parse the source with the char overload and widen the result; a char above
-* 127 throws a UnicodeException carrying WIDE_CHAR_ERROR. */
-const char[] WIDE_CHAR_ERROR = "Error: unicode non-ascii character cannot be converted from a single UTF-8 char";
-T parseTo(T : wchar) (char[] src) {
-    char c = parseTo!(char) (src);
-    if (c <= 127u) return cast(wchar) c;	// this char can be converted
-    else throw new UnicodeException (WIDE_CHAR_ERROR, 1);
-}
-T parseTo(T : dchar) (char[] src) {
-    char c = parseTo!(char) (src);
-    if (c <= 127u) return cast(dchar) c;	// this char can be converted
-    else throw new UnicodeException (WIDE_CHAR_ERROR, 1);
-}
-// Checks an escaped char and the ASCII path of the wide-char overloads.
-debug (UnitTest) unittest {
-    assert (parseTo!(char) ("\'\\\'\'") == '\'');
-    assert (parseTo!(wchar) ("'X'") == 'X');
-    assert (parseTo!(dchar) ("'X'") == 'X');
-}
-
-// Bool
-/* Accepts the words true and false, or a number which evaluates to 0 or 1: one or more '0'
- * digits is false; any number of '0' digits followed by a single '1' is true. Everything else
- * throws a ParseException. */
-T parseTo(T : bool) (char[] src) {
-    src = Util.trim(src);
-    if (src == "true") return true;
-    if (src == "false") return false;
-    
-    // Strip leading zeros and look at what remains.
-    char[] rest = src;
-    while (rest.length > 0 && rest[0] == '0') rest = rest[1..$];
-    
-    bool onlyZeros = rest.length == 0 && src.length > 0;
-    if (onlyZeros) return false;
-    if (rest.length == 1 && rest[0] == '1') return true;
-    throw new ParseException ("Invalid bool: not true or false and doesn't evaluate to 0 or 1");
-}
-debug (UnitTest) unittest {
-    assert (parseTo!(bool[]) (`[true,false,01,00]`) == cast(bool[]) [1,0,1,0]);
-}
-
-// Ints
-// Every integer overload forwards to toTInt instantiated with its own type, which does the
-// parsing and the range check.
-T parseTo(T : byte) (char[] src) {
-    return toTInt!(T) (src);
-}
-T parseTo(T : short) (char[] src) {
-    return toTInt!(T) (src);
-}
-T parseTo(T : int) (char[] src) {
-    return toTInt!(T) (src);
-}
-T parseTo(T : long) (char[] src) {
-    return toTInt!(T) (src);
-}
-T parseTo(T : ubyte) (char[] src) {
-    return toTInt!(T) (src);
-}
-T parseTo(T : ushort) (char[] src) {
-    return toTInt!(T) (src);
-}
-T parseTo(T : uint) (char[] src) {
-    return toTInt!(T) (src);
-}
-T parseTo(T : ulong) (char[] src) {
-    return toTInt!(T) (src);
-}
-// Checks a negative value and the binary (0b), octal (0o) and hex (0x) prefixes.
-debug (UnitTest) unittest {
-    assert (parseTo!(byte) ("-5") == cast(byte) -5);
-    // annoyingly, octal syntax differs from D (blame tango):
-    assert (parseTo!(uint[]) ("[0b0100,0o724,0xFa59c,0xFFFFFFFF,0]") == [0b0100u,0724,0xFa59c,0xFFFFFFFF,0]);
-}
-
-// Floats
-// Every floating-point overload forwards to toTFloat instantiated with its own type.
-T parseTo(T : float) (char[] src) {
-    return toTFloat!(T) (src);
-}
-T parseTo(T : double) (char[] src) {
-    return toTFloat!(T) (src);
-}
-T parseTo(T : real) (char[] src) {
-    return toTFloat!(T) (src);
-}
-// Checks one value of each floating-point type, with and without an exponent.
-debug (UnitTest) unittest {
-    assert (parseTo!(float) ("0.0") == 0.0f);
-    assert (parseTo!(double) ("-1e25") == -1e25);
-    assert (parseTo!(real) ("5.24e-269") == cast(real) 5.24e-269);
-}
-//END parseTo templates
-
-//BEGIN Utility funcs
-/** Trims whitespace from both ends of a string, then checks for and removes the enclosing
-* array brackets: []
-*
-* Throws:
-*   ParseException if the first and last non-whitespace characters are not '[' and ']'.
-*
-* Returns:
-*   String without brackets (and whitespace outside those brackets). Useful for passing to split.
-*/
-char[] stripBrackets (char[] src) {
-    src = Util.trim(src);
-    bool bracketed = src.length >= 2 && src[0] == '[' && src[$-1] == ']';
-    if (!bracketed)
-        throw new ParseException ("Invalid bracketed string: not [...]");
-    return src[1..$-1];
-}
-
-/** Splits a string into substrings separated by '$(B ,)' with support for characters and strings
- * containing escape sequences and for embedded arrays ($(B [...])).
- *
- * Params:
- *     src A string to separate on commas. Where used for parsing arrays, the brackets enclosing
- *     the array should be removed before calling this function (stripBrackets can do this).
- *
- * Returns:
- *     An array of substrings within src, excluding commas. Whitespace is not stripped and
- *     empty strings may get returned. If src is empty or only whitespace, no substrings are
- *     returned.
- *
- * Throws:
- *     ParseException if a ']' is found without a matching '['. An unterminated string or
- *     character is not reported here; the text up to the end of src becomes the final piece.
- *
- * Remarks:
- *     This function is primarily intended as a utility function for use by the templates
- *     parsing arrays and associative arrays, but it may be useful in other cases too. Hence the
- *     fact no brackets are stripped from src.
- */
-char[][] split (char[] src) {
-    src = Util.trim (src);
-    if (src == "") return [];		// empty array: no elements when no data
-    
-    uint depth = 0;			// surface depth (embedded arrays)
-    char[][] ret;
-    // Unlikely to need a longer array. The +1 guarantees a non-zero length: the growth below
-    // doubles the length, which would never grow an empty array.
-    ret.length = src.length / 3 + 1;
-    uint k = 0;				// current split piece
-    uint i = 0, j = 0;			// current read location, start of current piece
-    
-    while (i < src.length) {
-        char c = src[i];
-        if (c == '\'' || c == '"') {	// string or character
-            ++i;
-            while (i < src.length && src[i] != c) {
-                if (src[i] == '\\') ++i;	// escape seq.
-                ++i;
-            }	// Doesn't throw if no terminal quote at end of src, but this should be caught later.
-        }
-        else if (c == '[') ++depth;
-        else if (c == ']') {
-            if (depth) --depth;
-            else throw new ParseException ("Invalid array literal: closes before end of data item.");
-        }
-        else if (c == ',' && depth == 0) {		// only if not an embedded array
-            if (ret.length <= k) ret.length = ret.length * 2;
-            ret[k++] = src[j..i];	// add this piece and increment k
-            j = i + 1;
-        }
-        ++i;
-    }
-    // An unterminated quote or a trailing escape leaves i beyond the end of src; clamp it so
-    // that the final slice stays within bounds.
-    if (i > src.length) i = src.length;
-    if (ret.length <= k) ret.length = k + 1;
-    ret[k] = src[j..i];		// add final piece (i >= j)
-    return ret[0..k+1];
-}
-
-/* Templated read-int function to read (un)signed integers of any of the built-in sizes.
- *
- * Actually a reimplementation of tango.text.convert.Integer toLong and parse functions.
- *
- * The source is trimmed, an optional sign and radix prefix are read with cInt.trim and the
- * digits with cInt.convert. The whole of src must be consumed and the value must fit TInt.
- *
- * Throws:
- *  ParseException if there are no digits, if any character is left over after the number, or
- *  if the value is out of range for TInt. A minus sign is always out of range for unsigned types.
- */
-private TInt toTInt(TInt) (char[] src) {
-    const char[] INT_OUT_OF_RANGE = "Integer out of range";
-    bool sign;
-    uint radix, ate, ate2;
-    
-    // Trim off whitespace.
-    // NOTE: Cannot use tango.text.convert.Integer.trim to trim leading whitespace since it doesn't
-    // treat new-lines, etc. as whitespace which for our purposes is whitespace.
-    src = Util.trim (src);
-    
-    ate = cInt.trim (src, sign, radix);
-    if (ate == src.length) throw new ParseException ("Invalid integer: no digits");
-    ulong val = cInt.convert (src[ate..$], radix, &ate2);
-    ate += ate2;
-    
-    if (ate < src.length)
-        throw new ParseException ("Invalid integer at marked character: \"" ~ src[0..ate] ~ "'" ~ src[ate] ~ "'" ~ src[ate+1..$] ~ "\"");
-    
-    if (sign) {
-        static if (TInt.min == 0) {
-            // Unsigned type. Comparing a negative long against TInt.min would be done
-            // unsigned for ulong and wrongly accept e.g. "-1", so reject explicitly.
-            throw new ParseException (INT_OUT_OF_RANGE);
-        } else {
-            // The magnitude of TInt.min is one more than TInt.max, so the check against max
-            // must not be applied to negative numbers.
-            if (val > cast(ulong) TInt.max + 1) throw new ParseException (INT_OUT_OF_RANGE);
-            return cast(TInt) (-cast(long) val);
-        }
-    }
-    if (val > TInt.max) throw new ParseException (INT_OUT_OF_RANGE);
-    return cast(TInt) val;
-}
-
-/* Basically a reimplementation of tango.text.convert.Float.toFloat which strips whitespace
- * before throwing an exception for overlong input.
- *
- * Throws:
- *  ParseException if src contains nothing but whitespace, or if cFloat.parse does not consume
- *  the whole of the trimmed text (e.g. "1.5x" or "abc").
- */
-private TFloat toTFloat(TFloat) (char[] src) {
-    // NOTE: As for toTInt(), this needs to strip leading as well as trailing whitespace.
-    src = Util.trim (src);
-    if (src == "") throw new ParseException ("Invalid float: no digits");
-    uint ate;
-    
-    TFloat x = cFloat.parse (src, &ate);
-    // ate is the number of characters parse consumed; anything left over is not part of the
-    // number and must not be silently ignored.
-    if (ate < src.length)
-        throw new ParseException ("Invalid float: unexpected characters after \"" ~ src[0..ate] ~ "\"");
-    return x;
-}
-
-/* Maps the character following a backslash to the character the escape sequence stands for.
- * The supported escape sequences are this subset of those supported by D:
- * \" \' \\ \a \b \f \n \r \t \v
- * Any other character throws a ParseException.
- */
-private char replaceEscapedChar (char c)
-{
-    switch (c) {
-        case '\"':  return '\"';
-        case '\'':  return '\'';
-        case '\\':  return '\\';
-        case 'a':   return '\a';
-        case 'b':   return '\b';
-        case 'f':   return '\f';
-        case 'n':   return '\n';
-        case 'r':   return '\r';
-        case 't':   return '\t';
-        case 'v':   return '\v';
-        default:
-            throw new ParseException ("Invalid escape sequence: \\"~c);
-    }
-}
-
-// Reads the hex digit [0-9A-Fa-f] at src[pos], returns its value (0-15) and advances pos past
-// it. Throws a ParseException (leaving pos unchanged) for any other character. Doesn't check
-// src.length.
-private ubyte readHexChar (char[] src, inout uint pos) {
-    char digit = src[pos];
-    ubyte x;
-    if ('0' <= digit && digit <= '9')      x = digit - '0';
-    else if ('A' <= digit && digit <= 'F') x = digit - 'A' + 10;
-    else if ('a' <= digit && digit <= 'f') x = digit - 'a' + 10;
-    else throw new ParseException ("Invalid hex digit.");
-    ++pos;
-    return x;
-}
-
-// Generic array reader
-// Assumes input is of form "[xxxxx]" (i.e. first and last chars are '[', ']' and length >= 2).
-// The text between the brackets is divided by split() and each piece parsed with parseTo!(T).
-private T[] toArray(T : T[]) (char[] src) {
-    char[][] pieces = split(src[1..$-1]);
-    T[] ret = new T[pieces.length];	// one slot per piece
-    foreach (size_t n, char[] piece; pieces) {
-        ret[n] = parseTo!(T) (piece);
-    }
-    return ret;
-}
-
-// Module-level unittest: prints progress to the console and checks that every supported escape
-// sequence is read correctly by the string overload.
-debug (UnitTest) {
-    import tango.io.Console;
-    
-    unittest {
-        Cout ("Running unittest: parseTo ...").flush;
-        
-        assert (parseTo!(char[]) ("\"\\a\\b\\t\\n\\v\\f\\r\\\"\\\'\\\\\"") == "\a\b\t\n\v\f\r\"\'\\");
-        
-        Cout (" complete").newline;
-    }
-}
-//END Utility funcs
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/mergetag/serialize.d	Tue Aug 05 11:51:51 2008 +0100
@@ -0,0 +1,377 @@
+/**************************************************************************************************
+ * Generic serialization templated function.
+ *
+ * copyright: Copyright (c) 2007-2008 Diggory Hardy.
+ *
+ * author: Diggory Hardy, diggory.hardy@gmail.com
+ *
+ * Supports:
+ *  Associative arrays, arrays (inc. strings), structs, char types, bool, int types, float types.
+ *
+ * Examples:
+ * ------------------------------------------------------------------------------------------------
+ * // Basic examples:
+ * Cout (serialize!(byte) (-13)).newline;                       // -13
+ * Cout (serialize!(real) (2.56e11)).newline;                   // 2.55999999999999990000e+11
+ * Cout (serialize!(double[]) ([0.0, 1.0, 2.0, 3.0])).newline;  // [0.00000000000000000,1.00000000000000000,2.00000000000000000,3.00000000000000000]
+ * Cout (serialize ([true,false,false])).newline;               // [true,false,false]
+ *
+ * // String and ubyte[] special syntaxes (always used):
+ * Cout (serialize ("A string.")).newline;                      // "A string." (including quotes)
+ * Cout (serialize (cast(ubyte[]) [5u, 0xF1u, 0x10u])).newline; // 0x05f110
+ *
+ * // Associative arrays:
+ * Cout (serialize ([-1:"negative one"[], 0:"zero", 1:"one"])).newline; // [0:"zero",1:"one",-1:"negative one"]
+ *
+ * // Structs:
+ * struct S {   int a = 5;  double[int[]] x;    }
+ * S s;
+ * Cout (serialize (s));
+ *
+ * // No limit on complexity...
+ * char[] somethingComplicated = serialize!(real[][][bool[int[][]]]) (...);
+ * ------------------------------------------------------------------------------------------------
+ *
+ * TODO: Optimize memory allocation (if possible?). Test best sizes for initial allocations
+ * instead of merely guessing?
+ *************************************************************************************************/
+//NOTE: in case of multiple formats, make this a dummy module importing both serialize modules,
+// or put all the code here.
+module mde.mergetag.serialize;
+// Since serialize is never used in a module where deserialize is not used, save an import:
+public import mde.mergetag.deserialize;
+
+// tango imports
+import tango.core.Traits;
+import tango.core.Exception : UnicodeException, IllegalArgumentException;
+import cInt = tango.text.convert.Integer;
+import cFloat = tango.text.convert.Float;
+import Utf = tango.text.convert.Utf;
+
+
+alias serialize parseFrom;      // support the old name
+
+// Formatting options, for where multiple formats are supported by the deserializer.
+
+// Output ubyte[] using the special binary notation (0x01f2ac instead of [1,242,172])?
+const bool SPECIAL_BINARY_NOTATION = true;
+
+// Output bool values as words (true / false) rather than as digits (1 / 0)?
+const bool BINARY_AS_WORDS = true;
+
+
+char[] serialize(U) (U val) {   // Returns val converted to text; the branch is selected at compile time from U.
+    // Associative arrays (NOTE: cannot use is() expression)
+    static if (isAssocArrayType!(U)) {          // generic associative array
+        alias typeof(U.keys[0])     S;
+        alias typeof(U.values[0])   T;
+        char[] ret;
+        // A guess, including values themselves and [,:] elements (must be at least 2).
+        ret.length = val.length * (defLength!(T) + defLength!(S) + 2) + 2;
+        ret[0] = '[';
+        uint i = 1;
+        foreach (S k, T v; val) {
+            char[] s = serialize!(S) (k) ~ ":" ~ serialize!(T) (v);
+            i += s.length;
+            while (i+1 >= ret.length)           // one doubling may not be enough for a long entry
+                ret.length = ret.length * 2;
+            ret[i-s.length .. i] = s;
+            ret[i++] = ',';
+        }
+        if (i == 1) ++i;    // special case (empty input) - not overwriting a comma
+        ret[i-1] = ']';     // replaces last comma
+        return ret[0..i];
+    }
+    // Arrays
+    else static if (is(U S == S[]) || isStaticArrayType!(U)) {
+        alias typeof(U[0]) T;
+        
+        static if (is(T == char)) {             // string
+            char[] ret = new char[val.length * 2 + 2];  // Initial storage. This should ALWAYS be enough.
+            ret[0] = '"';
+            uint i = 1;
+            for (uint t = 0; t < val.length;) {
+                // process a block of non-escapable characters
+                uint s = t;
+                while (t < val.length && !isEscapableChar(val[t]))
+                    ++t;                        // skip all non-escapable chars
+                uint j = i + t - s;
+                ret[i..j] = val[s..t];          // copy a block
+                i = j;
+                // process a block of escapable characters
+                while (t < val.length && isEscapableChar(val[t])) {
+                    ret[i++] = '\\';                    // backslash; increment i
+                    ret[i++] = escapeChar(val[t++]);    // character; increment i and t
+                }
+            }
+            ret[i++] = '"';
+            return ret[0..i];
+        }
+        else static if (is(T == wchar) || is(T == dchar)) {   // wstring or dstring
+            // May throw a UnicodeException; don't bother catching and rethrowing:
+            return serialize!(char[]) (Utf.toString (val));
+        }
+        else static if (SPECIAL_BINARY_NOTATION && is(T == ubyte)) {    // special binary notation
+            // Note: To disable the usage of this special type, set SPECIAL_BINARY_NOTATION = false.
+            static const char[16] digits = "0123456789abcdef";
+    
+            char[] ret = new char[val.length * 2 + 2];  // exact length: "0x" plus two hex digits per byte
+            ret[0..2] = "0x";
+            uint i = 2;
+    
+            foreach (ubyte x; val) {
+                ret[i++] = digits[x >> 4];      // high nibble
+                ret[i++] = digits[x & 0x0F];    // low nibble
+            }
+            return ret;
+        }
+        else {                                  // generic array
+            char[] ret;
+            // A guess, including commas and brackets (must be at least 2)
+            ret.length = val.length * (defLength!(T) + 1) + 2;
+            ret[0] = '[';
+            uint i = 1;
+            foreach (T x; val) {
+                char[] s = serialize!(T) (x);
+                i += s.length;
+                while (i+1 >= ret.length)       // one doubling may not be enough for a long element
+                    ret.length = ret.length * 2;
+                ret[i-s.length .. i] = s;
+                ret[i++] = ',';
+            }
+            if (i == 1)
+                ++i;            // special case (empty input) - not overwriting a comma
+            ret[i-1] = ']';     // replaces last comma
+            return ret[0..i];
+        }
+    }
+    // Structs
+    else static if (is(U == struct)) {
+        char[] ret;
+        // A very rough guess.
+        ret.length = val.sizeof * 4;
+        ret[0] = '{';
+        uint i = 1;
+        foreach (k, v; val.tupleof) {           // k is the member's index, v its value
+            alias typeof(v) T;
+            char[] s = serialize!(size_t) (k) ~ ":" ~ serialize!(T) (v);
+            i += s.length;
+            while (i+1 >= ret.length)           // one doubling may not be enough for a long member
+                ret.length = ret.length * 2;
+            ret[i-s.length .. i] = s;
+            ret[i++] = ',';
+        }
+        if (i == 1) ++i;    // special case (no members) - not overwriting a comma
+        ret[i-1] = '}';     // replaces last comma
+        return ret[0..i];
+    }
+    // Basic types
+    else static if (is(U == char)) {            // char (UTF-8 byte)
+        // Note: if (val > 127) "is invalid UTF-8 single char".  However we don't know
+        // what this is for, in particular if it will be recombined with other chars later.
+        
+        // Can't return reference to static array; so making it dynamic is cheaper than copying.
+        char[] ret = new char[4];   // max length for an escaped char
+        ret[0] = '\'';
+        
+        if (!isEscapableChar (val)) {
+            ret[1] = val;
+            ret[2] = '\'';
+            return ret[0..3];
+        } else {
+            ret[1] = '\\';
+            ret[2] = escapeChar (val);
+            ret[3] = '\'';
+            return ret;
+        }
+    } else static if (is(U == wchar) ||
+                      is(U == dchar)) {         // wchar or dchar (UTF-16/32 single char)
+        // Note: only ascii can be converted. NOTE: convert to UTF-8 (multibyte) char?
+        if (val <= 127u)
+            return serialize!(char) (cast(char) val);  // ASCII
+        else throw new UnicodeException (
+            "Error: unicode non-ascii character cannot be converted to a single UTF-8 char", 0);
+    } else static if (is (U == bool)) {         // boolean
+        static if (BINARY_AS_WORDS) {
+            if (val)
+                return "true";
+            else return "false";
+        } else {
+            if (val)
+                return "1";
+            else return "0";
+        }
+    } else static if (is (U : long)) {          // any integer type, except char types and bool
+        static if (is (U == ulong))             // ulong may not be supported properly
+            if (val > cast(ulong) long.max)
+                throw new IllegalArgumentException ("No handling available for ulong where value > long.max");
+        return cInt.toString (val);
+    } else static if (is (U : real)) {          // any (real) floating point type
+        char[] ret = new char[32];              // minimum allowed by assert in format
+        return cFloat.format (ret, val, U.dig+2, 1);// from old C++ tests, U.dig+2 gives best(?) accuracy
+    }
+    // Unsupported
+    else
+        static assert (false, "Unsupported type: "~U.stringof);
+}
+
+//BEGIN Utility funcs
+/* This template provides the initial length for strings for formatting various types. These strings
+ * can be expanded; this value is intended to cover 90% of cases or so.
+ *
+ * NOTE: This template was intended to provide specialisations for different types.
+ * This one value should do reasonably well for most types.
+ */
+private {
+    template defLength(T)        { const uint defLength = 20; }  // fallback guess for all other types
+    template defLength(T : char) { const uint defLength = 4;  }  // an escaped char, e.g. '\n', takes 4 characters
+    template defLength(T : bool) { const uint defLength = 5;  }  // length of "false"
+}
+private bool isEscapableChar (char c) {    // true for the contiguous range \a .. \r, both quotes and the backslash
+    return c == '\\' || c == '\'' || c == '\"' || ('\a' <= c && c <= '\r');
+}
+// Throws on unsupported escape sequences; however this should never happen within serialize.
+private char escapeChar (char c) {
+    // Translate an escapable character into the character that is written
+    // after the backslash of its escape sequence. Anything that is not one
+    // of the ten characters listed below is rejected with an exception
+    // (see the throw at the end of the function).
+
+    switch (c) {
+        // control characters
+        case '\a':
+            return 'a';
+        case '\b':
+            return 'b';
+        case '\t':
+            return 't';
+        case '\n':
+            return 'n';
+        case '\v':
+            return 'v';
+        case '\f':
+            return 'f';
+        case '\r':
+            return 'r';
+
+        // characters that escape to themselves
+        case '\"':
+            return '\"';
+        case '\'':
+            return '\'';
+        case '\\':
+            return '\\';
+
+        // anything else is not escapable; handled by the throw below
+        default:
+            break;
+    }
+    
+    // no case matched:
+    throw new IllegalArgumentException ("Internal error (escapeChar)");
+}
+//END Utility funcs
+
+
+
+debug (UnitTest) {
+    import tango.util.log.Log : Log, Logger;
+
+    private Logger logger;
+    static this() {
+        logger = Log.getLogger ("text.serialize");
+    }
+unittest {
+    // Utility: runs dg and returns true if it threw an Exception (whose message is logged).
+    bool throws (void delegate() dg) {
+        bool r = false;
+        try {
+            dg();
+        } catch (Exception e) {
+            r = true;
+            logger.info ("Exception caught: "~e.msg);
+        }
+        return r;
+    }
+    assert (!throws ({ int i = 5; }));  // sanity checks of the helper itself
+    assert (throws ({ throw new Exception ("Test - this exception should be caught"); }));
+    
+    // Associative arrays
+    char[] X = serialize!(char[][char]) (['a':cast(char[])"animal", 'b':['b','u','s']]);
+    char[] Y = `['a':"animal",'b':"bus"]`;
+    assert (X == Y);
+    
+    
+    // Arrays
+    // generic array stuff:
+    assert (serialize!(double[]) ([1.0, 1.0e-10]) == `[1.00000000000000000,0.10000000000000000e-09]`);
+    assert (serialize!(double[]) (cast(double[]) []) == `[]`);		// empty array
+    
+    // char[] conversions, with commas, escape sequences and multichar UTF8 characters:
+    assert (serialize!(char[][]) ([ ".\""[], [',','\''] ,"!\b€" ]) == `[".\"",",\'","!\b€"]`);
+    
+    // wchar[] and dchar[] conversions:
+    // The characters were pretty-much pulled at random from unicode tables.
+    assert (serialize!(wchar[]) ("Test string: ¶α؟अกሀ搀"w) == "\"Test string: ¶α؟अกሀ搀\"");
+    assert (serialize!(dchar[]) ("Test string: ¶α؟अกሀ搀"d) == "\"Test string: ¶α؟अกሀ搀\"");
+    
+    
+    static if (SPECIAL_BINARY_NOTATION)
+        assert (serialize!(ubyte[]) (cast(ubyte[]) [0x01, 0xF2, 0xAC]) == `0x01f2ac`);	// ubyte[] special notation
+    else
+        assert (serialize!(ubyte[]) (cast(ubyte[]) [0x01, 0xF2, 0xAC]) == `[1,242,172]`);
+    
+    
+    // Structs (members are written as index:value pairs; foo2 keeps Foo's default initialisers)
+    struct Foo {    int a = 9;  char b = '\v'; float c;    }
+    struct Bar {    Foo a,b;    }
+    static Foo foo1 = { a:150, b:'8', c:17.2f}, foo2;
+    Bar bar;
+    bar.a = foo1;
+    bar.b = foo2;
+    assert (serialize(bar) == "{0:{0:150,1:'8',2:1.72000007e+01},1:{0:9,1:'\\v',2:nan}}");
+    
+    
+    // Basic Types
+    // Character types (non-ASCII wchar/dchar values are expected to throw)
+    assert (serialize!(char) ('\'') == "\'\\\'\'");
+    assert (serialize!(wchar) ('X') == "'X'");
+    assert (serialize!(dchar) ('X') == "'X'");
+    assert (throws ({ char[] r = serialize!(wchar) ('£');   /* unicode U+00A3 */ }));
+    assert (throws ({ char[] r = serialize!(dchar) ('£'); }));
+    
+    // Bool (only the variant matching BINARY_AS_WORDS is checked)
+    static if (BINARY_AS_WORDS)
+        assert (serialize(false) == "false");
+    else
+        assert (serialize(true) == "1");
+    
+    // Integers (signed extremes, then unsigned types at their maximum values)
+    assert (serialize (cast(byte) -5) == "-5");
+    assert (serialize (cast(short) -32768) == "-32768");
+    assert (serialize (-5) == "-5");
+    assert (serialize (-9223372036854775807L) == "-9223372036854775807");
+    assert (serialize (cast(ubyte) -1) == "255");
+    assert (serialize (cast(ushort) -1) == "65535");
+    assert (serialize!(uint) (-1) == "4294967295");
+    assert (serialize (cast(ulong) 0x7FFF_FFFF_FFFF_FFFFLu) == "9223372036854775807");
+    assert (serialize!(uint[]) ([0b0100u,0724,0xFa59c,0xFFFFFFFF,0]) ==
+                               "[4,468,1025436,4294967295,0]");
+    assert (throws ({
+        // ulong is not properly supported: values above long.max make serialize throw.
+        // NOTE: this is something that should really work.
+        char[] r = serialize!(ulong) (0x8FFF_FFFF_FFFF_FFFFLu);
+    }));
+    
+    // Floats
+    // These numbers are not particularly meaningful:
+    assert (serialize!(float) (0.0f) == "0.00000000");
+    assert (serialize!(double) (-1e25) == "-1.00000000000000000e+25");
+    assert (serialize!(real) (cast(real) 4.918e300) == "4.91800000000000000000e+300");
+    
+    // Escape sequences (test conversion functions)
+    assert (serialize ("\a\b\t\n\v\f\r\"\'\\") == `"\a\b\t\n\v\f\r\"\'\\"`);
+    
+    logger.info ("Unittest complete.");
+}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/unittest/Translation.mtt	Tue Aug 05 11:51:51 2008 +0100
@@ -0,0 +1,7 @@
+{MT01}
+{test-1}
+<entry|Str1={0:"Test 1"}>
+<char[][]|depends=["test-2"]>
+{test-2}
+<entry|Str1={0:"Test 2"}>
+<entry|Str2={0:"Test 3",1:"Description"}>