Mercurial > projects > mde
changeset 79:61ea26abe4dd
Moved mde/mergetag/parse/parse(To/From) to mde/mergetag/(de)serialize. Implemented (de)serialization of structs.
author | Diggory Hardy <diggory.hardy@gmail.com> |
---|---|
date | Tue, 05 Aug 2008 11:51:51 +0100 |
parents | 79a1809421aa |
children | ea58f277f487 |
files | codeDoc/jobs.txt data/L10n/i18nUnitTest.mtt mde/font/font.d mde/gui/WidgetData.d mde/gui/content/options.d mde/input/Config.d mde/lookup/Options.d mde/lookup/Translation.d mde/mergetag/DataSet.d mde/mergetag/DefaultData.d mde/mergetag/deserialize.d mde/mergetag/parse/parseFrom.d mde/mergetag/parse/parseTo.d mde/mergetag/serialize.d unittest/Translation.mtt |
diffstat | 15 files changed, 1024 insertions(+), 946 deletions(-) [+] |
line wrap: on
line diff
--- a/codeDoc/jobs.txt Tue Jul 29 18:14:53 2008 +0100 +++ b/codeDoc/jobs.txt Tue Aug 05 11:51:51 2008 +0100 @@ -3,7 +3,6 @@ In progress: -Redesigning how widgets are created and receive their data. @@ -18,7 +17,7 @@ 3 on-event draw support (mde.events and GUI need to tell mde.mde) 3 Scheduler for drawing only windows which need redrawing. 3 Update scheduler as outlined in FIXME. -3 Windows building/compatibility (currently partial) +3 Windows building/compatibility (currently partial) - tango/sys/win32/SpecialPath.d 2 Remove ability to scan, then load, mergetag sections. Not so necessary with section creator callback and allows "sliding window" type partial buffering. 2 Options need a "level": simple options, for advanced users, for debugging only, etc. 2 Command-line options for paths to by-pass normal path finding functionality.
--- a/data/L10n/i18nUnitTest.mtt Tue Jul 29 18:14:53 2008 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -{MT01} -{test-1} -<entry|Str1=["Test 1"]> -<char[][]|depends=["test-2"]> -{test-2} -<entry|Str1=["Test 2"]> -<entry|Str2=["Test 3","Description",bogus,"entries",56]>
--- a/mde/font/font.d Tue Jul 29 18:14:53 2008 +0100 +++ b/mde/font/font.d Tue Aug 05 11:51:51 2008 +0100 @@ -29,7 +29,7 @@ import derelict.freetype.ft; import derelict.opengl.gl; -import mde.mergetag.parse.parseTo : parseTo; +import mde.mergetag.deserialize; import tango.stdc.stringz; import Util = tango.text.Util; import tango.util.log.Log : Log, Logger; @@ -294,6 +294,11 @@ int size; // font size FT_Face face; + + debug(mdeUnitTest) unittest { + // Don't do a unittest since font relies on loading the freetype library dynamically, + // normally done by Init. Also font is mostly visual and many problems will be obvious. + } } /+class OptionsFont : Options {
--- a/mde/gui/WidgetData.d Tue Jul 29 18:14:53 2008 +0100 +++ b/mde/gui/WidgetData.d Tue Aug 05 11:51:51 2008 +0100 @@ -38,8 +38,7 @@ import mde.mergetag.Reader; import mde.mergetag.Writer; import mde.setup.paths; -import mde.mergetag.parse.parseTo; -import mde.mergetag.parse.parseFrom : parseFrom; +import mde.mergetag.serialize; import tango.core.sync.Mutex; import tango.util.log.Log : Log, Logger;
--- a/mde/gui/content/options.d Tue Jul 29 18:14:53 2008 +0100 +++ b/mde/gui/content/options.d Tue Aug 05 11:51:51 2008 +0100 @@ -37,7 +37,7 @@ textOpts.length = list.length; foreach (i,s; list) { Translation.Entry transled = trans.getStruct (s); - textOpts[i] = new ContentOptionText(opts, s, transled.str, transled.desc); + textOpts[i] = new ContentOptionText(opts, s, transled.name, transled.desc); } } @@ -46,7 +46,7 @@ } static OptionList trial () { - return new OptionList (miscOpts, "OptionsMisc"); + return new OptionList (miscOpts, "L10n/OptionsMisc"); } protected:
--- a/mde/input/Config.d Tue Jul 29 18:14:53 2008 +0100 +++ b/mde/input/Config.d Tue Aug 05 11:51:51 2008 +0100 @@ -20,8 +20,8 @@ import MT = mde.mergetag.Reader; import mde.setup.paths; -import mde.mergetag.parse.parseTo : parseTo; -debug import mde.mergetag.parse.parseFrom : parseFrom; +import mde.mergetag.deserialize; +debug import mde.mergetag.serialize; import tango.util.log.Log : Log, Logger; import tango.util.collection.TreeBag : TreeBag;
--- a/mde/lookup/Options.d Tue Jul 29 18:14:53 2008 +0100 +++ b/mde/lookup/Options.d Tue Aug 05 11:51:51 2008 +0100 @@ -28,8 +28,7 @@ import mde.mergetag.Writer; import mde.mergetag.DataSet; import mde.mergetag.exception; -import mde.mergetag.parse.parseTo : parseTo; -import mde.mergetag.parse.parseFrom : parseFrom; +import mde.mergetag.serialize; import tango.core.Exception : ArrayBoundsException; import tango.util.log.Log : Log, Logger; @@ -202,8 +201,7 @@ * via hash-maps, which is a little slower than direct access but necessary since the option * must be changed in two separate places. */ void set(T) (char[] symbol, T val) { - static if (!TIsIn!(T,TYPES)) - static assert (false, "Options.set does not currently support type "~T.stringof); + static assert (TIsIn!(T,TYPES), "Options does not support type "~T.stringof); mixin (`alias opts`~TName!(T)~` optsVars;`); @@ -222,8 +220,7 @@ * * Using this method to read an option is not necessary, but allows for generic use. */ T get(T) (char[] symbol) { - static if (!TIsIn!(T,TYPES)) - static assert (false, "Options.get does not currently support type "~T.stringof); + static assert (TIsIn!(T,TYPES), "Options does not support type "~T.stringof); mixin (`alias opts`~TName!(T)~` optsVars;`); @@ -237,8 +234,7 @@ /** List the names of all options of a specific type. */ char[][] list(T) () { - static if (!TIsIn!(T,TYPES)) - static assert (false, "Options.list does not currently support type "~T.stringof); + static assert (TIsIn!(T,TYPES), "Options does not support type "~T.stringof); mixin (`alias opts`~TName!(T)~` optsVars;`); @@ -248,7 +244,7 @@ protected { OptionChanges optionChanges; // all changes to options (for saving) - // The "pointer lists": + // The "pointer lists", e.g. 
char[]*[ID] optscharA; mixin (PLists!(TYPES)); } @@ -408,8 +404,7 @@ this () {} void set(T) (ID id, T x) { - static if (!TIsIn!(T,TYPES)) - static assert (false, "OptionChanges.set does not currently support type "~T.stringof); + static assert (TIsIn!(T,TYPES), "Options does not support type "~T.stringof); mixin (`alias opts`~TName!(T)~` optsVars;`); mixin (`alias `~TName!(T)~`s vars;`);
--- a/mde/lookup/Translation.d Tue Jul 29 18:14:53 2008 +0100 +++ b/mde/lookup/Translation.d Tue Aug 05 11:51:51 2008 +0100 @@ -44,7 +44,7 @@ import mde.mergetag.DataSet; import mde.mergetag.Reader; import mde.mergetag.exception; -import mde.mergetag.parse.parseTo; +import mde.mergetag.deserialize; import tango.util.log.Log : Log, Logger; @@ -68,7 +68,7 @@ char[] entry (char[] id) { Entry* p = id in entries; if (p) { - return p.str; + return p.name; } else { return id; } @@ -78,7 +78,7 @@ Entry* p = id in entries; if (p) { description = p.desc; - return p.str; + return p.name; } else { return id; } @@ -91,7 +91,7 @@ return *p; } else { Entry ret; - ret.str = id; + ret.name = id; return ret; } } @@ -117,7 +117,7 @@ IReader reader; try { - reader = dataDir.makeMTReader ("L10n/"~name, PRIORITY.HIGH_LOW); + reader = dataDir.makeMTReader (name, PRIORITY.HIGH_LOW); /* Note: we don't want to load every translation section depended on to its own class * instance, since we want to merge them. So make every mergetag section use the same * instance. 
*/ @@ -167,22 +167,14 @@ */ void addTag (char[] tp, ID id, char[] dt) { if (tp == "entry") { - char[][] fields = split (stripBrackets (dt)); + // If the tag already exists, don't replace it + if (cast(char[]) id in entries) return; - if (fields.length < 1) { - // This tag is invalid, but this fact doesn't need to be reported elsewhere: + Entry entry = deserialize!(Entry) (dt); + if (entry.name is null) { // This tag is invalid; ignore it logger.error ("For name "~name~", L10n "~L10n~": tag with ID "~cast(char[])id~" has no data"); return; } - // If the tag already exists, don't replace it - if (cast(char[]) id in entries) return; - - Entry entry; - entry.str = parseTo!(char[]) (fields[0]); - - if (fields.length >= 2) - entry.desc = parseTo!(char[]) (fields[1]); - entries[cast(char[]) id] = entry; } else if (tp == "char[][]") { if (id == cast(ID)"depends") depends = cast(ID[]) parseTo!(char[][]) (dt); @@ -197,7 +189,7 @@ * Note that although each entry also has a version field, this is not loaded for general use. */ struct Entry { - char[] str; // The translated string + char[] name; // The translated string char[] desc; // An optional description } @@ -236,7 +228,7 @@ char[] currentL10n = miscOpts.L10n; miscOpts.L10n = "test-1"; - Translation transl = load ("i18nUnitTest"); + Translation transl = load ("unittest/Translation"); // Simple get-string, check dependancy's entry doesn't override assert (transl.entry ("Str1") == "Test 1");
--- a/mde/mergetag/DataSet.d Tue Jul 29 18:14:53 2008 +0100 +++ b/mde/mergetag/DataSet.d Tue Aug 05 11:51:51 2008 +0100 @@ -63,8 +63,8 @@ DataSet ds = new DataSet; ds.sec[cast(ID)"test"] = new DefaultData; assert (ds.getSections!(DefaultData)().length == 1); - ds.sec[cast(ID)"test"].addTag ("int",cast(ID)"T"," -543 "); - assert (ds.getSections!(DefaultData)()[cast(ID)"test"]._int[cast(ID)"T"] == -543); + ds.sec[cast(ID)"test"].addTag ("char[]",cast(ID)"T"," \"ut tag 1 \" "); + assert (ds.getSections!(DefaultData)()[cast(ID)"test"].Arg!(char[])[cast(ID)"T"] == "ut tag 1 "); logger.info ("Unittest complete."); }
--- a/mde/mergetag/DefaultData.d Tue Jul 29 18:14:53 2008 +0100 +++ b/mde/mergetag/DefaultData.d Tue Aug 05 11:51:51 2008 +0100 @@ -21,23 +21,17 @@ public import mde.mergetag.iface.IDataSection; import mde.mergetag.exception; -import mde.mergetag.parse.parseTo : parseTo; -import mde.mergetag.parse.parseFrom : parseFrom; +import mde.mergetag.serialize; /************************************************************************************************* * Default DataSection class. * - * Currently this is only used for headers, and thus the list of supported types has been + * Supported types are given by dataTypes. + * + * Currently DefaultData is only used for headers, and thus the list of supported types has been * reduced to just those used in headers. Load order is HIGH_LOW, i.e. existing entries aren't * overwritten. - * - * It did supports most of the basic types supported by D (excluding cent/ucent and - * imaginary/complex types) and array versions of each of these types, plus arrays of strings. - * - * Extending the class to support more types, even custom types, shouldn't be particularly - * difficult provided mde.text.parseTo and mde.text.parseFrom are extended to support the new - * types. *************************************************************************************************/ /* The implementation now uses a fair bit of generic programming. Adjusting the types supported * should be as simple as adjusting the list dataTypes, and possibly implemting new conversions in
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mde/mergetag/deserialize.d Tue Aug 05 11:51:51 2008 +0100 @@ -0,0 +1,605 @@ +/************************************************************************************************** + * Generic deserialization templated function. + * + * copyright: Copyright (c) 2007-2008 Diggory Hardy. + * + * author: Diggory Hardy, diggory.hardy@gmail.com + * + * Supports: + * Associative arrays, arrays (inc. strings), structs, char types, bool, int types, float types. + * + * There are also some public utility functions with their own documentation. + * + * Throws: + * On errors, a ParseException or a UnicodeException (both extend TextException) is thrown with a + * suitable message. No other exceptions should be thrown. + * + * Examples: + * ------------------------------------------------------------------------------------------------ + * // Basic examples: + * ulong a = deserialize!(ulong) ("20350"); + * float d = deserialize!(float) (" 1.2e-9 "); + * int[] b = deserialize!(int[]) ("[0,1,2,3]"); + * + * // String and char[] syntax: + * char[] c = deserialize!(char[]) ("\"A string\""); + * char[] e = deserialize!(char[]) ("['a','n','o','t','h','e','r', ' ' ,'s','t','r','i','n','g']"); + * + * // These be used interchangably; here's a more complex example of an associative array: + * bool[char[]] f = deserialize!(bool[char[]]) ("[ \"one\":true, ['t','w','o']:false, \"three\":1, \"four\":000 ]"); + * + * // There is also a special notation for ubyte[] types: + * // The digits following 0x must be in pairs and each specify one ubyte. + * assert ( deserialize!(ubyte[]) (`0x01F2AC`) == deserialize!(ubyte[]) (`[01 ,0xF2, 0xAC]`) ); + * + * // There's no limit to the complexity! + * char[char[][][][char]][bool] z = ...; // don't expect me to write this! + * ------------------------------------------------------------------------------------------------ + * + * TODO: Optimize memory allocation (if possible?). 
Test best sizes for initial allocations + * instead of merely guessing? + *************************************************************************************************/ +//NOTE: in case of multiple formats, make this a dummy module importing both serialize modules, +// or put all the code here. +module mde.mergetag.deserialize; + +// tango imports +import tango.core.Exception : TextException, UnicodeException; +import cInt = tango.text.convert.Integer; +import cFloat = tango.text.convert.Float; +import Utf = tango.text.convert.Utf; +import Util = tango.text.Util; + +/** + * Base class for deserialize exceptions. + */ +class ParseException : TextException +{ + this( char[] msg ) + { + super( msg ); + } +} + +alias deserialize parseTo; // support the old name + +//BEGIN deserialize templates + +// Associative arrays + +T[S] deserialize(T : T[S], S) (char[] src) { + src = Util.trim(src); + if (src.length < 2 || src[0] != '[' || src[$-1] != ']') + throw new ParseException ("Invalid associative array: not [ ... ]"); // bad braces. + + T[S] ret; + foreach (char[] pair; split (src[1..$-1])) { + uint i = 0; + while (i < pair.length) { // advance to the ':' + char c = pair[i]; + if (c == ':') break; + if (c == '\'' || c == '"') { // string or character + ++i; + while (i < pair.length && pair[i] != c) { + if (pair[i] == '\\') + ++i; // escape seq. + ++i; + } + // Could have an unterminated ' or " causing i >= pair.length, but: + // 1. Impossible: split would have thrown + // 2. In any case this would be caught below. + } + ++i; + } + if (i >= pair.length) + throw new ParseException ("Invalid associative array: encountered [ ... KEY] (missing :DATA)"); + ret[deserialize!(S) (pair[0..i])] = deserialize!(T) (pair[i+1..$]); + } + return ret; +} + + +// Arrays + +T[] deserialize(T : T[]) (char[] src) { + src = Util.trim(src); + if (src.length >= 2 && src[0] == '[' && src[$-1] == ']') + return toArray!(T[]) (src); + throw new ParseException ("Invalid array: not [ ... 
]"); +} + +// String (array special case) +T deserialize(T : char[]) (char[] src) { + src = Util.trim(src); + if (src.length >= 2 && src[0] == '"' && src[$-1] == '"') { + src = src[1..$-1]; + T ret; + ret.length = src.length; // maximum length; retract to actual length later + uint i = 0; + for (uint t = 0; t < src.length;) { + // process a block of non-escaped characters + uint s = t; + while (t < src.length && src[t] != '\\') ++t; // non-escaped characters + uint j = i + t - s; + ret[i..j] = src[s..t]; // copy a block + i = j; + + // process a block of escaped characters + while (t < src.length && src[t] == '\\') { + t++; + if (t == src.length) + throw new ParseException ("Invalid string: ends \\\" !"); // next char is " + ret[i++] = unEscapeChar (src[t++]); // throws if it's invalid + } + } + return ret[0..i]; + } + else if (src.length >= 2 && src[0] == '[' && src[$-1] == ']') + return toArray!(T) (src); + throw new ParseException ("Invalid string: not quoted (\"*\") or char array (['a',...,'c'])"); +} +// Unicode conversions for strings: +T deserialize(T : wchar[]) (char[] src) { + // May throw a UnicodeException; don't bother catching and rethrowing: + return Utf.toString16 (deserialize!(char[]) (src)); +} +T deserialize(T : dchar[]) (char[] src) { + // May throw a UnicodeException; don't bother catching and rethrowing: + return Utf.toString32 (deserialize!(char[]) (src)); +} + +// Binary (array special case) +T deserialize(T : ubyte[]) (char[] src) { + src = Util.trim(src); + // Standard case: + if (src.length >= 2 && src[0] == '[' && src[$-1] == ']') return toArray!(T) (src); + // Special case: sequence of hex digits, each pair of which is a ubyte + if (src.length >= 2 && src[0..2] == "0x") { + src = src[2..$]; // strip down to actual digits + + // Must be in pairs: + if (src.length % 2 == 1) + throw new ParseException ("Invalid binary: odd number of chars"); + + T ret; + ret.length = src.length / 2; // exact + + for (uint i, pos; pos + 1 < src.length; ++i) 
{ + ubyte x = readHexChar(src, pos) << 4; + x |= readHexChar(src, pos); + ret[i] = x; + } + return ret; + } + else throw new ParseException ("Invalid ubyte[]: not an array and doesn't start 0x"); +} + + +// Basic types + +// Char +// Assumes value is <= 127 (for valid UTF-8), since input would be invalid UTF-8 if not anyway. +// (And we're not really interested in checking for valid unicode; char[] conversions don't either.) +T deserialize(T : char) (char[] src) { + src = Util.trim(src); + if (src.length < 3 || src[0] != '\'' || src[$-1] != '\'') + throw new ParseException ("Invalid char: not 'x' or '\\x'"); + if (src[1] != '\\') { + if (src.length == 3) + return src[1]; // Either non escaped + throw new ParseException ("Invalid char: too long (or non-ASCII)"); + } else if (src.length == 4) + return unEscapeChar (src[2]); // Or escaped + + throw new ParseException ("Invalid char: '\\'"); +} +// Basic unicode convertions for wide-chars. +// Assumes value is <= 127 as does deserialize!(char). 
+T deserialize(T : wchar) (char[] src) { + return cast(T) deserialize!(char) (src); +} +T deserialize(T : dchar) (char[] src) { + return cast(T) deserialize!(char) (src); +} + +// Bool +T deserialize(T : bool) (char[] src) { + src = Util.trim(src); + if (src == "true") + return true; + if (src == "false") + return false; + uint pos; + while (src.length > pos && src[pos] == '0') ++pos; // skip leading zeros + if (src.length == pos && pos > 0) + return false; + if (src.length == pos + 1 && src[pos] == '1') + return true; + throw new ParseException ("Invalid bool: not true or false and doesn't evaluate to 0 or 1"); +} + +// Ints +T deserialize(T : byte) (char[] src) { + return toTInt!(T) (src); +} +T deserialize(T : short) (char[] src) { + return toTInt!(T) (src); +} +T deserialize(T : int) (char[] src) { + return toTInt!(T) (src); +} +T deserialize(T : long) (char[] src) { + return toTInt!(T) (src); +} +T deserialize(T : ubyte) (char[] src) { + return toTInt!(T) (src); +} +T deserialize(T : ushort) (char[] src) { + return toTInt!(T) (src); +} +T deserialize(T : uint) (char[] src) { + return toTInt!(T) (src); +} +T deserialize(T : ulong) (char[] src) { + return toTInt!(T) (src); +} +debug (UnitTest) unittest { + assert (deserialize!(byte) ("-5") == cast(byte) -5); + // annoyingly, octal syntax differs from D (blame tango): + assert (deserialize!(uint[]) ("[0b0100,0o724,0xFa59c,0xFFFFFFFF,0]") == [0b0100u,0724,0xFa59c,0xFFFFFFFF,0]); +} + +// Floats +T deserialize(T : float) (char[] src) { + return toTFloat!(T) (src); +} +T deserialize(T : double) (char[] src) { + return toTFloat!(T) (src); +} +T deserialize(T : real) (char[] src) { + return toTFloat!(T) (src); +} + + +// Structs +T deserialize(T) (char[] src) { + static assert (is(T == struct), "Unsupported type: "~typeof(T)); + + src = Util.trim(src); + if (src.length < 2 || src[0] != '{' || src[$-1] != '}') + throw new ParseException ("Invalid struct: not { ... 
}"); + + // cannot access elements of T.tupleof with non-const key, so use a type which can be + // accessed with a non-const key to store slices: + char[][T.tupleof.length] temp; + foreach (char[] pair; split (src[1..$-1])) { + uint i = 0; + while (i < pair.length) { // advance to the ':' + char c = pair[i]; + if (c == ':') + break; + // key must be an int so no need for string checks + ++i; + } + if (i >= pair.length) + throw new ParseException ("Invalid struct: encountered { ... KEY} (missing :DATA)"); + + size_t k = deserialize!(size_t) (pair[0..i]); + // Note: could check no entry was already stored in temp. + temp[k] = pair[i+1..$]; + } + T ret; + setStruct (ret, temp); + return ret; +} +//END deserialize templates + +//BEGIN Utility funcs +/** Splits a string into substrings separated by '$(B ,)' with support for characters and strings + * containing escape sequences and for embedded arrays ($(B [...])). + * + * Params: + * src A string to separate on commas. It shouldn't have enclosing brackets. + * + * Returns: + * An array of substrings within src, excluding commas. Whitespace is not stripped and + * empty strings may get returned. + * + * Remarks: + * This function is primarily intended for as a utility function for use by the templates + * parsing arrays and associative arrays, but it may be useful in other cases too. Hence the + * fact no brackets are stripped from src. 
+ */ +//FIXME foreach struct is more efficient +char[][] split (char[] src) { + src = Util.trim (src); + if (src == "") + return []; // empty array: no elements when no data + + uint depth = 0; // surface depth (embedded arrays) + char[][] ret; + ret.length = src.length / 3; // unlikely to need a longer array + uint k = 0; // current split piece + uint i = 0, j = 0; // current read location, start of current piece + + while (i < src.length) { + char c = src[i]; + if (c == '\'' || c == '"') { // string or character + ++i; + while (i < src.length && src[i] != c) { + if (src[i] == '\\') + ++i; // escape seq. + ++i; + } // Doesn't throw if no terminal quote at end of src, but this should be caught later. + } + else if (c == '[') ++depth; + else if (c == ']') { + if (depth) + --depth; + else throw new ParseException ("Invalid array literal: closes before end of data item."); + } + else if (c == ',' && depth == 0) { // only if not an embedded array + if (ret.length <= k) + ret.length = ret.length * 2; + ret[k++] = src[j..i]; // add this piece and increment k + j = i + 1; + } + ++i; + } + if (i > src.length) + throw new ParseException ("Unterminated quote (\' or \")"); + + if (ret.length <= k) + ret.length = k + 1; + ret[k] = src[j..i]; // add final piece (i >= j) + return ret[0..k+1]; +} + +/* Templated read-int function to read (un)signed 1-4 byte integers. + * + * Actually a reimplementation of tango.text.convert.Integer toLong and parse functions. + */ +private TInt toTInt(TInt) (char[] src) { + const char[] INT_OUT_OF_RANGE = "Integer out of range"; + bool sign; + uint radix, ate, ate2; + + // Trim off whitespace. + // NOTE: Cannot use tango.text.convert.Integer.trim to trim leading whitespace since it doesn't + // treat new-lines, etc. as whitespace which for our purposes is whitespace. 
+ src = Util.trim (src); + + ate = cInt.trim (src, sign, radix); + if (ate == src.length) + throw new ParseException ("Invalid integer: no digits"); + ulong val = cInt.convert (src[ate..$], radix, &ate2); + ate += ate2; + + if (ate < src.length) + throw new ParseException ("Invalid integer at marked character: \"" ~ src[0..ate] ~ "'" ~ src[ate] ~ "'" ~ src[ate+1..$] ~ "\""); + + if (val > TInt.max) + throw new ParseException (INT_OUT_OF_RANGE); + if (sign) { + long sval = cast(long) -val; + if (sval > TInt.min) + return cast(TInt) sval; + else throw new ParseException (INT_OUT_OF_RANGE); + } + return cast(TInt) val; +} + +/* Basically a reimplementation of tango.text.convert.Float.toFloat which checks for + * whitespace before throwing an exception for overlong input. */ +private TFloat toTFloat(TFloat) (char[] src) { + // NOTE: As for toTInt(), this needs to strip leading as well as trailing whitespace. + src = Util.trim (src); + if (src == "") + throw new ParseException ("Invalid float: no digits"); + uint ate; + + TFloat x = cFloat.parse (src, &ate); + return x; +} + +/* Throws an exception on invalid escape sequences. Supported escape sequences are the following + * subset of those supported by D: \" \' \\ \a \b \f \n \r \t \v + */ +private char unEscapeChar (char c) +{ + // This code was generated: + if (c <= 'b') { + if (c <= '\'') { + if (c == '\"') { + return '\"'; + } else if (c == '\'') { + return '\''; + } + } else { + if (c == '\\') { + return '\\'; + } else if (c == 'a') { + return '\a'; + } else if (c == 'b') { + return '\b'; + } + } + } else { + if (c <= 'n') { + if (c == 'f') { + return '\f'; + } else if (c == 'n') { + return '\n'; + } + } else { + if (c == 'r') { + return '\r'; + } else if (c == 't') { + return '\t'; + } else if (c == 'v') { + return '\v'; + } + } + } + + // if we haven't returned: + throw new ParseException ("Bad escape sequence: \\"~c); +} + +// Reads one hex char: [0-9A-Fa-f]. Otherwise throws an exception. 
Doesn't check src.length. +private ubyte readHexChar (char[] src, inout uint pos) { + ubyte x; + if (src[pos] >= '0' && src[pos] <= '9') x = src[pos] - '0'; + else if (src[pos] >= 'A' && src[pos] <= 'F') x = src[pos] - 'A' + 10; + else if (src[pos] >= 'a' && src[pos] <= 'f') x = src[pos] - 'a' + 10; + else throw new ParseException ("Invalid hex digit."); + ++pos; + return x; +} + +// Generic array reader +// Assumes input is of form "[xxxxx]" (i.e. first and last chars are '[', ']' and length >= 2). +private T[] toArray(T : T[]) (char[] src) { + T[] ret = new T[16]; // avoid unnecessary allocations + uint i = 0; + foreach (char[] element; split(src[1..$-1])) { + if (i == ret.length) ret.length = ret.length * 2; + ret[i] = deserialize!(T) (element); + ++i; + } + return ret[0..i]; +} + +/** Set a struct's elements from an array. +* +* For a more generic version, see http://www.dsource.org/projects/tutorials/wiki/StructTupleof +*/ +// NOTE: Efficiency? Do recursive calls get inlined? +private void setStruct(S, size_t N, size_t i = 0) (ref S s, char[][N] src) { + static assert (is(S == struct), "Only to be used with structs."); + static assert (N == S.tupleof.length, "src.length != S.tupleof.length"); + static if (i < N) { + if (src[i]) + s.tupleof[i] = deserialize!(typeof(s.tupleof[i])) (src[i]); + setStruct!(S, N, i+1) (s, src); + } +} +//END Utility funcs + +debug (UnitTest) { + import tango.util.log.Log : Log, Logger; + + private Logger logger; + static this() { + logger = Log.getLogger ("text.deserialize"); + } +unittest { + // Utility + bool throws (void delegate() dg) { + bool r = false; + try { + dg(); + } catch (Exception e) { + r = true; + logger.info ("Exception caught: "~e.msg); + } + return r; + } + assert (!throws ({ int i = 5; })); + assert (throws ({ throw new Exception ("Test - this exception should be caught"); })); + + + // Associative arrays + char[][char] X = deserialize!(char[][char]) (`['a':"animal\n", 'b':['b','u','s','\n']]`); + char[][char] Y 
= ['a':cast(char[])"animal\n", 'b':['b','u','s','\n']]; + + //FIXME: when the compiler's fixed: http://d.puremagic.com/issues/show_bug.cgi?id=1671 + // just assert (X == Y) + assert (X.length == Y.length); + assert (X.keys == Y.keys); + assert (X.values == Y.values); + //X.rehash; Y.rehash; // doesn't make a difference + //assert (X == Y); // fails (compiler bug) + + assert (throws ({ deserialize!(int[int]) (`[1:1`); })); // bad brackets + assert (throws ({ deserialize!(int[char[]]) (`["ab\":1]`); })); // unterminated quote + assert (throws ({ deserialize!(int[char[]]) (`["abc,\a\b\c":1]`); })); // bad escape seq. + assert (throws ({ deserialize!(int[char[]]) (`["abc"]`); })); // no data + + + // Arrays + assert (deserialize!(double[]) (`[1.0,1.0e-10]`) == [1.0, 1.0e-10]);// generic array stuff + assert (deserialize!(double[]) (`[ ]`) == cast(double[]) []); // empty array + assert (deserialize!(int[][]) (`[[1],[2,3],[]]`) == [[1],[2,3],[]]);// sub-array + assert (throws ({ deserialize!(int[]) (`[1,2`); })); // bad brackets + assert (throws ({ deserialize!(int[][]) (`[[1]]]`); })); // bad brackets + + // char[] and char conversions, with commas, escape sequences and multichar UTF8 characters: + assert (deserialize!(char[][]) (`[ ".\"", [',','\''] ,"!\b€" ]`) == [ ".\"".dup, [',','\''] ,"!\b€" ]); + assert (throws ({ deserialize!(char[]) ("\"\\\""); })); + assert (throws ({ deserialize!(char[]) (`['a'`); })); // bad brackets + + // wchar[] and dchar[] conversions: + // The characters were pretty-much pulled at random from unicode tables. + // The last few cause some wierd (display only) effects in my editor. 
+ assert (deserialize!(wchar[]) ("\"Test string: ¶α؟अกሀ搀\"") == "Test string: ¶α؟अกሀ搀"w); + assert (deserialize!(dchar[]) ("\"Test string: ¶α؟अกሀ搀\"") == "Test string: ¶α؟अกሀ搀"d); + + assert (deserialize!(ubyte[]) (`0x01F2aC`) == cast(ubyte[]) [0x01, 0xF2, 0xAC]); // ubyte[] special notation + assert (deserialize!(ubyte[]) (`[01 ,0xF2, 0xAC]`) == cast(ubyte[]) [0x01, 0xF2, 0xAC]); // ubyte[] std notation + assert (throws ({ deserialize!(ubyte[]) (`0x123`); })); // digits not in pairs + assert (throws ({ deserialize!(ubyte[]) (`[2,5`); })); // not [...] or 0x.. + assert (throws ({ deserialize!(ubyte[]) (`0x123j`); })); + + + // char types + assert (deserialize!(char) ("'\\\''") == '\''); + assert (deserialize!(wchar) ("'X'") == 'X'); + assert (deserialize!(dchar) ("'X'") == 'X'); + assert (throws ({ deserialize!(char) ("'\\'"); })); + assert (throws ({ deserialize!(char) ("'£'"); })); // non-ascii + assert (throws ({ deserialize!(char) ("''"); })); + assert (throws ({ deserialize!(char) ("'ab'"); })); + assert (throws ({ deserialize!(wchar) ("''"); })); + + + // bool + assert (deserialize!(bool[]) (`[true,false,01,00]`) == cast(bool[]) [1,0,1,0]); + assert (throws ({ deserialize!(bool) ("011"); })); + + + // ints + assert (deserialize!(byte) ("-5") == cast(byte) -5); + assert (deserialize!(int) ("-0x7FFFFFFF") == cast(int) -0x7FFF_FFFF); + // annoyingly, octal syntax differs from D (blame tango): + assert (deserialize!(uint[]) ("[0b0100,0o724,0xFa59c,0xFFFFFFFF,0]") == [0b0100u,0724,0xFa59c,0xFFFFFFFF,0]); + assert (throws ({ deserialize!(int) (""); })); + assert (throws ({ deserialize!(int) ("0x8FFFFFFF"); })); + assert (throws ({ deserialize!(uint) ("-1"); })); + assert (throws ({ deserialize!(uint) ("1a"); })); + + + // floats + assert (deserialize!(float) ("0.0") == 0.0f); + assert (deserialize!(double) ("-1e25") == -1e25); + assert (deserialize!(real) ("5.24e-269") == cast(real) 5.24e-269); + assert (throws ({ deserialize!(float) (""); })); + + + // structs + 
struct A { int x = 5; char y; } + struct B { A a; float b; } + A a; a.y = 'y'; + assert (deserialize!(A) ("{ 1 : 'y' }") == a); + B b; b.a = a; b.b = 1.0f; + assert (deserialize!(B) (" {1:1.0,0: { 1 : 'y' } } ") == b); + assert (throws ({ deserialize!(A) (" 1:'x'}"); })); // bad braces + assert (throws ({ deserialize!(A) ("{ 1 }"); })); // no :DATA + + + // unEscapeChar + assert (deserialize!(char[]) ("\"\\a\\b\\t\\n\\v\\f\\r\\\"\\\'\\\\\"") == "\a\b\t\n\v\f\r\"\'\\"); + + logger.info ("Unittest complete."); +} +}
--- a/mde/mergetag/parse/parseFrom.d Tue Jul 29 18:14:53 2008 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,373 +0,0 @@ -/************************************************************************************************** - * copyright: Copyright (c) 2007-2008 Diggory Hardy. - * - * author: Diggory Hardy, diggory.hardy@gmail.com - * - * license: BSD style: $(LICENSE) - * - * This contains templates for converting various data-types to a char[]. - * - * parseFrom is roughly the inverse of $(B parseTo). - * It is also available in tango.scrapple. - * - * This module basically implements the following templated function for most basic D types: - * bool, byte, short, int, long, ubyte, ushort, uint, ulong, float, double, real, char, wchar, - * dchar. - * It also supports arrays of any supported type (including of other arrays) and has special - * handling for strings (char[]) and binary (ubyte[]) data-types. - * ----------------------------- - * char[] parseFrom(T) (T value); - * ----------------------------- - * - * $(I value) is the value to convert; it is converted to a string and returned. - * - * Syntax: - * The syntax is the same as parseTo; but since this module only generates formatted output - * knowing the syntax shouldn't be necessary. There is currently no way to specify options like - * output base for ints, precision of floats, or - * whether to write char[] or ubyte[] types as arrays or in their more compact forms. - * - * Throws: - * On errors, an exception is thrown (UnicodeException or IllegalArgumentException). No other - * exceptions should be thrown. - * - * Remarks: - * There is currently no support for outputting wchar/dchar strings. There are, however, unicode - * conversions for converting UTF-16/32 to UTF-8. Be warned though that many wchar/dchar characters - * (any that are non-ascii) will not fit in a single char and an exception will be thrown. 
- * - * The code does involve some heap activity; this is necessary anyway for returning dynamic arrays. - * (Slices of a pre-allocated array could be returned instead, but for many uses would have to be - * duplicated before storage, leading to less efficient operation.) - * Most memory allocation has been kept to a minimum. - * - * Unlike the parseTo!() module, the parseFrom templates could be re-written to use static-ifs - * instead of type specialisation, thus allowing type inference. However I likely won't bother - * implementing this myself. - * - * Examples: - * ------------------------------------------------------------------------------------------------ - * // Examples are printed via Cout. - * - * // Basic examples: - * Cout (parseFrom!(byte) (-13)).newline; // -13 - * Cout (parseFrom!(real) (2.56e11)).newline; // 2.55999999999999990000e+11 - * Cout (parseFrom!(double[]) ([0.0, 1.0, 2.0, 3.0])).newline; // [0.00000000000000000,1.00000000000000000,2.00000000000000000,3.00000000000000000] - * Cout (parseFrom!(bool[]) ([true,false,false])).newline; // [true,false,false] - * - * // String and ubyte[] special syntaxes (always used): - * Cout (parseFrom!(char[]) ("A string.")).newline; // "A string." (including quotes) - * Cout (parseFrom!(ubyte[]) (cast(ubyte[]) [5u, 0xF1u, 0x10u])).newline; // 0x05f110 - * - * // Associative arrays: - * Cout (parseFrom!(char[][byte]) ([-1:"negative one"[], 0:"zero", 1:"one"])).newline; // [0:"zero",1:"one",-1:"negative one"] - * - * // No limit on complexity... 
- * char[] somethingComplicated = parseFrom!(real[][][bool[int[][]]]) (...); - * ------------------------------------------------------------------------------------------------ - *************************************************************************************************/ - -module mde.mergetag.parse.parseFrom; - -// tango imports -import tango.core.Exception : UnicodeException, IllegalArgumentException; -import cInt = tango.text.convert.Integer; -import cFloat = tango.text.convert.Float; -import Utf = tango.text.convert.Utf; -import Util = tango.text.Util; - -//BEGIN parseFrom templates -/* Idea: could extend parseFrom with a second parameter, containing flags for things like base to output. - * Unnecessary for mergetag though. -*/ - -// Associative arrays - -char[] parseFrom(T : T[S], S) (T[S] val) { - char[] ret; - // A guess, including values themselves and [,:] elements (must be at least 2). - ret.length = val.length * (defLength!(T) + defLength!(S) + 2) + 2; - ret[0] = '['; - uint i = 1; - foreach (S k, T v; val) { - char[] s = parseFrom!(S) (k) ~ ":" ~ parseFrom!(T) (v); - i += s.length; - if (i+1 >= ret.length) ret.length = ret.length * 2; // check. - ret[i-s.length .. i] = s; - ret[i++] = ','; - } - if (i == 1) ++i; // special case - not overwriting a comma - ret[i-1] = ']'; // replaces last comma - return ret[0..i]; -} -debug (UnitTest) unittest { - char[] X = parseFrom!(char[][char]) (['a':cast(char[])"animal", 'b':['b','u','s']]); - char[] Y = `['a':"animal",'b':"bus"]`; - assert (X == Y); -} - - -// Arrays - -char[] parseFrom(T : T[]) (T[] val) { - char[] ret; - // A guess, including commas and brackets (must be at least 2) - ret.length = val.length * (defLength!(T) + 1) + 2; - ret[0] = '['; - uint i = 1; - foreach (T x; val) { - char[] s = parseFrom!(T) (x); - i += s.length; - if (i+1 >= ret.length) ret.length = ret.length * 2; // check length - ret[i-s.length .. 
i] = s; - ret[i++] = ','; - } - if (i == 1) ++i; // special case - not overwriting a comma - ret[i-1] = ']'; // replaces last comma - return ret[0..i]; -} - -// Strings (array special case) -char[] parseFrom(T : char[]) (T val) { - char[] ret = new char[val.length * 2 + 2]; // Initial storage. This should ALWAYS be enough. - ret[0] = '"'; - uint i = 1; - for (uint t = 0; t < val.length;) { - // process a block of non-escapable characters - uint s = t; - while (t < val.length && !isEscapableChar(val[t])) - ++t; // skip all non-escapable chars - uint j = i + t - s; - ret[i..j] = val[s..t]; // copy a block - i = j; - // process a block of escapable charaters - while (t < val.length && isEscapableChar(val[t])) { - ret[i++] = '\\'; // backslash; increment i - ret[i++] = replaceEscapableChar(val[t++]); // character; increment i and t - } - } - ret[i++] = '"'; - return ret[0..i]; -} -// Unicode conversions for strings: -char[] parseFrom(T : dchar[]) (T val) { - // May throw a UnicodeException; don't bother catching and rethrowing: - return parseFrom!(char[]) (Utf.toString (val)); -} -char[] parseFrom(T : wchar[]) (T val) { - // May throw a UnicodeException; don't bother catching and rethrowing: - return parseFrom!(char[]) (Utf.toString (val)); -} - -// Binary (array special case) -char[] parseFrom(T : ubyte[]) (T val) { - static const char[16] digits = "0123456789abcdef"; - - char[] ret = new char[val.length * 2 + 2]; // exact length - ret[0..2] = "0x"; - uint i = 2; - - foreach (ubyte x; val) { - ret[i++] = digits[x >> 4]; - ret[i++] = digits[x & 0x0F]; - } - return ret; -} - -debug (UnitTest) unittest { - // generic array stuff: - assert (parseFrom!(double[]) ([1.0, 1.0e-10]) == `[1.00000000000000000,0.10000000000000000e-09]`); - assert (parseFrom!(double[]) (cast(double[]) []) == `[]`); // empty array - - // char[] conversions, with commas, escape sequences and multichar UTF8 characters: - assert (parseFrom!(char[][]) ([ ".\""[], [',','\''] ,"!\b€" ]) == 
`[".\"",",\'","!\b€"]`); - - // wchar[] and dchar[] conversions: - // The characters were pretty-much pulled at random from unicode tables. - // The last few cause some wierd (display only) effects in my editor. - assert (parseFrom!(wchar[]) ("Test string: ¶α؟अกሀ搀"w) == "\"Test string: ¶α؟अกሀ搀\""); - assert (parseFrom!(dchar[]) ("Test string: ¶α؟अกሀ搀"d) == "\"Test string: ¶α؟अกሀ搀\""); - - assert (parseFrom!(ubyte[]) (cast(ubyte[]) [0x01, 0xF2, 0xAC]) == `0x01f2ac`); // ubyte[] special notation -} - - -// Basic types - -// Char -char[] parseFrom(T : char) (T val) { - // NOTE: if (val > 127) "is invalid UTF-8 single char" - // However we don't know what this is for, in particular if it will be recombined with other chars later - - // Can't return reference to static array; making dynamic is cheaper than copying. - char[] ret = new char[4]; // max length for an escaped char - ret[0] = '\''; - - if (!isEscapableChar (val)) { - ret[1] = val; - ret[2] = '\''; - return ret[0..3]; - } else { - ret[1] = '\\'; - ret[2] = replaceEscapableChar (val); - ret[3] = '\''; - return ret; - } - assert (false); -} -// Basic unicode convertions for wide-chars. -// NOTE: any other wide-chars will not fit in a single UTF-8 encoded char. 
-const char[] WIDE_CHAR_ERROR = "Error: unicode non-ascii character cannot be converted to a single UTF-8 char"; -char[] parseFrom(T : wchar) (T val) { - if (val <= 127u) return parseFrom!(char) (cast(char) val); // this char can be converted - else throw new UnicodeException (WIDE_CHAR_ERROR, 0); -} -char[] parseFrom(T : dchar) (T val) { - if (val <= 127u) return parseFrom!(char) (cast(char) val); // this char can be converted - else throw new UnicodeException (WIDE_CHAR_ERROR, 0); -} -debug (UnitTest) unittest { - assert (parseFrom!(char) ('\'') == "\'\\\'\'"); - assert (parseFrom!(wchar) ('X') == "'X'"); - assert (parseFrom!(dchar) ('X') == "'X'"); -} - -// Bool -char[] parseFrom(T : bool) (T val) { - if (val) return "true"; - else return "false"; -} -// too simple to need a unittest - -// Signed ints -char[] parseFrom(T : byte) (T val) { - return formatLong (val); -} -char[] parseFrom(T : short) (T val) { - return formatLong (val); -} -char[] parseFrom(T : int) (T val) { - return formatLong (val); -} -char[] parseFrom(T : long) (T val) { - return formatLong (val); -} -// Unsigned ints -char[] parseFrom(T : ubyte) (T val) { - return formatLong (val); -} -char[] parseFrom(T : ushort) (T val) { - return formatLong (val); -} -char[] parseFrom(T : uint) (T val) { - return formatLong (val); -} -char[] parseFrom(T : ulong) (T val) { - if (val > cast(ulong) long.max) - throw new IllegalArgumentException ("No handling available for ulong where value > long.max"); - return formatLong (val); -} -debug (UnitTest) unittest { - assert (parseFrom!(byte) (cast(byte) -5) == "-5"); - // annoyingly, octal syntax differs from D (blame tango): - assert (parseFrom!(uint[]) ([0b0100u,0724,0xFa59c,0xFFFFFFFF,0]) == "[4,468,1025436,4294967295,0]"); -} - -// Floats -/* Old calculation (not used): -t.dig+2+4+3 // should be sufficient length (mant + (neg, dot, e, exp neg) + exp (3,4,5 for float,double,real resp.)) */ -char[] parseFrom(T : float) (T val) { - char[] ret = new char[32]; // 
minimum allowed by assert in format - return cFloat.format (ret, val, T.dig+2, 1); // from old C++ tests, T.dig+2 gives best(?) accuracy -} -char[] parseFrom(T : double) (T val) { - char[] ret = new char[32]; - return cFloat.format (ret, val, T.dig+2, 1); -} -char[] parseFrom(T : real) (T val) { - char[] ret = new char[32]; - return cFloat.format (ret, val, T.dig+2, 1); -} -debug (UnitTest) unittest { - // NOTE: these numbers are not particularly meaningful. - assert (parseFrom!(float) (0.0f) == "0.00000000"); - assert (parseFrom!(double) (-1e25) == "-1.00000000000000000e+25"); - assert (parseFrom!(real) (cast(real) 4.918e300) == "4.91800000000000000000e+300"); -} -//END parseFrom templates - -//BEGIN Length templates -/* This template provides the initial length for strings for formatting various types. These strings - * can be expanded; this value is intended to cover 90% of cases or so. - * - * NOTE: This template was intended to provide specialisations for different types. - * This one value should do reasonably well for most types. - */ -private { - template defLength(T) { const uint defLength = 20; } - template defLength(T : char) { const uint defLength = 4; } - template defLength(T : bool) { const uint defLength = 5; } -} -//END Length templates - -//BEGIN Utility funcs -private char[] formatLong (long val) { - // May throw an IllegalArgumentException; don't bother catching and rethrowing: - return cInt.toString (val); -} -private bool isEscapableChar (char c) { - return ((c <= '\r' && c >= '\a') || c == '\"' || c == '\'' || c == '\\'); -} -// Throws on unsupported escape sequences; however this should never actually happen within parseFrom. 
-private char replaceEscapableChar (char c) { - // This code was generated: - if (c <= '\v') { - if (c <= '\b') { - if (c == '\a') { - return 'a'; - } else if (c == '\b') { - return 'b'; - } - } else { - if (c == '\t') { - return 't'; - } else if (c == '\n') { - return 'n'; - } else if (c == '\v') { - return 'v'; - } - } - } else { - if (c <= '\r') { - if (c == '\f') { - return 'f'; - } else if (c == '\r') { - return 'r'; - } - } else { - if (c == '\"') { - return '\"'; - } else if (c == '\'') { - return '\''; - } else if (c == '\\') { - return '\\'; - } - } - } - - // if we haven't returned: - throw new IllegalArgumentException ("Character is not escapable (internal parseFrom error)"); -} - -debug (UnitTest) { - import tango.io.Console; - - unittest { - Cout ("Running unittest: parseFrom ...").flush; - - assert (parseFrom!(char[]) ("\a\b\t\n\v\f\r\"\'\\") == "\"\\a\\b\\t\\n\\v\\f\\r\\\"\\\'\\\\\""); - - Cout (" complete").newline; - } -} -//END Utility funcs
--- a/mde/mergetag/parse/parseTo.d Tue Jul 29 18:14:53 2008 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,515 +0,0 @@ -/************************************************************************************************** - * copyright: Copyright (c) 2007-2008 Diggory Hardy. - * - * author: Diggory Hardy, diggory.hardy@gmail.com - * - * license: BSD style: $(LICENSE) - * - * This contains templates for converting a char[] to various data-types. - * - * parseTo is roughly the inverse of $(B parseFrom) and should read any data output by $(B parseFrom). - * It is also available in tango.scrapple. - * - * This module basically implements the following templated function for most basic D types: - * bool, byte, short, int, long, ubyte, ushort, uint, ulong, float, double, real, char. - * It also supports arrays and associative arrays of any supported type (including of other arrays) - * and has special handling for strings (char[]) and binary (ubyte[]) data-types. - * ----------------------------- - * T parseTo(T) (char[] source); - * ----------------------------- - * - * $(I source) is the string to parse, and data of the templated type that is read from the string - * is returned. See the examples to get a better idea of its use. - * - * Syntax: - * The syntax for parsing $(I source) is mostly the same used by D without any prefixes/suffixes - * (except 0x, 0b & 0o base specifiers). Also a special ubyte[] syntax is supported; see examples. - * The following escape sequences are supported for strings and characters: \' \" \\ - * \a \b \f \n \r \t \v . Associative array literals use the same syntax as D, described here: - * $(LINK http://www.digitalmars.com/d/2.0/expression.html#AssocArrayLiteral). All whitespace is - * ignored (except of course within strings). - * - * There are also some public utility functions with their own documentation. 
- * - * Throws: - * On errors, a ParseException or a UnicodeException (both extend TextException) is thrown with a - * suitable message. No other exceptions should be thrown. - * - * Remarks: - * There is currently no support for reading wchar/dchar strings. There are, however, unicode - * conversions for converting UTF-8 to UTF-16/32. Be careful if converting on a char-by-char basis; - * such conversions cannot be used for non-ascii characters. - * - * Examples: - * ------------------------------------------------------------------------------------------------ - * // Basic examples: - * ulong a = parseTo!(ulong) ("20350"); - * float d = parseTo!(float) (" 1.2e-9 "); - * int[] b = parseTo!(int[]) ("[0,1,2,3]"); - * - * // String and char[] syntax: - * char[] c = parseTo!(char[]) ("\"A string\""); - * char[] e = parseTo!(char[]) ("['a','n','o','t','h','e','r', ' ' ,'s','t','r','i','n','g']"); - * - * // These can be used interchangeably; here's a more complex example of an associative array: - * bool[char[]] f = parseTo!(bool[char[]]) ("[ \"one\":true, ['t','w','o']:false, \"three\":1, \"four\":000 ]"); - * - * // There is also a special notation for ubyte[] types: - * // The digits following 0x must be in pairs and each specify one ubyte. - * assert ( parseTo!(ubyte[]) (`0x01F2AC`) == parseTo!(ubyte[]) (`[01 ,0xF2, 0xAC]`) ); - * - * // There's no limit to the complexity! - * char[char[][][][char]][bool] z = ...; // don't expect me to write this! - * ------------------------------------------------------------------------------------------------ - *************************************************************************************************/ - -module mde.mergetag.parse.parseTo; - -// tango imports -import tango.core.Exception : TextException, UnicodeException; -import cInt = tango.text.convert.Integer; -import cFloat = tango.text.convert.Float; -import Utf = tango.text.convert.Utf; -import Util = tango.text.Util; - -/** - * Base class for parseTo exceptions. 
- */ -class ParseException : TextException -{ - this( char[] msg ) - { - super( msg ); - } -} - - -//BEGIN parseTo templates - -// Associative arrays - -const char[] AA_ERR = "Invalid associative array: "; -T[S] parseTo(T : T[S], S) (char[] src) { - src = Util.trim(src); - if (src.length < 2 || src[0] != '[' || src[$-1] != ']') - throw new ParseException (AA_ERR ~ "not [ ... ]"); // bad braces. - - T[S] ret; - foreach (char[] pair; split (src[1..$-1])) { - uint i = 0; - while (i < pair.length) { // advance to the ':' - char c = pair[i]; - if (c == ':') break; - if (c == '\'' || c == '"') { // string or character - ++i; - while (i < pair.length && pair[i] != c) { - if (pair[i] == '\\') { - if (i+2 >= pair.length) throw new ParseException (AA_ERR ~ "unfinished escape sequence within string/char"); - ++i; // escape seq. - } - ++i; - } - if (i == pair.length) { - throw new ParseException (AA_ERR ~ "encountered [ ... KEY] (missing :DATA)"); - } - } - ++i; - } - if (i == pair.length) { - throw new ParseException (AA_ERR ~ "encountered [ ... 
KEY:] (missing DATA)"); - } - ret[parseTo!(S) (pair[0..i])] = parseTo!(T) (pair[i+1..$]); - } - return ret; -} -debug (UnitTest) unittest { - char[][char] X = parseTo!(char[][char]) (`['a':"animal", 'b':['b','u','s']]`); - char[][char] Y = ['a':cast(char[])"animal", 'b':['b','u','s']]; - - //FIXME: when the compiler's fixed: http://d.puremagic.com/issues/show_bug.cgi?id=1671 - // just assert (X == Y) - assert (X.length == Y.length); - assert (X.keys == Y.keys); - assert (X.values == Y.values); - //X.rehash; Y.rehash; // doesn't make a difference - //assert (X == Y); // fails (compiler bug) -} - - -// Arrays - -T[] parseTo(T : T[]) (char[] src) { - src = Util.trim(src); - if (src.length >= 2 && src[0] == '[' && src[$-1] == ']') return toArray!(T[]) (src); - throw new ParseException ("Invalid array: not [x, ..., z]"); -} - -// String (array special case) -T parseTo(T : char[]) (char[] src) { - src = Util.trim(src); - if (src.length >= 2 && src[0] == '"' && src[$-1] == '"') { - src = src[1..$-1]; - T ret; - ret.length = src.length; // maximum length; retract to actual length later - uint i = 0; - for (uint t = 0; t < src.length;) { - // process a block of non-escaped characters - uint s = t; - while (t < src.length && src[t] != '\\') ++t; // non-escaped characters - uint j = i + t - s; - ret[i..j] = src[s..t]; // copy a block - i = j; - - // process a block of escaped characters - while (t < src.length && src[t] == '\\') { - t++; - if (t == src.length) throw new ParseException ("Invalid string: ends \\\" !"); // next char is " - ret[i++] = replaceEscapedChar (src[t++]); // throws if it's invalid - } - } - return ret[0..i]; - } - else if (src.length >= 2 && src[0] == '[' && src[$-1] == ']') return toArray!(T) (src); - throw new ParseException ("Invalid string: not quoted (\"*\") or char array (['a',...,'c'])"); -} -// Unicode conversions for strings: -T parseTo(T : wchar[]) (char[] src) { - // May throw a UnicodeException; don't bother catching and rethrowing: - return 
Utf.toString16 (parseTo!(char[]) (src)); -} -T parseTo(T : dchar[]) (char[] src) { - // May throw a UnicodeException; don't bother catching and rethrowing: - return Utf.toString32 (parseTo!(char[]) (src)); -} - -// Binary (array special case) -T parseTo(T : ubyte[]) (char[] src) { - src = Util.trim(src); - // Standard case: - if (src.length >= 2 && src[0] == '[' && src[$-1] == ']') return toArray!(T) (src); - // Special case: sequence of hex digits, each pair of which is a ubyte - if (src.length >= 2 && src[0..2] == "0x") { - src = src[2..$]; // strip down to actual digits - - // Must be in pairs: - if (src.length % 2 == 1) throw new ParseException ("Invalid binary: odd number of chars"); - - T ret; - ret.length = src.length / 2; // exact - - for (uint i, pos; pos + 1 < src.length; ++i) { - ubyte x = readHexChar(src, pos) << 4; - x |= readHexChar(src, pos); - ret[i] = x; - } - return ret; - } - else throw new ParseException ("Invalid ubyte[]: not an array and doesn't start 0x"); -} - -debug (UnitTest) unittest { - assert (parseTo!(double[]) (`[1.0,1.0e-10]`) == [1.0, 1.0e-10]); // generic array stuff - assert (parseTo!(double[]) (`[ ]`) == cast(double[]) []); // empty array - - // char[] and char conversions, with commas, escape sequences and multichar UTF8 characters: - assert (parseTo!(char[][]) (`[ ".\"", [',','\''] ,"!\b€" ]`) == [ ".\"".dup, [',','\''] ,"!\b€" ]); - - // wchar[] and dchar[] conversions: - // The characters were pretty-much pulled at random from unicode tables. - // The last few cause some wierd (display only) effects in my editor. 
- assert (parseTo!(wchar[]) ("\"Test string: ¶α؟अกሀ搀\"") == "Test string: ¶α؟अกሀ搀"w); - assert (parseTo!(dchar[]) ("\"Test string: ¶α؟अกሀ搀\"") == "Test string: ¶α؟अกሀ搀"d); - - assert (parseTo!(ubyte[]) (`0x01F2AC`) == cast(ubyte[]) [0x01, 0xF2, 0xAC]); // ubyte[] special notation - assert (parseTo!(ubyte[]) (`[01 ,0xF2, 0xAC]`) == cast(ubyte[]) [0x01, 0xF2, 0xAC]); // ubyte[] std notation -} - - -// Basic types - -// Char -T parseTo(T : char) (char[] src) { - src = Util.trim(src); - if (src.length < 3 || src[0] != '\'' || src[$-1] != '\'') - throw new ParseException ("Invalid char: not quoted (e.g. 'c')"); - if (src[1] != '\\' && src.length == 3) return src[1]; // Either non escaped - if (src.length == 4) return replaceEscapedChar (src[2]); // Or escaped - - // Report various errors; warnings for likely and difficult to tell cases: - // Warn in case it's a multibyte UTF-8 character: - if (src[1] & 0xC0u) throw new UnicodeException ("Invalid char: too long (non-ASCII UTF-8 characters cannot be read as a single character)", 1); - throw new ParseException ("Invalid char: too long"); -} -/* Basic unicode convertions for wide-chars. -* NOTE: c > 127 signals the start of a multibyte UTF-8 sequence which must be converted for -* UTF-16/32. But since we don't know what the next bytes are we can't do the conversion. 
*/ -const char[] WIDE_CHAR_ERROR = "Error: unicode non-ascii character cannot be converted from a single UTF-8 char"; -T parseTo(T : wchar) (char[] src) { - char c = parseTo!(char) (src); - if (c <= 127u) return cast(wchar) c; // this char can be converted - else throw new UnicodeException (WIDE_CHAR_ERROR, 1); -} -T parseTo(T : dchar) (char[] src) { - char c = parseTo!(char) (src); - if (c <= 127u) return cast(dchar) c; // this char can be converted - else throw new UnicodeException (WIDE_CHAR_ERROR, 1); -} -debug (UnitTest) unittest { - assert (parseTo!(char) ("\'\\\'\'") == '\''); - assert (parseTo!(wchar) ("'X'") == 'X'); - assert (parseTo!(dchar) ("'X'") == 'X'); -} - -// Bool -T parseTo(T : bool) (char[] src) { - src = Util.trim(src); - if (src == "true") return true; - if (src == "false") return false; - uint pos; - while (src.length > pos && src[pos] == '0') ++pos; // skip leading zeros - if (src.length == pos && pos > 0) return false; - if (src.length == pos + 1 && src[pos] == '1') return true; - throw new ParseException ("Invalid bool: not true or false and doesn't evaluate to 0 or 1"); -} -debug (UnitTest) unittest { - assert (parseTo!(bool[]) (`[true,false,01,00]`) == cast(bool[]) [1,0,1,0]); -} - -// Ints -T parseTo(T : byte) (char[] src) { - return toTInt!(T) (src); -} -T parseTo(T : short) (char[] src) { - return toTInt!(T) (src); -} -T parseTo(T : int) (char[] src) { - return toTInt!(T) (src); -} -T parseTo(T : long) (char[] src) { - return toTInt!(T) (src); -} -T parseTo(T : ubyte) (char[] src) { - return toTInt!(T) (src); -} -T parseTo(T : ushort) (char[] src) { - return toTInt!(T) (src); -} -T parseTo(T : uint) (char[] src) { - return toTInt!(T) (src); -} -T parseTo(T : ulong) (char[] src) { - return toTInt!(T) (src); -} -debug (UnitTest) unittest { - assert (parseTo!(byte) ("-5") == cast(byte) -5); - // annoyingly, octal syntax differs from D (blame tango): - assert (parseTo!(uint[]) ("[0b0100,0o724,0xFa59c,0xFFFFFFFF,0]") == 
[0b0100u,0724,0xFa59c,0xFFFFFFFF,0]); -} - -// Floats -T parseTo(T : float) (char[] src) { - return toTFloat!(T) (src); -} -T parseTo(T : double) (char[] src) { - return toTFloat!(T) (src); -} -T parseTo(T : real) (char[] src) { - return toTFloat!(T) (src); -} -debug (UnitTest) unittest { - assert (parseTo!(float) ("0.0") == 0.0f); - assert (parseTo!(double) ("-1e25") == -1e25); - assert (parseTo!(real) ("5.24e-269") == cast(real) 5.24e-269); -} -//END parseTo templates - -//BEGIN Utility funcs -/** Trims whitespace at ends of string and checks for and removes array brackets: [] -* -* Throws: -* ParseException if the brackets aren't the first and last non-whitespace characters. -* -* Returns: -* String without brackets (and whitespace outside those brackets). Useful for passing to split. -*/ -char[] stripBrackets (char[] src) { - src = Util.trim(src); - if (src.length >= 2 && src[0] == '[' && src[$-1] == ']') return src[1..$-1]; - throw new ParseException ("Invalid bracketed string: not [...]"); -} - -/** Splits a string into substrings separated by '$(B ,)' with support for characters and strings - * containing escape sequences and for embedded arrays ($(B [...])). - * - * Params: - * src A string to separate on commas. Where used for parsing arrays, the brackets enclosing - * the array should be removed before calling this function (stripBrackets can do this). - * - * Returns: - * An array of substrings within src, excluding commas. Whitespace is not stripped and - * empty strings may get returned. - * - * Remarks: - * This function is primarily intended as a utility function for use by the templates - * parsing arrays and associative arrays, but it may be useful in other cases too. Hence the - * fact no brackets are stripped from src. 
- */ -char[][] split (char[] src) { - src = Util.trim (src); - if (src == "") return []; // empty array: no elements when no data - - uint depth = 0; // surface depth (embedded arrays) - char[][] ret; - ret.length = src.length / 3; // unlikely to need a longer array - uint k = 0; // current split piece - uint i = 0, j = 0; // current read location, start of current piece - - while (i < src.length) { - char c = src[i]; - if (c == '\'' || c == '"') { // string or character - ++i; - while (i < src.length && src[i] != c) { - if (src[i] == '\\') ++i; // escape seq. - ++i; - } // Doesn't throw if no terminal quote at end of src, but this should be caught later. - } - else if (c == '[') ++depth; - else if (c == ']') { - if (depth) --depth; - else throw new ParseException ("Invalid array literal: closes before end of data item."); - } - else if (c == ',' && depth == 0) { // only if not an embedded array - if (ret.length <= k) ret.length = ret.length * 2; - ret[k++] = src[j..i]; // add this piece and increment k - j = i + 1; - } - ++i; - } - if (ret.length <= k) ret.length = k + 1; - ret[k] = src[j..i]; // add final piece (i >= j) - return ret[0..k+1]; -} - -/* Templated read-int function to read (un)signed 1-4 byte integers. - * - * Actually a reimplementation of tango.text.convert.Integer toLong and parse functions. - */ -private TInt toTInt(TInt) (char[] src) { - const char[] INT_OUT_OF_RANGE = "Integer out of range"; - bool sign; - uint radix, ate, ate2; - - // Trim off whitespace. - // NOTE: Cannot use tango.text.convert.Integer.trim to trim leading whitespace since it doesn't - // treat new-lines, etc. as whitespace which for our purposes is whitespace. 
- src = Util.trim (src); - - ate = cInt.trim (src, sign, radix); - if (ate == src.length) throw new ParseException ("Invalid integer: no digits"); - ulong val = cInt.convert (src[ate..$], radix, &ate2); - ate += ate2; - - if (ate < src.length) - throw new ParseException ("Invalid integer at marked character: \"" ~ src[0..ate] ~ "'" ~ src[ate] ~ "'" ~ src[ate+1..$] ~ "\""); - - if (val > TInt.max) throw new ParseException (INT_OUT_OF_RANGE); - if (sign) { - long sval = cast(long) -val; - if (sval > TInt.min) return cast(TInt) sval; - else throw new ParseException (INT_OUT_OF_RANGE); - } - return cast(TInt) val; -} - -/* Basically a reimplementation of tango.text.convert.Float.toFloat which checks for - * whitespace before throwing an exception for overlong input. */ -private TFloat toTFloat(TFloat) (char[] src) { - // NOTE: As for toTInt(), this needs to strip leading as well as trailing whitespace. - src = Util.trim (src); - if (src == "") throw new ParseException ("Invalid float: no digits"); - uint ate; - - TFloat x = cFloat.parse (src, &ate); - return x; -} - -/* Throws an exception on invalid escape sequences. Supported escape sequences are the following - * subset of those supported by D: \" \' \\ \a \b \f \n \r \t \v - */ -private char replaceEscapedChar (char c) -{ - // This code was generated: - if (c <= 'b') { - if (c <= '\'') { - if (c == '\"') { - return '\"'; - } else if (c == '\'') { - return '\''; - } - } else { - if (c == '\\') { - return '\\'; - } else if (c == 'a') { - return '\a'; - } else if (c == 'b') { - return '\b'; - } - } - } else { - if (c <= 'n') { - if (c == 'f') { - return '\f'; - } else if (c == 'n') { - return '\n'; - } - } else { - if (c == 'r') { - return '\r'; - } else if (c == 't') { - return '\t'; - } else if (c == 'v') { - return '\v'; - } - } - } - - // if we haven't returned: - throw new ParseException ("Invalid escape sequence: \\"~c); -} - -// Reads one hex char: [0-9A-Fa-f]. Otherwise throws an exception. 
Doesn't check src.length. -private ubyte readHexChar (char[] src, inout uint pos) { - ubyte x; - if (src[pos] >= '0' && src[pos] <= '9') x = src[pos] - '0'; - else if (src[pos] >= 'A' && src[pos] <= 'F') x = src[pos] - 'A' + 10; - else if (src[pos] >= 'a' && src[pos] <= 'f') x = src[pos] - 'a' + 10; - else throw new ParseException ("Invalid hex digit."); - ++pos; - return x; -} - -// Generic array reader -// Assumes input is of form "[xxxxx]" (i.e. first and last chars are '[', ']' and length >= 2). -private T[] toArray(T : T[]) (char[] src) { - T[] ret = new T[16]; // avoid unnecessary allocations - uint i = 0; - foreach (char[] element; split(src[1..$-1])) { - if (i == ret.length) ret.length = ret.length * 2; - ret[i] = parseTo!(T) (element); - ++i; - } - return ret[0..i]; -} - -debug (UnitTest) { - import tango.io.Console; - - unittest { - Cout ("Running unittest: parseTo ...").flush; - - assert (parseTo!(char[]) ("\"\\a\\b\\t\\n\\v\\f\\r\\\"\\\'\\\\\"") == "\a\b\t\n\v\f\r\"\'\\"); - - Cout (" complete").newline; - } -} -//END Utility funcs
/**************************************************************************************************
 * Generic serialization templated function.
 *
 * copyright: Copyright (c) 2007-2008 Diggory Hardy.
 *
 * author: Diggory Hardy, diggory.hardy@gmail.com
 *
 * Supports:
 *  Associative arrays, arrays (inc. strings), structs, char types, bool, int types, float types.
 *
 * Examples:
 * ------------------------------------------------------------------------------------------------
 * // Basic examples:
 * Cout (serialize!(byte) (-13)).newline;                       // -13
 * Cout (serialize!(real) (2.56e11)).newline;                   // 2.55999999999999990000e+11
 * Cout (serialize!(double[]) ([0.0, 1.0, 2.0, 3.0])).newline;  // [0.00000000000000000,1.00000000000000000,2.00000000000000000,3.00000000000000000]
 * Cout (serialize ([true,false,false])).newline;               // [true,false,false]
 *
 * // String and ubyte[] special syntaxes (always used):
 * Cout (serialize ("A string.")).newline;                      // "A string." (including quotes)
 * Cout (serialize (cast(ubyte[]) [5u, 0xF1u, 0x10u])).newline; // 0x05f110
 *
 * // Associative arrays:
 * Cout (serialize ([-1:"negative one"[], 0:"zero", 1:"one"])).newline; // [0:"zero",1:"one",-1:"negative one"]
 *
 * // Structs:
 * struct S { int a = 5; double[int[]] x; }
 * S s;
 * Cout (serialize (s));
 *
 * // No limit on complexity...
 * char[] somethingComplicated = serialize!(real[][][bool[int[][]]]) (...);
 * ------------------------------------------------------------------------------------------------
 *
 * TODO: Optimize memory allocation (if possible?). Test best sizes for initial allocations
 * instead of merely guessing?
 *************************************************************************************************/
//NOTE: in case of multiple formats, make this a dummy module importing both serialize modules,
// or put all the code here.
module mde.mergetag.serialize;
// Since serialize is never used in a module where deserialize is not used, save an import:
public import mde.mergetag.deserialize;

// tango imports
import tango.core.Traits;
import tango.core.Exception : UnicodeException, IllegalArgumentException;
import cInt = tango.text.convert.Integer;
import cFloat = tango.text.convert.Float;
import Utf = tango.text.convert.Utf;


alias serialize parseFrom;      // support the old name

// Formatting options, for where multiple formats are supported by the deserializer.

// Output ubyte[] using the special binary notation (0x01f2ac instead of [1,242,172])?
const bool SPECIAL_BINARY_NOTATION = true;

// Output bool values as true / false or 1 / 0 ?
const bool BINARY_AS_WORDS = true;


/**************************************************************************************************
 * Converts val to its textual form.
 *
 * Output format by type:
 *  associative arrays: [key:value,key:value,...]
 *  char[]:             double-quoted, with escapable characters written as \x
 *  wchar[], dchar[]:   converted with Utf.toString, then written as char[]
 *  ubyte[]:            0x followed by two lower-case hex digits per byte
 *                      (only if SPECIAL_BINARY_NOTATION; otherwise as a generic array)
 *  other arrays:       [element,element,...]
 *  structs:            {0:member,1:member,...} (members keyed by their index in tupleof)
 *  char:               single-quoted, escaped if necessary
 *  wchar, dchar:       as char; only values <= 127 are supported
 *  bool:               true / false if BINARY_AS_WORDS, else 1 / 0
 *  integer types:      via cInt.toString
 *  floating point:     via cFloat.format
 *
 * Params:
 *  val = The value to serialize.
 *
 * Returns: The serialized text.
 *
 * Throws:
 *  UnicodeException if a single wchar/dchar is > 127 (the wchar[]/dchar[] conversion may also
 *  throw it); IllegalArgumentException for a ulong > long.max.
 *
 * Unsupported types are rejected at compile time by a static assert.
 *************************************************************************************************/
char[] serialize(U) (U val) {
    // Associative arrays (NOTE: cannot use is() expression)
    static if (isAssocArrayType!(U)) {          // generic associative array
        alias typeof(U.keys[0])     S;
        alias typeof(U.values[0])   T;
        char[] ret;
        // A guess, including values themselves and [,:] elements (must be at least 2).
        ret.length = val.length * (defLength!(T) + defLength!(S) + 2) + 2;
        ret[0] = '[';
        uint i = 1;
        foreach (S k, T v; val)
            appendItem (ret, i, serialize!(S) (k) ~ ":" ~ serialize!(T) (v));
        if (i == 1) ++i;    // special case - not overwriting a comma
        ret[i-1] = ']';     // replaces last comma
        return ret[0..i];
    }
    // Arrays
    else static if (is(U S == S[]) || isStaticArrayType!(U)) {
        alias typeof(U[0]) T;

        static if (is(T == char)) {             // string
            char[] ret = new char[val.length * 2 + 2];  // Initial storage. This should ALWAYS be enough.
            ret[0] = '"';
            uint i = 1;
            for (uint t = 0; t < val.length;) {
                // process a block of non-escapable characters
                uint s = t;
                while (t < val.length && !isEscapableChar(val[t]))
                    ++t;    // skip all non-escapable chars
                uint j = i + t - s;
                ret[i..j] = val[s..t];  // copy a block
                i = j;
                // process a block of escapable characters
                while (t < val.length && isEscapableChar(val[t])) {
                    ret[i++] = '\\';                    // backslash; increment i
                    ret[i++] = escapeChar(val[t++]);    // character; increment i and t
                }
            }
            ret[i++] = '"';
            return ret[0..i];
        }
        else static if (is(T == wchar) || is(T == dchar)) {    // wstring or dstring
            // May throw a UnicodeException; don't bother catching and rethrowing:
            return serialize!(char[]) (Utf.toString (val));
        }
        else static if (SPECIAL_BINARY_NOTATION && is(T == ubyte)) {   // special binary notation
            // Note: To disable the usage of this special type, set SPECIAL_BINARY_NOTATION = false.
            static const char[16] digits = "0123456789abcdef";

            char[] ret = new char[val.length * 2 + 2];  // exact length
            ret[0..2] = "0x";
            uint i = 2;

            foreach (ubyte x; val) {
                ret[i++] = digits[x >> 4];
                ret[i++] = digits[x & 0x0F];
            }
            return ret;
        }
        else {                                  // generic array
            char[] ret;
            // A guess, including commas and brackets (must be at least 2)
            ret.length = val.length * (defLength!(T) + 1) + 2;
            ret[0] = '[';
            uint i = 1;
            foreach (T x; val)
                appendItem (ret, i, serialize!(T) (x));
            if (i == 1)
                ++i;        // special case - not overwriting a comma
            ret[i-1] = ']'; // replaces last comma
            return ret[0..i];
        }
    }
    // Structs
    else static if (is(U == struct)) {
        char[] ret;
        // A very rough guess.
        ret.length = val.sizeof * 4;
        ret[0] = '{';
        uint i = 1;
        foreach (k, v; val.tupleof) {
            alias typeof(v) T;
            appendItem (ret, i, serialize!(size_t) (k) ~ ":" ~ serialize!(T) (v));
        }
        if (i == 1) ++i;    // special case - not overwriting a comma
        ret[i-1] = '}';     // replaces last comma
        return ret[0..i];
    }
    // Basic types
    else static if (is(U == char)) {            // char (UTF-8 byte)
        // Note: if (val > 127) "is invalid UTF-8 single char". However we don't know
        // what this is for, in particular if it will be recombined with other chars later.

        // Can't return reference to static array; so making it dynamic is cheaper than copying.
        char[] ret = new char[4];   // max length for an escaped char
        ret[0] = '\'';

        if (!isEscapableChar (val)) {
            ret[1] = val;
            ret[2] = '\'';
            return ret[0..3];
        } else {
            ret[1] = '\\';
            ret[2] = escapeChar (val);
            ret[3] = '\'';
            return ret;
        }
    } else static if (is(U == wchar) ||
                      is(U == dchar)) {         // wchar or dchar (UTF-16/32 single char)
        // Note: only ascii can be converted. NOTE: convert to UTF-8 (multibyte) char?
        if (val <= 127u)
            return serialize!(char) (cast(char) val);  // ASCII
        else throw new UnicodeException (
            "Error: unicode non-ascii character cannot be converted to a single UTF-8 char", 0);
    } else static if (is (U == bool)) {         // boolean
        static if (BINARY_AS_WORDS) {
            if (val)
                return "true";
            else return "false";
        } else {
            if (val)
                return "1";
            else return "0";
        }
    } else static if (is (U : long)) {          // any integer type, except char types and bool
        static if (is (U == ulong))             // ulong may not be supported properly
            if (val > cast(ulong) long.max)
                throw new IllegalArgumentException ("No handling available for ulong where value > long.max");
        return cInt.toString (val);
    } else static if (is (U : real)) {          // any (real) floating point type
        char[] ret = new char[32];              // minimum allowed by assert in format
        return cFloat.format (ret, val, U.dig+2, 1);// from old C++ tests, U.dig+2 gives best(?) accuracy
    }
    // Unsupported
    else
        static assert (false, "Unsupported type: "~U.stringof);
}

//BEGIN Utility funcs
/* This template provides the initial length for strings for formatting various types. These strings
 * can be expanded; this value is intended to cover 90% of cases or so.
 *
 * NOTE: This template was intended to provide specialisations for different types.
 * This one value should do reasonably well for most types.
 */
private {
    template defLength(T)        { const uint defLength = 20; }
    template defLength(T : char) { const uint defLength = 4;  }
    template defLength(T : bool) { const uint defLength = 5;  }
}

/* Appends item, followed by a ',' separator, to buf at index pos and advances pos past the
 * separator.
 *
 * buf is only an initial size guess made by the caller, so it is enlarged here for as long as
 * necessary: a single serialized element (e.g. a long string) may be many times larger than the
 * guess, in which case doubling the buffer once is not sufficient.
 */
private void appendItem (inout char[] buf, inout uint pos, char[] item) {
    pos += item.length;                     // pos is now the index for the separator
    while (pos >= buf.length)               // need room for item and the separator
        buf.length = (buf.length + 1) * 2;  // + 1 so that an empty buffer also grows
    buf[pos - item.length .. pos] = item;
    buf[pos++] = ',';
}

// True for the characters which are written as an escape sequence: \a \b \t \n \v \f \r \" \' \\
private bool isEscapableChar (char c) {
    return ((c <= '\r' && c >= '\a') || c == '\"' || c == '\'' || c == '\\');
}
// Returns the character which follows the backslash in the escape sequence for c.
// Throws on unsupported escape sequences; however this should never happen within serialize.
private char escapeChar (char c) {
    // This code was generated:
    if (c <= '\v') {
        if (c <= '\b') {
            if (c == '\a') {
                return 'a';
            } else if (c == '\b') {
                return 'b';
            }
        } else {
            if (c == '\t') {
                return 't';
            } else if (c == '\n') {
                return 'n';
            } else if (c == '\v') {
                return 'v';
            }
        }
    } else {
        if (c <= '\r') {
            if (c == '\f') {
                return 'f';
            } else if (c == '\r') {
                return 'r';
            }
        } else {
            if (c == '\"') {
                return '\"';
            } else if (c == '\'') {
                return '\'';
            } else if (c == '\\') {
                return '\\';
            }
        }
    }

    // if we haven't returned:
    throw new IllegalArgumentException ("Internal error (escapeChar)");
}
//END Utility funcs



debug (UnitTest) {
    import tango.util.log.Log : Log, Logger;

    private Logger logger;
    static this() {
        logger = Log.getLogger ("text.serialize");
    }
unittest {
    // Utility
    bool throws (void delegate() dg) {
        bool r = false;
        try {
            dg();
        } catch (Exception e) {
            r = true;
            logger.info ("Exception caught: "~e.msg);
        }
        return r;
    }
    assert (!throws ({ int i = 5; }));
    assert (throws ({ throw new Exception ("Test - this exception should be caught"); }));

    // Associative arrays
    char[] X = serialize!(char[][char]) (['a':cast(char[])"animal", 'b':['b','u','s']]);
    char[] Y = `['a':"animal",'b':"bus"]`;
    assert (X == Y);


    // Arrays
    // generic array stuff:
    assert (serialize!(double[]) ([1.0, 1.0e-10]) == `[1.00000000000000000,0.10000000000000000e-09]`);
    assert (serialize!(double[]) (cast(double[]) []) == `[]`);     // empty array

    // char[] conversions, with commas, escape sequences and multichar UTF8 characters:
    assert (serialize!(char[][]) ([ ".\""[], [',','\''] ,"!\b€" ]) == `[".\"",",\'","!\b€"]`);

    // wchar[] and dchar[] conversions:
    // The characters were pretty-much pulled at random from unicode tables.
    assert (serialize!(wchar[]) ("Test string: ¶α؟अกሀ搀"w) == "\"Test string: ¶α؟अกሀ搀\"");
    assert (serialize!(dchar[]) ("Test string: ¶α؟अกሀ搀"d) == "\"Test string: ¶α؟अกሀ搀\"");


    static if (SPECIAL_BINARY_NOTATION)
        assert (serialize!(ubyte[]) (cast(ubyte[]) [0x01, 0xF2, 0xAC]) == `0x01f2ac`);  // ubyte[] special notation
    else
        assert (serialize!(ubyte[]) (cast(ubyte[]) [0x01, 0xF2, 0xAC]) == `[1,242,172]`);


    // Structs
    struct Foo { int a = 9; char b = '\v'; float c; }
    struct Bar { Foo a,b; }
    static Foo foo1 = { a:150, b:'8', c:17.2f}, foo2;
    Bar bar;
    bar.a = foo1;
    bar.b = foo2;
    assert (serialize(bar) == "{0:{0:150,1:'8',2:1.72000007e+01},1:{0:9,1:'\\v',2:nan}}");


    // Elements much larger than the initial buffer size guess (the buffer must keep growing
    // until the element fits; doubling it once is not enough for these):
    char[] longStr = new char[200];
    longStr[] = 'x';
    char[] longQuoted = "\"" ~ longStr ~ "\"";
    assert (serialize!(char[][]) ([longStr]) == "[" ~ longQuoted ~ "]");
    assert (serialize!(char[][int]) ([1:longStr]) == "[1:" ~ longQuoted ~ "]");
    struct Baz { char[] s; }
    Baz baz;
    baz.s = longStr;
    assert (serialize(baz) == "{0:" ~ longQuoted ~ "}");


    // Basic Types
    // Character types
    assert (serialize!(char) ('\'') == "\'\\\'\'");
    assert (serialize!(wchar) ('X') == "'X'");
    assert (serialize!(dchar) ('X') == "'X'");
    assert (throws ({ char[] r = serialize!(wchar) ('£');   /* unicode U+00A3 */ }));
    assert (throws ({ char[] r = serialize!(dchar) ('£'); }));

    // Bool
    static if (BINARY_AS_WORDS)
        assert (serialize(false) == "false");
    else
        assert (serialize(true) == "1");

    // Integers
    assert (serialize (cast(byte) -5) == "-5");
    assert (serialize (cast(short) -32768) == "-32768");
    assert (serialize (-5) == "-5");
    assert (serialize (-9223372036854775807L) == "-9223372036854775807");
    assert (serialize (cast(ubyte) -1) == "255");
    assert (serialize (cast(ushort) -1) == "65535");
    assert (serialize!(uint) (-1) == "4294967295");
    assert (serialize (cast(ulong) 0x7FFF_FFFF_FFFF_FFFFLu) == "9223372036854775807");
    assert (serialize!(uint[]) ([0b0100u,0724,0xFa59c,0xFFFFFFFF,0]) ==
                                "[4,468,1025436,4294967295,0]");
    assert (throws ({
        // ulong is not properly supported.
        // NOTE: this is something that should really work.
        char[] r = serialize!(ulong) (0x8FFF_FFFF_FFFF_FFFFLu);
    }));

    // Floats
    // These numbers are not particularly meaningful:
    assert (serialize!(float) (0.0f) == "0.00000000");
    assert (serialize!(double) (-1e25) == "-1.00000000000000000e+25");
    assert (serialize!(real) (cast(real) 4.918e300) == "4.91800000000000000000e+300");

    // Escape sequences (test conversion functions)
    assert (serialize ("\a\b\t\n\v\f\r\"\'\\") == `"\a\b\t\n\v\f\r\"\'\\"`);

    logger.info ("Unittest complete.");
}
}