Mercurial > projects > mde
view mde/mergetag/serialize.d @ 79:61ea26abe4dd
Moved mde/mergetag/parse/parse(To/From) to mde/mergetag/(de)serialize. Implemented (de)serialization of structs.
author | Diggory Hardy <diggory.hardy@gmail.com> |
---|---|
date | Tue, 05 Aug 2008 11:51:51 +0100 |
parents | mde/mergetag/parse/parseFrom.d@7fc0a8295c83 |
children |
line wrap: on
line source
/**************************************************************************************************
 * Generic serialization templated function.
 *
 * copyright: Copyright (c) 2007-2008 Diggory Hardy.
 *
 * author: Diggory Hardy, diggory.hardy@gmail.com
 *
 * Supports:
 *  Associative arrays, arrays (inc. strings), structs, char types, bool, int types, float types.
 *
 * Examples:
 * ------------------------------------------------------------------------------------------------
 * // Basic examples:
 * Cout (serialize!(byte) (-13)).newline;                       // -13
 * Cout (serialize!(real) (2.56e11)).newline;                   // 2.55999999999999990000e+11
 * Cout (serialize!(double[]) ([0.0, 1.0, 2.0, 3.0])).newline;  // [0.00000000000000000,1.00000000000000000,2.00000000000000000,3.00000000000000000]
 * Cout (serialize ([true,false,false])).newline;               // [true,false,false]
 *
 * // String and ubyte[] special syntaxes (always used):
 * Cout (serialize ("A string.")).newline;                      // "A string." (including quotes)
 * Cout (serialize (cast(ubyte[]) [5u, 0xF1u, 0x10u])).newline; // 0x05f110
 *
 * // Associative arrays:
 * Cout (serialize ([-1:"negative one"[], 0:"zero", 1:"one"])).newline; // [0:"zero",1:"one",-1:"negative one"]
 *
 * // Structs:
 * struct S { int a = 5; double[int[]] x; }
 * S s;
 * Cout (serialize (s));
 *
 * // No limit on complexity...
 * char[] somethingComplicated = serialize!(real[][][bool[int[][]]]) (...);
 * ------------------------------------------------------------------------------------------------
 *
 * TODO: Optimize memory allocation (if possible?). Test best sizes for initial allocations
 * instead of merely guessing?
 *************************************************************************************************/
//NOTE: in case of multiple formats, make this a dummy module importing both serialize modules,
// or put all the code here.
module mde.mergetag.serialize;

// Since serialize is never used in a module where deserialize is not used, save an import:
public import mde.mergetag.deserialize;

// tango imports
import tango.core.Traits;
import tango.core.Exception : UnicodeException, IllegalArgumentException;
import cInt = tango.text.convert.Integer;
import cFloat = tango.text.convert.Float;
import Utf = tango.text.convert.Utf;

alias serialize parseFrom;      // support the old name

// Formatting options, for where multiple formats are supported by the deserializer.

// Output using the special binary notation (0x01F2AC instead of [01 ,0xF2, 0xAC])?
const bool SPECIAL_BINARY_NOTATION = true;

// Output booleans as true / false or 1 / 0 ?
const bool BINARY_AS_WORDS = true;

/** Serialize val to its textual representation.
 *
 * The representation is selected at compile time from the type U:
 *  associative arrays      [key:value,...]
 *  char[] (strings)        "..." with escape sequences
 *  wchar[] / dchar[]       converted to UTF-8, then as char[]
 *  ubyte[]                 0x... hex digits (only if SPECIAL_BINARY_NOTATION)
 *  other arrays            [elem,...]
 *  structs                 {index:member,...}
 *  char / wchar / dchar    'c' (escaped if necessary)
 *  bool                    true / false (or 1 / 0 if BINARY_AS_WORDS is false)
 *  integer types           decimal
 *  floating point types    exponent notation with U.dig+2 decimals
 *
 * Params:
 *  val = The value to serialize.
 *
 * Returns:
 *  The serialized text.
 *
 * Throws:
 *  UnicodeException if a single wchar or dchar is not ASCII (and possibly when converting a
 *  wchar[] or dchar[] to UTF-8); IllegalArgumentException for a ulong greater than long.max.
 */
char[] serialize(U) (U val) {
    // Associative arrays (NOTE: cannot use is() expression)
    static if (isAssocArrayType!(U)) {          // generic associative array
        alias typeof(U.keys[0])     S;
        alias typeof(U.values[0])   T;
        char[] ret;
        // A guess, including values themselves and [,:] elements (must be at least 2).
        ret.length = val.length * (defLength!(T) + defLength!(S) + 2) + 2;
        ret[0] = '[';
        uint i = 1;
        foreach (S k, T v; val) {
            char[] s = serialize!(S) (k) ~ ":" ~ serialize!(T) (v);
            i += s.length;
            // Grow until s and the following ',' fit. A single doubling is not enough when one
            // entry is longer than twice the initial guess.
            while (i+1 >= ret.length)
                ret.length = ret.length * 2;
            ret[i-s.length .. i] = s;
            ret[i++] = ',';
        }
        if (i == 1) ++i;        // special case - not overwriting a comma
        ret[i-1] = ']';         // replaces last comma
        return ret[0..i];
    }
    // Arrays
    else static if (is(U S == S[]) || isStaticArrayType!(U)) {
        alias typeof(U[0]) T;

        static if (is(T == char)) {             // string
            char[] ret = new char[val.length * 2 + 2];  // Initial storage. This should ALWAYS be enough.
            ret[0] = '"';
            uint i = 1;
            for (uint t = 0; t < val.length;) {
                // process a block of non-escapable characters
                uint s = t;
                while (t < val.length && !isEscapableChar(val[t]))
                    ++t;        // skip all non-escapable chars
                uint j = i + t - s;
                ret[i..j] = val[s..t];  // copy a block
                i = j;
                // process a block of escapable characters
                while (t < val.length && isEscapableChar(val[t])) {
                    ret[i++] = '\\';                    // backslash; increment i
                    ret[i++] = escapeChar(val[t++]);    // character; increment i and t
                }
            }
            ret[i++] = '"';
            return ret[0..i];
        }
        else static if (is(T == wchar) || is(T == dchar)) {     // wstring or dstring
            // May throw a UnicodeException; don't bother catching and rethrowing:
            return serialize!(char[]) (Utf.toString (val));
        }
        else static if (SPECIAL_BINARY_NOTATION && is(T == ubyte)) {    // special binary notation
            // Note: To disable the usage of this special type, set SPECIAL_BINARY_NOTATION = false.
            static const char[16] digits = "0123456789abcdef";

            char[] ret = new char[val.length * 2 + 2];  // exact length
            ret[0..2] = "0x";
            uint i = 2;

            foreach (ubyte x; val) {
                ret[i++] = digits[x >> 4];      // high nibble
                ret[i++] = digits[x & 0x0F];    // low nibble
            }
            return ret;
        }
        else {                                  // generic array
            char[] ret;
            // A guess, including commas and brackets (must be at least 2)
            ret.length = val.length * (defLength!(T) + 1) + 2;
            ret[0] = '[';
            uint i = 1;
            foreach (T x; val) {
                char[] s = serialize!(T) (x);
                i += s.length;
                // Grow until s and the following ',' fit (see associative array case).
                while (i+1 >= ret.length)
                    ret.length = ret.length * 2;
                ret[i-s.length .. i] = s;
                ret[i++] = ',';
            }
            if (i == 1) ++i;    // special case - not overwriting a comma
            ret[i-1] = ']';     // replaces last comma
            return ret[0..i];
        }
    }
    // Structs
    else static if (is(U == struct)) {
        char[] ret;
        // A very rough guess.
        ret.length = val.sizeof * 4;
        ret[0] = '{';
        uint i = 1;
        foreach (k, v; val.tupleof) {
            alias typeof(v) T;
            // Members are identified by their index within the struct.
            char[] s = serialize!(size_t) (k) ~ ":" ~ serialize!(T) (v);
            i += s.length;
            // Grow until s and the following ',' fit (see associative array case).
            while (i+1 >= ret.length)
                ret.length = ret.length * 2;
            ret[i-s.length .. i] = s;
            ret[i++] = ',';
        }
        if (i == 1) ++i;        // special case - not overwriting a comma
        ret[i-1] = '}';         // replaces last comma
        return ret[0..i];
    }
    // Basic types
    else static if (is(U == char)) {            // char (UTF-8 byte)
        // Note: if (val > 127) "is invalid UTF-8 single char". However we don't know
        // what this is for, in particular if it will be recombined with other chars later.

        // Can't return reference to static array; so making it dynamic is cheaper than copying.
        char[] ret = new char[4];       // max length for an escaped char
        ret[0] = '\'';

        if (!isEscapableChar (val)) {
            ret[1] = val;
            ret[2] = '\'';
            return ret[0..3];
        } else {
            ret[1] = '\\';
            ret[2] = escapeChar (val);
            ret[3] = '\'';
            return ret;
        }
    }
    else static if (is(U == wchar) || is(U == dchar)) { // wchar or dchar (UTF-16/32 single char)
        // Note: only ascii can be converted. NOTE: convert to UTF-8 (multibyte) char?
        if (val <= 127u)
            return serialize!(char) (cast(char) val);   // ASCII
        else
            throw new UnicodeException (
                "Error: unicode non-ascii character cannot be converted to a single UTF-8 char", 0);
    }
    else static if (is (U == bool)) {           // boolean
        static if (BINARY_AS_WORDS) {
            if (val)
                return "true";
            else
                return "false";
        } else {
            if (val)
                return "1";
            else
                return "0";
        }
    }
    else static if (is (U : long)) {            // any integer type, except char types and bool
        static if (is (U == ulong))             // ulong may not be supported properly
            if (val > cast(ulong) long.max)
                throw new IllegalArgumentException ("No handling available for ulong where value > long.max");
        return cInt.toString (val);
    }
    else static if (is (U : real)) {            // any (real) floating point type
        char[] ret = new char[32];              // minimum allowed by assert in format
        return cFloat.format (ret, val, U.dig+2, 1);    // from old C++ tests, U.dig+2 gives best(?) accuracy
    }
    // Unsupported
    else
        static assert (false, "Unsupported type: "~U.stringof);
}

//BEGIN Utility funcs
/* This template provides the initial length for strings for formatting various types. These strings
 * can be expanded; this value is intended to cover 90% of cases or so.
 *
 * NOTE: This template was intended to provide specialisations for different types.
 * This one value should do reasonably well for most types.
 */
private {
    template defLength(T)        { const uint defLength = 20; }
    template defLength(T : char) { const uint defLength = 4;  }
    template defLength(T : bool) { const uint defLength = 5;  }
}

// True for the characters which serialize writes as a backslash escape sequence:
// '\a' .. '\r' (i.e. \a \b \t \n \v \f \r), double quote, single quote and backslash.
private bool isEscapableChar (char c) {
    return ((c <= '\r' && c >= '\a') || c == '\"' || c == '\'' || c == '\\');
}

// Returns the character following the backslash in the escape sequence for c.
// Throws on unsupported escape sequences; however this should never happen within serialize.
private char escapeChar (char c) {
    switch (c) {
        case '\a': return 'a';
        case '\b': return 'b';
        case '\t': return 't';
        case '\n': return 'n';
        case '\v': return 'v';
        case '\f': return 'f';
        case '\r': return 'r';
        case '\"': return '\"';
        case '\'': return '\'';
        case '\\': return '\\';
        default:   break;
    }

    // if we haven't returned:
    throw new IllegalArgumentException ("Internal error (escapeChar)");
}
//END Utility funcs


debug (UnitTest) {
    import tango.util.log.Log : Log, Logger;

    private Logger logger;
    static this() {
        logger = Log.getLogger ("text.serialize");
    }

    unittest {
        // Utility
        bool throws (void delegate() dg) {
            bool r = false;
            try {
                dg();
            } catch (Exception e) {
                r = true;
                logger.info ("Exception caught: "~e.msg);
            }
            return r;
        }
        assert (!throws ({ int i = 5; }));
        assert (throws ({ throw new Exception ("Test - this exception should be caught"); }));

        // Associative arrays
        char[] X = serialize!(char[][char]) (['a':cast(char[])"animal", 'b':['b','u','s']]);
        char[] Y = `['a':"animal",'b':"bus"]`;
        assert (X == Y);

        // Arrays
        // generic array stuff:
        assert (serialize!(double[]) ([1.0, 1.0e-10]) == `[1.00000000000000000,0.10000000000000000e-09]`);
        assert (serialize!(double[]) (cast(double[]) []) == `[]`);      // empty array

        // char[] conversions, with commas, escape sequences and multichar UTF8 characters:
        assert (serialize!(char[][]) ([ ".\""[], [',','\''] ,"!\b€" ]) == `[".\"",",\'","!\b€"]`);

        // wchar[] and dchar[] conversions:
        // The characters were pretty-much pulled at random from unicode tables.
        assert (serialize!(wchar[]) ("Test string: ¶α؟अกሀ搀"w) == "\"Test string: ¶α؟अกሀ搀\"");
        assert (serialize!(dchar[]) ("Test string: ¶α؟अกሀ搀"d) == "\"Test string: ¶α؟अกሀ搀\"");

        static if (SPECIAL_BINARY_NOTATION)
            assert (serialize!(ubyte[]) (cast(ubyte[]) [0x01, 0xF2, 0xAC]) == `0x01f2ac`);      // ubyte[] special notation
        else
            assert (serialize!(ubyte[]) (cast(ubyte[]) [0x01, 0xF2, 0xAC]) == `[1,242,172]`);

        // Elements much longer than the initial buffer guess (buffer must grow more than once):
        char[] longStr;
        for (int n = 0; n < 20; ++n)
            longStr ~= "0123456789";
        assert (serialize!(char[][]) ([longStr]) == "[\"" ~ longStr ~ "\"]");
        assert (serialize!(char[][int]) ([1:longStr]) == "[1:\"" ~ longStr ~ "\"]");

        // Structs
        struct Foo { int a = 9; char b = '\v'; float c; }
        struct Bar { Foo a,b; }
        static Foo foo1 = { a:150, b:'8', c:17.2f}, foo2;
        Bar bar;
        bar.a = foo1;
        bar.b = foo2;
        assert (serialize(bar) == "{0:{0:150,1:'8',2:1.72000007e+01},1:{0:9,1:'\\v',2:nan}}");

        // Basic Types
        // Character types
        assert (serialize!(char) ('\'') == "\'\\\'\'");
        assert (serialize!(wchar) ('X') == "'X'");
        assert (serialize!(dchar) ('X') == "'X'");
        assert (throws ({ char[] r = serialize!(wchar) ('£');   /* unicode U+00A3 */ }));
        assert (throws ({ char[] r = serialize!(dchar) ('£'); }));

        // Bool
        static if (BINARY_AS_WORDS)
            assert (serialize(false) == "false");
        else
            assert (serialize(true) == "1");

        // Integers
        assert (serialize (cast(byte) -5) == "-5");
        assert (serialize (cast(short) -32768) == "-32768");
        assert (serialize (-5) == "-5");
        assert (serialize (-9223372036854775807L) == "-9223372036854775807");
        assert (serialize (cast(ubyte) -1) == "255");
        assert (serialize (cast(ushort) -1) == "65535");
        assert (serialize!(uint) (-1) == "4294967295");
        assert (serialize (cast(ulong) 0x7FFF_FFFF_FFFF_FFFFLu) == "9223372036854775807");
        assert (serialize!(uint[]) ([0b0100u,0724,0xFa59c,0xFFFFFFFF,0]) == "[4,468,1025436,4294967295,0]");
        assert (throws ({
            // ulong is not properly supported.
            // NOTE: this is something that should really work.
            char[] r = serialize!(ulong) (0x8FFF_FFFF_FFFF_FFFFLu);
        }));

        // Floats
        // These numbers are not particularly meaningful:
        assert (serialize!(float) (0.0f) == "0.00000000");
        assert (serialize!(double) (-1e25) == "-1.00000000000000000e+25");
        assert (serialize!(real) (cast(real) 4.918e300) == "4.91800000000000000000e+300");

        // Escape sequences (test conversion functions)
        assert (serialize ("\a\b\t\n\v\f\r\"\'\\") == `"\a\b\t\n\v\f\r\"\'\\"`);

        logger.info ("Unittest complete.");
    }
}