Mercurial > projects > mde
view mde/file/serialize.d @ 88:01f4f5f1acc9
Changes to init and to allow compiling with gdc.
Tweaked init code to allow using circular iterators (disabled until my patch makes it into tango).
Changes to allow compiling with gdc. Building is successful and unittests complete, but in my experience a SIGSEGV occurs within SDL.
author | Diggory Hardy <diggory.hardy@gmail.com> |
---|---|
date | Mon, 29 Sep 2008 12:09:44 +0100 |
parents | 79d816b3e2d2 |
children | 97e6dce08037 |
line wrap: on
line source
/* LICENSE BLOCK
Part of mde: a Modular D game-oriented Engine
Copyright © 2007-2008 Diggory Hardy

This program is free software: you can redistribute it and/or modify it under the terms
of the GNU General Public License as published by the Free Software Foundation, either
version 2 of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>. */

/**************************************************************************************************
 * Generic serialization templated function.
 *
 * Supports:
 *  Associative arrays, dynamic arrays (with usual formatting of strings), structs, char types,
 *  bool, int types, float types.
 *
 * Examples:
 * ------------------------------------------------------------------------------------------------
 * // Basic examples:
 * Cout (serialize!(byte) (-13)).newline;                       // -13
 * Cout (serialize!(real) (2.56e11)).newline;                   // 2.55999999999999990000e+11
 * Cout (serialize!(double[]) ([0.0, 1.0, 2.0, 3.0])).newline;  // [0.00000000000000000,1.00000000000000000,2.00000000000000000,3.00000000000000000]
 * Cout (serialize ([true,false,false])).newline;               // [true,false,false]
 *
 * // String and ubyte[] special syntaxes (always used):
 * Cout (serialize ("A string.")).newline;                      // "A string." (including quotes)
 * Cout (serialize (cast(ubyte[]) [5u, 0xF1u, 0x10u])).newline; // 0x05f110
 *
 * // Associative arrays:
 * Cout (serialize ([-1:"negative one"[], 0:"zero", 1:"one"])).newline; // [0:"zero",1:"one",-1:"negative one"]
 *
 * // Structs:
 * struct S { int a = 5; double[int[]] x; }
 * S s;
 * Cout (serialize (s));
 *
 * // No limit on complexity...
 * char[] somethingComplicated = serialize!(real[][][bool[int[][]]]) (...);
 * ------------------------------------------------------------------------------------------------
 *
 * throws:
 *      May throw a UnicodeException or an IllegalArgumentException.
 *
 * TODO: Optimize memory allocation (if possible?). Test best sizes for initial allocations
 * instead of merely guessing?
 *************************************************************************************************/
//NOTE: in case of multiple formats, make this a dummy module importing both serialize modules,
// or put all the code here.
//FIXME: Optimize by using a slicing buffer. Put everything in a struct containing this buffer to
// make it thread-safe.
module mde.file.serialize;

// Since serialize is never used in a module where deserialize is not used, save an import:
public import mde.file.deserialize;

// tango imports
import tango.core.Traits;
import tango.core.Exception : UnicodeException, IllegalArgumentException;
import cInt = tango.text.convert.Integer;
import cFloat = tango.text.convert.Float;
import Utf = tango.text.convert.Utf;

alias serialize parseFrom;      // support the old name

// Formatting options, for where multiple formats are supported by the deserializer.

// Output ubyte[] using the special binary notation (0x01f2ac instead of [1,242,172])?
const bool SPECIAL_BINARY_NOTATION = true;

// Output bool values as true / false or 1 / 0 ?
const bool BINARY_AS_WORDS = true;

/* Serializes val to its textual representation.
 *
 * The format is selected at compile time from the type U:
 *  associative arrays:     [key:value,...]
 *  char[]:                 "..." with escapable characters backslash-escaped
 *  wchar[] / dchar[]:      converted to UTF-8 and output as char[]
 *  ubyte[]:                0x followed by two hex digits per byte (if SPECIAL_BINARY_NOTATION)
 *  other arrays:           [element,...]
 *  structs:                {index:member,...}
 *  char / wchar / dchar:   '.' (escaped where necessary)
 *  bool:                   true / false (or 1 / 0 if !BINARY_AS_WORDS)
 *  integer and floating point types: as formatted by tango's Integer.toString / Float.format
 *
 * Returns: the serialized text.
 *
 * Throws: IllegalArgumentException for a char > 127 or a ulong > long.max; a UnicodeException
 * may be thrown when converting wchar/dchar data to UTF-8. */
char[] serialize(U) (U val) {
    // Associative arrays (NOTE: cannot use is() expression)
    static if (isAssocArrayType!(U)) {          // generic associative array
        alias typeof(U.keys[0]) S;
        alias typeof(U.values[0]) T;
        char[] ret;
        // A guess, including values themselves and [,:] elements (must be at least 2).
        ret.length = val.length * (defLength!(T) + defLength!(S) + 2) + 2;
        ret[0] = '[';
        size_t i = 1;
        foreach (S k, T v; val) {
            char[] s = serialize!(S) (k) ~ ":" ~ serialize!(T) (v);
            i += s.length;
            // Room is needed for s plus the trailing comma. A single doubling is not
            // necessarily enough when s is long, so keep growing until it fits.
            while (i+1 >= ret.length)
                ret.length = ret.length * 2;
            ret[i-s.length .. i] = s;
            ret[i++] = ',';
        }
        if (i == 1) ++i;    // special case - not overwriting a comma
        ret[i-1] = ']';     // replaces last comma
        return ret[0..i];
    }
    // Arrays
    else static if (is(U S == S[]) || isStaticArrayType!(U)) {
        alias typeof(U[0]) T;

        static if (is(T == char)) {             // string
            char[] ret = new char[val.length * 2 + 2];  // Initial storage. This should ALWAYS be enough.
            ret[0] = '"';
            size_t i = 1;
            for (size_t t = 0; t < val.length;) {
                // process a block of non-escapable characters
                size_t s = t;
                while (t < val.length && !isEscapableChar(val[t]))
                    ++t;    // skip all non-escapable chars
                size_t j = i + t - s;
                ret[i..j] = val[s..t];  // copy a block
                i = j;
                // process a block of escapable characters
                while (t < val.length && isEscapableChar(val[t])) {
                    ret[i++] = '\\';                    // backslash; increment i
                    ret[i++] = escapeChar(val[t++]);    // character; increment i and t
                }
            }
            ret[i++] = '"';
            return ret[0..i];
        }
        else static if (is(T == wchar) || is(T == dchar)) {     // wstring or dstring
            // May throw a UnicodeException; don't bother catching and rethrowing:
            return serialize!(char[]) (Utf.toString (val));
        }
        else static if (SPECIAL_BINARY_NOTATION && is(T == ubyte)) {    // special binary notation
            // Note: To disable the usage of this special type, set SPECIAL_BINARY_NOTATION = false.
            static const char[16] digits = "0123456789abcdef";

            char[] ret = new char[val.length * 2 + 2];  // exact length
            ret[0..2] = "0x";
            size_t i = 2;

            foreach (ubyte x; val) {
                ret[i++] = digits[x >> 4];      // high nibble
                ret[i++] = digits[x & 0x0F];    // low nibble
            }
            return ret;
        }
        else {                                  // generic array
            char[] ret;
            // A guess, including commas and brackets (must be at least 2)
            ret.length = val.length * (defLength!(T) + 1) + 2;
            ret[0] = '[';
            size_t i = 1;
            foreach (T x; val) {
                char[] s = serialize!(T) (x);
                i += s.length;
                // Room is needed for s plus the trailing comma. A single doubling is not
                // necessarily enough when s is long, so keep growing until it fits.
                while (i+1 >= ret.length)
                    ret.length = ret.length * 2;
                ret[i-s.length .. i] = s;
                ret[i++] = ',';
            }
            if (i == 1) ++i;    // special case - not overwriting a comma
            ret[i-1] = ']';     // replaces last comma
            return ret[0..i];
        }
    }
    // Structs
    else static if (is(U == struct)) {
        char[] ret;
        // A very rough guess.
        ret.length = val.sizeof * 4;
        ret[0] = '{';
        size_t i = 1;
        foreach (k, v; val.tupleof) {
            alias typeof(v) T;
            // Members are keyed by their index within the struct.
            char[] s = serialize!(size_t) (k) ~ ":" ~ serialize!(T) (v);
            i += s.length;
            // Room is needed for s plus the trailing comma. A single doubling is not
            // necessarily enough when s is long, so keep growing until it fits.
            while (i+1 >= ret.length)
                ret.length = ret.length * 2;
            ret[i-s.length .. i] = s;
            ret[i++] = ',';
        }
        if (i == 1) ++i;    // special case - not overwriting a comma
        ret[i-1] = '}';     // replaces last comma
        return ret[0..i];
    }
    // Basic types
    else static if (is(U == char)) {            // char (UTF-8 byte)
        if (val > 127)      // outputing invalid utf-8 could corrupt the output stream
            throw new IllegalArgumentException ("Not a valid UTF-8 character");

        // Can't return reference to static array; so making it dynamic is cheaper than copying.
        char[] ret = new char[4];   // max length for an escaped char
        ret[0] = '\'';

        if (!isEscapableChar (val)) {
            ret[1] = val;
            ret[2] = '\'';
            return ret[0..3];
        } else {
            ret[1] = '\\';
            ret[2] = escapeChar (val);
            ret[3] = '\'';
            return ret;
        }
    }
    else static if (is(U == wchar) || is(U == dchar)) { // wchar or dchar (UTF-16/32 single char)
        if (val <= 127u)
            return serialize!(char) (cast(char) val);   // ASCII
        else {
            // convert to a multi-byte UTF-8 char
            // NOTE: suboptimal
            char[] t = Utf.toString([val]);
            return '\'' ~ t ~ '\'';
        }
    }
    else static if (is (U == bool)) {           // boolean
        static if (BINARY_AS_WORDS) {
            if (val)
                return "true";
            else
                return "false";
        } else {
            if (val)
                return "1";
            else
                return "0";
        }
    }
    else static if (is (U : long)) {            // any integer type, except char types and bool
        static if (is (U == ulong))             // ulong may not be supported properly
            if (val > cast(ulong) long.max)
                throw new IllegalArgumentException ("No handling available for ulong where value > long.max");
        return cInt.toString (val);
    }
    else static if (is (U : real)) {            // any (real) floating point type
        char[] ret = new char[32];              // minimum allowed by assert in format
        return cFloat.format (ret, val, U.dig+2, 1);// from old C++ tests, U.dig+2 gives best(?) accuracy
    }
    // Unsupported
    else
        static assert (false, "Unsupported type: "~U.stringof);
}

//BEGIN Utility funcs
/* This template provides the initial length for strings for formatting various types. These strings
 * can be expanded; this value is intended to cover 90% of cases or so.
 *
 * NOTE: This template was intended to provide specialisations for different types.
 * This one value should do reasonably well for most types.
 */
private {
    template defLength(T)        { const uint defLength = 20; }
    template defLength(T : char) { const uint defLength = 4; }
    template defLength(T : bool) { const uint defLength = 5; }
}

// True for the characters which serialize outputs as a backslash escape sequence:
// the control characters '\a' .. '\r', both quote characters and the backslash.
private bool isEscapableChar (char c) {
    return ((c <= '\r' && c >= '\a') || c == '\"' || c == '\'' || c == '\\');
}

// Returns the character following the backslash in the escape sequence for c.
// Throws on unsupported escape sequences; however this should never happen within serialize.
private char escapeChar (char c) {
    // This code was generated:
    if (c <= '\v') {
        if (c <= '\b') {
            if (c == '\a') {
                return 'a';
            } else if (c == '\b') {
                return 'b';
            }
        } else {
            if (c == '\t') {
                return 't';
            } else if (c == '\n') {
                return 'n';
            } else if (c == '\v') {
                return 'v';
            }
        }
    } else {
        if (c <= '\r') {
            if (c == '\f') {
                return 'f';
            } else if (c == '\r') {
                return 'r';
            }
        } else {
            if (c == '\"') {
                return '\"';
            } else if (c == '\'') {
                return '\'';
            } else if (c == '\\') {
                return '\\';
            }
        }
    }

    // if we haven't returned:
    throw new IllegalArgumentException ("Internal error (escapeChar)");
}
//END Utility funcs

debug (mdeUnitTest) {
    import tango.util.log.Log : Log, Logger;

    private Logger logger;
    static this() {
        logger = Log.getLogger ("mde.file.serialize");
    }

    unittest {
        // Utility
        bool throws (void delegate() dg) {
            bool r = false;
            try {
                dg();
            } catch (Exception e) {
                r = true;
            }
            return r;
        }
        assert (!throws ({ int i = 5; }));
        assert (throws ({ throw new Exception ("Test - this exception should be caught"); }));

        // Associative arrays
        char[] X = serialize!(char[][char]) (['a':cast(char[])"animal", 'b':['b','u','s']]);
        char[] Y = `['a':"animal",'b':"bus"]`;
        assert (X == Y);

        // Arrays
        // generic array stuff:
        assert (serialize!(double[]) ([1.0, 1.0e-10]) == `[1.00000000000000000,0.10000000000000000e-09]`);
        assert (serialize!(double[]) (cast(double[]) []) == `[]`);      // empty array

        // char[] conversions, with commas, escape sequences and multichar UTF8 characters:
        assert (serialize!(char[][]) ([ ".\""[], [',','\''] ,"!\b€" ]) == `[".\"",",\'","!\b€"]`);

        // wchar[] and dchar[] conversions:
        // The characters were pretty-much pulled at random from unicode tables.
        assert (serialize!(wchar[]) ("Test string: ¶α؟अกሀ搀"w) == "\"Test string: ¶α؟अกሀ搀\"");
        assert (serialize!(dchar[]) ("Test string: ¶α؟अกሀ搀"d) == "\"Test string: ¶α؟अกሀ搀\"");

        static if (SPECIAL_BINARY_NOTATION)
            assert (serialize!(ubyte[]) (cast(ubyte[]) [0x01, 0xF2, 0xAC]) == `0x01f2ac`);  // ubyte[] special notation
        else
            assert (serialize!(ubyte[]) (cast(ubyte[]) [0x01, 0xF2, 0xAC]) == `[1,242,172]`);

        // Elements much longer than the initial size guess: the output buffer has to grow by
        // more than a single doubling to hold them.
        char[] longStr = new char[200];
        longStr[] = 'x';
        char[] quoted = "\"" ~ longStr ~ "\"";
        assert (serialize!(char[][]) ([longStr]) == "[" ~ quoted ~ "]");
        assert (serialize!(char[][int]) ([1:longStr]) == "[1:" ~ quoted ~ "]");
        struct Baz { char[] s; }
        Baz baz;
        baz.s = longStr;
        assert (serialize (baz) == "{0:" ~ quoted ~ "}");

        // Structs
        struct Foo { int a = 9; char b = '\v'; float c; }
        struct Bar { Foo a,b; }
        static Foo foo1 = { a:150, b:'8'}, foo2;
        Bar bar;
        bar.a = foo1;
        bar.b = foo2;
        assert (serialize(bar) == "{0:{0:150,1:'8',2:nan},1:{0:9,1:'\\v',2:nan}}");

        // Basic Types
        // Character types
        assert (serialize!(char) ('\'') == "\'\\\'\'");
        assert (serialize!(wchar) ('X') == "'X'");
        assert (serialize!(dchar) ('X') == "'X'");
        assert (serialize!(wchar) ('£') == "'£'");  // unicode U+00A3 i.e. a multi-byte UTF-8 char
        assert (serialize!(dchar) ('£') == "'£'");
        assert (throws ({ serialize!(char) ('£'); }));  // compiler converts £ to char, but it's not valid UTF-8

        // Bool
        static if (BINARY_AS_WORDS)
            assert (serialize(false) == "false");
        else
            assert (serialize(true) == "1");

        // Integers
        assert (serialize (cast(byte) -5) == "-5");
        assert (serialize (cast(short) -32768) == "-32768");
        assert (serialize (-5) == "-5");
        assert (serialize (-9223372036854775807L) == "-9223372036854775807");
        assert (serialize (cast(ubyte) -1) == "255");
        assert (serialize (cast(ushort) -1) == "65535");
        assert (serialize!(uint) (-1) == "4294967295");
        assert (serialize (cast(ulong) 0x7FFF_FFFF_FFFF_FFFFLu) == "9223372036854775807");
        assert (serialize!(uint[]) ([0b0100u,0724,0xFa59c,0xFFFFFFFF,0]) == "[4,468,1025436,4294967295,0]");
        assert (throws ({
            // ulong is not properly supported.
            // NOTE: this is something that should really work.
            char[] r = serialize!(ulong) (0x8FFF_FFFF_FFFF_FFFFLu);
        }));

        // Floats
        // We can't do a proper float-test because we can't rely on numbers being printed to the same
        // number of figures on all platforms. Do nan tests to check type is supported.
        assert (serialize!(float) (float.init) == "nan");
        assert (serialize!(double) (double.init) == "nan");
        assert (serialize!(real) (real.init) == "nan");

        // Escape sequences (test conversion functions)
        assert (serialize ("\a\b\t\n\v\f\r\"\'\\") == `"\a\b\t\n\v\f\r\"\'\\"`);

        logger.info ("Unittest complete.");
    }
}