Mercurial > projects > mde
diff mde/file/serialize.d @ 81:d8fccaa45d5f
Moved file IO code from mde/mergetag to mde/file[/mergetag] and changed how some errors are caught.
author | Diggory Hardy <diggory.hardy@gmail.com> |
---|---|
date | Fri, 29 Aug 2008 11:59:43 +0100 |
parents | mde/mergetag/serialize.d@61ea26abe4dd |
children | ac1e3fd07275 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mde/file/serialize.d Fri Aug 29 11:59:43 2008 +0100 @@ -0,0 +1,391 @@ +/* LICENSE BLOCK +Part of mde: a Modular D game-oriented Engine +Copyright © 2007-2008 Diggory Hardy + +This program is free software: you can redistribute it and/or modify it under the terms +of the GNU General Public License as published by the Free Software Foundation, either +version 2 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/************************************************************************************************** + * Generic serialization templated function. + * + * Supports: + * Associative arrays, arrays (inc. strings), structs, char types, bool, int types, float types. + * + * Examples: + * ------------------------------------------------------------------------------------------------ + * // Basic examples: + * Cout (serialize!(byte) (-13)).newline; // -13 + * Cout (serialize!(real) (2.56e11)).newline; // 2.55999999999999990000e+11 + * Cout (serialize!(double[]) ([0.0, 1.0, 2.0, 3.0])).newline; // [0.00000000000000000,1.00000000000000000,2.00000000000000000,3.00000000000000000] + * Cout (serialize ([true,false,false])).newline; // [true,false,false] + * + * // String and ubyte[] special syntaxes (always used): + * Cout (serialize ("A string.")).newline; // "A string." 
(including quotes) + * Cout (serialize (cast(ubyte[]) [5u, 0xF1u, 0x10u])).newline; // 0x05f110 + * + * // Associative arrays: + * Cout (serialize ([-1:"negative one"[], 0:"zero", 1:"one"])).newline; // [0:"zero",1:"one",-1:"negative one"] + * + * // Structs: + * struct S { int a = 5; double[int[]] x; } + * S s; + * Cout (serialize (s)); + * + * // No limit on complexity... + * char[] somethingComplicated = serialize!(real[][][bool[int[][]]]) (...); + * ------------------------------------------------------------------------------------------------ + * + * throws: + * May throw a UnicodeException or an IllegalArgumentException. + * + * TODO: Optimize memory allocation (if possible?). Test best sizes for initial allocations + * instead of merely guessing? + *************************************************************************************************/ +//NOTE: in case of multiple formats, make this a dummy module importing both serialize modules, +// or put all the code here. +module mde.file.serialize; +// Since serialize is never used in a module where deserialize is not used, save an import: +public import mde.file.deserialize; + +// tango imports +import tango.core.Traits; +import tango.core.Exception : UnicodeException, IllegalArgumentException; +import cInt = tango.text.convert.Integer; +import cFloat = tango.text.convert.Float; +import Utf = tango.text.convert.Utf; + + +alias serialize parseFrom; // support the old name + +// Formatting options, for where multiple formats are supported by the deserializer. + +// Output using the special binary notation (0x01F2AC instead of [01 ,0xF2, 0xAC])? +const bool SPECIAL_BINARY_NOTATION = true; + +// Output binary as true / false or 1 / 0 ? 
const bool BINARY_AS_WORDS = true;


/**
 * Convert val to its textual (serialized) representation.
 *
 * The branch taken is selected at compile time from the type U:
 * associative arrays, arrays (with special forms for strings and, when
 * SPECIAL_BINARY_NOTATION is set, ubyte[]), structs, char types, bool,
 * integer types and floating point types. Any other type is rejected by a
 * static assert.
 *
 * Params:
 *  val = the value to serialize.
 *
 * Returns:
 *  A char[] holding the serialized text.
 *
 * Throws:
 *  UnicodeException for a wchar/dchar above 127 (and possibly from the UTF
 *  conversion of wchar[]/dchar[] input); IllegalArgumentException for a ulong
 *  greater than long.max.
 */
char[] serialize(U) (U val) {
    // Associative arrays (NOTE: cannot use is() expression)
    static if (isAssocArrayType!(U)) {                  // generic associative array
        alias typeof(U.keys[0])     S;
        alias typeof(U.values[0])   T;
        char[] ret;
        // A guess, including values themselves and [,:] elements (must be at least 2).
        ret.length = val.length * (defLength!(T) + defLength!(S) + 2) + 2;
        ret[0] = '[';
        size_t i = 1;
        foreach (S k, T v; val) {
            char[] s = serialize!(S) (k) ~ ":" ~ serialize!(T) (v);
            i += s.length;
            // One doubling is not always enough: a single entry may be longer than the
            // whole current buffer, so keep growing until entry + separator fit.
            while (i+1 >= ret.length)
                ret.length = ret.length * 2;
            ret[i-s.length .. i] = s;
            ret[i++] = ',';
        }
        if (i == 1) ++i;    // special case - not overwriting a comma
        ret[i-1] = ']';     // replaces last comma
        return ret[0..i];
    }
    // Arrays
    else static if (is(U S == S[]) || isStaticArrayType!(U)) {
        alias typeof(U[0]) T;

        static if (is(T == char)) {                     // string
            // Every input char yields at most two output chars (backslash + code), plus the
            // two quotes, so this allocation is always sufficient.
            char[] ret = new char[val.length * 2 + 2];
            ret[0] = '"';
            size_t i = 1;
            for (size_t t = 0; t < val.length;) {
                // process a block of non-escapable characters
                size_t s = t;
                while (t < val.length && !isEscapableChar(val[t]))
                    ++t;    // skip all non-escapable chars
                size_t j = i + t - s;
                ret[i..j] = val[s..t];  // copy a block
                i = j;
                // process a block of escapable characters
                while (t < val.length && isEscapableChar(val[t])) {
                    ret[i++] = '\\';                    // backslash; increment i
                    ret[i++] = escapeChar(val[t++]);    // character; increment i and t
                }
            }
            ret[i++] = '"';
            return ret[0..i];
        }
        else static if (is(T == wchar) || is(T == dchar)) {   // wstring or dstring
            // May throw a UnicodeException; don't bother catching and rethrowing:
            return serialize!(char[]) (Utf.toString (val));
        }
        else static if (SPECIAL_BINARY_NOTATION && is(T == ubyte)) {    // special binary notation
            // Note: To disable the usage of this special type, set SPECIAL_BINARY_NOTATION = false.
            static const char[16] digits = "0123456789abcdef";

            char[] ret = new char[val.length * 2 + 2];  // exact length: "0x" + 2 digits per byte
            ret[0..2] = "0x";
            size_t i = 2;

            foreach (ubyte x; val) {
                ret[i++] = digits[x >> 4];      // high nibble
                ret[i++] = digits[x & 0x0F];    // low nibble
            }
            return ret;
        }
        else {                                          // generic array
            char[] ret;
            // A guess, including commas and brackets (must be at least 2)
            ret.length = val.length * (defLength!(T) + 1) + 2;
            ret[0] = '[';
            size_t i = 1;
            foreach (T x; val) {
                char[] s = serialize!(T) (x);
                i += s.length;
                // Grow until the element and its trailing comma fit; an element may be far
                // longer than the guessed size (e.g. a long string inside a char[][]).
                while (i+1 >= ret.length)
                    ret.length = ret.length * 2;
                ret[i-s.length .. i] = s;
                ret[i++] = ',';
            }
            if (i == 1)
                ++i;        // special case - not overwriting a comma
            ret[i-1] = ']'; // replaces last comma
            return ret[0..i];
        }
    }
    // Structs
    else static if (is(U == struct)) {
        char[] ret;
        // A very rough guess.
        ret.length = val.sizeof * 4;
        ret[0] = '{';
        size_t i = 1;
        foreach (k, v; val.tupleof) {
            alias typeof(v) T;
            // Members are written as index:value pairs.
            char[] s = serialize!(size_t) (k) ~ ":" ~ serialize!(T) (v);
            i += s.length;
            // Grow until the member and its trailing comma fit (see the array branches).
            while (i+1 >= ret.length)
                ret.length = ret.length * 2;
            ret[i-s.length .. i] = s;
            ret[i++] = ',';
        }
        if (i == 1) ++i;    // special case - not overwriting a comma
        ret[i-1] = '}';     // replaces last comma
        return ret[0..i];
    }
    // Basic types
    else static if (is(U == char)) {                    // char (UTF-8 byte)
        // Note: if (val > 127) "is invalid UTF-8 single char". However we don't know
        // what this is for, in particular if it will be recombined with other chars later.

        // Can't return reference to static array; so making it dynamic is cheaper than copying.
        char[] ret = new char[4];   // max length for an escaped char
        ret[0] = '\'';

        if (!isEscapableChar (val)) {
            ret[1] = val;
            ret[2] = '\'';
            return ret[0..3];
        } else {
            ret[1] = '\\';
            ret[2] = escapeChar (val);
            ret[3] = '\'';
            return ret;
        }
    } else static if (is(U == wchar) ||
                      is(U == dchar)) {                 // wchar or dchar (UTF-16/32 single char)
        // Note: only ascii can be converted. NOTE: convert to UTF-8 (multibyte) char?
        if (val <= 127u)
            return serialize!(char) (cast(char) val);   // ASCII
        else throw new UnicodeException (
            "Error: unicode non-ascii character cannot be converted to a single UTF-8 char", 0);
    } else static if (is (U == bool)) {                 // boolean
        static if (BINARY_AS_WORDS) {
            if (val)
                return "true";
            else return "false";
        } else {
            if (val)
                return "1";
            else return "0";
        }
    } else static if (is (U : long)) {                  // any integer type, except char types and bool
        static if (is (U == ulong))                     // ulong may not be supported properly
            if (val > cast(ulong) long.max)
                throw new IllegalArgumentException ("No handling available for ulong where value > long.max");
        return cInt.toString (val);
    } else static if (is (U : real)) {                  // any (real) floating point type
        char[] ret = new char[32];                      // minimum allowed by assert in format
        return cFloat.format (ret, val, U.dig+2, 1);    // from old C++ tests, U.dig+2 gives best(?) accuracy
    }
    // Unsupported
    else
        static assert (false, "Unsupported type: "~U.stringof);
}

//BEGIN Utility funcs
/* This template provides the initial length for strings for formatting various types. These strings
 * can be expanded; this value is intended to cover 90% of cases or so.
 *
 * NOTE: This template was intended to provide specialisations for different types.
 * This one value should do reasonably well for most types.
 */
private {
    template defLength(T)        { const uint defLength = 20; }
    template defLength(T : char) { const uint defLength = 4;  }
    template defLength(T : bool) { const uint defLength = 5;  }
}

// True for the characters serialize writes as a backslash escape: the control characters
// '\a' .. '\r' (7 .. 13) plus the double quote, single quote and backslash.
private bool isEscapableChar (char c) {
    return ((c <= '\r' && c >= '\a') || c == '\"' || c == '\'' || c == '\\');
}

// Returns the character that follows the backslash in the escape sequence for c.
// Throws on unsupported escape sequences; however this should never happen within serialize,
// which only calls this for characters accepted by isEscapableChar.
private char escapeChar (char c) {
    switch (c) {
        case '\a':  return 'a';
        case '\b':  return 'b';
        case '\t':  return 't';
        case '\n':  return 'n';
        case '\v':  return 'v';
        case '\f':  return 'f';
        case '\r':  return 'r';
        case '\"':  return '\"';
        case '\'':  return '\'';
        case '\\':  return '\\';
        default:
            throw new IllegalArgumentException ("Internal error (escapeChar)");
    }
}
//END Utility funcs



debug (UnitTest) {
    import tango.util.log.Log : Log, Logger;

    private Logger logger;
    static this() {
        logger = Log.getLogger ("text.serialize");
    }
unittest {
    // Utility: runs dg and reports whether it threw.
    bool throws (void delegate() dg) {
        bool r = false;
        try {
            dg();
        } catch (Exception e) {
            r = true;
            logger.info ("Exception caught: "~e.msg);
        }
        return r;
    }
    assert (!throws ({ int i = 5; }));
    assert (throws ({ throw new Exception ("Test - this exception should be caught"); }));

    // Associative arrays
    char[] X = serialize!(char[][char]) (['a':cast(char[])"animal", 'b':['b','u','s']]);
    char[] Y = `['a':"animal",'b':"bus"]`;
    assert (X == Y);


    // Arrays
    // generic array stuff:
    assert (serialize!(double[]) ([1.0, 1.0e-10]) == `[1.00000000000000000,0.10000000000000000e-09]`);
    assert (serialize!(double[]) (cast(double[]) []) == `[]`);     // empty array

    // An element much longer than the initial size guess: the output buffer has to be
    // enlarged more than once for this single element.
    char[] longElem = "0123456789012345678901234567890123456789012345678901234567890123456789";
    assert (serialize!(char[][]) ([longElem]) == "[\"" ~ longElem ~ "\"]");

    // char[] conversions, with commas, escape sequences and multichar UTF8 characters:
    assert (serialize!(char[][]) ([ ".\""[], [',','\''] ,"!\b€" ]) == `[".\"",",\'","!\b€"]`);

    // wchar[] and dchar[] conversions:
    // The characters were pretty-much pulled at random from unicode tables.
    assert (serialize!(wchar[]) ("Test string: ¶α؟अกሀ搀"w) == "\"Test string: ¶α؟अกሀ搀\"");
    assert (serialize!(dchar[]) ("Test string: ¶α؟अกሀ搀"d) == "\"Test string: ¶α؟अกሀ搀\"");


    static if (SPECIAL_BINARY_NOTATION)
        assert (serialize!(ubyte[]) (cast(ubyte[]) [0x01, 0xF2, 0xAC]) == `0x01f2ac`);     // ubyte[] special notation
    else
        assert (serialize!(ubyte[]) (cast(ubyte[]) [0x01, 0xF2, 0xAC]) == `[1,242,172]`);


    // Structs
    struct Foo {    int a = 9;  char b = '\v'; float c;    }
    struct Bar {    Foo a,b;    }
    static Foo foo1 = { a:150, b:'8', c:17.2f}, foo2;
    Bar bar;
    bar.a = foo1;
    bar.b = foo2;
    assert (serialize(bar) == "{0:{0:150,1:'8',2:1.72000007e+01},1:{0:9,1:'\\v',2:nan}}");


    // Basic Types
    // Character types
    assert (serialize!(char) ('\'') == "\'\\\'\'");
    assert (serialize!(wchar) ('X') == "'X'");
    assert (serialize!(dchar) ('X') == "'X'");
    assert (throws ({ char[] r = serialize!(wchar) ('£');   /* unicode U+00A3 */ }));
    assert (throws ({ char[] r = serialize!(dchar) ('£'); }));

    // Bool
    static if (BINARY_AS_WORDS)
        assert (serialize(false) == "false");
    else
        assert (serialize(true) == "1");

    // Integers
    assert (serialize (cast(byte) -5) == "-5");
    assert (serialize (cast(short) -32768) == "-32768");
    assert (serialize (-5) == "-5");
    assert (serialize (-9223372036854775807L) == "-9223372036854775807");
    assert (serialize (cast(ubyte) -1) == "255");
    assert (serialize (cast(ushort) -1) == "65535");
    assert (serialize!(uint) (-1) == "4294967295");
    assert (serialize (cast(ulong) 0x7FFF_FFFF_FFFF_FFFFLu) == "9223372036854775807");
    assert (serialize!(uint[]) ([0b0100u,0724,0xFa59c,0xFFFFFFFF,0]) ==
                                "[4,468,1025436,4294967295,0]");
    assert (throws ({
        // ulong is not properly supported.
        // NOTE: this is something that should really work.
        char[] r = serialize!(ulong) (0x8FFF_FFFF_FFFF_FFFFLu);
    }));

    // Floats
    // These numbers are not particularly meaningful:
    assert (serialize!(float) (0.0f) == "0.00000000");
    assert (serialize!(double) (-1e25) == "-1.00000000000000000e+25");
    assert (serialize!(real) (cast(real) 4.918e300) == "4.91800000000000000000e+300");

    // Escape sequences (test conversion functions)
    assert (serialize ("\a\b\t\n\v\f\r\"\'\\") == `"\a\b\t\n\v\f\r\"\'\\"`);

    logger.info ("Unittest complete.");
}
}