Mercurial > projects > mde
diff mde/file/serialize.d @ 82:ac1e3fd07275
New ssi file format.
(De)serializer now supports non-ASCII wide characters (encoded as UTF-8) and no longer supports non-ASCII 8-bit chars, which would result in invalid UTF-8.
Moved/renamed a few things left over from the last commit.
author | Diggory Hardy <diggory.hardy@gmail.com> |
---|---|
date | Sat, 30 Aug 2008 09:37:35 +0100 |
parents | d8fccaa45d5f |
children | 79d816b3e2d2 |
line wrap: on
line diff
--- a/mde/file/serialize.d Fri Aug 29 11:59:43 2008 +0100 +++ b/mde/file/serialize.d Sat Aug 30 09:37:35 2008 +0100 @@ -17,7 +17,8 @@ * Generic serialization templated function. * * Supports: - * Associative arrays, arrays (inc. strings), structs, char types, bool, int types, float types. + * Associative arrays, dynamic arrays (with usual formatting of strings), structs, char types, + * bool, int types, float types. * * Examples: * ------------------------------------------------------------------------------------------------ @@ -51,6 +52,8 @@ *************************************************************************************************/ //NOTE: in case of multiple formats, make this a dummy module importing both serialize modules, // or put all the code here. +//FIXME: Optimize by using a slicing buffer. Put everything in a struct containing this buffer to +// make it thread-safe. module mde.file.serialize; // Since serialize is never used in a module where deserialize is not used, save an import: public import mde.file.deserialize; @@ -181,8 +184,8 @@ } // Basic types else static if (is(U == char)) { // char (UTF-8 byte) - // Note: if (val > 127) "is invalid UTF-8 single char". However we don't know - // what this is for, in particular if it will be recombined with other chars later. + if (val > 127) // outputing invalid utf-8 could corrupt the output stream + throw new IllegalArgumentException ("Not a valid UTF-8 character"); // Can't return reference to static array; so making it dynamic is cheaper than copying. char[] ret = new char[4]; // max length for an escaped char @@ -200,11 +203,16 @@ } } else static if (is(U == wchar) || is(U == dchar)) { // wchar or dchar (UTF-16/32 single char) - // Note: only ascii can be converted. NOTE: convert to UTF-8 (multibyte) char? 
if (val <= 127u) return serialize!(char) (cast(char) val); // ASCII - else throw new UnicodeException ( - "Error: unicode non-ascii character cannot be converted to a single UTF-8 char", 0); + else { // convert to a multi-byte UTF-8 char + // NOTE: suboptimal + char[] t,ret; + t = Utf.toString([val]); + ret.length = t.length + 2; + ret = '\'' ~ t ~ '\''; + return ret; + } } else static if (is (U == bool)) { // boolean static if (BINARY_AS_WORDS) { if (val) @@ -288,12 +296,12 @@ -debug (UnitTest) { +debug (mdeUnitTest) { import tango.util.log.Log : Log, Logger; private Logger logger; static this() { - logger = Log.getLogger ("text.serialize"); + logger = Log.getLogger ("mde.file.serialize"); } unittest { // Utility @@ -303,7 +311,7 @@ dg(); } catch (Exception e) { r = true; - logger.info ("Exception caught: "~e.msg); + logger.trace ("Exception caught: "~e.msg); } return r; } @@ -351,8 +359,9 @@ assert (serialize!(char) ('\'') == "\'\\\'\'"); assert (serialize!(wchar) ('X') == "'X'"); assert (serialize!(dchar) ('X') == "'X'"); - assert (throws ({ char[] r = serialize!(wchar) ('£'); /* unicode U+00A3 */ })); - assert (throws ({ char[] r = serialize!(dchar) ('£'); })); + assert (serialize!(wchar) ('£') == "'£'"); // unicode U+00A3 i.e. a multi-byte UTF-8 char + assert (serialize!(dchar) ('£') == "'£'"); + assert (throws ({ serialize!(char) ('£'); })); // compiler converts £ to char, but it's not valid UTF-8 // Bool static if (BINARY_AS_WORDS)