comparison mde/file/serialize.d @ 81:d8fccaa45d5f

Moved file IO code from mde/mergetag to mde/file[/mergetag] and changed how some errors are caught.
author Diggory Hardy <diggory.hardy@gmail.com>
date Fri, 29 Aug 2008 11:59:43 +0100
parents mde/mergetag/serialize.d@61ea26abe4dd
children ac1e3fd07275
comparison
equal deleted inserted replaced
80:ea58f277f487 81:d8fccaa45d5f
1 /* LICENSE BLOCK
2 Part of mde: a Modular D game-oriented Engine
3 Copyright © 2007-2008 Diggory Hardy
4
5 This program is free software: you can redistribute it and/or modify it under the terms
6 of the GNU General Public License as published by the Free Software Foundation, either
7 version 2 of the License, or (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
10 without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 See the GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program. If not, see <http://www.gnu.org/licenses/>. */
15
16 /**************************************************************************************************
17 * Generic serialization templated function.
18 *
19 * Supports:
20 * Associative arrays, arrays (inc. strings), structs, char types, bool, int types, float types.
21 *
22 * Examples:
23 * ------------------------------------------------------------------------------------------------
24 * // Basic examples:
25 * Cout (serialize!(byte) (-13)).newline; // -13
26 * Cout (serialize!(real) (2.56e11)).newline; // 2.55999999999999990000e+11
27 * Cout (serialize!(double[]) ([0.0, 1.0, 2.0, 3.0])).newline; // [0.00000000000000000,1.00000000000000000,2.00000000000000000,3.00000000000000000]
28 * Cout (serialize ([true,false,false])).newline; // [true,false,false]
29 *
30 * // String and ubyte[] special syntaxes (always used):
31 * Cout (serialize ("A string.")).newline; // "A string." (including quotes)
32 * Cout (serialize (cast(ubyte[]) [5u, 0xF1u, 0x10u])).newline; // 0x05f110
33 *
34 * // Associative arrays:
35 * Cout (serialize ([-1:"negative one"[], 0:"zero", 1:"one"])).newline; // [0:"zero",1:"one",-1:"negative one"]
36 *
37 * // Structs:
38 * struct S { int a = 5; double[int[]] x; }
39 * S s;
40 * Cout (serialize (s));
41 *
42 * // No limit on complexity...
43 * char[] somethingComplicated = serialize!(real[][][bool[int[][]]]) (...);
44 * ------------------------------------------------------------------------------------------------
45 *
46 * throws:
47 * May throw a UnicodeException or an IllegalArgumentException.
48 *
49 * TODO: Optimize memory allocation (if possible?). Test best sizes for initial allocations
50 * instead of merely guessing?
51 *************************************************************************************************/
52 //NOTE: in case of multiple formats, make this a dummy module importing both serialize modules,
53 // or put all the code here.
54 module mde.file.serialize;
55 // Since serialize is never used in a module where deserialize is not used, save an import:
56 public import mde.file.deserialize;
57
58 // tango imports
59 import tango.core.Traits;
60 import tango.core.Exception : UnicodeException, IllegalArgumentException;
61 import cInt = tango.text.convert.Integer;
62 import cFloat = tango.text.convert.Float;
63 import Utf = tango.text.convert.Utf;
64
65
66 alias serialize parseFrom; // Backwards-compatible alias: keeps the old name (parseFrom) working.
67
68 // Formatting options, for where multiple formats are supported by the deserializer.
69
70 // Output ubyte[] using the special binary notation (0x01f2ac instead of [1,242,172])?
71 const bool SPECIAL_BINARY_NOTATION = true;
72
73 // Output bool values as words (true / false) rather than as digits (1 / 0)?
74 const bool BINARY_AS_WORDS = true;
75
76
/**
 * Converts val to its textual representation.
 *
 * The branch taken is chosen at compile time from U:
 *  - associative arrays:   [key:value,key:value,...]
 *  - char[]:               a double-quoted string with backslash escapes
 *  - wchar[] / dchar[]:    converted to UTF-8, then treated as char[]
 *  - ubyte[]:              0x followed by two hex digits per byte (if SPECIAL_BINARY_NOTATION)
 *  - other arrays:         [elem,elem,...]
 *  - structs:              {0:member,1:member,...} (members keyed by their tuple index)
 *  - char / wchar / dchar: a single-quoted (possibly escaped) character
 *  - bool:                 true / false (or 1 / 0 if BINARY_AS_WORDS is false)
 *  - integer types:        decimal text via cInt.toString
 *  - floating point types: text via cFloat.format with U.dig+2 decimals
 * Any other type is rejected at compile time by a static assert.
 *
 * Params:
 *  val = the value to serialize.
 *
 * Returns:
 *  The serialized text.
 *
 * Throws:
 *  UnicodeException for a wchar/dchar above 127 (and possibly from Utf.toString for
 *  wchar[]/dchar[] input); IllegalArgumentException for a ulong above long.max.
 */
char[] serialize(U) (U val) {
    // Associative arrays (NOTE: cannot use is() expression)
    static if (isAssocArrayType!(U)) {              // generic associative array
        alias typeof(U.keys[0])   S;
        alias typeof(U.values[0]) T;
        char[] ret;
        // A guess, including values themselves and [,:] elements (must be at least 2).
        ret.length = val.length * (defLength!(T) + defLength!(S) + 2) + 2;
        ret[0] = '[';
        uint i = 1;
        foreach (S k, T v; val) {
            char[] s = serialize!(S) (k) ~ ":" ~ serialize!(T) (v);
            i += s.length;
            // Keep doubling until s AND its trailing separator fit: a single doubling is not
            // enough when one entry is longer than the whole current buffer.
            while (i+1 >= ret.length)
                ret.length = ret.length * 2;
            ret[i-s.length .. i] = s;
            ret[i++] = ',';
        }
        if (i == 1) ++i;    // special case - not overwriting a comma
        ret[i-1] = ']';     // replaces last comma
        return ret[0..i];
    }
    // Arrays
    else static if (is(U S == S[]) || isStaticArrayType!(U)) {
        alias typeof(U[0]) T;

        static if (is(T == char)) {                 // string
            // Initial storage. This should ALWAYS be enough: every char expands to at most
            // two chars, plus the two enclosing quotes.
            char[] ret = new char[val.length * 2 + 2];
            ret[0] = '"';
            uint i = 1;
            for (uint t = 0; t < val.length;) {
                // process a block of non-escapable characters
                uint s = t;
                while (t < val.length && !isEscapableChar(val[t]))
                    ++t;    // skip all non-escapable chars
                uint j = i + t - s;
                ret[i..j] = val[s..t];  // copy a block
                i = j;
                // process a block of escapable characters
                while (t < val.length && isEscapableChar(val[t])) {
                    ret[i++] = '\\';                    // backslash; increment i
                    ret[i++] = escapeChar(val[t++]);    // character; increment i and t
                }
            }
            ret[i++] = '"';
            return ret[0..i];
        }
        else static if (is(T == wchar) || is(T == dchar)) {    // wstring or dstring
            // May throw a UnicodeException; don't bother catching and rethrowing:
            return serialize!(char[]) (Utf.toString (val));
        }
        else static if (SPECIAL_BINARY_NOTATION && is(T == ubyte)) {   // special binary notation
            // Note: To disable the usage of this special type, set SPECIAL_BINARY_NOTATION = false.
            static const char[16] digits = "0123456789abcdef";

            char[] ret = new char[val.length * 2 + 2];  // exact length: "0x" + 2 digits per byte
            ret[0..2] = "0x";
            uint i = 2;

            foreach (ubyte x; val) {
                ret[i++] = digits[x >> 4];      // high nibble
                ret[i++] = digits[x & 0x0F];    // low nibble
            }
            return ret;
        }
        else {                                      // generic array
            char[] ret;
            // A guess, including commas and brackets (must be at least 2)
            ret.length = val.length * (defLength!(T) + 1) + 2;
            ret[0] = '[';
            uint i = 1;
            foreach (T x; val) {
                char[] s = serialize!(T) (x);
                i += s.length;
                // Keep doubling until s AND its trailing separator fit: a single doubling is
                // not enough when one element is longer than the whole current buffer.
                while (i+1 >= ret.length)
                    ret.length = ret.length * 2;
                ret[i-s.length .. i] = s;
                ret[i++] = ',';
            }
            if (i == 1)
                ++i;        // special case - not overwriting a comma
            ret[i-1] = ']'; // replaces last comma
            return ret[0..i];
        }
    }
    // Structs
    else static if (is(U == struct)) {
        char[] ret;
        // A very rough guess.
        ret.length = val.sizeof * 4;
        ret[0] = '{';
        uint i = 1;
        foreach (k, v; val.tupleof) {
            alias typeof(v) T;
            char[] s = serialize!(size_t) (k) ~ ":" ~ serialize!(T) (v);
            i += s.length;
            // Keep doubling until s AND its trailing separator fit: a single doubling is not
            // enough when one member is longer than the whole current buffer.
            while (i+1 >= ret.length)
                ret.length = ret.length * 2;
            ret[i-s.length .. i] = s;
            ret[i++] = ',';
        }
        if (i == 1) ++i;    // special case - not overwriting a comma
        ret[i-1] = '}';     // replaces last comma
        return ret[0..i];
    }
    // Basic types
    else static if (is(U == char)) {                // char (UTF-8 byte)
        // Note: if (val > 127) "is invalid UTF-8 single char". However we don't know
        // what this is for, in particular if it will be recombined with other chars later.

        // Can't return reference to static array; so making it dynamic is cheaper than copying.
        char[] ret = new char[4];   // max length for an escaped char
        ret[0] = '\'';

        if (!isEscapableChar (val)) {
            ret[1] = val;
            ret[2] = '\'';
            return ret[0..3];
        } else {
            ret[1] = '\\';
            ret[2] = escapeChar (val);
            ret[3] = '\'';
            return ret;
        }
    } else static if (is(U == wchar) ||
                      is(U == dchar)) {             // wchar or dchar (UTF-16/32 single char)
        // Note: only ascii can be converted. NOTE: convert to UTF-8 (multibyte) char?
        if (val <= 127u)
            return serialize!(char) (cast(char) val);   // ASCII
        else throw new UnicodeException (
            "Error: unicode non-ascii character cannot be converted to a single UTF-8 char", 0);
    } else static if (is (U == bool)) {             // boolean
        static if (BINARY_AS_WORDS) {
            if (val)
                return "true";
            else return "false";
        } else {
            if (val)
                return "1";
            else return "0";
        }
    } else static if (is (U : long)) {              // any integer type, except char types and bool
        static if (is (U == ulong))                 // ulong may not be supported properly
            if (val > cast(ulong) long.max)
                throw new IllegalArgumentException ("No handling available for ulong where value > long.max");
        return cInt.toString (val);
    } else static if (is (U : real)) {              // any (real) floating point type
        char[] ret = new char[32];                  // minimum allowed by assert in format
        return cFloat.format (ret, val, U.dig+2, 1);// from old C++ tests, U.dig+2 gives best(?) accuracy
    }
    // Unsupported
    else
        static assert (false, "Unsupported type: "~U.stringof);
}
231
232 //BEGIN Utility funcs
/* Initial buffer-length guess (in chars) for the serialized form of one value of type T.
 *
 * The buffers sized with this value can still be expanded afterwards; the figure is only meant
 * to be large enough for roughly 90% of cases.
 *
 * NOTE: the template was intended to carry a specialisation per type. Only char and bool have
 * one; the general value should do reasonably well for everything else.
 */
private template defLength(T)        { const uint defLength = 20; }
private template defLength(T : char) { const uint defLength = 4; }
private template defLength(T : bool) { const uint defLength = 5; }
// Returns true when c has to be written as a backslash escape sequence: one of the control
// characters \a \b \t \n \v \f \r, a double quote, a single quote or a backslash.
private bool isEscapableChar (char c) {
    // \a .. \r are the contiguous codes 7 .. 13, so a range test covers all seven.
    bool isControlEscape = (c >= '\a' && c <= '\r');
    bool isQuoteOrBackslash = (c == '\"' || c == '\'' || c == '\\');
    return isControlEscape || isQuoteOrBackslash;
}
// Maps a character that needs escaping to the letter/symbol that follows the backslash in its
// escape sequence (e.g. a newline maps to 'n').
// Throws an IllegalArgumentException for any other character; within serialize this should
// never happen, since callers check isEscapableChar first.
private char escapeChar (char c) {
    switch (c) {
        case '\a': return 'a';
        case '\b': return 'b';
        case '\t': return 't';
        case '\n': return 'n';
        case '\v': return 'v';
        case '\f': return 'f';
        case '\r': return 'r';
        case '\"': return '\"';
        case '\'': return '\'';
        case '\\': return '\\';
        default:
            // not an escapable character:
            throw new IllegalArgumentException ("Internal error (escapeChar)");
    }
}
287 //END Utility funcs
288
289
290
debug (UnitTest) {
    import tango.util.log.Log : Log, Logger;

    private Logger logger;
    static this() {
        logger = Log.getLogger ("text.serialize");
    }

    // Exercises every branch of serialize: associative arrays, arrays, structs and basic types.
    unittest {
        // Helper: runs dg and reports whether it threw, logging the exception message if so.
        bool throws (void delegate() dg) {
            try {
                dg();
            } catch (Exception e) {
                logger.info ("Exception caught: "~e.msg);
                return true;
            }
            return false;
        }
        // Sanity-check the helper itself before relying on it:
        assert (!throws ({ int i = 5; }));
        assert (throws ({ throw new Exception ("Test - this exception should be caught"); }));


        // ---- Associative arrays ----
        char[] actual   = serialize!(char[][char]) (['a':cast(char[])"animal", 'b':['b','u','s']]);
        char[] expected = `['a':"animal",'b':"bus"]`;
        assert (actual == expected);


        // ---- Arrays ----
        // generic array stuff:
        assert (serialize!(double[]) ([1.0, 1.0e-10]) == `[1.00000000000000000,0.10000000000000000e-09]`);
        assert (serialize!(double[]) (cast(double[]) []) == `[]`);     // empty array

        // char[] conversions, with commas, escape sequences and multichar UTF8 characters:
        assert (serialize!(char[][]) ([ ".\""[], [',','\''] ,"!\b€" ]) == `[".\"",",\'","!\b€"]`);

        // wchar[] and dchar[] conversions:
        // The characters were pretty-much pulled at random from unicode tables.
        assert (serialize!(wchar[]) ("Test string: ¶α؟अกሀ搀"w) == "\"Test string: ¶α؟अกሀ搀\"");
        assert (serialize!(dchar[]) ("Test string: ¶α؟अกሀ搀"d) == "\"Test string: ¶α؟अกሀ搀\"");

        // ubyte[]: output depends on the compile-time notation switch.
        static if (SPECIAL_BINARY_NOTATION)
            assert (serialize!(ubyte[]) (cast(ubyte[]) [0x01, 0xF2, 0xAC]) == `0x01f2ac`);   // ubyte[] special notation
        else
            assert (serialize!(ubyte[]) (cast(ubyte[]) [0x01, 0xF2, 0xAC]) == `[1,242,172]`);


        // ---- Structs ----
        struct Foo { int a = 9; char b = '\v'; float c; }
        struct Bar { Foo a,b; }
        static Foo foo1 = { a:150, b:'8', c:17.2f}, foo2;
        Bar bar;
        bar.a = foo1;
        bar.b = foo2;
        assert (serialize(bar) == "{0:{0:150,1:'8',2:1.72000007e+01},1:{0:9,1:'\\v',2:nan}}");


        // ---- Basic types ----
        // Character types
        assert (serialize!(char) ('\'') == "\'\\\'\'");
        assert (serialize!(wchar) ('X') == "'X'");
        assert (serialize!(dchar) ('X') == "'X'");
        assert (throws ({ char[] s = serialize!(wchar) ('£');   /* unicode U+00A3 */ }));
        assert (throws ({ char[] s = serialize!(dchar) ('£'); }));

        // Bool
        static if (BINARY_AS_WORDS)
            assert (serialize(false) == "false");
        else
            assert (serialize(true) == "1");

        // Integers
        assert (serialize (cast(byte) -5) == "-5");
        assert (serialize (cast(short) -32768) == "-32768");
        assert (serialize (-5) == "-5");
        assert (serialize (-9223372036854775807L) == "-9223372036854775807");
        assert (serialize (cast(ubyte) -1) == "255");
        assert (serialize (cast(ushort) -1) == "65535");
        assert (serialize!(uint) (-1) == "4294967295");
        assert (serialize (cast(ulong) 0x7FFF_FFFF_FFFF_FFFFLu) == "9223372036854775807");
        assert (serialize!(uint[]) ([0b0100u,0724,0xFa59c,0xFFFFFFFF,0]) ==
                "[4,468,1025436,4294967295,0]");
        assert (throws ({
            // ulong is not properly supported.
            // NOTE: this is something that should really work.
            char[] s = serialize!(ulong) (0x8FFF_FFFF_FFFF_FFFFLu);
        }));

        // Floats
        // These numbers are not particularly meaningful:
        assert (serialize!(float) (0.0f) == "0.00000000");
        assert (serialize!(double) (-1e25) == "-1.00000000000000000e+25");
        assert (serialize!(real) (cast(real) 4.918e300) == "4.91800000000000000000e+300");

        // Escape sequences (test conversion functions)
        assert (serialize ("\a\b\t\n\v\f\r\"\'\\") == `"\a\b\t\n\v\f\r\"\'\\"`);

        logger.info ("Unittest complete.");
    }
}