Mercurial > projects > mde
comparison mde/file/serialize.d @ 81:d8fccaa45d5f
Moved file IO code from mde/mergetag to mde/file[/mergetag] and changed how some errors are caught.
author | Diggory Hardy <diggory.hardy@gmail.com> |
---|---|
date | Fri, 29 Aug 2008 11:59:43 +0100 |
parents | mde/mergetag/serialize.d@61ea26abe4dd |
children | ac1e3fd07275 |
comparison
equal
deleted
inserted
replaced
80:ea58f277f487 | 81:d8fccaa45d5f |
---|---|
1 /* LICENSE BLOCK | |
2 Part of mde: a Modular D game-oriented Engine | |
3 Copyright © 2007-2008 Diggory Hardy | |
4 | |
5 This program is free software: you can redistribute it and/or modify it under the terms | |
6 of the GNU General Public License as published by the Free Software Foundation, either | |
7 version 2 of the License, or (at your option) any later version. | |
8 | |
9 This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; | |
10 without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
11 See the GNU General Public License for more details. | |
12 | |
13 You should have received a copy of the GNU General Public License | |
14 along with this program. If not, see <http://www.gnu.org/licenses/>. */ | |
15 | |
16 /************************************************************************************************** | |
17 * Generic serialization templated function. | |
18 * | |
19 * Supports: | |
20 * Associative arrays, arrays (inc. strings), structs, char types, bool, int types, float types. | |
21 * | |
22 * Examples: | |
23 * ------------------------------------------------------------------------------------------------ | |
24 * // Basic examples: | |
25 * Cout (serialize!(byte) (-13)).newline; // -13 | |
26 * Cout (serialize!(real) (2.56e11)).newline; // 2.55999999999999990000e+11 | |
27 * Cout (serialize!(double[]) ([0.0, 1.0, 2.0, 3.0])).newline; // [0.00000000000000000,1.00000000000000000,2.00000000000000000,3.00000000000000000] | |
28 * Cout (serialize ([true,false,false])).newline; // [true,false,false] | |
29 * | |
30 * // String and ubyte[] special syntaxes (always used): | |
31 * Cout (serialize ("A string.")).newline; // "A string." (including quotes) | |
32 * Cout (serialize (cast(ubyte[]) [5u, 0xF1u, 0x10u])).newline; // 0x05f110 | |
33 * | |
34 * // Associative arrays: | |
35 * Cout (serialize ([-1:"negative one"[], 0:"zero", 1:"one"])).newline; // [0:"zero",1:"one",-1:"negative one"] | |
36 * | |
37 * // Structs: | |
38 * struct S { int a = 5; double[int[]] x; } | |
39 * S s; | |
40 * Cout (serialize (s)); | |
41 * | |
42 * // No limit on complexity... | |
43 * char[] somethingComplicated = serialize!(real[][][bool[int[][]]]) (...); | |
44 * ------------------------------------------------------------------------------------------------ | |
45 * | |
46 * throws: | |
47 * May throw a UnicodeException or an IllegalArgumentException. | |
48 * | |
49 * TODO: Optimize memory allocation (if possible?). Test best sizes for initial allocations | |
50 * instead of merely guessing? | |
51 *************************************************************************************************/ | |
52 //NOTE: in case of multiple formats, make this a dummy module importing both serialize modules, | |
53 // or put all the code here. | |
54 module mde.file.serialize; | |
55 // Since serialize is never used in a module where deserialize is not used, save an import: | |
56 public import mde.file.deserialize; | |
57 | |
58 // tango imports | |
59 import tango.core.Traits; | |
60 import tango.core.Exception : UnicodeException, IllegalArgumentException; | |
61 import cInt = tango.text.convert.Integer; | |
62 import cFloat = tango.text.convert.Float; | |
63 import Utf = tango.text.convert.Utf; | |
64 | |
65 | |
66 alias serialize parseFrom; // parseFrom is the old name of serialize; the alias keeps that name usable | |
67 | |
68 // Formatting options, for where multiple formats are supported by the deserializer. | |
69 | |
70 // Output ubyte[] using the special binary notation (e.g. 0x01f2ac) instead of the generic array form (e.g. [1,242,172])? | |
71 const bool SPECIAL_BINARY_NOTATION = true; | |
72 | |
73 // Output bool values as words (true / false) rather than as digits (1 / 0)? | |
74 const bool BINARY_AS_WORDS = true; | |
75 | |
76 | |
/**
 * Convert val to its textual (serialized) representation.
 *
 * The output format is selected at compile time from the type U:
 *  - associative arrays:   [key:value,key:value,...]
 *  - char[] (strings):     "..." with the escapable characters backslash-escaped
 *  - wchar[] / dchar[]:    converted to UTF-8, then written as a string
 *  - ubyte[]:              0x followed by two lower-case hex digits per byte
 *                          (only when SPECIAL_BINARY_NOTATION is true)
 *  - other arrays:         [elem,elem,...]
 *  - structs:              {0:member,1:member,...} keyed by member index
 *  - char/wchar/dchar:     'c' (escaped if necessary); wchar/dchar must be ASCII
 *  - bool:                 true/false or 1/0 depending on BINARY_AS_WORDS
 *  - integers, floats:     decimal text
 *
 * Params:
 *  val = the value to serialize
 *
 * Returns:
 *  A char[] holding the serialized text.
 *
 * Throws:
 *  UnicodeException for a non-ASCII single wchar/dchar (and possibly from the UTF
 *  conversion of wchar[]/dchar[]); IllegalArgumentException for a ulong greater
 *  than long.max.
 */
char[] serialize(U) (U val) {
    // Associative arrays (NOTE: cannot use is() expression)
    static if (isAssocArrayType!(U)) {          // generic associative array
        alias typeof(U.keys[0])   S;
        alias typeof(U.values[0]) T;
        char[] ret;
        // A guess, including values themselves and [,:] elements (must be at least 2).
        ret.length = val.length * (defLength!(T) + defLength!(S) + 2) + 2;
        ret[0] = '[';
        uint i = 1;
        foreach (S k, T v; val) {
            char[] s = serialize!(S) (k) ~ ":" ~ serialize!(T) (v);
            i += s.length;
            // Grow until both s and the trailing comma fit. A single doubling is not
            // enough when one element is longer than the whole current buffer.
            while (i+1 >= ret.length)
                ret.length = ret.length * 2;
            ret[i-s.length .. i] = s;
            ret[i++] = ',';
        }
        if (i == 1) ++i;    // special case (empty) - not overwriting a comma
        ret[i-1] = ']';     // replaces last comma
        return ret[0..i];
    }
    // Arrays
    else static if (is(U S == S[]) || isStaticArrayType!(U)) {
        alias typeof(U[0]) T;

        static if (is(T == char)) {             // string
            // Initial storage. Every input char yields at most two output chars, plus the
            // two quotes, so this is ALWAYS enough.
            char[] ret = new char[val.length * 2 + 2];
            ret[0] = '"';
            uint i = 1;
            for (uint t = 0; t < val.length;) {
                // process a block of non-escapable characters
                uint s = t;
                while (t < val.length && !isEscapableChar(val[t]))
                    ++t;    // skip all non-escapable chars
                uint j = i + t - s;
                ret[i..j] = val[s..t];  // copy a block
                i = j;
                // process a block of escapable characters
                while (t < val.length && isEscapableChar(val[t])) {
                    ret[i++] = '\\';                    // backslash; increment i
                    ret[i++] = escapeChar(val[t++]);    // character; increment i and t
                }
            }
            ret[i++] = '"';
            return ret[0..i];
        }
        else static if (is(T == wchar) || is(T == dchar)) { // wstring or dstring
            // May throw a UnicodeException; don't bother catching and rethrowing:
            return serialize!(char[]) (Utf.toString (val));
        }
        else static if (SPECIAL_BINARY_NOTATION && is(T == ubyte)) {    // special binary notation
            // Note: To disable the usage of this special type, set SPECIAL_BINARY_NOTATION = false.
            static const char[16] digits = "0123456789abcdef";

            char[] ret = new char[val.length * 2 + 2];  // exact length: "0x" + 2 digits per byte
            ret[0..2] = "0x";
            uint i = 2;

            foreach (ubyte x; val) {
                ret[i++] = digits[x >> 4];      // high nibble
                ret[i++] = digits[x & 0x0F];    // low nibble
            }
            return ret;
        }
        else {                                  // generic array
            char[] ret;
            // A guess, including commas and brackets (must be at least 2)
            ret.length = val.length * (defLength!(T) + 1) + 2;
            ret[0] = '[';
            uint i = 1;
            foreach (T x; val) {
                char[] s = serialize!(T) (x);
                i += s.length;
                // Grow until both s and the trailing comma fit (see associative arrays).
                while (i+1 >= ret.length)
                    ret.length = ret.length * 2;
                ret[i-s.length .. i] = s;
                ret[i++] = ',';
            }
            if (i == 1)
                ++i;        // special case (empty) - not overwriting a comma
            ret[i-1] = ']'; // replaces last comma
            return ret[0..i];
        }
    }
    // Structs
    else static if (is(U == struct)) {
        char[] ret;
        // A very rough guess.
        ret.length = val.sizeof * 4;
        ret[0] = '{';
        uint i = 1;
        foreach (k, v; val.tupleof) {
            alias typeof(v) T;
            // Members are keyed by their index within the struct.
            char[] s = serialize!(size_t) (k) ~ ":" ~ serialize!(T) (v);
            i += s.length;
            // Grow until both s and the trailing comma fit; the initial guess is based on
            // sizeof only, so a member holding a long array can exceed it many times over.
            while (i+1 >= ret.length)
                ret.length = ret.length * 2;
            ret[i-s.length .. i] = s;
            ret[i++] = ',';
        }
        if (i == 1) ++i;    // special case (no members) - not overwriting a comma
        ret[i-1] = '}';     // replaces last comma
        return ret[0..i];
    }
    // Basic types
    else static if (is(U == char)) {            // char (UTF-8 byte)
        // Note: if (val > 127) "is invalid UTF-8 single char". However we don't know
        // what this is for, in particular if it will be recombined with other chars later.

        // Can't return reference to static array; so making it dynamic is cheaper than copying.
        char[] ret = new char[4];   // max length for an escaped char
        ret[0] = '\'';

        if (!isEscapableChar (val)) {
            ret[1] = val;
            ret[2] = '\'';
            return ret[0..3];
        } else {
            ret[1] = '\\';
            ret[2] = escapeChar (val);
            ret[3] = '\'';
            return ret;
        }
    } else static if (is(U == wchar) ||
                      is(U == dchar)) {         // wchar or dchar (UTF-16/32 single char)
        // Note: only ascii can be converted. NOTE: convert to UTF-8 (multibyte) char?
        if (val <= 127u)
            return serialize!(char) (cast(char) val);   // ASCII
        else throw new UnicodeException (
            "Error: unicode non-ascii character cannot be converted to a single UTF-8 char", 0);
    } else static if (is (U == bool)) {         // boolean
        static if (BINARY_AS_WORDS) {
            if (val)
                return "true";
            else return "false";
        } else {
            if (val)
                return "1";
            else return "0";
        }
    } else static if (is (U : long)) {          // any integer type, except char types and bool
        static if (is (U == ulong))             // ulong may not be supported properly
            if (val > cast(ulong) long.max)
                throw new IllegalArgumentException ("No handling available for ulong where value > long.max");
        return cInt.toString (val);
    } else static if (is (U : real)) {          // any (real) floating point type
        char[] ret = new char[32];              // minimum allowed by assert in format
        return cFloat.format (ret, val, U.dig+2, 1);// from old C++ tests, U.dig+2 gives best(?) accuracy
    }
    // Unsupported
    else
        static assert (false, "Unsupported type: "~U.stringof);
}
231 | |
232 //BEGIN Utility funcs | |
/* Initial buffer-length estimate, per element type, used by serialize when it first allocates
 * its output string. The buffer can still be expanded afterwards; the value is only meant to be
 * large enough for most (roughly 90% of) cases.
 *
 * NOTE: The template exists so that specialisations can be provided per type; the generic value
 * should do reasonably well for the types without one.
 */
// Generic case.
private template defLength(T) {
    const uint defLength = 20;
}
// Specialisation for char.
private template defLength(T : char) {
    const uint defLength = 4;
}
// Specialisation for bool.
private template defLength(T : bool) {
    const uint defLength = 5;
}
// Returns true if c must be written as a backslash escape sequence: one of the control
// characters \a \b \t \n \v \f \r (a contiguous range), a double or single quote, or a backslash.
private bool isEscapableChar (char c) {
    if (c >= '\a' && c <= '\r')
        return true;        // control characters \a .. \r
    switch (c) {
        case '\"', '\'', '\\':
            return true;
        default:
            return false;
    }
}
// Maps an escapable character to the character that follows the backslash in its escape
// sequence (e.g. a newline gives 'n'; quotes and the backslash map to themselves).
// Throws an IllegalArgumentException for any other character; within serialize this should
// never happen, since only characters accepted by isEscapableChar are passed in.
private char escapeChar (char c) {
    switch (c) {
        case '\a':  return 'a';
        case '\b':  return 'b';
        case '\t':  return 't';
        case '\n':  return 'n';
        case '\v':  return 'v';
        case '\f':  return 'f';
        case '\r':  return 'r';
        case '\"':  return '\"';
        case '\'':  return '\'';
        case '\\':  return '\\';
        default:
            // not an escapable character
            throw new IllegalArgumentException ("Internal error (escapeChar)");
    }
}
287 //END Utility funcs | |
288 | |
289 | |
290 | |
// Unit tests for serialize; only compiled with the UnitTest debug identifier.
debug (UnitTest) {
    import tango.util.log.Log : Log, Logger;

    private Logger logger;
    static this() {
        logger = Log.getLogger ("text.serialize");
    }

    unittest {
        // Helper: runs dg and reports whether it threw an Exception (which is logged).
        bool throws (void delegate() dg) {
            try {
                dg();
            } catch (Exception e) {
                logger.info ("Exception caught: "~e.msg);
                return true;
            }
            return false;
        }
        // Sanity-check the helper itself.
        assert (!throws ({ int i = 5; }));
        assert (throws ({ throw new Exception ("Test - this exception should be caught"); }));


        //BEGIN Associative arrays
        char[] expected = `['a':"animal",'b':"bus"]`;
        char[] got = serialize!(char[][char]) (['a':cast(char[])"animal", 'b':['b','u','s']]);
        assert (got == expected);
        //END Associative arrays


        //BEGIN Arrays
        // generic array stuff:
        assert (serialize!(double[]) ([1.0, 1.0e-10]) == `[1.00000000000000000,0.10000000000000000e-09]`);
        // empty array:
        assert (serialize!(double[]) (cast(double[]) []) == `[]`);

        // char[] conversions, with commas, escape sequences and multichar UTF8 characters:
        assert (serialize!(char[][]) ([ ".\""[], [',','\''] ,"!\b€" ]) == `[".\"",",\'","!\b€"]`);

        // wchar[] and dchar[] conversions:
        // The characters were pretty-much pulled at random from unicode tables.
        assert (serialize!(wchar[]) ("Test string: ¶α؟अกሀ搀"w) == "\"Test string: ¶α؟अกሀ搀\"");
        assert (serialize!(dchar[]) ("Test string: ¶α؟अกሀ搀"d) == "\"Test string: ¶α؟अกሀ搀\"");

        // ubyte[]: special notation if enabled, otherwise a generic array
        static if (SPECIAL_BINARY_NOTATION)
            assert (serialize!(ubyte[]) (cast(ubyte[]) [0x01, 0xF2, 0xAC]) == `0x01f2ac`);
        else
            assert (serialize!(ubyte[]) (cast(ubyte[]) [0x01, 0xF2, 0xAC]) == `[1,242,172]`);
        //END Arrays


        //BEGIN Structs
        struct Foo  { int a = 9;  char b = '\v';  float c; }
        struct Bar  { Foo a,b; }
        static Foo foo1 = { a:150, b:'8', c:17.2f}, foo2;   // foo2 keeps the default initialisers
        Bar bar;
        bar.a = foo1;
        bar.b = foo2;
        assert (serialize(bar) == "{0:{0:150,1:'8',2:1.72000007e+01},1:{0:9,1:'\\v',2:nan}}");
        //END Structs


        //BEGIN Basic types
        // Character types
        assert (serialize!(char) ('\'') == "\'\\\'\'");
        assert (serialize!(wchar) ('X') == "'X'");
        assert (serialize!(dchar) ('X') == "'X'");
        // Non-ASCII single wide characters cannot be converted:
        assert (throws ({ char[] r = serialize!(wchar) ('£');   /* unicode U+00A3 */ }));
        assert (throws ({ char[] r = serialize!(dchar) ('£'); }));

        // Bool
        static if (BINARY_AS_WORDS)
            assert (serialize(false) == "false");
        else
            assert (serialize(true) == "1");

        // Integers: signed
        assert (serialize (cast(byte) -5) == "-5");
        assert (serialize (cast(short) -32768) == "-32768");
        assert (serialize (-5) == "-5");
        assert (serialize (-9223372036854775807L) == "-9223372036854775807");
        // Integers: unsigned
        assert (serialize (cast(ubyte) -1) == "255");
        assert (serialize (cast(ushort) -1) == "65535");
        assert (serialize!(uint) (-1) == "4294967295");
        assert (serialize (cast(ulong) 0x7FFF_FFFF_FFFF_FFFFLu) == "9223372036854775807");
        assert (serialize!(uint[]) ([0b0100u,0724,0xFa59c,0xFFFFFFFF,0]) ==
                "[4,468,1025436,4294967295,0]");
        assert (throws ({
            // ulong is not properly supported.
            // NOTE: this is something that should really work.
            char[] r = serialize!(ulong) (0x8FFF_FFFF_FFFF_FFFFLu);
        }));

        // Floats
        // These numbers are not particularly meaningful:
        assert (serialize!(float) (0.0f) == "0.00000000");
        assert (serialize!(double) (-1e25) == "-1.00000000000000000e+25");
        assert (serialize!(real) (cast(real) 4.918e300) == "4.91800000000000000000e+300");
        //END Basic types


        // Escape sequences (test conversion functions)
        assert (serialize ("\a\b\t\n\v\f\r\"\'\\") == `"\a\b\t\n\v\f\r\"\'\\"`);

        logger.info ("Unittest complete.");
    }
}