# HG changeset patch # User Diggory Hardy # Date 1220007583 -3600 # Node ID d8fccaa45d5f26b4841a766bd593af2224d48b5c # Parent ea58f277f48736a2b161fc456a3f119719d1359f Moved file IO code from mde/mergetag to mde/file[/mergetag] and changed how some errors are caught. diff -r ea58f277f487 -r d8fccaa45d5f codeDoc/mergetag/new-models.vym Binary file codeDoc/mergetag/new-models.vym has changed diff -r ea58f277f487 -r d8fccaa45d5f mde/file/deserialize.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mde/file/deserialize.d Fri Aug 29 11:59:43 2008 +0100 @@ -0,0 +1,615 @@ +/* LICENSE BLOCK +Part of mde: a Modular D game-oriented Engine +Copyright © 2007-2008 Diggory Hardy + +This program is free software: you can redistribute it and/or modify it under the terms +of the GNU General Public License as published by the Free Software Foundation, either +version 2 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . */ + +/************************************************************************************************** + * Generic deserialization templated function. + * + * Supports: + * Associative arrays, arrays (inc. strings), structs, char types, bool, int types, float types. + * + * There are also some public utility functions with their own documentation. 
+ *
+ * Examples:
+ * ------------------------------------------------------------------------------------------------
+ * // Basic examples:
+ * ulong a = deserialize!(ulong) ("20350");
+ * float d = deserialize!(float) (" 1.2e-9 ");
+ * int[] b = deserialize!(int[]) ("[0,1,2,3]");
+ *
+ * // String and char[] syntax:
+ * char[] c = deserialize!(char[]) ("\"A string\"");
+ * char[] e = deserialize!(char[]) ("['a','n','o','t','h','e','r', ' ' ,'s','t','r','i','n','g']");
+ *
+ * // These can be used interchangeably; here's a more complex example of an associative array:
+ * bool[char[]] f = deserialize!(bool[char[]]) ("[ \"one\":true, ['t','w','o']:false, \"three\":1, \"four\":000 ]");
+ *
+ * // There is also a special notation for ubyte[] types:
+ * // The digits following 0x must be in pairs and each specify one ubyte.
+ * assert ( deserialize!(ubyte[]) (`0x01F2AC`) == deserialize!(ubyte[]) (`[01 ,0xF2, 0xAC]`) );
+ *
+ * // There's no limit to the complexity!
+ * char[char[][][][char]][bool] z = ...; // don't expect me to write this!
+ * ------------------------------------------------------------------------------------------------
+ *
+ * Throws:
+ * May throw a ParseException or a UnicodeException (which both extend TextException).
+ *
+ * TODO: Optimize memory allocation (if possible?). Test best sizes for initial allocations
+ * instead of merely guessing?
+ *************************************************************************************************/
+//NOTE: in case of multiple formats, make this a dummy module importing both serialize modules,
+// or put all the code here.
+module mde.file.deserialize;
+
+// tango imports
+import tango.core.Exception : TextException, UnicodeException;
+import cInt = tango.text.convert.Integer;
+import cFloat = tango.text.convert.Float;
+import Utf = tango.text.convert.Utf;
+import Util = tango.text.Util;
+
+/**
+ * Base class for deserialize exceptions. 
+ */
+class ParseException : TextException
+{
+    this( char[] msg )
+    {
+        super( msg );
+    }
+}
+
+alias deserialize parseTo;      // support the old name
+
+//BEGIN deserialize templates
+
+// Associative arrays
+
+/* Parses "[KEY:VALUE, KEY:VALUE, ...]" into a T[S].
+ *
+ * Each comma-separated pair (as returned by split) is cut at the first ':' found outside a
+ * quoted character or string; the part before it is deserialized as S (the key) and the part
+ * after it as T (the value).
+ *
+ * Throws a ParseException if src is not enclosed in [ ] or a pair contains no ':'. */
+T[S] deserialize(T : T[S], S) (char[] src) {
+    src = Util.trim(src);
+    if (src.length < 2 || src[0] != '[' || src[$-1] != ']')
+        throw new ParseException ("Invalid associative array: not [ ... ]");  // bad braces.
+
+    T[S] ret;
+    foreach (char[] pair; split (src[1..$-1])) {
+        uint i = 0;
+        while (i < pair.length) {   // advance to the ':'
+            char c = pair[i];
+            if (c == ':') break;
+            if (c == '\'' || c == '"') {    // string or character
+                ++i;
+                while (i < pair.length && pair[i] != c) {
+                    if (pair[i] == '\\')
+                        ++i;    // escape seq.
+                    ++i;
+                }
+                // Could have an unterminated ' or " causing i >= pair.length, but:
+                // 1. Impossible: split would have thrown
+                // 2. In any case this would be caught below.
+            }
+            ++i;
+        }
+        if (i >= pair.length)
+            throw new ParseException ("Invalid associative array: encountered [ ... KEY] (missing :DATA)");
+        // pair[0..i] is the key text, pair[i+1..$] the value text (the ':' itself is dropped).
+        ret[deserialize!(S) (pair[0..i])] = deserialize!(T) (pair[i+1..$]);
+    }
+    return ret;
+}
+
+
+// Arrays
+
+/* Parses "[a, b, ...]" into a T[] via toArray.
+ * Throws a ParseException if the trimmed input is not enclosed in [ ]. */
+T[] deserialize(T : T[]) (char[] src) {
+    src = Util.trim(src);
+    if (src.length >= 2 && src[0] == '[' && src[$-1] == ']')
+        return toArray!(T[]) (src);
+    throw new ParseException ("Invalid array: not [ ... ]");
+}
+
+// String (array special case)
+/* Accepts either a double-quoted string, whose backslash escapes are decoded with unEscapeChar,
+ * or a bracketed list of char literals, which is handed to toArray.
+ * Throws a ParseException for any other form or for a string ending in a lone backslash. */
+T deserialize(T : char[]) (char[] src) {
+    src = Util.trim(src);
+    if (src.length >= 2 && src[0] == '"' && src[$-1] == '"') {
+        src = src[1..$-1];
+        T ret;
+        ret.length = src.length;    // maximum length; retract to actual length later
+        uint i = 0;                 // write position in ret; t (below) is the read position in src
+        for (uint t = 0; t < src.length;) {
+            // process a block of non-escaped characters
+            uint s = t;
+            while (t < src.length && src[t] != '\\') ++t;   // non-escaped characters
+            uint j = i + t - s;
+            ret[i..j] = src[s..t];  // copy a block
+            i = j;
+
+            // process a block of escaped characters
+            while (t < src.length && src[t] == '\\') {
+                t++;
+                if (t == src.length)
+                    throw new ParseException ("Invalid string: ends \\\" !");    // next char is "
+                ret[i++] = unEscapeChar (src[t++]);   // throws if it's invalid
+            }
+        }
+        return ret[0..i];
+    }
+    else if (src.length >= 2 && src[0] == '[' && src[$-1] == ']')
+        return toArray!(T) (src);
+    throw new ParseException ("Invalid string: not quoted (\"*\") or char array (['a',...,'c'])");
+}
+// Unicode conversions for strings:
+T deserialize(T : wchar[]) (char[] src) {
+    // May throw a UnicodeException; don't bother catching and rethrowing:
+    return Utf.toString16 (deserialize!(char[]) (src));
+}
+T deserialize(T : dchar[]) (char[] src) {
+    // May throw a UnicodeException; don't bother catching and rethrowing:
+    return Utf.toString32 (deserialize!(char[]) (src));
+}
+
+// Binary (array special case)
+/* Accepts either a normal bracketed array of ubyte values or the compact form "0x" followed by
+ * an even number of hex digits, each pair giving one ubyte.
+ * Throws a ParseException for any other form, an odd digit count or a non-hex digit. */
+T deserialize(T : ubyte[]) (char[] src) {
+    src = Util.trim(src);
+    // Standard case:
+    if (src.length >= 2 && src[0] == '[' && src[$-1] == ']') return toArray!(T) (src);
+    // Special case: sequence of hex digits, each pair of which is a ubyte
+    if (src.length >= 2 && src[0..2] == "0x") {
+        src = src[2..$];    // strip down to actual digits
+
+        // Must be in pairs:
+        if (src.length % 2 == 1)
+            throw new ParseException ("Invalid binary: odd number of chars");
+
+        T ret;
+        ret.length = src.length / 2;    // exact
+
+        // readHexChar advances pos by one on each call, so every iteration consumes two digits.
+        for (uint i, pos; pos + 1 < src.length; ++i) {
+            ubyte x = readHexChar(src, pos) << 4;
+            x |= readHexChar(src, pos);
+            ret[i] = x;
+        }
+        return ret;
+    }
+    else throw new ParseException ("Invalid ubyte[]: not an array and doesn't start 0x");
+}
+
+
+// Basic types
+
+// Char
+// Assumes value is <= 127 (for valid UTF-8), since input would be invalid UTF-8 if not anyway.
+// (And we're not really interested in checking for valid unicode; char[] conversions don't either.)
+// Accepted forms are 'x' (exactly one byte between the quotes) and '\x' (one escape sequence).
+T deserialize(T : char) (char[] src) {
+    src = Util.trim(src);
+    if (src.length < 3 || src[0] != '\'' || src[$-1] != '\'')
+        throw new ParseException ("Invalid char: not 'x' or '\\x'");
+    if (src[1] != '\\') {
+        if (src.length == 3)
+            return src[1];              // Either non escaped
+        throw new ParseException ("Invalid char: too long (or non-ASCII)");
+    } else if (src.length == 4)
+        return unEscapeChar (src[2]);   // Or escaped
+
+    throw new ParseException ("Invalid char: '\\'");
+}
+// Basic unicode conversions for wide-chars.
+// Assumes value is <= 127 as does deserialize!(char). 
+T deserialize(T : wchar) (char[] src) {
+    return cast(T) deserialize!(char) (src);
+}
+T deserialize(T : dchar) (char[] src) {
+    return cast(T) deserialize!(char) (src);
+}
+
+// Bool
+// Accepts "true", "false", or a run of zeros optionally ending in a single 1
+// (e.g. "0", "000" are false; "1", "01" are true). Anything else throws a ParseException.
+T deserialize(T : bool) (char[] src) {
+    src = Util.trim(src);
+    if (src == "true")
+        return true;
+    if (src == "false")
+        return false;
+    uint pos;
+    while (src.length > pos && src[pos] == '0') ++pos;  // skip leading zeros
+    if (src.length == pos && pos > 0)
+        return false;
+    if (src.length == pos + 1 && src[pos] == '1')
+        return true;
+    throw new ParseException ("Invalid bool: not true or false and doesn't evaluate to 0 or 1");
+}
+
+// Ints
+// All integer types forward to toTInt, which does the parsing and range checking.
+T deserialize(T : byte) (char[] src) {
+    return toTInt!(T) (src);
+}
+T deserialize(T : short) (char[] src) {
+    return toTInt!(T) (src);
+}
+T deserialize(T : int) (char[] src) {
+    return toTInt!(T) (src);
+}
+T deserialize(T : long) (char[] src) {
+    return toTInt!(T) (src);
+}
+T deserialize(T : ubyte) (char[] src) {
+    return toTInt!(T) (src);
+}
+T deserialize(T : ushort) (char[] src) {
+    return toTInt!(T) (src);
+}
+T deserialize(T : uint) (char[] src) {
+    return toTInt!(T) (src);
+}
+T deserialize(T : ulong) (char[] src) {
+    return toTInt!(T) (src);
+}
+debug (UnitTest) unittest {
+    assert (deserialize!(byte) ("-5") == cast(byte) -5);
+    // annoyingly, octal syntax differs from D (blame tango):
+    assert (deserialize!(uint[]) ("[0b0100,0o724,0xFa59c,0xFFFFFFFF,0]") == [0b0100u,0724,0xFa59c,0xFFFFFFFF,0]);
+}
+
+// Floats
+// All floating-point types forward to toTFloat.
+T deserialize(T : float) (char[] src) {
+    return toTFloat!(T) (src);
+}
+T deserialize(T : double) (char[] src) {
+    return toTFloat!(T) (src);
+}
+T deserialize(T : real) (char[] src) {
+    return toTFloat!(T) (src);
+}
+
+
+// Structs
+/* Parses "{ INDEX : VALUE, ... }" into a struct of type T.
+ *
+ * INDEX is the zero-based position of the member in T.tupleof; VALUE is deserialized as that
+ * member's type by setStruct. Members which are not mentioned keep their default value.
+ *
+ * Throws a ParseException if the braces are missing, an entry has no ':' or an index does not
+ * name a member of T. */
+T deserialize(T) (char[] src) {
+    // T.stringof gives the type's name as a string; a type itself cannot be concatenated.
+    static assert (is(T == struct), "Unsupported type: "~T.stringof);
+
+    src = Util.trim(src);
+    if (src.length < 2 || src[0] != '{' || src[$-1] != '}')
+        throw new ParseException ("Invalid struct: not { ... }");
+
+    // cannot access elements of T.tupleof with non-const key, so use a type which can be
+    // accessed with a non-const key to store slices:
+    char[][T.tupleof.length] temp;
+    foreach (char[] pair; split (src[1..$-1])) {
+        uint i = 0;
+        while (i < pair.length) {   // advance to the ':'
+            char c = pair[i];
+            if (c == ':')
+                break;
+            // key must be an int so no need for string checks
+            ++i;
+        }
+        if (i >= pair.length)
+            throw new ParseException ("Invalid struct: encountered { ... KEY} (missing :DATA)");
+
+        size_t k = deserialize!(size_t) (pair[0..i]);
+        // The index comes from the input, so it must be validated before indexing temp.
+        if (k >= temp.length)
+            throw new ParseException ("Invalid struct: member index out of range");
+        // Note: could check no entry was already stored in temp.
+        temp[k] = pair[i+1..$];
+    }
+    T ret;
+    setStruct (ret, temp);
+    return ret;
+}
+//END deserialize templates
+
+//BEGIN Utility funcs
+/** Splits a string into substrings separated by '$(B ,)' with support for characters and strings
+ * containing escape sequences and for embedded arrays ($(B [...])).
+ *
+ * Params:
+ *     src A string to separate on commas. It shouldn't have enclosing brackets.
+ *
+ * Returns:
+ *     An array of substrings within src, excluding commas. Whitespace is not stripped and
+ *     empty strings may get returned.
+ *
+ * Remarks:
+ *     This function is primarily intended as a utility function for use by the templates
+ *     parsing arrays and associative arrays, but it may be useful in other cases too. Hence the
+ *     fact no brackets are stripped from src. 
+ */
+//FIXME foreach struct is more efficient
+// Commas nested inside [...] or {...} and commas inside quoted characters/strings do not split.
+// Throws a ParseException on a closing bracket/brace with no matching opener or on an
+// unterminated quote.
+char[][] split (char[] src) {
+    src = Util.trim (src);
+    if (src == "")
+        return [];      // empty array: no elements when no data
+
+    uint depth = 0;         // nesting depth (embedded arrays and structs)
+    char[][] ret;
+    // Initial guess at the number of pieces. The + 1 keeps the length non-zero for very short
+    // input; doubling a zero length below would never grow the array.
+    ret.length = src.length / 3 + 1;
+    uint k = 0;             // current split piece
+    uint i = 0, j = 0;      // current read location, start of current piece
+
+    while (i < src.length) {
+        char c = src[i];
+        if (c == '\'' || c == '"') {    // string or character
+            ++i;
+            while (i < src.length && src[i] != c) {
+                if (src[i] == '\\')
+                    ++i;    // escape seq.
+                ++i;
+            }   // Doesn't throw if no terminal quote at end of src, but this is caught below.
+        }
+        else if (c == '[' || c == '{') ++depth;
+        else if (c == ']') {
+            if (depth)
+                --depth;
+            else throw new ParseException ("Invalid array literal: closes before end of data item.");
+        }
+        else if (c == '}') {
+            if (depth)
+                --depth;
+            else throw new ParseException ("Invalid struct literal: closes before end of data item.");
+        }
+        else if (c == ',' && depth == 0) {      // only if not inside an embedded array/struct
+            if (ret.length <= k)
+                ret.length = ret.length * 2;
+            ret[k++] = src[j..i];   // add this piece and increment k
+            j = i + 1;
+        }
+        ++i;
+    }
+    // An unterminated quote leaves i == src.length after the inner loop, and the ++i that
+    // follows pushes it past the end.
+    if (i > src.length)
+        throw new ParseException ("Unterminated quote (\' or \")");
+
+    if (ret.length <= k)
+        ret.length = k + 1;
+    ret[k] = src[j..i];     // add final piece (i >= j)
+    return ret[0..k+1];
+}
+
+/* Templated read-int function to read (un)signed 1-8 byte integers.
+ *
+ * Actually a reimplementation of tango.text.convert.Integer toLong and parse functions.
+ *
+ * Throws a ParseException if src contains no digits, has trailing non-digit characters, or
+ * the value does not fit in TInt. For unsigned TInt any input with a minus sign is rejected.
+ */
+private TInt toTInt(TInt) (char[] src) {
+    const char[] INT_OUT_OF_RANGE = "Integer out of range";
+    bool sign;
+    uint radix, ate, ate2;
+
+    // Trim off whitespace.
+    // NOTE: Cannot use tango.text.convert.Integer.trim to trim leading whitespace since it doesn't
+    // treat new-lines, etc. as whitespace which for our purposes is whitespace.
+    src = Util.trim (src);
+
+    ate = cInt.trim (src, sign, radix);
+    if (ate == src.length)
+        throw new ParseException ("Invalid integer: no digits");
+    ulong val = cInt.convert (src[ate..$], radix, &ate2);
+    ate += ate2;
+
+    if (ate < src.length)
+        throw new ParseException ("Invalid integer at marked character: \"" ~ src[0..ate] ~ "'" ~ src[ate] ~ "'" ~ src[ate+1..$] ~ "\"");
+
+    static if (TInt.min == 0) {
+        // Unsigned target: nothing with a minus sign is representable. Checking the sign flag
+        // directly avoids a signed/unsigned comparison, which for ulong let "-1" wrap around.
+        if (sign || val > TInt.max)
+            throw new ParseException (INT_OUT_OF_RANGE);
+        return cast(TInt) val;
+    } else {
+        // Signed target: the magnitude of TInt.min is TInt.max + 1, so a negative value may be
+        // one larger than a positive one.
+        ulong limit = cast(ulong) TInt.max;
+        if (sign) ++limit;
+        if (val > limit)
+            throw new ParseException (INT_OUT_OF_RANGE);
+        if (sign)
+            return cast(TInt) cast(long) -val;
+        return cast(TInt) val;
+    }
+}
+
+/* Basically a reimplementation of tango.text.convert.Float.toFloat which checks for
+ * whitespace before throwing an exception for overlong input.
+ *
+ * Throws a ParseException if src is empty or is not consumed entirely by the number. */
+private TFloat toTFloat(TFloat) (char[] src) {
+    // NOTE: As for toTInt(), this needs to strip leading as well as trailing whitespace.
+    src = Util.trim (src);
+    if (src == "")
+        throw new ParseException ("Invalid float: no digits");
+    uint ate;
+
+    TFloat x = cFloat.parse (src, &ate);
+    // parse reports how many characters it consumed; anything left over is garbage.
+    if (ate < src.length)
+        throw new ParseException ("Invalid float at marked character: \"" ~ src[0..ate] ~ "'" ~ src[ate] ~ "'" ~ src[ate+1..$] ~ "\"");
+    return x;
+}
+
+/* Throws an exception on invalid escape sequences. Supported escape sequences are the following
+ * subset of those supported by D: \" \' \\ \a \b \f \n \r \t \v
+ *
+ * c is the character following the backslash; the return value is the character it denotes.
+ */
+private char unEscapeChar (char c)
+{
+    switch (c) {
+        case '\"':  return '\"';
+        case '\'':  return '\'';
+        case '\\':  return '\\';
+        case 'a':   return '\a';
+        case 'b':   return '\b';
+        case 'f':   return '\f';
+        case 'n':   return '\n';
+        case 'r':   return '\r';
+        case 't':   return '\t';
+        case 'v':   return '\v';
+        default:    break;
+    }
+
+    // if we haven't returned:
+    throw new ParseException ("Bad escape sequence: \\"~c);
+}
+
+// Reads one hex char: [0-9A-Fa-f]. Otherwise throws an exception. 
Doesn't check src.length.
+// Returns the digit's value (0-15) and advances pos by one.
+private ubyte readHexChar (char[] src, inout uint pos) {
+    ubyte x;
+    if (src[pos] >= '0' && src[pos] <= '9') x = src[pos] - '0';
+    else if (src[pos] >= 'A' && src[pos] <= 'F') x = src[pos] - 'A' + 10;
+    else if (src[pos] >= 'a' && src[pos] <= 'f') x = src[pos] - 'a' + 10;
+    else throw new ParseException ("Invalid hex digit.");
+    ++pos;
+    return x;
+}
+
+// Generic array reader
+// Assumes input is of form "[xxxxx]" (i.e. first and last chars are '[', ']' and length >= 2).
+// Each piece returned by split is deserialized as T; the result is sliced to the number read.
+private T[] toArray(T : T[]) (char[] src) {
+    T[] ret = new T[16];    // avoid unnecessary allocations
+    uint i = 0;
+    foreach (char[] element; split(src[1..$-1])) {
+        if (i == ret.length) ret.length = ret.length * 2;
+        ret[i] = deserialize!(T) (element);
+        ++i;
+    }
+    return ret[0..i];
+}
+
+/** Set a struct's elements from an array.
+*
+* src[i] holds the text for member i of s; members whose entry in src is null are left alone.
+* The template recurses over i at compile time, once per member.
+*
+* For a more generic version, see http://www.dsource.org/projects/tutorials/wiki/StructTupleof
+*/
+// NOTE: Efficiency? Do recursive calls get inlined?
+private void setStruct(S, size_t N, size_t i = 0) (ref S s, char[][N] src) {
+    static assert (is(S == struct), "Only to be used with structs.");
+    static assert (N == S.tupleof.length, "src.length != S.tupleof.length");
+    static if (i < N) {
+        if (src[i])
+            s.tupleof[i] = deserialize!(typeof(s.tupleof[i])) (src[i]);
+        setStruct!(S, N, i+1) (s, src);
+    }
+}
+//END Utility funcs
+
+debug (UnitTest) {
+    import tango.util.log.Log : Log, Logger;
+
+    private Logger logger;
+    static this() {
+        logger = Log.getLogger ("text.deserialize");
+    }
+unittest {
+    // Utility: returns true if calling dg throws any Exception (which is logged).
+    bool throws (void delegate() dg) {
+        bool r = false;
+        try {
+            dg();
+        } catch (Exception e) {
+            r = true;
+            logger.info ("Exception caught: "~e.msg);
+        }
+        return r;
+    }
+    assert (!throws ({ int i = 5; }));
+    assert (throws ({ throw new Exception ("Test - this exception should be caught"); }));
+
+
+    // Associative arrays
+    char[][char] X = deserialize!(char[][char]) (`['a':"animal\n", 'b':['b','u','s','\n']]`);
+    char[][char] Y = ['a':cast(char[])"animal\n", 'b':['b','u','s','\n']];
+
+    //FIXME: when the compiler's fixed: http://d.puremagic.com/issues/show_bug.cgi?id=1671
+    // just assert (X == Y)
+    assert (X.length == Y.length);
+    assert (X.keys == Y.keys);
+    assert (X.values == Y.values);
+    //X.rehash; Y.rehash; // doesn't make a difference
+    //assert (X == Y); // fails (compiler bug)
+
+    assert (throws ({ deserialize!(int[int]) (`[1:1`); })); // bad brackets
+    assert (throws ({ deserialize!(int[char[]]) (`["ab\":1]`); })); // unterminated quote
+    assert (throws ({ deserialize!(int[char[]]) (`["abc,\a\b\c":1]`); })); // bad escape seq.
+    assert (throws ({ deserialize!(int[char[]]) (`["abc"]`); })); // no data
+
+
+    // Arrays
+    assert (deserialize!(double[]) (`[1.0,1.0e-10]`) == [1.0, 1.0e-10]);// generic array stuff
+    assert (deserialize!(double[]) (`[ ]`) == cast(double[]) []); // empty array
+    assert (deserialize!(int[][]) (`[[1],[2,3],[]]`) == [[1],[2,3],[]]);// sub-array
+    assert (throws ({ deserialize!(int[]) (`[1,2`); })); // bad brackets
+    assert (throws ({ deserialize!(int[][]) (`[[1]]]`); })); // bad brackets
+
+    // char[] and char conversions, with commas, escape sequences and multichar UTF8 characters:
+    assert (deserialize!(char[][]) (`[ ".\"", [',','\''] ,"!\b€" ]`) == [ ".\"".dup, [',','\''] ,"!\b€" ]);
+    assert (throws ({ deserialize!(char[]) ("\"\\\""); }));
+    assert (throws ({ deserialize!(char[]) (`['a'`); })); // bad brackets
+
+    // wchar[] and dchar[] conversions:
+    // The characters were pretty much pulled at random from unicode tables.
+    // The last few cause some weird (display only) effects in my editor.
+    assert (deserialize!(wchar[]) ("\"Test string: ¶α؟अกሀ搀\"") == "Test string: ¶α؟अกሀ搀"w);
+    assert (deserialize!(dchar[]) ("\"Test string: ¶α؟अกሀ搀\"") == "Test string: ¶α؟अกሀ搀"d);
+
+    assert (deserialize!(ubyte[]) (`0x01F2aC`) == cast(ubyte[]) [0x01, 0xF2, 0xAC]); // ubyte[] special notation
+    assert (deserialize!(ubyte[]) (`[01 ,0xF2, 0xAC]`) == cast(ubyte[]) [0x01, 0xF2, 0xAC]); // ubyte[] std notation
+    assert (throws ({ deserialize!(ubyte[]) (`0x123`); })); // digits not in pairs
+    assert (throws ({ deserialize!(ubyte[]) (`[2,5`); })); // not [...] or 0x..
+    assert (throws ({ deserialize!(ubyte[]) (`0x123j`); }));
+
+
+    // char types
+    assert (deserialize!(char) ("'\\\''") == '\'');
+    assert (deserialize!(wchar) ("'X'") == 'X');
+    assert (deserialize!(dchar) ("'X'") == 'X');
+    assert (throws ({ deserialize!(char) ("'\\'"); }));
+    assert (throws ({ deserialize!(char) ("'£'"); })); // non-ascii
+    assert (throws ({ deserialize!(char) ("''"); }));
+    assert (throws ({ deserialize!(char) ("'ab'"); }));
+    assert (throws ({ deserialize!(wchar) ("''"); }));
+
+
+    // bool
+    assert (deserialize!(bool[]) (`[true,false,01,00]`) == cast(bool[]) [1,0,1,0]);
+    assert (throws ({ deserialize!(bool) ("011"); }));
+
+
+    // ints
+    assert (deserialize!(byte) ("-5") == cast(byte) -5);
+    assert (deserialize!(int) ("-0x7FFFFFFF") == cast(int) -0x7FFF_FFFF);
+    // annoyingly, octal syntax differs from D (blame tango):
+    assert (deserialize!(uint[]) ("[0b0100,0o724,0xFa59c,0xFFFFFFFF,0]") == [0b0100u,0724,0xFa59c,0xFFFFFFFF,0]);
+    assert (throws ({ deserialize!(int) (""); }));
+    assert (throws ({ deserialize!(int) ("0x8FFFFFFF"); }));
+    assert (throws ({ deserialize!(uint) ("-1"); }));
+    assert (throws ({ deserialize!(uint) ("1a"); }));
+
+
+    // floats
+    assert (deserialize!(float) ("0.0") == 0.0f);
+    assert (deserialize!(double) ("-1e25") == -1e25);
+    assert (deserialize!(real) ("5.24e-269") == cast(real) 5.24e-269);
+    assert (throws ({ deserialize!(float) (""); }));
+
+
+    // structs
+    struct A { int x = 5; char y; }
+    struct B { A a; float b; }
+    A a; a.y = 'y';
+    assert (deserialize!(A) ("{ 1 : 'y' }") == a);
+    B b; b.a = a; b.b = 1.0f;
+    assert (deserialize!(B) (" {1:1.0,0: { 1 : 'y' } } ") == b);
+    assert (throws ({ deserialize!(A) (" 1:'x'}"); })); // bad braces
+    assert (throws ({ deserialize!(A) ("{ 1 }"); })); // no :DATA
+
+
+    // unEscapeChar
+    assert (deserialize!(char[]) ("\"\\a\\b\\t\\n\\v\\f\\r\\\"\\\'\\\\\"") == "\a\b\t\n\v\f\r\"\'\\");
+
+    logger.info ("Unittest complete.");
+}
+}
diff -r ea58f277f487 -r d8fccaa45d5f mde/file/exception.d
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/file/exception.d Fri Aug 29 11:59:43 2008 +0100
@@ -0,0 +1,48 @@
+/* LICENSE BLOCK
+Part of mde: a Modular D game-oriented Engine
+Copyright © 2007-2008 Diggory Hardy
+
+This program is free software: you can redistribute it and/or modify it under the terms
+of the GNU General Public License as published by the Free Software Foundation, either
+version 2 of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see . */
+
+/// Base file exception classes
+module mde.file.exception;
+
+import mde.exception;
+
+/// Base exception for all file exceptions
+class fileException : mdeException
+{
+    // Appends ".file" to the symbol reported by the parent exception class.
+    char[] getSymbol () {
+        return super.getSymbol ~ ".file";
+    }
+
+    this (char[] msg) {
+        super(msg);
+    }
+}
+
+/** Exception for errors finding, opening, closing, reading or writing files,
+ * but not for parsing content. */
+class ioException : fileException
+{
+    this (char[] msg) {
+        super(msg);
+    }
+}
+
+/** Exception for parsing, formatting and serializing content. 
*/
+class parseException : fileException
+{
+    this (char[] msg) {
+        super(msg);
+    }
+}
diff -r ea58f277f487 -r d8fccaa45d5f mde/file/mergetag/DataSet.d
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/file/mergetag/DataSet.d Fri Aug 29 11:59:43 2008 +0100
@@ -0,0 +1,70 @@
+/* LICENSE BLOCK
+Part of mde: a Modular D game-oriented Engine
+Copyright © 2007-2008 Diggory Hardy
+
+This program is free software: you can redistribute it and/or modify it under the terms
+of the GNU General Public License as published by the Free Software Foundation, either
+version 2 of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see . */
+
+/** This module contains the mergetag DataSet class, used for all reading and writing operations.
+ */
+module mde.file.mergetag.DataSet;
+
+// package imports
+public import mde.file.mergetag.iface.IDataSection;
+import mde.file.mergetag.DefaultData;
+
+
+/**************************************************************************************************
+ * Data class; contains a DataSection class instance for each loaded section of a file.
+ *
+ * Stored data is available for direct access via header and sec; all functions are just helper
+ * functions.
+ *
+ * Any class implementing IDataSection may be used to store data; by default a DefaultData class is
+ * used when reading a file. Another class may be used by creating the sections before reading the
+ * file or passing the reader a function to create the sections (see Reader.dataSecCreator).
+ *
+ * Could be a struct, except that structs are value types (not reference types).
+ */
+class DataSet
+{
+    DefaultData header;         /// Header section.
+    IDataSection[ID] sec;       /// Associative array of sections, indexed by section ID.
+
+    /// Template to return all sections which can be cast to the child-class type T, keyed by ID.
+    T[ID] getSections (T : IDataSection) () {
+        T[ID] ret;
+        foreach (ID id, IDataSection s; sec) {
+            T x = cast(T) s;
+            if (x) ret[id] = x; // cast gave non-null, i.e. s is a T
+        }
+        return ret;
+    }
+}
+
+debug (mdeUnitTest) {
+    import tango.util.log.Log : Log, Logger;
+
+    private Logger logger;
+    static this() {
+        logger = Log.getLogger ("mde.mergetag.DataSet");
+    }
+
+    unittest {  // Only covers DataSet really.
+        DataSet ds = new DataSet;
+        ds.sec[cast(ID)"test"] = new DefaultData;
+        assert (ds.getSections!(DefaultData)().length == 1);
+        ds.sec[cast(ID)"test"].addTag ("char[]",cast(ID)"T"," \"ut tag 1 \" ");
+        assert (ds.getSections!(DefaultData)()[cast(ID)"test"].Arg!(char[])[cast(ID)"T"] == "ut tag 1 ");
+
+        logger.info ("Unittest complete.");
+    }
+}
diff -r ea58f277f487 -r d8fccaa45d5f mde/file/mergetag/DefaultData.d
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/file/mergetag/DefaultData.d Fri Aug 29 11:59:43 2008 +0100
@@ -0,0 +1,181 @@
+/* LICENSE BLOCK
+Part of mde: a Modular D game-oriented Engine
+Copyright © 2007-2008 Diggory Hardy
+
+This program is free software: you can redistribute it and/or modify it under the terms
+of the GNU General Public License as published by the Free Software Foundation, either
+version 2 of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see . */
+
+/** This module contains the DefaultData class, and some notes possibly useful for implementing
+* other types of DataSection. 
+*/
+module mde.file.mergetag.DefaultData;
+
+public import mde.file.mergetag.iface.IDataSection;
+import mde.file.serialize;
+
+
+/*************************************************************************************************
+ * Default DataSection class.
+ *
+ * Supported types are given by dataTypes.
+ *
+ * Currently DefaultData is only used for headers, and thus the list of supported types has been
+ * reduced to just those used in headers. Load order is HIGH_LOW, i.e. existing entries aren't
+ * overwritten.
+ *************************************************************************************************/
+/* The implementation now uses a fair bit of generic programming. Adjusting the types supported
+* should be as simple as adjusting the list dataTypes, and possibly implementing new conversions in
+* parseFrom and parseTo if you add new types (e.g. for cent or imaginary/complex types, or user types).
+*
+* There shouldn't really be any need to adjust the implementation, except perhaps to add new
+* functions to the class (such as another type of output where the delegate used in writeAll isn't
+* enough).
+*/
+class DefaultData : IDataSection
+{
+    //BEGIN META
+    /* These functions are used to generate code. Compile-time functions rather than templates are
+     * used because they are easier to write and understand. Mixins are used to compile the resultant
+     * code. Must be declared before used since forward references aren't supported for compile-time
+     * functions. */
+
+    // Generate the correct name for each variable type: "_" ~ base type ~ one "A" per trailing "[]".
+    static char[] varName (char[] type) {
+        char[] append = "";
+        while (type.length >= 2 && type[$-2..$] == "[]") {
+            type = type[0..$-2];
+            append ~= "A";
+        }
+        return "_" ~ type ~ append;
+    }
+
+    // Int-to-string converter, which may not be efficient but will run at compile time.
+    static char[] int2str (uint i) {
+        char[] ret;
+        const digits = "0123456789";
+        if (i == 0) ret = "0";
+        else for (; i > 0; i /= 10) ret = digits[i%10] ~ ret;
+        return ret;
+    }
+
+    // Generate the code for variable declarations: one "TYPE[ID] NAME;" line per type.
+    static char[] declerations (char[][] types) {
+        char[] ret = "";
+        foreach (char[] type; types) ret ~= type ~ "[ID]\t" ~ varName(type) ~ ";\n";
+        return ret;
+    }
+
+    // Purely to add indentation. Could just return "" without affecting functionality.
+    static char[] indent (uint i) {
+        char[] ret = "";
+        for (; i > 0; --i) ret ~= " ";
+        // This is not executable at compile time:
+        //ret.length = i * 4; // number of characters for each indentation
+        //ret[] = ' '; // character to indent with
+        return ret;
+    }
+
+    /* Generates a binary search algorithm; consts must be sorted for the generated code to work.
+     *
+     * Currently this is tailored to its particular use (addTag). */
+    static char[] binarySearch (char[] var, char[][] consts, int indents = 0) {
+        if (consts.length > 3) {
+            return indent(indents) ~ "if (" ~ var ~ " <= \"" ~ consts[$/2 - 1] ~ "\") {\n" ~
+                binarySearch (var, consts[0 .. $/2], indents + 1) ~
+                indent(indents) ~ "} else {\n" ~
+                binarySearch (var, consts[$/2 .. $], indents + 1) ~
+                indent(indents) ~ "}\n";
+        } else {
+            char[] ret;
+            ret ~= indent(indents);
+            foreach (c; consts) {
+                ret ~= "if (" ~ var ~ " == \"" ~ c ~ "\") {\n" ~
+                    //indent(indents+1) ~ varName(c) ~ "[id] = parseTo!(" ~ c ~ ") (dt);\n" ~
+                    indent(indents+1) ~ "if ((id in "~varName(c)~") is null)\n" ~
+                    indent(indents+2) ~ varName(c)~"[id] = parseTo!(" ~ c ~ ") (dt);\n" ~
+                    indent(indents) ~ "} else ";
+            }
+            ret = ret[0..$-6] ~ '\n';  // remove last " else " (6 characters)
+            return ret;
+        }
+    }
+
+    // Generates the code to write data members (writeAll): one foreach per entry of dataTypes.
+    static char[] writeVars () {
+        char[] code = "";
+        foreach (i,type; dataTypes) {
+            code ~= "foreach (id, dt; " ~ varName(type) ~ ") itemdlg (dataTypes[" ~ int2str(i) ~ "], id, parseFrom!(" ~ type ~ ")(dt));\n";
+        }
+        return code;
+    }
+    //END META
+
+    /** Data Members
+     *
+     * These types are all stored directly, as below, and are available for direct access. The
+     * variable names are created at compile-time based on the dataTypes list.
+     * ------------------
+     * int[ID] _int; // name is type prefixed by _
+     * char[][ID] _charA; // [] is replaced by A
+     * ------------------
+     *
+     * An alternative access method is to use the provided templates:
+     * --------------------
+     * template Arg(T) {
+     * alias Name Arg;
+     * }
+     *
+     * type y = Arg!(type).Arg; // example of use
+     * --------------------
+     * Note: trying to use Arg!(type) to implicitly refer to Arg!(type).Arg causes compiler errors
+     * due to the "alias Name Arg;" statement actually being a mixin.
+     */
+    /+ All types previously supported. Most of these weren't used.
+    const char[][] dataTypes = ["bool","bool[]",
+                                "byte","byte[]",
+                                "char","char[]","char[][]",
+                                "double","double[]",
+                                "float","float[]",
+                                "int","int[]",
+                                "long","long[]",
+                                "real","real[]",
+                                "short","short[]",
+                                "ubyte","ubyte[]",
+                                "uint","uint[]",
+                                "ulong","ulong[]",
+                                "ushort","ushort[]"];
+    +/
+    const char[][] dataTypes = ["char[]", "char[][]"];
+
+    mixin (declerations (dataTypes)); // Declare all the variables.
+
+    void addTag (char[] type, ID id, char[] dt) { /// Supports all types listed in dataTypes.
+        mixin (binarySearch ("type", dataTypes));
+    }
+
+    void writeAll (ItemDelg itemdlg) { /// Supports all types listed in dataTypes.
+        mixin (writeVars ());
+    }
+
+    /* These make no attempt to check Arg is valid.
+     * But if the symbol doesn't exist the compiler will throw an error anyway, e.g.:
+     * Error: identifier '_boolAA' is not defined
+     */
+    template ArgName (T : T[]) {
+        const char[] ArgName = ArgName!(T)~`A`;
+    }
+    template ArgName (T) {
+        const char[] ArgName = `_`~T.stringof;
+    }
+    template Arg(T) {
+        mixin(`alias `~ArgName!(T)~` Arg;`);
+    }
+}
diff -r ea58f277f487 -r d8fccaa45d5f mde/file/mergetag/Reader.d
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/file/mergetag/Reader.d Fri Aug 29 11:59:43 2008 +0100
@@ -0,0 +1,526 @@
+/* LICENSE BLOCK
+Part of mde: a Modular D game-oriented Engine
+Copyright © 2007-2008 Diggory Hardy
+
+This program is free software: you can redistribute it and/or modify it under the terms
+of the GNU General Public License as published by the Free Software Foundation, either
+version 2 of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see . */
+
+/**************************************************************************************************
+ * This module contains all reading functions, for both binary and text MergeTag files. 
+ *************************************************************************************************/ +module mde.file.mergetag.Reader; + +// package imports +public import mde.file.mergetag.iface.IReader; +import mde.file.mergetag.DataSet; +import mde.file.mergetag.DefaultData; +import mde.file.mergetag.exception; +import mde.file.mergetag.internal; + +import tango.core.Exception; + +// tango imports +import tango.io.FilePath; +import tango.io.UnicodeFile; +import Util = tango.text.Util; +import ConvInt = tango.text.convert.Integer; +//import tango.util.collection.model.View : View; +import tango.util.collection.HashSet : HashSet; +import tango.util.log.Log : Log, Logger; + +private Logger logger; +static this() { + logger = Log.getLogger ("mde.mergetag.Reader"); +} + +// TODO: allow compressing with zlib for both binary and text? (.mtz, .mtt, .mtb extensions) + +/** Make an IReader class. +* +* Create an appropriate reader: MTTReader or MTBReader. +* +* Throws: +* $(TABLE +* $(TR $(TH Exception) $(TH Thrown when)) +* $(TR $(TD MTFileIOException) $(TD When extension given is neither mtt nor mtb)) +* ) +* +*/ +IReader makeReader (FilePath path, DataSet ds = null, bool rdHeader = false) { + if (path.ext == "mtb") return new MTBReader (path, ds, rdHeader); + else if (path.ext == "mtt") return new MTTReader (path, ds, rdHeader); + else throw new MTFileIOException ("Invalid mergetag extension"); +} + +/** Resolve a file path. + * + * Tries adding both ".mtt" and ".mtb" extensions, returning whichever exists (the most recently + * modified if both exist), or returns null if neither exist. */ +FilePath findFile (char[] path) { + if (path is null) return null; + + FilePath tPath = new FilePath (path ~ ".mtt"); + FilePath bPath = new FilePath (path ~ ".mtb"); + + bool bPathExists = bPath.exists; + + if (tPath.exists) { + if (bPathExists) { + // take the latest version (roughly speaking...) + return (tPath.modified > bPath.modified ? 
tPath : bPath); + } else return tPath; + } else { + if (bPathExists) return bPath; + else return null; + } +} + +/** + * Class for reading a mergetag text file. + * + * Use as: + * ----------------------- + * IReader foo; + * try { + * foo = new MTTReader("foo.mtt"); + * foo.read(); + * } + * catch (MTException) {} + * // get your data from foo.dataset. + * ----------------------- + * + * Throws: + * $(TABLE + * $(TR $(TH Exception) $(TH Thrown when)) + * $(TR $(TD MTFileIOException) $(TD An error occurs while opening the file)) + * $(TR $(TD MTFileFormatException) $(TD The file doesn't start with a recognised header/version)) + * $(TR $(TD MTSyntaxException) $(TD A file syntax error occurs)) + * $(TR $(TD MTException) $(TD An unexpected error occurs)) + * ) + * Note that all exceptions extend MTException and when any exception is thrown the class is + * rendered unusable: any subsequent calls to read will be ignored. + * + * Threading: Separate instances of Reader should be thread-safe provided access to the same + * dataset is synchronized; i.e. no two readers refering to the same dataset should run + * simultaneously. (The Reader class could be made thread-safe w.r.t. datasets, but + * performance-wise I doubt it would be worth it.) + * Do not run a single instance of Reader in multiple threads simultaneously. + */ +class MTTReader : IReader +{ +//BEGIN DATA + /** Get or set the DataSet + * + * A container for all read data. + * + * This may be accessed from here; however it may be preferable to use an external reference + * (passed to the class on initialisation). + */ + DataSet dataset () { return _dataset; } + void dataset (DataSet ds) /// ditto + { _dataset = ds; } + + /** A delegate for creating new DataSections within the dataset. + * + * Allows a user-made class to be used in the DataSet instead of DefaultData (used if no + * dataSecCreator exists). Also allows an existing class instance to be used instead of a new + * one. 
+ * + * This works by supplying a function which returns a reference to an instance of a class + * implementing IDataSection. The function is passed the ID of the new section and may use this + * to use different IDataSection classes for different sections. + * + * The function may also return null, in which case the section will be skipped. In the version + * of read taking a set of sections to read, the section will not be marked as read and may + * still be read later (assuming dataSecCreator returns non-null). However, in the version of + * read not taking the set argument, all sections are set as read regardless, and the section + * cannot be read later. + */ + void dataSecCreator (IDataSection delegate (ID) dSC) { + _dataSecCreator = dSC; + } + +private: + static Logger logger; + + // Non-static symbols: + final char[] ErrFile; // added after ErrInFile to do the same without the "in " bit. + final char[] ErrInFile; // something like "in \"path/file.mtt\"" + + final char[] fbuf; // file is read into this + MTFormatVersion.VERS fileVer = MTFormatVersion.VERS.INVALID; // Remains INVALID until set otherwise by CTOR. + + IDataSection delegate (ID) _dataSecCreator = null; // see property setter above + + size_t endOfHeader; + bool allRead = false; // true if endOfHeader == fbuf.length or read([]) has run + bool fatal = false; // a fatal file error occured; don't try to recover + /* If the file is scanned for sections, the starting position of all sections are stored + * in secTable. If this is empty, either no sections exist (and endOfHeader == fbuf.length) + * or a section scan has not been run (read() with no section names doesn't need to do so). 
+ */ + struct SecMD { // sec meta data + static SecMD opCall (size_t _pos, bool _read) { + SecMD ret; + ret.pos = _pos; + ret.read = _read; + return ret; + } + size_t pos; // position to start reading + bool read; // true if already read + } + SecMD [ID] secTable; + + DataSet _dataset; +//END DATA + +//BEGIN METHODS: CTOR / DTOR + static this () { + logger = Log.getLogger ("mde.mergetag.read.Reader"); + } + + /** Tries to open file path and read it into a buffer. + * + * Params: + * path = The name or FilePath of the file to open. + * Standard extensions are .mtt and .mtb for text and binary files respectively. + * ds = If null create a new DataSet, else use existing DataSet ds and merge read + * data into it. + * rdHeader = If true, read the header like a standard section. Doesn't read the header by + * default since if it's not requested it's likely not wanted. + * + * Memory: + * This currently works by loading the whole file into memory at once. This should be fine most + * of the time, but could potentially be a problem. Changing this would mean significantly + * changes to the way the code works. + */ + /* Ideas for implementing a partial-loading memory model: + * Use a conduit directly. + * Use a fiber to do the parsing; let it switch back when it runs out of memory. + * Redesign the code so it never needs to look backwards in the buffer? + * + * Major problem: reading only some sections and keeping references to other sections + * would no longer be possible. 
+ */ + public this (char[] path, DataSet ds = null, bool rdHeader = false) { + this (new FilePath (path), ds, rdHeader); + } + /** ditto */ + public this (FilePath path, DataSet ds = null, bool rdHeader = false) { + // Create a dataset or use an existing one + if (ds !is null) _dataset = ds; + else _dataset = new DataSet(); + + // Open & read the file + try { // Supports unicode files with a BOM; defaults to UTF8 when there isn't a BOM: + scope file = new UnicodeFile!(char) (path, Encoding.Unknown); + fbuf = cast(char[]) file.read(); + } catch (Exception e) { + throwMTErr ("Error reading file: " ~ e.msg, new MTFileIOException); + } + // Remember the file name so that we can report errors (somewhat) informatively: + ErrFile = path.path ~ path.file; + ErrInFile = " in \"" ~ ErrFile ~ '"'; + + // Version checking & matching header section tag: + if (fbuf.length < 6 || fbuf[0] != '{' || fbuf[1] != 'M' || fbuf[2] != 'T' || fbuf[5] != '}') + throwMTErr("Not a valid MergeTag text file" ~ ErrInFile, new MTFileFormatException); + fileVer = MTFormatVersion.parseString (fbuf[3..5]); + if (fileVer == MTFormatVersion.VERS.INVALID) + throwMTErr("Unrecognised MergeTag version: MT" ~ fbuf[3..5] ~ ErrInFile, new MTFileFormatException); + + // Header reading/skipping: + if (rdHeader) { // only bother actually reading it if it was requested + // If already existing, merge; else create a new DefaultData. + if (!_dataset.header) _dataset.header = new DefaultData; + endOfHeader = parseSection (6, cast(IDataSection) _dataset.header); + } + else endOfHeader = parseSection (6,null); + } +//END METHODS: CTOR / DTOR + +//BEGIN METHODS: PUBLIC + /** Scans for sections if not already done and returns a list of IDs. + * + * Won't work (will return an empty array) if all sections have already been read without + * scanning for sections. 
+ */ + public ID[] getSectionNames () { + if (fatal) return []; + if (!secTable.length) read([]); // scan for sections + return secTable.keys; + } + + /** Reads (some) sections of the file into data. Note that sections will never be _read twice. + * + * To be more accurate, the file is copied into a buffer by this(). read() then parses the + * contents of this buffer, and stores the contents in dataset. + * + * Each section read is stored in a DataSection class. By default this is an instance of + * DefaultData; this can be customised (see dataSecCreator). + * + * If secSet is provided, reading is restricted to sections given in secSet, otherwise all + * sections are read. Sections given in secSet but not found in the file are not reported as an + * error. Suggested: supply a HashSet!(uint) as the View!(ID). An ArrayBag!(ID) as used is not a + * good choice, except that in this case it's empty. + * + * Merging: + * Where a section already exists in the DataSet (when either the section is given more than + * once in the file, or it was read from a different file by another reader) it is merged. + * Entries already in the DataSet take priority. + * + * Performance: + * Note that loading only desired sections like this still parses the sections not + * read (although it does not try to understand the type or data fields), so there is only a + * small performance advantage to this where other sections do exist in the file. There is also + * some overhead in only partially reading the file to keep track of where other sections are so + * that the entire file need not be re-read if further (or all remaining) sections are read + * later. 
+ */ + public void read () { + if (allRead || fatal) return; // never re-read, and never touch a reader that hit a fatal error + if (secTable.length) { // sections were already located by an earlier read(secSet) scan + foreach (ID id, ref SecMD smd; secTable) { + if (!smd.read) { + IDataSection ds = getOrCreateSec (id); + parseSection (smd.pos, ds); + // allRead is set true so there's no point setting smd.read = true + } + } + } else { // this time we don't need to use secTable + for (size_t pos = endOfHeader; pos < fbuf.length;) { + ID id = fbufReadSecMarker (pos); + IDataSection ds = getOrCreateSec (id); + pos = parseSection (pos, ds); + } + } + allRead = true; // from now on every read overload returns immediately + } + /** ditto */ + public void read (ID[] secSet) { // convenience overload: copies the IDs into a HashSet and delegates + HashSet!(ID) hs = new HashSet!(ID); + foreach (id; secSet) hs.add(id); + read (hs); + } + /** ditto */ + public void read (View!(ID) secSet) { + if (allRead || fatal) return; // never do anything in either case + + if (secTable.length) { + foreach (ID id; secSet) { + SecMD* psmd = id in secTable; + if (psmd && !psmd.read) { // may not exist + IDataSection ds = getOrCreateSec (id); + parseSection (psmd.pos, ds); + if (ds !is null) psmd.read = true; // getOrCreateSec may return null + } + } + } else { + for (size_t pos = endOfHeader; pos < fbuf.length;) { + ID id = fbufReadSecMarker (pos); + secTable[id] = SecMD(pos,false); // add to table + if (secSet.contains(id)) { + IDataSection ds = getOrCreateSec (id); + pos = parseSection (pos, ds); + if (ds !is null) secTable[id].read = true; + } else { + pos = parseSection (pos, null); // skip section + } + } + } + } +//END METHODS: PUBLIC + +//BEGIN METHODS: PRIVATE + /* Utility function for read + * Look for a section; return it if it exists otherwise create a new section: + * use _dataSecCreator if it exists or just create a DefaultData if not. + * However if _dataSecCreator returns null don't add it to the dataset. 
+ */ + private IDataSection getOrCreateSec (ID id) { + IDataSection* i = id in _dataset.sec; + if (i) return *i; + else { + IDataSection s; + if (_dataSecCreator !is null) s = _dataSecCreator(id); + else s = new DefaultData; + if (s !is null) _dataset.sec[id] = s; + return s; + } + } + + /* Reads a section, starting from index pos, finishing at the next section marker (returning + the position of the start of the marker). pos should start after the section marker. + + After analysing tags, the function passes the type, ID and data to addTag. + + NOTE: from performance tests on indexing char[]'s and dereferencing char*'s, the char*'s are + slightly faster, but a tiny difference isn't worth the extra effort/risk of using char*'s. + */ + private size_t parseSection (size_t pos, IDataSection dsec) { + debug scope (failure) + logger.trace ("MTTReader.parseSection: failure"); + /* Searches fbuf starting from start to find one of <=>| and stops at its index. + + If quotable then be quote-aware for single and double quotes. + Note: there's no length restriction for the content of the quote since it could be a single + non-ascii UTF-8 char which would look like several chars. + */ + void fbufLocateDataTagChar (ref size_t pos, bool quotable) { + while (true) { + fbufIncrement (pos); + + if ((fbuf[pos] >= '<' && fbuf[pos] <= '>') || fbuf[pos] == '|') return; + else if (quotable) { + char c = fbuf[pos]; + if (c == '\'' || c == '"') { + fbufIncrement(pos); + while (fbuf[pos] != c) { + if (fbuf[pos] == '\\') ++pos; // escape seq. 
+ fbufIncrement(pos); + } + } + } + } + } + + // Used to ignore a tag (if it starts !< or !{ or should otherwise be ignored): + bool comment = false; + for (; pos < fbuf.length; ++pos) { + if (Util.isSpace(fbuf[pos])) continue; // whitespace + else if (fbuf[pos] == '<') { // data tag + char[] ErrDTAG = "Bad data tag format: not <type|id=data>" ~ ErrInFile; // same message for all three malformed-tag cases below + + // Type section of tag: + size_t pos_s = pos + 1; + fbufLocateDataTagChar (pos, false); // find end of type section + if (fbuf[pos] != '|') throwMTErr (ErrDTAG, new MTSyntaxException); + char[] type = fbuf[pos_s..pos]; + + // ID section of tag: + pos_s = pos + 1; + fbufLocateDataTagChar (pos, false); // find end of ID section + if (fbuf[pos] != '=') throwMTErr (ErrDTAG, new MTSyntaxException); + ID tagID = cast(ID) fbuf[pos_s..pos]; + + // Data section of tag: + pos_s = pos + 1; + fbufLocateDataTagChar (pos, true); // find end of data section + if (fbuf[pos] != '>') throwMTErr (ErrDTAG, new MTSyntaxException); + char[] data = fbuf[pos_s..pos]; + + if (!comment && dsec !is null) { + type = Util.trim(type); + try { + dsec.addTag (type, tagID, data); + } + catch (TextException e) { + logger.error ("TextException while reading " ~ ErrFile ~ ":"); // following a parse error + logger.error (e.msg); + logger.error ("Tag ignored: <"~type~"|"~tagID~"="~data~">"); + // No throw: tag is just ignored + } + catch (Exception e) { + logger.error ("Unknown error occured" ~ ErrInFile ~ ':'); + logger.error (e.msg); + throwMTErr (e.msg); // Fatal to Reader + } + } else comment = false; // cancel comment status now + } + else if (fbuf[pos] == '{') { + if (comment) { // simple block comment + uint depth = 0; // depth of embedded comment blocks + while (true) { + fbufIncrement (pos); + if (fbuf[pos] == '}') { + if (depth == 0) break; + else --depth; + } else if (fbuf[pos] == '{') + ++depth; + } + comment = false; // end of this comment + } else { + return pos; // next section coming up; we are done + } + } + else if (fbuf[pos] == '!') { // 
possibly a comment; check next char + comment = true; // starting a comment (or an error) + // variable is reset at end of comment + } else // must be an error + throwMTErr ("Invalid character (or sequence starting \"!\") outside of tag" ~ ErrInFile, new MTSyntaxException); + } + // if code execution reaches here, we're at EOF + // possible error: last character was ! (but don't bother checking since it's inconsequential) + return pos; + } + + /* Parses fbuf for a section marker. Already knows fbuf[pos] == '{'. Returns the text between the braces as the ID and leaves pos on the character after '}' (the final fbufIncrement throws MTSyntaxException if '}' is the last character of the buffer). + */ + private ID fbufReadSecMarker (ref size_t pos) { + // at this point pos is whatever a parseSection run returned + // since we haven't hit EOF, fbuf[pos] MUST be '{' so no need to check + fbufIncrement(pos); + + size_t start = pos; + for (; pos < fbuf.length; ++pos) + if (fbuf[pos] == '}' || fbuf[pos] == '{') break; // a second '{' before '}' is rejected below + + if (pos >= fbuf.length || fbuf[pos] != '}') + throwMTErr ("Bad section tag format: not {id}" ~ ErrInFile, new MTSyntaxException); + + ID id = cast(ID) fbuf[start..pos]; + fbufIncrement(pos); + return id; + } + + /* Increments pos and checks it hasn't hit fbuf.length (throws MTSyntaxException via throwMTErr if it has). */ + private void fbufIncrement(ref size_t pos) { + ++pos; + if (pos >= fbuf.length) throwMTErr("Unexpected EOF" ~ ErrInFile, new MTSyntaxException); + } + + /* Marks the reader as fatally broken, logs msg, then throws exc. */ + private void throwMTErr (char[] msg, MTException exc = new MTException) { + fatal = true; // if anyone catches the error and tries to do anything --- we're dead now + logger.error (msg); // report the error + throw exc; // and signal our error + } +//END METHODS: PRIVATE +} + + +/** +* Class for reading a mergetag binary file. +* +* Currently only a dummy class: a MTNotImplementedException will be thrown if created. 
+*/ +class MTBReader : IReader +{ + public this (char[] path, DataSet ds = null, bool rdHeader = false) { + this (new FilePath (path), ds, rdHeader); + } + public this (PathView path, DataSet ds = null, bool rdHeader = false) { + throw new MTNotImplementedException; // construction always fails, so the stubs below are never reached on a live instance + } + + DataSet dataset () { /// Get the DataSet (stub: always null) + return null; + } + void dataset (DataSet) {} /// Set the DataSet (stub: ignored) + + void dataSecCreator (IDataSection delegate (ID)) {} /// Set the dataSecCreator (stub: ignored) + + ID[] getSectionNames () { /// Get identifiers for all sections (stub: always empty) + return []; + } + void read () {} /// Commence reading (stub: does nothing) + void read (ID[] secSet) {} /// ditto + void read (View!(ID) secSet) {} /// ditto +} diff -r ea58f277f487 -r d8fccaa45d5f mde/file/mergetag/Writer.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mde/file/mergetag/Writer.d Fri Aug 29 11:59:43 2008 +0100 @@ -0,0 +1,279 @@ +/* LICENSE BLOCK +Part of mde: a Modular D game-oriented Engine +Copyright © 2007-2008 Diggory Hardy + +This program is free software: you can redistribute it and/or modify it under the terms +of the GNU General Public License as published by the Free Software Foundation, either +version 2 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/************************************************************************************************** + * This module contains all writing functions, for both binary and text MergeTag files. + * + * Files can be written in a text or binary form; binary is faster and smaller while text allows + * editing with an ordinary text editor. MTTWriter and MTBWriter are the main classes, both of + * which implement the interface IWriter. 
DualWriter is another class implementing IWriter, which + * contains a private instance of a TextWriter and a BinaryWriter and implements all methods in the + * interface simply by chaining the appropriate method from each of these classes, thus performing + * two writes at once. + * + * Any of these three classes may be used directly, or makeWriter may be invoked to create an + * instance of the appropriate class. + *************************************************************************************************/ + module mde.file.mergetag.Writer; + +// package imports +public import mde.file.mergetag.iface.IWriter; +import mde.file.mergetag.DataSet; +import mde.file.mergetag.internal; +import mde.file.mergetag.exception; + +// tango imports +import tango.core.Exception; +import tango.io.FileConduit; +import tango.io.Buffer : Buffer, IBuffer; +import tango.io.Print : Print; +import convInt = tango.text.convert.Integer; +import tango.util.log.Log : Log, Logger; + +private Logger logger; +static this () { + logger = Log.getLogger ("mde.mergetag.Writer"); +} + + +/** Method to create and return either a MTTWriter or a MTBWriter. + * + * Has two modes of operation: if method is FromExtension, examines the existing extension and + * creates a MTT/MTB writer if the extension is mtt or mtb (throwing if not). + * + * Otherwise, writing format is determined directly by method, and appropriate extensions are + * added to the file name without checking for an existing extension. 
+ * + * Params: + * path = File path + * dataset = Dataset passed to Writer to write from (if null, must be set before write() is called) + * method = $(TABLE + * $(TR $(TH Value) $(TH Writer returned) $(TH Suffix added)) + * $(TR $(TD FromExtension) $(TD MTBWriter or MTTWriter)$(TD $(I none))) + * $(TR $(TD Binary) $(TD MTBWriter) $(TD .mtb)) + * $(TR $(TD Text) $(TD MTTWriter) $(TD .mtt)) + * $(TR $(TD Both) $(TD DualWriter) $(TD .mtb / .mtt)) + * ) + * + * Throws: + * MTFileFormatException if neither test can deduce the writing method, the supplied writing + * method is invalid or the determined/supplied method is not yet implemented. + * + * Use as: + * ----------------------- + * DataSet dataset; // contains data to write + * IWriter foo; + * try { + * foo = makeWriter(...); + * foo.write(); + * } + * catch (MTException) {} + * ----------------------- + * Where the makeWriter line has one of the following forms: + * ----------------------- + * foo = makeWriter("foo.mtt", dataset); + * foo = makeWriter("foo", dataset, WriterMethod.Text); + * ----------------------- + * + * Throws: + * MTFileFormatException if unable to determine writing format or use requested format. + */ +//FIXME: separate functions for separate functionality, like in Reader? 
+IWriter makeWriter (char[] path, DataSet dataset = null, + WriterMethod method = WriterMethod.FromExtension) { + if (method == WriterMethod.FromExtension) { + if (path.length > 4 && path[$-4..$] == ".mtt") + return new MTTWriter (path, dataset); + else if (path.length > 4 && path[$-4..$] == ".mtb") + return new MTBWriter (path, dataset); + else { + logger.error ("Unable to determine writing format: text or binary"); + throw new MTFileFormatException; + } + } + else { + if (method == WriterMethod.Binary) return new MTBWriter (path~".mtb", dataset); + else if (method == WriterMethod.Text) return new MTTWriter (path~".mtt", dataset); + else if (method == WriterMethod.Both) return new DualWriter (path, dataset); + else throw new MTFileFormatException; + } +} + + +/** + * Class to write a dataset to a file. + * + * Files are only actually open for writing while the write() method is running. + * + * Throws: + * $(TABLE + * $(TR $(TH Exception) $(TH Thrown when)) + * $(TR $(TD MTNoDataSetException) $(TD No dataset is available to write from)) + * $(TR $(TD MTFileIOException) $(TD An error occurs while attemting to write the file)) + * $(TR $(TD MTException) $(TD An unexpected error occurs)) + * ) + * Note that all exceptions extend MTException; unlike Reader exceptions don't block further calls. + */ +class MTTWriter : IWriter +{ +//BEGIN DATA + /// Get or set the DataSet (i.e. the container from which all data is written). + DataSet dataset () { return _dataset; } + void dataset (DataSet ds) /// ditto + { _dataset = ds; } + + +private: + // taken from tango.io.Console, mostly to make sure notepad can read our files: + version (Win32) + const char[] Eol = "\r\n"; + else + const char[] Eol = "\n"; + + /* The container where data is written from. */ + DataSet _dataset; + + char[] _path; +//END DATA + +//BEGIN CTOR / DTOR + /** Prepares to open file path for writing. + * + * The call doesn't actually execute any code so cannot fail (unless out of memory). 
+ * + * Params: + * path = The name of the file to open. + * Standard extensions are .mtt and .mtb for text and binary files respectively. + * ds = The DataSet to write from. May be null, in which case a DataSet must be assigned via + * the dataset property before write() is called. + */ + public this (char[] path, DataSet ds = null) { + _path = path; + _dataset = ds; + } +//END CTOR / DTOR + + /** Writes the header and all DataSections. + * + * Opens the file, writes the {MTxx} format-version tag followed by the header section (if the + * DataSet has one), then writes every DataSection preceded by its {id} marker. + * NOTE(review): the version tag is written unconditionally on every call, so the earlier claim + * that repeated calls write the header only once is not reflected in this code; presumably + * FileConduit.WriteCreate truncates an existing file - confirm against the Tango documentation. + */ + public void write () + { + if (!_dataset) throwMTErr ("write(): no Dataset available to write from!", new MTNoDataSetException ()); + + try { + FileConduit conduit; // actual conduit; don't use directly when there's content in the buffer + IBuffer buffer; // write strings directly to this (use opCall(void[]) ) + + // Open a conduit on the file: + conduit = new FileConduit (_path, FileConduit.WriteCreate); + scope(exit) conduit.close(); + + buffer = new Buffer(conduit); // And a buffer + scope(exit) buffer.flush(); // scope(exit) runs in reverse order: flush first, then close + + // Write the header: + buffer ("{MT" ~ MTFormatVersion.CurrentString ~ "}" ~ Eol); + if (_dataset.header !is null) writeSection (buffer, _dataset.header); + + // Write the rest: + foreach (ID id, IDataSection sec; _dataset.sec) { + writeSectionIdentifier (buffer, id); + writeSection (buffer, sec); + } + + buffer.flush(); + + } + catch (IOException e) { + throwMTErr ("Error writing to file: " ~ e.msg, new MTFileIOException); + } + catch (Exception e) { + throwMTErr ("Unexpected exception when writing file: " ~ e.msg); + } + } + + private void writeSectionIdentifier (IBuffer buffer, ID id) { + buffer ("{" ~ cast(char[])id 
~ "}" ~ Eol); + } + + private void writeSection (IBuffer buffer, IDataSection sec) { + void writeItem (char[] tp, ID id, char[] dt) { // actually writes an item + buffer ("<" ~ tp ~ "|" ~ cast(char[])id ~"=" ~ dt ~ ">" ~ Eol); + } + sec.writeAll (&writeItem); + + buffer (Eol); // blank line at end of each section + } + + private void throwMTErr (char[] msg, Exception exc = new MTException) { + logger.error (msg); // report the error + throw exc; // and signal our error + } +} + +/* +* Implement MTBWriter (and move both writers to own modules?). +*/ +class MTBWriter : IWriter { + public this (char[] path, DataSet ds = null) { + throw new MTNotImplementedException; + + /+_path = path; + _dataset = ds;+/ + } + + DataSet dataset () { + return null; + } + void dataset (DataSet) {} + + void write () {} +} + +/* Basic implementation for mtt only. +* +*Implement std CTORs which add extensions to each filename and extra CTORs which take two filenames. +*/ +class DualWriter : IWriter { + /** The individual writers. + * + * Potentially could be used directly, but there should be no need. */ + MTTWriter mtt; + //MTBWriter mtb; /** ditto */ + + public this (char[] path, DataSet ds = null) { + mtt = new MTTWriter (path~".mtt", ds); + } + + DataSet dataset () { + return mtt.dataset; + } + void dataset (DataSet ds) { + mtt.dataset = ds; + } + + /** Write. + * + * Write text then binary, so the mtb file will be the most recent. 
+ */ + void write () { + mtt.write(); // only the text writer exists so far + } +} diff -r ea58f277f487 -r d8fccaa45d5f mde/file/mergetag/exception.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mde/file/mergetag/exception.d Fri Aug 29 11:59:43 2008 +0100 @@ -0,0 +1,91 @@ +/* LICENSE BLOCK +Part of mde: a Modular D game-oriented Engine +Copyright © 2007-2008 Diggory Hardy + +This program is free software: you can redistribute it and/or modify it under the terms +of the GNU General Public License as published by the Free Software Foundation, either +version 2 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/******************************************* + * Contains exception classes for MergeTag. + * + * Publicly imports mde.exception. + ******************************************/ +module mde.file.mergetag.exception; + +public import mde.exception; + +/// Base MergeTag exception class. +class MTException : mdeException { + char[] getSymbol () { // appends ".mergetag" to the parent class's symbol + return super.getSymbol ~ ".mergetag"; + } + + this (char[] msg) { + super(msg); + } + this () { // Only called when an unexpected exception/error occurs + super ("Unknown exception"); + } +} + +/** Thrown on file IO errors. */ +class MTFileIOException : MTException { + this () { + super ("File IO exception"); + } + this (char[] msg) { + super (msg); + } +} + +/** Thrown on unknown format errors; when reading or writing and the filetype cannot be guessed. */ +class MTFileFormatException : MTException { + this () { + super ("File format exception"); + } +} + +/** Thrown on syntax errors when reading; bad tags or unexpected EOF. 
*/ +class MTSyntaxException : MTException { + this () { + super ("Syntax exception"); + } +} + +/** Thrown by addTag (in classes implementing IDataSection) when a data parsing error occurs +* (really just to make whoever called addTag log a warning saying where the error occurred). */ +class MTaddTagParseException : MTException { + this () { + super ("Parse exception within addTag"); + } +} + +/+ +/// Thrown by TypeView.parse on errors. +class MTBadTypeStringException : MTException { + this () {} +} ++/ + +/// Thrown by *Writer.write when no DataSet is available to write from. +class MTNoDataSetException : MTException { + this () { + super ("No dataset"); + } +} + +/// Thrown when attempting to use an unimplemented part of the package +/// Really, just until MTB stuff is implemented +class MTNotImplementedException : MTException { + this () { + super ("Functionality not implemented!"); + } +} diff -r ea58f277f487 -r d8fccaa45d5f mde/file/mergetag/iface/IDataSection.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mde/file/mergetag/iface/IDataSection.d Fri Aug 29 11:59:43 2008 +0100 @@ -0,0 +1,68 @@ +/* LICENSE BLOCK +Part of mde: a Modular D game-oriented Engine +Copyright © 2007-2008 Diggory Hardy + +This program is free software: you can redistribute it and/or modify it under the terms +of the GNU General Public License as published by the Free Software Foundation, either +version 2 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/** This module contains the interface IDataSection used by DataSet. +* +* It has been given its own module to avoid cyclic dependencies and separate out the functionality +* of mergetag. 
+* +* Also some base mergetag symbols have been moved here. +*/ +module mde.file.mergetag.iface.IDataSection; + +/** Typedef for data & section indexes. +* +* Make it an alias, there doesn't appear to be any point having it as a typedef. */ +alias char[] ID; + +/** + * Interface for data storage classes, generally called DataSections, which contain all data-tags + * loaded from a single section of a file. + * + * A class implementing this may implement the addTag function to do whatever it likes with the + * data passed. DefaultData is one implementation which separates this data out into supported + * types and stores it appropriately (allowing merging with existing entries by keeping whichever + * tag was last loaded), while ignoring unsupported types. A different + * implementation could filter out the tags desired and use them directly, and ignore the rest. + * + * The mde.file.deserialize and mde.file.serialize modules provide a useful set of templated functions to + * convert the data accordingly. It is advised to keep the type definitions as defined in the file- + * format except for user-defined types, although this isn't necessary for library operation + * (addTag and writeAll are solely responsible for using and setting the type, ID and data fields). + * + * Another idea for a DataSection class: + * Use a void*[ID] variable to store all data (may also need a type var for each item). + * addTag should call a templated function which calls parse then casts to a void* and stores the data. + * Use a templated get(T)(ID) method which checks the type and casts to T. + */ +interface IDataSection +{ + /** Delegate passed to writeAll. */ + typedef void delegate (char[],ID,char[]) ItemDelg; + + /** Handles parsing of data items for all recognised types. + * + * Should ignore unsupported types/unwanted tags. + * + * TextExceptions (thrown by parseTo/parseFrom) are caught and a warning logged; execution + * then continues (so the offending tag gets dropped).
*/ + void addTag (char[],ID,char[]); + + /** Responsible for getting all data tags saved. + * + * writeAll should call the ItemDelg once for each tag to be saved with parameters in the same + * form as received by addTag (char[] type, ID id, char[] data). */ + void writeAll (ItemDelg); +} diff -r ea58f277f487 -r d8fccaa45d5f mde/file/mergetag/iface/IReader.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mde/file/mergetag/iface/IReader.d Fri Aug 29 11:59:43 2008 +0100 @@ -0,0 +1,37 @@ +/* LICENSE BLOCK +Part of mde: a Modular D game-oriented Engine +Copyright © 2007-2008 Diggory Hardy + +This program is free software: you can redistribute it and/or modify it under the terms +of the GNU General Public License as published by the Free Software Foundation, either +version 2 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . */ + +/** +* Interface for readers. +*/ +module mde.file.mergetag.iface.IReader; + +import mde.file.mergetag.DataSet; + +import tango.util.collection.model.View : View; + +/** Interface for all mergetag readers (MTTReader etc.).
+*/ +interface IReader { + DataSet dataset (); /// Get the DataSet + void dataset (DataSet); /// Set the DataSet + + void dataSecCreator (IDataSection delegate (ID)); /// Set the dataSecCreator + + ID[] getSectionNames (); /// Get identifiers for all sections + void read (); /// Commence reading + void read (ID[] secSet); /// ditto + void read (View!(ID) secSet); /// ditto +} diff -r ea58f277f487 -r d8fccaa45d5f mde/file/mergetag/iface/IWriter.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mde/file/mergetag/iface/IWriter.d Fri Aug 29 11:59:43 2008 +0100 @@ -0,0 +1,48 @@ +/* LICENSE BLOCK +Part of mde: a Modular D game-oriented Engine +Copyright © 2007-2008 Diggory Hardy + +This program is free software: you can redistribute it and/or modify it under the terms +of the GNU General Public License as published by the Free Software Foundation, either +version 2 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . */ + +/** +* Interface for writers. +*/ +module mde.file.mergetag.iface.IWriter; + +import mde.file.mergetag.DataSet; + + +/** Interface for all mergetag writers (MTTWriter etc.). +*/ +interface IWriter { + DataSet dataset (); /// Get the DataSet + void dataset (DataSet); /// Set the DataSet + + void write (); /// Commence writing +} + +/** +* Enumeration for specifying the writing method ("Params" section shows possible values). +* +* Params: +* FromExtension = Determine writing format from file name extension (must be one of .mtb or .mtt). +* Binary = Use binary mode (adds extension .mtb without checking for an existing extension).
+* Text = Use text mode (adds extension .mtt without checking for an existing extension). +* Both = Write simultaneously in binary and text modes (with appropriate extensions added to each +* file name). +*/ +enum WriterMethod : byte { + FromExtension = -1, + Binary = 1, + Text = 2, + Both = 3 +} diff -r ea58f277f487 -r d8fccaa45d5f mde/file/mergetag/internal.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mde/file/mergetag/internal.d Fri Aug 29 11:59:43 2008 +0100 @@ -0,0 +1,35 @@ +/* LICENSE BLOCK +Part of mde: a Modular D game-oriented Engine +Copyright © 2007-2008 Diggory Hardy + +This program is free software: you can redistribute it and/or modify it under the terms +of the GNU General Public License as published by the Free Software Foundation, either +version 2 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . */ + +/// Contains functions/data structures used internally by mergetag.
+module mde.file.mergetag.internal; + +package abstract class MTFormatVersion { + enum VERS : ubyte { // convenient list of all known file format versions + INVALID = 0x00, + MT01 = 0x01, // not yet final + } + /// The current MergeTag version + static const VERS Current = VERS.MT01; + static const char[2] CurrentString = "01"; + + static VERS parseString (char[] str) + in { + assert (str.length == 2); + } body { + if (str[0] == '0' && str[1] == '1') return VERS.MT01; + else return VERS.INVALID; + } +} diff -r ea58f277f487 -r d8fccaa45d5f mde/file/mergetag/mdeUT.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mde/file/mergetag/mdeUT.d Fri Aug 29 11:59:43 2008 +0100 @@ -0,0 +1,100 @@ +/* LICENSE BLOCK +Part of mde: a Modular D game-oriented Engine +Copyright © 2007-2008 Diggory Hardy + +This program is free software: you can redistribute it and/or modify it under the terms +of the GNU General Public License as published by the Free Software Foundation, either +version 2 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . */ + +/// This module provides a unittest for mergetag. +module mde.file.mergetag.mdeUT; + +debug (mdeUnitTest) { + import mde.file.mergetag.Reader; + import mde.file.mergetag.Writer; + import mde.file.mergetag.DataSet; + import mde.file.mergetag.DefaultData; + import mde.file.deserialize; + import mde.file.serialize; + + import tango.io.FilePath; + import tango.util.log.Log : Log, Logger; + + private Logger logger; + static this() { + logger = Log.getLogger ("mde.mergetag.unittest"); + } + + unittest { + /* This does a basic write-out and read-in test for each type with its default value.
+ * Thus it provides some basic testing for the whole mergetag package. */ + + const file = "unittest"; + const ID UT_ID = cast (ID) "mdeUT"; + const headInfo = "mde Unit Test"; + + DataSet dsW = new DataSet(); + + dsW.header = new DefaultData(); + dsW.header._charA[UT_ID] = headInfo; + + DefaultData secW = new DefaultData(); + dsW.sec[UT_ID] = secW; + + static char[] genUTCode () { + char[] ret; + foreach (type; DefaultData.dataTypes) { + ret ~= `secW.`~DefaultData.varName(type)~`[UT_ID] = (`~type~`).init;`; + } + return ret; + } + mixin (genUTCode()); // Add an entry to dd for each type + + IWriter w = makeWriter (file, dsW, WriterMethod.Both); + w.write(); + + // FIXME: when binary writing is supported, read both formats and check + IReader r = makeReader (FilePath (file~".mtt"), null, true); + r.read(); + + DataSet dsR = r.dataset; + assert (dsR !is null); + + assert (dsR.header !is null); + char[]* p = UT_ID in dsR.header._charA; // check the header that was read back, not the one written above + assert (p); + assert (*p == headInfo); + + IDataSection* sec_p = (UT_ID in dsR.sec); + assert (sec_p); + DefaultData secR = cast(DefaultData) *sec_p; + assert (secR !is null); + + // FIXME: when comparing associative arrays works, use that. In the mean-time, format!() should work.
+ static char[] genCheckCode (char[] dd1, char[] dd2) { + const char[] failureMsg = "Assertion failed for type; values: "; + char[] ret; + foreach (type; DefaultData.dataTypes) { + char[] tName = DefaultData.varName(type); + ret ~= `char[] `~tName~`Val1 = parseFrom!(`~type~`[char[]]) (cast(`~type~`[char[]]) `~dd1~`.`~tName~`); +char[] `~tName~`Val2 = parseFrom!(`~type~`[char[]]) (cast(`~type~`[char[]]) `~dd2~`.`~tName~`); +assert (`~tName~`Val1 == `~tName~`Val2, "Assertion failed for type `~type~`; values: "~`~tName~`Val1~", "~`~tName~`Val2 ); +`; + } + return ret; + } + mixin (genCheckCode (`secW`,`secR`)); + + // Delete the unittest file now + FilePath (file~".mtt").remove; + + logger.info ("Unittest complete (for DefaultData)."); + } +} diff -r ea58f277f487 -r d8fccaa45d5f mde/file/serialize.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mde/file/serialize.d Fri Aug 29 11:59:43 2008 +0100 @@ -0,0 +1,391 @@ +/* LICENSE BLOCK +Part of mde: a Modular D game-oriented Engine +Copyright © 2007-2008 Diggory Hardy + +This program is free software: you can redistribute it and/or modify it under the terms +of the GNU General Public License as published by the Free Software Foundation, either +version 2 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . */ + +/************************************************************************************************** + * Generic serialization templated function. + * + * Supports: + * Associative arrays, arrays (inc. strings), structs, char types, bool, int types, float types.
+ * + * Examples: + * ------------------------------------------------------------------------------------------------ + * // Basic examples: + * Cout (serialize!(byte) (-13)).newline; // -13 + * Cout (serialize!(real) (2.56e11)).newline; // 2.55999999999999990000e+11 + * Cout (serialize!(double[]) ([0.0, 1.0, 2.0, 3.0])).newline; // [0.00000000000000000,1.00000000000000000,2.00000000000000000,3.00000000000000000] + * Cout (serialize ([true,false,false])).newline; // [true,false,false] + * + * // String and ubyte[] special syntaxes (always used): + * Cout (serialize ("A string.")).newline; // "A string." (including quotes) + * Cout (serialize (cast(ubyte[]) [5u, 0xF1u, 0x10u])).newline; // 0x05f110 + * + * // Associative arrays: + * Cout (serialize ([-1:"negative one"[], 0:"zero", 1:"one"])).newline; // [0:"zero",1:"one",-1:"negative one"] + * + * // Structs: + * struct S { int a = 5; double[int[]] x; } + * S s; + * Cout (serialize (s)); + * + * // No limit on complexity... + * char[] somethingComplicated = serialize!(real[][][bool[int[][]]]) (...); + * ------------------------------------------------------------------------------------------------ + * + * throws: + * May throw a UnicodeException or an IllegalArgumentException. + * + * TODO: Optimize memory allocation (if possible?). Test best sizes for initial allocations + * instead of merely guessing? + *************************************************************************************************/ +//NOTE: in case of multiple formats, make this a dummy module importing both serialize modules, +// or put all the code here. 
+module mde.file.serialize; +// Since serialize is never used in a module where deserialize is not used, save an import: +public import mde.file.deserialize; + +// tango imports +import tango.core.Traits; +import tango.core.Exception : UnicodeException, IllegalArgumentException; +import cInt = tango.text.convert.Integer; +import cFloat = tango.text.convert.Float; +import Utf = tango.text.convert.Utf; + + +alias serialize parseFrom; // support the old name + +// Formatting options, for where multiple formats are supported by the deserializer. + +// Output using the special binary notation (0x01F2AC instead of [01 ,0xF2, 0xAC])? +const bool SPECIAL_BINARY_NOTATION = true; + +// Output binary as true / false or 1 / 0 ? +const bool BINARY_AS_WORDS = true; + + +char[] serialize(U) (U val) { // returns the text form of val; the branch is selected from U at compile time + // Associative arrays (NOTE: cannot use is() expression) + static if (isAssocArrayType!(U)) { // generic associative array + alias typeof(U.keys[0]) S; + alias typeof(U.values[0]) T; + char[] ret; + // A guess, including values themselves and [,:] elements (must be at least 2). + ret.length = val.length * (defLength!(T) + defLength!(S) + 2) + 2; + ret[0] = '['; + uint i = 1; + foreach (S k, T v; val) { + char[] s = serialize!(S) (k) ~ ":" ~ serialize!(T) (v); + i += s.length; + while (i+1 >= ret.length) + ret.length = ret.length * 2; // grow until s and its comma fit (one doubling may not be enough) + ret[i-s.length .. i] = s; + ret[i++] = ','; + } + if (i == 1) ++i; // special case - not overwriting a comma + ret[i-1] = ']'; // replaces last comma + return ret[0..i]; + } + // Arrays + else static if (is(U S == S[]) || isStaticArrayType!(U)) { + alias typeof(U[0]) T; + + static if (is(T == char)) { // string + char[] ret = new char[val.length * 2 + 2]; // Initial storage. This should ALWAYS be enough.
+ ret[0] = '"'; + uint i = 1; + for (uint t = 0; t < val.length;) { + // process a block of non-escapable characters + uint s = t; + while (t < val.length && !isEscapableChar(val[t])) + ++t; // skip all non-escapable chars + uint j = i + t - s; + ret[i..j] = val[s..t]; // copy a block + i = j; + // process a block of escapable characters + while (t < val.length && isEscapableChar(val[t])) { + ret[i++] = '\\'; // backslash; increment i + ret[i++] = escapeChar(val[t++]); // character; increment i and t + } + } + ret[i++] = '"'; + return ret[0..i]; + } + else static if (is(T == wchar) || is(T == dchar)) { // wstring or dstring + // May throw a UnicodeException; don't bother catching and rethrowing: + return serialize!(char[]) (Utf.toString (val)); + } + else static if (SPECIAL_BINARY_NOTATION && is(T == ubyte)) { // special binary notation + // Note: To disable the usage of this special type, set SPECIAL_BINARY_NOTATION = false. + static const char[16] digits = "0123456789abcdef"; + + char[] ret = new char[val.length * 2 + 2]; // exact length + ret[0..2] = "0x"; + uint i = 2; + + foreach (ubyte x; val) { + ret[i++] = digits[x >> 4]; + ret[i++] = digits[x & 0x0F]; + } + return ret; + } + else { // generic array + char[] ret; + // A guess, including commas and brackets (must be at least 2) + ret.length = val.length * (defLength!(T) + 1) + 2; + ret[0] = '['; + uint i = 1; + foreach (T x; val) { + char[] s = serialize!(T) (x); + i += s.length; + while (i+1 >= ret.length) + ret.length = ret.length * 2; // grow until s and its comma fit (one doubling may not be enough) + ret[i-s.length .. i] = s; + ret[i++] = ','; + } + if (i == 1) + ++i; // special case - not overwriting a comma + ret[i-1] = ']'; // replaces last comma + return ret[0..i]; + } + } + // Structs + else static if (is(U == struct)) { + char[] ret; + // A very rough guess.
+ ret.length = val.sizeof * 4; + ret[0] = '{'; + uint i = 1; + foreach (k, v; val.tupleof) { + alias typeof(v) T; + char[] s = serialize!(size_t) (k) ~ ":" ~ serialize!(T) (v); + i += s.length; + while (i+1 >= ret.length) + ret.length = ret.length * 2; // grow until s and its comma fit (one doubling may not be enough) + ret[i-s.length .. i] = s; + ret[i++] = ','; + } + if (i == 1) ++i; // special case - not overwriting a comma + ret[i-1] = '}'; // replaces last comma + return ret[0..i]; + } + // Basic types + else static if (is(U == char)) { // char (UTF-8 byte) + // Note: if (val > 127) "is invalid UTF-8 single char". However we don't know + // what this is for, in particular if it will be recombined with other chars later. + + // Can't return reference to static array; so making it dynamic is cheaper than copying. + char[] ret = new char[4]; // max length for an escaped char + ret[0] = '\''; + + if (!isEscapableChar (val)) { + ret[1] = val; + ret[2] = '\''; + return ret[0..3]; + } else { + ret[1] = '\\'; + ret[2] = escapeChar (val); + ret[3] = '\''; + return ret; + } + } else static if (is(U == wchar) || + is(U == dchar)) { // wchar or dchar (UTF-16/32 single char) + // Note: only ascii can be converted. NOTE: convert to UTF-8 (multibyte) char?
+ if (val <= 127u) + return serialize!(char) (cast(char) val); // ASCII + else throw new UnicodeException ( + "Error: unicode non-ascii character cannot be converted to a single UTF-8 char", 0); + } else static if (is (U == bool)) { // boolean + static if (BINARY_AS_WORDS) { + if (val) + return "true"; + else return "false"; + } else { + if (val) + return "1"; + else return "0"; + } + } else static if (is (U : long)) { // any integer type, except char types and bool + static if (is (U == ulong)) // ulong may not be supported properly + if (val > cast(ulong) long.max) + throw new IllegalArgumentException ("No handling available for ulong where value > long.max"); + return cInt.toString (val); + } else static if (is (U : real)) { // any (real) floating point type + char[] ret = new char[32]; // minimum allowed by assert in format + return cFloat.format (ret, val, U.dig+2, 1);// from old C++ tests, U.dig+2 gives best(?) accuracy + } + // Unsupported + else + static assert (false, "Unsupported type: "~U.stringof); +} + +//BEGIN Utility funcs +/* This template provides the initial length for strings for formatting various types. These strings + * can be expanded; this value is intended to cover 90% of cases or so. + * + * NOTE: This template was intended to provide specialisations for different types. + * This one value should do reasonably well for most types. + */ +private { + template defLength(T) { const uint defLength = 20; } + template defLength(T : char) { const uint defLength = 4; } + template defLength(T : bool) { const uint defLength = 5; } +} +private bool isEscapableChar (char c) { + return ((c <= '\r' && c >= '\a') || c == '\"' || c == '\'' || c == '\\'); +} +// Throws on unsupported escape sequences; however this should never happen within serialize.
+private char escapeChar (char c) { + // This code was generated: a comparison tree returning, for each character accepted by isEscapableChar, the character that follows the backslash. + if (c <= '\v') { + if (c <= '\b') { + if (c == '\a') { + return 'a'; + } else if (c == '\b') { + return 'b'; + } + } else { + if (c == '\t') { + return 't'; + } else if (c == '\n') { + return 'n'; + } else if (c == '\v') { + return 'v'; + } + } + } else { + if (c <= '\r') { + if (c == '\f') { + return 'f'; + } else if (c == '\r') { + return 'r'; + } + } else { + if (c == '\"') { + return '\"'; + } else if (c == '\'') { + return '\''; + } else if (c == '\\') { + return '\\'; + } + } + } + + // if we haven't returned, c is not one of the escapable characters: + throw new IllegalArgumentException ("Internal error (escapeChar)"); +} +//END Utility funcs + + + +debug (UnitTest) { + import tango.util.log.Log : Log, Logger; + + private Logger logger; + static this() { + logger = Log.getLogger ("text.serialize"); + } +unittest { + // Utility + bool throws (void delegate() dg) { + bool r = false; + try { + dg(); + } catch (Exception e) { + r = true; + logger.info ("Exception caught: "~e.msg); + } + return r; + } + assert (!throws ({ int i = 5; })); + assert (throws ({ throw new Exception ("Test - this exception should be caught"); })); + + // Associative arrays + char[] X = serialize!(char[][char]) (['a':cast(char[])"animal", 'b':['b','u','s']]); + char[] Y = `['a':"animal",'b':"bus"]`; + assert (X == Y); + + + // Arrays + // generic array stuff: + assert (serialize!(double[]) ([1.0, 1.0e-10]) == `[1.00000000000000000,0.10000000000000000e-09]`); + assert (serialize!(double[]) (cast(double[]) []) == `[]`); // empty array + + // char[] conversions, with commas, escape sequences and multichar UTF8 characters: + assert (serialize!(char[][]) ([ ".\""[], [',','\''] ,"!\b€" ]) == `[".\"",",\'","!\b€"]`); + + // wchar[] and dchar[] conversions: + // The characters were pretty-much pulled at random from unicode tables.
+ assert (serialize!(wchar[]) ("Test string: ¶α؟अกሀ搀"w) == "\"Test string: ¶α؟अกሀ搀\""); + assert (serialize!(dchar[]) ("Test string: ¶α؟अกሀ搀"d) == "\"Test string: ¶α؟अกሀ搀\""); + + + static if (SPECIAL_BINARY_NOTATION) + assert (serialize!(ubyte[]) (cast(ubyte[]) [0x01, 0xF2, 0xAC]) == `0x01f2ac`); // ubyte[] special notation + else + assert (serialize!(ubyte[]) (cast(ubyte[]) [0x01, 0xF2, 0xAC]) == `[1,242,172]`); + + + // Structs + struct Foo { int a = 9; char b = '\v'; float c; } + struct Bar { Foo a,b; } + static Foo foo1 = { a:150, b:'8', c:17.2f}, foo2; + Bar bar; + bar.a = foo1; + bar.b = foo2; + assert (serialize(bar) == "{0:{0:150,1:'8',2:1.72000007e+01},1:{0:9,1:'\\v',2:nan}}"); + + + // Basic Types + // Character types + assert (serialize!(char) ('\'') == "\'\\\'\'"); + assert (serialize!(wchar) ('X') == "'X'"); + assert (serialize!(dchar) ('X') == "'X'"); + assert (throws ({ char[] r = serialize!(wchar) ('£'); /* unicode U+00A3 */ })); + assert (throws ({ char[] r = serialize!(dchar) ('£'); })); + + // Bool + static if (BINARY_AS_WORDS) + assert (serialize(false) == "false"); + else + assert (serialize(true) == "1"); + + // Integers + assert (serialize (cast(byte) -5) == "-5"); + assert (serialize (cast(short) -32768) == "-32768"); + assert (serialize (-5) == "-5"); + assert (serialize (-9223372036854775807L) == "-9223372036854775807"); + assert (serialize (cast(ubyte) -1) == "255"); + assert (serialize (cast(ushort) -1) == "65535"); + assert (serialize!(uint) (-1) == "4294967295"); + assert (serialize (cast(ulong) 0x7FFF_FFFF_FFFF_FFFFLu) == "9223372036854775807"); + assert (serialize!(uint[]) ([0b0100u,0724,0xFa59c,0xFFFFFFFF,0]) == + "[4,468,1025436,4294967295,0]"); + assert (throws ({ + // ulong is not properly supported. + // NOTE: this is something that should really work. 
+ char[] r = serialize!(ulong) (0x8FFF_FFFF_FFFF_FFFFLu); + })); + + // Floats + // These numbers are not particularly meaningful: + assert (serialize!(float) (0.0f) == "0.00000000"); + assert (serialize!(double) (-1e25) == "-1.00000000000000000e+25"); + assert (serialize!(real) (cast(real) 4.918e300) == "4.91800000000000000000e+300"); + + // Escape sequences (test conversion functions) + assert (serialize ("\a\b\t\n\v\f\r\"\'\\") == `"\a\b\t\n\v\f\r\"\'\\"`); + + logger.info ("Unittest complete."); +} +} diff -r ea58f277f487 -r d8fccaa45d5f mde/font/font.d --- a/mde/font/font.d Thu Aug 07 11:25:27 2008 +0100 +++ b/mde/font/font.d Fri Aug 29 11:59:43 2008 +0100 @@ -21,15 +21,14 @@ import mde.font.FontTexture; import mde.font.exception; -import mde.mergetag.Reader; -import mde.mergetag.DataSet; -import mde.mergetag.exception; +import mde.file.mergetag.Reader; +import mde.file.mergetag.DataSet; import mde.setup.paths; import derelict.freetype.ft; import derelict.opengl.gl; -import mde.mergetag.deserialize; +import mde.file.deserialize; import tango.stdc.stringz; import Util = tango.text.Util; import tango.util.log.Log : Log, Logger; @@ -59,9 +58,6 @@ /** Load the freetype library from the file fileName. 
*/ private const fileName = "fonts"; void initialize () { - if (!confDir.exists (fileName)) - throw new fontException ("No font settings file (fonts.[mtt|mtb])"); - if (FT_Init_FreeType (&library)) throw new fontException ("error initialising the FreeType library"); @@ -110,9 +106,10 @@ if (p is null) throw new fontException ("No fallback font style specified"); fallbackName = *p; - } - catch (MTException e) { - throw new fontException ("Mergetag exception: "~e.msg); + } catch (NoFileException) { + throw new fontException ("No font settings file (fonts.[mtt|mtb])"); + } catch (Exception e) { + throw new fontException ("Reading font settings failed: "~e.msg); } // Find the fallback diff -r ea58f277f487 -r d8fccaa45d5f mde/gui/WidgetDataSet.d --- a/mde/gui/WidgetDataSet.d Thu Aug 07 11:25:27 2008 +0100 +++ b/mde/gui/WidgetDataSet.d Fri Aug 29 11:59:43 2008 +0100 @@ -30,10 +30,9 @@ public import mde.gui.types; // For loading from file: -import mt = mde.mergetag.DataSet; -import mt = mde.mergetag.DefaultData; -import mt = mde.mergetag.exception; -import mde.mergetag.serialize; +import mt = mde.file.mergetag.DataSet; +import mt = mde.file.mergetag.DefaultData; +import mde.file.serialize; import tango.util.log.Log : Log, Logger; private Logger logger; diff -r ea58f277f487 -r d8fccaa45d5f mde/gui/WidgetManager.d --- a/mde/gui/WidgetManager.d Thu Aug 07 11:25:27 2008 +0100 +++ b/mde/gui/WidgetManager.d Fri Aug 29 11:59:43 2008 +0100 @@ -187,8 +187,8 @@ import mde.gui.widget.Ifaces; import mde.gui.widget.createWidget; -import mde.mergetag.Reader; -import mde.mergetag.Writer; +import mde.file.mergetag.Reader; +import mde.file.mergetag.Writer; import mde.setup.paths; /************************************************************************************************* @@ -221,11 +221,6 @@ if (allLoaded || (defaultDesign !is null && allDesigns == false)) return; // test if already loaded - if (!confDir.exists (fileName)) { - logger.error ("Unable to load GUI: no config file!"); - 
return; // not a fatal error (so long as the game can run without a GUI!) - } - // Set up a reader scope IReader reader; try { @@ -266,6 +261,9 @@ allLoaded = true; } else reader.read([defaultDesign]); + } catch (NoFileException) { + logger.error ("Unable to load GUI: no config file!"); + // just return: not a fatal error (so long as the game can run without a GUI!) } catch (Exception e) { logger.error ("Unable to load GUI: errors parsing config file ("~confDir.getFileName(fileName,PRIORITY.HIGH_LOW)~"):"); logger.error (e.msg); diff -r ea58f277f487 -r d8fccaa45d5f mde/input/Config.d --- a/mde/input/Config.d Thu Aug 07 11:25:27 2008 +0100 +++ b/mde/input/Config.d Fri Aug 29 11:59:43 2008 +0100 @@ -18,10 +18,10 @@ import mde.input.exception; -import MT = mde.mergetag.Reader; +import MT = mde.file.mergetag.Reader; import mde.setup.paths; -import mde.mergetag.deserialize; -debug import mde.mergetag.serialize; +import mde.file.deserialize; +debug import mde.file.serialize; import tango.util.log.Log : Log, Logger; import tango.util.collection.TreeBag : TreeBag; diff -r ea58f277f487 -r d8fccaa45d5f mde/lookup/Options.d --- a/mde/lookup/Options.d Thu Aug 07 11:25:27 2008 +0100 +++ b/mde/lookup/Options.d Fri Aug 29 11:59:43 2008 +0100 @@ -24,11 +24,10 @@ import mde.setup.paths; import mde.exception; -import mde.mergetag.Reader; -import mde.mergetag.Writer; -import mde.mergetag.DataSet; -import mde.mergetag.exception; -import mde.mergetag.serialize; +import mde.file.mergetag.Reader; +import mde.file.mergetag.Writer; +import mde.file.mergetag.DataSet; +import mde.file.serialize; import tango.core.Exception : ArrayBoundsException; import tango.util.log.Log : Log, Logger; @@ -134,10 +133,6 @@ */ private const fileName = "options"; void load () { - // Check it exists (if not it should still be created on exit). - // Don't bother checking it's not a folder, because it could still be a block or something. 
- if (!confDir.exists (fileName)) return; - try { IReader reader; reader = confDir.makeMTReader (fileName, PRIORITY.LOW_HIGH); @@ -148,10 +143,11 @@ else return null; }; reader.read; - } catch (MTException e) { - logger.fatal ("Loading options aborted:"); - logger.fatal (e.msg); - throw new optionsLoadException ("Mergetag exception (see above message)"); + } catch (NoFileException e) { + // Just return. Options file will be created on exit. + } catch (Exception e) { + logger.warn ("Loading options failed: "~e.msg); + logger.warn ("If warning persists, delete the offending file."); // FIXME - delete the bad file somehow } } void save () { @@ -170,19 +166,17 @@ reader.read; } catch (NoFileException) { // No user file exists; not an error. - } catch (MTException e) { + } catch (Exception e) { // Log a message and continue, overwriting the file: - logger.error ("Loading options aborted:"); - logger.error (e.msg); + logger.error ("Loading options aborted: " ~ e.msg); } try { IWriter writer; writer = confDir.makeMTWriter (fileName, ds); writer.write(); - } catch (MTException e) { - logger.error ("Saving options aborted! 
Reason:"); - logger.error (e.msg); + } catch (Exception e) { + logger.error ("Saving options aborted: "~e.msg); } } diff -r ea58f277f487 -r d8fccaa45d5f mde/lookup/Translation.d --- a/mde/lookup/Translation.d Thu Aug 07 11:25:27 2008 +0100 +++ b/mde/lookup/Translation.d Fri Aug 29 11:59:43 2008 +0100 @@ -41,10 +41,10 @@ import mde.setup.paths; import mde.exception; -import mde.mergetag.DataSet; -import mde.mergetag.Reader; -import mde.mergetag.exception; -import mde.mergetag.deserialize; +import mde.file.mergetag.DataSet; +import mde.file.mergetag.Reader; +import mde.file.mergetag.exception; +import mde.file.deserialize; import tango.util.log.Log : Log, Logger; diff -r ea58f277f487 -r d8fccaa45d5f mde/mde.d --- a/mde/mde.d Thu Aug 07 11:25:27 2008 +0100 +++ b/mde/mde.d Fri Aug 29 11:59:43 2008 +0100 @@ -35,6 +35,7 @@ import tango.core.Thread : Thread; // Thread.sleep() import tango.time.Clock; // Clock.now() import tango.util.log.Log : Log, Logger; +debug (mdeUnitTest) import mde.file.mergetag.mdeUT; int main(char[][] args) { diff -r ea58f277f487 -r d8fccaa45d5f mde/mergetag/DataSet.d --- a/mde/mergetag/DataSet.d Thu Aug 07 11:25:27 2008 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,71 +0,0 @@ -/* LICENSE BLOCK -Part of mde: a Modular D game-oriented Engine -Copyright © 2007-2008 Diggory Hardy - -This program is free software: you can redistribute it and/or modify it under the terms -of the GNU General Public License as published by the Free Software Foundation, either -version 2 of the License, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; -without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . 
*/ - -/** This module contains the mergetag DataSet class, used for all reading and writing operations. - */ -module mde.mergetag.DataSet; - -// package imports -public import mde.mergetag.iface.IDataSection; -import mde.mergetag.DefaultData; -import mde.mergetag.exception; - - -/************************************************************************************************** - * Data class; contains a DataSection class instance for each loaded section of a file. - * - * Stored data is available for direct access via header and sec; all functions are just helper - * functions. - * - * Any class implementing IDataSection may be used to store data; by default a DefaultData class is - * used when reading a file. Another class may be used by creating the sections before reading the - * file or passing the reader a function to create the sections (see Reader.dataSecCreator). - * - * Could be a struct, except that structs are value types (not reference types). - */ -class DataSet -{ - DefaultData header; /// Header section. - IDataSection[ID] sec; /// Dynamic array of sections - - /// Template to return all sections of a child-class type. - T[ID] getSections (T : IDataSection) () { - T[ID] ret; - foreach (ID id, IDataSection s; sec) { - T x = cast(T) s; - if (x) ret[id] = x; // if non-null - } - return ret; - } -} - -debug (mdeUnitTest) { - import tango.util.log.Log : Log, Logger; - - private Logger logger; - static this() { - logger = Log.getLogger ("mde.mergetag.DataSet"); - } - - unittest { // Only covers DataSet really. 
- DataSet ds = new DataSet; - ds.sec[cast(ID)"test"] = new DefaultData; - assert (ds.getSections!(DefaultData)().length == 1); - ds.sec[cast(ID)"test"].addTag ("char[]",cast(ID)"T"," \"ut tag 1 \" "); - assert (ds.getSections!(DefaultData)()[cast(ID)"test"].Arg!(char[])[cast(ID)"T"] == "ut tag 1 "); - - logger.info ("Unittest complete."); - } -} diff -r ea58f277f487 -r d8fccaa45d5f mde/mergetag/DefaultData.d --- a/mde/mergetag/DefaultData.d Thu Aug 07 11:25:27 2008 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,183 +0,0 @@ -/* LICENSE BLOCK -Part of mde: a Modular D game-oriented Engine -Copyright © 2007-2008 Diggory Hardy - -This program is free software: you can redistribute it and/or modify it under the terms -of the GNU General Public License as published by the Free Software Foundation, either -version 2 of the License, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; -without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . */ - -/** This module contains the DefaultData class, and some notes possibly useful for implementing -* other types of DataSection. -*/ -module mde.mergetag.DefaultData; - -public import mde.mergetag.iface.IDataSection; -import mde.mergetag.exception; - -import mde.mergetag.serialize; - - -/************************************************************************************************* - * Default DataSection class. - * - * Supported types are given by dataTypes. - * - * Currently DefaultData is only used for headers, and thus the list of supported types has been - * reduced to just those used in headers. Load order is HIGH_LOW, i.e. existing entries aren't - * overwritten. 
- *************************************************************************************************/ -/* The implementation now uses a fair bit of generic programming. Adjusting the types supported -* should be as simple as adjusting the list dataTypes, and possibly implemting new conversions in -* parseFrom and parseTo if you add new types (e.g. for cent or imaginary/complex types, or user types). -* -* There shouldn't really be any need to adjust the implementation, except perhaps to add new -* functions to the class (such as another type of output where the delegate used in writeAll isn't -* enough). -*/ -class DefaultData : IDataSection -{ - //BEGIN META - /* These functions are used to generate code. Compile-time functions rather than templates are - * used because they are easier to write and understand. Mixins are used to compile the resultant - * code. Must be declared before used since forward references aren't supported for compile-time - * functions. */ - - // Generate the correct name for each variable type. - static char[] varName (char[] type) { - char[] append = ""; - while (type.length >= 2 && type[$-2..$] == "[]") { - type = type[0..$-2]; - append ~= "A"; - } - return "_" ~ type ~ append; - } - - // Int-to-string converter, which may not be efficient but will run at compile time. - static char[] int2str (uint i) { - char[] ret; - const digits = "0123456789"; - if (i == 0) ret = "0"; - else for (; i > 0; i /= 10) ret = digits[i%10] ~ ret; - return ret; - } - - // Generate the code for variable declarations. - static char[] declerations (char[][] types) { - char[] ret = ""; - foreach (char[] type; types) ret ~= type ~ "[ID]\t" ~ varName(type) ~ ";\n"; - return ret; - } - - // Purely to add indentation. Could just return "" without affecting functionality. 
- static char[] indent (uint i) { - char[] ret = ""; - for (; i > 0; --i) ret ~= " "; - // This is not executable at compile time: - //ret.length = i * 4; // number of characters for each indentation - //ret[] = ' '; // character to indent with - return ret; - } - - /* Generates a binary search algorithm. - * - * Currently this is tailored to it's particular use (addTag). */ - static char[] binarySearch (char[] var, char[][] consts, int indents = 0) { - if (consts.length > 3) { - return indent(indents) ~ "if (" ~ var ~ " <= \"" ~ consts[$/2 - 1] ~ "\") {\n" ~ - binarySearch (var, consts[0 .. $/2], indents + 1) ~ - indent(indents) ~ "} else {\n" ~ - binarySearch (var, consts[$/2 .. $], indents + 1) ~ - indent(indents) ~ "}\n"; - } else { - char[] ret; - ret ~= indent(indents); - foreach (c; consts) { - ret ~= "if (" ~ var ~ " == \"" ~ c ~ "\") {\n" ~ - //indent(indents+1) ~ varName(c) ~ "[id] = parseTo!(" ~ c ~ ") (dt);\n" ~ - indent(indents+1) ~ "if ((id in "~varName(c)~") is null)\n" ~ - indent(indents+2) ~ varName(c)~"[id] = parseTo!(" ~ c ~ ") (dt);\n" ~ - indent(indents) ~ "} else "; - } - ret = ret[0..$-6] ~ '\n'; // remove last else - return ret; - } - } - - // Generates the code to write data members (writeAll). - static char[] writeVars () { - char[] code = ""; - foreach (i,type; dataTypes) { - code ~= "foreach (id, dt; " ~ varName(type) ~ ") itemdlg (dataTypes[" ~ int2str(i) ~ "], id, parseFrom!(" ~ type ~ ")(dt));\n"; - } - return code; - } - //END META - - /** Data Members - * - * These types are all stored directly, as below, are available for direct access. The variable - * names are created dynamically at compile-time based on the dataTypes list. 
- * ------------------ - * int[ID] _int; // name is type prefixed by _ - * char[][ID] _charA; // [] is replaced by A - * ------------------ - * - * An alternative access method is to use the provided templates: - * -------------------- - * template Arg(T) { - * alias Name Arg; - * } - * - * type y = Arg!(type).Arg; // example of use - * -------------------- - * Note: trying to use Arg!(type) to implicitly refer to Arg!(type).Arg causes compiler errors - * due to the "alias Name Arg;" statement actually being a mixin. - */ - /+ All types previously supported. Most of these weren't used. - const char[][] dataTypes = ["bool","bool[]", - "byte","byte[]", - "char","char[]","char[][]", - "double","double[]", - "float","float[]", - "int","int[]", - "long","long[]", - "real","real[]", - "short","short[]", - "ubyte","ubyte[]", - "uint","uint[]", - "ulong","ulong[]", - "ushort","ushort[]"]; - +/ - const char[][] dataTypes = ["char[]", "char[][]"]; - - mixin (declerations (dataTypes)); // Declare all the variables. - - void addTag (char[] type, ID id, char[] dt) { /// Supports all types listed in dataTypes. - mixin (binarySearch ("type", dataTypes)); - } - - void writeAll (ItemDelg itemdlg) { /// Supports all types listed in dataTypes. - mixin (writeVars ()); - } - - /* These make no attempt to check Arg is valid. 
- * But if the symbol doesn't exist the complier will throw an error anyway, e.g.: - * Error: identifier '_boolAA' is not defined - */ - template ArgName (T : T[]) { - const char[] ArgName = ArgName!(T)~`A`; - } - template ArgName (T) { - const char[] ArgName = `_`~T.stringof; - } - template Arg(T) { - mixin(`alias `~ArgName!(T)~` Arg;`); - } -} diff -r ea58f277f487 -r d8fccaa45d5f mde/mergetag/Reader.d --- a/mde/mergetag/Reader.d Thu Aug 07 11:25:27 2008 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,526 +0,0 @@ -/* LICENSE BLOCK -Part of mde: a Modular D game-oriented Engine -Copyright © 2007-2008 Diggory Hardy - -This program is free software: you can redistribute it and/or modify it under the terms -of the GNU General Public License as published by the Free Software Foundation, either -version 2 of the License, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; -without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . */ - -/************************************************************************************************** - * This module contains all reading functions, for both binary and text MergeTag files. 
- *************************************************************************************************/ -module mde.mergetag.Reader; - -// package imports -public import mde.mergetag.iface.IReader; -import mde.mergetag.DataSet; -import mde.mergetag.DefaultData; -import mde.mergetag.exception; -import mde.mergetag.internal; - -import tango.core.Exception; - -// tango imports -import tango.io.FilePath; -import tango.io.UnicodeFile; -import Util = tango.text.Util; -import ConvInt = tango.text.convert.Integer; -//import tango.util.collection.model.View : View; -import tango.util.collection.HashSet : HashSet; -import tango.util.log.Log : Log, Logger; - -private Logger logger; -static this() { - logger = Log.getLogger ("mde.mergetag.Reader"); -} - -// TODO: allow compressing with zlib for both binary and text? (.mtz, .mtt, .mtb extensions) - -/** Make an IReader class. -* -* Create an appropriate reader: MTTReader or MTBReader. -* -* Throws: -* $(TABLE -* $(TR $(TH Exception) $(TH Thrown when)) -* $(TR $(TD MTFileIOException) $(TD When extension given is neither mtt nor mtb)) -* ) -* -*/ -IReader makeReader (FilePath path, DataSet ds = null, bool rdHeader = false) { - if (path.ext == "mtb") return new MTBReader (path, ds, rdHeader); - else if (path.ext == "mtt") return new MTTReader (path, ds, rdHeader); - else throw new MTFileIOException ("Invalid mergetag extension"); -} - -/** Resolve a file path. - * - * Tries adding both ".mtt" and ".mtb" extensions, returning whichever exists (the most recently - * modified if both exist), or returns null if neither exist. */ -FilePath findFile (char[] path) { - if (path is null) return null; - - FilePath tPath = new FilePath (path ~ ".mtt"); - FilePath bPath = new FilePath (path ~ ".mtb"); - - bool bPathExists = bPath.exists; - - if (tPath.exists) { - if (bPathExists) { - // take the latest version (roughly speaking...) - return (tPath.modified > bPath.modified ? 
tPath : bPath); - } else return tPath; - } else { - if (bPathExists) return bPath; - else return null; - } -} - -/** - * Class for reading a mergetag text file. - * - * Use as: - * ----------------------- - * IReader foo; - * try { - * foo = new MTTReader("foo.mtt"); - * foo.read(); - * } - * catch (MTException) {} - * // get your data from foo.dataset. - * ----------------------- - * - * Throws: - * $(TABLE - * $(TR $(TH Exception) $(TH Thrown when)) - * $(TR $(TD MTFileIOException) $(TD An error occurs while opening the file)) - * $(TR $(TD MTFileFormatException) $(TD The file doesn't start with a recognised header/version)) - * $(TR $(TD MTSyntaxException) $(TD A file syntax error occurs)) - * $(TR $(TD MTException) $(TD An unexpected error occurs)) - * ) - * Note that all exceptions extend MTException and when any exception is thrown the class is - * rendered unusable: any subsequent calls to read will be ignored. - * - * Threading: Separate instances of Reader should be thread-safe provided access to the same - * dataset is synchronized; i.e. no two readers refering to the same dataset should run - * simultaneously. (The Reader class could be made thread-safe w.r.t. datasets, but - * performance-wise I doubt it would be worth it.) - * Do not run a single instance of Reader in multiple threads simultaneously. - */ -class MTTReader : IReader -{ -//BEGIN DATA - /** Get or set the DataSet - * - * A container for all read data. - * - * This may be accessed from here; however it may be preferable to use an external reference - * (passed to the class on initialisation). - */ - DataSet dataset () { return _dataset; } - void dataset (DataSet ds) /// ditto - { _dataset = ds; } - - /** A delegate for creating new DataSections within the dataset. - * - * Allows a user-made class to be used in the DataSet instead of DefaultData (used if no - * dataSecCreator exists). Also allows an existing class instance to be used instead of a new - * one. 
- * - * This works by supplying a function which returns a reference to an instance of a class - * implementing IDataSection. The function is passed the ID of the new section and may use this - * to use different IDataSection classes for different sections. - * - * The function may also return null, in which case the section will be skipped. In the version - * of read taking a set of sections to read, the section will not be marked as read and may - * still be read later (assuming dataSecCreator returns non-null). However, in the version of - * read not taking the set argument, all sections are set as read regardless, and the section - * cannot be read later. - */ - void dataSecCreator (IDataSection delegate (ID) dSC) { - _dataSecCreator = dSC; - } - -private: - static Logger logger; - - // Non-static symbols: - final char[] ErrFile; // added after ErrInFile to do the same without the "in " bit. - final char[] ErrInFile; // something like "in \"path/file.mtt\"" - - final char[] fbuf; // file is read into this - MTFormatVersion.VERS fileVer = MTFormatVersion.VERS.INVALID; // Remains INVALID until set otherwise by CTOR. - - IDataSection delegate (ID) _dataSecCreator = null; // see property setter above - - size_t endOfHeader; - bool allRead = false; // true if endOfHeader == fbuf.length or read([]) has run - bool fatal = false; // a fatal file error occured; don't try to recover - /* If the file is scanned for sections, the starting position of all sections are stored - * in secTable. If this is empty, either no sections exist (and endOfHeader == fbuf.length) - * or a section scan has not been run (read() with no section names doesn't need to do so). 
- */ - struct SecMD { // sec meta data - static SecMD opCall (size_t _pos, bool _read) { - SecMD ret; - ret.pos = _pos; - ret.read = _read; - return ret; - } - size_t pos; // position to start reading - bool read; // true if already read - } - SecMD [ID] secTable; - - DataSet _dataset; -//END DATA - -//BEGIN METHODS: CTOR / DTOR - static this () { - logger = Log.getLogger ("mde.mergetag.read.Reader"); - } - - /** Tries to open file path and read it into a buffer. - * - * Params: - * path = The name or FilePath of the file to open. - * Standard extensions are .mtt and .mtb for text and binary files respectively. - * ds = If null create a new DataSet, else use existing DataSet ds and merge read - * data into it. - * rdHeader = If true, read the header like a standard section. Doesn't read the header by - * default since if it's not requested it's likely not wanted. - * - * Memory: - * This currently works by loading the whole file into memory at once. This should be fine most - * of the time, but could potentially be a problem. Changing this would mean significantly - * changes to the way the code works. - */ - /* Ideas for implementing a partial-loading memory model: - * Use a conduit directly. - * Use a fiber to do the parsing; let it switch back when it runs out of memory. - * Redesign the code so it never needs to look backwards in the buffer? - * - * Major problem: reading only some sections and keeping references to other sections - * would no longer be possible. 
- */ - public this (char[] path, DataSet ds = null, bool rdHeader = false) { - this (new FilePath (path), ds, rdHeader); - } - /** ditto */ - public this (FilePath path, DataSet ds = null, bool rdHeader = false) { - // Create a dataset or use an existing one - if (ds !is null) _dataset = ds; - else _dataset = new DataSet(); - - // Open & read the file - try { // Supports unicode files with a BOM; defaults to UTF8 when there isn't a BOM: - scope file = new UnicodeFile!(char) (path, Encoding.Unknown); - fbuf = cast(char[]) file.read(); - } catch (Exception e) { - throwMTErr ("Error reading file: " ~ e.msg, new MTFileIOException); - } - // Remember the file name so that we can report errors (somewhat) informatively: - ErrFile = path.path ~ path.file; - ErrInFile = " in \"" ~ ErrFile ~ '"'; - - // Version checking & matching header section tag: - if (fbuf.length < 6 || fbuf[0] != '{' || fbuf[1] != 'M' || fbuf[2] != 'T' || fbuf[5] != '}') - throwMTErr("Not a valid MergeTag text file" ~ ErrInFile, new MTFileFormatException); - fileVer = MTFormatVersion.parseString (fbuf[3..5]); - if (fileVer == MTFormatVersion.VERS.INVALID) - throwMTErr("Unrecognised MergeTag version: MT" ~ fbuf[3..5] ~ ErrInFile, new MTFileFormatException); - - // Header reading/skipping: - if (rdHeader) { // only bother actually reading it if it was requested - // If already existing, merge; else create a new DefaultData. - if (!_dataset.header) _dataset.header = new DefaultData; - endOfHeader = parseSection (6, cast(IDataSection) _dataset.header); - } - else endOfHeader = parseSection (6,null); - } -//END METHODS: CTOR / DTOR - -//BEGIN METHODS: PUBLIC - /** Scans for sections if not already done and returns a list of IDs. - * - * Won't work (will return an empty array) if all sections have already been read without - * scanning for sections. 
- */ - public ID[] getSectionNames () { - if (fatal) return []; - if (!secTable.length) read([]); // scan for sections - return secTable.keys; - } - - /** Reads (some) sections of the file into data. Note that sections will never be _read twice. - * - * To be more accurate, the file is copied into a buffer by this(). read() then parses the - * contents of this buffer, and stores the contents in dataset. - * - * Each section read is stored in a DataSection class. By default this is an instance of - * DefaultData; this can be customised (see dataSecCreator). - * - * If secSet is provided, reading is restricted to sections given in secSet, otherwise all - * sections are read. Sections given in secSet but not found in the file are not reported as an - * error. Suggested: supply a HashSet!(uint) as the View!(ID). An ArrayBag!(ID) as used is not a - * good choice, except that in this case it's empty. - * - * Merging: - * Where a section already exists in the DataSet (when either the section is given more than - * once in the file, or it was read from a different file by another reader) it is merged. - * Entries already in the DataSet take priority. - * - * Performance: - * Note that loading only desired sections like this still parses the sections not - * read (although it does not try to understand the type or data fields), so there is only a - * small performance advantage to this where other sections do exist in the file. There is also - * some overhead in only partially reading the file to keep track of where other sections are so - * that the entire file need not be re-read if further (or all remaining) sections are read - * later. 
- */ - public void read () { - if (secTable.length) { - foreach (ID id, ref SecMD smd; secTable) { - if (!smd.read) { - IDataSection ds = getOrCreateSec (id); - parseSection (smd.pos, ds); - // allRead is set true so there's no point setting smd.read = true - } - } - } else { // this time we don't need to use secTable - for (size_t pos = endOfHeader; pos < fbuf.length;) { - ID id = fbufReadSecMarker (pos); - IDataSection ds = getOrCreateSec (id); - pos = parseSection (pos, ds); - } - } - - allRead = true; - } - /** ditto */ - public void read (ID[] secSet) { - HashSet!(ID) hs = new HashSet!(ID); - foreach (id; secSet) hs.add(id); - read (hs); - } - /** ditto */ - public void read (View!(ID) secSet) { - if (allRead || fatal) return; // never do anything in either case - - if (secTable.length) { - foreach (ID id; secSet) { - SecMD* psmd = id in secTable; - if (psmd && !psmd.read) { // may not exist - IDataSection ds = getOrCreateSec (id); - parseSection (psmd.pos, ds); - if (ds !is null) psmd.read = true; // getOrCreateSec may return null - } - } - } else { - for (size_t pos = endOfHeader; pos < fbuf.length;) { - ID id = fbufReadSecMarker (pos); - secTable[id] = SecMD(pos,false); // add to table - if (secSet.contains(id)) { - IDataSection ds = getOrCreateSec (id); - pos = parseSection (pos, ds); - if (ds !is null) secTable[id].read = true; - } else { - pos = parseSection (pos, null); // skip section - } - } - } - } -//END METHODS: PUBLIC - -//BEGIN METHODS: PRIVATE - /* Utility function for read - * Look for a section; return it if it exists otherwise create a new section: - * use _dataSecCreator if it exists or just create a DefaultData if not. - * However if _dataSecCreator returns null don't add it to the dataset. 
- */ - private IDataSection getOrCreateSec (ID id) { - IDataSection* i = id in _dataset.sec; - if (i) return *i; - else { - IDataSection s; - if (_dataSecCreator !is null) s = _dataSecCreator(id); - else s = new DefaultData; - if (s !is null) _dataset.sec[id] = s; - return s; - } - } - - /* Reads a section, starting from index pos, finishing at the next section marker (returning - the position of the start of the marker). pos should start after the section marker. - - After analysing tags, the function passes the type, ID and data to addTag. - - NOTE: from performance tests on indexing char[]'s and dereferencing char*'s, the char*'s are - slightly faster, but a tiny difference isn't worth the extra effort/risk of using char*'s. - */ - private size_t parseSection (size_t pos, IDataSection dsec) { - debug scope (failure) - logger.trace ("MTTReader.parseSection: failure"); - /* Searches fbuf starting from start to find one of <=>| and stops at its index. - - If quotable then be quote-aware for single and double quotes. - Note: there's no length restriction for the content of the quote since it could be a single - non-ascii UTF-8 char which would look like several chars. - */ - void fbufLocateDataTagChar (ref size_t pos, bool quotable) { - while (true) { - fbufIncrement (pos); - - if ((fbuf[pos] >= '<' && fbuf[pos] <= '>') || fbuf[pos] == '|') return; - else if (quotable) { - char c = fbuf[pos]; - if (c == '\'' || c == '"') { - fbufIncrement(pos); - while (fbuf[pos] != c) { - if (fbuf[pos] == '\\') ++pos; // escape seq. 
- fbufIncrement(pos); - } - } - } - } - } - - // Used to ignore a tag (if it starts !< or !{ or should otherwise be ignored): - bool comment = false; - for (; pos < fbuf.length; ++pos) { - if (Util.isSpace(fbuf[pos])) continue; // whitespace - else if (fbuf[pos] == '<') { // data tag - char[] ErrDTAG = "Bad data tag format: not " ~ ErrInFile; - - // Type section of tag: - size_t pos_s = pos + 1; - fbufLocateDataTagChar (pos, false); // find end of type section - if (fbuf[pos] != '|') throwMTErr (ErrDTAG, new MTSyntaxException); - char[] type = fbuf[pos_s..pos]; - - // ID section of tag: - pos_s = pos + 1; - fbufLocateDataTagChar (pos, false); // find end of type section - if (fbuf[pos] != '=') throwMTErr (ErrDTAG, new MTSyntaxException); - ID tagID = cast(ID) fbuf[pos_s..pos]; - - // Data section of tag: - pos_s = pos + 1; - fbufLocateDataTagChar (pos, true); // find end of data section - if (fbuf[pos] != '>') throwMTErr (ErrDTAG, new MTSyntaxException); - char[] data = fbuf[pos_s..pos]; - - if (!comment && dsec !is null) { - type = Util.trim(type); - try { - dsec.addTag (type, tagID, data); - } - catch (TextException e) { - logger.error ("TextException while reading " ~ ErrFile ~ ":"); // following a parse error - logger.error (e.msg); - logger.error ("Tag ignored: <"~type~"|"~tagID~"="~data~">"); - // No throw: tag is just ignored - } - catch (Exception e) { - logger.error ("Unknown error occured" ~ ErrInFile ~ ':'); - logger.error (e.msg); - throwMTErr (e.msg); // Fatal to Reader - } - } else comment = false; // cancel comment status now - } - else if (fbuf[pos] == '{') { - if (comment) { // simple block comment - uint depth = 0; // depth of embedded comment blocks - while (true) { - fbufIncrement (pos); - if (fbuf[pos] == '}') { - if (depth == 0) break; - else --depth; - } else if (fbuf[pos] == '{') - ++depth; - } - comment = false; // end of this comment - } else { - return pos; // next section coming up; we are done - } - } - else if (fbuf[pos] == '!') { // 
possibly a comment; check next char - comment = true; // starting a comment (or an error) - // variable is reset at end of comment - } else // must be an error - throwMTErr ("Invalid character (or sequence starting \"!\") outside of tag" ~ ErrInFile, new MTSyntaxException); - } - // if code execution reaches here, we're at EOF - // possible error: last character was ! (but don't bother checking since it's inconsequential) - return pos; - } - - /* Parses fbuf for a section marker. Already knows fbuf[pos] == '{'. - */ - private ID fbufReadSecMarker (ref size_t pos) { - // at this point pos is whatever a parseSection run returned - // since we haven't hit EOF, fbuf[pos] MUST be '{' so no need to check - fbufIncrement(pos); - - size_t start = pos; - for (; pos < fbuf.length; ++pos) - if (fbuf[pos] == '}' || fbuf[pos] == '{') break; - - if (pos >= fbuf.length || fbuf[pos] != '}') - throwMTErr ("Bad section tag format: not {id}" ~ ErrInFile, new MTSyntaxException); - - ID id = cast(ID) fbuf[start..pos]; - fbufIncrement(pos); - return id; - } - - /* Increments pos and checks it hasn't hit fbuf.length . */ - private void fbufIncrement(ref size_t pos) { - ++pos; - if (pos >= fbuf.length) throwMTErr("Unexpected EOF" ~ ErrInFile, new MTSyntaxException); - } - - private void throwMTErr (char[] msg, MTException exc = new MTException) { - fatal = true; // if anyone catches the error and tries to do anything --- we're dead now - logger.error (msg); // report the error - throw exc; // and signal our error - } -//END METHODS: PRIVATE -} - - -/** -* Class for reading a mergetag text file. -* -* Currently only a dummy class: a MTNotImplementedException will be thrown if created. 
-*/ -class MTBReader : IReader -{ - public this (char[] path, DataSet ds = null, bool rdHeader = false) { - this (new FilePath (path), ds, rdHeader); - } - public this (PathView path, DataSet ds = null, bool rdHeader = false) { - throw new MTNotImplementedException; - } - - DataSet dataset () { /// Get the DataSet - return null; - } - void dataset (DataSet) {} /// Set the DataSet - - void dataSecCreator (IDataSection delegate (ID)) {} /// Set the dataSecCreator - - ID[] getSectionNames () { /// Get identifiers for all sections - return []; - } - void read () {} /// Commence reading - void read (ID[] secSet) {} /// ditto - void read (View!(ID) secSet) {} /// ditto -} diff -r ea58f277f487 -r d8fccaa45d5f mde/mergetag/Writer.d --- a/mde/mergetag/Writer.d Thu Aug 07 11:25:27 2008 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,279 +0,0 @@ -/* LICENSE BLOCK -Part of mde: a Modular D game-oriented Engine -Copyright © 2007-2008 Diggory Hardy - -This program is free software: you can redistribute it and/or modify it under the terms -of the GNU General Public License as published by the Free Software Foundation, either -version 2 of the License, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; -without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . */ - -/************************************************************************************************** - * This module contains all writing functions, for both binary and text MergeTag files. - * - * Files can be written in a text or binary form; binary is faster and smaller while text allows - * editing with an ordinary text editor. TextWriter and BinaryWriter are the main classes, both of - * which implement the interface IWriter. 
DualWriter is another class implementing IWriter, which - * contains a private instance of a TextWriter and a BinaryWriter and implements all methods in the - * interface simply by chaining the appropriate method from each of these classes, thus performing - * two writes at once. - * - * Any of these three classes may be used directly, or makeWriter may be invoked to create an - * instance of the appropriate class. - *************************************************************************************************/ - module mde.mergetag.Writer; - -// package imports -public import mde.mergetag.iface.IWriter; -import mde.mergetag.DataSet; -import mde.mergetag.internal; -import mde.mergetag.exception; - -// tango imports -import tango.core.Exception; -import tango.io.FileConduit; -import tango.io.Buffer : Buffer, IBuffer; -import tango.io.Print : Print; -import convInt = tango.text.convert.Integer; -import tango.util.log.Log : Log, Logger; - -private Logger logger; -static this () { - logger = Log.getLogger ("mde.mergetag.Writer"); -} - - -/** Method to create and return either a MTTWriter or a MTBWriter. - * - * Has two modes of operation: if method is FromExtension, examines the existing extension and - * creates a MTT/MTB writer if the extension is mtt or mtb (throwing if not). - * - * Otherwise, writing format is determined directly by method, and appropriate extensions are - * added to the file name without checking for an existing extension. 
- * - * Params: - * path = File path - * dataset = Dataset passed to Writer to write from (if null, must be set before write() is called) - * method = $(TABLE - * $(TR $(TH Value) $(TH Writer returned) $(TH Suffix added)) - * $(TR $(TD FromExtension) $(TD MTBWriter or MTTWriter)$(TD $(I none))) - * $(TR $(TD Binary) $(TD MTBWriter) $(TD .mtb)) - * $(TR $(TD Text) $(TD MTTWriter) $(TD .mtt)) - * $(TR $(TD Both) $(TD DualWriter) $(TD .mtb / .mtt)) - * ) - * - * Throws: - * MTFileFormatException if neither test can deduce the writing method, the supplied writing - * method is invalid or the determined/supplied method is not yet implemented. - * - * Use as: - * ----------------------- - * DataSet dataset; // contains data to write - * IWriter foo; - * try { - * foo = makeWriter(...); - * foo.write(); - * } - * catch (MTException) {} - * ----------------------- - * Where the makeWriter line has one of the following forms: - * ----------------------- - * foo = makeWriter("foo.mtt", dataset); - * foo = makeWriter("foo", dataset, WriterMethod.Text); - * ----------------------- - * - * Throws: - * MTFileFormatException if unable to determine writing format or use requested format. - */ -//FIXME: separate functions for separate functionality, like in Reader? 
-IWriter makeWriter (char[] path, DataSet dataset = null, - WriterMethod method = WriterMethod.FromExtension) { - if (method == WriterMethod.FromExtension) { - if (path.length > 4 && path[$-4..$] == ".mtt") - return new MTTWriter (path, dataset); - else if (path.length > 4 && path[$-4..$] == ".mtb") - return new MTBWriter (path, dataset); - else { - logger.error ("Unable to determine writing format: text or binary"); - throw new MTFileFormatException; - } - } - else { - if (method == WriterMethod.Binary) return new MTBWriter (path~".mtb", dataset); - else if (method == WriterMethod.Text) return new MTTWriter (path~".mtt", dataset); - else if (method == WriterMethod.Both) return new DualWriter (path, dataset); - else throw new MTFileFormatException; - } -} - - -/** - * Class to write a dataset to a file. - * - * Files are only actually open for writing while the write() method is running. - * - * Throws: - * $(TABLE - * $(TR $(TH Exception) $(TH Thrown when)) - * $(TR $(TD MTNoDataSetException) $(TD No dataset is available to write from)) - * $(TR $(TD MTFileIOException) $(TD An error occurs while attemting to write the file)) - * $(TR $(TD MTException) $(TD An unexpected error occurs)) - * ) - * Note that all exceptions extend MTException; unlike Reader exceptions don't block further calls. - */ -class MTTWriter : IWriter -{ -//BEGIN DATA - /// Get or set the DataSet (i.e. the container from which all data is written). - DataSet dataset () { return _dataset; } - void dataset (DataSet ds) /// ditto - { _dataset = ds; } - - -private: - // taken from tango.io.Console, mostly to make sure notepad can read our files: - version (Win32) - const char[] Eol = "\r\n"; - else - const char[] Eol = "\n"; - - /* The container where data is written from. */ - DataSet _dataset; - - char[] _path; -//END DATA - -//BEGIN CTOR / DTOR - /** Prepares to open file path for writing. - * - * The call doesn't actually execute any code so cannot fail (unless out of memory). 
- * - * Params: - * path = The name of the file to open. - * Standard extensions are .mtt and .mtb for text and binary files respectively. - * dataset_ = If null create a new DataSet, else use existing DataSet *dataset_ and merge read - * data into it. - */ - public this (char[] path, DataSet ds = null) { - _path = path; - _dataset = ds; - } -//END CTOR / DTOR - - /** Writes the header and all DataSections. - * - * Firstly writes the header unless it has already been written. Then writes all DataSections - * to the file. Thus if write is called more than once with or without changing the DataSet the - * header should be written only once. This behaviour could, for instance, be used to write - * multiple DataSets into one file without firstly merging them. Note that this behaviour may - * be changed when binary support is added. - */ - public void write () - { - if (!_dataset) throwMTErr ("write(): no Dataset available to write from!", new MTNoDataSetException ()); - - try { - FileConduit conduit; // actual conduit; don't use directly when there's content in the buffer - IBuffer buffer; // write strings directly to this (use opCall(void[]) ) - - // Open a conduit on the file: - conduit = new FileConduit (_path, FileConduit.WriteCreate); - scope(exit) conduit.close(); - - buffer = new Buffer(conduit); // And a buffer - scope(exit) buffer.flush(); - - // Write the header: - buffer ("{MT" ~ MTFormatVersion.CurrentString ~ "}" ~ Eol); - if (_dataset.header !is null) writeSection (buffer, _dataset.header); - - // Write the rest: - foreach (ID id, IDataSection sec; _dataset.sec) { - writeSectionIdentifier (buffer, id); - writeSection (buffer, sec); - } - - buffer.flush(); - - } - catch (IOException e) { - throwMTErr ("Error writing to file: " ~ e.msg, new MTFileIOException); - } - catch (Exception e) { - throwMTErr ("Unexpected exception when writing file: " ~ e.msg); - } - } - - private void writeSectionIdentifier (IBuffer buffer, ID id) { - buffer ("{" ~ cast(char[])id 
~ "}" ~ Eol); - } - - private void writeSection (IBuffer buffer, IDataSection sec) { - void writeItem (char[] tp, ID id, char[] dt) { // actually writes an item - buffer ("<" ~ tp ~ "|" ~ cast(char[])id ~"=" ~ dt ~ ">" ~ Eol); - } - sec.writeAll (&writeItem); - - buffer (Eol); // blank line at end of each section - } - - private void throwMTErr (char[] msg, Exception exc = new MTException) { - logger.error (msg); // report the error - throw exc; // and signal our error - } -} - -/* -* Implement MTBWriter (and move both writers to own modules?). -*/ -class MTBWriter : IWriter { - public this (char[] path, DataSet ds = null) { - throw new MTNotImplementedException; - - /+_path = path; - _dataset = ds;+/ - } - - DataSet dataset () { - return null; - } - void dataset (DataSet) {} - - void write () {} -} - -/* Basic implementation for mtt only. -* -*Implement std CTORs which add extensions to each filename and extra CTORs which take two filenames. -*/ -class DualWriter : IWriter { - /** The individual writers. - * - * Potentially could be used directly, but there should be no need. */ - MTTWriter mtt; - //MTBWriter mtb; /** ditto */ - - public this (char[] path, DataSet ds = null) { - mtt = new MTTWriter (path~".mtt", ds); - } - - DataSet dataset () { - return mtt.dataset; - } - void dataset (DataSet ds) { - mtt.dataset = ds; - } - - /** Write. - * - * Write text then binary, so the mtb file will be the most recent. - */ - void write () { - mtt.write(); - } -} diff -r ea58f277f487 -r d8fccaa45d5f mde/mergetag/deserialize.d --- a/mde/mergetag/deserialize.d Thu Aug 07 11:25:27 2008 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,605 +0,0 @@ -/************************************************************************************************** - * Generic deserialization templated function. - * - * copyright: Copyright (c) 2007-2008 Diggory Hardy. - * - * author: Diggory Hardy, diggory.hardy@gmail.com - * - * Supports: - * Associative arrays, arrays (inc. 
strings), structs, char types, bool, int types, float types. - * - * There are also some public utility functions with their own documentation. - * - * Throws: - * On errors, a ParseException or a UnicodeException (both extend TextException) is thrown with a - * suitable message. No other exceptions should be thrown. - * - * Examples: - * ------------------------------------------------------------------------------------------------ - * // Basic examples: - * ulong a = deserialize!(ulong) ("20350"); - * float d = deserialize!(float) (" 1.2e-9 "); - * int[] b = deserialize!(int[]) ("[0,1,2,3]"); - * - * // String and char[] syntax: - * char[] c = deserialize!(char[]) ("\"A string\""); - * char[] e = deserialize!(char[]) ("['a','n','o','t','h','e','r', ' ' ,'s','t','r','i','n','g']"); - * - * // These be used interchangably; here's a more complex example of an associative array: - * bool[char[]] f = deserialize!(bool[char[]]) ("[ \"one\":true, ['t','w','o']:false, \"three\":1, \"four\":000 ]"); - * - * // There is also a special notation for ubyte[] types: - * // The digits following 0x must be in pairs and each specify one ubyte. - * assert ( deserialize!(ubyte[]) (`0x01F2AC`) == deserialize!(ubyte[]) (`[01 ,0xF2, 0xAC]`) ); - * - * // There's no limit to the complexity! - * char[char[][][][char]][bool] z = ...; // don't expect me to write this! - * ------------------------------------------------------------------------------------------------ - * - * TODO: Optimize memory allocation (if possible?). Test best sizes for initial allocations - * instead of merely guessing? - *************************************************************************************************/ -//NOTE: in case of multiple formats, make this a dummy module importing both serialize modules, -// or put all the code here. 
-module mde.mergetag.deserialize; - -// tango imports -import tango.core.Exception : TextException, UnicodeException; -import cInt = tango.text.convert.Integer; -import cFloat = tango.text.convert.Float; -import Utf = tango.text.convert.Utf; -import Util = tango.text.Util; - -/** - * Base class for deserialize exceptions. - */ -class ParseException : TextException -{ - this( char[] msg ) - { - super( msg ); - } -} - -alias deserialize parseTo; // support the old name - -//BEGIN deserialize templates - -// Associative arrays - -T[S] deserialize(T : T[S], S) (char[] src) { - src = Util.trim(src); - if (src.length < 2 || src[0] != '[' || src[$-1] != ']') - throw new ParseException ("Invalid associative array: not [ ... ]"); // bad braces. - - T[S] ret; - foreach (char[] pair; split (src[1..$-1])) { - uint i = 0; - while (i < pair.length) { // advance to the ':' - char c = pair[i]; - if (c == ':') break; - if (c == '\'' || c == '"') { // string or character - ++i; - while (i < pair.length && pair[i] != c) { - if (pair[i] == '\\') - ++i; // escape seq. - ++i; - } - // Could have an unterminated ' or " causing i >= pair.length, but: - // 1. Impossible: split would have thrown - // 2. In any case this would be caught below. - } - ++i; - } - if (i >= pair.length) - throw new ParseException ("Invalid associative array: encountered [ ... KEY] (missing :DATA)"); - ret[deserialize!(S) (pair[0..i])] = deserialize!(T) (pair[i+1..$]); - } - return ret; -} - - -// Arrays - -T[] deserialize(T : T[]) (char[] src) { - src = Util.trim(src); - if (src.length >= 2 && src[0] == '[' && src[$-1] == ']') - return toArray!(T[]) (src); - throw new ParseException ("Invalid array: not [ ... 
]"); -} - -// String (array special case) -T deserialize(T : char[]) (char[] src) { - src = Util.trim(src); - if (src.length >= 2 && src[0] == '"' && src[$-1] == '"') { - src = src[1..$-1]; - T ret; - ret.length = src.length; // maximum length; retract to actual length later - uint i = 0; - for (uint t = 0; t < src.length;) { - // process a block of non-escaped characters - uint s = t; - while (t < src.length && src[t] != '\\') ++t; // non-escaped characters - uint j = i + t - s; - ret[i..j] = src[s..t]; // copy a block - i = j; - - // process a block of escaped characters - while (t < src.length && src[t] == '\\') { - t++; - if (t == src.length) - throw new ParseException ("Invalid string: ends \\\" !"); // next char is " - ret[i++] = unEscapeChar (src[t++]); // throws if it's invalid - } - } - return ret[0..i]; - } - else if (src.length >= 2 && src[0] == '[' && src[$-1] == ']') - return toArray!(T) (src); - throw new ParseException ("Invalid string: not quoted (\"*\") or char array (['a',...,'c'])"); -} -// Unicode conversions for strings: -T deserialize(T : wchar[]) (char[] src) { - // May throw a UnicodeException; don't bother catching and rethrowing: - return Utf.toString16 (deserialize!(char[]) (src)); -} -T deserialize(T : dchar[]) (char[] src) { - // May throw a UnicodeException; don't bother catching and rethrowing: - return Utf.toString32 (deserialize!(char[]) (src)); -} - -// Binary (array special case) -T deserialize(T : ubyte[]) (char[] src) { - src = Util.trim(src); - // Standard case: - if (src.length >= 2 && src[0] == '[' && src[$-1] == ']') return toArray!(T) (src); - // Special case: sequence of hex digits, each pair of which is a ubyte - if (src.length >= 2 && src[0..2] == "0x") { - src = src[2..$]; // strip down to actual digits - - // Must be in pairs: - if (src.length % 2 == 1) - throw new ParseException ("Invalid binary: odd number of chars"); - - T ret; - ret.length = src.length / 2; // exact - - for (uint i, pos; pos + 1 < src.length; ++i) 
{ - ubyte x = readHexChar(src, pos) << 4; - x |= readHexChar(src, pos); - ret[i] = x; - } - return ret; - } - else throw new ParseException ("Invalid ubyte[]: not an array and doesn't start 0x"); -} - - -// Basic types - -// Char -// Assumes value is <= 127 (for valid UTF-8), since input would be invalid UTF-8 if not anyway. -// (And we're not really interested in checking for valid unicode; char[] conversions don't either.) -T deserialize(T : char) (char[] src) { - src = Util.trim(src); - if (src.length < 3 || src[0] != '\'' || src[$-1] != '\'') - throw new ParseException ("Invalid char: not 'x' or '\\x'"); - if (src[1] != '\\') { - if (src.length == 3) - return src[1]; // Either non escaped - throw new ParseException ("Invalid char: too long (or non-ASCII)"); - } else if (src.length == 4) - return unEscapeChar (src[2]); // Or escaped - - throw new ParseException ("Invalid char: '\\'"); -} -// Basic unicode convertions for wide-chars. -// Assumes value is <= 127 as does deserialize!(char). 
-T deserialize(T : wchar) (char[] src) { - return cast(T) deserialize!(char) (src); -} -T deserialize(T : dchar) (char[] src) { - return cast(T) deserialize!(char) (src); -} - -// Bool -T deserialize(T : bool) (char[] src) { - src = Util.trim(src); - if (src == "true") - return true; - if (src == "false") - return false; - uint pos; - while (src.length > pos && src[pos] == '0') ++pos; // skip leading zeros - if (src.length == pos && pos > 0) - return false; - if (src.length == pos + 1 && src[pos] == '1') - return true; - throw new ParseException ("Invalid bool: not true or false and doesn't evaluate to 0 or 1"); -} - -// Ints -T deserialize(T : byte) (char[] src) { - return toTInt!(T) (src); -} -T deserialize(T : short) (char[] src) { - return toTInt!(T) (src); -} -T deserialize(T : int) (char[] src) { - return toTInt!(T) (src); -} -T deserialize(T : long) (char[] src) { - return toTInt!(T) (src); -} -T deserialize(T : ubyte) (char[] src) { - return toTInt!(T) (src); -} -T deserialize(T : ushort) (char[] src) { - return toTInt!(T) (src); -} -T deserialize(T : uint) (char[] src) { - return toTInt!(T) (src); -} -T deserialize(T : ulong) (char[] src) { - return toTInt!(T) (src); -} -debug (UnitTest) unittest { - assert (deserialize!(byte) ("-5") == cast(byte) -5); - // annoyingly, octal syntax differs from D (blame tango): - assert (deserialize!(uint[]) ("[0b0100,0o724,0xFa59c,0xFFFFFFFF,0]") == [0b0100u,0724,0xFa59c,0xFFFFFFFF,0]); -} - -// Floats -T deserialize(T : float) (char[] src) { - return toTFloat!(T) (src); -} -T deserialize(T : double) (char[] src) { - return toTFloat!(T) (src); -} -T deserialize(T : real) (char[] src) { - return toTFloat!(T) (src); -} - - -// Structs -T deserialize(T) (char[] src) { - static assert (is(T == struct), "Unsupported type: "~typeof(T)); - - src = Util.trim(src); - if (src.length < 2 || src[0] != '{' || src[$-1] != '}') - throw new ParseException ("Invalid struct: not { ... 
}"); - - // cannot access elements of T.tupleof with non-const key, so use a type which can be - // accessed with a non-const key to store slices: - char[][T.tupleof.length] temp; - foreach (char[] pair; split (src[1..$-1])) { - uint i = 0; - while (i < pair.length) { // advance to the ':' - char c = pair[i]; - if (c == ':') - break; - // key must be an int so no need for string checks - ++i; - } - if (i >= pair.length) - throw new ParseException ("Invalid struct: encountered { ... KEY} (missing :DATA)"); - - size_t k = deserialize!(size_t) (pair[0..i]); - // Note: could check no entry was already stored in temp. - temp[k] = pair[i+1..$]; - } - T ret; - setStruct (ret, temp); - return ret; -} -//END deserialize templates - -//BEGIN Utility funcs -/** Splits a string into substrings separated by '$(B ,)' with support for characters and strings - * containing escape sequences and for embedded arrays ($(B [...])). - * - * Params: - * src A string to separate on commas. It shouldn't have enclosing brackets. - * - * Returns: - * An array of substrings within src, excluding commas. Whitespace is not stripped and - * empty strings may get returned. - * - * Remarks: - * This function is primarily intended for as a utility function for use by the templates - * parsing arrays and associative arrays, but it may be useful in other cases too. Hence the - * fact no brackets are stripped from src. 
- */ -//FIXME foreach struct is more efficient -char[][] split (char[] src) { - src = Util.trim (src); - if (src == "") - return []; // empty array: no elements when no data - - uint depth = 0; // surface depth (embedded arrays) - char[][] ret; - ret.length = src.length / 3; // unlikely to need a longer array - uint k = 0; // current split piece - uint i = 0, j = 0; // current read location, start of current piece - - while (i < src.length) { - char c = src[i]; - if (c == '\'' || c == '"') { // string or character - ++i; - while (i < src.length && src[i] != c) { - if (src[i] == '\\') - ++i; // escape seq. - ++i; - } // Doesn't throw if no terminal quote at end of src, but this should be caught later. - } - else if (c == '[') ++depth; - else if (c == ']') { - if (depth) - --depth; - else throw new ParseException ("Invalid array literal: closes before end of data item."); - } - else if (c == ',' && depth == 0) { // only if not an embedded array - if (ret.length <= k) - ret.length = ret.length * 2; - ret[k++] = src[j..i]; // add this piece and increment k - j = i + 1; - } - ++i; - } - if (i > src.length) - throw new ParseException ("Unterminated quote (\' or \")"); - - if (ret.length <= k) - ret.length = k + 1; - ret[k] = src[j..i]; // add final piece (i >= j) - return ret[0..k+1]; -} - -/* Templated read-int function to read (un)signed 1-4 byte integers. - * - * Actually a reimplementation of tango.text.convert.Integer toLong and parse functions. - */ -private TInt toTInt(TInt) (char[] src) { - const char[] INT_OUT_OF_RANGE = "Integer out of range"; - bool sign; - uint radix, ate, ate2; - - // Trim off whitespace. - // NOTE: Cannot use tango.text.convert.Integer.trim to trim leading whitespace since it doesn't - // treat new-lines, etc. as whitespace which for our purposes is whitespace. 
- src = Util.trim (src); - - ate = cInt.trim (src, sign, radix); - if (ate == src.length) - throw new ParseException ("Invalid integer: no digits"); - ulong val = cInt.convert (src[ate..$], radix, &ate2); - ate += ate2; - - if (ate < src.length) - throw new ParseException ("Invalid integer at marked character: \"" ~ src[0..ate] ~ "'" ~ src[ate] ~ "'" ~ src[ate+1..$] ~ "\""); - - if (val > TInt.max) - throw new ParseException (INT_OUT_OF_RANGE); - if (sign) { - long sval = cast(long) -val; - if (sval > TInt.min) - return cast(TInt) sval; - else throw new ParseException (INT_OUT_OF_RANGE); - } - return cast(TInt) val; -} - -/* Basically a reimplementation of tango.text.convert.Float.toFloat which checks for - * whitespace before throwing an exception for overlong input. */ -private TFloat toTFloat(TFloat) (char[] src) { - // NOTE: As for toTInt(), this needs to strip leading as well as trailing whitespace. - src = Util.trim (src); - if (src == "") - throw new ParseException ("Invalid float: no digits"); - uint ate; - - TFloat x = cFloat.parse (src, &ate); - return x; -} - -/* Throws an exception on invalid escape sequences. Supported escape sequences are the following - * subset of those supported by D: \" \' \\ \a \b \f \n \r \t \v - */ -private char unEscapeChar (char c) -{ - // This code was generated: - if (c <= 'b') { - if (c <= '\'') { - if (c == '\"') { - return '\"'; - } else if (c == '\'') { - return '\''; - } - } else { - if (c == '\\') { - return '\\'; - } else if (c == 'a') { - return '\a'; - } else if (c == 'b') { - return '\b'; - } - } - } else { - if (c <= 'n') { - if (c == 'f') { - return '\f'; - } else if (c == 'n') { - return '\n'; - } - } else { - if (c == 'r') { - return '\r'; - } else if (c == 't') { - return '\t'; - } else if (c == 'v') { - return '\v'; - } - } - } - - // if we haven't returned: - throw new ParseException ("Bad escape sequence: \\"~c); -} - -// Reads one hex char: [0-9A-Fa-f]. Otherwise throws an exception. 
Doesn't check src.length. -private ubyte readHexChar (char[] src, inout uint pos) { - ubyte x; - if (src[pos] >= '0' && src[pos] <= '9') x = src[pos] - '0'; - else if (src[pos] >= 'A' && src[pos] <= 'F') x = src[pos] - 'A' + 10; - else if (src[pos] >= 'a' && src[pos] <= 'f') x = src[pos] - 'a' + 10; - else throw new ParseException ("Invalid hex digit."); - ++pos; - return x; -} - -// Generic array reader -// Assumes input is of form "[xxxxx]" (i.e. first and last chars are '[', ']' and length >= 2). -private T[] toArray(T : T[]) (char[] src) { - T[] ret = new T[16]; // avoid unnecessary allocations - uint i = 0; - foreach (char[] element; split(src[1..$-1])) { - if (i == ret.length) ret.length = ret.length * 2; - ret[i] = deserialize!(T) (element); - ++i; - } - return ret[0..i]; -} - -/** Set a struct's elements from an array. -* -* For a more generic version, see http://www.dsource.org/projects/tutorials/wiki/StructTupleof -*/ -// NOTE: Efficiency? Do recursive calls get inlined? -private void setStruct(S, size_t N, size_t i = 0) (ref S s, char[][N] src) { - static assert (is(S == struct), "Only to be used with structs."); - static assert (N == S.tupleof.length, "src.length != S.tupleof.length"); - static if (i < N) { - if (src[i]) - s.tupleof[i] = deserialize!(typeof(s.tupleof[i])) (src[i]); - setStruct!(S, N, i+1) (s, src); - } -} -//END Utility funcs - -debug (UnitTest) { - import tango.util.log.Log : Log, Logger; - - private Logger logger; - static this() { - logger = Log.getLogger ("text.deserialize"); - } -unittest { - // Utility - bool throws (void delegate() dg) { - bool r = false; - try { - dg(); - } catch (Exception e) { - r = true; - logger.info ("Exception caught: "~e.msg); - } - return r; - } - assert (!throws ({ int i = 5; })); - assert (throws ({ throw new Exception ("Test - this exception should be caught"); })); - - - // Associative arrays - char[][char] X = deserialize!(char[][char]) (`['a':"animal\n", 'b':['b','u','s','\n']]`); - char[][char] Y 
= ['a':cast(char[])"animal\n", 'b':['b','u','s','\n']]; - - //FIXME: when the compiler's fixed: http://d.puremagic.com/issues/show_bug.cgi?id=1671 - // just assert (X == Y) - assert (X.length == Y.length); - assert (X.keys == Y.keys); - assert (X.values == Y.values); - //X.rehash; Y.rehash; // doesn't make a difference - //assert (X == Y); // fails (compiler bug) - - assert (throws ({ deserialize!(int[int]) (`[1:1`); })); // bad brackets - assert (throws ({ deserialize!(int[char[]]) (`["ab\":1]`); })); // unterminated quote - assert (throws ({ deserialize!(int[char[]]) (`["abc,\a\b\c":1]`); })); // bad escape seq. - assert (throws ({ deserialize!(int[char[]]) (`["abc"]`); })); // no data - - - // Arrays - assert (deserialize!(double[]) (`[1.0,1.0e-10]`) == [1.0, 1.0e-10]);// generic array stuff - assert (deserialize!(double[]) (`[ ]`) == cast(double[]) []); // empty array - assert (deserialize!(int[][]) (`[[1],[2,3],[]]`) == [[1],[2,3],[]]);// sub-array - assert (throws ({ deserialize!(int[]) (`[1,2`); })); // bad brackets - assert (throws ({ deserialize!(int[][]) (`[[1]]]`); })); // bad brackets - - // char[] and char conversions, with commas, escape sequences and multichar UTF8 characters: - assert (deserialize!(char[][]) (`[ ".\"", [',','\''] ,"!\b€" ]`) == [ ".\"".dup, [',','\''] ,"!\b€" ]); - assert (throws ({ deserialize!(char[]) ("\"\\\""); })); - assert (throws ({ deserialize!(char[]) (`['a'`); })); // bad brackets - - // wchar[] and dchar[] conversions: - // The characters were pretty-much pulled at random from unicode tables. - // The last few cause some wierd (display only) effects in my editor. 
- assert (deserialize!(wchar[]) ("\"Test string: ¶α؟अกሀ搀\"") == "Test string: ¶α؟अกሀ搀"w); - assert (deserialize!(dchar[]) ("\"Test string: ¶α؟अกሀ搀\"") == "Test string: ¶α؟अกሀ搀"d); - - assert (deserialize!(ubyte[]) (`0x01F2aC`) == cast(ubyte[]) [0x01, 0xF2, 0xAC]); // ubyte[] special notation - assert (deserialize!(ubyte[]) (`[01 ,0xF2, 0xAC]`) == cast(ubyte[]) [0x01, 0xF2, 0xAC]); // ubyte[] std notation - assert (throws ({ deserialize!(ubyte[]) (`0x123`); })); // digits not in pairs - assert (throws ({ deserialize!(ubyte[]) (`[2,5`); })); // not [...] or 0x.. - assert (throws ({ deserialize!(ubyte[]) (`0x123j`); })); - - - // char types - assert (deserialize!(char) ("'\\\''") == '\''); - assert (deserialize!(wchar) ("'X'") == 'X'); - assert (deserialize!(dchar) ("'X'") == 'X'); - assert (throws ({ deserialize!(char) ("'\\'"); })); - assert (throws ({ deserialize!(char) ("'£'"); })); // non-ascii - assert (throws ({ deserialize!(char) ("''"); })); - assert (throws ({ deserialize!(char) ("'ab'"); })); - assert (throws ({ deserialize!(wchar) ("''"); })); - - - // bool - assert (deserialize!(bool[]) (`[true,false,01,00]`) == cast(bool[]) [1,0,1,0]); - assert (throws ({ deserialize!(bool) ("011"); })); - - - // ints - assert (deserialize!(byte) ("-5") == cast(byte) -5); - assert (deserialize!(int) ("-0x7FFFFFFF") == cast(int) -0x7FFF_FFFF); - // annoyingly, octal syntax differs from D (blame tango): - assert (deserialize!(uint[]) ("[0b0100,0o724,0xFa59c,0xFFFFFFFF,0]") == [0b0100u,0724,0xFa59c,0xFFFFFFFF,0]); - assert (throws ({ deserialize!(int) (""); })); - assert (throws ({ deserialize!(int) ("0x8FFFFFFF"); })); - assert (throws ({ deserialize!(uint) ("-1"); })); - assert (throws ({ deserialize!(uint) ("1a"); })); - - - // floats - assert (deserialize!(float) ("0.0") == 0.0f); - assert (deserialize!(double) ("-1e25") == -1e25); - assert (deserialize!(real) ("5.24e-269") == cast(real) 5.24e-269); - assert (throws ({ deserialize!(float) (""); })); - - - // structs - 
struct A { int x = 5; char y; } - struct B { A a; float b; } - A a; a.y = 'y'; - assert (deserialize!(A) ("{ 1 : 'y' }") == a); - B b; b.a = a; b.b = 1.0f; - assert (deserialize!(B) (" {1:1.0,0: { 1 : 'y' } } ") == b); - assert (throws ({ deserialize!(A) (" 1:'x'}"); })); // bad braces - assert (throws ({ deserialize!(A) ("{ 1 }"); })); // no :DATA - - - // unEscapeChar - assert (deserialize!(char[]) ("\"\\a\\b\\t\\n\\v\\f\\r\\\"\\\'\\\\\"") == "\a\b\t\n\v\f\r\"\'\\"); - - logger.info ("Unittest complete."); -} -} diff -r ea58f277f487 -r d8fccaa45d5f mde/mergetag/exception.d --- a/mde/mergetag/exception.d Thu Aug 07 11:25:27 2008 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,91 +0,0 @@ -/* LICENSE BLOCK -Part of mde: a Modular D game-oriented Engine -Copyright © 2007-2008 Diggory Hardy - -This program is free software: you can redistribute it and/or modify it under the terms -of the GNU General Public License as published by the Free Software Foundation, either -version 2 of the License, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; -without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . */ - -/******************************************* - * Contains exception classes for MergeTag. - * - * Publically imports mde.exception. - ******************************************/ -module mde.mergetag.exception; - -public import mde.exception; - -/// Base MergeTag exception class. -class MTException : mdeException { - char[] getSymbol () { - return super.getSymbol ~ ".mergetag"; - } - - this (char[] msg) { - super(msg); - } - this () { // Only called when an unexpected exception/error occurs - super ("Unknown exception"); - } -} - -/** Thrown on file IO errors. 
*/ -class MTFileIOException : MTException { - this () { - super ("File IO exception"); - } - this (char[] msg) { - super (msg); - } -} - -/** Thrown on unknown format errors; when reading or writing and the filetype cannot be guessed. */ -class MTFileFormatException : MTException { - this () { - super ("File format exception"); - } -} - -/** Thrown on syntax errors when reading; bad tags or unexpected EOF. */ -class MTSyntaxException : MTException { - this () { - super ("Syntax exception"); - } -} - -/** Thrown by addTag (in classes implementing IDataSection) when a data parsing error occurs -* (really just to make whoever called addTag to log a warning saying where the error occured). */ -class MTaddTagParseException : MTException { - this () { - super ("Parse exception within addTag"); - } -} - -/+ -/// Thrown by TypeView.parse on errors. -class MTBadTypeStringException : MTException { - this () {} -} -+/ - -/// Thrown by *Writer.write. -class MTNoDataSetException : MTException { - this () { - super ("No dataset"); - } -} - -/// Thrown when attempting to use an unimplemented part of the package -/// Really, just until MTB stuff is implemented -class MTNotImplementedException : MTException { - this () { - super ("Functionality not implemented!"); - } -} diff -r ea58f277f487 -r d8fccaa45d5f mde/mergetag/iface/IDataSection.d --- a/mde/mergetag/iface/IDataSection.d Thu Aug 07 11:25:27 2008 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,68 +0,0 @@ -/* LICENSE BLOCK -Part of mde: a Modular D game-oriented Engine -Copyright © 2007-2008 Diggory Hardy - -This program is free software: you can redistribute it and/or modify it under the terms -of the GNU General Public License as published by the Free Software Foundation, either -version 2 of the License, or (at your option) any later version. 
- -This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; -without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . */ - -/** This module contains the interface IDataSection used by DataSet. -* -* It has been given its own module to avoid cyclic dependancies and separate out the functionality -* of mergetag. -* -* Also some base mergetag symbols have been moved here. -*/ -module mde.mergetag.iface.IDataSection; - -/** Typedef for data & section indexes. -* -* Make it an alias, there doesn't appear to be any point having it as a typedef. */ -alias char[] ID; - -/** - * Interface for data storage classes, generally called DataSections, which contain all data-tags - * loaded from a single section of a file. - * - * A class implementing this may implement the addTag function to do whatever it likes with the - * data passed. DefaultData is one implementation which separates this data out into supported - * types and stores it appropriately (allowing merging with existing entries by keeping whichever - * tag was last loaded), while ignoring unsupported types. A different - * implementation could filter out the tags desired and use them directly, and ignore the rest. - * - * The mde.mergetag.parse.parseTo module provides a useful set of templated functions to - * convert the data accordingly. It is advised to keep the type definitions as defined in the file- - * format except for user-defined types, although this isn't necessary for library operation - * (addTag and writeAll are solely responsible for using and setting the type, ID and data fields). - * - * Another idea for a DataSection class: - * Use a void*[ID] variable to store all data (may also need a type var for each item). 
- * addTag should call a templated function which calls parse then casts to a void* and stores the data. - * Use a templated get(T)(ID) method which checks the type and casts to T. - */ -interface IDataSection -{ - /** Delegate passed to writeAll. */ - typedef void delegate (char[],ID,char[]) ItemDelg; - - /** Handles parsing of data items for all recognised types. - * - * Should ignore unsupported types/unwanted tags. - * - * TextExceptions (thrown by parseTo/parseFrom) are caught and a warning logged; execution - * then continues (so the offending tag gets dropped). */ - void addTag (char[],ID,char[]); - - /** Responsible for getting all data tags saved. - * - * writeAll should call the ItemDelg once for each tag to be saved with parameters in the same - * form as received by addTag (char[] type, ID id, char[] data). */ - void writeAll (ItemDelg); -} diff -r ea58f277f487 -r d8fccaa45d5f mde/mergetag/iface/IReader.d --- a/mde/mergetag/iface/IReader.d Thu Aug 07 11:25:27 2008 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,37 +0,0 @@ -/* LICENSE BLOCK -Part of mde: a Modular D game-oriented Engine -Copyright © 2007-2008 Diggory Hardy - -This program is free software: you can redistribute it and/or modify it under the terms -of the GNU General Public License as published by the Free Software Foundation, either -version 2 of the License, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; -without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . */ - -/** -* Interface for readers. -*/ -module mde.mergetag.iface.IReader; - -import mde.mergetag.DataSet; - -import tango.util.collection.model.View : View; - -/** Interface for all mergetag readers (MTTReader etc.). 
-*/ -interface IReader { - DataSet dataset (); /// Get the DataSet - void dataset (DataSet); /// Set the DataSet - - void dataSecCreator (IDataSection delegate (ID)); /// Set the dataSecCreator - - ID[] getSectionNames (); /// Get identifiers for all sections - void read (); /// Commence reading - void read (ID[] secSet); /// ditto - void read (View!(ID) secSet); /// ditto -} diff -r ea58f277f487 -r d8fccaa45d5f mde/mergetag/iface/IWriter.d --- a/mde/mergetag/iface/IWriter.d Thu Aug 07 11:25:27 2008 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,48 +0,0 @@ -/* LICENSE BLOCK -Part of mde: a Modular D game-oriented Engine -Copyright © 2007-2008 Diggory Hardy - -This program is free software: you can redistribute it and/or modify it under the terms -of the GNU General Public License as published by the Free Software Foundation, either -version 2 of the License, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; -without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . */ - -/** -* Interface for writers. -*/ -module mde.mergetag.iface.IWriter; - -import mde.mergetag.DataSet; - - -/** Interface for all mergetag writers (MTTWriter etc.). -*/ -interface IWriter { - DataSet dataset (); /// Get the DataSet - void dataset (DataSet); /// Set the DataSet - - void write (); /// Commence writing -} - -/** -* Enumeration for specifying the writing method ("Params" section shows possible values). -* -* Params: -* FromExtension = Determine writing format from file name extension (must be one of .mtb or .mtt). -* Binary = Use binary mode (adds extension .mtb without checking for an existing extension). -* Text = Use text mode (adds extension .mtt without checking for an existing extension). 
-* Both = Write simultaneously in binary and text modes (with appropriate extensions added to each -* file name. -*/ -enum WriterMethod : byte { - FromExtension = -1, - Binary = 1, - Text = 2, - Both = 3 -} diff -r ea58f277f487 -r d8fccaa45d5f mde/mergetag/internal.d --- a/mde/mergetag/internal.d Thu Aug 07 11:25:27 2008 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,35 +0,0 @@ -/* LICENSE BLOCK -Part of mde: a Modular D game-oriented Engine -Copyright © 2007-2008 Diggory Hardy - -This program is free software: you can redistribute it and/or modify it under the terms -of the GNU General Public License as published by the Free Software Foundation, either -version 2 of the License, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; -without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . */ - -/// Contains functions/data structures used internally by mergetag. 
-module mde.mergetag.internal; - -package abstract class MTFormatVersion { - enum VERS : ubyte { // convenient list of all known file format versions - INVALID = 0x00, - MT01 = 0x01, // not yet final - } - /// The current MergeTag version - static const VERS Current = VERS.MT01; - static const char[2] CurrentString = "01"; - - static VERS parseString (char[] str) - in { - assert (str.length == 2); - } body { - if (str[0] == '0' && str[1] == '1') return VERS.MT01; - else return VERS.INVALID; - } -} diff -r ea58f277f487 -r d8fccaa45d5f mde/mergetag/mtunittest.d --- a/mde/mergetag/mtunittest.d Thu Aug 07 11:25:27 2008 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,100 +0,0 @@ -/* LICENSE BLOCK -Part of mde: a Modular D game-oriented Engine -Copyright © 2007-2008 Diggory Hardy - -This program is free software: you can redistribute it and/or modify it under the terms -of the GNU General Public License as published by the Free Software Foundation, either -version 2 of the License, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; -without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . */ - -/// This module provides a unittest for mergetag. 
-module mde.mergetag.mtunittest; - -debug (mdeUnitTest) { - import mde.mergetag.Reader; - import mde.mergetag.Writer; - import mde.mergetag.DataSet; - import mde.mergetag.DefaultData; - import mde.mergetag.parse.parseTo : parseTo; - import mde.mergetag.parse.parseFrom : parseFrom; - - import tango.io.FilePath; - import tango.util.log.Log : Log, Logger; - - private Logger logger; - static this() { - logger = Log.getLogger ("mde.mergetag.mtunittest"); - } - - unittest { - /* This does a basic write-out and read-in test for each type with its default value. - * Thus it provides some basic testing for the whole mergetag package. */ - - const file = "unittest"; - const ID UT_ID = cast (ID) "mdeUT"; - const headInfo = "mde Unit Test"; - - DataSet dsW = new DataSet(); - - dsW.header = new DefaultData(); - dsW.header._charA[UT_ID] = headInfo; - - DefaultData secW = new DefaultData(); - dsW.sec[UT_ID] = secW; - - static char[] genUTCode () { - char[] ret; - foreach (type; DefaultData.dataTypes) { - ret ~= `secW.`~DefaultData.varName(type)~`[UT_ID] = (`~type~`).init;`; - } - return ret; - } - mixin (genUTCode()); // Add an entry to dd for each type - - IWriter w = makeWriter (file, dsW, WriterMethod.Both); - w.write(); - - // FIXME: when binary writing is supported, read both formats and check - IReader r = makeReader (file~".mtt", null, true); - r.read(); - - DataSet dsR = r.dataset; - assert (dsR !is null); - - assert (dsR.header !is null); - char[]* p = UT_ID in dsW.header._charA; - assert (p); - assert (*p == headInfo); - - IDataSection* sec_p = (UT_ID in dsR.sec); - assert (sec_p); - DefaultData secR = cast(DefaultData) *sec_p; - assert (secR !is null); - - // FIXME: when comparing associative arrays works, use that. In the mean-time, format!() should work. 
- static char[] genCheckCode (char[] dd1, char[] dd2) { - const char[] failureMsg = "Assertion failed for type; values: "; - char[] ret; - foreach (type; DefaultData.dataTypes) { - char[] tName = DefaultData.varName(type); - ret ~= `char[] `~tName~`Val1 = parseFrom!(`~type~`[char[]]) (cast(`~type~`[char[]]) `~dd1~`.`~tName~`); -char[] `~tName~`Val2 = parseFrom!(`~type~`[char[]]) (cast(`~type~`[char[]]) `~dd2~`.`~tName~`); -assert (`~tName~`Val1 == `~tName~`Val2, "Assertion failed for type `~type~`; values: "~`~tName~`Val1~", "~`~tName~`Val2 ); -`; - } - return ret; - } - mixin (genCheckCode (`secW`,`secR`)); - - // Delete the unittest file now - FilePath (file~".mtt").remove; - - logger.info ("Unittest complete (for DefaultData)."); - } -} diff -r ea58f277f487 -r d8fccaa45d5f mde/mergetag/serialize.d --- a/mde/mergetag/serialize.d Thu Aug 07 11:25:27 2008 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,377 +0,0 @@ -/************************************************************************************************** - * Generic serialization templated function. - * - * copyright: Copyright (c) 2007-2008 Diggory Hardy. - * - * author: Diggory Hardy, diggory.hardy@gmail.com - * - * Supports: - * Associative arrays, arrays (inc. strings), structs, char types, bool, int types, float types. - * - * Examples: - * ------------------------------------------------------------------------------------------------ - * // Basic examples: - * Cout (serialize!(byte) (-13)).newline; // -13 - * Cout (serialize!(real) (2.56e11)).newline; // 2.55999999999999990000e+11 - * Cout (serialize!(double[]) ([0.0, 1.0, 2.0, 3.0])).newline; // [0.00000000000000000,1.00000000000000000,2.00000000000000000,3.00000000000000000] - * Cout (serialize ([true,false,false])).newline; // [true,false,false] - * - * // String and ubyte[] special syntaxes (always used): - * Cout (serialize ("A string.")).newline; // "A string." 
(including quotes) - * Cout (serialize (cast(ubyte[]) [5u, 0xF1u, 0x10u])).newline; // 0x05f110 - * - * // Associative arrays: - * Cout (serialize ([-1:"negative one"[], 0:"zero", 1:"one"])).newline; // [0:"zero",1:"one",-1:"negative one"] - * - * // Structs: - * struct S { int a = 5; double[int[]] x; } - * S s; - * Cout (serialize (s)); - * - * // No limit on complexity... - * char[] somethingComplicated = serialize!(real[][][bool[int[][]]]) (...); - * ------------------------------------------------------------------------------------------------ - * - * TODO: Optimize memory allocation (if possible?). Test best sizes for initial allocations - * instead of merely guessing? - *************************************************************************************************/ -//NOTE: in case of multiple formats, make this a dummy module importing both serialize modules, -// or put all the code here. -module mde.mergetag.serialize; -// Since serialize is never used in a module where deserialize is not used, save an import: -public import mde.mergetag.deserialize; - -// tango imports -import tango.core.Traits; -import tango.core.Exception : UnicodeException, IllegalArgumentException; -import cInt = tango.text.convert.Integer; -import cFloat = tango.text.convert.Float; -import Utf = tango.text.convert.Utf; - - -alias serialize parseFrom; // support the old name - -// Formatting options, for where multiple formats are supported by the deserializer. - -// Output using the special binary notation (0x01F2AC instead of [01 ,0xF2, 0xAC])? -const bool SPECIAL_BINARY_NOTATION = true; - -// Output binary as true / false or 1 / 0 ? -const bool BINARY_AS_WORDS = true; - - -char[] serialize(U) (U val) { - // Associative arrays (NOTE: cannot use is() expression) - static if (isAssocArrayType!(U)) { // generic associative array - alias typeof(U.keys[0]) S; - alias typeof(U.values[0]) T; - char[] ret; - // A guess, including values themselves and [,:] elements (must be at least 2). 
- ret.length = val.length * (defLength!(T) + defLength!(S) + 2) + 2; - ret[0] = '['; - uint i = 1; - foreach (S k, T v; val) { - char[] s = serialize!(S) (k) ~ ":" ~ serialize!(T) (v); - i += s.length; - if (i+1 >= ret.length) - ret.length = ret.length * 2; // check. - ret[i-s.length .. i] = s; - ret[i++] = ','; - } - if (i == 1) ++i; // special case - not overwriting a comma - ret[i-1] = ']'; // replaces last comma - return ret[0..i]; - } - // Arrays - else static if (is(U S == S[]) || isStaticArrayType!(U)) { - alias typeof(U[0]) T; - - static if (is(T == char)) { // string - char[] ret = new char[val.length * 2 + 2]; // Initial storage. This should ALWAYS be enough. - ret[0] = '"'; - uint i = 1; - for (uint t = 0; t < val.length;) { - // process a block of non-escapable characters - uint s = t; - while (t < val.length && !isEscapableChar(val[t])) - ++t; // skip all non-escapable chars - uint j = i + t - s; - ret[i..j] = val[s..t]; // copy a block - i = j; - // process a block of escapable charaters - while (t < val.length && isEscapableChar(val[t])) { - ret[i++] = '\\'; // backslash; increment i - ret[i++] = escapeChar(val[t++]); // character; increment i and t - } - } - ret[i++] = '"'; - return ret[0..i]; - } - else static if (is(T == wchar) || is(T == dchar)) { // wstring or dstring - // May throw a UnicodeException; don't bother catching and rethrowing: - return serialize!(char[]) (Utf.toString (val)); - } - else static if (SPECIAL_BINARY_NOTATION && is(T == ubyte)) { // special binary notation - // Note: To disable the usage of this special type, set SPECIAL_BINARY_NOTATION = false. 
- static const char[16] digits = "0123456789abcdef"; - - char[] ret = new char[val.length * 2 + 2]; // exact length - ret[0..2] = "0x"; - uint i = 2; - - foreach (ubyte x; val) { - ret[i++] = digits[x >> 4]; - ret[i++] = digits[x & 0x0F]; - } - return ret; - } - else { // generic array - char[] ret; - // A guess, including commas and brackets (must be at least 2) - ret.length = val.length * (defLength!(T) + 1) + 2; - ret[0] = '['; - uint i = 1; - foreach (T x; val) { - char[] s = serialize!(T) (x); - i += s.length; - if (i+1 >= ret.length) - ret.length = ret.length * 2; // check length - ret[i-s.length .. i] = s; - ret[i++] = ','; - } - if (i == 1) - ++i; // special case - not overwriting a comma - ret[i-1] = ']'; // replaces last comma - return ret[0..i]; - } - } - // Structs - else static if (is(U == struct)) { - char[] ret; - // A very rough guess. - ret.length = val.sizeof * 4; - ret[0] = '{'; - uint i = 1; - foreach (k, v; val.tupleof) { - alias typeof(v) T; - char[] s = serialize!(size_t) (k) ~ ":" ~ serialize!(T) (v); - i += s.length; - if (i+1 >= ret.length) - ret.length = ret.length * 2; // check. - ret[i-s.length .. i] = s; - ret[i++] = ','; - } - if (i == 1) ++i; // special case - not overwriting a comma - ret[i-1] = '}'; // replaces last comma - return ret[0..i]; - } - // Basic types - else static if (is(U == char)) { // char (UTF-8 byte) - // Note: if (val > 127) "is invalid UTF-8 single char". However we don't know - // what this is for, in particular if it will be recombined with other chars later. - - // Can't return reference to static array; so making it dynamic is cheaper than copying. 
- char[] ret = new char[4]; // max length for an escaped char - ret[0] = '\''; - - if (!isEscapableChar (val)) { - ret[1] = val; - ret[2] = '\''; - return ret[0..3]; - } else { - ret[1] = '\\'; - ret[2] = escapeChar (val); - ret[3] = '\''; - return ret; - } - } else static if (is(U == wchar) || - is(U == dchar)) { // wchar or dchar (UTF-16/32 single char) - // Note: only ascii can be converted. NOTE: convert to UTF-8 (multibyte) char? - if (val <= 127u) - return serialize!(char) (cast(char) val); // ASCII - else throw new UnicodeException ( - "Error: unicode non-ascii character cannot be converted to a single UTF-8 char", 0); - } else static if (is (U == bool)) { // boolean - static if (BINARY_AS_WORDS) { - if (val) - return "true"; - else return "false"; - } else { - if (val) - return "1"; - else return "0"; - } - } else static if (is (U : long)) { // any integer type, except char types and bool - static if (is (U == ulong)) // ulong may not be supported properly - if (val > cast(ulong) long.max) - throw new IllegalArgumentException ("No handling available for ulong where value > long.max"); - return cInt.toString (val); - } else static if (is (U : real)) { // any (real) floating point type - char[] ret = new char[32]; // minimum allowed by assert in format - return cFloat.format (ret, val, U.dig+2, 1);// from old C++ tests, U.dig+2 gives best(?) accuracy - } - // Unsupported - else - static assert (false, "Unsupported type: "~U.stringof); -} - -//BEGIN Utility funcs -/* This template provides the initial length for strings for formatting various types. These strings - * can be expanded; this value is intended to cover 90% of cases or so. - * - * NOTE: This template was intended to provide specialisations for different types. - * This one value should do reasonably well for most types. 
- */ -private { - template defLength(T) { const uint defLength = 20; } - template defLength(T : char) { const uint defLength = 4; } - template defLength(T : bool) { const uint defLength = 5; } -} -private bool isEscapableChar (char c) { - return ((c <= '\r' && c >= '\a') || c == '\"' || c == '\'' || c == '\\'); -} -// Throws on unsupported escape sequences; however this should never happen within serialize. -private char escapeChar (char c) { - // This code was generated: - if (c <= '\v') { - if (c <= '\b') { - if (c == '\a') { - return 'a'; - } else if (c == '\b') { - return 'b'; - } - } else { - if (c == '\t') { - return 't'; - } else if (c == '\n') { - return 'n'; - } else if (c == '\v') { - return 'v'; - } - } - } else { - if (c <= '\r') { - if (c == '\f') { - return 'f'; - } else if (c == '\r') { - return 'r'; - } - } else { - if (c == '\"') { - return '\"'; - } else if (c == '\'') { - return '\''; - } else if (c == '\\') { - return '\\'; - } - } - } - - // if we haven't returned: - throw new IllegalArgumentException ("Internal error (escapeChar)"); -} -//END Utility funcs - - - -debug (UnitTest) { - import tango.util.log.Log : Log, Logger; - - private Logger logger; - static this() { - logger = Log.getLogger ("text.serialize"); - } -unittest { - // Utility - bool throws (void delegate() dg) { - bool r = false; - try { - dg(); - } catch (Exception e) { - r = true; - logger.info ("Exception caught: "~e.msg); - } - return r; - } - assert (!throws ({ int i = 5; })); - assert (throws ({ throw new Exception ("Test - this exception should be caught"); })); - - // Associative arrays - char[] X = serialize!(char[][char]) (['a':cast(char[])"animal", 'b':['b','u','s']]); - char[] Y = `['a':"animal",'b':"bus"]`; - assert (X == Y); - - - // Arrays - // generic array stuff: - assert (serialize!(double[]) ([1.0, 1.0e-10]) == `[1.00000000000000000,0.10000000000000000e-09]`); - assert (serialize!(double[]) (cast(double[]) []) == `[]`); // empty array - - // char[] 
conversions, with commas, escape sequences and multichar UTF8 characters: - assert (serialize!(char[][]) ([ ".\""[], [',','\''] ,"!\b€" ]) == `[".\"",",\'","!\b€"]`); - - // wchar[] and dchar[] conversions: - // The characters were pretty-much pulled at random from unicode tables. - assert (serialize!(wchar[]) ("Test string: ¶α؟अกሀ搀"w) == "\"Test string: ¶α؟अกሀ搀\""); - assert (serialize!(dchar[]) ("Test string: ¶α؟अกሀ搀"d) == "\"Test string: ¶α؟अกሀ搀\""); - - - static if (SPECIAL_BINARY_NOTATION) - assert (serialize!(ubyte[]) (cast(ubyte[]) [0x01, 0xF2, 0xAC]) == `0x01f2ac`); // ubyte[] special notation - else - assert (serialize!(ubyte[]) (cast(ubyte[]) [0x01, 0xF2, 0xAC]) == `[1,242,172]`); - - - // Structs - struct Foo { int a = 9; char b = '\v'; float c; } - struct Bar { Foo a,b; } - static Foo foo1 = { a:150, b:'8', c:17.2f}, foo2; - Bar bar; - bar.a = foo1; - bar.b = foo2; - assert (serialize(bar) == "{0:{0:150,1:'8',2:1.72000007e+01},1:{0:9,1:'\\v',2:nan}}"); - - - // Basic Types - // Character types - assert (serialize!(char) ('\'') == "\'\\\'\'"); - assert (serialize!(wchar) ('X') == "'X'"); - assert (serialize!(dchar) ('X') == "'X'"); - assert (throws ({ char[] r = serialize!(wchar) ('£'); /* unicode U+00A3 */ })); - assert (throws ({ char[] r = serialize!(dchar) ('£'); })); - - // Bool - static if (BINARY_AS_WORDS) - assert (serialize(false) == "false"); - else - assert (serialize(true) == "1"); - - // Integers - assert (serialize (cast(byte) -5) == "-5"); - assert (serialize (cast(short) -32768) == "-32768"); - assert (serialize (-5) == "-5"); - assert (serialize (-9223372036854775807L) == "-9223372036854775807"); - assert (serialize (cast(ubyte) -1) == "255"); - assert (serialize (cast(ushort) -1) == "65535"); - assert (serialize!(uint) (-1) == "4294967295"); - assert (serialize (cast(ulong) 0x7FFF_FFFF_FFFF_FFFFLu) == "9223372036854775807"); - assert (serialize!(uint[]) ([0b0100u,0724,0xFa59c,0xFFFFFFFF,0]) == - "[4,468,1025436,4294967295,0]"); - assert 
(throws ({ - // ulong is not properly supported. - // NOTE: this is something that should really work. - char[] r = serialize!(ulong) (0x8FFF_FFFF_FFFF_FFFFLu); - })); - - // Floats - // These numbers are not particularly meaningful: - assert (serialize!(float) (0.0f) == "0.00000000"); - assert (serialize!(double) (-1e25) == "-1.00000000000000000e+25"); - assert (serialize!(real) (cast(real) 4.918e300) == "4.91800000000000000000e+300"); - - // Escape sequences (test conversion functions) - assert (serialize ("\a\b\t\n\v\f\r\"\'\\") == `"\a\b\t\n\v\f\r\"\'\\"`); - - logger.info ("Unittest complete."); -} -} diff -r ea58f277f487 -r d8fccaa45d5f mde/setup/paths.d --- a/mde/setup/paths.d Thu Aug 07 11:25:27 2008 +0100 +++ b/mde/setup/paths.d Fri Aug 29 11:59:43 2008 +0100 @@ -32,10 +32,10 @@ module mde.setup.paths; import mde.exception; -import mde.mergetag.Reader; -import mde.mergetag.Writer; -import mde.mergetag.DataSet; -import mde.mergetag.exception; +import mde.file.mergetag.Reader; +import mde.file.mergetag.Writer; +import mde.file.mergetag.DataSet; +import mde.file.mergetag.exception; import tango.io.Console; import tango.io.FilePath; @@ -105,15 +105,6 @@ return ret; } - /** Check whether the given file exists under any path with either .mtt or .mtb suffix. */ - bool exists (char[] file) { - for (uint i = 0; i < pathsLen; ++i) { - if (FilePath (paths[i]~file~".mtt").exists) return true; - if (FilePath (paths[i]~file~".mtb").exists) return true; - } - return false; - } - /// Print all paths found. static void printPaths () { Cout ("Data paths found:");