Mercurial > projects > mde
changeset 1:18491334a525
Finished format.d and parse.d modules; moved to mde/text. Partway implementing mde.mergetag.write.TextWriter.
committer: Diggory Hardy <diggory.hardy@gmail.com>
author | Diggory Hardy <diggory.hardy@gmail.com> |
---|---|
date | Tue, 30 Oct 2007 17:08:12 +0000 |
parents | d547009c104c |
children | 78eb491bd642 |
files | mde/init.d mde/mergetag/dataset.d mde/mergetag/doc/issues.txt mde/mergetag/exception.d mde/mergetag/format.d mde/mergetag/parse.d mde/mergetag/read.d mde/mergetag/write.d mde/text/exception.d mde/text/format.d mde/text/parse.d mde/text/util.d test.mtt |
diffstat | 13 files changed, 679 insertions(+), 626 deletions(-) [+] |
line wrap: on
line diff
--- a/mde/init.d Sat Oct 27 18:05:39 2007 +0100 +++ b/mde/init.d Tue Oct 30 17:08:12 2007 +0000 @@ -3,6 +3,7 @@ * * This module controls most of the initialisation and deinitialisation of the program. *************************************************************************************************/ +module mde.init; // tango imports import tango.util.log.Log : Log;
--- a/mde/mergetag/dataset.d Sat Oct 27 18:05:39 2007 +0100 +++ b/mde/mergetag/dataset.d Tue Oct 30 17:08:12 2007 +0000 @@ -21,13 +21,12 @@ static const VERS Current = VERS.MT01; static const char[2] CurrentString = "01"; - static VERS parseString (char[] str) { - in { + static VERS parseString (char[] str) + in { assert (str.length == 2); - } body { - if (str[0] == '0' && str[1] == '1') return VERS.MT01; - else return VERS.INVALID; - } + } body { + if (str[0] == '0' && str[1] == '1') return VERS.MT01; + else return VERS.INVALID; } } @@ -102,22 +101,64 @@ */ class DefaultData : DataSection { + //BEGIN DATA + /** Data members for direct access. */ + bool [ID] Bool; + byte [ID] Byte; /// ditto + short [ID] Short; /// ditto + int [ID] Int; /// ditto + long [ID] Long; /// ditto + ubyte [ID] UByte; /// ditto + ushort [ID] UShort; /// ditto + uint [ID] UInt; /// ditto + ulong [ID] ULong; /// ditto + + bool[] [ID] BoolA; /// ditto + byte[] [ID] ByteA; /// ditto + short[] [ID] ShortA; /// ditto + int[] [ID] IntA; /// ditto + long[] [ID] LongA; /// ditto + ubyte[] [ID] UByteA; /// ditto + ushort[] [ID] UShortA;/// ditto + uint[] [ID] UIntA; /// ditto + ulong[] [ID] ULongA; /// ditto + + float [ID] Float; /// ditto + double [ID] Double; /// ditto + real [ID] Real; /// ditto + float[] [ID] FloatA; /// ditto + double[] [ID] DoubleA;/// ditto + real[] [ID] RealA; /// ditto + + char [ID] Char; /// ditto + char[] [ID] CharA; /// ditto + + /** Alias names */ + alias CharA String; + alias UByteA Binary; /// ditto + //END DATA + /+ Could use this: private template addTagTp(alias Var, T) (ID id, char[] dt) { Var[id] = parse!(T) (dt); - } +/ + } + or a mixin. + +/ // Unfortunately, I think each case needs to be mentioned explicitly to tie it to the correct // data member. 
void addTag (char[] tp, ID id, char[] dt) { /// for adding tags try { + postTrim (tp); + // parse tp, then use if statements to replace the following switch + switch(Util.trim(tp)) { case "1": case "bool": - Bool[id] = parse!(bool) (dt); + Arg!(bool)[id] = parse!(bool) (dt); break; case "s8": case "byte": - Byte[id] = parse!(byte) (dt); + addTagTp!(byte) (id, dt); break; case "s16": case "short": @@ -125,7 +166,7 @@ break; case "s32": case "int": - Int[id] = parse!(int) (dt); + Arg!(int)[id] = parse!(int) (dt); break; case "s64": case "long": @@ -230,40 +271,103 @@ } } - - - /** Data members for direct access. */ - bool [ID] Bool; - byte [ID] Byte; /// ditto - short [ID] Short; /// ditto - int [ID] Int; /// ditto - long [ID] Long; /// ditto - ubyte [ID] UByte; /// ditto - ushort [ID] UShort; /// ditto - uint [ID] UInt; /// ditto - ulong [ID] ULong; /// ditto +private: + void addTagTp(T) (ID id, char[] dt) { + Arg!(T)[id] = parse!(T) (dt); + } - bool[] [ID] BoolA; /// ditto - byte[] [ID] ByteA; /// ditto - short[] [ID] ShortA; /// ditto - int[] [ID] IntA; /// ditto - long[] [ID] LongA; /// ditto - ubyte[] [ID] UByteA; /// ditto - ushort[] [ID] UShortA;/// ditto - uint[] [ID] UIntA; /// ditto - ulong[] [ID] ULongA; /// ditto - - float [ID] Float; /// ditto - double [ID] Double; /// ditto - real [ID] Real; /// ditto - float[] [ID] FloatA; /// ditto - double[] [ID] DoubleA;/// ditto - real[] [ID] RealA; /// ditto - - char [ID] Char; /// ditto - char[] [ID] CharA; /// ditto - - /** Alias names */ - alias CharA String; - alias UByteA Binary; /// ditto + // use as: mixin Arg!(type); or Arg!(type) + template Arg(T : bool) { + alias Bool Arg; + } + template Arg(T : byte) { + alias Byte Arg; + } + template Arg(T : short) { + alias Short Arg; + } + template Arg(T : int) { + alias Int Arg; + } + template Arg(T : long) { + alias Long Arg; + } + template Arg(T : ubyte) { + alias UByte Arg; + } + template Arg(T : ushort) { + alias UShort Arg; + } + template Arg(T : uint) { + 
alias UInt Arg; + } + template Arg(T : ulong) { + alias ULong Arg; + } + template Arg(T : bool[]) { + alias BoolA Arg; + } + template Arg(T : byte[]) { + alias ByteA Arg; + } + template Arg(T : short[]) { + alias ShortA Arg; + } + template Arg(T : int[]) { + alias IntA Arg; + } + template Arg(T : long[]) { + alias LongA Arg; + } + template Arg(T : ubyte[]) { + alias UByteA Arg; + } + template Arg(T : ushort[]) { + alias UShortA Arg; + } + template Arg(T : uint[]) { + alias UIntA Arg; + } + template Arg(T : ulong[]) { + alias ULongA Arg; + } + template Arg(T : float) { + alias Float Arg; + } + template Arg(T : double) { + alias Double Arg; + } + template Arg(T : real) { + alias Real Arg; + } + template Arg(T : float[]) { + alias FloatA Arg; + } + template Arg(T : double[]) { + alias DoubleA Arg; + } + template Arg(T : real[]) { + alias RealA Arg; + } + template Arg(T : char) { + alias Char Arg; + } + template Arg(T : char[]) { + alias CharA Arg; + } } + +/+ +class TemplateData : DataSection +{ + void addTag (char[] tp, ID id, char[] dt) { + // runtime deduction of tp and aliasing? + // CANNOT add data at runtime though. + } + // will this work? no idea. + // templates can't be used to add non-static elements, so use a static array at index: this + template Data(T,TemplateData* p) { + static T[ID][TemplateData*] Data; + } +} ++/ \ No newline at end of file
--- a/mde/mergetag/doc/issues.txt Sat Oct 27 18:05:39 2007 +0100 +++ b/mde/mergetag/doc/issues.txt Tue Oct 30 17:08:12 2007 +0000 @@ -14,4 +14,7 @@ parse.d: No support for escaped quotes in strings during tokenisation (by tango.text.Util.quotes). - \ No newline at end of file + Doesn't support cent/ucent. + +format.d: + No support for cent/ucent or ulong where val > long.max.
--- a/mde/mergetag/exception.d Sat Oct 27 18:05:39 2007 +0100 +++ b/mde/mergetag/exception.d Tue Oct 30 17:08:12 2007 +0000 @@ -33,12 +33,6 @@ this () {} } -/** Thrown by the parse module on any error. - */ -class MTParseException : MTException { - this () {} -} - /** Thrown by classes implementing DataSection when addTag is called with an unrecognised type string. */ class MTUnknownTypeException : MTException {
--- a/mde/mergetag/format.d Sat Oct 27 18:05:39 2007 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,316 +0,0 @@ -/************************************************************************************************** - * This contains templates for converting various data-types to a char[]. - * - * Copyright (c) 2007 Diggory Hardy. - * Licensed under the Academic Free License version 3.0 - * - * This module basically implements the following templated function for $(B most) basic D types: - * bool, byte, short, int, long, ubyte, ushort, uint, ulong, float, double, real, char. - * It also supports arrays of any supported type (including of other arrays) and has special - * handling for strings (char[]) and binary (ubyte[]) data-types. - * ----------------------------- - * char[] format(T) (T value); - * ----------------------------- - * - * There are also a few utility functions defined; the public ones have their own documentation. - * - * On errors, a warning is logged and an MTConvertException is thrown. No other exceptions should - * be thrown and none thrown from functions used outside this module. - *************************************************************************************************/ -module mde.mergetag.format; - -// package imports -import mde.mergetag.exception; - -// tango imports -import cInt = tango.text.convert.Integer; -import cFloat = tango.text.convert.Float; -import Utf = tango.text.convert.Utf; -import Util = tango.text.Util; -import tango.util.log.Log : Log, Logger; - -private Logger logger; -static this () { - logger = Log.getLogger ("mde.mergetag.format"); -} - -//BEGIN Convert templates -/* Idea: could extend format with a second parameter, containing flags for things like base to output. - * Unnecessary for mergetag though. 
- */ -// Arrays -char[] format(T : T[]) (T[] val) { - char[val.length * defLength!(T)] ret = void; - ret[0] = '['; - uint i = 0; - foreach (T x; val) { - char[] s = format!(T) (x); - i += s.length; - if (i >= ret.length) ret.length = ret.length * 2; // check. - ret[i-s.length .. i] = s; - ret[i] = ','; - } - ret[i++] = ']'; // replaces last comma - return ret[0..i]; -} -char[] format(T : dchar[]) (T val) { - return format (toUtf8 (val)); -} -char[] format(T : wchar[]) (T val) { - return format (toUtf8 (val)); -} -char[] format(T : char[]) (T val) { - char[val.length * 2 + 2] ret = void; // Initial storage. This should ALWAYS be enough. - ret[0] = '"'; - uint i = 0; - for (uint t = 0; t < val.length;) { - // process a block of non-escapable characters - uint s = t; - while (t < val.length && !isEscapableChar(val[t])) - ++t; // skip all non-escapable chars - uint j = i + t - s; - ret[i..j] = [s..t]; // copy a block - i = j; - // process a block of escapable charaters - while (t < val.length && isEscapableChar(val[t])) { - ret[i++] = '\\'; // backslash; increment i - ret[i++] = replaceEscapableChar(val[t++]); // character; increment i and t - } - } - ret[i++] = '"'; - return ret[0..i]; -} -char[] format(T : ubyte[]) (T val) { - static const char[16] digits = "0123456789abcdef"; - - char[val.length * 2] ret = void; // exact length - uint i = 0; - foreach (ubyte x; val) { - ret[i++] = digits[x >> 4]; - ret[i++] = digits[x & 0x0F]; - } - return ret; -} - -// Support for outputting a wide char... I reccomend against trying to output these though. 
-const char[] WIDE_CHAR_ERROR = "Error: unicode character cannot be converted to a single UTF-8 char"; -char[] format(T : dchar) (T val) { - if (val <= 127u) return format (cast(char) val); // this char can be converted - throwMTFException (WIDE_CHAR_ERROR); -} -char[] format(T : wchar) (T val) { - if (val <= 127u) return format (cast(char) val); // this char can be converted - throwMTFException (WIDE_CHAR_ERROR); -} -char[] format(T : char) (T val) { - // Note: if (val > 127) "is invalid UTF-8 single char" - // However we don't know what this is for, in particular if it will be recombined with other chars later - - char[4] ret; // max length for an escaped char -// FIXME: carry on down - if (src.length < 3 || src[0] != '\'' || src[$-1] != '\'') - throwMTCException ("Invalid char: not quoted (\'*\')"); - src = src[1..$-1]; - uint pos; - char ret = toChar (src, pos); - if (pos < src.length) { - if (ret & 0xC0u) throwMTCException ("Invalid char: too long (non-ASCII UTF-8 characters cannot be read as a single character)"); - else throwMTCException ("Invalid char: too long"); - } - return ret; -} - -T format(T : bool) (char[] src) { - src = Util.trim(src); - if (src == "true") return true; - if (src == "false") return false; - uint pos; - while (src.length > pos && src[pos] == '0') ++pos; // strip leading zeros - if (src.length == pos && pos > 0) return false; - if (src.length == pos + 1 && src[pos] == '1') return true; - throwMTCException ("Invalid bool: not true or false and doesn't evaluate to 0 or 1"); -} -T format(T : byte) (char[] src) { - return toTInt!(T) (src); -} -T format(T : short) (char[] src) { - return toTInt!(T) (src); -} -T format(T : int) (char[] src) { - return toTInt!(T) (src); -} -T format(T : long) (char[] src) { - return toTInt!(T) (src); -} -T format(T : ubyte) (char[] src) { - return toTInt!(T) (src); -} -T format(T : ushort) (char[] src) { - return toTInt!(T) (src); -} -T format(T : uint) (char[] src) { - return toTInt!(T) (src); -} -T 
format(T : ulong) (char[] src) { - return toTInt!(T) (src); -} - -T format(T : float) (char[] src) { - return toTFloat!(T) (src); -} -T format(T : double) (char[] src) { - return toTFloat!(T) (src); -} -T format(T : real) (char[] src) { - return toTFloat!(T) (src); -} -//END Convert templates - -//BEGIN Length templates -/* This template provides the initial length for strings for formatting various types. These strings - * can be expanded; this value should cover 90% of cases or so. - * FIXME: provide more specialisations - */ -private { - template defLength(T) { const uint defLength = 20; } -} -//END Length templates - -//BEGIN Utility funcs -/** Templated read-int function to read (un)signed 1-4 byte integers. - * - * Actually a reimplementation of tango.text.convert.Integer toLong and parse functions. - */ -TInt toTInt(TInt) (char[] src) { - const char[] INT_OUT_OF_RANGE = "mde.mergetag.format.toTInt: integer out of range"; - bool sign; - uint radix, ate, ate2; - - ate = cInt.trim (src, sign, radix); - ulong val = cInt.convert (src[ate..$], radix, &ate2); - ate += ate2; - - while (ate < src.length) { - if (src[ate] == ' ' || src[ate] == '\t') ++ate; - else throwMTCException ("mde.mergetag.format.toTInt: invalid integer"); - } - - if (val > TInt.max) throwMTCException (INT_OUT_OF_RANGE); - if (sign) { - long sval = cast(long) -val; - if (sval > TInt.min) return cast(TInt) sval; - else throwMTCException (INT_OUT_OF_RANGE); - } - return cast(TInt) val; -} - -/** Basically a reimplementation of tango.text.convert.Float.toFloat which checks for trailing - * whitespace before throwing an exception for overlong input and throws my exception class - * when it does. 
- */ -TFloat toTFloat(TFloat) (char[] src) { - uint ate; - - TFloat x = cFloat.parse (src, &ate); - while (ate < src.length) { - if (src[ate] == ' ' || src[ate] == '\t') ++ate; - else throwMTCException ("mde.mergetag.format.toTFloat: invalid number"); - } - return x; -} - -/** Read a character from a string, with support for escape sequences. - * - * Assumes src.length > pos. At return pos is set to one after the last character eaten. - * - * Throws an exception on invalid escape sequences. Supported escape sequences are the following - * subset of those supported by D: \" \' \\ \a \b \f \n \r \t \v - */ -char toChar (char[] src, inout uint pos) -{ - //if (src.length <= pos) throwMTCException ("Invalid char: no character"); // shouldn't happen - if (src[pos] == '\\') { - // map of all supported escape sequences - char[char] escChars; - escChars['"'] = '"'; - escChars['\''] = '\''; - escChars['\\'] = '\\'; - escChars['a'] = '\a'; - escChars['b'] = '\b'; - escChars['f'] = '\f'; - escChars['n'] = '\n'; - escChars['r'] = '\r'; - escChars['t'] = '\t'; - escChars['v'] = '\v'; - - ++pos; - if (src.length > pos) { - char* r = src[pos] in escChars; - ++pos; - if (r != null) return *r; - } - throwMTCException ("Invalid escape sequence."); // we didn't return, so something failed - } - char c = src[pos]; - ++pos; - return c; - - // note on UTF-8 non-ascii chars: these consist of multiple "char"s; can only return one at a - // time like this anyway. If this is used to read a string it should handle them fine. -} - -// Reads one hex char: [0-9A-Fa-f]. Otherwise throws an exception. Doesn't check src.length. 
-private ubyte readHexChar (char[] src, inout uint pos) { - ubyte x; - if (src[pos] >= '0' && src[pos] <= '9') x = src[pos] - '0'; - else if (src[pos] >= 'A' && src[pos] <= 'F') x = src[pos] - 'A' + 10; - else if (src[pos] >= 'a' && src[pos] <= 'f') x = src[pos] - 'a' + 10; - else throwMTCException ("Invalid hex digit."); - ++pos; - return x; -} - -// Generic array reader -private T[] toArray(T : T[]) (char[] src) { - T[] ret = new T[16]; // avoid unnecessary allocations - uint i = 0; - foreach (char[] element; Util.quotes (src[1..$-1],",")) { - if (i == ret.length) ret.length = ret.length * 2; - ret[i] = format!(T) (element); - ++i; - } - return ret[0..i]; -} -// FIXME: to here. - -private bool isEscapableChar (char c) { - return ((c <= '\r' && c >= '\a') || c == '\"' || c == '\'' || c == '\\') -} -// Warning: this DOES NOT check c is escapable -private char replaceEscapableChar (char c) { - static char[char] escCharsRev; // reversed escChars - static bool escCharsRevFilled; // will be initialised false - - if (!escCharsRevFilled) { // only do this once - // map of all supported escape sequences - escCharsRev['"'] = '"'; - escCharsRev['\''] = '\''; - escCharsRev['\\'] = '\\'; - escCharsRev['\a'] = 'a'; - escCharsRev['\b'] = 'b'; - escCharsRev['\f'] = 'f'; - escCharsRev['\n'] = 'n'; - escCharsRev['\r'] = 'r'; - escCharsRev['\t'] = 't'; - escCharsRev['\v'] = 'v'; - escCharsRevFilled = true; - } - - return escCharsRev[c]; -} - -private void throwMTFException (char[] msg) { - logger.warn (msg); // only small errors are trapped here - throw new MTFormatException (); -} -//END Utility funcs
--- a/mde/mergetag/parse.d Sat Oct 27 18:05:39 2007 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,245 +0,0 @@ -/************************************************************************************************** - * This contains templates for converting a char[] to various data-types. - * - * Copyright (c) 2007 Diggory Hardy. - * Licensed under the Academic Free License version 3.0 - * - * This module basically implements the following templated function for $(B most) basic D types: - * bool, byte, short, int, long, ubyte, ushort, uint, ulong, float, double, real, char. - * It also supports arrays of any supported type (including of other arrays) and has special - * handling for strings (char[]) and binary (ubyte[]) data-types. - * ----------------------------- - * T parse(T) (char[] source); - * ----------------------------- - * - * There are also a few utility functions defined; the public ones have their own documentation. - * - * On errors, a warning is logged and an MTParseException is thrown. No other exceptions should - * be thrown and none thrown from functions used outside this module. 
- *************************************************************************************************/ -module mde.mergetag.parse; - -// package imports -import mde.mergetag.exception; - -// tango imports -import cInt = tango.text.convert.Integer; -import cFloat = tango.text.convert.Float; -import Util = tango.text.Util; -import tango.util.log.Log : Log, Logger; - -private Logger logger; -static this () { - logger = Log.getLogger ("mde.mergetag.parse"); -} - -//BEGIN parse templates -// Arrays -T[] parse(T : T[]) (char[] src) { - src = Util.trim(src); - if (src.length >= 2 && src[0] == '[' && src[$-1] == ']') return toArray!(T[]) (src); - throwMTPException ("Invalid array: not [., ..., .]"); -} -T parse(T : char[]) (char[] src) { - src = Util.trim(src); - if (src.length >= 2 && src[0] == '"' && src[$-1] == '"') { - src = src[1..$-1]; - T ret; - ret.length = src.length; // maximum length; retract to actual length later - uint i; - for (uint t = 0; t < src.length;) { - // process a block of non-escaped characters - uint s = t; - while (t < src.length && src[t] != '\\') ++t; // non-escaped characters - uint j = i + t - s; - ret[i..j] = [s..t]; // copy a block - i = j; - - // process a block of escaped characters - while (t < src.length) { - t++; // src[t] == '\\' - if (t == src.length) throwMTPException (`Warning: \" in string! There's currently no support for this during tokenising. 
Thus your input's probably been garbled!`); // next char is " - ret[i++] = replaceEscapedChar (src[t++]); // throws if it's invalid - } - } - return ret[0..i]; - } - else if (src.length >= 2 && src[0] == '[' && src[$-1] == ']') return toArray!(T) (src); - throwMTPException ("Invalid string: not quoted (\"*\") or char array (['.',...,'.'])"); -} -T parse(T : ubyte[]) (char[] src) { - src = Util.trim(src); - // Standard case: - if (src.length >= 2 && src[0] == '[' && src[$-1] == ']') return toArray!(T) (src); - // Special case: sequence of hex digits, each pair of which is a ubyte - if (src.length % 2 == 1) throwMTPException ("Invalid binary: odd number of chars"); - T ret; - ret.length = src.length / 2; // exact - for (uint i, pos; pos + 1 < src.length; ++i) { - ubyte x = readHexChar(src, pos) << 4; - x |= readHexChar(src, pos); - ret[i] = x; - } - return ret; -} - -T parse(T : char) (char[] src) { - src = Util.trim(src); - if (src.length < 3 || src[0] != '\'' || src[$-1] != '\'') - throwMTPException ("Invalid char: not quoted (\'*\')"); - if (src[1] != '\\' && src.length == 3) return src[1]; // Either non escaped - if (src.length == 4) return replaceEscapedChar (src[2]); // Or escaped - - // Report various errors; warnings for likely and difficult to tell cases: - if (src[1] == '\\' && src.length == 3) throwMTPException (`Warning: \' in char! There's currently no support for this during tokenising. 
Thus your input's probably been garbled!`); // next char is " - // Warn in case it's a multibyte UTF-8 character: - if (ret & 0xC0u) throwMTPException ("Invalid char: too long (non-ASCII UTF-8 characters cannot be read as a single character)"); - throwMTPException ("Invalid char: too long"); -} - -T parse(T : bool) (char[] src) { - src = Util.trim(src); - if (src == "true") return true; - if (src == "false") return false; - uint pos; - while (src.length > pos && src[pos] == '0') ++pos; // strip leading zeros - if (src.length == pos && pos > 0) return false; - if (src.length == pos + 1 && src[pos] == '1') return true; - throwMTPException ("Invalid bool: not true or false and doesn't evaluate to 0 or 1"); -} -T parse(T : byte) (char[] src) { - return toTInt!(T) (src); -} -T parse(T : short) (char[] src) { - return toTInt!(T) (src); -} -T parse(T : int) (char[] src) { - return toTInt!(T) (src); -} -T parse(T : long) (char[] src) { - return toTInt!(T) (src); -} -T parse(T : ubyte) (char[] src) { - return toTInt!(T) (src); -} -T parse(T : ushort) (char[] src) { - return toTInt!(T) (src); -} -T parse(T : uint) (char[] src) { - return toTInt!(T) (src); -} -T parse(T : ulong) (char[] src) { - return toTInt!(T) (src); -} - -T parse(T : float) (char[] src) { - return toTFloat!(T) (src); -} -T parse(T : double) (char[] src) { - return toTFloat!(T) (src); -} -T parse(T : real) (char[] src) { - return toTFloat!(T) (src); -} -//END parse templates - -//BEGIN Utility funcs -/** Templated read-int function to read (un)signed 1-4 byte integers. - * - * Actually a reimplementation of tango.text.convert.Integer toLong and parse functions. 
- */ -TInt toTInt(TInt) (char[] src) { - const char[] INT_OUT_OF_RANGE = "mde.mergetag.parse.toTInt: integer out of range"; - bool sign; - uint radix, ate, ate2; - - ate = cInt.trim (src, sign, radix); - ulong val = cInt.convert (src[ate..$], radix, &ate2); - ate += ate2; - - while (ate < src.length) { - if (src[ate] == ' ' || src[ate] == '\t') ++ate; - else throwMTPException ("mde.mergetag.parse.toTInt: invalid integer"); - } - - if (val > TInt.max) throwMTPException (INT_OUT_OF_RANGE); - if (sign) { - long sval = cast(long) -val; - if (sval > TInt.min) return cast(TInt) sval; - else throwMTPException (INT_OUT_OF_RANGE); - } - return cast(TInt) val; -} - -/** Basically a reimplementation of tango.text.convert.Float.toFloat which checks for trailing - * whitespace before throwing an exception for overlong input and throws my exception class - * when it does. - */ -TFloat toTFloat(TFloat) (char[] src) { - uint ate; - - TFloat x = cFloat.parse (src, &ate); - while (ate < src.length) { - if (src[ate] == ' ' || src[ate] == '\t') ++ate; - else throwMTPException ("mde.mergetag.parse.toTFloat: invalid number"); - } - return x; -} - -/* Throws an exception on invalid escape sequences. Supported escape sequences are the following - * subset of those supported by D: \" \' \\ \a \b \f \n \r \t \v - */ -private char replaceEscapedChar (char c) -{ - static char[char] escChars; - static bool escCharsFilled; // will be initialised false - - if (!escCharsFilled) { - // map of all supported escape sequences - escChars['"'] = '"'; - escChars['\''] = '\''; - escChars['\\'] = '\\'; - escChars['a'] = '\a'; - escChars['b'] = '\b'; - escChars['f'] = '\f'; - escChars['n'] = '\n'; - escChars['r'] = '\r'; - escChars['t'] = '\t'; - escChars['v'] = '\v'; - escCharsFilled = true; - } - - char* r = c in escChars; - if (r != null) return *r; - - throwMTPException ("Invalid escape sequence."); // we didn't return, so something failed -} - -// Reads one hex char: [0-9A-Fa-f]. 
Otherwise throws an exception. Doesn't check src.length. -private ubyte readHexChar (char[] src, inout uint pos) { - ubyte x; - if (src[pos] >= '0' && src[pos] <= '9') x = src[pos] - '0'; - else if (src[pos] >= 'A' && src[pos] <= 'F') x = src[pos] - 'A' + 10; - else if (src[pos] >= 'a' && src[pos] <= 'f') x = src[pos] - 'a' + 10; - else throwMTPException ("Invalid hex digit."); - ++pos; - return x; -} - -// Generic array reader -private T[] toArray(T : T[]) (char[] src) { - T[] ret = new T[16]; // avoid unnecessary allocations - uint i = 0; - foreach (char[] element; Util.quotes (src[1..$-1],",")) { - if (i == ret.length) ret.length = ret.length * 2; - ret[i] = parse!(T) (element); - ++i; - } - return ret[0..i]; -} - -private void throwMTPException (char[] msg) { - logger.warn (msg); // only small errors are trapped here - throw new MTParseException (); -} -//END Utility funcs
--- a/mde/mergetag/read.d Sat Oct 27 18:05:39 2007 +0100 +++ b/mde/mergetag/read.d Tue Oct 30 17:08:12 2007 +0000 @@ -81,7 +81,7 @@ final char[] ErrInFile; // something like "in \"path/file.mtt\"" final char[] fbuf; // file is read into this - MT_VERS fileVer = MT_VERS.INVALID; // Remains INVALID until set otherwise by CTOR. + MTFormatVersion.VERS fileVer = MTFormatVersion.VERS.INVALID; // Remains INVALID until set otherwise by CTOR. uint endOfHeader; bool allRead = false; // true if endOfHeader == fbuf.length or read([]) has run
--- a/mde/mergetag/write.d Sat Oct 27 18:05:39 2007 +0100 +++ b/mde/mergetag/write.d Tue Oct 30 17:08:12 2007 +0000 @@ -52,10 +52,15 @@ * An exception is thrown if neither test can deduce the writing method. */ IWriter makeWriter (char[] path, DataSet dataset, WriterMethod method = WriterMethod.Unspecified) { - makeWriter (new FilePath (path), dataset, method); + return makeWriter (new FilePath (path), dataset, method); } /** ditto */ IWriter makeWriter (PathView path, DataSet dataset, WriterMethod method = WriterMethod.Unspecified) { + void throwMTErr (char[] msg, Exception exc = new MTException) { + logger.error (msg); + throw exc; + } + if (method == WriterMethod.Unspecified) { if (path.ext == "mtt") method = WriterMethod.Text; else if (path.ext == "mtb") method = WriterMethod.Binary; @@ -67,14 +72,14 @@ } /// Interface for methods and data necessarily available in TextWriter and/or BinaryWriter. -scope interface IWriter { - char[][ID] indexTable; // only used by TextWriter, but available in both +interface IWriter { + /** Only used in a TextWriter; see TextWriter.indexTable(). + * + * Note that other implementors implement this as a blank function and will not throw an error. + */ + void indexTable (char[][ID]); - this (char[] path, DataSet dataset_); - this (PathView path, DataSet dataset_); - ~this (); - - void write (); + void write (); /// Writing method. } /+ @@ -101,7 +106,9 @@ * If any ID (for a section or tag) to be written is found in this table, the corresponding * string is written instead. */ - char[][ID] indexTable; // see setIndexLookupTable() doc for use. + void indexTable (char[][ID] iT) { + _indexTable = iT; + } private: // taken from tango.io.Console, mostly to make sure notepad can read our files: @@ -114,6 +121,8 @@ bool fileOpen = false; // file needs to be closed on exit bool writtenHeader = false; // The header MUST be written exactly once at the beginning of the file. + char[][ID] _indexTable; // see indexTable() doc for use. 
+ FileConduit conduit; // actual conduit; don't use directly when there's content in the buffer IBuffer buffer; // write strings directly to this (use opCall(void[]) ) Print!(char) format; // formats output to buffer @@ -179,15 +188,16 @@ private void writeSectionIdentifier (ID id) { buffer ("{"); - char[]* p = id in indexTable; // look for a string ID + char[]* p = id in _indexTable; // look for a string ID if (p) buffer ("\"")(*p)("\""); // write a string ID else format (cast(uint) id); // write a numeric ID buffer ("}")(Eol); } private void writeSection (DataSection sec) { + //FIXME - buffer (Eol); // blank line at end of file + buffer (Eol); // blank line at end of each section } private void throwMTErr (char[] msg, Exception exc = new MTException) {
// File: mde/text/exception.d
/*******************************************************************
 * Contains exception classes for Text (a collection of text utils).
 *
 * Publicly imports mde.exception.
 ******************************************************************/
module mde.text.exception;

public import mde.exception;

/// Base Text exception class.
class TextException : mdeException {
    /// Construct with a description of the error; it is prefixed with "Text: ".
    this (char[] msg) {
        super("Text: " ~ msg);
    }
    /// Construct without a message.
    this () {}
}

/** Thrown by the parse module on any error.
 */
class TextParseException : TextException {
    /// Construct with a description of the error (forwarded to TextException).
    this (char[] msg) {
        super (msg);
    }
    /// Construct without a message.
    this () {}
}

/** Thrown by the format module on any error.
 */
class TextFormatException : TextException {
    /// Construct with a description of the error (forwarded to TextException).
    this (char[] msg) {
        super (msg);
    }
    /// Construct without a message.
    this () {}
}
// File: mde/text/format.d
/**************************************************************************************************
 * This contains templates for converting various data-types to a char[].
 *
 * Copyright (c) 2007 Diggory Hardy.
 * Licensed under the Academic Free License version 3.0
 *
 * This module basically implements the following templated function for $(B most) basic D types:
 * bool, byte, short, int, long, ubyte, ushort, uint, ulong, float, double, real, char.
 * It also supports arrays of any supported type (including of other arrays) and has special
 * handling for strings (char[]) and binary (ubyte[]) data-types.
 * -----------------------------
 * char[] format(T) (T value);
 * -----------------------------
 * Because every overload is a specialised template, the type must be given explicitly when
 * calling, e.g. format!(int) (5) or format!(char[]) ("text").
 *
 * Output forms:
 *  - generic arrays: [a,b,c] (elements formatted recursively, comma separated, no spaces)
 *  - strings:        "text" with \" \' \\ \a \b \f \n \r \t \v escaped
 *  - binary:         pairs of lower-case hex digits, no delimiters
 *  - chars:          'c' or an escaped form such as '\n'
 *
 * Except for the bool formatter, which returns a string literal, every formatter returns freshly
 * allocated memory; no result aliases a stack buffer.
 *
 * There are also a few utility functions defined; the public ones have their own documentation.
 *
 * On errors, a warning is logged and a TextFormatException is thrown. No other exceptions should
 * be thrown and none thrown from functions used outside this module.
 *************************************************************************************************/
module mde.text.format;

// package imports
import mde.text.exception;

// tango imports
import cInt = tango.text.convert.Integer;
import cFloat = tango.text.convert.Float;
import Utf = tango.text.convert.Utf;
import Util = tango.text.Util;
import tango.util.log.Log : Log, Logger;

private Logger logger;
static this () {
    logger = Log.getLogger ("mde.text.format");
}

//BEGIN Convert templates
/* Idea: could extend format with a second parameter, containing flags for things like base to output.
 * Unnecessary for mergetag though.
 */

/** Generic array formatter: produces "[e0,e1,...]"; an empty array produces "[]".
 *
 * Note that within this template T is the $(B element) type.
 */
char[] format(T : T[]) (T[] val) {
    // Initial guess at the space needed; +2 guarantees room for the brackets even if val is empty.
    char[] ret = new char[val.length * defLength!(T) + 2];
    ret[0] = '[';
    size_t i = 1;                       // next free position (after the opening bracket)
    foreach (size_t n, T x; val) {
        char[] s = format!(T) (x);
        // Make sure there is room for a separator, this element and the closing bracket.
        size_t needed = i + s.length + 2;
        if (needed > ret.length)
            ret.length = (needed > ret.length * 2) ? needed : ret.length * 2;
        if (n > 0) ret[i++] = ',';      // separator between elements only
        ret[i .. i + s.length] = s;
        i += s.length;
    }
    ret[i++] = ']';
    return ret[0..i];
}
/// Formats a UTF-32 string by converting it to UTF-8 and formatting that as a string.
char[] format(T : dchar[]) (T val) {
    return format!(char[]) (Utf.toUtf8 (val));
}
/// Formats a UTF-16 string by converting it to UTF-8 and formatting that as a string.
char[] format(T : wchar[]) (T val) {
    return format!(char[]) (Utf.toUtf8 (val));
}
/// Formats a string as a double-quoted literal, escaping the characters isEscapableChar accepts.
char[] format(T : char[]) (T val) {
    // Worst case every character is escaped (2 chars each) plus the two quotes, so this storage
    // is ALWAYS enough.
    char[] ret = new char[val.length * 2 + 2];
    ret[0] = '"';
    size_t i = 1;                       // next free position (after the opening quote)
    for (size_t t = 0; t < val.length;) {
        // process a block of non-escapable characters
        size_t s = t;
        while (t < val.length && !isEscapableChar(val[t]))
            ++t;                        // skip all non-escapable chars
        size_t j = i + t - s;
        ret[i..j] = val[s..t];          // copy a block
        i = j;
        // process a block of escapable characters
        while (t < val.length && isEscapableChar(val[t])) {
            ret[i++] = '\\';                            // backslash; increment i
            ret[i++] = replaceEscapableChar(val[t++]);  // character; increment i and t
        }
    }
    ret[i++] = '"';
    return ret[0..i];
}
/// Formats binary data as a sequence of lower-case hex digit pairs (two per byte).
char[] format(T : ubyte[]) (T val) {
    static const char[16] digits = "0123456789abcdef";

    char[] ret = new char[val.length * 2];  // exact length
    size_t i = 0;
    foreach (ubyte x; val) {
        ret[i++] = digits[x >> 4];
        ret[i++] = digits[x & 0x0F];
    }
    return ret;
}

// Support for outputting a wide char... I recommend against trying to output these though.
const char[] WIDE_CHAR_ERROR = "Error: unicode non-ascii character cannot be converted to a single UTF-8 char";
/// Formats an ASCII-range dchar as a char; throws TextFormatException for anything above 127.
char[] format(T : dchar) (T val) {
    if (val <= 127u) return format!(char) (cast(char) val); // this char can be converted
    throwException (WIDE_CHAR_ERROR);
    assert (false);                     // not reached: throwException always throws
}
/// Formats an ASCII-range wchar as a char; throws TextFormatException for anything above 127.
char[] format(T : wchar) (T val) {
    if (val <= 127u) return format!(char) (cast(char) val); // this char can be converted
    throwException (WIDE_CHAR_ERROR);
    assert (false);                     // not reached: throwException always throws
}
/// Formats a char as a single-quoted literal, escaping it if isEscapableChar accepts it.
char[] format(T : char) (T val) {
    // Note: if (val > 127) "is invalid UTF-8 single char"
    // However we don't know what this is for, in particular if it will be recombined with other chars later

    char[] ret = new char[4];           // max length for an escaped char; heap so it may be returned
    ret[0] = '\'';

    if (!isEscapableChar (val)) {
        ret[1] = val;
        ret[2] = '\'';
        return ret[0..3];
    }
    ret[1] = '\\';
    ret[2] = replaceEscapableChar (val);
    ret[3] = '\'';
    return ret;
}

/// Formats a bool as "true" or "false".
char[] format(T : bool) (T val) {
    if (val) return "true";
    else return "false";
}

/// Formats an integer in decimal.
char[] format(T : byte) (T val) {
    return formatLong (val);
}
char[] format(T : short) (T val) {  /// ditto
    return formatLong (val);
}
char[] format(T : int) (T val) {    /// ditto
    return formatLong (val);
}
char[] format(T : long) (T val) {   /// ditto
    return formatLong (val);
}
char[] format(T : ubyte) (T val) {  /// ditto
    return formatLong (val);
}
char[] format(T : ushort) (T val) { /// ditto
    return formatLong (val);
}
char[] format(T : uint) (T val) {   /// ditto
    return formatLong (val);
}
/// Formats a ulong in decimal; values above long.max are not supported and throw.
char[] format(T : ulong) (T val) {
    if (val > cast(ulong) long.max) throwException ("No handling available for ulong where value > long.max");
    return formatLong (cast(long) val);
}

/// Formats a floating-point number in scientific notation with T.dig+2 decimals.
char[] format(T : float) (T val) {
    return formatFloat!(T) (val);
}
char[] format(T : double) (T val) { /// ditto
    return formatFloat!(T) (val);
}
char[] format(T : real) (T val) {   /// ditto
    return formatFloat!(T) (val);
}
//END Convert templates

//BEGIN Length templates
/* This template provides the initial length for strings for formatting various types. These strings
 * can be expanded; this value should cover 90% of cases or so.
 * FIXME: provide more specialisations (or not?)
 */
private {
    template defLength(T) { const uint defLength = 20; }
}
//END Length templates

//BEGIN Utility funcs
// Formats a signed integer in decimal; conversion errors are rethrown as TextFormatException.
private char[] formatLong (long val) {
    try {
        // Signed style so that negative values keep their sign.
        return cInt.toUtf8 (val, cInt.Style.Signed, cInt.Flags.Throw);
    } catch (Exception e) {
        throwException (e.msg);
    }
    assert (false);                     // not reached: both paths above return or throw
}
// Shared implementation of the float/double/real formatters.
private char[] formatFloat(T) (T val) {
    // t.dig+2+4+3 // should be sufficient length (mant + (neg, dot, e, exp neg) + exp (3,4,5 for float,double,real resp.))
    char[32] buf;                       // minimum allowed by assert in format
    // from old C++ tests, T.dig+2 gives best(?) accuracy
    // The result is duplicated so the returned slice does not refer to the stack buffer.
    return cFloat.format (buf, val, T.dig+2, true).dup;
}
// True for characters which are written as an escape sequence: \a \b \t \n \v \f \r " ' and \.
private bool isEscapableChar (char c) {
    return ((c <= '\r' && c >= '\a') || c == '\"' || c == '\'' || c == '\\');
}
// Returns the character which follows the backslash in the escape sequence for c.
// Warning: this DOES NOT check c is escapable; passing any other character is a programming error.
private char replaceEscapableChar (char c) {
    switch (c) {
        case '"':   return '"';
        case '\'':  return '\'';
        case '\\':  return '\\';
        case '\a':  return 'a';
        case '\b':  return 'b';
        case '\f':  return 'f';
        case '\n':  return 'n';
        case '\r':  return 'r';
        case '\t':  return 't';
        case '\v':  return 'v';
        default:    assert (false, "replaceEscapableChar called with a non-escapable char");
    }
}

// Logs msg as a warning and throws a TextFormatException. Never returns.
// The message is only logged; the exception itself is constructed without arguments.
private void throwException (char[] msg) {
    logger.warn (msg);          // only small errors are trapped here
    throw new TextFormatException ();
}
//END Utility funcs
// Diff header of this file in the changeset (new file):
// --- /dev/null Thu Jan 01 00:00:00 1970 +0000
// +++ b/mde/text/parse.d Tue Oct 30 17:08:12 2007 +0000
/**************************************************************************************************
 * This contains templates for converting a char[] to various data-types.
 *
 * Copyright (c) 2007 Diggory Hardy.
 * Licensed under the Academic Free License version 3.0
 *
 * This module basically implements the following templated function for $(B most) basic D types:
 * bool, byte, short, int, long, ubyte, ushort, uint, ulong, float, double, real, char.
 * It also supports arrays of any supported type (including of other arrays) and has special
 * handling for strings (char[]) and binary (ubyte[]) data-types.
 * -----------------------------
 * T parse(T) (char[] source);
 * -----------------------------
 *
 * There are also a few utility functions defined; the public ones have their own documentation.
 *
 * On errors, a warning is logged and a TextParseException is thrown. No other exceptions should
 * be thrown and none thrown from functions used outside this module.
 *************************************************************************************************/
module mde.text.parse;

// package imports
import mde.text.exception;

// tango imports
import cInt = tango.text.convert.Integer;
import cFloat = tango.text.convert.Float;
import Util = tango.text.Util;
import tango.util.log.Log : Log, Logger;

private Logger logger;
static this () {
    logger = Log.getLogger ("mde.text.parse");
}

//BEGIN parse templates
/** Arrays: the (trimmed) source must be enclosed in square brackets: [a, b, c]. */
T[] parse(T : T[]) (char[] src) {
    src = Util.trim(src);
    if (src.length >= 2 && src[0] == '[' && src[$-1] == ']') return toArray!(T[]) (src);
    throwException ("Invalid array: not [., ..., .]");  // always throws
}
/** Strings: either a double-quoted string with backslash escapes, or an array of chars. */
T parse(T : char[]) (char[] src) {
    src = Util.trim(src);
    if (src.length >= 2 && src[0] == '"' && src[$-1] == '"') {
        src = src[1..$-1];
        T ret;
        ret.length = src.length;        // maximum length; retract to actual length later
        uint i;
        for (uint t = 0; t < src.length;) {
            // process a block of non-escaped characters
            uint s = t;
            while (t < src.length && src[t] != '\\') ++t;   // non-escaped characters
            uint j = i + t - s;
            ret[i..j] = src[s..t];      // copy a block
            i = j;

            // process a block of escape sequences; stop at the first char which does not start one
            while (t < src.length && src[t] == '\\') {
                t++;                    // skip the backslash
                if (t == src.length) throwException (`Warning: \" in string! There's currently no support for this during tokenising. Thus your input's probably been garbled!`);  // next char is "
                ret[i++] = replaceEscapedChar (src[t++]);   // throws if it's invalid
            }
        }
        return ret[0..i];
    }
    else if (src.length >= 2 && src[0] == '[' && src[$-1] == ']') return toArray!(T) (src);
    throwException ("Invalid string: not quoted (\"*\") or char array (['.',...,'.'])");  // always throws
}
/** Binary: either an array of ubytes, or a sequence of hex digits (two per byte). */
T parse(T : ubyte[]) (char[] src) {
    src = Util.trim(src);
    // Standard case:
    if (src.length >= 2 && src[0] == '[' && src[$-1] == ']') return toArray!(T) (src);
    // Special case: sequence of hex digits, each pair of which is a ubyte
    if (src.length % 2 == 1) throwException ("Invalid binary: odd number of chars");
    T ret;
    ret.length = src.length / 2;        // exact
    for (uint i, pos; pos + 1 < src.length; ++i) {
        // readHexChar advances pos by one on each call
        ubyte x = cast(ubyte) (readHexChar(src, pos) << 4);
        x |= readHexChar(src, pos);
        ret[i] = x;
    }
    return ret;
}

/** Chars: a single-quoted character, either plain ('x') or a backslash escape ('\n'). */
T parse(T : char) (char[] src) {
    src = Util.trim(src);
    if (src.length < 3 || src[0] != '\'' || src[$-1] != '\'')
        throwException ("Invalid char: not quoted (\'*\')");
    if (src.length == 3) {
        if (src[1] != '\\') return src[1];              // Either non escaped
        // src is '\' : the closing quote has most likely been swallowed by an escaped quote
        throwException (`Warning: \' in char! There's currently no support for this during tokenising. Thus your input's probably been garbled!`);  // next char is "
    }
    if (src.length == 4 && src[1] == '\\') return replaceEscapedChar (src[2]);  // Or escaped

    // Report remaining errors.
    // Warn in case it's a multibyte UTF-8 character (every byte of one has the top bit set):
    if (src[1] & 0x80u) throwException ("Invalid char: too long (non-ASCII UTF-8 characters cannot be read as a single character)");
    throwException ("Invalid char: too long");  // always throws
}

/** Bools: "true" or "false", or an integer consisting only of zeros (false) or zeros followed by
 * a single 1 (true).
 */
T parse(T : bool) (char[] src) {
    src = Util.trim(src);
    if (src == "true") return true;
    if (src == "false") return false;
    uint pos;
    while (src.length > pos && src[pos] == '0') ++pos;  // strip leading zeros
    if (src.length == pos && pos > 0) return false;
    if (src.length == pos + 1 && src[pos] == '1') return true;
    throwException ("Invalid bool: not true or false and doesn't evaluate to 0 or 1");  // always throws
}

/** Integers: all types are read by toTInt, which range-checks against the target type. */
T parse(T : byte) (char[] src) {
    return toTInt!(T) (src);
}
/// ditto
T parse(T : short) (char[] src) {
    return toTInt!(T) (src);
}
/// ditto
T parse(T : int) (char[] src) {
    return toTInt!(T) (src);
}
/// ditto
T parse(T : long) (char[] src) {
    return toTInt!(T) (src);
}
/// ditto
T parse(T : ubyte) (char[] src) {
    return toTInt!(T) (src);
}
/// ditto
T parse(T : ushort) (char[] src) {
    return toTInt!(T) (src);
}
/// ditto
T parse(T : uint) (char[] src) {
    return toTInt!(T) (src);
}
/// ditto
T parse(T : ulong) (char[] src) {
    return toTInt!(T) (src);
}

/** Floating point: all types are read by toTFloat. */
T parse(T : float) (char[] src) {
    return toTFloat!(T) (src);
}
/// ditto
T parse(T : double) (char[] src) {
    return toTFloat!(T) (src);
}
/// ditto
T parse(T : real) (char[] src) {
    return toTFloat!(T) (src);
}
//END parse templates

//BEGIN Utility funcs
/** Templated read-int function to read (un)signed 1-8 byte integers.
 *
 * Actually a reimplementation of tango.text.convert.Integer toLong and parse functions.
 *
 * Trailing spaces and tabs are allowed; anything else after the number is an error. The value
 * must lie within TInt.min .. TInt.max inclusive. For unsigned types a minus sign is only
 * accepted in front of zero.
 */
TInt toTInt(TInt) (char[] src) {
    const char[] INT_OUT_OF_RANGE = "Integer out of range";
    bool sign;
    uint radix, ate, ate2;

    ate = cInt.trim (src, sign, radix);
    ulong val = cInt.convert (src[ate..$], radix, &ate2);   // magnitude only
    ate += ate2;

    while (ate < src.length) {
        if (src[ate] == ' ' || src[ate] == '\t') ++ate;
        else throwException ("Invalid integer");
    }

    if (sign) {
        // Largest allowed magnitude of a negative value: |TInt.min|, i.e. TInt.max + 1 for signed
        // types and 0 for unsigned types.
        ulong negLimit = (TInt.min < 0) ? cast(ulong) TInt.max + 1 : 0;
        if (val > negLimit) throwException (INT_OUT_OF_RANGE);
        // Negate in unsigned arithmetic (wraps), then truncate: yields the two's complement value
        // even when val == |TInt.min|.
        return cast(TInt) (0 - val);
    }
    if (val > cast(ulong) TInt.max) throwException (INT_OUT_OF_RANGE);
    return cast(TInt) val;
}

/** Basically a reimplementation of tango.text.convert.Float.toFloat which checks for trailing
 * whitespace before throwing an exception for overlong input and throws my exception class
 * when it does.
 */
TFloat toTFloat(TFloat) (char[] src) {
    uint ate;

    TFloat x = cFloat.parse (src, &ate);
    while (ate < src.length) {
        if (src[ate] == ' ' || src[ate] == '\t') ++ate;
        else throwException ("Invalid number");
    }
    return x;
}

/* Throws an exception on invalid escape sequences. Supported escape sequences are the following
 * subset of those supported by D: \" \' \\ \a \b \f \n \r \t \v
 *
 * c is the character following the backslash; the character it stands for is returned.
 */
private char replaceEscapedChar (char c)
{
    static char[char] escChars;
    static bool escCharsFilled;         // will be initialised false

    if (!escCharsFilled) {
        // map of all supported escape sequences
        escChars['"'] = '"';
        escChars['\''] = '\'';
        escChars['\\'] = '\\';
        escChars['a'] = '\a';
        escChars['b'] = '\b';
        escChars['f'] = '\f';
        escChars['n'] = '\n';
        escChars['r'] = '\r';
        escChars['t'] = '\t';
        escChars['v'] = '\v';
        escCharsFilled = true;
    }

    char* r = c in escChars;
    if (r != null) return *r;

    throwException ("Invalid escape sequence.");    // we didn't return, so something failed
}

// Reads one hex char: [0-9A-Fa-f]. Otherwise throws an exception. Doesn't check src.length.
// On success pos is advanced past the digit read.
private ubyte readHexChar (char[] src, inout uint pos) {
    ubyte x;
    if (src[pos] >= '0' && src[pos] <= '9') x = src[pos] - '0';
    else if (src[pos] >= 'A' && src[pos] <= 'F') x = src[pos] - 'A' + 10;
    else if (src[pos] >= 'a' && src[pos] <= 'f') x = src[pos] - 'a' + 10;
    else throwException ("Invalid hex digit.");
    ++pos;
    return x;
}

// Generic array reader. src must include the enclosing brackets (callers check this); the
// contents are split on commas outside quotes and each element is read with parse!(T).
private T[] toArray(T : T[]) (char[] src) {
    // An empty literal ("[]", possibly with whitespace inside) has no elements; handle it here
    // rather than depending on what Util.quotes yields for an empty range.
    if (Util.trim (src[1..$-1]).length == 0) return new T[0];

    T[] ret = new T[16];                // avoid unnecessary allocations
    uint i = 0;
    foreach (char[] element; Util.quotes (src[1..$-1],",")) {
        if (i == ret.length) ret.length = ret.length * 2;
        ret[i] = parse!(T) (element);
        ++i;
    }
    return ret[0..i];
}

// Logs msg as a warning, then throws this module's exception. Never returns normally.
private void throwException (char[] msg) {
    logger.warn (msg);                  // only small errors are trapped here
    throw new TextParseException ();
}
//END Utility funcs
// Diff header of this file in the changeset (new file):
// --- /dev/null Thu Jan 01 00:00:00 1970 +0000
// +++ b/mde/text/util.d Tue Oct 30 17:08:12 2007 +0000
/*******************************************************
 * A collection of text utility functions.
 *
 ******************************************************/
module mde.text.util;

/** Strips trailing spaces and tabs.
 *
 * Params:
 *  str = the text to trim (not modified).
 *
 * Returns: the leading slice of str which ends at its last character that is neither a space
 * nor a tab; an empty slice if str is empty or contains only spaces and tabs. No copy is made.
 */
T[] postTrim(T : T[]) (T[] str) {
    size_t end = str.length;            // one past the last character to keep
    // step back while the last kept character is a space OR a tab
    while (end > 0 && (str[end-1] == ' ' || str[end-1] == '\t')) --end;
    return str[0..end];
}