Mercurial > projects > mde
view mde/text/parse.d @ 3:485c98ecbd91
text.parse: fixed a small bug with char[]'s.
committer: Diggory Hardy <diggory.hardy@gmail.com>
author | Diggory Hardy <diggory.hardy@gmail.com> |
---|---|
date | Sat, 03 Nov 2007 16:06:06 +0000 |
parents | 18491334a525 |
children | 9a990644948c |
line wrap: on
line source
/**************************************************************************************************
 * This contains templates for converting a char[] to various data-types.
 *
 * Copyright (c) 2007 Diggory Hardy.
 * Licensed under the Academic Free License version 3.0
 *
 * This module basically implements the following templated function for $(B most) basic D types:
 * bool, byte, short, int, long, ubyte, ushort, uint, ulong, float, double, real, char.
 * It also supports arrays of any supported type (including of other arrays) and has special
 * handling for strings (char[]) and binary (ubyte[]) data-types.
 * -----------------------------
 * T parse(T) (char[] source);
 * -----------------------------
 *
 * There are also a few utility functions defined; the public ones have their own documentation.
 *
 * On errors, a warning is logged and a TextParseException is thrown. No other exceptions should
 * be thrown and none thrown from functions used outside this module.
 *************************************************************************************************/
module mde.text.parse;

// package imports
import mde.text.exception;

// tango imports
import cInt = tango.text.convert.Integer;
import cFloat = tango.text.convert.Float;
import Util = tango.text.Util;
import tango.util.log.Log : Log, Logger;

private Logger logger;
static this () {
    logger = Log.getLogger ("mde.text.parse");
}

//BEGIN parse templates

/** Parses an array of any supported element type.
 *
 * After trimming, the input must be enclosed in square brackets; the contents are split and
 * parsed element-wise by toArray. Anything else is reported via throwException.
 */
T[] parse(T : T[]) (char[] src) {
    src = Util.trim(src);
    if (src.length >= 2 && src[0] == '[' && src[$-1] == ']') return toArray!(T[]) (src);
    throwException ("Invalid array: not [., ..., .]");
}

/** Parses a string.
 *
 * Accepts either a double-quoted string (with the escape sequences supported by
 * replaceEscapedChar) or a bracketed array of chars (['a','b',...]).
 */
T parse(T : char[]) (char[] src) {
    src = Util.trim(src);
    if (src.length >= 2 && src[0] == '"' && src[$-1] == '"') {
        src = src[1..$-1];      // strip the quotes
        T ret;
        ret.length = src.length;    // maximum length; retract to actual length later
        uint i = 0;                 // write position in ret
        for (uint t = 0; t < src.length;) {
            // process a block of non-escaped characters
            uint s = t;
            while (t < src.length && src[t] != '\\') ++t;   // non-escaped characters
            uint j = i + t - s;
            ret[i..j] = src[s..t];  // copy a block
            i = j;

            // process a block of escaped characters
            while (t < src.length && src[t] == '\\') {
                t++;
                // A backslash as the very last character means the closing quote was escaped.
                if (t == src.length) throwException (`Warning: \" in string! There's currently no support for this during tokenising. Thus your input's probably been garbled!`);
                ret[i++] = replaceEscapedChar (src[t++]);   // throws if it's invalid
            }
        }
        return ret[0..i];
    }
    else if (src.length >= 2 && src[0] == '[' && src[$-1] == ']') return toArray!(T) (src);
    throwException ("Invalid string: not quoted (\"*\") or char array (['.',...,'.'])");
}

/** Parses binary data.
 *
 * Accepts either a bracketed array of ubytes or a plain sequence of hex digits, each pair of
 * which encodes one ubyte (high nibble first).
 */
T parse(T : ubyte[]) (char[] src) {
    src = Util.trim(src);
    // Standard case:
    if (src.length >= 2 && src[0] == '[' && src[$-1] == ']') return toArray!(T) (src);
    // Special case: sequence of hex digits, each pair of which is a ubyte
    if (src.length % 2 == 1) throwException ("Invalid binary: odd number of chars");
    T ret;
    ret.length = src.length / 2;    // exact
    // readHexChar advances pos; the even length guarantees both reads are in bounds.
    for (uint i, pos; pos + 1 < src.length; ++i) {
        ubyte x = cast(ubyte) (readHexChar(src, pos) << 4);
        x |= readHexChar(src, pos);
        ret[i] = x;
    }
    return ret;
}

/** Parses a single-quoted char: either one plain character ('a') or one escape sequence ('\n').
 *
 * Only single-byte characters can be returned; longer input is rejected.
 */
T parse(T : char) (char[] src) {
    src = Util.trim(src);
    if (src.length < 3 || src[0] != '\'' || src[$-1] != '\'')
        throwException ("Invalid char: not quoted (\'*\')");

    if (src.length == 3) {
        if (src[1] != '\\') return src[1];      // plain, non-escaped character
        // A lone backslash: the closing quote was escaped, i.e. the input was \' .
        throwException (`Warning: \' in char! There's currently no support for this during tokenising. Thus your input's probably been garbled!`);
    }

    // An escape sequence is exactly: quote, backslash, char, quote.
    if (src.length == 4 && src[1] == '\\') return replaceEscapedChar (src[2]);

    // Report various errors; warnings for likely and difficult to tell cases:
    // Warn in case it's a multibyte UTF-8 character (every byte of one has the high bit set):
    if (src[1] & 0x80) throwException ("Invalid char: too long (non-ASCII UTF-8 characters cannot be read as a single character)");
    throwException ("Invalid char: too long");
}

/** Parses a bool: "true", "false", or an integer consisting only of zeros (false) or of
 * optional leading zeros followed by a single 1 (true).
 */
T parse(T : bool) (char[] src) {
    src = Util.trim(src);
    if (src == "true") return true;
    if (src == "false") return false;
    uint pos;
    while (src.length > pos && src[pos] == '0') ++pos;  // strip leading zeros
    if (src.length == pos && pos > 0) return false;     // at least one zero and nothing else
    if (src.length == pos + 1 && src[pos] == '1') return true;
    throwException ("Invalid bool: not true or false and doesn't evaluate to 0 or 1");
}

// Integer types all forward to toTInt, floating-point types to toTFloat.
T parse(T : byte) (char[] src) { return toTInt!(T) (src); }
T parse(T : short) (char[] src) { return toTInt!(T) (src); }
T parse(T : int) (char[] src) { return toTInt!(T) (src); }
T parse(T : long) (char[] src) { return toTInt!(T) (src); }
T parse(T : ubyte) (char[] src) { return toTInt!(T) (src); }
T parse(T : ushort) (char[] src) { return toTInt!(T) (src); }
T parse(T : uint) (char[] src) { return toTInt!(T) (src); }
T parse(T : ulong) (char[] src) { return toTInt!(T) (src); }

T parse(T : float) (char[] src) { return toTFloat!(T) (src); }
T parse(T : double) (char[] src) { return toTFloat!(T) (src); }
T parse(T : real) (char[] src) { return toTFloat!(T) (src); }
//END parse templates

//BEGIN Utility funcs
/** Templated read-int function to read (un)signed 1-8 byte integers.
 *
 * Actually a reimplementation of tango.text.convert.Integer toLong and parse functions.
 *
 * Trailing spaces and tabs are allowed; any other trailing character is an error. The value is
 * range-checked against TInt, including the most negative value of signed types. A negative
 * value is only accepted for an unsigned TInt if it is zero.
 *
 * NOTE(review): overflow of the 64-bit magnitude inside cInt.convert is not checked here;
 * confirm against Tango's behaviour if inputs wider than 64 bits matter.
 */
TInt toTInt(TInt) (char[] src) {
    const char[] INT_OUT_OF_RANGE = "Integer out of range";

    bool sign;
    uint radix, ate, ate2;

    // trim handles the leading part (sign/radix); convert reads the magnitude after it.
    ate = cInt.trim (src, sign, radix);
    ulong val = cInt.convert (src[ate..$], radix, &ate2);
    ate += ate2;

    // Only spaces and tabs may follow the number.
    while (ate < src.length) {
        if (src[ate] == ' ' || src[ate] == '\t') ++ate;
        else throwException ("Invalid integer");
    }

    static if (TInt.min < 0) {
        // Signed target: the largest allowed negative magnitude is TInt.max + 1.
        if (sign) {
            if (val > cast(ulong) TInt.max + 1) throwException (INT_OUT_OF_RANGE);
            // Two's-complement negation in ulong, then truncate; also correct for TInt.min.
            return cast(TInt) (0 - val);
        }
    } else {
        // Unsigned target: the only representable "negative" value is -0.
        if (sign && val != 0) throwException (INT_OUT_OF_RANGE);
    }

    if (val > cast(ulong) TInt.max) throwException (INT_OUT_OF_RANGE);
    return cast(TInt) val;
}

/** Basically a reimplementation of tango.text.convert.Float.toFloat which checks for trailing
 * whitespace before throwing an exception for overlong input and throws my exception class
 * when it does.
 */
TFloat toTFloat(TFloat) (char[] src) {
    uint ate;
    TFloat x = cFloat.parse (src, &ate);
    // Only spaces and tabs may follow the number.
    while (ate < src.length) {
        if (src[ate] == ' ' || src[ate] == '\t') ++ate;
        else throwException ("Invalid number");
    }
    return x;
}

/* Throws an exception on invalid escape sequences. Supported escape sequences are the following
 * subset of those supported by D: \" \' \\ \a \b \f \n \r \t \v
 *
 * c is the character following the backslash; the character it stands for is returned.
 */
private char replaceEscapedChar (char c) {
    switch (c) {
        case '"':  return '"';
        case '\'': return '\'';
        case '\\': return '\\';
        case 'a':  return '\a';
        case 'b':  return '\b';
        case 'f':  return '\f';
        case 'n':  return '\n';
        case 'r':  return '\r';
        case 't':  return '\t';
        case 'v':  return '\v';
        default:   break;
    }
    // we didn't return, so something failed
    throwException ("Invalid escape sequence: \\"~c);
}

// Reads one hex char: [0-9A-Fa-f]. Otherwise throws an exception. Doesn't check src.length.
// On success pos is advanced past the digit and its value (0-15) is returned.
private ubyte readHexChar (char[] src, inout uint pos) {
    ubyte x;
    char c = src[pos];
    if (c >= '0' && c <= '9') x = cast(ubyte) (c - '0');
    else if (c >= 'A' && c <= 'F') x = cast(ubyte) (c - 'A' + 10);
    else if (c >= 'a' && c <= 'f') x = cast(ubyte) (c - 'a' + 10);
    else throwException ("Invalid hex digit.");
    ++pos;
    return x;
}

// Generic array reader: src must include the enclosing brackets, which are stripped here.
// The contents are split on commas with Util.quotes and each element is parsed with parse!(T).
private T[] toArray(T : T[]) (char[] src) {
    T[] ret = new T[16];    // avoid unnecessary allocations
    uint i = 0;
    foreach (char[] element; Util.quotes (src[1..$-1],",")) {
        if (i == ret.length) ret.length = ret.length * 2;   // grow geometrically
        ret[i] = parse!(T) (element);
        ++i;
    }
    return ret[0..i];   // retract to the number of elements actually read
}

// Logs msg as a warning and throws a TextParseException. Never returns normally.
private void throwException (char[] msg) {
    logger.warn (msg);          // only small errors are trapped here
    throw new TextParseException ();
}
//END Utility funcs