projects/mde: mde/mergetag/parse/parseFrom.d comparison

comparison mde/mergetag/parse/parseFrom.d @ 70:7fc0a8295c83

Moved my parseTo and parseFrom modules from tango.scrapple to mde in order to reduce dependencies.

author	Diggory Hardy <diggory.hardy@gmail.com>
date	Fri, 04 Jul 2008 19:04:16 +0100
parents
children

comparison

equal deleted inserted replaced

-:ead4afc6d0b8
+:7fc0a8295c83
+/**************************************************************************************************
+* copyright: Copyright (c) 2007-2008 Diggory Hardy.
+*
+* author: Diggory Hardy, diggory.hardy@gmail.com
+*
+* license: BSD style: $(LICENSE)
+*
+* This contains templates for converting various data-types to a char[].
+*
+* parseFrom is roughly the inverse of $(B parseTo).
+* It is also available in tango.scrapple.
+*
+* This module basically implements the following templated function for most basic D types:
+* bool, byte, short, int, long, ubyte, ushort, uint, ulong, float, double, real, char, wchar,
+* dchar.
+* It also supports arrays of any supported type (including of other arrays) and has special
+* handling for strings (char[]) and binary (ubyte[]) data-types.
+* -----------------------------
+* char[] parseFrom(T) (T value);
+* -----------------------------
+*
+* $(I value) is the value to convert; it is converted to a string and returned.
+*
+* Syntax:
+* The syntax is the same as parseTo; but since this module only generates formatted output
+* knowing the syntax shouldn't be necessary. There is currently no way to specify options like
+* output base for ints, precision of floats, or
+* whether to write char[] or ubyte[] types as arrays or in their more compact forms.
+*
+* Throws:
+* On errors, an exception is thrown (UnicodeException or IllegalArgumentException). No other
+* exceptions should be thrown.
+*
+* Remarks:
+* Output is always UTF-8 (char[]); there is currently no support for producing wchar/dchar strings.
+* wchar[] and dchar[] strings given as input are converted from UTF-16/32 to UTF-8 before formatting.
+* Be warned though that a single wchar/dchar value can only be converted if it is ASCII: any other
+* character will not fit in a single char, and a UnicodeException will be thrown.
+*
+* The code does involve some heap activity; this is necessary anyway for returning dynamic arrays.
+* (Slices of a pre-allocated array could be returned instead, but for many uses would have to be
+* duplicated before storage, leading to less efficient operation.)
+* Most memory allocation has been kept to a minimum.
+*
+* Unlike the parseTo!() module, the parseFrom templates could be re-written to use static-ifs
+* instead of type specialisation, thus allowing type inference. However I likely won't bother
+* implementing this myself.
+*
+* Examples:
+* ------------------------------------------------------------------------------------------------
+* // Examples are printed via Cout.
+*
+* // Basic examples:
+* Cout (parseFrom!(byte) (-13)).newline;                       // -13
+* Cout (parseFrom!(real) (2.56e11)).newline;                   // 2.55999999999999990000e+11
+* Cout (parseFrom!(double[]) ([0.0, 1.0, 2.0, 3.0])).newline;  // [0.00000000000000000,1.00000000000000000,2.00000000000000000,3.00000000000000000]
+* Cout (parseFrom!(bool[]) ([true,false,false])).newline;      // [true,false,false]
+*
+* // String and ubyte[] special syntaxes (always used):
+* Cout (parseFrom!(char[]) ("A string.")).newline;             // "A string." (including quotes)
+* Cout (parseFrom!(ubyte[]) (cast(ubyte[]) [5u, 0xF1u, 0x10u])).newline;   // 0x05f110
+*
+* // Associative arrays:
+* Cout (parseFrom!(char[][byte]) ([-1:"negative one"[], 0:"zero", 1:"one"])).newline;  // [0:"zero",1:"one",-1:"negative one"]
+*
+* // No limit on complexity...
+* char[] somethingComplicated = parseFrom!(real[][][bool[int[][]]]) (...);
+* ------------------------------------------------------------------------------------------------
+*************************************************************************************************/
+module mde.mergetag.parse.parseFrom;
+// tango imports
+import tango.core.Exception : UnicodeException, IllegalArgumentException;
+import cInt = tango.text.convert.Integer;
+import cFloat = tango.text.convert.Float;
+import Utf = tango.text.convert.Utf;
+import Util = tango.text.Util;
+//BEGIN parseFrom templates
+/* Idea: could extend parseFrom with a second parameter, containing flags for things like base to output.
+* Unnecessary for mergetag though.
+*/
+// Associative arrays
+/** Formats an associative array as [key:value,key:value,...].
+*
+* Keys are formatted with parseFrom!(S) and values with parseFrom!(T); entries appear in the
+* iteration order of the associative array. An empty array is formatted as [].
+*
+* Returns: a slice of a newly allocated buffer containing the formatted text.
+*/
+char[] parseFrom(T : T[S], S) (T[S] val) {
+    char[] ret;
+    // A guess, including values themselves and [,:] elements (must be at least 2).
+    ret.length = val.length * (defLength!(T) + defLength!(S) + 2) + 2;
+    ret[0] = '[';
+    uint i = 1;
+    foreach (S k, T v; val) {
+        char[] s = parseFrom!(S) (k) ~ ":" ~ parseFrom!(T) (v);
+        i += s.length;
+        // Room is needed for s plus the separator written at index i. Doubling once is not
+        // enough when a single entry is longer than the current buffer, so grow until it fits.
+        while (i+1 >= ret.length) ret.length = ret.length * 2;
+        ret[i-s.length .. i] = s;
+        ret[i++] = ',';
+    }
+    if (i == 1) ++i;    // empty array: there is no trailing comma to overwrite
+    ret[i-1] = ']';     // replaces the last comma
+    return ret[0..i];
+}
+debug (UnitTest) unittest {	// checks associative-array formatting; only compiled when the UnitTest debug identifier is set
+char[] X = parseFrom!(char[][char]) (['a':cast(char[])"animal", 'b':['b','u','s']]);
+char[] Y = `['a':"animal",'b':"bus"]`;	// NOTE(review): assumes the array is iterated with 'a' before 'b' — confirm the iteration order is dependable
+assert (X == Y);
+}
+// Arrays
+/** Formats an array as [elem,elem,...], formatting each element with parseFrom!(T).
+*
+* An empty array is formatted as [].
+*
+* Returns: a slice of a newly allocated buffer containing the formatted text.
+*/
+char[] parseFrom(T : T[]) (T[] val) {
+    char[] ret;
+    // A guess, including commas and brackets (must be at least 2)
+    ret.length = val.length * (defLength!(T) + 1) + 2;
+    ret[0] = '[';
+    uint i = 1;
+    foreach (T x; val) {
+        char[] s = parseFrom!(T) (x);
+        i += s.length;
+        // Room is needed for s plus the separator written at index i. Doubling once is not
+        // enough when a single element is longer than the current buffer (e.g. one long string
+        // in a char[][]), so grow until it fits.
+        while (i+1 >= ret.length) ret.length = ret.length * 2;
+        ret[i-s.length .. i] = s;
+        ret[i++] = ',';
+    }
+    if (i == 1) ++i;    // empty array: there is no trailing comma to overwrite
+    ret[i-1] = ']';     // replaces the last comma
+    return ret[0..i];
+}
+// Strings (array special case)
+/** Formats a string as a double-quoted literal, writing each escapable character as a backslash
+* followed by its replacement character.
+*/
+char[] parseFrom(T : char[]) (T val) {
+    // Worst case: every character is escaped (two chars each), plus the two quotes.
+    char[] buf = new char[val.length * 2 + 2];
+    uint pos = 0;
+    buf[pos++] = '"';
+    foreach (char c; val) {
+        if (isEscapableChar (c)) {
+            buf[pos++] = '\\';
+            buf[pos++] = replaceEscapableChar (c);
+        } else {
+            buf[pos++] = c;     // ordinary character: copied through unchanged
+        }
+    }
+    buf[pos++] = '"';
+    return buf[0..pos];
+}
+// Unicode conversions for strings:
+char[] parseFrom(T : dchar[]) (T val) {
+    // Convert to UTF-8 first, then format like any other string. Utf.toString may throw a
+    // UnicodeException; it is intentionally left to propagate.
+    char[] utf8 = Utf.toString (val);
+    return parseFrom!(char[]) (utf8);
+}
+char[] parseFrom(T : wchar[]) (T val) {
+    // Convert to UTF-8 first, then format like any other string. Utf.toString may throw a
+    // UnicodeException; it is intentionally left to propagate.
+    char[] utf8 = Utf.toString (val);
+    return parseFrom!(char[]) (utf8);
+}
+// Binary (array special case)
+/** Formats binary data as 0x followed by two lower-case hex digits per byte. */
+char[] parseFrom(T : ubyte[]) (T val) {
+    static const char[16] hexDigits = "0123456789abcdef";
+    char[] text = new char[val.length * 2 + 2];     // prefix plus two digits per byte: exact length
+    text[0] = '0';
+    text[1] = 'x';
+    for (uint n = 0; n < val.length; ++n) {
+        ubyte b = val[n];
+        text[2*n + 2] = hexDigits[b >> 4];      // high nibble
+        text[2*n + 3] = hexDigits[b & 0x0F];    // low nibble
+    }
+    return text;
+}
+debug (UnitTest) unittest {	// exercises the generic array, char[], wchar[], dchar[] and ubyte[] overloads
+// generic array stuff:
+assert (parseFrom!(double[]) ([1.0, 1.0e-10]) == `[1.00000000000000000,0.10000000000000000e-09]`);
+assert (parseFrom!(double[]) (cast(double[]) []) == `[]`);		// empty array
+// char[] conversions, with commas, escape sequences and multichar UTF8 characters:
+assert (parseFrom!(char[][]) ([ ".\""[], [',','\''] ,"!\b€" ]) == `[".\"",",\'","!\b€"]`);
+// wchar[] and dchar[] conversions:
+// The characters were pretty-much pulled at random from unicode tables.
+// The last few cause some weird (display only) effects in my editor.
+assert (parseFrom!(wchar[]) ("Test string: ¶α؟अกሀ搀"w) == "\"Test string: ¶α؟अกሀ搀\"");
+assert (parseFrom!(dchar[]) ("Test string: ¶α؟अกሀ搀"d) == "\"Test string: ¶α؟अกሀ搀\"");
+assert (parseFrom!(ubyte[]) (cast(ubyte[]) [0x01, 0xF2, 0xAC]) == `0x01f2ac`);	// ubyte[] special notation
+}
+// Basic types
+// Char
+/** Formats a single char as a single-quoted literal, backslash-escaping it when escapable. */
+char[] parseFrom(T : char) (T val) {
+    // NOTE: a value above 127 is not a valid single-char UTF-8 sequence, but it is passed through
+    // unchanged since the caller may recombine it with other chars later.
+    // A dynamic array is allocated because a reference to a static array cannot be returned.
+    char[] quoted = new char[4];    // longest result is an escaped char, e.g. '\n'
+    quoted[0] = '\'';
+    if (isEscapableChar (val)) {
+        quoted[1] = '\\';
+        quoted[2] = replaceEscapableChar (val);
+        quoted[3] = '\'';
+        return quoted;
+    }
+    quoted[1] = val;
+    quoted[2] = '\'';
+    return quoted[0..3];
+}
+// Basic unicode conversions for wide chars.
+// NOTE: only values up to 127 (ASCII) are converted; any other wide char will not fit in a single UTF-8 encoded char.
+const char[] WIDE_CHAR_ERROR = "Error: unicode non-ascii character cannot be converted to a single UTF-8 char";
+char[] parseFrom(T : wchar) (T val) {
+    // Anything above 127 cannot be represented by a single char, so it is rejected.
+    if (val > 127u) throw new UnicodeException (WIDE_CHAR_ERROR, 0);
+    return parseFrom!(char) (cast(char) val);
+}
+char[] parseFrom(T : dchar) (T val) {
+    // Anything above 127 cannot be represented by a single char, so it is rejected.
+    if (val > 127u) throw new UnicodeException (WIDE_CHAR_ERROR, 0);
+    return parseFrom!(char) (cast(char) val);
+}
+debug (UnitTest) unittest {	// checks the char, wchar and dchar overloads
+assert (parseFrom!(char) ('\'') == "\'\\\'\'");	// an escapable char comes out as quote, backslash, char, quote
+assert (parseFrom!(wchar) ('X') == "'X'");
+assert (parseFrom!(dchar) ('X') == "'X'");
+}
+// Bool
+char[] parseFrom(T : bool) (T val) {
+    // Yields the literal text "true" or "false".
+    char[] text = "false";
+    if (val) text = "true";
+    return text;
+}
+// too simple to need a unittest
+// Signed ints
+char[] parseFrom(T : byte) (T val) {
+    long widened = val;     // formatLong takes a long
+    return formatLong (widened);
+}
+char[] parseFrom(T : short) (T val) {
+    long widened = val;     // formatLong takes a long
+    return formatLong (widened);
+}
+char[] parseFrom(T : int) (T val) {
+    long widened = val;     // formatLong takes a long
+    return formatLong (widened);
+}
+char[] parseFrom(T : long) (T val) {
+    long number = val;      // already the type formatLong takes
+    return formatLong (number);
+}
+// Unsigned ints
+char[] parseFrom(T : ubyte) (T val) {
+    long widened = val;     // every ubyte value fits in the long that formatLong takes
+    return formatLong (widened);
+}
+char[] parseFrom(T : ushort) (T val) {
+    long widened = val;     // every ushort value fits in the long that formatLong takes
+    return formatLong (widened);
+}
+char[] parseFrom(T : uint) (T val) {
+    long widened = val;     // every uint value fits in the long that formatLong takes
+    return formatLong (widened);
+}
+char[] parseFrom(T : ulong) (T val) {
+    // formatLong takes a signed long, so values above long.max are rejected rather than formatted.
+    bool representable = val <= cast(ulong) long.max;
+    if (representable) return formatLong (val);
+    throw new IllegalArgumentException ("No handling available for ulong where value > long.max");
+}
+debug (UnitTest) unittest {	// checks signed output and an array of uints written in binary, octal, hex and decimal
+assert (parseFrom!(byte) (cast(byte) -5) == "-5");
+// annoyingly, octal syntax differs from D (blame tango):
+assert (parseFrom!(uint[]) ([0b0100u,0724,0xFa59c,0xFFFFFFFF,0]) == "[4,468,1025436,4294967295,0]");	// 0724 is a D octal literal (468); output is always decimal
+}
+// Floats
+/* Old calculation (not used):
+t.dig+2+4+3	// should be sufficient length (mant + (neg, dot, e, exp neg) + exp (3,4,5 for float,double,real resp.)) */
+char[] parseFrom(T : float) (T val) {
+    // From old C++ tests, T.dig+2 decimals gives best(?) accuracy.
+    int decimals = T.dig + 2;
+    // 32 is the minimum buffer size allowed by an assert in cFloat.format.
+    return cFloat.format (new char[32], val, decimals, 1);
+}
+char[] parseFrom(T : double) (T val) {
+    int decimals = T.dig + 2;   // same precision rule as the float overload
+    return cFloat.format (new char[32], val, decimals, 1);
+}
+char[] parseFrom(T : real) (T val) {
+    int decimals = T.dig + 2;   // same precision rule as the float overload
+    return cFloat.format (new char[32], val, decimals, 1);
+}
+debug (UnitTest) unittest {	// pins the exact text produced by the float, double and real overloads
+// NOTE: these numbers are not particularly meaningful.
+assert (parseFrom!(float) (0.0f) == "0.00000000");
+assert (parseFrom!(double) (-1e25) == "-1.00000000000000000e+25");
+assert (parseFrom!(real) (cast(real) 4.918e300) == "4.91800000000000000000e+300");
+}
+//END parseFrom templates
+//BEGIN Length templates
+/* This template provides the initial length for strings for formatting various types. These strings
+* can be expanded; this value is intended to cover 90% of cases or so.
+*
+* NOTE: This template was intended to provide specialisations for different types.
+* This one value should do reasonably well for most types.
+*/
+private {	// module-private: used by the array overloads to size their initial buffers
+template defLength(T)        { const uint defLength = 20; }	// fallback for any type without a specialisation below
+template defLength(T : char) { const uint defLength = 4;  }	// a quoted char is at most 4 chars long when escaped
+template defLength(T : bool) { const uint defLength = 5;  }	// length of "false", the longer of the two bool texts
+}
+//END Length templates
+//BEGIN Utility funcs
+private char[] formatLong (long val) {
+    // cInt.toString may throw an IllegalArgumentException; it is deliberately left to propagate.
+    char[] text = cInt.toString (val);
+    return text;
+}
+// True for the characters that are written as a backslash escape: the control characters
+// '\a' through '\r', the two quote characters and the backslash itself.
+private bool isEscapableChar (char c) {
+    switch (c) {
+        case '\a', '\b', '\t', '\n', '\v', '\f', '\r':
+        case '\"', '\'', '\\':
+            return true;
+        default:
+            return false;
+    }
+}
+// Throws on unsupported escape sequences; however this should never actually happen within parseFrom.
+// Maps an escapable character to the character that follows the backslash in its escape sequence.
+// Any other character results in an IllegalArgumentException.
+private char replaceEscapableChar (char c) {
+    switch (c) {
+        case '\a': return 'a';
+        case '\b': return 'b';
+        case '\t': return 't';
+        case '\n': return 'n';
+        case '\v': return 'v';
+        case '\f': return 'f';
+        case '\r': return 'r';
+        case '\"': return '\"';
+        case '\'': return '\'';
+        case '\\': return '\\';
+        default:   break;
+    }
+    // Not one of the escapable characters:
+    throw new IllegalArgumentException ("Character is not escapable (internal parseFrom error)");
+}
+debug (UnitTest) {	// the Console import and the test are only compiled when the UnitTest debug identifier is set
+import tango.io.Console;
+unittest {
+Cout ("Running unittest: parseFrom ...").flush;	// progress message, flushed before the check runs
+assert (parseFrom!(char[]) ("\a\b\t\n\v\f\r\"\'\\") == "\"\\a\\b\\t\\n\\v\\f\\r\\\"\\\'\\\\\"");	// every escapable char, in order, each written as backslash + replacement
+Cout (" complete").newline;
+}
+}
+//END Utility funcs

Mercurial > projects > mde

comparison mde/mergetag/parse/parseFrom.d @ 70:7fc0a8295c83