Mercurial > projects > mde
view mde/text/format.d @ 6:dcb24afa0dce
Some fixes from mde/text/format.d unittests plus a few more fixes.
committer: Diggory Hardy <diggory.hardy@gmail.com>
author | Diggory Hardy <diggory.hardy@gmail.com> |
---|---|
date | Thu, 10 Jan 2008 18:33:24 +0000 |
parents | 9a990644948c |
children | b544c3a7c9ca |
line wrap: on
line source
/**************************************************************************************************
 * This contains templates for converting various data-types to a char[].
 *
 * Copyright: Copyright © 2007 Diggory Hardy.
 * Authors: Diggory Hardy, diggory.hardy@gmail.com
 * License: Licensed under the Academic Free License version 3.0
 *
 * This module basically implements the following templated function for $(B most) basic D types:
 * bool, byte, short, int, long, ubyte, ushort, uint, ulong, float, double, real, char.
 * It also supports arrays of any supported type (including of other arrays) and has special
 * handling for strings (char[]) and binary (ubyte[]) data-types.
 * -----------------------------
 * char[] format(T) (T value);
 * -----------------------------
 *
 * There are also a few utility functions defined; the public ones have their own documentation.
 *
 * On errors, a warning is logged and a TextFormatException is thrown. No other exceptions should
 * be thrown and none thrown from functions used outside this module.
 *************************************************************************************************/
module mde.text.format;

// TODO: write unittests; check strings generate quotes.

// package imports
import mde.text.exception;

// tango imports
import cInt = tango.text.convert.Integer;
import cFloat = tango.text.convert.Float;
import Utf = tango.text.convert.Utf;
import Util = tango.text.Util;
import tango.util.log.Log : Log, Logger;

// Module-wide logger; used to log a warning before an exception is thrown.
private Logger logger;
static this () {
    logger = Log.getLogger ("mde.text.format");
}

//BEGIN Convert templates
/* Idea: could extend format with a second parameter, containing flags for things like base to output.
 * Unnecessary for mergetag though.
 */

// Associative arrays
/** Formats an associative array as [key:value,key:value,...]; an empty array gives [].
 *
 * Keys and values are formatted with the format template for their own types.
 */
char[] format(T : T[S], S) (T[S] val) {
    char[] ret;
    // Initial estimate only; the buffer is grown in the loop when an entry does not fit.
    ret.length = val.length * (defLength!(T) + defLength!(S)) + 2;
    ret[0] = '[';
    uint i = 1;
    foreach (S k, T v; val) {
        char[] s = format!(S) (k) ~ ":" ~ format!(T) (v);
        i += s.length;
        // Need room for s plus the separator written at index i. A single doubling is not enough
        // when one entry is longer than the whole buffer, so keep growing until it fits.
        while (i+1 >= ret.length)
            ret.length = ret.length * 2;
        ret[i-s.length .. i] = s;
        ret[i++] = ',';
    }
    if (i == 1) ++i;    // special case - not overwriting a comma
    ret[i-1] = ']';     // replaces last comma
    return ret[0..i];
}
unittest {
    char[] X = format!(char[][char]) (['a':cast(char[])"animal", 'b':['b','u','s']]);
    char[] Y = `['a':"animal",'b':"bus"]`;
    assert (X == Y);
}

// Arrays
/** Formats an array as [elem,elem,...]; an empty array gives [].
 *
 * Elements are formatted with the format template for the element type.
 */
char[] format(T : T[]) (T[] val) {
    char[] ret;
    // Initial estimate only; the buffer is grown in the loop when an element does not fit.
    ret.length = val.length * defLength!(T) + 2;
    ret[0] = '[';
    uint i = 1;
    foreach (T x; val) {
        char[] s = format!(T) (x);
        i += s.length;
        // See the associative array version: grow until s and the separator both fit.
        while (i+1 >= ret.length)
            ret.length = ret.length * 2;
        ret[i-s.length .. i] = s;
        ret[i++] = ',';
    }
    if (i == 1) ++i;    // special case - not overwriting a comma
    ret[i-1] = ']';     // replaces last comma
    return ret[0..i];
}

/** Formats a UTF-32 string: converts to UTF-8 and formats the result as a char[] string.
 *
 * The conversion function must be reached through the Utf alias (the module is imported under a
 * renamed import), and format must be instantiated explicitly since its type parameter is
 * specialised.
 */
char[] format(T : dchar[]) (T val) {
    return format!(char[]) (Utf.toUtf8 (val));
}
/// Formats a UTF-16 string; as for dchar[].
char[] format(T : wchar[]) (T val) {
    return format!(char[]) (Utf.toUtf8 (val));
}

/** Formats a string: wraps it in double quotes and backslash-escapes every character for which
 * isEscapableChar returns true.
 */
char[] format(T : char[]) (T val) {
    // Worst case: every char is escaped (2 chars each) plus the two quotes, so this is ALWAYS enough.
    char[] ret = new char[val.length * 2 + 2];
    ret[0] = '"';
    uint i = 1;     // write position in ret
    for (uint t = 0; t < val.length;) {     // t is the read position in val
        // process a block of non-escapable characters
        uint s = t;
        while (t < val.length && !isEscapableChar(val[t]))
            ++t;    // skip all non-escapable chars
        uint j = i + t - s;
        ret[i..j] = val[s..t];  // copy a block
        i = j;
        // process a block of escapable characters
        while (t < val.length && isEscapableChar(val[t])) {
            ret[i++] = '\\';                            // backslash; increment i
            ret[i++] = replaceEscapableChar(val[t++]);  // character; increment i and t
        }
    }
    ret[i++] = '"';
    return ret[0..i];
}

/// Formats binary data as lower-case hexadecimal, two digits per byte, with no delimiters.
char[] format(T : ubyte[]) (T val) {
    static const char[16] digits = "0123456789abcdef";

    char[] ret = new char[val.length * 2];  // exact length
    uint i = 0;
    foreach (ubyte x; val) {
        ret[i++] = digits[x >> 4];      // high nibble
        ret[i++] = digits[x & 0x0F];    // low nibble
    }
    return ret;
}
unittest {
    // generic array stuff:
    assert (format!(double[]) ([1.0, 1.0e-10]) == `[1.00000000000000000,0.10000000000000000e-09]`);
    assert (format!(double[]) (cast(double[]) []) == `[]`);     // empty array

    // char[] conversions, with commas, escape sequences and multichar UTF8 characters:
    assert (format!(char[][]) ([ ".\""[], [',','\''] ,"!\b€" ]) == `[".\"",",\'","!\b€"]`);

    assert (format!(ubyte[]) (cast(ubyte[]) [0x01, 0xF2, 0xAC]) == `01f2ac`);   // ubyte[] special notation

    // regression: a single element much longer than the initial buffer estimate
    char[] longStr = "0123456789012345678901234567890123456789012345678901234567890123";
    assert (format!(char[][]) ([longStr]) == `["` ~ longStr ~ `"]`);
}

// Support for outputting a wide char... I recommend against trying to output these though.
const char[] WIDE_CHAR_ERROR = "Error: unicode non-ascii character cannot be converted to a single UTF-8 char";

/** Formats a UTF-32 code unit. Only values up to 127 are supported; these are formatted as a
 * char. For anything else a warning is logged and an exception thrown (see throwException).
 */
char[] format(T : dchar) (T val) {
    if (val <= 127u)
        return format!(char) (cast(char) val);  // this char can be converted
    throwException (WIDE_CHAR_ERROR);
    assert (false);     // not reached: throwException always throws
}
/// Formats a UTF-16 code unit; as for dchar.
char[] format(T : wchar) (T val) {
    if (val <= 127u)
        return format!(char) (cast(char) val);  // this char can be converted
    throwException (WIDE_CHAR_ERROR);
    assert (false);     // not reached: throwException always throws
}

/** Formats a char: wraps it in single quotes, backslash-escaping it if isEscapableChar says so.
 */
char[] format(T : char) (T val) {
    // Note: if (val > 127) "is invalid UTF-8 single char"
    // However we don't know what this is for, in particular if it will be recombined with other chars later

    // Can't return reference to static array; making dynamic is cheaper than copying.
    char[] ret = new char[4];   // max length for an escaped char
    ret[0] = '\'';

    if (!isEscapableChar (val)) {
        ret[1] = val;
        ret[2] = '\'';
        return ret[0..3];
    } else {
        ret[1] = '\\';
        ret[2] = replaceEscapableChar (val);
        ret[3] = '\'';
        return ret;
    }
    assert (false);
}
unittest {
    assert (format!(char) ('\'') == "\'\\\'\'");
}

/// Formats a bool as "true" or "false".
char[] format(T : bool) (T val) {
    // Test the value, not the type T.
    if (val) return "true";
    else return "false";
}
unittest {
    assert (format!(bool) (true) == "true");
    assert (format!(bool) (false) == "false");
}

// Integer types: all are passed to formatLong.
char[] format(T : byte) (T val) {
    return formatLong (val);
}
char[] format(T : short) (T val) {
    return formatLong (val);
}
char[] format(T : int) (T val) {
    return formatLong (val);
}
char[] format(T : long) (T val) {
    return formatLong (val);
}
char[] format(T : ubyte) (T val) {
    return formatLong (val);
}
char[] format(T : ushort) (T val) {
    return formatLong (val);
}
char[] format(T : uint) (T val) {
    return formatLong (val);
}
/// Values above long.max cannot be passed to formatLong; these cause an exception to be thrown.
char[] format(T : ulong) (T val) {
    if (val > cast(ulong) long.max)
        throwException ("No handling available for ulong where value > long.max");
    return formatLong (val);
}
unittest {
    assert (format!(byte) (cast(byte) -5) == "-5");
    // annoyingly, octal syntax differs from D (blame tango):
    assert (format!(uint[]) ([0b0100u,0724,0xFa59c,0xFFFFFFFF,0]) == "[4,468,1025436,4294967295,0]");
}

// Old calculation (not used):
// t.dig+2+4+3  // should be sufficient length (mant + (neg, dot, e, exp neg) + exp (3,4,5 for float,double,real resp.))

// Floating point types: formatted by tango's Float.format with T.dig+2 decimals into a 32-char buffer.
char[] format(T : float) (T val) {
    char[] ret = new char[32];  // minimum allowed by assert in format
    return cFloat.format (ret, val, T.dig+2, 1);    // from old C++ tests, T.dig+2 gives best(?) accuracy
}
char[] format(T : double) (T val) {
    char[] ret = new char[32];
    return cFloat.format (ret, val, T.dig+2, 1);
}
char[] format(T : real) (T val) {
    char[] ret = new char[32];
    return cFloat.format (ret, val, T.dig+2, 1);
}
unittest {
    // NOTE: these numbers are not particularly meaningful.
    assert (format!(float) (0.0f) == "0.00000000");
    assert (format!(double) (-1e25) == "-1.00000000000000000e+25");
    assert (format!(real) (cast(real) 4.918e300) == "4.91800000000000000000e+300");
}
//END Convert templates

//BEGIN Length templates
/* This template provides the initial length for strings for formatting various types. These strings
 * can be expanded; this value should cover 90% of cases or so.
 * FIXME: provide more specialisations (or not?)
 */
private {
    template defLength(T)   { const uint defLength = 20; }
}
//END Length templates

//BEGIN Utility funcs
/* Converts a long to its signed decimal representation via tango's Integer.toString. Any Exception
 * caught is logged and replaced by the module's own exception (see throwException).
 */
private char[] formatLong (long val) {
    try return cInt.toString (val, cInt.Style.Signed, cInt.Flags.Throw);
    catch (Exception e) throwException (e.msg);
}

/* Returns true for the chars which are output as escape sequences: '\a' to '\r' inclusive,
 * double quote, single quote and backslash.
 */
private bool isEscapableChar (char c) {
    return ((c <= '\r' && c >= '\a') || c == '\"' || c == '\'' || c == '\\');
}

/* Returns the character which follows the backslash in the escape sequence for c.
 *
 * Warning: this DOES NOT check c is escapable
 */
private char replaceEscapableChar (char c) {
    static char[char] escCharsRev;  // reversed escChars
    static bool escCharsRevFilled;  // will be initialised false

    if (!escCharsRevFilled) {       // only do this once
        // map of all supported escape sequences
        escCharsRev = ['"' : '"', '\'' : '\'',
                       '\\' : '\\', '\a' : 'a',
                       '\b' : 'b', '\f' : 'f',
                       '\n' : 'n', '\r' : 'r',
                       '\t' : 't', '\v' : 'v'];
        escCharsRevFilled = true;
    }

    return escCharsRev[c];
}

/* Logs msg as a warning and throws a TextFormatException. The message is only logged; it is not
 * passed to the exception.
 */
private void throwException (char[] msg) {
    logger.warn (msg);          // only small errors are trapped here
    throw new TextFormatException ();
}

unittest {
    // all utility functions should be well-enough used not to need testing
}
//END Utility funcs