view mde/text/format.d @ 6:dcb24afa0dce

Some fixes from mde/text/format.d unittests plus a few more fixes. committer: Diggory Hardy <diggory.hardy@gmail.com>
author Diggory Hardy <diggory.hardy@gmail.com>
date Thu, 10 Jan 2008 18:33:24 +0000
parents 9a990644948c
children b544c3a7c9ca
line wrap: on
line source

/**************************************************************************************************
 * This contains templates for converting various data-types to a char[].
 *
 * Copyright: Copyright © 2007 Diggory Hardy.
 * Authors: Diggory Hardy, diggory.hardy@gmail.com
 * License: Licensed under the Academic Free License version 3.0
 *
 * This module basically implements the following templated function for $(B most) basic D types:
 * bool, byte, short, int, long, ubyte, ushort, uint, ulong, float, double, real, char.
 * It also supports arrays of any supported type (including of other arrays) and has special
 * handling for strings (char[]) and binary (ubyte[]) data-types.
 * -----------------------------
 * char[] format(T) (T value);
 * -----------------------------
 *
 * There are also a few utility functions defined; the public ones have their own documentation.
 *
 * On errors, a warning is logged and a TextFormatException is thrown. No other exceptions should
 * be thrown and none thrown from functions used outside this module.
 *************************************************************************************************/
module mde.text.format;
// TODO: write unittests; check strings generate quotes.

// package imports
import mde.text.exception;

// tango imports
import cInt = tango.text.convert.Integer;
import cFloat = tango.text.convert.Float;
import Utf = tango.text.convert.Utf;
import Util = tango.text.Util;
import tango.util.log.Log : Log, Logger;

// Module-level logger; throwException() writes its warnings through it.
private Logger logger;
// Module constructor: fetches the logger registered under this module's name.
static this () {
    logger = Log.getLogger ("mde.text.format");
}

//BEGIN Convert templates
/* Idea: could extend format with a second parameter, containing flags for things like base to output.
 * Unnecessary for mergetag though.
*/

// Associative arrays
/* Formats an associative array as "[key:value,key:value,...]" ("[]" when empty).
 *
 * Keys and values are formatted with format!(S) and format!(T) respectively. The output buffer
 * starts at an estimated size (defLength per key and per value) and is grown on demand.
 *
 * Returns: a slice of a freshly allocated buffer holding the formatted text.
 */
char[] format(T : T[S], S) (T[S] val) {
    char[] ret;
    ret.length = val.length * (defLength!(T) + defLength!(S)) + 2;	// always >= 2
    ret[0] = '[';
    uint i = 1;		// index of the next free position in ret
    foreach (S k, T v; val) {
        char[] s = format!(S) (k) ~ ":" ~ format!(T) (v);
        i += s.length;
        // Room is needed for s plus the separator written at index i. A single doubling is not
        // enough when one entry is longer than the whole current buffer, so keep doubling.
        while (i+1 >= ret.length) ret.length = ret.length * 2;
        ret[i-s.length .. i] = s;
        ret[i++] = ',';
    }
    if (i == 1) ++i;	// special case - not overwriting a comma
    ret[i-1] = ']';	// replaces last comma
    return ret[0..i];
}
// Checks associative-array formatting: char keys are single-quoted, char[] values are
// double-quoted, entries are comma-separated inside square brackets.
// NOTE(review): the expected string assumes the entries are iterated in the order 'a', 'b' -
// confirm that the associative-array iteration order is stable enough for this comparison.
unittest {
    char[] X = format!(char[][char]) (['a':cast(char[])"animal", 'b':['b','u','s']]);
    char[] Y = `['a':"animal",'b':"bus"]`;
    assert (X == Y);
}

// Arrays
/* Formats a generic array as "[elem,elem,...]" ("[]" when empty).
 *
 * Each element is formatted with format!(T). The output buffer starts at an estimated size
 * (defLength!(T) per element) and is grown on demand.
 *
 * Returns: a slice of a freshly allocated buffer holding the formatted text.
 */
char[] format(T : T[]) (T[] val) {
    char[] ret;
    ret.length = val.length * defLength!(T) + 2;	// always >= 2
    ret[0] = '[';
    uint i = 1;		// index of the next free position in ret
    foreach (T x; val) {
        char[] s = format!(T) (x);
        i += s.length;
        // Room is needed for s plus the separator written at index i. A single doubling is not
        // enough when one element is longer than the whole current buffer, so keep doubling.
        while (i+1 >= ret.length) ret.length = ret.length * 2;
        ret[i-s.length .. i] = s;
        ret[i++] = ',';
    }
    if (i == 1) ++i;	// special case - not overwriting a comma
    ret[i-1] = ']';	// replaces last comma
    return ret[0..i];
}
/* UTF-32 string: transcoded to UTF-8 and then formatted exactly like a char[] (quoted, escaped).
 *
 * The conversion routine is reached through the renamed import `Utf`; an unqualified call would
 * not resolve. The char[] overload is instantiated explicitly because template parameters that
 * carry a specialisation cannot be deduced implicitly.
 * NOTE(review): newer Tango releases name this routine Utf.toString - confirm against the Tango
 * version in use.
 */
char[] format(T : dchar[]) (T val) {
    return format!(char[]) (Utf.toUtf8 (val));
}
/* UTF-16 string: transcoded to UTF-8 and then formatted exactly like a char[] (quoted, escaped).
 * See the dchar[] overload for notes on the qualified call and explicit instantiation.
 */
char[] format(T : wchar[]) (T val) {
    return format!(char[]) (Utf.toUtf8 (val));
}
/* Formats a string as a double-quoted literal, replacing every escapable character (see
 * isEscapableChar) by a backslash followed by its escape letter.
 *
 * Returns: a slice of a freshly allocated buffer holding the quoted text.
 */
char[] format(T : char[]) (T val) {
    // Worst case: every character needs escaping (two chars each), plus the two quotes.
    char[] buf = new char[val.length * 2 + 2];
    uint pos = 0;			// next free position in buf
    buf[pos++] = '"';
    foreach (char c; val) {
        if (isEscapableChar (c)) {
            buf[pos++] = '\\';
            buf[pos++] = replaceEscapableChar (c);
        } else {
            buf[pos++] = c;		// ordinary character: copied verbatim
        }
    }
    buf[pos++] = '"';
    return buf[0..pos];
}
/* Formats binary data as lower-case hexadecimal, two digits per byte, without brackets or
 * separators.
 *
 * Returns: a freshly allocated array of exactly val.length * 2 characters.
 */
char[] format(T : ubyte[]) (T val) {
    static const char[16] hexChars = "0123456789abcdef";

    char[] hex = new char[val.length * 2];
    foreach (size_t idx, ubyte b; val) {
        hex[2*idx]     = hexChars[b >> 4];	// high nibble
        hex[2*idx + 1] = hexChars[b & 0x0F];	// low nibble
    }
    return hex;
}
// Tests for the array overloads: generic arrays, strings (char[]) and binary data (ubyte[]).
unittest {
    // generic array stuff:
    // elements are comma-separated inside square brackets, each formatted by its own overload
    assert (format!(double[]) ([1.0, 1.0e-10]) == `[1.00000000000000000,0.10000000000000000e-09]`);
    assert (format!(double[]) (cast(double[]) []) == `[]`);		// empty array
    
    // char[] conversions, with commas, escape sequences and multichar UTF8 characters:
    // each string is double-quoted; quotes and the backspace are expected to come out escaped
    assert (format!(char[][]) ([ ".\""[], [',','\''] ,"!\b€" ]) == `[".\"",",\'","!\b€"]`);
    
    assert (format!(ubyte[]) (cast(ubyte[]) [0x01, 0xF2, 0xAC]) == `01f2ac`);	// ubyte[] special notation
}

// Support for outputting a wide char... I recommend against trying to output these though.
// Message logged when a wide character outside the ASCII range is passed to format.
const char[] WIDE_CHAR_ERROR = "Error: unicode non-ascii character cannot be converted to a single UTF-8 char";
/* Formats a single UTF-32 code unit. Only values up to 127 are supported; these are formatted
 * exactly like a char. Any other value is reported through throwException (WIDE_CHAR_ERROR).
 *
 * The char overload is instantiated explicitly because template parameters that carry a
 * specialisation cannot be deduced implicitly.
 */
char[] format(T : dchar) (T val) {
    if (val <= 127u) return format!(char) (cast(char) val);	// this char can be converted
    throwException (WIDE_CHAR_ERROR);
    assert (false);	// not reached: throwException always throws
}
/* Formats a single UTF-16 code unit. Only values up to 127 are supported; these are formatted
 * exactly like a char. Any other value is reported through throwException (WIDE_CHAR_ERROR).
 */
char[] format(T : wchar) (T val) {
    if (val <= 127u) return format!(char) (cast(char) val);	// this char can be converted
    throwException (WIDE_CHAR_ERROR);
    assert (false);	// not reached: throwException always throws
}
/* Formats a single char as a single-quoted literal; escapable characters (see isEscapableChar)
 * are written as a backslash followed by their escape letter.
 *
 * Values above 127 are not valid single-char UTF-8, but they are passed through untouched since
 * the caller may recombine them with other chars later.
 *
 * Returns: a slice (3 or 4 chars long) of a freshly allocated buffer.
 */
char[] format(T : char) (T val) {
    // A static array cannot be returned by reference, so allocate the maximum (escaped) size.
    char[] quoted = new char[4];
    quoted[0] = '\'';

    if (isEscapableChar (val)) {
        quoted[1] = '\\';
        quoted[2] = replaceEscapableChar (val);
        quoted[3] = '\'';
        return quoted;
    }

    // Plain character: only three of the four slots are used.
    quoted[1] = val;
    quoted[2] = '\'';
    return quoted[0..3];
}
// A single-quote character must come out escaped and wrapped in single quotes: '\''
unittest {
    assert (format!(char) ('\'') == "\'\\\'\'");
}

/* Formats a boolean as "true" or "false".
 *
 * The decision must be made on the value; the template type parameter T carries no information
 * about it.
 */
char[] format(T : bool) (T val) {
    if (val) return "true";
    else return "false";
}
unittest {
    assert (format!(bool) (true) == "true");
    assert (format!(bool) (false) == "false");
}

// Integer types that always fit into a long: widen the value, then let formatLong do the work.
char[] format(T : byte) (T val) {
    long widened = val;
    return formatLong (widened);
}
char[] format(T : short) (T val) {
    long widened = val;
    return formatLong (widened);
}
char[] format(T : int) (T val) {
    long widened = val;
    return formatLong (widened);
}
char[] format(T : long) (T val) {
    long widened = val;
    return formatLong (widened);
}
char[] format(T : ubyte) (T val) {
    long widened = val;
    return formatLong (widened);
}
char[] format(T : ushort) (T val) {
    long widened = val;
    return formatLong (widened);
}
char[] format(T : uint) (T val) {
    long widened = val;
    return formatLong (widened);
}
/* Formats a ulong through formatLong. Values above long.max cannot be represented as a long and
 * are reported through throwException instead.
 */
char[] format(T : ulong) (T val) {
    const ulong largestSupported = cast(ulong) long.max;
    if (val > largestSupported)
        throwException ("No handling available for ulong where value > long.max");
    long asSigned = val;	// safe: the range was checked above
    return formatLong (asSigned);
}
// Tests for the integer overloads; output is always plain decimal.
unittest {
    assert (format!(byte) (cast(byte) -5) == "-5");
    // annoyingly, octal syntax differs from D (blame tango):
    // the inputs are written as binary (0b0100 = 4), octal (0724 = 468) and hexadecimal
    // (0xFa59c = 1025436, 0xFFFFFFFF = 4294967295) D literals.
    assert (format!(uint[]) ([0b0100u,0724,0xFa59c,0xFFFFFFFF,0]) == "[4,468,1025436,4294967295,0]");
}

// Old calculation (not used):
// t.dig+2+4+3	// should be sufficient length (mant + (neg, dot, e, exp neg) + exp (3,4,5 for float,double,real resp.))
/* Floating-point overloads. Each hands a fresh 32-char work buffer to tango's Float.format and
 * asks for T.dig+2 decimals (from old C++ tests, T.dig+2 gives best(?) accuracy).
 * 32 is the minimum buffer length allowed by an assert inside Float.format.
 */
char[] format(T : float) (T val) {
    return cFloat.format (new char[32], val, T.dig+2, 1);
}
char[] format(T : double) (T val) {
    return cFloat.format (new char[32], val, T.dig+2, 1);
}
char[] format(T : real) (T val) {
    return cFloat.format (new char[32], val, T.dig+2, 1);
}
// Tests for the floating-point overloads; the expected strings pin the number of decimals
// produced for each type and the exponent notation used for large magnitudes.
unittest {
    // NOTE: these numbers are not particularly meaningful.
    assert (format!(float) (0.0f) == "0.00000000");
    assert (format!(double) (-1e25) == "-1.00000000000000000e+25");
    assert (format!(real) (cast(real) 4.918e300) == "4.91800000000000000000e+300");
}
//END Convert templates

//BEGIN Length templates
/* This template provides the initial length for strings for formatting various types. These strings
 * can be expanded; this value should cover 90% of cases or so.
 * FIXME: provide more specialisations (or not?)
 */
// Initial per-item buffer estimate used by the array overloads of format; the same value (20)
// applies to every type since no specialisations are provided.
private template defLength(T)
{
    const uint defLength = 20;
}
//END Length templates

//BEGIN Utility funcs
/* Converts a long to its signed decimal representation via tango's Integer.toString.
 * Any exception raised by the conversion is re-reported through throwException, using the
 * original exception's message.
 */
private char[] formatLong (long val) {
    try {
        return cInt.toString (val, cInt.Style.Signed, cInt.Flags.Throw);
    } catch (Exception failure) {
        throwException (failure.msg);
    }
}
/* Returns true when c must be written as an escape sequence: the control characters from '\a'
 * to '\r' inclusive, the two quote characters and the backslash.
 */
private bool isEscapableChar (char c) {
    switch (c) {
        // control characters 7 .. 13
        case '\a', '\b', '\t', '\n', '\v', '\f', '\r':
        // quotes and backslash
        case '\"', '\'', '\\':
            return true;
        default:
            return false;
    }
}
/* Maps an escapable character to the letter that follows the backslash in its escape sequence
 * (e.g. newline -> 'n', tab -> 't'; quotes and backslash map to themselves).
 *
 * Warning: this DOES NOT check c is escapable; the lookup is only defined for characters
 * accepted by isEscapableChar.
 */
private char replaceEscapableChar (char c) {
    static char[char] letterFor;	// escapable char -> escape letter
    static bool tableReady;		// will be initialised false

    if (!tableReady) {	// build the table on first use only
        letterFor['"']  = '"';
        letterFor['\''] = '\'';
        letterFor['\\'] = '\\';
        letterFor['\a'] = 'a';
        letterFor['\b'] = 'b';
        letterFor['\f'] = 'f';
        letterFor['\n'] = 'n';
        letterFor['\r'] = 'r';
        letterFor['\t'] = 't';
        letterFor['\v'] = 'v';
        tableReady = true;
    }

    return letterFor[c];
}

/* Common error path of this module: logs msg as a warning, then throws a TextFormatException.
 * Never returns normally.
 */
private void throwException (char[] msg) {
    logger.warn (msg);			// only small errors are trapped here
    TextFormatException failure = new TextFormatException ();
    throw failure;
}

// Intentionally empty: the utility functions have no dedicated tests of their own.
unittest {
    // all utility functions should be well-enough used not to need testing
}
//END Utility funcs