Mercurial > projects > mde

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/policies.txt	Sat Oct 27 18:05:39 2007 +0100
@@ -0,0 +1,23 @@
+This is a collection of all coding policies for the mde engine as a whole. Policies for individual packages should be put in the individual package directory or elsewhere.
+
+These are principles, not cast-iron rules, but should generally be adhered to.
+
+
+Coding conventions: I mostly follow those provided in the D specification. Generally indent with four spaces. Use British or American (or other) spellings as you like, but BE CONSISTENT at least within packages. I generally break lines at 100 chars to prevent overlong lines (particularly documentation); this isn't critical but provides a good guide and keeps text looking reasonable.
+
+
+Package design principle: use a separate package for each module of the engine. In most packages where there is only one module (file) imported by other parts of the engine, that module should have the same name as the package and be designed to have a standardised interface to the package so that the package could be replaced with another as a drop-in replacement (written with the same interface). Of course in many cases it may not be possible to switch one package for another quite this easily, but holding to this principle should at least minimise the amount of work necessary when doing so.
+
+
+Engine-wide initialisation and cleanup should be handled or invoked by mde.init.Init's CTOR and DTOR methods where this is viable.
+
+
+Logging should be handled by tango's Logger class. A logger with the name mde.package.module or mde.package.module.X where X is a symbol within the module should be used for each module. Thrown errors should be documented primarily by a log message rather than by returning a message within the exception, to keep logging consistent for both thrown errors and other messages. In general the levels should be used as follows:
+	Trace	Where required or thought highly useful for debugging
+	Info	Sparingly, for informational purposes (e.g. when parsing a file). Should not be used per-frame.
+	Warn	For small errors which can be overlooked, even if they may cause bigger problems later.
+	Error	For errors which cut-short a (reasonably large) operation (e.g. reading a file), but do not directly cause the program to terminate.
+	Fatal	For errors directly (i.e. definitely and almost immediately) ending the program.
+
+
+Thrown errors should use a class specific to at least the package involved to enable specific catching of errors. Exception classes should be defined within a module exception.d in the package directory. Exception classes should all contain a this() CTOR and possibly a this(char[] msg) CTOR.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dsss.conf	Sat Oct 27 18:05:39 2007 +0100
@@ -0,0 +1,4 @@
+[mde/mde.d]
+target=mde-exec
+[mde/mergetag]
+version = 0.1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/exception.d	Sat Oct 27 18:05:39 2007 +0100
@@ -0,0 +1,19 @@
+/// Contains a base class for all mde exceptions.
+module mde.exception;
+
+/** Root of the mde exception hierarchy.
+ *
+ * Each package is expected to derive its own base exception type from this class. At every
+ * package level, the CTOR taking a message should forward it to the super CTOR prefixed with
+ * "package: "; the cost of doing so is unimportant because exceptions are only intended for
+ * recovering from unexpected errors. A CTOR taking no message, which calls the super CTOR without
+ * a parameter, should be provided as well.
+ */
+class mdeException : Exception {
+    /// Message-less form. Exception doesn't have a this() CTOR, so an empty message is passed.
+    this () {
+        super("");
+    }
+    /// Prefixes msg with "Error: mde: " before handing it to Exception.
+    this (char[] msg) {
+        char[] prefixed = "Error: mde: " ~ msg;
+        super(prefixed);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/init.d	Sat Oct 27 18:05:39 2007 +0100
@@ -0,0 +1,28 @@
+/**************************************************************************************************
+ * Initialisation setup and exit cleanup module.
+ *
+ * This module controls most of the initialisation and deinitialisation of the program.
+ *************************************************************************************************/
+
+// tango imports
+import tango.util.log.Log : Log;
+import tango.util.log.ConsoleAppender : ConsoleAppender;
+
+/**
+ * Init class
+ *
+ * A scope class: one instance is created when the program starts and destroyed when it ends, so
+ * the CTOR performs program initialisation and the DTOR performs program cleanup.
+ */
+scope class Init
+{
+    /// Program initialisation: attaches a console appender to the root logger.
+    this()
+    {
+        // For now, just log to the console:
+        auto console = new ConsoleAppender;
+        Log.getRootLogger().addAppender(console);
+    }
+
+    /// Program cleanup; currently empty.
+    ~this()
+    {
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/input/config.d	Sat Oct 27 18:05:39 2007 +0100
@@ -0,0 +1,70 @@
+/// This module contains a class for holding configs and handles saving, loading and editing.
+module mde.input.config;
+import mde.input.core;
+/+
+/** Struct to hold the configuration for the input system. Thus loading and switching between
+ *  multiple configurations should be easy.
+ *
+ *  Note: documentation should be generated for the codes (enum : uint ...), but it's not.
+ */
+struct Config
+{
+    /** Button event type bit-codes
+     *
+     *  These bitcodes are OR'd to the identifier code for the input device, to indicate which type
+     *  of input they are for. E.g. when a key event is received with code x, look up
+     *  B.SDLKEY | x in b. Keyboard events are SDL-specific since the codes may differ for other
+     *  libraries.
+     *
+     *  For joystick hat events, a motion should be converted into up and down events on separate
+     *  U,L,D,R positions and up and down events sent to the appropriate outputs.
+     */
+    enum B : uint {
+        KEY		= 0x8000_0000u,		/// 0x8000_0000u
+        SDLKEY		= 0x8800_0000u,		/// 0x8800_0000u
+        MOUSE		= 0x4000_0000u,		/// 0x4000_0000u
+        JOYBUTTON	= 0x2000_0000u,		/// 0x2000_0000u
+        JOYHAT		= 0x1000_0000u,		/// 0x1000_0000u
+        JOYHAT_U	= 0x1800_0000u,		/// 0x1800_0000u
+        JOYHAT_D	= 0x1400_0000u,		/// 0x1400_0000u
+        JOYHAT_L	= 0x1200_0000u,		/// 0x1200_0000u
+        JOYHAT_R	= 0x1100_0000u,		/// 0x1100_0000u
+    }
+
+    /** Axis event type bit-codes
+     *
+     *  Well, SDL only supports one type of axis now, but this could be extended in the future.
+    */
+    enum A : uint {
+        JOYAXIS		= 0x8000_0000u,		/// 0x8000_0000u
+    }
+
+    /** Mouse & Joystick ball event type bit-codes
+     *
+     *  Currently, mouse input only comes from the window manager: the code is exactly M.WMMOUSE.
+     */
+    enum M : uint {
+        MOUSE		= 0x8000_0000u,		/// 0x8000_0000u
+        WMMOUSE		= 0x8800_0000u,		/// 0x8800_0000u
+        JOYBALL		= 0x4000_0000u,		/// 0x4000_0000u
+    }
+
+    /** Output queues --- the core of the input configuration.
+    *
+    *  b, axis and mouse each have their own index specifications. This is split into two parts:
+    *  the first byte specifies the type of input (given by the above enums), and the last three
+    *  bytes define where the input comes from.
+    *
+    *  For B.SDLKEY, the last three bytes are for the SDL keysym.
+    *  For B.MOUSE, B.JOY*, A.JOY* & M.JOY*, the last three bytes are split into two sets of 12
+    *  bits (with masks 0x00FF_F000 and 0x0000_0FFF), the higher of which specifies the device
+    *  (which mouse or joystick), and the lower of which specifies the button/axis/ball.
+    *
+    *  The code for mouse motion is currently only M.WMMOUSE. If/when multiple mice are supported
+    *  new codes will be defined.
+    */
+    outQueue[uint] b;
+    outQueue[uint] axis;    /// ditto
+    outQueue[uint] mouse;    /// ditto
+}
++/
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/input/core.d	Sat Oct 27 18:05:39 2007 +0100
@@ -0,0 +1,60 @@
+/// This module contains the core (i.e. common part) of the input system.
+module mde.input.core;
+/+
+alias Exception InputException;	// this can be expanded to a custom class if needed
+typedef uint index_t;
+struct RelPair {
+	real x, y;
+	static RelPair opCall (real a, real b) {
+		RelPair ret;
+		ret.x = a;	ret.y = b;
+		return ret;
+	}
+}
+
+/* Note: We really want an array, not a stack. We cannot edit these lists, so we can either
+* copy the stack or just iterate through it as an array.
+*/
+typedef uint[] outQueue;	/// This is the type for the out queue config data.
+struct readOutQueue {		/// A convenient structure for reading an outQueue item by item.
+    private outQueue _q;	// the queue, stored by reference to the original
+    private uint p = 0;		// current read position (start at beginning)
+
+    /// Static constructor: allows the syntax readOutQueue(q), in the same way as RelPair(a,b).
+    static readOutQueue opCall (outQueue q) {
+        readOutQueue ret;
+        ret._q = q;
+        return ret;
+    }
+    /// Get the next element. Throws an InputException if there isn't another.
+    uint next () {
+        if (p >= _q.length)	// length is an array property, not a method
+            throw new InputException ("Input: Invalid configuration: incomplete config stack");
+        uint ret = _q[p];
+        ++p;
+        return ret;
+    }
+}
+/+
+struct out_stack {	// a remove-only stack with exception throwing
+    uint[] _data;
+
+    static out_stack opCall (uint[] d) {
+        out_stack ret;
+        ret._data = d;
+        return ret;
+    }
+    uint pop () {
+        if (_data.length < 1)
+            throw new InputException ("Input: Invalid configuration: incomplete config stack");
+        uint a = _data[length - 1];
+        _data.length = _data.length - 1;
+        return a;
+    }
+}
++/
+
+bool[index_t] b_tbl;			/// Table of button states
+real[index_t] axis_tbl;			/// Table of axes states
+uint mouse_x, mouse_y;			/// Current screen coords of the mouse
+// FIXME: these need to be reset after every access:
+RelPair[index_t] axis_rel_tbl;	/// Table of relative mouse / joystick ball motions
++/
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/input/eventstream.d	Sat Oct 27 18:05:39 2007 +0100
@@ -0,0 +1,93 @@
+/** This module contains functions called on an event, which may modify the event (adjuster
+ *	functions), and finally output to one (or more) of the state tables (the event stream).
+ *
+ *	Adjuster and other event functions should have a format to fit the ES_X_Func types, for X is B
+ *	(button event), A (axis event) or M (mouse relative motion event or joystick ball event).
+ *	Adjusters should call (an) output function(s) of any type with their output(s).
+ *
+ *	To control which adjusters get called and pass parameters, a stack is used.
+ */
+module mde.input.eventstream;
+import mde.input.core;
+/+
+/// Module constructor fills es_*_fcts tables and rehashes them.
+static this () {
+	// Register each output function defined in this module under its config code, so that
+	// bEventOut, aEventOut and mEventOut can all look up a handler. (es_b_modifier is only
+	// declared, not implemented, so it is not registered.)
+	es_b_fcts[ES_B_OUT] = &es_b_out;
+	es_a_fcts[ES_A_OUT] = &es_a_out;
+	es_m_fcts[ES_M_OUT] = &es_m_out;
+
+	// Call rehash following initialisation:
+	es_b_fcts.rehash;
+	es_a_fcts.rehash;
+	es_m_fcts.rehash;
+}
+
+/// These aliases are for pointers to the event functions.
+alias void function (bool, out_stack) ES_B_Func;
+alias void function (short, out_stack) ES_A_Func;			/// ditto
+alias void function (short, short, out_stack) ES_M_Func;	/// ditto
+
+/// These are the tables for looking up which event function to call.
+static ES_B_Func[uint] es_b_fcts;
+static ES_A_Func[uint] es_a_fcts;	/// ditto
+static ES_M_Func[uint] es_m_fcts;	/// ditto
+
+/// These are the codes allowing the config to specify event functions:
+enum : uint {
+    ES_B_OUT	= 0x0000_0100u,
+    ES_A_OUT	= 0x0000_0200u,
+    ES_M_OUT	= 0x0000_0300u,
+}
+
+/// Functions to pass an event to the appropriate event function
+void bEventOut (bool b, out_stack s)
+{
+	ES_B_Func* func = (s.pop in es_b_fcts);
+	if (func != null) (*func)(b,s);
+	else throw new InputException ("Input: Invalid configuration: bad event function code");
+}
+void aEventOut (short x, out_stack s)	/// ditto
+{
+	ES_A_Func* func = (s.pop in es_a_fcts);
+	if (func != null) (*func)(x,s);
+	else throw new InputException ("Input: Invalid configuration: bad event function code");
+}
+void mEventOut (short x, short y, out_stack s)	/// ditto
+{
+	ES_M_Func* func = (s.pop in es_m_fcts);
+	if (func != null) (*func)(x,y,s);
+	else throw new InputException ("Input: Invalid configuration: bad event function code");
+}
+
+/// Simple output function
+void es_b_out (bool b, out_stack s) {
+	b_tbl[cast(index_t) s.pop] = b;
+}
+/// Adjuster to check modifier keys
+void es_b_modifier (bool b, out_stack s);
+
+/** Simple output function
+
+Converts the raw axis value x to a real and stores it in axis_tbl.
+
+Pops 2-3 items from the stack, in this order: the config flags, the sensitivity (only when the
+SENSITIVITY flag is set) and finally the index into axis_tbl at which the value is stored.
+*/
+void es_a_out (short x, out_stack s) {
+	real y = x;
+	uint conf = s.pop;
+        enum : uint {
+            HALF_RANGE	= 0x8000_0000u,
+            SENSITIVITY	= 0x0080_0000u,
+        }
+        // Convert ranges into standard intervals (with or without reverse values)
+	if (conf & HALF_RANGE) y = (y + 32767.0) * 1.5259254737998596e-05;	// range  0.0 - 1.0
+	else y *= 3.0518509475997192e-05;					// range -1.0 - 1.0
+	// The sensitivity is popped to keep the stack in step, but it is not applied yet: the only
+	// code using it is the disabled block below.
+	real a;
+	if (conf & SENSITIVITY) a = s.pop;
+        /+ When a global sensitivity is available (possibly only use if it's enabled)...
+        else a = axis.sensitivity;
+	y = sign(y) * pow(abs(y), a);		// sensitivity adjustment by a +/
+	axis_tbl[cast(index_t) s.pop] = y;
+}
+
+/// Simple output function
+void es_m_out (short x, short y, out_stack s) {
+	axis_rel_tbl[cast(index_t) s.pop] = RelPair(x,y);
+}
++/
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/input/input.d	Sat Oct 27 18:05:39 2007 +0100
@@ -0,0 +1,58 @@
+/** This module contains the interface to the input system; it should be the only module of the
+ *	input package imported from outside this package.
+ */
+module mde.input.input;
+/+import derelict.sdl.events;
+import mde.input.core;
+import mde.input.config;
+import mde.input.eventstream;
+
+/// Get key status at this ID
+bool button (uint id) {
+	return b_tbl[cast(index_t) id];
+}
+/// Get axis status at this ID (range -1.0 .. 1.0)
+real axis (uint id) {
+	return axis_tbl[cast(index_t) id];
+}
+/**	Get mouse pointer position in screen coordinates.
+ *	Window managers only support one mouse, so there will only be one screen coordinate.
+ *	Unlike everything else, this is not configurable.
+ *
+ *	Params:
+ *	    x = receives the current value of mouse_x
+ *	    y = receives the current value of mouse_y
+ */
+void mouseScreenPos (out uint x, out uint y) {
+	// The parameters must be out (as in mouseRelativePos); by-value parameters would discard
+	// the result when the function returns.
+	x = mouse_x;	y = mouse_y;
+}
+/** Get relative mouse position (also for joystick balls).
+ *
+ *	Converts to a real via sensitivity settings (defaults may be set and overridden per item).
+ *
+ *	To avoid confusion over the ID here, the idea is for the input-layer upward to support
+ *	multiple mice, even though it's unlikely for the input system itself to support them. Also
+ *	joystick balls (supported by SDL) can be used in the same way as a mouse for relative
+ *	positions. Thus this must be configured only on one-mouse systems.
+ */
+void mouseRelativePos (out real x, out real y, uint id) {
+	RelPair rp = axis_rel_tbl[cast(index_t) id];
+	x = rp.x;	y = rp.y;
+}
+/// As it says. Optional.
+bool modifierStatus (uint id);
+
+/// Adds a callback delegate for key with this ID for both DOWN and UP events.
+/// Passes current status.
+void addKeyCallback (void delegate(bool) dg);
+
+/// Similar function for axis events.
+void addAxisCallback (void delegate(real) dg);
+
+/** Similar function for mouse/joystick ball motion.
+ *	Last parameter is true if it's for the window-manager mouse (use mouseScreenPos to get
+ *	mouse screen position).
+ */
+void addMouseCallback (void delegate(real,real,bool) dg);
+
+/** Feed an SDL_Event struct (only uses if it's a key, mouse or joystick event).
+ *	Other types of event functions may be added.
+ */
+void SDLEvent (SDL_Event event);
++/
\ No newline at end of file
Binary file mde/mde has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/mde.d	Sat Oct 27 18:05:39 2007 +0100
@@ -0,0 +1,36 @@
+/** Modular D Engine
+ *
+ * This module contains main().
+ */
+module mde.mde;
+
+// External library imports
+import tango.io.Stdout;
+
+// Package imports
+import mde.init;
+import test = mde.test;
+
+//import mde.input.input;
+
+import mde.mergetag.read;
+
+/// Program entry point: creates the Init object, then reads "test.mtt" with a mergetag Reader.
+int main()
+{
+    // Factory passed to the reader as dataSecCreator; returns a new test.DataPrinter.
+    static DataSection dataPrinter (ID) {
+        return new test.DataPrinter;
+    }
+
+    scope init = new Init();	// initialisation; cleanup handled by init's DTOR
+
+    Reader MTread;
+    try {
+        MTread = new Reader ("test.mtt", null, true);
+        MTread.dataSecCreator = &dataPrinter;
+        MTread.read();
+    }
+    catch (Exception e) {
+        // Print the exception message and continue to the normal exit below.
+        Stdout (e.msg).newline;
+    }
+    //Stdout ("Data read from file:").newline;
+    //test.printDataSet (MTread.dataset);
+    return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/mergetag/dataset.d	Sat Oct 27 18:05:39 2007 +0100
@@ -0,0 +1,269 @@
+/// This module contains a minimal definition of a MergeTag DataSet.
+module mde.mergetag.dataset;
+
+// package imports
+import mde.mergetag.parse;
+import mde.mergetag.exception;
+
+// tango imports
+import Util = tango.text.Util;
+import tango.util.log.Log : Log, Logger;
+
+/** Typedef for data & section indexes (can be changed to ulong if necessary.) */
+typedef uint ID;
+
+/// Known MergeTag file format versions, plus a helper to identify a version string.
+package struct MTFormatVersion {
+    enum VERS : ubyte {	// convenient list of all known file format versions
+        INVALID	= 0x00,
+        MT01	= 0x01,		// not yet final
+    }
+    /// The current MergeTag version
+    static const VERS Current = VERS.MT01;
+    static const char[2] CurrentString = "01";
+
+    /** Convert a two-character version string to its VERS code.
+     *
+     * Params:
+     *   str = version string; must be exactly two characters long (checked by the in contract)
+     *
+     * Returns: VERS.MT01 for "01", otherwise VERS.INVALID.
+     */
+    static VERS parseString (char[] str)
+    in {
+        // Contracts go between the signature and the body, not inside the function's braces.
+        assert (str.length == 2);
+    } body {
+        if (str[0] == '0' && str[1] == '1') return VERS.MT01;
+        else return VERS.INVALID;
+    }
+}
+
+private Logger logger;
+static this () {
+    logger = Log.getLogger ("mde.mergetag.dataset");
+}
+
+/**************************************************************************************************
+ * Data class; contains a DataSection class instance for each loaded section of a file.
+ *
+ * Any class implementing DataSection may be used to store data; by default a DefaultData class is
+ * used when reading a file. Another class may be used by creating the sections before reading the
+ * file or passing the reader a function to create the sections (see Reader.dataSecCreator).
+ *
+ * Could be a struct, except that structs are value types (not reference types).
+ */
+class DataSet
+{
+    DataSection header;			/// Header
+    DataSection[ID] sec;		/// Sections, held in an associative array keyed by section ID
+
+    /// Return a reference to the section stored under ID i in sec.
+    DataSection opIndex(ID i) {
+        return sec[i];
+    }
+
+    /// Template to collect all sections which can be cast to the child-class type T, keyed by ID.
+    T[ID] getSections (T : DataSection) () {
+        T[ID] matching;
+        foreach (ID id, DataSection section; sec) {
+            // A failed class cast yields null, so only sections of type T are kept.
+            if (auto typed = cast(T) section)
+                matching[id] = typed;
+        }
+        return matching;
+    }
+}
+
+/**
+ * Interface for data storage classes, which contain all data-tags loaded from a single section of a
+ * file.
+ *
+ * A class implementing this may implement the addTag function to do whatever it likes with the
+ * data passed; DefaultData separates this data out into supported types and stores it
+ * appropriately, while throwing an error when unsupported types are passed, but a different
+ * implementation could filter out the tags desired and use them directly, while ignoring the rest.
+ * The parse module provides a useful set of templated functions to
+ * convert the data accordingly. It is advised to keep the type definitions as defined in the file-
+ * format except for user-defined types.
+ *
+ * Another idea for a DataSection class:
+ * Use a void*[ID] variable to store all data (may also need a type var for each item).
+ * addTag should call a templated function which calls parse then casts to a void* and stores the data.
+ * Use a templated get(T)(ID) method which checks the type and casts to T. Small issue with this: storing
+ * data in the file with an incorrect type could cause a lot of errors to be thrown in other code.
+ */
+interface DataSection
+{
+    /** Handles parsing of data items.
+     *
+     * The arguments are, in order: the type string, the item ID and the data string (see the
+     * parameter names tp, id and dt used by DefaultData.addTag).
+     *
+     * Should throw an MTUnknownTypeException for unsupported types, after logging to logger.
+     *
+     * NOTE(review): DefaultData.addTag catches every Exception internally, so it never lets an
+     * MTUnknownTypeException reach the caller; confirm which behaviour is intended.
+     */
+    void addTag (char[],ID,char[]);
+    //void writeAll (Print!(char));	/// TBD
+}
+
+/**
+ * Default DataSection class.
+ *
+ * Supports all the basic types currently supported and array versions of
+ * each (except no arrays of binary or string types; these are already arrays).
+ */
+class DefaultData : DataSection
+{
+    /+ Could use this:
+    private template addTagTp(alias Var, T) (ID id, char[] dt) {
+        Var[id] = parse!(T) (dt);
+    } +/
+    // Unfortunately, I think each case needs to be mentioned explicitly to tie it to the correct
+    // data member.
+    // Stores one data item: the trimmed type string tp selects which member table receives the
+    // value, dt is converted with the matching parse!(T) instantiation, and the result is stored
+    // under key id. Nothing is stored for an unsupported type; a warning is logged instead.
+    void addTag (char[] tp, ID id, char[] dt) {	/// for adding tags
+        try {
+        // Each pair (or triple) of case labels falls through to one assignment: the labels are
+        // alternative names for the same type.
+        switch(Util.trim(tp)) {
+            // Integer types and bool
+            case "1":
+            case "bool":
+                Bool[id] = parse!(bool) (dt);
+            break;
+            case "s8":
+            case "byte":
+                Byte[id] = parse!(byte) (dt);
+            break;
+            case "s16":
+            case "short":
+                Short[id] = parse!(short) (dt);
+            break;
+            case "s32":
+            case "int":
+                Int[id] = parse!(int) (dt);
+            break;
+            case "s64":
+            case "long":
+                Long[id] = parse!(long) (dt);
+            break;
+            case "u8":
+            case "ubyte":
+                UByte[id] = parse!(ubyte) (dt);
+            break;
+            case "u16":
+            case "ushort":
+                UShort[id] = parse!(ushort) (dt);
+            break;
+            case "u32":
+            case "uint":
+                UInt[id] = parse!(uint) (dt);
+            break;
+            case "u64":
+            case "ulong":
+                ULong[id] = parse!(ulong) (dt);
+            break;
+
+            // Arrays of integer types and bool
+            case "1[]":
+            case "bool[]":
+                BoolA[id] = parse!(bool[]) (dt);
+            break;
+            case "s8[]":
+            case "byte[]":
+                ByteA[id] = parse!(byte[]) (dt);
+            break;
+            case "s16[]":
+            case "short[]":
+                ShortA[id] = parse!(short[]) (dt);
+            break;
+            case "s32[]":
+            case "int[]":
+                IntA[id] = parse!(int[]) (dt);
+            break;
+            case "s64[]":
+            case "long[]":
+                LongA[id] = parse!(long[]) (dt);
+            break;
+            case "u8[]":
+            case "ubyte[]":
+            case "binary":
+                UByteA[id] = parse!(ubyte[]) (dt);
+            break;
+            case "u16[]":
+            case "ushort[]":
+                UShortA[id] = parse!(ushort[]) (dt);
+            break;
+            case "u32[]":
+            case "uint[]":
+                UIntA[id] = parse!(uint[]) (dt);
+            break;
+            case "u64[]":
+            case "ulong[]":
+                ULongA[id] = parse!(ulong[]) (dt);
+            break;
+
+            // Floating point types and their arrays
+            case "fp32":
+            case "float":
+                Float[id] = parse!(float) (dt);
+            break;
+            case "fp64":
+            case "double":
+                Double[id] = parse!(double) (dt);
+            break;
+            case "fp":
+            case "real":
+                Real[id] = parse!(real) (dt);
+            break;
+            case "fp32[]":
+            case "float[]":
+                FloatA[id] = parse!(float[]) (dt);
+            break;
+            case "fp64[]":
+            case "double[]":
+                DoubleA[id] = parse!(double[]) (dt);
+            break;
+            case "fp[]":
+            case "real[]":
+                RealA[id] = parse!(real[]) (dt);
+            break;
+
+            // Characters and strings
+            case "UTF8":
+            case "char":
+                Char[id] = parse!(char) (dt);
+            break;
+            case "UTF8[]":
+            case "char[]":
+            case "string":
+                CharA[id] = parse!(char[]) (dt);
+            break;
+
+            default:
+                logger.warn ("Type not supported: " ~ tp);
+                throw new MTUnknownTypeException ();
+        }
+        } catch (Exception e) {
+            // an error should have already been logged
+            // NOTE(review): this handler also catches the MTUnknownTypeException thrown in the
+            // default case above, so no exception ever leaves addTag and the tag is simply
+            // dropped. Confirm that swallowing is intended here.
+        }
+    }
+
+
+
+    /** Data members for direct access. */
+    bool	[ID]	Bool;
+    byte	[ID]	Byte;	/// ditto
+    short	[ID]	Short;	/// ditto
+    int		[ID]	Int;	/// ditto
+    long	[ID]	Long;	/// ditto
+    ubyte	[ID]	UByte;	/// ditto
+    ushort	[ID]	UShort;	/// ditto
+    uint	[ID]	UInt;	/// ditto
+    ulong	[ID]	ULong;	/// ditto
+
+    bool[]	[ID]	BoolA;	/// ditto
+    byte[]	[ID]	ByteA;	/// ditto
+    short[]	[ID]	ShortA;	/// ditto
+    int[]	[ID]	IntA;	/// ditto
+    long[]	[ID]	LongA;	/// ditto
+    ubyte[]	[ID]	UByteA;	/// ditto
+    ushort[]	[ID]	UShortA;/// ditto
+    uint[]	[ID]	UIntA;	/// ditto
+    ulong[]	[ID]	ULongA;	/// ditto
+
+    float	[ID]	Float;	/// ditto
+    double	[ID]	Double;	/// ditto
+    real	[ID]	Real;	/// ditto
+    float[]	[ID]	FloatA;	/// ditto
+    double[]	[ID]	DoubleA;/// ditto
+    real[]	[ID]	RealA;	/// ditto
+
+    char	[ID]	Char;	/// ditto
+    char[]	[ID]	CharA;	/// ditto
+
+    /** Alias names */
+    alias	CharA	String;
+    alias	UByteA	Binary;	/// ditto
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/mergetag/dataset.d.old	Sat Oct 27 18:05:39 2007 +0100
@@ -0,0 +1,149 @@
+/// This file contains text removed from dataset.d, which might possibly still be useful.
+
+/**
+  Class for exceptions of type "Incorrect Type" thrown when trying to read an item. Identical use to class Exception, but provides an easy way to handle only exceptions of this type.
+*/
+class IncorrectType : Exception {
+	this (char[] s) {
+		super(s);
+	}
+}
+
+/**
+  Data class; contains a Data member for each loaded section of a file.
+
+  Could be a struct, except structs are value types (not reference).
+*/
+class DataSet
+{
+/+	Data[SecID] sec;		/// Dynamic array of section data +/
+	void*[char[]][char[]] data;
+
+	void*[char[]] opIndex(char[] i) {
+		return data[i];
+	}
+	void*[char[]][char[]] opSlice() {
+		return data[];
+	}
+	void*[char[]][char[]] opSlice(char[] i, char[] j) {
+		return data[i,j];
+	}
+}
+
+struct Item {
+	enum Type : ubyte {
+		_void = 0,		// initial type
+		tuple, dynlist,
+		_bool, _byte, _ubyte, _short, _ushort, _int, _uint, _long, _ulong, _cent, _ucent,
+		_char, _wchar, _dchar;
+		_float, _double, _real,
+		_ifloat, _idouble, _ireal,
+		_cfloat, _cdouble, _creal
+	}
+	static char[][26] typeName = ["void","tuple",];
+	Type type;
+	new union {
+		Tuple	tuple;
+		DynList	dynlist;
+//		DynMerge dynmerge; merging lists are stored as dynamic lists
+		bool	_bool;
+		byte	_byte;
+		ubyte	_ubyte;
+		short	_short;
+		ushort	_ushort;
+		int	_int;
+		uint	_uint;
+		long	_long;
+		ulong	_ulong;
+		cent	_cent;
+		ucent	_ucent;
+		char	_char;
+		wchar	_wchar;
+		dchar	_dchar;
+		float	_float;
+		double	_double;
+		real	_real;
+		ifloat	_ifloat;
+		idouble	_idouble;
+		ireal	_ireal;
+		cfloat	_cfloat;
+		cdouble	_cdouble;
+		creal	_creal;
+	}
+
+	/** Functions to get data
+
+	Each function will, if the element is of the appropriate type, return the element; if the type
+	is incorrect it will throw an error.
+	*/
+	bool _bool () {
+		if (type != _bool) throw new IncorrectType("Incorrect type when trying to read: tried to read as bool when item had type " ~ typeName[type]);
+		return _bool;
+	}
+	int _int () {	/// ditto
+		if (type != _int) throw new IncorrectType("Incorrect type when trying to read: tried to read as int");
+		return _int;
+	}
+	uint _uint () {	/// ditto
+		if (type != _uint) throw new IncorrectType("Incorrect type when trying to read: tried to read as uint");
+		return _uint;
+	}
+}
+
+struct DynList
+{
+
+}
+
+class Data
+{
+	// added & accessed solely by templates
+	private (uint,void*)[Index]	_gd;		// generic data
+}
+
+// Externally, types are given as a string:
+typedef char[]	typeIDstr;
+	private:
+// Internally, types are given by a uint for performance:
+typedef uint	typeID;
+typeID[typeIDstr] typeIDTable;	// used to look up type typeID
+
+// This (belongs in read.d) contains a table of reading functions for all supported types. Do similarly for writing.
+(void function (Data, char[]))[typeID] genericReader;
+
+// Template function for creating a function to read a new type and adding it to genericReader:
+/+ don't actually use this without specialization
+void addSupport (T) () {
+	// create a function to read this type from a string and add it into Data.genericData as type void*; put a function pointer into generic Reader
+	// do same for write support
+	// create a reader function, accessible by the user of the library, for accessing elements/converting to the proper type
+}+/
+
+/**
+  Get data of the appropriate type.
+
+  The function performs a check that the data is of the appropriate type and throws an exception if
+  not.
+
+  Note: can be called as d.get!($(I type))(i).
+*/
+get(T : int) (Data d, Index i) {
+	return cast(T) *d._gd[i];
+}
+
+// add support for basic types (for all basic types):
+void addSupport (T : int) () {
+	T read_int (char[]);
+	void* get_voidp (T d) {
+		return cast(void*) &d;
+	}
+}
+
+void addSupport (T : T[]) () {	// for any array type
+	// reader: split input and call appropriate fct to convert sub-strings to type T
+	// writer: use appropriate fct to convert to substrings; concat into "[val1,val2,...,valn]" format
+	// access: store as void* to T[] and something like this:
+	T[] get(Data d, Index i) {	// but cannot overload by return-type!
+		return cast(T[]) genericData[i]
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/mergetag/doc/file-format-binary.txt	Sat Oct 27 18:05:39 2007 +0100
@@ -0,0 +1,5 @@
+This is the file format for mergetag binary files. The unit size is a byte. Most numbers to do with the layout (i.e. not stored data) should be stored as a 32-bit uint.
+
+
+BOM  ---  a Byte Order Mark should be used to determine endianness (MT01 (or other version) in bytes, but converted to two ushorts to detect endianness?)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/mergetag/doc/file-format-requirements.txt	Sat Oct 27 18:05:39 2007 +0100
@@ -0,0 +1,29 @@
+Requirements:
+
+---	Config - named entries (map associated by strings; sections by strings)
+Merging: choosing one entry over another
+
+---	Data - list of entries, each of custom compound type (e.g. list of 3-vector over reals)
+As sub blocks within a standard item (mergeable: combine the blocks of multiple items).
+
++++	Global type for handling all this:
+File consists of sections.
+Each section consists of items.
+Items are sorted by ID and not by type, i.e. if two items with the same ID but different types exist, merging rules are used to choose between them.
+Items have a custom type, which can be a compound of:
++	Basic types:
+++		bool
+++		int (int+uint)
+++		real (or float or double? no.)
+++		string (char)
+++		binary (ubyte[])
++	Strings (of char, wchar or dchar)
++	Fixed-length arrays (single type)
++	Variable-length arrays (single type)
++	Fixed format tuples (multiple types which are prespecified)
++	The top-most type may be a "data list", which is identical to a variable-length array except that merging items with identical types will combine their lists instead of choosing one over the other.
+To access an item, it should be found by ID, its type should be checked, and then it may be accessed.
+Types are specific to items. As an optimisation, a binary format may have a list of types and index them.
+
++++	Basic types:
+All D base types, including void, with support for writing strings.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/mergetag/doc/file-format-text.txt	Sat Oct 27 18:05:39 2007 +0100
@@ -0,0 +1,184 @@
+This is the file format for mergetag text files.
+Version: 0.1 unfinalised
+
+
+The encoding should be unicode UTF-8, UTF-16 or UTF-32, and for anything other than UTF-8 must include a BOM.
+
+
+Hierarchy:
++	Sections	(special section: see header)
+++	Data Tags
+
+
+IDs:
+IDs are used for several purposes; they are always stored as a uint number (0-4294967295). They may
+be given in the file as a base-10 or hex number or, where a lookup table is provided to the reader,
+as a double-quoted string (with no escape sequences).
+Multiple section or data tags with the same ID are allowed; see the "Merging rules" section.
+
+
+Outside of tags only whitespace or valid tags are allowed. Whitespace is ignored.
+The following tags are valid (see below for details):
+tag		purpose
+{...}		section identifiers
+<...>		data items
+!{...}		simple comment block
+!<...>		comment block parsed the same as <...>
+Within tags, type specifications or data items whitespace is allowed between symbols.
+
+
+Section identifier tags:
+Format: {ID} or {ID|ID}
+In the {ID|ID} case, the first ID is the section type, and the second ID the section name.
+In the {ID} case, the section type ID has been omitted and the default type is used (0).
+A section identifier marks the beginning of a new section, extending until the next section
+identifier or the end of the file. When a section is read, a new
+
+
+Data item tags:
+Format: <tp|ID=dt>
+A data item with type tp, identifier ID and data dt. If the data does not fit the given type it is
+an error and the tag is ignored. Once split into a type string, ID and data string, the contents
+are passed to an addTag() function within the DataSection class which will parse tags of a
+recognised format and either ignore or print a warning about other tags.
+
+
+Data item tags: Type format:
+Note:
+	The type is not initially parsed; it is read as a token terminated by any of these
+	characters:	<>|=
+	Of course any character other than a | terminating the token is an error.
+Format:
+	tp		a basic type
+	tp[]		a dynamic list of sub-type tp
+Possible future additions:
+	tp()		a dynamic merging list of sub-type tp (only valid as the primary type, ie
+        		<subtype()|...>, not a sub-type of a tuple or another dynamic list)
+	{t1,t2,...,tn}	a tuple with sub-types t1, t2, ..., tn
+
+Basic types (only items with a + are currently supported):
+	abbrev./full name (each type has two names which can be used):
+
+	0	void	--- less useful type
++	1	bool	--- integer types
++	s8	byte
++	u8	ubyte
++	s16	short
++	u16	ushort
++	s32	int
++	u32	uint
++	s64	long
++	u64	ulong
+	s128	cent
+	u128	ucent
+
++		binary	--- alias for ubyte[]
+
++	fp32	float	--- floating point types
++	fp64	double
++	fp	real
+	im32	ifloat
+	im64	idouble
+	im	ireal
+	cpx32	cfloat
+	cpx64	cdouble
+	cpx	creal
+
++	UTF8	char	--- character types (actually these CANNOT support UTF8 chars with length > 1)
+	UTF16	wchar
+	UTF32	dchar
++		string	--- alias for char[] --- (DOES support UTF8)
+		wstring	--- alias for wchar[]
+		dstring	--- alias for dchar[]
+
+
+Data item tags: Data format:
+Valid chars:	[](){},+-.0-9eEixXa-fA-F '.' ".*"
+Format:
+	[d1,d2,...,dn]	data all of type t corresponding to t[]
+	(d1,d2,...,dn)	data all of type t corresponding to t()
+	{d1,d2,...,dn}	data corresponding to a type declaration of {t1,t2,...,tn}
+	d		a single data element
+
+Single data elements:
+	z		an integer number (regexp: [+-]?[0-9]+)
+	z		a floating point number (rough regexp: [+-]?[0-9]*[.]?[0-9]*(e[+-]?[0-9]+)?)
+	zi		an imaginary floating point number (z is a floating point number)
+	y+zi, y-zi	a complex number (4+0i may be written as 4, etc) (y, z are f.p.s)
+	0xz, -0xz	a hexadecimal integer z (composed of chars 0-9,a-f,A-F)
+	'c'		a char/wchar/dchar character, depending on the type specified (c may be any
+			single character except ' or an escape sequence)
+	"string"	equivalent to ['s','t','r','i','n','g'] (for a string/wstring/dstring type)
+			may contain escape sequences
+			Escape sequences are a subset of those supported by D: \" \' \\ \a \b \f \n \r \t \v
+	XX...XX		Binary (ubyte[]); each pair of chars is read as a hex ubyte
+	<void>		void "data" has no symbols
+
+
+Data format: Escape sequences:
+To be created and written.
+
+
+Comment tags (there are no line comments):
+Simple comment blocks:
+Format: !{...}
+This is a simple comment block, and only curly braces ({,}) are treated specially. A {, whether or
+not it is preceded by a !, starts an embedded comment block, and a } ends either an embedded block
+or the actual comment block. Note: beware commenting out {...} tags with a string ID containing
+curly braces which aren't in matching pairs.
+Commented data tags:
+Format: !<tp|ID=dt>
+Basically a commented out data tag. Conformance to the above spec may not be checked as strictly as
+normal, but the dt section is checked for strings so that a > within a string won't end the tag.
+
+
+Merging rules:
+if, when a data item is read, a data item with the same identifier
+within the same section exists in the DataSet being read into:
++	if the types are identical:
+++		if the primary type is a tp() mergeable dynamic list:
++++			the entries from the item being read are concatenated to those in the item
++++			in the DataSet
+++		else:
+++-			the item already in the DataSet takes priority and is left untouched
++	else:
++-		a warning is issued, and the data item within the DataSet is left untouched
+This allows merging some config settings in a user config file with the remaining settings in a
+complete system config file and some support for modifications overriding or adding to some data.
+
+
+Header:
+The header is a standard section which is mandatory and must be the first section. Its section
+identifier must start at the beginning of the file with no whitespace, declared with:
+	{MTXY}		where XY is a two digit CAPITAL HEX version number representing the
+			mergetag format version, e.g. {MT01} .
+If these are not the first 6 characters of the file the file will not be regarded as valid.
+This formatting is very strict to allow reliable low-level parsing.
+
+
+The data tags within the header have no special meaning; any may be used such as the following:
+	<string|"Author"="...">
+	<string|"Name"="...">
+	<string|"Description"="...">
+	<string|"Program"="...">	(which program created/uses this?)
+	<*|"Version"=...>		(use any supported type)
+	<string|"Date"="YYYYMMDD">	(reverse date format; optionally "YYYYMMDDhhmmss")
+	<{u16,u8,u8}|"Date"={YYYY,MM,DD}>	(actually this type probably won't be supported by
+						a standard section)
+	<string|"Copyright"=...>
+
+
+Example:
+{MT01}
+{example section}
+<u32|"num"=5>
+<{u32,UTF8[]}()|"DATA"=(
+	{1,['a']},
+	{59,['w','o','r','d']},
+	{2,"strings can be written like this"} )>
+<wchar[]|"name"="This string is stored in UTF16, regardless of the file's encoding.">
+<{u32,UTF8[]}()|"DATA"=(
+	{3,"this is appended to the previous 'DATA' item"} )>
+{"section: section identifiers and tuples are not confused since tuples only occur inside <...> items"}
+<void|Empty tag= >
+!{this is a comment {containing a comment}}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/mergetag/doc/issues.txt	Sat Oct 27 18:05:39 2007 +0100
@@ -0,0 +1,17 @@
+This is mostly just a list of potential minor issues noticed while coding but not seen worth throwing an error about.
+
+Overall:
+	Threading support?
+
+read.d:
+	Allow only partially loading a file.
+	parseSection():
+	as mentioned at end of function
+	formatting errors could be more informative; in particular say where the error is
+
+write.d:
+	There is currently no way to specify the base in which numbers are written (in text form).
+
+parse.d:
+	No support for escaped quotes in strings during tokenisation (by tango.text.Util.quotes).
+    
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/mergetag/exception.d	Sat Oct 27 18:05:39 2007 +0100
@@ -0,0 +1,46 @@
+/*******************************************
+ * Contains exception classes for MergeTag.
+ *
+ * Publicly imports mde.exception.
+ ******************************************/
+module mde.mergetag.exception;
+
+public import mde.exception;
+
+/// Base MergeTag exception class.
+///
+/// All other exception classes in this module derive from it.
+class MTException : mdeException {
+    /// Construct with a message; the text passed up to mdeException is prefixed with "MergeTag: ".
+    this (char[] msg) {
+        super("MergeTag: " ~ msg);
+    }
+    /// Construct without a message.
+    this () {}
+}
+
+/** Thrown on file IO errors. Carries no message of its own. */
+class MTFileIOException : MTException {
+    this () {}
+}
+
+/** Thrown on unknown format errors; when reading or writing and the filetype cannot be guessed. */
+class MTFileFormatException : MTException {
+    this () {}
+}
+
+/** Thrown when a string ID is parsed but cannot be found in the lookup table, hence cannot be used
+ * as an ID. It should always be caught and handled gracefully (by ignoring the tag or section
+ * involved).
+ */
+class MTStringIDException : MTException {
+    this () {}
+}
+
+/** Thrown by the parse module on any error.
+ */
+class MTParseException : MTException {
+    this () {}
+}
+
+/** Thrown by the format module on any error.
+ *
+ * mde.mergetag.format throws this class; it carries no message because the error text is logged
+ * before the exception is thrown.
+ */
+class MTFormatException : MTException {
+    this () {}
+}
+
+/** Thrown by classes implementing DataSection when addTag is called with an unrecognised type string.
+ */
+class MTUnknownTypeException : MTException {
+    this () {}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/mergetag/format.d	Sat Oct 27 18:05:39 2007 +0100
@@ -0,0 +1,316 @@
+/**************************************************************************************************
+ * This contains templates for converting various data-types to a char[].
+ *
+ * Copyright (c) 2007 Diggory Hardy.
+ * Licensed under the Academic Free License version 3.0
+ *
+ * This module basically implements the following templated function for $(B most) basic D types:
+ * bool, byte, short, int, long, ubyte, ushort, uint, ulong, float, double, real, char.
+ * It also supports arrays of any supported type (including of other arrays) and has special
+ * handling for strings (char[]) and binary (ubyte[]) data-types.
+ * -----------------------------
+ * char[] format(T) (T value);
+ * -----------------------------
+ *
+ * There are also a few utility functions defined; the public ones have their own documentation.
+ *
+ * On errors, a warning is logged and an MTFormatException is thrown. No other exceptions should
+ * be thrown and none thrown from functions used outside this module.
+ *************************************************************************************************/
+module mde.mergetag.format;
+
+// package imports
+import mde.mergetag.exception;
+
+// tango imports
+import cInt = tango.text.convert.Integer;
+import cFloat = tango.text.convert.Float;
+import Utf = tango.text.convert.Utf;
+import Util = tango.text.Util;
+import tango.util.log.Log : Log, Logger;
+
+// Module-wide logger; assigned in the module constructor below.
+private Logger logger;
+static this () {
+    // logger name follows the mde.package.module convention
+    logger = Log.getLogger ("mde.mergetag.format");
+}
+
+//BEGIN Convert templates
+/* Idea: could extend format with a second parameter, containing flags for things like base to output.
+ * Unnecessary for mergetag though.
+ */
+// Arrays
+/* Formats an array as "[e1,e2,...,en]", formatting each element with format!(T).
+ * An empty array is written as "[]".
+ *
+ * The buffer is heap-allocated because a slice of it is returned to the caller; it starts at
+ * defLength!(T) chars per element and is doubled whenever an element does not fit.
+ */
+char[] format(T : T[]) (T[] val) {
+    char[] ret = new char[2 + val.length * defLength!(T)];	// +2 for the brackets
+    ret[0] = '[';
+    uint i = 1;	// next free position; 0 holds the opening bracket
+    foreach (T x; val) {
+        char[] s = format!(T) (x);
+        // need room for the element plus its trailing comma (or the closing bracket)
+        while (i + s.length + 1 > ret.length) ret.length = ret.length * 2;
+        ret[i .. i + s.length] = s;
+        i += s.length;
+        ret[i++] = ',';
+    }
+    if (val.length > 0) --i;	// step back so the closing bracket replaces the last comma
+    ret[i++] = ']';
+    return ret[0..i];
+}
+// UTF-32 strings: converted to UTF-8 and then formatted as a char[] string.
+char[] format(T : dchar[]) (T val) {
+    // tango.text.convert.Utf is imported under the name Utf, so the call must be qualified
+    return format (Utf.toUtf8 (val));
+}
+// UTF-16 strings: converted to UTF-8 and then formatted as a char[] string.
+char[] format(T : wchar[]) (T val) {
+    // tango.text.convert.Utf is imported under the name Utf, so the call must be qualified
+    return format (Utf.toUtf8 (val));
+}
+/* Formats a string as a double-quoted literal, writing every escapable character (see
+ * isEscapableChar) as a backslash followed by its escape letter.
+ *
+ * The buffer is heap-allocated because a slice of it is returned to the caller.
+ */
+char[] format(T : char[]) (T val) {
+    // Worst case every char is escaped (2 chars each) plus the two quotes, so this never grows.
+    char[] ret = new char[val.length * 2 + 2];
+    ret[0] = '"';
+    uint i = 1;	// next free position; 0 holds the opening quote
+    for (uint t = 0; t < val.length;) {
+        // process a block of non-escapable characters
+        uint s = t;
+        while (t < val.length && !isEscapableChar(val[t]))
+            ++t;	// skip all non-escapable chars
+        uint j = i + t - s;
+        ret[i..j] = val[s..t];	// copy a block
+        i = j;
+        // process a block of escapable characters
+        while (t < val.length && isEscapableChar(val[t])) {
+            ret[i++] = '\\';				// backslash; increment i
+            ret[i++] = replaceEscapableChar(val[t++]);	// character; increment i and t
+        }
+    }
+    ret[i++] = '"';
+    return ret[0..i];
+}
+/* Formats binary data as a run of lower-case hex digits, two per byte, with no delimiters.
+ *
+ * The buffer is heap-allocated because it is returned to the caller.
+ */
+char[] format(T : ubyte[]) (T val) {
+    static const char[16] digits = "0123456789abcdef";
+
+    char[] ret = new char[val.length * 2];	// exact length
+    uint i = 0;
+    foreach (ubyte x; val) {
+        ret[i++] = digits[x >> 4];	// high nibble
+        ret[i++] = digits[x & 0x0F];	// low nibble
+    }
+    return ret;
+}
+
+// Support for outputting a wide char... I recommend against trying to output these though.
+const char[] WIDE_CHAR_ERROR = "Error: unicode character cannot be converted to a single UTF-8 char";
+// A single UTF-32 code unit: only values up to 127 fit in one UTF-8 char.
+char[] format(T : dchar) (T val) {
+    if (val > 127u) throwMTFException (WIDE_CHAR_ERROR);	// logs and throws
+    return format (cast(char) val);
+}
+// A single UTF-16 code unit: only values up to 127 fit in one UTF-8 char.
+char[] format(T : wchar) (T val) {
+    if (val > 127u) throwMTFException (WIDE_CHAR_ERROR);	// logs and throws
+    return format (cast(char) val);
+}
+/* Formats a single char as a single-quoted literal, e.g. 'a' or '\n'. Escapable characters (see
+ * isEscapableChar) are written as a backslash followed by their escape letter.
+ */
+char[] format(T : char) (T val) {
+    // Note: if (val > 127) "is invalid UTF-8 single char"
+    // However we don't know what this is for, in particular if it will be recombined with other chars later
+
+    char[] ret = new char[4];	// max length for an escaped char: quote, backslash, letter, quote
+    ret[0] = '\'';
+    uint i = 1;
+    if (isEscapableChar (val)) {
+        ret[i++] = '\\';
+        ret[i++] = replaceEscapableChar (val);
+    }
+    else ret[i++] = val;
+    ret[i++] = '\'';
+    return ret[0..i];
+}
+// FIXME: carry on down
+
+// NOTE(review): the templates below still take a char[] and return a T, with bodies matching the
+// parse templates in mde.mergetag.parse; they have not yet been converted to the
+// char[] format(T) (T value) form described in the module header. They rely on
+// throwMTCException, which is not defined in this module or in mde.mergetag.exception.
+
+// Reads "true"/"false", or a run of zeros (false), or optional zeros followed by a single 1 (true).
+T format(T : bool) (char[] src) {
+    src = Util.trim(src);
+    if (src == "true") return true;
+    if (src == "false") return false;
+    uint pos;
+    while (src.length > pos && src[pos] == '0') ++pos;	// strip leading zeros
+    if (src.length == pos && pos > 0) return false;
+    if (src.length == pos + 1 && src[pos] == '1') return true;
+    throwMTCException ("Invalid bool: not true or false and doesn't evaluate to 0 or 1");
+}
+// Integer types: all forward to toTInt.
+T format(T : byte) (char[] src) {
+    return toTInt!(T) (src);
+}
+T format(T : short) (char[] src) {
+    return toTInt!(T) (src);
+}
+T format(T : int) (char[] src) {
+    return toTInt!(T) (src);
+}
+T format(T : long) (char[] src) {
+    return toTInt!(T) (src);
+}
+T format(T : ubyte) (char[] src) {
+    return toTInt!(T) (src);
+}
+T format(T : ushort) (char[] src) {
+    return toTInt!(T) (src);
+}
+T format(T : uint) (char[] src) {
+    return toTInt!(T) (src);
+}
+T format(T : ulong) (char[] src) {
+    return toTInt!(T) (src);
+}
+
+// Floating point types: all forward to toTFloat.
+T format(T : float) (char[] src) {
+    return toTFloat!(T) (src);
+}
+T format(T : double) (char[] src) {
+    return toTFloat!(T) (src);
+}
+T format(T : real) (char[] src) {
+    return toTFloat!(T) (src);
+}
+//END Convert templates
+
+//BEGIN Length templates
+/* This template provides the initial length for strings for formatting various types. These strings
+ * can be expanded; this value should cover 90% of cases or so.
+ *
+ * Currently there is only the generic case, which yields 20 for every type.
+ * FIXME: provide more specialisations
+ */
+private {
+    template defLength(T) {	const uint defLength = 20;	}
+}
+//END Length templates
+
+//BEGIN Utility funcs
+/** Templated read-int function to read (un)signed integers of type TInt from a string.
+ *
+ * Actually a reimplementation of tango.text.convert.Integer toLong and parse functions.
+ *
+ * Trailing spaces and tabs are accepted; any other trailing character is an error.
+ *
+ * NOTE(review): this sits inside the "FIXME: carry on down" ... "FIXME: to here" region and
+ * reads a string rather than formatting a value. It calls throwMTCException, which is not
+ * defined in this module.
+ */
+TInt toTInt(TInt) (char[] src) {
+    const char[] INT_OUT_OF_RANGE = "mde.mergetag.format.toTInt: integer out of range";
+    bool sign;
+    uint radix, ate, ate2;
+
+    // ate counts the chars consumed by trim, then also those consumed by convert
+    ate = cInt.trim (src, sign, radix);
+    ulong val = cInt.convert (src[ate..$], radix, &ate2);
+    ate += ate2;
+
+    // only spaces and tabs may follow the number
+    while (ate < src.length) {
+        if (src[ate] == ' ' || src[ate] == '\t') ++ate;
+        else throwMTCException ("mde.mergetag.format.toTInt: invalid integer");
+    }
+
+    // NOTE(review): the magnitude is range-checked against TInt.max before the sign is applied
+    if (val > TInt.max) throwMTCException (INT_OUT_OF_RANGE);
+    if (sign) {
+        long sval = cast(long) -val;
+        if (sval > TInt.min) return cast(TInt) sval;
+        else throwMTCException (INT_OUT_OF_RANGE);
+    }
+    return cast(TInt) val;
+}
+
+/** Basically a reimplementation of tango.text.convert.Float.toFloat which checks for trailing
+ * whitespace before throwing an exception for overlong input and throws my exception class
+ * when it does.
+ *
+ * NOTE(review): this sits inside the "FIXME: carry on down" ... "FIXME: to here" region and
+ * reads a string rather than formatting a value. It calls throwMTCException, which is not
+ * defined in this module.
+ */
+TFloat toTFloat(TFloat) (char[] src) {
+    uint ate;
+
+    TFloat x = cFloat.parse (src, &ate);
+    // only spaces and tabs may follow the number
+    while (ate < src.length) {
+        if (src[ate] == ' ' || src[ate] == '\t') ++ate;
+        else throwMTCException ("mde.mergetag.format.toTFloat: invalid number");
+    }
+    return x;
+}
+
+/** Read a character from a string, with support for escape sequences.
+ *
+ * Assumes src.length > pos. At return pos is set to one after the last character eaten.
+ *
+ * Throws an exception on invalid escape sequences. Supported escape sequences are the following
+ * subset of those supported by D: \" \' \\ \a \b \f \n \r \t \v
+ *
+ * NOTE(review): this sits inside the "FIXME: carry on down" ... "FIXME: to here" region. It
+ * calls throwMTCException, which is not defined in this module.
+ */
+char toChar (char[] src, inout uint pos)
+{
+    //if (src.length <= pos) throwMTCException ("Invalid char: no character"); // shouldn't happen
+    if (src[pos] == '\\') {
+        // map of all supported escape sequences
+        // (a local map, so it is filled again on every call that reaches this branch)
+        char[char] escChars;
+        escChars['"'] = '"';
+        escChars['\''] = '\'';
+        escChars['\\'] = '\\';
+        escChars['a'] = '\a';
+        escChars['b'] = '\b';
+        escChars['f'] = '\f';
+        escChars['n'] = '\n';
+        escChars['r'] = '\r';
+        escChars['t'] = '\t';
+        escChars['v'] = '\v';
+
+        ++pos;	// step over the backslash
+        if (src.length > pos) {
+            char* r = src[pos] in escChars;
+            ++pos;
+            if (r != null) return *r;
+        }
+        throwMTCException ("Invalid escape sequence.");	// we didn't return, so something failed
+    }
+    // plain (non-escaped) character
+    char c = src[pos];
+    ++pos;
+    return c;
+
+    // note on UTF-8 non-ascii chars: these consist of multiple "char"s; can only return one at a
+    // time like this anyway. If this is used to read a string it should handle them fine.
+}
+
+// Reads one hex char: [0-9A-Fa-f]. Otherwise throws an exception. Doesn't check src.length.
+// Returns the digit's value (0-15) and advances pos by one.
+// NOTE(review): calls throwMTCException, which is not defined in this module.
+private ubyte readHexChar (char[] src, inout uint pos) {
+    ubyte x;
+    if (src[pos] >= '0' && src[pos] <= '9') x = src[pos] - '0';
+    else if (src[pos] >= 'A' && src[pos] <= 'F') x = src[pos] - 'A' + 10;
+    else if (src[pos] >= 'a' && src[pos] <= 'f') x = src[pos] - 'a' + 10;
+    else throwMTCException ("Invalid hex digit.");
+    ++pos;
+    return x;
+}
+
+// Generic array reader
+// Drops the first and last char of src, splits the rest with Util.quotes on ",", and converts
+// each piece with format!(T).
+// NOTE(review): each piece is a char[] being passed to format; this looks like an unconverted
+// copy of the parse module's array reader - confirm before use.
+private T[] toArray(T : T[]) (char[] src) {
+    T[] ret = new T[16];	// avoid unnecessary allocations
+    uint i = 0;
+    foreach (char[] element; Util.quotes (src[1..$-1],",")) {
+        if (i == ret.length) ret.length = ret.length * 2;	// grow by doubling
+        ret[i] = format!(T) (element);
+        ++i;
+    }
+    return ret[0..i];	// trim to the number of elements actually read
+}
+// FIXME: to here.
+
+// True for the characters written as escape sequences: \a \b \t \n \v \f \r (the contiguous
+// range '\a' .. '\r'), the double quote, the single quote and the backslash.
+private bool isEscapableChar (char c) {
+    return ((c <= '\r' && c >= '\a') || c == '\"' || c == '\'' || c == '\\');
+}
+// Maps an escapable character to the letter that follows the backslash in its escape sequence
+// (e.g. a newline gives 'n'; quotes and backslash map to themselves).
+// Warning: this DOES NOT check c is escapable
+private char replaceEscapableChar (char c) {
+    static char[char] letterFor;	// raw character -> escape letter; filled on first use
+
+    if (letterFor.length == 0) {	// only do this once
+        const char[] raw     = "\"'\\\a\b\f\n\r\t\v";
+        const char[] letters = "\"'\\abfnrtv";
+        foreach (uint k, char ch; raw)
+            letterFor[ch] = letters[k];
+    }
+
+    return letterFor[c];
+}
+
+// Logs msg as a warning, then throws an MTFormatException. The exception itself carries no
+// message; the log entry is the record of what went wrong.
+private void throwMTFException (char[] msg) {
+    logger.warn (msg);			// only small errors are trapped here
+    throw new MTFormatException ();
+}
+//END Utility funcs
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/mergetag/parse.d	Sat Oct 27 18:05:39 2007 +0100
@@ -0,0 +1,245 @@
+/**************************************************************************************************
+ * This contains templates for converting a char[] to various data-types.
+ *
+ * Copyright (c) 2007 Diggory Hardy.
+ * Licensed under the Academic Free License version 3.0
+ *
+ * This module basically implements the following templated function for $(B most) basic D types:
+ * bool, byte, short, int, long, ubyte, ushort, uint, ulong, float, double, real, char.
+ * It also supports arrays of any supported type (including of other arrays) and has special
+ * handling for strings (char[]) and binary (ubyte[]) data-types.
+ * -----------------------------
+ * T parse(T) (char[] source);
+ * -----------------------------
+ *
+ * There are also a few utility functions defined; the public ones have their own documentation.
+ *
+ * On errors, a warning is logged and an MTParseException is thrown. No other exceptions should
+ * be thrown and none thrown from functions used outside this module.
+ *************************************************************************************************/
+module mde.mergetag.parse;
+
+// package imports
+import mde.mergetag.exception;
+
+// tango imports
+import cInt = tango.text.convert.Integer;
+import cFloat = tango.text.convert.Float;
+import Util = tango.text.Util;
+import tango.util.log.Log : Log, Logger;
+
+// Module-wide logger; assigned in the module constructor below.
+private Logger logger;
+static this () {
+    // logger name follows the mde.package.module convention
+    logger = Log.getLogger ("mde.mergetag.parse");
+}
+
+//BEGIN parse templates
+// Arrays
+// Trims the input, requires it to be wrapped in [ ... ], and hands it to toArray.
+T[] parse(T : T[]) (char[] src) {
+    src = Util.trim(src);
+    bool bracketed = src.length >= 2 && src[0] == '[' && src[$-1] == ']';
+    if (!bracketed) throwMTPException ("Invalid array: not [., ..., .]");	// logs and throws
+    return toArray!(T[]) (src);
+}
+/* Reads a string, given either as a double-quoted literal (which may contain the escape
+ * sequences understood by replaceEscapedChar) or as an array of chars (['a','b',...]).
+ *
+ * Throws (via throwMTPException) on an unsupported escape sequence, on a trailing lone
+ * backslash, or when the input is neither quoted nor bracketed.
+ */
+T parse(T : char[]) (char[] src) {
+    src = Util.trim(src);
+    if (src.length >= 2 && src[0] == '"' && src[$-1] == '"') {
+        src = src[1..$-1];
+        T ret;
+        ret.length = src.length;	// maximum length; retract to actual length later
+        uint i;
+        for (uint t = 0; t < src.length;) {
+            // process a block of non-escaped characters
+            uint s = t;
+            while (t < src.length && src[t] != '\\') ++t;	// non-escaped characters
+            uint j = i + t - s;
+            ret[i..j] = src[s..t];	// copy a block
+            i = j;
+
+            // process a block of escaped characters; stop at the first char which is not a
+            // backslash so that the text following an escape is copied verbatim
+            while (t < src.length && src[t] == '\\') {
+                t++;	// step over the backslash
+                if (t == src.length) throwMTPException (`Warning: \" in string! There's currently no support for this during tokenising. Thus your input's probably been garbled!`);	// the backslash was the last char, i.e. it preceded the closing "
+                ret[i++] = replaceEscapedChar (src[t++]);	// throws if it's invalid
+            }
+        }
+        return ret[0..i];
+    }
+    else if (src.length >= 2 && src[0] == '[' && src[$-1] == ']') return toArray!(T) (src);
+    throwMTPException ("Invalid string: not quoted (\"*\") or char array (['.',...,'.'])");
+}
+/* Reads binary data, given either as an array of ubytes ([1,2,...]) or as a run of hex digits
+ * in which each pair of digits is one byte.
+ */
+T parse(T : ubyte[]) (char[] src) {
+    src = Util.trim(src);
+    // Standard case: a bracketed array
+    bool bracketed = src.length >= 2 && src[0] == '[' && src[$-1] == ']';
+    if (bracketed) return toArray!(T) (src);
+
+    // Special case: hex digits, two per byte
+    if ((src.length & 1) != 0) throwMTPException ("Invalid binary: odd number of chars");
+    T bytes;
+    bytes.length = src.length / 2;	// exact
+    uint cursor = 0;	// advanced by readHexChar
+    foreach (inout ubyte b; bytes) {
+        ubyte hi = readHexChar (src, cursor);
+        ubyte lo = readHexChar (src, cursor);
+        b = cast(ubyte) ((hi << 4) | lo);
+    }
+    return bytes;
+}
+
+/* Reads a single-quoted character: either 'c' or an escape sequence such as '\n'.
+ *
+ * Throws (via throwMTPException) if the input is not quoted, if the escape sequence is not
+ * supported, or if more than one char lies between the quotes.
+ */
+T parse(T : char) (char[] src) {
+    src = Util.trim(src);
+    if (src.length < 3 || src[0] != '\'' || src[$-1] != '\'')
+        throwMTPException ("Invalid char: not quoted (\'*\')");
+    if (src[1] != '\\' && src.length == 3) return src[1];	// Either non escaped
+    if (src[1] == '\\' && src.length == 4) return replaceEscapedChar (src[2]);	// Or escaped
+
+    // Report various errors; warnings for likely and difficult to tell cases:
+    if (src[1] == '\\' && src.length == 3) throwMTPException (`Warning: \' in char! There's currently no support for this during tokenising. Thus your input's probably been garbled!`);	// the backslash preceded the closing '
+    // Warn in case it's a multibyte UTF-8 character (every byte of one has the top bit set):
+    if (src[1] & 0x80u) throwMTPException ("Invalid char: too long (non-ASCII UTF-8 characters cannot be read as a single character)");
+    throwMTPException ("Invalid char: too long");
+}
+
+/* Reads a bool: "true" or "false", or a number consisting of zeros only (false) or of optional
+ * leading zeros followed by a single 1 (true). Anything else is an error.
+ */
+T parse(T : bool) (char[] src) {
+    char[] text = Util.trim(src);
+    if (text == "true") return true;
+    if (text == "false") return false;
+
+    // count leading zeros, then look at what is left
+    uint zeros = 0;
+    for (; zeros < text.length && text[zeros] == '0'; ++zeros) {}
+    char[] rest = text[zeros..$];
+
+    if (rest.length == 0 && zeros > 0) return false;	// nothing but zeros
+    if (rest.length == 1 && rest[0] == '1') return true;
+    throwMTPException ("Invalid bool: not true or false and doesn't evaluate to 0 or 1");
+}
+// Integer types: each specialisation forwards to toTInt, which does the reading and the range
+// check for the requested type.
+T parse(T : byte) (char[] src) {
+    return toTInt!(T) (src);
+}
+T parse(T : short) (char[] src) {
+    return toTInt!(T) (src);
+}
+T parse(T : int) (char[] src) {
+    return toTInt!(T) (src);
+}
+T parse(T : long) (char[] src) {
+    return toTInt!(T) (src);
+}
+T parse(T : ubyte) (char[] src) {
+    return toTInt!(T) (src);
+}
+T parse(T : ushort) (char[] src) {
+    return toTInt!(T) (src);
+}
+T parse(T : uint) (char[] src) {
+    return toTInt!(T) (src);
+}
+T parse(T : ulong) (char[] src) {
+    return toTInt!(T) (src);
+}
+
+// Floating point types: each specialisation forwards to toTFloat.
+T parse(T : float) (char[] src) {
+    return toTFloat!(T) (src);
+}
+T parse(T : double) (char[] src) {
+    return toTFloat!(T) (src);
+}
+T parse(T : real) (char[] src) {
+    return toTFloat!(T) (src);
+}
+//END parse templates
+
+//BEGIN Utility funcs
+/** Templated read-int function to read (un)signed 1-8 byte integers.
+ *
+ * Actually a reimplementation of tango.text.convert.Integer toLong and parse functions.
+ *
+ * Accepts an optional sign and radix prefix (as handled by tango's Integer.trim), then digits,
+ * then optional trailing spaces and tabs. The default radix is 10.
+ *
+ * Throws (via throwMTPException) if any other character follows the number or if the value
+ * does not fit in TInt. For unsigned types the only accepted negative input is -0.
+ */
+TInt toTInt(TInt) (char[] src) {
+    const char[] INT_OUT_OF_RANGE = "mde.mergetag.parse.toTInt: integer out of range";
+    bool sign;
+    // radix must start at 10: trim only changes it when it finds a radix prefix
+    uint radix = 10, ate, ate2;
+
+    ate = cInt.trim (src, sign, radix);
+    ulong val = cInt.convert (src[ate..$], radix, &ate2);
+    ate += ate2;
+
+    // only spaces and tabs may follow the number
+    while (ate < src.length) {
+        if (src[ate] == ' ' || src[ate] == '\t') ++ate;
+        else throwMTPException ("mde.mergetag.parse.toTInt: invalid integer");
+    }
+
+    if (sign) {
+        static if (TInt.min == 0) {
+            // unsigned target: no negative value is representable
+            if (val != 0) throwMTPException (INT_OUT_OF_RANGE);
+            return cast(TInt) 0;
+        } else {
+            // signed target: the largest allowed magnitude is TInt.max + 1 (i.e. -TInt.min)
+            if (val > cast(ulong) TInt.max + 1) throwMTPException (INT_OUT_OF_RANGE);
+            return cast(TInt) (-cast(long) val);
+        }
+    }
+    if (val > TInt.max) throwMTPException (INT_OUT_OF_RANGE);
+    return cast(TInt) val;
+}
+
+/** Reads a floating point number of type TFloat using tango.text.convert.Float.parse.
+ *
+ * Unlike tango's toFloat, trailing spaces and tabs are accepted; any other trailing character
+ * results in an MTParseException (via throwMTPException).
+ */
+TFloat toTFloat(TFloat) (char[] src) {
+    uint consumed;
+    TFloat result = cFloat.parse (src, &consumed);
+
+    // everything after the number must be a space or a tab
+    for (uint k = consumed; k < src.length; ++k) {
+        bool blank = (src[k] == ' ' || src[k] == '\t');
+        if (!blank) throwMTPException ("mde.mergetag.parse.toTFloat: invalid number");
+    }
+    return result;
+}
+
+/* Maps the character following a backslash to the character the escape sequence stands for.
+ *
+ * Supported escape sequences are the following subset of those supported by D:
+ * \" \' \\ \a \b \f \n \r \t \v
+ * Any other character results in an exception (via throwMTPException).
+ */
+private char replaceEscapedChar (char c)
+{
+    switch (c) {
+        case '"':	return '"';
+        case '\'':	return '\'';
+        case '\\':	return '\\';
+        case 'a':	return '\a';
+        case 'b':	return '\b';
+        case 'f':	return '\f';
+        case 'n':	return '\n';
+        case 'r':	return '\r';
+        case 't':	return '\t';
+        case 'v':	return '\v';
+        default:	break;
+    }
+
+    throwMTPException ("Invalid escape sequence.");	// not a supported escape letter
+}
+
+// Reads the hex digit [0-9A-Fa-f] at src[pos], returns its value (0-15) and advances pos by one.
+// Throws an exception for any other character. Doesn't check src.length.
+private ubyte readHexChar (char[] src, inout uint pos) {
+    char digit = src[pos];
+    ubyte nibble;
+    if (digit >= '0' && digit <= '9')      nibble = digit - '0';
+    else if (digit >= 'A' && digit <= 'F') nibble = digit - 'A' + 10;
+    else if (digit >= 'a' && digit <= 'f') nibble = digit - 'a' + 10;
+    else throwMTPException ("Invalid hex digit.");
+    ++pos;
+    return nibble;
+}
+
+// Generic array reader
+// Expects src to be wrapped in one leading and one trailing char (the brackets), splits what is
+// between them with Util.quotes on "," and parses each piece as a T.
+private T[] toArray(T : T[]) (char[] src) {
+    T[] items = new T[16];	// start with some room to avoid unnecessary allocations
+    uint count = 0;
+    char[] inner = src[1..$-1];
+    foreach (char[] piece; Util.quotes (inner,",")) {
+        if (count == items.length) items.length = items.length * 2;	// full: double it
+        items[count++] = parse!(T) (piece);
+    }
+    return items[0..count];
+}
+
+// Logs msg as a warning, then throws an MTParseException. The exception itself carries no
+// message; the log entry is the record of what went wrong.
+private void throwMTPException (char[] msg) {
+    logger.warn (msg);			// only small errors are trapped here
+    throw new MTParseException ();
+}
+//END Utility funcs
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/mergetag/read.d	Sat Oct 27 18:05:39 2007 +0100
@@ -0,0 +1,435 @@
+/**************************************************************************************************
+ * This module contains all reading functions, for both binary and text MergeTag files.
+ *
+ * It publicly imports mde.mergetag.dataset.
+ *************************************************************************************************/
+
+module mde.mergetag.read;
+
+// package imports
+public import mde.mergetag.dataset;
+import mde.mergetag.exception;
+
+// tango imports
+import tango.io.UnicodeFile;
+import Util = tango.text.Util;
+import ConvInt = tango.text.convert.Integer;
+import tango.util.collection.model.View : View;
+import tango.util.collection.ArrayBag : ArrayBag;
+import tango.util.log.Log : Log, Logger;
+
+// TODO: allow compressing with zlib for both binary and text? (.mtz, .mtt, .mtb extensions)
+
+// For now, all section & data tag IDs are uints.
+// TODO: allow a lookup table or function to find a uint ID from a string ID
+
+/**
+ *  Class for reading a file.
+ *
+ * Use as:
+ * -----------------------
+ * Reader foo = new Reader ("foo.mtt");
+ * foo.read();
+ * // get your data from foo.dataset.
+ * -----------------------
+ */
+class Reader
+{
+//BEGIN DATA
+    /**
+    A container for all read data.
+
+    This may be accessed from here; however it may be preferable to use an external reference
+    (passed to the class on initialisation).
+    */
+    DataSet dataset;
+
+    /** A table, which if created, allows items in a text file to have a string ID.
+     *
+     * If a string ID is given for a section or tag identifier and that string is a key in this
+     * table, then the corresponding ID type is used (if the string is not found an error is thrown).
+     */
+    ID[char[]] indexTable;	// see setIndexLookupTable() doc for use.
+
+    /** A function for creating new DataSections within the dataset.
+    *
+    * Allows a user-made class to be used in the DataSet instead of DefaultData.
+    *
+    * This works by supplying a function which returns a reference to an instance of a class
+    * implementing DataSection. The function is passed the ID of the new section and may use this
+    * to use different DataSection classes for different sections.
+    */
+    DataSection function (ID) dataSecCreator = null;
+
+private:
+    // Static symbols:
+    typedef void delegate (char[],ID,char[]) readDelg;	// Delegate for accepting tags.
+
+    static Logger logger;
+
+    // Error messages as const variables. Could be loaded from files to support other languages?
+    static const char[] ERR_FILEREAD = "Error reading file: ";
+    static const char[] ERR_MTHEAD = "Not a valid MergeTag text file";
+    static const char[] ERR_MTVER = "Unrecognised MergeTag version: MT";
+    static const char[] ERR_EOF = "Unexpected EOF";
+    static const char[] ERR_STAG = "Bad section tag format: not {id}";
+    static const char[] ERR_DTAG = "Bad data tag format: not <type|id=data>";
+    static const char[] ERR_CHAR = "Invalid character (or sequence starting \"!\") outside of tag";
+    static const char[] ERR_IDINT = "Tag has invalid integer ID: not a valid uint value";
+
+    // Non-static symbols:
+    final char[] ErrInFile;		// something like "in \"path/file.mtt\""
+
+    final char[] fbuf;			// file is read into this
+    MT_VERS fileVer = MT_VERS.INVALID;	// Remains INVALID until set otherwise by CTOR.
+
+    uint endOfHeader;
+    bool allRead = false;		// true if endOfHeader == fbuf.length or read([]) has run
+    bool fatal = false;			// a fatal file error occured; don't try to recover
+    /* If the file is scanned for sections, the starting position of all sections are stored
+    * in secTable. If this is empty, either no sections exist (and endOfHeader == fbuf.length)
+    * or a section scan has not been run (read() with no section names doesn't need to do so).
+    */
+    struct SecMD {	// sec meta data
+        // Static opCall so that a value can be built with the syntax SecMD(pos, read).
+        static SecMD opCall (uint _pos, bool _read) {
+            SecMD ret;
+            ret.pos = _pos;
+            ret.read = _read;
+            return ret;
+        }
+        uint pos;			// position to start reading
+        bool read;			// true if already read
+    }
+    SecMD [ID] secTable;
+//END DATA
+
+//BEGIN METHODS: CTOR / DTOR
+    // Static constructor: obtains the logger shared by all Reader instances.
+    static this () {
+        logger = Log.getLogger ("mde.mergetag.read.Reader");
+    }
+
+    /** Tries to open file path and read it into a buffer.
+     *
+     * Params:
+     * path = The name or FilePath of the file to open.
+     *     Standard extensions are .mtt and .mtb for text and binary files respectively.
+     * dataset_ = If null create a new DataSet, else use existing DataSet *dataset_ and merge read
+     *     data into it.
+     * rdHeader = If true, read the header like a standard section. Doesn't read the header by
+     *     default since if it's not requested it's likely not wanted.
+     *
+     * Memory:
+     * This currently works by loading the whole file into memory at once. This should be fine most
+     * of the time, but could potentially be a problem. Changing this would mean significantly
+     * changes to the way the code works.
+     */
+    /* Ideas for implementing a partial-loading memory model:
+     * Use a conduit directly.
+     * Use a fiber to do the parsing; let it switch back when it runs out of memory.
+     * Redesign the code so it never needs to look backwards in the buffer?
+     *
+     * Major problem: reading only some sections and keeping references to other sections
+     * would no longer be possible.
+     */
+    // Convenience overload: wraps path in a FilePath and forwards to the PathView constructor.
+    public this (char[] path, DataSet* dataset_ = null, bool rdHeader = false) {
+        this (new FilePath (path), dataset_, rdHeader);
+    }
+    /** ditto */
+    public this (PathView path, DataSet* dataset_ = null, bool rdHeader = false) {
+        // Use the caller's DataSet when one is supplied, otherwise start with a fresh one
+        if (dataset_ is null) dataset = new DataSet();
+        else dataset = *dataset_;
+
+        // Read the whole file into fbuf
+        try {	// Supports unicode files with a BOM; defaults to UTF8 when there isn't a BOM:
+            scope file = new UnicodeFile!(char) (path, Encoding.Unknown);
+            fbuf = cast(char[]) file.read();
+        } catch (Exception e) {
+            throwMTErr (ERR_FILEREAD ~ e.msg, new MTFileIOException);
+        }
+        // Suffix appended to later error messages so that they name the file:
+        ErrInFile = " in \"" ~ path.path ~ path.file ~ '"';
+
+        // The buffer must start with the six characters {MTxx}, xx being the version:
+        bool headOK = fbuf.length >= 6
+            && fbuf[0] == '{' && fbuf[1] == 'M' && fbuf[2] == 'T' && fbuf[5] == '}';
+        if (!headOK)
+            throwMTErr(ERR_MTHEAD ~ ErrInFile, new MTFileFormatException);
+        char[] verStr = fbuf[3..5];
+        fileVer = MTFormatVersion.parseString (verStr);
+        if (fileVer == MTFormatVersion.VERS.INVALID)
+            throwMTErr(ERR_MTVER ~ verStr ~ ErrInFile, new MTFileFormatException);
+
+        // The header section starts at index 6. It is always parsed so that endOfHeader is
+        // known, but its tags are only stored when rdHeader is set.
+        if (!rdHeader) endOfHeader = parseSection (6,null);
+        else {
+            dataset.header = new DefaultData;
+            endOfHeader = parseSection (6,&dataset.header.addTag);
+        }
+    }
+    // Was intended to close file, but file is closed within CTOR anyway.
+    // (The CTOR reads the file through a scope variable, so nothing is left to release here.)
+    public ~this () {
+    }
+//END METHODS: CTOR / DTOR
+
+//BEGIN METHODS: PUBLIC
+    /// Returns the IDs of the sections recorded in secTable, scanning the buffer first when the
+    /// table is still empty. Returns an empty list after a fatal error.
+    public uint[] getSectionNames () {
+        if (fatal) return [];
+        if (secTable.length == 0) {
+            uint pos = endOfHeader;
+            while (pos < fbuf.length) {
+                try {
+                    ID secID = fbufReadSecMarker (pos);
+                    // An ID occurring more than once overwrites its earlier entry here.
+                    secTable[secID] = SecMD(pos,false);
+                } catch (MTStringIDException) {
+                    // string ID not in indexTable: the section is not added to the table
+                }
+                pos = parseSection (pos, null);	// step over the section body
+            }
+        }
+        return cast(uint[]) secTable.keys;
+    }
+
+    /** Reads (some) sections of the file into data. Note that sections will never be _read twice.
+    *
+    * To be more accurate, the file is copied into a buffer by this(). read() then parses the
+    * contents of this buffer, and stores the contents in dataset.
+    *
+    * Each section read is stored in a DataSection class. By default this is an instance of
+    * DefaultData; this can be customised (see dataSecCreator).
+    *
+    * If secSet is non-empty, reading is restricted to sections given in secSet, otherwise all
+    * sections are read. Sections given in secSet but not found in the file are not reported as an
+    * error. Suggested: supply a HashSet!(uint) as the View!(ID). An ArrayBag!(ID) as used is not a
+    * good choice, except that in this case it's empty.
+    *
+    * Merging:
+    * Where a section already exists in the DataSet (when either the section is given more than
+    * once in the file, or it was read from a different file by another reader) it is merged.
+    * Entries already in the DataSet take priority.
+    *
+    * Performance:
+    * Note that loading only desired sections like this still parses the sections not
+    * read (although it does not try to understand the type or data fields), so there is only a
+    * small performance advantage to this where other sections do exist in the file. There is also
+    * some overhead in only partially reading the file to keep track of where other sections are so
+    * that the entire file need not be re-read if further (or all remaining) sections are read
+    * later.
+    */
+    public void read (View!(ID) secSet = new ArrayBag!(ID)) {
+        if (allRead || fatal) return;			// never do anything in either case
+        if (secSet.size) {
+            if (secTable.length) {
+                // Section positions are already known: jump straight to the requested ones.
+                foreach (ID id; secSet) {
+                    SecMD* psmd = id in secTable;
+                    if (psmd && !psmd.read) {			// may not exist
+                        DataSection ds = getOrCreateSec (id);
+                        parseSection (psmd.pos, &ds.addTag);
+                        psmd.read = true;
+                    }
+                }
+            } else {
+                // First pass over the file: build secTable while reading the requested sections.
+                for (uint pos = endOfHeader; pos < fbuf.length;) {
+                    try {
+                        ID id = fbufReadSecMarker (pos);
+                        secTable[id] = SecMD(pos,false);	// add to table
+                        if (secSet.contains(id)) {
+                            DataSection ds = getOrCreateSec (id);
+                            pos = parseSection (pos, &ds.addTag);
+                            secTable[id].read = true;
+                        } else {
+                            // Not requested: the body must still be stepped over, otherwise pos
+                            // would not be at a section marker on the next iteration.
+                            pos = parseSection (pos, null);
+                        }
+                    } catch (MTStringIDException) {	// don't do any of the stuff above
+                        pos = parseSection (pos, null);	// and skip the section
+                    }
+                }
+            }
+        } else {
+            if (secTable.length) {
+                // Read every section in the table which has not been read yet.
+                foreach (ID id, ref SecMD smd; secTable) {
+                    if (!smd.read) {
+                        DataSection ds = getOrCreateSec (id);
+                        parseSection (smd.pos, &ds.addTag);
+                        smd.read = true;
+                    }
+                }
+            } else {					// this time we don't need to use secTable
+                for (uint pos = endOfHeader; pos < fbuf.length;) {
+                    try {
+                        ID id = fbufReadSecMarker (pos);
+                        DataSection ds = getOrCreateSec (id);
+                        pos = parseSection (pos, &ds.addTag);
+                    } catch (MTStringIDException) {
+                        pos = parseSection (pos, null);	// just skip the section
+                    }
+                }
+            }
+            allRead = true;
+        }
+    }
+//END METHODS: PUBLIC
+
+//BEGIN METHODS: PRIVATE
+    /* Reads a section, starting from index pos, finishing at the next section marker (returning
+    the position of the start of the marker). pos should start after the section marker.
+    If no further marker is found, fbuf.length is returned.
+
+    After analysing tags, the function passes the type, ID (possibly converted) and data to addTag.
+    addTag may be null, in which case the section is parsed but nothing is stored.
+
+    Syntax handled here:
+        <type|id=data>  a data tag
+        !<...>          a commented-out data tag (parsed, but not passed to addTag)
+        !{...}          a block comment; nested braces are balanced
+    A data tag whose string ID throws MTStringIDException is treated like a commented-out tag.
+    Any other non-whitespace character outside a tag is reported as ERR_CHAR.
+
+    NOTE: from performance tests on indexing char[]'s and dereferencing char*'s, the char*'s are
+    slightly faster, but a tiny difference isn't worth the extra effort/risk of using char*'s.
+    */
+    private uint parseSection (uint pos, readDelg addTag) {
+        bool comment = false;				// preceding char was !
+        for (; pos < fbuf.length; ++pos) {
+            if (Util.isSpace(fbuf[pos])) continue;	// whitespace
+            else if (fbuf[pos] == '<') {		// data tag
+                char[] type, data;
+                ID tagID;
+
+                // Type section of tag:
+                fbufIncrement (pos);
+                uint pos_s = pos;
+                fbufLocateDataTagChar (pos, false);	// find end of type section
+                if (fbuf[pos] != '|') throwMTErr (ERR_DTAG ~ ErrInFile);
+                type = fbuf[pos_s..pos];
+                // ID section of tag:
+                fbufIncrement (pos);
+                try {
+                    tagID = fbufReadID (pos);		// read the ID, put pos at whatever's next
+                } catch (MTStringIDException) {
+                    comment = true;			// easiest way to ignore this tag
+                }
+                if (fbuf[pos] != '=') throwMTErr (ERR_DTAG ~ ErrInFile);
+                // Data section of tag:
+                fbufIncrement (pos);
+                pos_s = pos;
+                fbufLocateDataTagChar (pos, true);	// find end of data section
+                if (fbuf[pos] != '>') throwMTErr (ERR_DTAG ~ ErrInFile);
+                data = fbuf[pos_s..pos];		// a slice of fbuf, not a copy
+
+                if (!comment) {
+                    if (addTag != null) addTag (type, tagID, data);
+                } else comment = false;			// cancel comment status now
+            }
+            else if (fbuf[pos] == '{') {
+                if (comment) {				// simple block comment
+                    uint depth = 0;			// depth of embedded comment blocks
+                    while (true) {
+                        fbufIncrement (pos);		// reports ERR_EOF if the comment is unterminated
+                        if (fbuf[pos] == '}') {
+                            if (depth == 0) break;
+                            else --depth;
+                        } else if (fbuf[pos] == '{')
+                            ++depth;
+                    }
+                    comment = false;			// end of this comment
+                } else {
+                    return pos;				// next section coming up; we are done
+                }
+            }
+            else if (fbuf[pos] == '!') {		// possibly a comment; check next char
+                comment = true;				// starting a comment (or an error)
+                					// variable is reset at end of comment
+            } else					// must be an error
+                throwMTErr (ERR_CHAR ~ ErrInFile);
+        }
+        // if code execution reaches here, we're at EOF
+        // possible error: last character was ! (but don't bother checking since it's inconsequential)
+        return pos;
+    }
+
+    /* Returns the DataSection stored under id in dataset.sec. If there is none, a new one is
+     * created (by dataSecCreator when that is set, otherwise a DefaultData), stored under id
+     * and returned.
+     */
+    DataSection getOrCreateSec (ID id) {
+        DataSection* existing = id in dataset.sec;
+        if (existing !is null) return *existing;
+
+        DataSection created;
+        if (dataSecCreator is null) created = new DefaultData;
+        else created = dataSecCreator(id);
+        dataset.sec[id] = created;
+        return created;
+    }
+
+    /* Parses fbuf for a section marker {id} and returns its ID. Already knows fbuf[pos] == '{'.
+     *
+     * On return pos is the index of the character following the closing '}'. The same holds
+     * when MTStringIDException is thrown (string ID not in indexTable), so that a caller may
+     * catch it and skip the section with parseSection (pos, null).
+     * A marker not closed by '}' is reported as ERR_STAG.
+     */
+    private ID fbufReadSecMarker (inout uint pos) {
+        // at this point pos is whatever a parseSection run returned
+        // since we haven't hit EOF, fbuf[pos] MUST be '{' so no need to check
+        fbufIncrement(pos);
+        ID id;
+        try {
+            id = fbufReadID (pos);
+        } catch (MTStringIDException e) {
+            // Move past the rest of the marker before passing the exception on; otherwise the
+            // caller's parseSection would start inside the marker and reject the file.
+            while (fbuf[pos] != '}') fbufIncrement(pos);
+            fbufIncrement(pos);
+            throw e;
+        }
+        if (fbuf[pos] != '}') throwMTErr (ERR_STAG ~ ErrInFile);
+        fbufIncrement(pos);
+        return id;
+    }
+
+    /* Parses fbuf from pos to read an ID.
+     *
+     * The ID is either an integer in the uint range or a double-quoted string which is looked up
+     * in indexTable. Whitespace before and after the ID is skipped.
+     * On return pos is the index of the first non-space character following the ID.
+     *
+     * Throws MTStringIDException (after logging a warning) if a string ID is not a key of
+     * indexTable; pos is advanced past the ID in this case as well.
+     * An integer outside the uint range is reported as ERR_IDINT.
+     */
+    private ID fbufReadID (inout uint pos) {
+        while (Util.isSpace(fbuf[pos])) fbufIncrement(pos);	// skip any space
+        if (fbuf[pos] == '"') {
+            fbufIncrement(pos);
+            uint start = pos;
+            while (fbuf[pos] != '"') fbufIncrement(pos);
+            char[] name = fbuf[start..pos];
+            ID* i_p = name in indexTable;
+            fbufIncrement(pos);					// step over the closing quote
+            while (Util.isSpace(fbuf[pos])) fbufIncrement(pos);	// skip any space
+            if (i_p != null) return *i_p;			// looked-up value
+            logger.warn ("Unrecognised string ID \"" ~ name ~ "\"" ~ ErrInFile);
+            throw new MTStringIDException ();			// string not in look-up table
+        } else {
+            uint ate;
+            long x = ConvInt.parse (fbuf[pos..$], 0, &ate);
+            if (x < 0L || x > 0xFFFF_FFFFL) throwMTErr (ERR_IDINT ~ ErrInFile);
+            pos += ate;					// this is where ConvInt.parse stopped
+            // The number may have been the last thing in the buffer; don't index past the end.
+            if (pos >= fbuf.length) throwMTErr (ERR_EOF ~ ErrInFile);
+            while (Util.isSpace(fbuf[pos])) fbufIncrement(pos);	// skip any space
+            return cast(ID) x;
+        }
+    }
+
+    /* Searches fbuf starting from pos to find one of <=>| and stops at its index.
+
+    If quotable then be quote-aware for single and double quotes: the four characters are
+    ignored inside a quoted region, and a backslash inside a quoted region escapes the character
+    after it (so \' and \" do not end the region).
+    Note: there's no length restriction for the content of the quote since it could be a single
+    non-ascii UTF-8 char which would look like several chars.
+
+    Reaching the end of fbuf without finding one of the characters is reported as ERR_EOF, so
+    on return fbuf[pos] is always valid.
+    */
+    private void fbufLocateDataTagChar (inout uint pos, bool quotable) {
+        for (; pos < fbuf.length; ++pos) {
+            char c = fbuf[pos];
+            // '<', '=' and '>' are consecutive character codes, hence the range test
+            if ((c >= '<' && c <= '>') || c == '|') return;
+            if (quotable && (c == '\'' || c == '"')) {
+                // advance to the matching closing quote; the loop's ++pos then steps over it
+                while (true) {
+                    fbufIncrement(pos);
+                    if (fbuf[pos] == '\\') fbufIncrement(pos);	// escaped char: not a terminator
+                    else if (fbuf[pos] == c) break;
+                }
+            }
+        }
+        throwMTErr (ERR_EOF ~ ErrInFile);		// ran off the end inside a tag
+    }
+    /* Increments pos and checks it hasn't hit fbuf.length; if it has, ERR_EOF is reported
+     * through throwMTErr. */
+    private void fbufIncrement(inout uint pos) {
+        ++pos;
+        if (pos >= fbuf.length) throwMTErr(ERR_EOF ~ ErrInFile);
+    }
+
+    /* Marks the reader as failed, logs msg as an error and throws exc (an MTException unless the
+     * caller supplies a more specific exception). msg is only logged, not stored in exc. */
+    private void throwMTErr (char[] msg, Exception exc = new MTException) {
+        fatal = true;	// if anyone catches the error and tries to do anything --- we're dead now
+        logger.error (msg);	// report the error
+        throw exc;		// and signal our error
+    }
+//END METHODS: PRIVATE
+
+    // Class invariant. Currently empty: the only check is commented out below.
+    invariant {
+        // Check secTable is valid, but not if it's complete.
+        // This is something I really wouldn't expect to fail.
+        /+ um... this causes a lot of linker errors. Shouldn't be necessary anyway..
+        foreach (ID id, SecMD smd; secTable) {
+            uint pos = smd.pos;
+            for (; true; --pos) {
+                assert (pos);	// we should never reach 0
+                if (fbuf[pos] == '{') break;
+            }
+            ++pos;
+            assert (fbufReadID(pos) == id);
+        }+/
+    }
+    /+ A unittest here is really not practical since a file must be read from. Suggestion: Involve
+    + both reading and writing functions in a single unittest for the entire package mergetag.
+    + This is just here to point anyone looking in the right direction...
+    unittest {}
+    +/
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/mergetag/write.d	Sat Oct 27 18:05:39 2007 +0100
@@ -0,0 +1,205 @@
+/**************************************************************************************************
+ * This module contains all writing functions, for both binary and text MergeTag files.
+ *
+ * Files can be written in a text or binary form; binary is faster and smaller while text allows
+ * editing with an ordinary text editor. TextWriter and BinaryWriter are the main classes, both of
+ * which implement the interface IWriter. DualWriter is another class implementing IWriter, which
+ * contains a private instance of a TextWriter and a BinaryWriter and implements all methods in the
+ * interface simply by chaining the appropriate method from each of these classes, thus performing
+ * two writes at once.
+ *
+ * Any of these three classes may be used directly, or makeWriter may be invoked to create an
+ * instance of the appropriate class.
+ *************************************************************************************************/
+module mde.mergetag.write;
+
+// package imports
+import mde.mergetag.dataset;
+import mde.mergetag.exception;
+
+// tango imports
+import tango.io.FileConduit;
+import tango.io.Buffer : Buffer, IBuffer;
+import tango.text.convert.Layout : Layout;
+import tango.io.Print : Print;
+import tango.util.log.Log : Log, Logger;
+
+// Logger for this module; set up by the module constructor below.
+Logger logger;
+static this () {
+    logger = Log.getLogger ("mde.mergetag.write");
+}
+
+/**
+ * Enumeration for specifying the writing method ("Params" section shows possible values).
+ *
+ * Params:
+ * Unspecified = If the filename ends with one of .mtb or .mtt, the file is written in
+ *     that format. Otherwise makeWriter reports an error, since the format cannot be deduced.
+ * Binary = Use binary mode (default extension: .mtb or no extension).
+ * Text = Use text mode (default extension: .mtt; as with the above it is not automatically added).
+ * Both = Write both a text and a binary file (see DualWriter in the module description).
+*/
+enum WriterMethod : byte {
+    Unspecified = -1,
+    Binary = 1,
+    Text = 2,
+    Both = 3
+}
+
+/** Method to create and return either a TextWriter or a BinaryWriter, depending primarily on the
+ * method parameter and using the filename extension as a fallback.
+ *
+ * An exception is thrown if neither test can deduce the writing method.
+ */
+IWriter makeWriter (char[] path, DataSet dataset, WriterMethod method = WriterMethod.Unspecified) {
+    // Wrap the path string and forward to the PathView overload, returning its result.
+    return makeWriter (new FilePath (path), dataset, method);
+}
+/** ditto */
+IWriter makeWriter (PathView path, DataSet dataset, WriterMethod method = WriterMethod.Unspecified) {
+    // No method given: fall back on the file name extension.
+    if (method == WriterMethod.Unspecified) {
+        if (path.ext == "mtt") method = WriterMethod.Text;
+        else if (path.ext == "mtb") method = WriterMethod.Binary;
+        else {
+            logger.error ("Unable to determine writing format: text or binary");
+            throw new MTFileFormatException;
+        }
+    }
+
+    // NOTE(review): TextWriter is declared as a scope class; confirm that creating it with a
+    // plain new expression and returning it is accepted by the compiler.
+    if (method == WriterMethod.Text) return new TextWriter (path, dataset);
+
+    // Every remaining case is an error. Each is logged and thrown here so that no path can
+    // reach the end of the function without returning a writer.
+    char[] msg;
+    if (method == WriterMethod.Binary) msg = "Binary writing not supported yet!";
+    else if (method == WriterMethod.Both) msg = "Dual writing not supported yet!";
+    else msg = "Invalid writing method specified";
+    logger.error (msg);
+    throw new MTFileFormatException;
+}
+
+/// Interface for the methods necessarily available in TextWriter and/or BinaryWriter.
+///
+/// A D interface can only declare member functions, so only write() is listed here. Implementing
+/// classes are additionally expected to provide:
+/// $(UL
+///   $(LI an indexTable member of type char[][ID] (only used by TextWriter))
+///   $(LI constructors taking (char[] path, DataSet dataset_) and (PathView path, DataSet dataset_))
+/// )
+scope interface IWriter {
+    /// Writes the implementing class's DataSet to its file.
+    void write ();
+}
+
+/+
+scope class BinaryWriter : IWriter
+{
+}
++/
+
+/**
+ * Class to write a dataset to a file.
+ *
+ * Is a scope class, since the file is kept open until ~this() runs.
+ */
+scope class TextWriter : IWriter
+{
+//BEGIN DATA
+    /** The container where data is written from.
+     */
+    DataSet dataset;
+
+
+    /** A table, which if created, allows items in a text file to be written with a string ID.
+    *
+    * If any ID (for a section or tag) to be written is found in this table, the corresponding
+    * string is written instead.
+    */
+    char[][ID] indexTable;	// see setIndexLookupTable() doc for use.
+
+private:
+    // taken from tango.io.Console, mostly to make sure notepad can read our files:
+    version (Win32)
+        const char[] Eol = "\r\n";
+    else
+        const char[] Eol = "\n";
+
+    bool fatal = false;		// fatal error occured: don't attempt anything else
+    bool fileOpen = false;	// file needs to be closed on exit
+    bool writtenHeader = false;	// The header MUST be written exactly once at the beginning of the file.
+
+    FileConduit conduit;	// actual conduit; don't use directly when there's content in the buffer
+    IBuffer buffer;		// write strings directly to this (use opCall(void[]) )
+    Print!(char) format;	// formats output to buffer
+//END DATA
+
+//BEGIN CTOR / DTOR
+    /** Tries to open file path for writing.
+    *
+    * Params:
+    * path = The name or FilePath of the file to open.
+    *     Standard extensions are .mtt and .mtb for text and binary files respectively.
+    * dataset_ = The DataSet to write from; it is stored in dataset and read by write().
+    *
+    * If the file cannot be opened the error is logged and an MTException is thrown.
+    */
+    public this (char[] path, DataSet dataset_) {
+        this (new FilePath (path), dataset_);
+    }
+    /** ditto */
+    public this (PathView path, DataSet dataset_) {
+        dataset = dataset_;	// without this write() would have no data to work on
+        try {	// open a conduit on the file
+            conduit = new FileConduit (path, FileConduit.WriteCreate);
+            buffer = new Buffer(conduit);
+            format = new Print!(char) (new Layout!(char), buffer);
+            fileOpen = true;	// from here on the DTOR must flush and close
+        } catch (Exception e) {
+            throwMTErr ("Error opening file: " ~ e.msg);
+        }
+    }	// OK, all set to start writing.
+
+    // Flushes buffered output and closes the conduit, but only if the CTOR opened the file.
+    ~this () {	// close file on exit
+        if (fileOpen) {
+            buffer.flush();
+            conduit.close();
+        }
+    }
+//END CTOR / DTOR
+
+    /** Writes the header and all DataSections.
+     *
+     * Firstly writes the header unless it has already been written. Then writes all DataSections
+     * to the file. Thus if write is called more than once, with or without changing the DataSet,
+     * the header is written only once. This behaviour could, for instance, be used to write
+     * multiple DataSets into one file without firstly merging them. Note that this behaviour may
+     * be changed when binary support is added.
+     */
+    public void write ()
+    {
+        // Write the header (format marker followed by the header section), first call only.
+        // Written again later, the header tags would follow the previously written sections.
+        if (!writtenHeader) {
+            buffer ("{MT")(MTFormatVersion.CurrentString)("}")(Eol);
+            writeSection (dataset.header);
+            writtenHeader = true;
+        }
+
+        // Write the rest:
+        foreach (ID id, DataSection sec; dataset.sec) {
+            writeSectionIdentifier (id);
+            writeSection (sec);
+        }
+
+        buffer.flush();
+    }
+
+    // Writes the section marker for id on a line of its own: {"name"} when id is a key of
+    // indexTable, otherwise {n} with n the numeric value of id.
+    private void writeSectionIdentifier (ID id) {
+        char[]* name = id in indexTable;
+
+        buffer ("{");
+        if (name is null) {
+            format (cast(uint) id);		// no string ID known: write the number
+        } else {
+            buffer ("\"")(*name)("\"");		// quoted string ID
+        }
+        buffer ("}")(Eol);
+    }
+
+    // Intended to write the contents of sec. Not implemented yet: sec is not used and only a
+    // line break is written.
+    private void writeSection (DataSection sec) {
+
+        buffer (Eol);			// blank line after the (still missing) section contents
+    }
+
+    // Marks the writer as failed, logs msg as an error and throws exc (an MTException unless the
+    // caller supplies another exception). msg is only logged, not stored in exc.
+    private void throwMTErr (char[] msg, Exception exc = new MTException) {
+        fatal = true;			// if anyone catches the error and tries to do anything --- we're dead now
+        logger.error (msg);		// report the error
+        throw exc;			// and signal our error
+    }
+}
+
+/+
+Implement std CTORs which add extensions to each filename and extra CTORs which take two filenames.
+scope class DualWriter : IWriter
+{
+}
++/
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/test.d	Sat Oct 27 18:05:39 2007 +0100
@@ -0,0 +1,44 @@
+/// Contains some functions for testing stuff.
+module mde.test;
+
+import mde.mergetag.dataset;
+
+import tango.io.Stdout;
+
+/// Prints $(I some) of the dataset: for each DefaultData section, its Int, IntA, Binary, Char and
+/// String entries are written to Stdout, one entry per line with tab-separated fields.
+void printDataSet (DataSet ds) {
+    foreach (ID secID, DefaultData section; ds.getSections!(DefaultData)()) {
+        Stdout ("Section:  ")(cast(uint) secID).newline;
+
+        // single values: id, tab, value
+        foreach (ID tagID, int value; section.Int)
+            Stdout (cast(uint) tagID)('\t')(value).newline;
+
+        // arrays: id followed by every element, each preceded by a tab
+        foreach (ID tagID, int[] values; section.IntA) {
+            Stdout (cast(uint) tagID);
+            foreach (int v; values) {
+                Stdout ('\t')(v);
+            }
+            Stdout.newline;
+        }
+        foreach (ID tagID, ubyte[] bytes; section.Binary) {
+            Stdout (cast(uint) tagID);
+            foreach (ubyte b; bytes) {
+                Stdout ('\t')(b);
+            }
+            Stdout.newline;
+        }
+
+        // characters and strings: id, tab, value
+        foreach (ID tagID, char ch; section.Char)
+            Stdout (cast(uint) tagID)('\t')(ch).newline;
+        foreach (ID tagID, char[] str; section.String)
+            Stdout (cast(uint) tagID)('\t')(str).newline;
+    }
+}
+
+/// A DataSection which stores nothing: it prints a line to Stdout when it is created and one
+/// line for every tag passed to addTag. Intended for testing.
+class DataPrinter : DataSection
+{
+    this () {
+        Stdout ("New section (can't get ID from here).").newline;
+    }
+    /// Prints the ID, type string and data string of the tag.
+    void addTag (char[] tp, ID id, char[] dt) {
+        // The ID is cast to uint before printing, as is done everywhere else in this module.
+        Stdout ("\tData item (")(cast(uint) id)("):\t")(tp)("\t")(dt).newline;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test.mtt	Sat Oct 27 18:05:39 2007 +0100
@@ -0,0 +1,20 @@
+{MT01}
+{11}
+<int|=0xD>
+{13}
+<int|5=5>
+<int[]|5=[5, 4 , -3,-2 ,1]>
+<ubyte|7=0>
+<ubyte|0=255>
+<byte[]|5= [ 2 , -1 , -127, 127, -127,127,0 ] >
+{1}!{Binary}
+<u8[]|1=0051B4F6 >
+<ubyte[]|2= [2,0,250,5]>
+<binary|3=00102030405060708090A0B0C0D0E0F0>
+{2}!{Chars and strings}
+<UTF8 |1= ' '>
+< char| 2 ='\a'>
+< string |0= "A	sequence of\tcharacters:\v1²€ç⋅−+↙↔↘,↕">
+<string|1= [ 's','t' ,'r'	,	'i' , 'n' , 'g', ' ' , '2' ]>
+{11}
+<int|5=6>