projects/mde: mde/file/mergetag/Reader.d comparison

comparison mde/file/mergetag/Reader.d @ 81:d8fccaa45d5f

Moved file IO code from mde/mergetag to mde/file[/mergetag] and changed how some errors are caught.

author	Diggory Hardy <diggory.hardy@gmail.com>
date	Fri, 29 Aug 2008 11:59:43 +0100
parents	mde/mergetag/Reader.d@ea58f277f487
children	ac1e3fd07275

comparison

equal deleted inserted replaced

-:ea58f277f487
+:d8fccaa45d5f
+/* LICENSE BLOCK
+Part of mde: a Modular D game-oriented Engine
+Copyright © 2007-2008 Diggory Hardy
+This program is free software: you can redistribute it and/or modify it under the terms
+of the GNU General Public License as published by the Free Software Foundation, either
+version 2 of the License, or (at your option) any later version.
+This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+See the GNU General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+/**************************************************************************************************
+* This module contains all reading functions, for both binary and text MergeTag files.
+*************************************************************************************************/
+module mde.file.mergetag.Reader;
+// package imports
+public import mde.file.mergetag.iface.IReader;
+import mde.file.mergetag.DataSet;
+import mde.file.mergetag.DefaultData;
+import mde.file.mergetag.exception;
+import mde.file.mergetag.internal;
+import tango.core.Exception;
+// tango imports
+import tango.io.FilePath;
+import tango.io.UnicodeFile;
+import Util = tango.text.Util;
+import ConvInt = tango.text.convert.Integer;
+//import tango.util.collection.model.View : View;
+import tango.util.collection.HashSet : HashSet;
+import tango.util.log.Log : Log, Logger;
+private Logger logger;
+// Module constructor: fetches the Logger assigned to the module-level `logger` variable.
+static this() {
+logger = Log.getLogger ("mde.mergetag.Reader");
+}
+// TODO: allow compressing with zlib for both binary and text? (.mtz, .mtt, .mtb extensions)
+/** Make an IReader class.
+*
+* Create an appropriate reader: MTTReader or MTBReader.
+*
+* Throws:
+*  $(TABLE
+*  $(TR $(TH Exception) $(TH Thrown when))
+*  $(TR $(TD MTFileIOException) $(TD When extension given is neither mtt nor mtb))
+*  )
+*
+*/
+IReader makeReader (FilePath path, DataSet ds = null, bool rdHeader = false) {
+    // Dispatch on the file extension: "mtb" selects the binary reader, "mtt" the text reader.
+    if (path.ext == "mtb")
+        return new MTBReader (path, ds, rdHeader);
+    if (path.ext == "mtt")
+        return new MTTReader (path, ds, rdHeader);
+
+    // Any other extension is rejected.
+    throw new MTFileIOException ("Invalid mergetag extension");
+}
+/** Resolve a file path.
+*
+* Tries adding both ".mtt" and ".mtb" extensions, returning whichever exists (the most recently
+* modified if both exist), or returns null if neither exist. */
+FilePath findFile (char[] path) {
+    if (path is null) return null;
+
+    // Candidate files: the same base name with the text and binary extensions.
+    FilePath textFile   = new FilePath (path ~ ".mtt");
+    FilePath binaryFile = new FilePath (path ~ ".mtb");
+    bool haveBinary = binaryFile.exists;
+
+    if (!textFile.exists) {
+        // No text file: fall back to the binary one, if there is one.
+        if (haveBinary) return binaryFile;
+        return null;
+    }
+    if (!haveBinary) return textFile;
+
+    // Both exist: prefer the text file only if it was modified strictly later.
+    if (textFile.modified > binaryFile.modified) return textFile;
+    return binaryFile;
+}
+/**
+* Class for reading a mergetag text file.
+*
+* Use as:
+* -----------------------
+* IReader foo;
+* try {
+*   foo = new MTTReader("foo.mtt");
+*   foo.read();
+* }
+* catch (MTException) {}
+* // get your data from foo.dataset.
+* -----------------------
+*
+* Throws:
+*  $(TABLE
+*  $(TR $(TH Exception) $(TH Thrown when))
+*  $(TR $(TD MTFileIOException) $(TD An error occurs while opening the file))
+*  $(TR $(TD MTFileFormatException) $(TD The file doesn't start with a recognised header/version))
+*  $(TR $(TD MTSyntaxException) $(TD A file syntax error occurs))
+*  $(TR $(TD MTException) $(TD An unexpected error occurs))
+*  )
+* Note that all exceptions extend MTException and when any exception is thrown the class is
+* rendered unusable: any subsequent calls to read will be ignored.
+*
+* Threading: Separate instances of Reader should be thread-safe provided access to the same
+* dataset is synchronized; i.e. no two readers referring to the same dataset should run
+* simultaneously. (The Reader class could be made thread-safe w.r.t. datasets, but
+* performance-wise I doubt it would be worth it.)
+* Do not run a single instance of Reader in multiple threads simultaneously.
+*/
+class MTTReader : IReader
+{
+//BEGIN DATA
+/** Get or set the DataSet
+*
+* A container for all read data.
+*
+* This may be accessed from here; however it may be preferable to use an external reference
+* (passed to the class on initialisation).
+*/
+DataSet dataset ()
+{
+    return _dataset;
+}
+void dataset (DataSet ds)	/// ditto
+{
+    _dataset = ds;
+}
+/** A delegate for creating new DataSections within the dataset.
+*
+* Allows a user-made class to be used in the DataSet instead of DefaultData (used if no
+* dataSecCreator exists). Also allows an existing class instance to be used instead of a new
+* one.
+*
+* This works by supplying a function which returns a reference to an instance of a class
+* implementing IDataSection. The function is passed the ID of the new section and may use this
+* to use different IDataSection classes for different sections.
+*
+* The function may also return null, in which case the section will be skipped. In the version
+* of read taking a set of sections to read, the section will not be marked as read and may
+* still be read later (assuming dataSecCreator returns non-null). However, in the version of
+* read not taking the set argument, all sections are set as read regardless, and the section
+* cannot be read later.
+*/
+void dataSecCreator (IDataSection delegate (ID) dSC) {
+// Stored in _dataSecCreator, which getOrCreateSec consults when a section does not yet exist.
+_dataSecCreator = dSC;
+}
+private:
+static Logger logger;
+// Non-static symbols:
+final char[] ErrFile;		// added after ErrInFile to do the same without the "in " bit.
+final char[] ErrInFile;		// something like "in \"path/file.mtt\""
+final char[] fbuf;			// file is read into this
+MTFormatVersion.VERS fileVer = MTFormatVersion.VERS.INVALID;	// Remains INVALID until set otherwise by CTOR.
+IDataSection delegate (ID) _dataSecCreator = null;   // see property setter above
+size_t endOfHeader;
+bool allRead = false;		// true if endOfHeader == fbuf.length or read([]) has run
+bool fatal = false;			// a fatal file error occured; don't try to recover
+/* If the file is scanned for sections, the starting position of all sections are stored
+* in secTable. If this is empty, either no sections exist (and endOfHeader == fbuf.length)
+* or a section scan has not been run (read() with no section names doesn't need to do so).
+*/
+struct SecMD {	// per-section meta data
+    size_t pos;		// index in fbuf at which to start reading the section
+    bool read;		// true once the section has been read
+
+    // Builds a SecMD from its two fields, so that SecMD(pos, read) can be written.
+    static SecMD opCall (size_t _pos, bool _read) {
+        SecMD md;
+        md.pos = _pos;
+        md.read = _read;
+        return md;
+    }
+}
+SecMD [ID] secTable;
+DataSet _dataset;
+//END DATA
+//BEGIN METHODS: CTOR / DTOR
+// Static class constructor: fetches the Logger used by this class's static `logger` member.
+static this () {
+logger = Log.getLogger ("mde.mergetag.read.Reader");
+}
+/** Tries to open file path and read it into a buffer.
+*
+* Params:
+* path     = The name or FilePath of the file to open.
+*     Standard extensions are .mtt and .mtb for text and binary files respectively.
+* ds       = If null create a new DataSet, else use existing DataSet ds and merge read
+*     data into it.
+* rdHeader = If true, read the header like a standard section. Doesn't read the header by
+*     default since if it's not requested it's likely not wanted.
+*
+* Memory:
+* This currently works by loading the whole file into memory at once. This should be fine most
+* of the time, but could potentially be a problem. Changing this would mean significant
+* changes to the way the code works.
+*/
+/* Ideas for implementing a partial-loading memory model:
+* Use a conduit directly.
+* Use a fiber to do the parsing; let it switch back when it runs out of memory.
+* Redesign the code so it never needs to look backwards in the buffer?
+*
+* Major problem: reading only some sections and keeping references to other sections
+* would no longer be possible.
+*/
+public this (char[] path, DataSet ds = null, bool rdHeader = false) {
+    // Wrap the plain string in a FilePath and defer to the constructor below.
+    this (new FilePath (path), ds, rdHeader);
+}
+/** ditto */
+public this (FilePath path, DataSet ds = null, bool rdHeader = false) {
+    // Merge into the caller's DataSet when one is given, otherwise start with a fresh one.
+    _dataset = (ds is null) ? new DataSet() : ds;
+
+    // Load the entire file into fbuf.
+    // Supports unicode files with a BOM; defaults to UTF8 when there isn't a BOM.
+    try {
+        scope file = new UnicodeFile!(char) (path, Encoding.Unknown);
+        fbuf = cast(char[]) file.read();
+    } catch (Exception e) {
+        throwMTErr ("Error reading file: " ~ e.msg, new MTFileIOException);
+    }
+
+    // Keep the file name around for use in later error messages.
+    ErrFile = path.path ~ path.file;
+    ErrInFile = " in \"" ~ ErrFile ~ '"';
+
+    // The file must open with a six-character header tag of the form {MTxx}.
+    bool headerOk = fbuf.length >= 6
+        && fbuf[0] == '{' && fbuf[1] == 'M' && fbuf[2] == 'T' && fbuf[5] == '}';
+    if (!headerOk)
+        throwMTErr("Not a valid MergeTag text file" ~ ErrInFile, new MTFileFormatException);
+
+    // The two characters after "MT" identify the format version.
+    fileVer = MTFormatVersion.parseString (fbuf[3..5]);
+    if (fileVer == MTFormatVersion.VERS.INVALID)
+        throwMTErr("Unrecognised MergeTag version: MT" ~ fbuf[3..5] ~ ErrInFile, new MTFileFormatException);
+
+    // The header section is always scanned so that endOfHeader is known; its tags are only
+    // stored (merged into an existing header, or into a new DefaultData) when requested.
+    IDataSection headerSec = null;
+    if (rdHeader) {
+        if (!_dataset.header) _dataset.header = new DefaultData;
+        headerSec = cast(IDataSection) _dataset.header;
+    }
+    endOfHeader = parseSection (6, headerSec);
+}
+//END METHODS: CTOR / DTOR
+//BEGIN METHODS: PUBLIC
+/** Scans for sections if not already done and returns a list of IDs.
+*
+* Won't work (will return an empty array) if all sections have already been read without
+* scanning for sections.
+*/
+public ID[] getSectionNames () {
+    if (fatal)
+        return [];
+
+    // An empty table means it has not been filled in yet; asking to read an empty set of
+    // sections runs the scan without loading any data.
+    if (secTable.length == 0)
+        read([]);
+
+    return secTable.keys;
+}
+/** Reads (some) sections of the file into data. Note that sections will never be _read twice.
+*
+* To be more accurate, the file is copied into a buffer by this(). read() then parses the
+* contents of this buffer, and stores the contents in dataset.
+*
+* Each section read is stored in a DataSection class. By default this is an instance of
+* DefaultData; this can be customised (see dataSecCreator).
+*
+* If secSet is provided, reading is restricted to sections given in secSet, otherwise all
+* sections are read. Sections given in secSet but not found in the file are not reported as an
+* error. Suggested: supply a HashSet!(uint) as the View!(ID). An ArrayBag!(ID) as used is not a
+* good choice, except that in this case it's empty.
+*
+* Merging:
+* Where a section already exists in the DataSet (when either the section is given more than
+* once in the file, or it was read from a different file by another reader) it is merged.
+* Entries already in the DataSet take priority.
+*
+* Performance:
+* Note that loading only desired sections like this still parses the sections not
+* read (although it does not try to understand the type or data fields), so there is only a
+* small performance advantage to this where other sections do exist in the file. There is also
+* some overhead in only partially reading the file to keep track of where other sections are so
+* that the entire file need not be re-read if further (or all remaining) sections are read
+* later.
+*/
+public void read () {
+    // Honour the documented contract (and match read(View!(ID))): once everything has been
+    // read, or after a fatal error, further calls are ignored. Without this guard a second
+    // call would parse sections again and a call after a fatal error would still touch fbuf.
+    if (allRead || fatal) return;
+
+    if (secTable.length) {
+        // A previous partial read recorded where each section starts: read the remaining ones.
+        foreach (ID id, ref SecMD smd; secTable) {
+            if (!smd.read) {
+                IDataSection ds = getOrCreateSec (id);  // may be null, in which case the section is skipped
+                parseSection (smd.pos, ds);
+                // allRead is set true below so there's no point setting smd.read = true
+            }
+        }
+    } else {
+        // No section table: walk the buffer from the end of the header; secTable is not needed.
+        for (size_t pos = endOfHeader; pos < fbuf.length;) {
+            ID id = fbufReadSecMarker (pos);            // advances pos past the {id} marker
+            IDataSection ds = getOrCreateSec (id);
+            pos = parseSection (pos, ds);               // returns the start of the next marker or EOF
+        }
+    }
+    allRead = true;
+}
+/** ditto */
+public void read (ID[] secSet) {
+    // Copy the requested IDs into a HashSet and hand over to the View!(ID) overload.
+    auto wanted = new HashSet!(ID);
+    foreach (id; secSet)
+        wanted.add(id);
+    read (wanted);
+}
+/** ditto */
+public void read (View!(ID) secSet) {
+    if (allRead || fatal) return;			// never do anything in either case
+
+    if (secTable.length) {
+        // Section positions are already known: visit only the requested, not-yet-read ones.
+        foreach (ID id; secSet) {
+            SecMD* entry = id in secTable;
+            if (entry is null || entry.read) continue;	// not in the file, or already read
+
+            IDataSection target = getOrCreateSec (id);
+            parseSection (entry.pos, target);
+            if (target !is null) entry.read = true;	// getOrCreateSec may return null
+        }
+        return;
+    }
+
+    // First pass over the file: record every section's position while reading the wanted ones.
+    size_t pos = endOfHeader;
+    while (pos < fbuf.length) {
+        ID id = fbufReadSecMarker (pos);
+        secTable[id] = SecMD(pos,false);		// add to table
+
+        // A null target makes parseSection skip over the section without storing anything.
+        IDataSection target = null;
+        if (secSet.contains(id)) target = getOrCreateSec (id);
+
+        pos = parseSection (pos, target);
+        if (target !is null) secTable[id].read = true;
+    }
+}
+//END METHODS: PUBLIC
+//BEGIN METHODS: PRIVATE
+/* Utility function for read
+* Look for a section; return it if it exists otherwise create a new section:
+*   use _dataSecCreator if it exists or just create a DefaultData if not.
+* However if _dataSecCreator returns null don't add it to the dataset.
+*/
+private IDataSection getOrCreateSec (ID id) {
+    // Reuse the section if the dataset already holds one under this ID.
+    IDataSection* existing = id in _dataset.sec;
+    if (existing !is null)
+        return *existing;
+
+    // Otherwise make one: via the user's delegate when set, else a DefaultData.
+    IDataSection created;
+    if (_dataSecCreator is null) created = new DefaultData;
+    else created = _dataSecCreator(id);
+
+    // A null result from the delegate is returned as-is and not registered in the dataset.
+    if (created !is null)
+        _dataset.sec[id] = created;
+    return created;
+}
+/* Reads a section, starting from index pos, finishing at the next section marker (returning
+the position of the start of the marker). pos should start after the section marker.
+After analysing tags, the function passes the type, ID and data to addTag.
+NOTE: from performance tests on indexing char[]'s and dereferencing char*'s, the char*'s are
+slightly faster, but a tiny difference isn't worth the extra effort/risk of using char*'s.
+*/
+private size_t parseSection (size_t pos, IDataSection dsec) {
+// Scans fbuf from pos and returns the index of the next '{' that is not part of a comment,
+// or the index reached when the end of the buffer is hit. When dsec is null the tags are
+// still scanned (so syntax errors are still raised) but none are passed to addTag.
+debug scope (failure)
+logger.trace ("MTTReader.parseSection: failure");
+/* Searches fbuf starting from start to find one of <=>| and stops at its index.
+If quotable then be quote-aware for single and double quotes.
+Note: there's no length restriction for the content of the quote since it could be a single
+non-ascii UTF-8 char which would look like several chars.
+*/
+void fbufLocateDataTagChar (ref size_t pos, bool quotable) {
+while (true) {
+fbufIncrement (pos);
+// '<', '=' and '>' are consecutive character codes, so one range test covers all three.
+if ((fbuf[pos] >= '<' && fbuf[pos] <= '>') || fbuf[pos] == '|') return;
+else if (quotable) {
+char c = fbuf[pos];
+if (c == '\'' || c == '"') {
+fbufIncrement(pos);
+// Skip ahead to the matching closing quote character.
+while (fbuf[pos] != c) {
+if (fbuf[pos] == '\\') ++pos;	// escape seq.
+fbufIncrement(pos);
+}
+}
+}
+}
+}
+// Used to ignore a tag (if it starts !< or !{ or should otherwise be ignored):
+bool comment = false;
+for (; pos < fbuf.length; ++pos) {
+if (Util.isSpace(fbuf[pos])) continue;	// whitespace
+else if (fbuf[pos] == '<') {		// data tag
+char[] ErrDTAG = "Bad data tag format: not <type|id=data>" ~ ErrInFile;
+// Type section of tag:
+size_t pos_s = pos + 1;
+fbufLocateDataTagChar (pos, false);	// find end of type section
+if (fbuf[pos] != '|') throwMTErr (ErrDTAG, new MTSyntaxException);
+char[] type = fbuf[pos_s..pos];
+// ID section of tag:
+pos_s = pos + 1;
+fbufLocateDataTagChar (pos, false);	// find end of ID section
+if (fbuf[pos] != '=') throwMTErr (ErrDTAG, new MTSyntaxException);
+ID tagID = cast(ID) fbuf[pos_s..pos];
+// Data section of tag:
+pos_s = pos + 1;
+fbufLocateDataTagChar (pos, true);      // find end of data section
+if (fbuf[pos] != '>') throwMTErr (ErrDTAG, new MTSyntaxException);
+char[] data = fbuf[pos_s..pos];
+// The tag is only stored when it is not commented out and a section was supplied.
+if (!comment && dsec !is null) {
+type = Util.trim(type);
+try {
+dsec.addTag (type, tagID, data);
+}
+catch (TextException e) {
+logger.error ("TextException while reading " ~ ErrFile ~ ":");	// following a parse error
+logger.error (e.msg);
+logger.error ("Tag ignored: <"~type~"|"~tagID~"="~data~">");
+// No throw: tag is just ignored
+}
+catch (Exception e) {
+logger.error ("Unknown error occured" ~ ErrInFile ~ ':');
+logger.error (e.msg);
+throwMTErr (e.msg);             // Fatal to Reader
+}
+} else comment = false;			// cancel comment status now
+}
+else if (fbuf[pos] == '{') {
+if (comment) {				// simple block comment
+uint depth = 0;			// depth of embedded comment blocks
+// Consume up to the '}' that balances the opening '{', counting nested braces.
+while (true) {
+fbufIncrement (pos);
+if (fbuf[pos] == '}') {
+if (depth == 0) break;
+else --depth;
+} else if (fbuf[pos] == '{')
+++depth;
+}
+comment = false;			// end of this comment
+} else {
+return pos;				// next section coming up; we are done
+}
+}
+else if (fbuf[pos] == '!') {		// possibly a comment; check next char
+comment = true;				// starting a comment (or an error)
+					// variable is reset at end of comment
+} else					// must be an error
+throwMTErr ("Invalid character (or sequence starting \"!\") outside of tag" ~ ErrInFile, new MTSyntaxException);
+}
+// if code execution reaches here, we're at EOF
+// possible error: last character was ! (but don't bother checking since it's inconsequential)
+return pos;
+}
+/* Parses fbuf for a section marker. Already knows fbuf[pos] == '{'.
+*/
+private ID fbufReadSecMarker (ref size_t pos) {
+    // At this point pos is whatever a parseSection run returned; since we haven't hit EOF,
+    // fbuf[pos] MUST be '{' so there is no need to check it.
+    fbufIncrement(pos);         // step past '{'; a file ending right after '{' is an error
+    size_t start = pos;
+
+    // The ID runs up to the closing '}'. Meeting another '{' or the end of the buffer first
+    // means the marker is malformed.
+    for (; pos < fbuf.length; ++pos)
+        if (fbuf[pos] == '}' || fbuf[pos] == '{') break;
+    if (pos >= fbuf.length || fbuf[pos] != '}')
+        throwMTErr ("Bad section tag format: not {id}" ~ ErrInFile, new MTSyntaxException);
+    ID id = cast(ID) fbuf[start..pos];
+
+    // Step past '}' with a plain increment rather than fbufIncrement: a complete marker at
+    // the very end of the file denotes an empty final section, not a truncated file. Both the
+    // read loops and parseSection already cope with pos == fbuf.length.
+    ++pos;
+    return id;
+}
+/* Increments pos and checks it hasn't hit fbuf.length . */
+private void fbufIncrement(ref size_t pos) {
+    // Advance by one; running off the end of the buffer is reported as a syntax error.
+    if (++pos >= fbuf.length)
+        throwMTErr("Unexpected EOF" ~ ErrInFile, new MTSyntaxException);
+}
+/* Marks this reader as having failed fatally, logs msg and throws exc.
+* Note that msg is only written to the log; exc is thrown exactly as passed in.
+*/
+private void throwMTErr (char[] msg, MTException exc = new MTException) {
+fatal = true;	// if anyone catches the error and tries to do anything --- we're dead now
+logger.error (msg);	// report the error
+throw exc;		// and signal our error
+}
+//END METHODS: PRIVATE
+}
+/**
+* Class for reading a mergetag binary file.
+*
+* Currently only a dummy class: a MTNotImplementedException will be thrown if created.
+*/
+class MTBReader : IReader
+{
+    /// Wraps the string in a FilePath and forwards to the PathView constructor.
+    public this (char[] path, DataSet ds = null, bool rdHeader = false) {
+        this (new FilePath (path), ds, rdHeader);
+    }
+
+    /// Always throws MTNotImplementedException.
+    public this (PathView path, DataSet ds = null, bool rdHeader = false) {
+        throw new MTNotImplementedException;
+    }
+
+    /// Get the DataSet (always null in this stub)
+    DataSet dataset () {
+        return null;
+    }
+
+    /// Set the DataSet (ignored)
+    void dataset (DataSet) {
+    }
+
+    /// Set the dataSecCreator (ignored)
+    void dataSecCreator (IDataSection delegate (ID)) {
+    }
+
+    /// Get identifiers for all sections (always empty)
+    ID[] getSectionNames () {
+        return [];
+    }
+
+    /// Commence reading (does nothing)
+    void read () {
+    }
+
+    /// ditto
+    void read (ID[] secSet) {
+    }
+
+    /// ditto
+    void read (View!(ID) secSet) {
+    }
+}

Mercurial > projects > mde

comparison mde/file/mergetag/Reader.d @ 81:d8fccaa45d5f