Mercurial > projects > mde
diff mde/file/mergetag/Reader.d @ 81:d8fccaa45d5f
Moved file IO code from mde/mergetag to mde/file[/mergetag] and changed how some errors are caught.
author | Diggory Hardy <diggory.hardy@gmail.com> |
---|---|
date | Fri, 29 Aug 2008 11:59:43 +0100 |
parents | mde/mergetag/Reader.d@ea58f277f487 |
children | ac1e3fd07275 |
line wrap: on
line diff
// --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mde/file/mergetag/Reader.d Fri Aug 29 11:59:43 2008 +0100 @@ -0,0 +1,526 @@
/* LICENSE BLOCK
Part of mde: a Modular D game-oriented Engine
Copyright © 2007-2008 Diggory Hardy

This program is free software: you can redistribute it and/or modify it under the terms
of the GNU General Public License as published by the Free Software Foundation, either
version 2 of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>. */

/**************************************************************************************************
 * This module contains all reading functions, for both binary and text MergeTag files.
 *************************************************************************************************/
module mde.file.mergetag.Reader;

// package imports
public import mde.file.mergetag.iface.IReader;
import mde.file.mergetag.DataSet;
import mde.file.mergetag.DefaultData;
import mde.file.mergetag.exception;
import mde.file.mergetag.internal;

import tango.core.Exception;

// tango imports
import tango.io.FilePath;
import tango.io.UnicodeFile;
import Util = tango.text.Util;
import ConvInt = tango.text.convert.Integer;
//import tango.util.collection.model.View : View;
import tango.util.collection.HashSet : HashSet;
import tango.util.log.Log : Log, Logger;

private Logger logger;
static this() {
    logger = Log.getLogger ("mde.mergetag.Reader");
}

// TODO: allow compressing with zlib for both binary and text? (.mtz, .mtt, .mtb extensions)

/** Make an IReader class.
*
* Create an appropriate reader: MTTReader or MTBReader. The choice is made purely on the file
* extension of path ("mtt" or "mtb"); the remaining arguments are forwarded to the reader's
* constructor unchanged.
*
* Throws:
*  $(TABLE
*  $(TR $(TH Exception) $(TH Thrown when))
*  $(TR $(TD MTFileIOException) $(TD When extension given is neither mtt nor mtb))
*  )
*  Any exception thrown by the selected reader's constructor also propagates.
*/
IReader makeReader (FilePath path, DataSet ds = null, bool rdHeader = false) {
    if      (path.ext == "mtb") return new MTBReader (path, ds, rdHeader);
    else if (path.ext == "mtt") return new MTTReader (path, ds, rdHeader);
    else throw new MTFileIOException ("Invalid mergetag extension");
}

/** Resolve a file path.
 *
 * Tries adding both ".mtt" and ".mtb" extensions, returning whichever exists (the most recently
 * modified if both exist; the binary file wins a tie), or returns null if neither exist or if
 * path is null. */
FilePath findFile (char[] path) {
    if (path is null) return null;

    FilePath tPath = new FilePath (path ~ ".mtt");
    FilePath bPath = new FilePath (path ~ ".mtb");

    bool bPathExists = bPath.exists;

    if (tPath.exists) {
        if (bPathExists) {
            // take the latest version (roughly speaking...)
            return (tPath.modified > bPath.modified ? tPath : bPath);
        } else return tPath;
    } else {
        if (bPathExists) return bPath;
        else return null;
    }
}

/**
 *  Class for reading a mergetag text file.
 *
 *  Use as:
 *  -----------------------
 *  IReader foo;
 *  try {
 *      foo = new MTTReader("foo.mtt");
 *      foo.read();
 *  }
 *  catch (MTException) {}
 *  // get your data from foo.dataset.
 *  -----------------------
 *
 *  Throws:
 *  $(TABLE
 *  $(TR $(TH Exception) $(TH Thrown when))
 *  $(TR $(TD MTFileIOException) $(TD An error occurs while opening the file))
 *  $(TR $(TD MTFileFormatException) $(TD The file doesn't start with a recognised header/version))
 *  $(TR $(TD MTSyntaxException) $(TD A file syntax error occurs))
 *  $(TR $(TD MTException) $(TD An unexpected error occurs))
 *  )
 *  Note that all exceptions extend MTException and when any exception is thrown the class is
 *  rendered unusable: any subsequent calls to read will be ignored.
 *
 *  Threading: Separate instances of Reader should be thread-safe provided access to the same
 *  dataset is synchronized; i.e. no two readers referring to the same dataset should run
 *  simultaneously. (The Reader class could be made thread-safe w.r.t. datasets, but
 *  performance-wise I doubt it would be worth it.)
 *  Do not run a single instance of Reader in multiple threads simultaneously.
 */
class MTTReader : IReader
{
//BEGIN DATA
    /** Get or set the DataSet
    *
    * A container for all read data.
    *
    * This may be accessed from here; however it may be preferable to use an external reference
    * (passed to the class on initialisation).
    */
    DataSet dataset () {    return _dataset;    }
    void dataset (DataSet ds)   /// ditto
    {   _dataset = ds;  }

    /** A delegate for creating new DataSections within the dataset.
    *
    * Allows a user-made class to be used in the DataSet instead of DefaultData (used if no
    * dataSecCreator exists). Also allows an existing class instance to be used instead of a new
    * one.
    *
    * This works by supplying a function which returns a reference to an instance of a class
    * implementing IDataSection. The function is passed the ID of the new section and may use this
    * to use different IDataSection classes for different sections.
    *
    * The function may also return null, in which case the section will be skipped. In the version
    * of read taking a set of sections to read, the section will not be marked as read and may
    * still be read later (assuming dataSecCreator returns non-null). However, in the version of
    * read not taking the set argument, all sections are set as read regardless, and the section
    * cannot be read later.
    */
    void dataSecCreator (IDataSection delegate (ID) dSC) {
        _dataSecCreator = dSC;
    }

private:
    static Logger logger;

    // Non-static symbols:
    final char[] ErrFile;       // added after ErrInFile to do the same without the "in " bit.
    final char[] ErrInFile;     // something like "in \"path/file.mtt\""

    final char[] fbuf;          // file is read into this
    MTFormatVersion.VERS fileVer = MTFormatVersion.VERS.INVALID;    // Remains INVALID until set otherwise by CTOR.

    IDataSection delegate (ID) _dataSecCreator = null;      // see property setter above

    size_t endOfHeader;         // index in fbuf just past the header section (set by CTOR)
    bool allRead = false;       // set once read() (the no-argument overload) has run
    bool fatal = false;         // a fatal file error occurred; don't try to recover
    /* If the file is scanned for sections, the starting position of all sections are stored
    * in secTable. If this is empty, either no sections exist (and endOfHeader == fbuf.length)
    * or a section scan has not been run (read() with no section names doesn't need to do so).
    */
    struct SecMD {      // sec meta data
        static SecMD opCall (size_t _pos, bool _read) {
            SecMD ret;
            ret.pos = _pos;
            ret.read = _read;
            return ret;
        }
        size_t pos;     // position to start reading
        bool read;      // true if already read
    }
    SecMD [ID] secTable;

    DataSet _dataset;
//END DATA

//BEGIN METHODS: CTOR / DTOR
    static this () {
        logger = Log.getLogger ("mde.mergetag.read.Reader");
    }

    /** Tries to open file path and read it into a buffer.
     *
     * Params:
     * path     = The name or FilePath of the file to open.
     *     Standard extensions are .mtt and .mtb for text and binary files respectively.
     * ds       = If null create a new DataSet, else use existing DataSet ds and merge read
     *     data into it.
     * rdHeader = If true, read the header like a standard section. Doesn't read the header by
     *     default since if it's not requested it's likely not wanted.
     *
     * Throws:
     * MTFileIOException if the file cannot be read; MTFileFormatException if the buffer does not
     * start with a "{MTxx}" marker carrying a recognised version; anything parseSection throws
     * while reading or skipping the header.
     *
     * Memory:
     * This currently works by loading the whole file into memory at once. This should be fine most
     * of the time, but could potentially be a problem. Changing this would mean significant
     * changes to the way the code works.
     */
    /* Ideas for implementing a partial-loading memory model:
     *  Use a conduit directly.
     *  Use a fiber to do the parsing; let it switch back when it runs out of memory.
     *  Redesign the code so it never needs to look backwards in the buffer?
     *
     * Major problem: reading only some sections and keeping references to other sections
     * would no longer be possible.
     */
    public this (char[] path, DataSet ds = null, bool rdHeader = false) {
        this (new FilePath (path), ds, rdHeader);
    }
    /** ditto */
    public this (FilePath path, DataSet ds = null, bool rdHeader = false) {
        // Create a dataset or use an existing one
        if (ds !is null) _dataset = ds;
        else _dataset = new DataSet();

        // Open & read the file
        try {   // Supports unicode files with a BOM; defaults to UTF8 when there isn't a BOM:
            scope file = new UnicodeFile!(char) (path, Encoding.Unknown);
            fbuf = cast(char[]) file.read();
        } catch (Exception e) {
            throwMTErr ("Error reading file: " ~ e.msg, new MTFileIOException);
        }
        // Remember the file name so that we can report errors (somewhat) informatively:
        ErrFile = path.path ~ path.file;
        ErrInFile = " in \"" ~ ErrFile ~ '"';

        // Version checking & matching header section tag (the first six chars must be "{MTxx}"):
        if (fbuf.length < 6 || fbuf[0] != '{' || fbuf[1] != 'M' || fbuf[2] != 'T' || fbuf[5] != '}')
            throwMTErr("Not a valid MergeTag text file" ~ ErrInFile, new MTFileFormatException);
        fileVer = MTFormatVersion.parseString (fbuf[3..5]);
        if (fileVer == MTFormatVersion.VERS.INVALID)
            throwMTErr("Unrecognised MergeTag version: MT" ~ fbuf[3..5] ~ ErrInFile, new MTFileFormatException);

        // Header reading/skipping:
        if (rdHeader) { // only bother actually reading it if it was requested
            // If already existing, merge; else create a new DefaultData.
            if (!_dataset.header) _dataset.header = new DefaultData;
            endOfHeader = parseSection (6, cast(IDataSection) _dataset.header);
        }
        else endOfHeader = parseSection (6,null);   // null section: parse for syntax only
    }
//END METHODS: CTOR / DTOR

//BEGIN METHODS: PUBLIC
    /** Scans for sections if not already done and returns a list of IDs.
    *
    * Won't work (will return an empty array) if all sections have already been read without
    * scanning for sections. Also returns an empty array once a fatal error has occurred.
    */
    public ID[] getSectionNames () {
        if (fatal) return [];
        if (!secTable.length) read([]);     // scan for sections (reads nothing: the set is empty)
        return secTable.keys;
    }

    /** Reads (some) sections of the file into data. Note that sections will never be _read twice.
    *
    * To be more accurate, the file is copied into a buffer by this(). read() then parses the
    * contents of this buffer, and stores the contents in dataset.
    *
    * Each section read is stored in a DataSection class. By default this is an instance of
    * DefaultData; this can be customised (see dataSecCreator).
    *
    * If secSet is provided, reading is restricted to sections given in secSet, otherwise all
    * sections are read. Sections given in secSet but not found in the file are not reported as an
    * error. Suggested: supply a HashSet!(ID) as the View!(ID) (this is what the ID[] overload
    * does internally). An ArrayBag!(ID) is not a good choice unless it is empty.
    *
    * Once the no-argument overload has run, or once any fatal error has been raised by this
    * reader, every overload of read returns immediately without doing anything.
    *
    * Merging:
    * Where a section already exists in the DataSet (when either the section is given more than
    * once in the file, or it was read from a different file by another reader) it is merged.
    * Entries already in the DataSet take priority.
    *
    * Performance:
    * Note that loading only desired sections like this still parses the sections not
    * read (although it does not try to understand the type or data fields), so there is only a
    * small performance advantage to this where other sections do exist in the file. There is also
    * some overhead in only partially reading the file to keep track of where other sections are so
    * that the entire file need not be re-read if further (or all remaining) sections are read
    * later.
    */
    public void read () {
        // Same guard as read(View!(ID)): never read twice, never continue after a fatal error.
        if (allRead || fatal) return;

        if (secTable.length) {
            // A previous partial read scanned the file; only visit sections not yet read.
            foreach (ID id, ref SecMD smd; secTable) {
                if (!smd.read) {
                    IDataSection ds = getOrCreateSec (id);
                    parseSection (smd.pos, ds);
                    // allRead is set true so there's no point setting smd.read = true
                }
            }
        } else {                    // this time we don't need to use secTable
            for (size_t pos = endOfHeader; pos < fbuf.length;) {
                ID id = fbufReadSecMarker (pos);
                IDataSection ds = getOrCreateSec (id);
                pos = parseSection (pos, ds);
            }
        }

        allRead = true;
    }
    /** ditto */
    public void read (ID[] secSet) {
        HashSet!(ID) hs = new HashSet!(ID);
        foreach (id; secSet) hs.add(id);
        read (hs);
    }
    /** ditto */
    public void read (View!(ID) secSet) {
        if (allRead || fatal) return;   // never do anything in either case

        if (secTable.length) {
            // File already scanned: jump straight to the recorded positions.
            foreach (ID id; secSet) {
                SecMD* psmd = id in secTable;
                if (psmd && !psmd.read) {                   // may not exist
                    IDataSection ds = getOrCreateSec (id);
                    parseSection (psmd.pos, ds);
                    if (ds !is null) psmd.read = true;      // getOrCreateSec may return null
                }
            }
        } else {
            // First scan: walk the whole buffer, recording every section's start position.
            // NOTE(review): if the same section ID occurs more than once in the file, each
            // occurrence overwrites the previous secTable entry, so only the last position is
            // remembered for later reads - confirm whether duplicate sections must be supported
            // with partial reads.
            for (size_t pos = endOfHeader; pos < fbuf.length;) {
                ID id = fbufReadSecMarker (pos);
                secTable[id] = SecMD(pos,false);    // add to table
                if (secSet.contains(id)) {
                    IDataSection ds = getOrCreateSec (id);
                    pos = parseSection (pos, ds);
                    if (ds !is null) secTable[id].read = true;
                } else {
                    pos = parseSection (pos, null);     // skip section
                }
            }
        }
    }
//END METHODS: PUBLIC

//BEGIN METHODS: PRIVATE
    /* Utility function for read
    * Look for a section; return it if it exists otherwise create a new section:
    *   use _dataSecCreator if it exists or just create a DefaultData if not.
    * However if _dataSecCreator returns null don't add it to the dataset.
    * Returns the section, or null if _dataSecCreator declined to create one.
    */
    private IDataSection getOrCreateSec (ID id) {
        IDataSection* i = id in _dataset.sec;
        if (i) return *i;
        else {
            IDataSection s;
            if (_dataSecCreator !is null) s = _dataSecCreator(id);
            else s = new DefaultData;
            if (s !is null) _dataset.sec[id] = s;
            return s;
        }
    }

    /* Reads a section, starting from index pos, finishing at the next section marker (returning
    the position of the start of the marker) or at the end of the buffer (returning fbuf.length).
    pos should start after the section marker.

    After analysing tags, the function passes the type, ID and data to dsec.addTag. If dsec is
    null the section is still parsed (so syntax errors are still raised) but no tag is stored.

    A '!' immediately before a data tag or a '{' marks it as a comment: a commented data tag is
    parsed but not stored, and a commented '{' starts a (nestable) block comment ending at the
    matching '}'.

    NOTE: from performance tests on indexing char[]'s and dereferencing char*'s, the char*'s are
    slightly faster, but a tiny difference isn't worth the extra effort/risk of using char*'s.
    */
    private size_t parseSection (size_t pos, IDataSection dsec) {
        debug scope (failure)
                logger.trace ("MTTReader.parseSection: failure");
        /* Searches fbuf starting from start to find one of <=>| and stops at its index.

        If quotable then be quote-aware for single and double quotes.
        Note: there's no length restriction for the content of the quote since it could be a single
        non-ascii UTF-8 char which would look like several chars.
        */
        void fbufLocateDataTagChar (ref size_t pos, bool quotable) {
            while (true) {
                fbufIncrement (pos);

                // '<', '=' and '>' are consecutive character codes, hence the range test
                if ((fbuf[pos] >= '<' && fbuf[pos] <= '>') || fbuf[pos] == '|') return;
                else if (quotable) {
                    char c = fbuf[pos];
                    if (c == '\'' || c == '"') {
                        fbufIncrement(pos);
                        while (fbuf[pos] != c) {
                            if (fbuf[pos] == '\\') ++pos;   // escape seq.
                            fbufIncrement(pos);
                        }
                    }
                }
            }
        }

        // Used to ignore a tag (if it starts !< or !{ or should otherwise be ignored):
        bool comment = false;
        for (; pos < fbuf.length; ++pos) {
            if (Util.isSpace(fbuf[pos])) continue;  // whitespace
            else if (fbuf[pos] == '<') {            // data tag
                char[] ErrDTAG = "Bad data tag format: not <type|id=data>" ~ ErrInFile;

                // Type section of tag:
                size_t pos_s = pos + 1;
                fbufLocateDataTagChar (pos, false); // find end of type section
                if (fbuf[pos] != '|') throwMTErr (ErrDTAG, new MTSyntaxException);
                char[] type = fbuf[pos_s..pos];

                // ID section of tag:
                pos_s = pos + 1;
                fbufLocateDataTagChar (pos, false); // find end of ID section
                if (fbuf[pos] != '=') throwMTErr (ErrDTAG, new MTSyntaxException);
                ID tagID = cast(ID) fbuf[pos_s..pos];

                // Data section of tag:
                pos_s = pos + 1;
                fbufLocateDataTagChar (pos, true);      // find end of data section
                if (fbuf[pos] != '>') throwMTErr (ErrDTAG, new MTSyntaxException);
                char[] data = fbuf[pos_s..pos];

                if (!comment && dsec !is null) {
                    type = Util.trim(type);
                    try {
                        dsec.addTag (type, tagID, data);
                    }
                    catch (TextException e) {
                        logger.error ("TextException while reading " ~ ErrFile ~ ":");  // following a parse error
                        logger.error (e.msg);
                        logger.error ("Tag ignored: <"~type~"|"~tagID~"="~data~">");
                        // No throw: tag is just ignored
                    }
                    catch (Exception e) {
                        logger.error ("Unknown error occured" ~ ErrInFile ~ ':');
                        logger.error (e.msg);
                        throwMTErr (e.msg);             // Fatal to Reader
                    }
                } else comment = false;         // cancel comment status now
            }
            else if (fbuf[pos] == '{') {
                if (comment) {          // simple block comment
                    uint depth = 0;     // depth of embedded comment blocks
                    while (true) {
                        fbufIncrement (pos);
                        if (fbuf[pos] == '}') {
                            if (depth == 0) break;
                            else --depth;
                        } else if (fbuf[pos] == '{')
                            ++depth;
                    }
                    comment = false;    // end of this comment
                } else {
                    return pos;     // next section coming up; we are done
                }
            }
            else if (fbuf[pos] == '!') {    // possibly a comment; check next char
                comment = true;             // starting a comment (or an error)
                                            // variable is reset at end of comment
            } else                          // must be an error
                throwMTErr ("Invalid character (or sequence starting \"!\") outside of tag" ~ ErrInFile, new MTSyntaxException);
        }
        // if code execution reaches here, we're at EOF
        // possible error: last character was ! (but don't bother checking since it's inconsequential)
        return pos;
    }

    /* Parses fbuf for a section marker. Already knows fbuf[pos] == '{'.
    *
    * On return pos is the index just past the closing '}' (which may equal fbuf.length when the
    * marker is the last thing in the file) and the returned ID is the text between the braces.
    * Throws MTSyntaxException (via throwMTErr) if the marker is not of the form {id}.
    */
    private ID fbufReadSecMarker (ref size_t pos) {
        // at this point pos is whatever a parseSection run returned
        // since we haven't hit EOF, fbuf[pos] MUST be '{' so no need to check
        fbufIncrement(pos);

        size_t start = pos;
        for (; pos < fbuf.length; ++pos)
            if (fbuf[pos] == '}' || fbuf[pos] == '{') break;

        if (pos >= fbuf.length || fbuf[pos] != '}')
            throwMTErr ("Bad section tag format: not {id}" ~ ErrInFile, new MTSyntaxException);

        ID id = cast(ID) fbuf[start..pos];
        // Step over the closing '}'. A plain increment is used rather than fbufIncrement so that a
        // file ending directly after a section marker is accepted as an empty final section
        // instead of raising "Unexpected EOF"; callers loop on pos < fbuf.length and parseSection
        // returns immediately when pos == fbuf.length.
        ++pos;
        return id;
    }

    /* Increments pos and checks it hasn't hit fbuf.length . */
    private void fbufIncrement(ref size_t pos) {
        ++pos;
        if (pos >= fbuf.length) throwMTErr("Unexpected EOF" ~ ErrInFile, new MTSyntaxException);
    }

    /* Marks this reader as dead, logs msg and throws exc. Never returns normally. */
    private void throwMTErr (char[] msg, MTException exc = new MTException) {
        fatal = true;       // if anyone catches the error and tries to do anything --- we're dead now
        logger.error (msg); // report the error
        throw exc;          // and signal our error
    }
//END METHODS: PRIVATE
}


/**
*  Class for reading a mergetag binary file.
*
*  Currently only a dummy class: a MTNotImplementedException will be thrown if created.
*  All other members are no-op stubs which exist only to satisfy IReader.
*/
class MTBReader : IReader
{
    public this (char[] path, DataSet ds = null, bool rdHeader = false) {
        this (new FilePath (path), ds, rdHeader);
    }
    public this (PathView path, DataSet ds = null, bool rdHeader = false) {
        throw new MTNotImplementedException;
    }

    DataSet dataset () {                /// Get the DataSet
        return null;
    }
    void dataset (DataSet) {}           /// Set the DataSet

    void dataSecCreator (IDataSection delegate (ID)) {} /// Set the dataSecCreator

    ID[] getSectionNames () {           /// Get identifiers for all sections
        return [];
    }
    void read () {}                     /// Commence reading
    void read (ID[] secSet) {}          /// ditto
    void read (View!(ID) secSet) {}     /// ditto
}