Mercurial > projects > mde
diff mde/mergetag/Reader.d @ 14:0047b364b6d9
Changed much of the mergetag structure and some functionality. First tests on windows.
Changes to mergetag Reader methods. New functionality allowing a dataSecCreator to cause sections to be skipped.
Moved several of the mergetag modules and some of their contents around. Moved all interfaces to separate modules in iface/ .
IReader & IWriter interfaces exist; MTTReader, MTBReader, MTTWriter, MTBWriter & DualWriter all now exist and implement IReader/IWriter (although the MTB variants are dummy classes); makeReader & makeWriter should both be fully functional.
Tested building on windows with partial success (works but window won't open).
Included a temporary hack from windows to get supported resolutions information.
committer: Diggory Hardy <diggory.hardy@gmail.com>
author | Diggory Hardy <diggory.hardy@gmail.com> |
---|---|
date | Fri, 07 Mar 2008 17:51:02 +0000 |
parents | |
children | 4608be19ebe2 |
line wrap: on
line diff
// Changeset diff header: --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mde/mergetag/Reader.d Fri Mar 07 17:51:02 2008 +0000 @@ -0,0 +1,511 @@
/**************************************************************************************************
 * This module contains all reading functions, for both binary and text MergeTag files.
 *************************************************************************************************/

module mde.mergetag.Reader;

// package imports
public import mde.mergetag.iface.IReader;
import mde.mergetag.DataSet;
import mde.mergetag.DefaultData;
import mde.mergetag.exception;
import mde.mergetag.internal;

import tango.core.Exception;

// tango imports
import tango.io.UnicodeFile;
import Util = tango.text.Util;
import ConvInt = tango.text.convert.Integer;
import tango.util.collection.model.View : View;
import tango.util.collection.HashSet : HashSet;
import tango.util.log.Log : Log, Logger;

// Module-level logger; used by makeReader. (MTTReader declares its own static logger.)
private Logger logger;
static this() {
    logger = Log.getLogger ("mde.mergetag.Reader");
}

// TODO: allow compressing with zlib for both binary and text? (.mtz, .mtt, .mtb extensions)

/** Make an IReader class.
*
* If no extension is given, search for a file using each extension (.mtt and .mtb) appended to
* path, and set path to the most recent file name.
*
* When an extension is available (either after the above or when supplied), use the appropriate
* reader (MTT or MTB).
*
* Params:
*   path     = Name (or PathView) of the file, with or without extension.
*   ds       = Passed through to the reader's constructor.
*   rdHeader = Passed through to the reader's constructor.
*
* Throws:
*  $(TABLE
*  $(TR $(TH Exception) $(TH Thrown when))
*  $(TR $(TD MTFileFormatException) $(TD Unable to determine format (only analysing file name)))
*  $(TR $(TD MTFileIOException) $(TD When no extension is given, neither appending .mtt nor
*       appending .mtb resolves a valid file))
*  )
*
*/
IReader makeReader (char[] path, DataSet ds = null, bool rdHeader = false) {
    return makeReader (new FilePath(path), ds, rdHeader);
}
/** ditto */
IReader makeReader (PathView path, DataSet ds = null, bool rdHeader = false) {
    if (path.ext.length == 0) {
        PathView tPath = new FilePath (path.toString ~ ".mtt");
        PathView bPath = new FilePath (path.toString ~ ".mtb");

        bool bPathExists = bPath.exists;

        if (tPath.exists) {
            if (bPathExists) {
                // take the latest version (roughly speaking...)
                path = tPath.modified > bPath.modified ? tPath : bPath;
            } else path = tPath;
        } else {
            if (bPathExists) path = bPath;
            else {
                logger.error ("No file exists: "~path.toString~"[.mtt|.mtb]");
                throw new MTFileIOException;
            }
        }
    }

    if (path.ext == "mtb") return new MTBReader (path, ds, rdHeader);
    else if (path.ext == "mtt") return new MTTReader (path, ds, rdHeader);
    else {
        // Log before throwing, as the missing-file path above does.
        logger.error ("Unrecognised file extension (expected .mtt or .mtb): " ~ path.toString);
        throw new MTFileFormatException;
    }
}

/**
 * Class for reading a file.
 *
 * Use as:
 * -----------------------
 * IReader foo;
 * try {
 *   foo = new MTTReader("foo.mtt");
 *   foo.read();
 * }
 * catch (MTException) {}
 * // get your data from foo.dataset.
 * -----------------------
 *
 * Throws:
 *  $(TABLE
 *  $(TR $(TH Exception) $(TH Thrown when))
 *  $(TR $(TD MTFileIOException) $(TD An error occurs while opening the file))
 *  $(TR $(TD MTFileFormatException) $(TD The file doesn't start with a recognised header/version))
 *  $(TR $(TD MTSyntaxException) $(TD A file syntax error occurs))
 *  $(TR $(TD MTException) $(TD An unexpected error occurs))
 *  )
 * Note that all exceptions extend MTException and when any exception is thrown the class is
 * rendered unusable: any subsequent calls to read will be ignored.
 *
 * Threading: Separate instances of Reader should be thread-safe provided access to the same
 * dataset is synchronized; i.e. no two readers referring to the same dataset should run
 * simultaneously. (The Reader class could be made thread-safe w.r.t. datasets, but
 * performance-wise I doubt it would be worth it.)
 * Do not run a single instance of Reader in multiple threads simultaneously.
 */
class MTTReader : IReader
{
//BEGIN DATA
    /** Get or set the DataSet
    *
    * A container for all read data.
    *
    * This may be accessed from here; however it may be preferable to use an external reference
    * (passed to the class on initialisation).
    */
    DataSet dataset () {    return _dataset;    }
    void dataset (DataSet ds)   /// ditto
    {   _dataset = ds;  }

    /** A delegate for creating new DataSections within the dataset.
    *
    * Allows a user-made class to be used in the DataSet instead of DefaultData (used if no
    * dataSecCreator exists). Also allows an existing class instance to be used instead of a new
    * one.
    *
    * This works by supplying a function which returns a reference to an instance of a class
    * implementing IDataSection. The function is passed the ID of the new section and may use this
    * to use different IDataSection classes for different sections.
    *
    * The function may also return null, in which case the section will be skipped. In the version
    * of read taking a set of sections to read, the section will not be marked as read and may
    * still be read later (assuming dataSecCreator returns non-null). However, in the version of
    * read not taking the set argument, all sections are set as read regardless, and the section
    * cannot be read later.
    */
    void dataSecCreator (IDataSection delegate (ID) dSC) {
        _dataSecCreator = dSC;
    }

private:
    // NOTE(review): this shadows the module-level logger inside the class and is registered
    // under a different name ("mde.mergetag.read.Reader") -- confirm whether that is intended.
    static Logger logger;

    // Non-static symbols:
    final char[] ErrFile;       // added after ErrInFile to do the same without the "in " bit.
    final char[] ErrInFile;     // something like "in \"path/file.mtt\""

    final char[] fbuf;          // file is read into this
    MTFormatVersion.VERS fileVer = MTFormatVersion.VERS.INVALID;    // Remains INVALID until set otherwise by CTOR.

    IDataSection delegate (ID) _dataSecCreator = null;     // see property setter above

    uint endOfHeader;           // index in fbuf where the header section ends (set by CTOR)
    bool allRead = false;       // true once read() (the no-argument version) has run
    bool fatal = false;         // a fatal file error occurred; don't try to recover
    /* If the file is scanned for sections, the starting position of all sections are stored
    * in secTable. If this is empty, either no sections exist (and endOfHeader == fbuf.length)
    * or a section scan has not been run (read() with no section names doesn't need to do so).
    *
    * Note: the table holds one entry per ID, so if the same ID occurs more than once in the file
    * a scan keeps only the position of the last occurrence.
    */
    struct SecMD {  // sec meta data
        static SecMD opCall (uint _pos, bool _read) {
            SecMD ret;
            ret.pos = _pos;
            ret.read = _read;
            return ret;
        }
        uint pos;               // position to start reading
        bool read;              // true if already read
    }
    SecMD [ID] secTable;

    DataSet _dataset;
//END DATA

//BEGIN METHODS: CTOR / DTOR
    static this () {
        logger = Log.getLogger ("mde.mergetag.read.Reader");
    }

    /** Tries to open file path and read it into a buffer.
     *
     * Params:
     * path     = The name or FilePath of the file to open.
     *     Standard extensions are .mtt and .mtb for text and binary files respectively.
     * ds       = If null create a new DataSet, else use existing DataSet ds and merge read
     *     data into it.
     * rdHeader = If true, read the header like a standard section. Doesn't read the header by
     *     default since if it's not requested it's likely not wanted.
     *
     * Memory:
     * This currently works by loading the whole file into memory at once. This should be fine most
     * of the time, but could potentially be a problem. Changing this would mean significant
     * changes to the way the code works.
     */
    /* Ideas for implementing a partial-loading memory model:
     * Use a conduit directly.
     * Use a fiber to do the parsing; let it switch back when it runs out of memory.
     * Redesign the code so it never needs to look backwards in the buffer?
     *
     * Major problem: reading only some sections and keeping references to other sections
     * would no longer be possible.
     */
    public this (char[] path, DataSet ds = null, bool rdHeader = false) {
        this (new FilePath (path), ds, rdHeader);
    }
    /** ditto */
    public this (PathView path, DataSet ds = null, bool rdHeader = false) {
        // Create a dataset or use an existing one
        if (ds !is null) _dataset = ds;
        else _dataset = new DataSet();

        // Open & read the file
        try {   // Supports unicode files with a BOM; defaults to UTF8 when there isn't a BOM:
            scope file = new UnicodeFile!(char) (path, Encoding.Unknown);
            fbuf = cast(char[]) file.read();
        } catch (Exception e) {
            throwMTErr ("Error reading file: " ~ e.msg, new MTFileIOException);
        }
        // Remember the file name so that we can report errors (somewhat) informatively:
        ErrFile = path.path ~ path.file;
        ErrInFile = " in \"" ~ ErrFile ~ '"';

        // Version checking & matching header section tag (the file must start "{MTxx}"):
        if (fbuf.length < 6 || fbuf[0] != '{' || fbuf[1] != 'M' || fbuf[2] != 'T' || fbuf[5] != '}')
            throwMTErr("Not a valid MergeTag text file" ~ ErrInFile, new MTFileFormatException);
        fileVer = MTFormatVersion.parseString (fbuf[3..5]);
        if (fileVer == MTFormatVersion.VERS.INVALID)
            throwMTErr("Unrecognised MergeTag version: MT" ~ fbuf[3..5] ~ ErrInFile, new MTFileFormatException);

        // Header reading/skipping:
        if (rdHeader) { // only bother actually reading it if it was requested
            // If already existing, merge; else create a new DefaultData.
            if (!_dataset.header) _dataset.header = new DefaultData;
            endOfHeader = parseSection (6, cast(IDataSection) _dataset.header);
        }
        else endOfHeader = parseSection (6,null);
    }
//END METHODS: CTOR / DTOR

//BEGIN METHODS: PUBLIC
    /** Scans for sections if not already done and returns a list of IDs.
    *
    * Won't work (will return an empty array) if all sections have already been read without
    * scanning for sections.
    */
    public uint[] getSectionNames () {
        if (fatal) return [];
        if (!secTable.length) read([]);     // scan for sections
        return cast(uint[]) secTable.keys;
    }

    /** Reads (some) sections of the file into data. Note that sections will never be _read twice.
    *
    * To be more accurate, the file is copied into a buffer by this(). read() then parses the
    * contents of this buffer, and stores the contents in dataset.
    *
    * Each section read is stored in a DataSection class. By default this is an instance of
    * DefaultData; this can be customised (see dataSecCreator).
    *
    * If secSet is provided, reading is restricted to sections given in secSet, otherwise all
    * sections are read. Sections given in secSet but not found in the file are not reported as an
    * error. Suggested: supply a HashSet!(uint) as the View!(ID). An ArrayBag!(ID) as used is not a
    * good choice, except that in this case it's empty.
    *
    * Calls made after everything has been read, or after a fatal error, do nothing.
    *
    * Merging:
    * Where a section already exists in the DataSet (when either the section is given more than
    * once in the file, or it was read from a different file by another reader) it is merged.
    * Entries already in the DataSet take priority.
    *
    * Performance:
    * Note that loading only desired sections like this still parses the sections not
    * read (although it does not try to understand the type or data fields), so there is only a
    * small performance advantage to this where other sections do exist in the file. There is also
    * some overhead in only partially reading the file to keep track of where other sections are so
    * that the entire file need not be re-read if further (or all remaining) sections are read
    * later.
    */
    public void read () {
        // Same guard as read(View!(ID)): never read sections twice, and never continue after a
        // fatal error.
        if (allRead || fatal) return;

        if (secTable.length) {
            foreach (ID id, ref SecMD smd; secTable) {
                if (!smd.read) {
                    IDataSection ds = getOrCreateSec (id);
                    parseSection (smd.pos, ds);
                    // allRead is set true so there's no point setting smd.read = true
                }
            }
        } else {                    // this time we don't need to use secTable
            for (uint pos = endOfHeader; pos < fbuf.length;) {
                ID id = fbufReadSecMarker (pos);
                IDataSection ds = getOrCreateSec (id);
                pos = parseSection (pos, ds);
            }
        }

        allRead = true;
    }
    /** ditto */
    public void read (ID[] secSet) {
        HashSet!(ID) hs = new HashSet!(ID);
        foreach (id; secSet) hs.add(id);
        read (hs);
    }
    /** ditto */
    public void read (View!(ID) secSet) {
        if (allRead || fatal) return;       // never do anything in either case

        if (secTable.length) {
            // Sections were located by an earlier scan; jump straight to the requested ones.
            foreach (ID id; secSet) {
                SecMD* psmd = id in secTable;
                if (psmd && !psmd.read) {                   // may not exist
                    IDataSection ds = getOrCreateSec (id);
                    parseSection (psmd.pos, ds);
                    if (ds !is null) psmd.read = true;      // getOrCreateSec may return null
                }
            }
        } else {
            // First pass over the file: record every section's position as we go.
            for (uint pos = endOfHeader; pos < fbuf.length;) {
                ID id = fbufReadSecMarker (pos);
                secTable[id] = SecMD(pos,false);            // add to table
                if (secSet.contains(id)) {
                    IDataSection ds = getOrCreateSec (id);
                    pos = parseSection (pos, ds);
                    if (ds !is null) secTable[id].read = true;
                } else {
                    pos = parseSection (pos, null);         // skip section
                }
            }
        }
    }
//END METHODS: PUBLIC

//BEGIN METHODS: PRIVATE
    /* Utility function for read
    * Look for a section; return it if it exists otherwise create a new section:
    *   use _dataSecCreator if it exists or just create a DefaultData if not.
    * However if _dataSecCreator returns null don't add it to the dataset.
    */
    private IDataSection getOrCreateSec (ID id) {
        IDataSection* i = id in _dataset.sec;
        if (i) return *i;
        else {
            IDataSection s;
            if (_dataSecCreator !is null) s = _dataSecCreator(id);
            else s = new DefaultData;
            if (s !is null) _dataset.sec[id] = s;
            return s;
        }
    }

    /* Reads a section, starting from index pos, finishing at the next section marker (returning
    the position of the start of the marker). pos should start after the section marker.

    After analysing tags, the function passes the type, ID and data to dsec.addTag. If dsec is
    null the section is parsed but its tags are discarded (i.e. the section is skipped).

    Returns the index of the '{' starting the next section, or fbuf.length at EOF.

    NOTE: from performance tests on indexing char[]'s and dereferencing char*'s, the char*'s are
    slightly faster, but a tiny difference isn't worth the extra effort/risk of using char*'s.
    */
    private uint parseSection (uint pos, IDataSection dsec) {
        /* Searches fbuf starting from pos to find one of <=>| and stops at its index.

        If quotable then be quote-aware for single and double quotes.
        Note: there's no length restriction for the content of the quote since it could be a single
        non-ascii UTF-8 char which would look like several chars.

        Reaching the end of fbuf without finding such a character is reported as a syntax error:
        every caller indexes fbuf[pos] on return, so pos must be a valid index.
        */
        void fbufLocateDataTagChar (inout uint pos, bool quotable) {
            for (; pos < fbuf.length; ++pos) {
                char c = fbuf[pos];
                if ((c >= '<' && c <= '>') || c == '|') return;
                else if (quotable && (c == '\'' || c == '"')) {
                    fbufIncrement(pos);                     // step past the opening quote (EOF-checked)
                    while (fbuf[pos] != c) {
                        if (fbuf[pos] == '\\') ++pos;       // escape seq.: also skip the escaped char
                        fbufIncrement(pos);
                    }
                    // pos is now on the closing quote; the loop increment steps past it
                }
            }
            // Ran off the end of the buffer inside a tag:
            throwMTErr ("Unexpected EOF" ~ ErrInFile, new MTSyntaxException);
        }

        // Used to ignore a tag (if it starts !< or !{ or should otherwise be ignored):
        bool comment = false;
        for (; pos < fbuf.length; ++pos) {
            if (Util.isSpace(fbuf[pos])) continue;  // whitespace
            else if (fbuf[pos] == '<') {            // data tag
                char[] ErrDTAG = "Bad data tag format: not <type|id=data>" ~ ErrInFile;

                fbufIncrement (pos);

                // Type section of tag:
                uint pos_s = pos;
                fbufLocateDataTagChar (pos, false); // find end of type section
                if (fbuf[pos] != '|') throwMTErr (ErrDTAG, new MTSyntaxException);
                char[] type = fbuf[pos_s..pos];

                fbufIncrement (pos);

                // ID section of tag:
                pos_s = pos;
                fbufLocateDataTagChar (pos, false); // find end of ID section
                if (fbuf[pos] != '=') throwMTErr (ErrDTAG, new MTSyntaxException);
                ID tagID = cast(ID) fbuf[pos_s..pos];

                fbufIncrement (pos);

                // Data section of tag:
                pos_s = pos;
                fbufLocateDataTagChar (pos, true);  // find end of data section
                if (fbuf[pos] != '>') throwMTErr (ErrDTAG, new MTSyntaxException);
                char[] data = fbuf[pos_s..pos];

                if (!comment && dsec !is null) {
                    type = Util.trim(type);
                    try {
                        dsec.addTag (type, tagID, data);
                    }
                    catch (TextException e) {
                        // Not fatal: log and carry on with the next tag.
                        logger.warn ("TextException while reading " ~ ErrFile ~ ":");   // following a parse error
                        logger.warn (e.msg);
                    }
                    catch (Exception e) {
                        logger.error ("Unknown error occured" ~ ErrInFile ~ ':');
                        logger.error (e.msg);
                        throwMTErr (e.msg);             // Fatal to Reader
                    }
                } else comment = false;                 // cancel comment status now
            }
            else if (fbuf[pos] == '{') {
                if (comment) {                      // simple block comment
                    uint depth = 0;                 // depth of embedded comment blocks
                    while (true) {
                        fbufIncrement (pos);
                        if (fbuf[pos] == '}') {
                            if (depth == 0) break;
                            else --depth;
                        } else if (fbuf[pos] == '{')
                            ++depth;
                    }
                    comment = false;                // end of this comment
                } else {
                    return pos;                     // next section coming up; we are done
                }
            }
            else if (fbuf[pos] == '!') {            // possibly a comment; check next char
                comment = true;                     // starting a comment (or an error)
                                                    // variable is reset at end of comment
            } else                                  // must be an error
                throwMTErr ("Invalid character (or sequence starting \"!\") outside of tag" ~ ErrInFile, new MTSyntaxException);
        }
        // if code execution reaches here, we're at EOF
        // possible error: last character was ! (but don't bother checking since it's inconsequential)
        return pos;
    }

    /* Parses fbuf for a section marker. Already knows fbuf[pos] == '{'.
    *
    * On return pos is the index just after the closing '}'; the text between the braces is
    * returned as the ID.
    */
    private ID fbufReadSecMarker (inout uint pos) {
        // at this point pos is whatever a parseSection run returned
        // since we haven't hit EOF, fbuf[pos] MUST be '{' so no need to check
        fbufIncrement(pos);

        uint start = pos;
        for (; pos < fbuf.length; ++pos)
            if (fbuf[pos] == '}' || fbuf[pos] == '{') break;

        if (pos >= fbuf.length || fbuf[pos] != '}')
            throwMTErr ("Bad section tag format: not {id}" ~ ErrInFile, new MTSyntaxException);

        ID id = cast(ID) fbuf[start..pos];
        fbufIncrement(pos);
        return id;
    }

    /* Increments pos and checks it hasn't hit fbuf.length .
    * Throws MTSyntaxException (via throwMTErr) if it has. */
    private void fbufIncrement(inout uint pos) {
        ++pos;
        if (pos >= fbuf.length) throwMTErr("Unexpected EOF" ~ ErrInFile, new MTSyntaxException);
    }

    /* Marks this reader as unusable, logs msg and throws exc. */
    private void throwMTErr (char[] msg, MTException exc = new MTException) {
        fatal = true;       // if anyone catches the error and tries to do anything --- we're dead now
        logger.error (msg); // report the error
        throw exc;          // and signal our error
    }
//END METHODS: PRIVATE
}

/**
 * Placeholder reader for binary MergeTag files.
 *
 * Not implemented: constructing one always throws MTNotImplementedException, and the remaining
 * methods are empty stubs that exist only to satisfy IReader.
 */
class MTBReader : IReader
{
    public this (char[] path, DataSet ds = null, bool rdHeader = false) {
        this (new FilePath (path), ds, rdHeader);
    }
    public this (PathView path, DataSet ds = null, bool rdHeader = false) {
        throw new MTNotImplementedException;
    }

    DataSet dataset () {                /// Get the DataSet
        return null;
    }
    void dataset (DataSet) {}           /// Set the DataSet

    void dataSecCreator (IDataSection delegate (ID)) {} /// Set the dataSecCreator

    uint[] getSectionNames () {         /// Get identifiers for all sections
        return [];
    }
    void read () {}                     /// Commence reading
    void read (ID[] secSet) {}          /// ditto
    void read (View!(ID) secSet) {}     /// ditto
}