comparison mde/file/mergetag/Reader.d @ 81:d8fccaa45d5f

Moved file IO code from mde/mergetag to mde/file[/mergetag] and changed how some errors are caught.
author Diggory Hardy <diggory.hardy@gmail.com>
date Fri, 29 Aug 2008 11:59:43 +0100
parents mde/mergetag/Reader.d@ea58f277f487
children ac1e3fd07275
comparison
equal deleted inserted replaced
80:ea58f277f487 81:d8fccaa45d5f
1 /* LICENSE BLOCK
2 Part of mde: a Modular D game-oriented Engine
3 Copyright © 2007-2008 Diggory Hardy
4
5 This program is free software: you can redistribute it and/or modify it under the terms
6 of the GNU General Public License as published by the Free Software Foundation, either
7 version 2 of the License, or (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
10 without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 See the GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program. If not, see <http://www.gnu.org/licenses/>. */
15
16 /**************************************************************************************************
17 * This module contains all reading functions, for both binary and text MergeTag files.
18 *************************************************************************************************/
19 module mde.file.mergetag.Reader;
20
21 // package imports
22 public import mde.file.mergetag.iface.IReader;
23 import mde.file.mergetag.DataSet;
24 import mde.file.mergetag.DefaultData;
25 import mde.file.mergetag.exception;
26 import mde.file.mergetag.internal;
27
28 import tango.core.Exception;
29
30 // tango imports
31 import tango.io.FilePath;
32 import tango.io.UnicodeFile;
33 import Util = tango.text.Util;
34 import ConvInt = tango.text.convert.Integer;
35 //import tango.util.collection.model.View : View;
36 import tango.util.collection.HashSet : HashSet;
37 import tango.util.log.Log : Log, Logger;
38
// Module-level log channel; assigned once by the module constructor below.
private Logger logger;

static this ()
{
    logger = Log.getLogger ("mde.mergetag.Reader");
}
43
44 // TODO: allow compressing with zlib for both binary and text? (.mtz, .mtt, .mtb extensions)
45
46 /** Make an IReader class.
47 *
48 * Create an appropriate reader: MTTReader or MTBReader.
49 *
50 * Throws:
51 * $(TABLE
52 * $(TR $(TH Exception) $(TH Thrown when))
53 * $(TR $(TD MTFileIOException) $(TD When extension given is neither mtt nor mtb))
54 * )
55 *
56 */
IReader makeReader (FilePath path, DataSet ds = null, bool rdHeader = false) {
    // findFile() returns null when neither candidate file exists. Report that as an IO error
    // instead of dereferencing a null reference when querying the extension below.
    if (path is null)
        throw new MTFileIOException ("No mergetag file path given");

    // Query the extension once; it selects the reader implementation.
    char[] ext = path.ext;

    // NOTE: MTBReader is currently a stub whose constructor throws MTNotImplementedException.
    if (ext == "mtb") return new MTBReader (path, ds, rdHeader);
    if (ext == "mtt") return new MTTReader (path, ds, rdHeader);

    throw new MTFileIOException ("Invalid mergetag extension");
}
62
63 /** Resolve a file path.
64 *
65 * Tries adding both ".mtt" and ".mtb" extensions, returning whichever exists (the most recently
66 * modified if both exist), or returns null if neither exists. */
FilePath findFile (char[] path) {
    // Nothing to resolve without a base name.
    if (path is null) return null;

    // Candidate files: text (.mtt) and binary (.mtb) variants of the same base path.
    FilePath textPath = new FilePath (path ~ ".mtt");
    FilePath binPath  = new FilePath (path ~ ".mtb");

    bool haveBin  = binPath.exists;
    bool haveText = textPath.exists;

    if (haveText && haveBin) {
        // Both exist: prefer the one with the later modification time (ties go to binary).
        return (textPath.modified > binPath.modified ? textPath : binPath);
    }
    if (haveText) return textPath;
    if (haveBin)  return binPath;

    // Neither variant exists.
    return null;
}
85
86 /**
87 * Class for reading a mergetag text file.
88 *
89 * Use as:
90 * -----------------------
91 * IReader foo;
92 * try {
93 * foo = new MTTReader("foo.mtt");
94 * foo.read();
95 * }
96 * catch (MTException) {}
97 * // get your data from foo.dataset.
98 * -----------------------
99 *
100 * Throws:
101 * $(TABLE
102 * $(TR $(TH Exception) $(TH Thrown when))
103 * $(TR $(TD MTFileIOException) $(TD An error occurs while opening the file))
104 * $(TR $(TD MTFileFormatException) $(TD The file doesn't start with a recognised header/version))
105 * $(TR $(TD MTSyntaxException) $(TD A file syntax error occurs))
106 * $(TR $(TD MTException) $(TD An unexpected error occurs))
107 * )
108 * Note that all exceptions extend MTException and when any exception is thrown the class is
109 * rendered unusable: any subsequent calls to read will be ignored.
110 *
111 * Threading: Separate instances of Reader should be thread-safe provided access to the same
112 * dataset is synchronized; i.e. no two readers referring to the same dataset should run
113 * simultaneously. (The Reader class could be made thread-safe w.r.t. datasets, but
114 * performance-wise I doubt it would be worth it.)
115 * Do not run a single instance of Reader in multiple threads simultaneously.
116 */
class MTTReader : IReader
{
    //BEGIN DATA
    /** Get or set the DataSet
     *
     * A container for all read data.
     *
     * This may be accessed from here; however it may be preferable to use an external reference
     * (passed to the class on initialisation).
     */
    DataSet dataset () { return _dataset; }
    void dataset (DataSet ds)   /// ditto
    { _dataset = ds; }

    /** A delegate for creating new DataSections within the dataset.
     *
     * Allows a user-made class to be used in the DataSet instead of DefaultData (used if no
     * dataSecCreator exists). Also allows an existing class instance to be used instead of a new
     * one.
     *
     * This works by supplying a function which returns a reference to an instance of a class
     * implementing IDataSection. The function is passed the ID of the new section and may use this
     * to use different IDataSection classes for different sections.
     *
     * The function may also return null, in which case the section will be skipped. In the version
     * of read taking a set of sections to read, the section will not be marked as read and may
     * still be read later (assuming dataSecCreator returns non-null). However, in the version of
     * read not taking the set argument, all sections are set as read regardless, and the section
     * cannot be read later.
     */
    void dataSecCreator (IDataSection delegate (ID) dSC) {
        _dataSecCreator = dSC;
    }

private:
    static Logger logger;

    // Non-static symbols:
    final char[] ErrFile;       // file name used in error messages (ErrInFile without the " in " bit)
    final char[] ErrInFile;     // something like " in \"path/file.mtt\""

    final char[] fbuf;          // the whole file is read into this by the constructor
    MTFormatVersion.VERS fileVer = MTFormatVersion.VERS.INVALID;    // Remains INVALID until set otherwise by CTOR.

    IDataSection delegate (ID) _dataSecCreator = null;  // see property setter above

    size_t endOfHeader;         // index in fbuf at which parsing of the header section stopped
    bool allRead = false;       // set by read() (no arguments) once it has processed every section
    bool fatal = false;         // a fatal file error occurred; don't try to recover
    /* If the file is scanned for sections, the starting position of all sections are stored
     * in secTable. If this is empty, either no sections exist (and endOfHeader == fbuf.length)
     * or a section scan has not been run (read() with no section names doesn't need to do so).
     *
     * NOTE(review): the table holds a single entry per ID, so if the same section ID occurs more
     * than once in the file a later occurrence replaces the entry of an earlier one during the scan.
     */
    struct SecMD {      // sec meta data
        static SecMD opCall (size_t _pos, bool _read) {
            SecMD ret;
            ret.pos = _pos;
            ret.read = _read;
            return ret;
        }
        size_t pos;     // position to start reading
        bool read;      // true if already read
    }
    SecMD [ID] secTable;

    DataSet _dataset;
    //END DATA

    //BEGIN METHODS: CTOR / DTOR
    static this () {
        logger = Log.getLogger ("mde.mergetag.read.Reader");
    }

    /** Tries to open file path and read it into a buffer.
     *
     * Params:
     * path     = The name or FilePath of the file to open.
     *     Standard extensions are .mtt and .mtb for text and binary files respectively.
     * ds       = If null create a new DataSet, else use existing DataSet ds and merge read
     *     data into it.
     * rdHeader = If true, read the header like a standard section. Doesn't read the header by
     *     default since if it's not requested it's likely not wanted.
     *
     * Throws:
     * MTFileIOException if reading the file fails, MTFileFormatException if the buffer does not
     * start with a recognised {MTxx} marker, and whatever parsing the header section throws.
     *
     * Memory:
     * This currently works by loading the whole file into memory at once. This should be fine most
     * of the time, but could potentially be a problem. Changing this would mean significant
     * changes to the way the code works.
     */
    /* Ideas for implementing a partial-loading memory model:
     *  Use a conduit directly.
     *  Use a fiber to do the parsing; let it switch back when it runs out of memory.
     *  Redesign the code so it never needs to look backwards in the buffer?
     *
     * Major problem: reading only some sections and keeping references to other sections
     * would no longer be possible.
     */
    public this (char[] path, DataSet ds = null, bool rdHeader = false) {
        this (new FilePath (path), ds, rdHeader);
    }
    /** ditto */
    public this (FilePath path, DataSet ds = null, bool rdHeader = false) {
        // Create a dataset or use an existing one
        if (ds !is null) _dataset = ds;
        else _dataset = new DataSet();

        // Open & read the file
        try {   // Supports unicode files with a BOM; defaults to UTF8 when there isn't a BOM:
            scope file = new UnicodeFile!(char) (path, Encoding.Unknown);
            fbuf = cast(char[]) file.read();
        } catch (Exception e) {
            throwMTErr ("Error reading file: " ~ e.msg, new MTFileIOException);
        }
        // Remember the file name so that we can report errors (somewhat) informatively:
        ErrFile = path.path ~ path.file;
        ErrInFile = " in \"" ~ ErrFile ~ '"';

        // Version checking & matching header section tag; expected form is {MTxx} where xx is
        // the two-character version string:
        if (fbuf.length < 6 || fbuf[0] != '{' || fbuf[1] != 'M' || fbuf[2] != 'T' || fbuf[5] != '}')
            throwMTErr("Not a valid MergeTag text file" ~ ErrInFile, new MTFileFormatException);
        fileVer = MTFormatVersion.parseString (fbuf[3..5]);
        if (fileVer == MTFormatVersion.VERS.INVALID)
            throwMTErr("Unrecognised MergeTag version: MT" ~ fbuf[3..5] ~ ErrInFile, new MTFileFormatException);

        // Header reading/skipping (index 6 is the first character after the {MTxx} marker):
        if (rdHeader) { // only bother actually reading it if it was requested
            // If already existing, merge; else create a new DefaultData.
            if (!_dataset.header) _dataset.header = new DefaultData;
            endOfHeader = parseSection (6, cast(IDataSection) _dataset.header);
        }
        else endOfHeader = parseSection (6,null);   // parse without storing, just to find the end
    }
    //END METHODS: CTOR / DTOR

    //BEGIN METHODS: PUBLIC
    /** Scans for sections if not already done and returns a list of IDs.
     *
     * Won't work (will return an empty array) if all sections have already been read without
     * scanning for sections.
     */
    public ID[] getSectionNames () {
        if (fatal) return [];
        if (!secTable.length) read([]);     // scan for sections
        return secTable.keys;
    }

    /** Reads (some) sections of the file into data. Note that sections will never be _read twice.
     *
     * To be more accurate, the file is copied into a buffer by this(). read() then parses the
     * contents of this buffer, and stores the contents in dataset.
     *
     * Each section read is stored in a DataSection class. By default this is an instance of
     * DefaultData; this can be customised (see dataSecCreator).
     *
     * If secSet is provided, reading is restricted to sections given in secSet, otherwise all
     * sections are read. Sections given in secSet but not found in the file are not reported as an
     * error. Suggested: supply a HashSet!(uint) as the View!(ID). An ArrayBag!(ID) as used is not a
     * good choice, except that in this case it's empty.
     *
     * Once all sections have been read, or after a fatal error, every variant of read returns
     * immediately without doing anything.
     *
     * Merging:
     * Where a section already exists in the DataSet (when either the section is given more than
     * once in the file, or it was read from a different file by another reader) it is merged.
     * Entries already in the DataSet take priority.
     *
     * Performance:
     * Note that loading only desired sections like this still parses the sections not
     * read (although it does not try to understand the type or data fields), so there is only a
     * small performance advantage to this where other sections do exist in the file. There is also
     * some overhead in only partially reading the file to keep track of where other sections are so
     * that the entire file need not be re-read if further (or all remaining) sections are read
     * later.
     */
    public void read () {
        // Same guard as read(View!(ID)): without it a second call would parse every section
        // again, and a call after a fatal error would re-run the parser on a broken file.
        if (allRead || fatal) return;

        if (secTable.length) {
            // A scan has been run: read whatever the table says has not been read yet.
            foreach (ID id, ref SecMD smd; secTable) {
                if (!smd.read) {
                    IDataSection ds = getOrCreateSec (id);
                    parseSection (smd.pos, ds);
                    // allRead is set true so there's no point setting smd.read = true
                }
            }
        } else {                    // this time we don't need to use secTable
            for (size_t pos = endOfHeader; pos < fbuf.length;) {
                ID id = fbufReadSecMarker (pos);
                IDataSection ds = getOrCreateSec (id);
                pos = parseSection (pos, ds);
            }
        }

        allRead = true;
    }
    /** ditto */
    public void read (ID[] secSet) {
        // Copy the IDs into a set and defer to the View!(ID) version.
        HashSet!(ID) hs = new HashSet!(ID);
        foreach (id; secSet) hs.add(id);
        read (hs);
    }
    /** ditto */
    public void read (View!(ID) secSet) {
        if (allRead || fatal) return;       // never do anything in either case

        if (secTable.length) {
            // Section positions are already known: jump straight to the requested ones.
            foreach (ID id; secSet) {
                SecMD* psmd = id in secTable;
                if (psmd && !psmd.read) {                       // may not exist
                    IDataSection ds = getOrCreateSec (id);
                    parseSection (psmd.pos, ds);
                    if (ds !is null) psmd.read = true;          // getOrCreateSec may return null
                }
            }
        } else {
            // First pass over the file: record every section's position while reading the
            // requested ones and skipping the rest.
            for (size_t pos = endOfHeader; pos < fbuf.length;) {
                ID id = fbufReadSecMarker (pos);
                secTable[id] = SecMD(pos,false);                // add to table
                if (secSet.contains(id)) {
                    IDataSection ds = getOrCreateSec (id);
                    pos = parseSection (pos, ds);
                    if (ds !is null) secTable[id].read = true;
                } else {
                    pos = parseSection (pos, null);             // skip section
                }
            }
        }
    }
    //END METHODS: PUBLIC

    //BEGIN METHODS: PRIVATE
    /* Utility function for read
     * Look for a section; return it if it exists otherwise create a new section:
     *  use _dataSecCreator if it exists or just create a DefaultData if not.
     * However if _dataSecCreator returns null don't add it to the dataset.
     */
    private IDataSection getOrCreateSec (ID id) {
        IDataSection* i = id in _dataset.sec;
        if (i) return *i;
        else {
            IDataSection s;
            if (_dataSecCreator !is null) s = _dataSecCreator(id);
            else s = new DefaultData;
            if (s !is null) _dataset.sec[id] = s;
            return s;
        }
    }

    /* Reads a section, starting from index pos, finishing at the next section marker (returning
    the position of the start of the marker) or at the end of the buffer (returning fbuf.length).
    pos should start after the section marker.

    After analysing tags, the function passes the type, ID and data to dsec.addTag. If dsec is
    null the section is parsed (and syntax-checked) but nothing is stored.

    NOTE: from performance tests on indexing char[]'s and dereferencing char*'s, the char*'s are
    slightly faster, but a tiny difference isn't worth the extra effort/risk of using char*'s.
    */
    private size_t parseSection (size_t pos, IDataSection dsec) {
        debug scope (failure)
            logger.trace ("MTTReader.parseSection: failure");
        /* Searches fbuf starting from start to find one of <=>| and stops at its index.

        If quotable then be quote-aware for single and double quotes.
        Note: there's no length restriction for the content of the quote since it could be a single
        non-ascii UTF-8 char which would look like several chars.
        */
        void fbufLocateDataTagChar (ref size_t pos, bool quotable) {
            while (true) {
                fbufIncrement (pos);

                // '<', '=' and '>' are consecutive character codes, hence the range test.
                if ((fbuf[pos] >= '<' && fbuf[pos] <= '>') || fbuf[pos] == '|') return;
                else if (quotable) {
                    char c = fbuf[pos];
                    if (c == '\'' || c == '"') {
                        // Skip to the matching closing quote.
                        fbufIncrement(pos);
                        while (fbuf[pos] != c) {
                            // escape seq.: step over the escaped character; the fbufIncrement
                            // call below performs the bounds check for both steps.
                            if (fbuf[pos] == '\\') ++pos;
                            fbufIncrement(pos);
                        }
                    }
                }
            }
        }

        // Used to ignore a tag (if it starts !< or !{ or should otherwise be ignored):
        bool comment = false;
        for (; pos < fbuf.length; ++pos) {
            if (Util.isSpace(fbuf[pos])) continue;  // whitespace
            else if (fbuf[pos] == '<') {            // data tag
                char[] ErrDTAG = "Bad data tag format: not <type|id=data>" ~ ErrInFile;

                // Type section of tag:
                size_t pos_s = pos + 1;
                fbufLocateDataTagChar (pos, false); // find end of type section
                if (fbuf[pos] != '|') throwMTErr (ErrDTAG, new MTSyntaxException);
                char[] type = fbuf[pos_s..pos];

                // ID section of tag:
                pos_s = pos + 1;
                fbufLocateDataTagChar (pos, false); // find end of ID section
                if (fbuf[pos] != '=') throwMTErr (ErrDTAG, new MTSyntaxException);
                ID tagID = cast(ID) fbuf[pos_s..pos];

                // Data section of tag:
                pos_s = pos + 1;
                fbufLocateDataTagChar (pos, true);  // find end of data section
                if (fbuf[pos] != '>') throwMTErr (ErrDTAG, new MTSyntaxException);
                char[] data = fbuf[pos_s..pos];

                if (!comment && dsec !is null) {
                    type = Util.trim(type);
                    try {
                        dsec.addTag (type, tagID, data);
                    }
                    catch (TextException e) {
                        logger.error ("TextException while reading " ~ ErrFile ~ ":");  // following a parse error
                        logger.error (e.msg);
                        logger.error ("Tag ignored: <"~type~"|"~tagID~"="~data~">");
                        // No throw: tag is just ignored
                    }
                    catch (Exception e) {
                        logger.error ("Unknown error occured" ~ ErrInFile ~ ':');
                        logger.error (e.msg);
                        throwMTErr (e.msg);     // Fatal to Reader
                    }
                } else comment = false;         // cancel comment status now
            }
            else if (fbuf[pos] == '{') {
                if (comment) {                  // simple block comment
                    uint depth = 0;             // depth of embedded comment blocks
                    while (true) {
                        fbufIncrement (pos);
                        if (fbuf[pos] == '}') {
                            if (depth == 0) break;
                            else --depth;
                        } else if (fbuf[pos] == '{')
                            ++depth;
                    }
                    comment = false;            // end of this comment
                } else {
                    return pos;                 // next section coming up; we are done
                }
            }
            else if (fbuf[pos] == '!') {        // possibly a comment; check next char
                comment = true;                 // starting a comment (or an error)
                                                // variable is reset at end of comment
            } else                              // must be an error
                throwMTErr ("Invalid character (or sequence starting \"!\") outside of tag" ~ ErrInFile, new MTSyntaxException);
        }
        // if code execution reaches here, we're at EOF
        // possible error: last character was ! (but don't bother checking since it's inconsequential)
        return pos;
    }

    /* Parses fbuf for a section marker. Already knows fbuf[pos] == '{'.
     *
     * On return pos is the index just after the closing '}' and the returned ID is the text
     * between the braces. Throws MTSyntaxException if the marker is not of the form {id}.
     */
    private ID fbufReadSecMarker (ref size_t pos) {
        // at this point pos is whatever a parseSection run returned
        // since we haven't hit EOF, fbuf[pos] MUST be '{' so no need to check
        fbufIncrement(pos);

        size_t start = pos;
        for (; pos < fbuf.length; ++pos)
            if (fbuf[pos] == '}' || fbuf[pos] == '{') break;

        // Either we ran off the end of the buffer or hit a second '{' before the closing '}'.
        if (pos >= fbuf.length || fbuf[pos] != '}')
            throwMTErr ("Bad section tag format: not {id}" ~ ErrInFile, new MTSyntaxException);

        ID id = cast(ID) fbuf[start..pos];
        fbufIncrement(pos);
        return id;
    }

    /* Increments pos and checks it hasn't hit fbuf.length . */
    private void fbufIncrement(ref size_t pos) {
        ++pos;
        if (pos >= fbuf.length) throwMTErr("Unexpected EOF" ~ ErrInFile, new MTSyntaxException);
    }

    /* Marks the reader as fatally broken, logs msg and throws exc. Never returns normally. */
    private void throwMTErr (char[] msg, MTException exc = new MTException) {
        fatal = true;       // if anyone catches the error and tries to do anything --- we're dead now
        logger.error (msg); // report the error
        throw exc;          // and signal our error
    }
    //END METHODS: PRIVATE
}
497
498
499 /**
500 * Class for reading a mergetag binary file.
501 *
502 * Currently only a dummy class: a MTNotImplementedException will be thrown if created.
503 */
class MTBReader : IReader
{
    /** Construct from a file name; forwards to the path-based constructor. */
    public this (char[] path, DataSet ds = null, bool rdHeader = false)
    {
        this (new FilePath (path), ds, rdHeader);
    }

    /** Not implemented: always throws MTNotImplementedException. */
    public this (PathView path, DataSet ds = null, bool rdHeader = false)
    {
        throw new MTNotImplementedException;
    }

    /// Get the DataSet (this stub always returns null)
    DataSet dataset ()
    {
        return null;
    }

    /// Set the DataSet (ignored by this stub)
    void dataset (DataSet) {}

    /// Set the dataSecCreator (ignored by this stub)
    void dataSecCreator (IDataSection delegate (ID)) {}

    /// Get identifiers for all sections (this stub always returns an empty array)
    ID[] getSectionNames ()
    {
        return [];
    }

    /// Commence reading (no-op in this stub)
    void read () {}

    /// ditto
    void read (ID[] secSet) {}

    /// ditto
    void read (View!(ID) secSet) {}
}