Mercurial > projects > mde
comparison mde/file/mergetag/Reader.d @ 81:d8fccaa45d5f
Moved file IO code from mde/mergetag to mde/file[/mergetag] and changed how some errors are caught.
author | Diggory Hardy <diggory.hardy@gmail.com> |
---|---|
date | Fri, 29 Aug 2008 11:59:43 +0100 |
parents | mde/mergetag/Reader.d@ea58f277f487 |
children | ac1e3fd07275 |
comparison
equal
deleted
inserted
replaced
80:ea58f277f487 | 81:d8fccaa45d5f |
---|---|
1 /* LICENSE BLOCK | |
2 Part of mde: a Modular D game-oriented Engine | |
3 Copyright © 2007-2008 Diggory Hardy | |
4 | |
5 This program is free software: you can redistribute it and/or modify it under the terms | |
6 of the GNU General Public License as published by the Free Software Foundation, either | |
7 version 2 of the License, or (at your option) any later version. | |
8 | |
9 This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; | |
10 without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
11 See the GNU General Public License for more details. | |
12 | |
13 You should have received a copy of the GNU General Public License | |
14 along with this program. If not, see <http://www.gnu.org/licenses/>. */ | |
15 | |
16 /************************************************************************************************** | |
17 * This module contains all reading functions, for both binary and text MergeTag files. | |
18 *************************************************************************************************/ | |
19 module mde.file.mergetag.Reader; | |
20 | |
21 // package imports | |
22 public import mde.file.mergetag.iface.IReader; | |
23 import mde.file.mergetag.DataSet; | |
24 import mde.file.mergetag.DefaultData; | |
25 import mde.file.mergetag.exception; | |
26 import mde.file.mergetag.internal; | |
27 | |
28 import tango.core.Exception; | |
29 | |
30 // tango imports | |
31 import tango.io.FilePath; | |
32 import tango.io.UnicodeFile; | |
33 import Util = tango.text.Util; | |
34 import ConvInt = tango.text.convert.Integer; | |
35 //import tango.util.collection.model.View : View; | |
36 import tango.util.collection.HashSet : HashSet; | |
37 import tango.util.log.Log : Log, Logger; | |
38 | |
// Module-level logger; assigned once by the module constructor below.
private Logger logger;

// Module constructor: fetch the logger registered under this module's log name.
static this ()
{
    logger = Log.getLogger ("mde.mergetag.Reader");
}
43 | |
44 // TODO: allow compressing with zlib for both binary and text? (.mtz, .mtt, .mtb extensions) | |
45 | |
/** Make an IReader class.
 *
 * Creates the reader matching the extension of path: an MTBReader for "mtb" or an MTTReader for
 * "mtt". ds and rdHeader are passed on unchanged to the reader's constructor.
 *
 * Params:
 *  path     = File to open. May be the result of findFile, which is null when no file was found.
 *  ds       = DataSet handed to the reader (see the reader constructors).
 *  rdHeader = Whether the reader should read the header (see the reader constructors).
 *
 * Throws:
 *  $(TABLE
 *      $(TR $(TH Exception) $(TH Thrown when))
 *      $(TR $(TD MTFileIOException) $(TD When path is null or the extension given is neither mtt nor mtb))
 *  )
 *  Exceptions thrown by the chosen reader's constructor are not caught here.
 */
IReader makeReader (FilePath path, DataSet ds = null, bool rdHeader = false) {
    // findFile returns null when neither candidate file exists; report that as an I/O error
    // instead of dereferencing a null reference on path.ext below.
    if (path is null)
        throw new MTFileIOException ("No mergetag file path given");

    char[] ext = path.ext;
    if (ext == "mtb") return new MTBReader (path, ds, rdHeader);
    if (ext == "mtt") return new MTTReader (path, ds, rdHeader);
    throw new MTFileIOException ("Invalid mergetag extension");
}
62 | |
/** Resolve a file path.
 *
 * Appends ".mtt" and ".mtb" to path and checks which of the two files exists.
 *
 * Returns:
 *  The existing file; if both exist, the text file when its modification time is strictly later
 *  than the binary file's, otherwise the binary file. Returns null if neither file exists or if
 *  path is null.
 */
FilePath findFile (char[] path) {
    if (path is null) return null;

    FilePath textFile   = new FilePath (path ~ ".mtt");
    FilePath binaryFile = new FilePath (path ~ ".mtb");

    // Query the binary file first, then the text file.
    bool haveBinary = binaryFile.exists;
    bool haveText   = textFile.exists;

    if (!haveText) {
        if (haveBinary) return binaryFile;
        return null;
    }
    if (!haveBinary) return textFile;

    // Both exist: take the latest version (roughly speaking...)
    if (textFile.modified > binaryFile.modified) return textFile;
    return binaryFile;
}
85 | |
/**
 * Class for reading a mergetag text file.
 *
 * Use as:
 * -----------------------
 * IReader foo;
 * try {
 *   foo = new MTTReader("foo.mtt");
 *   foo.read();
 * }
 * catch (MTException) {}
 * // get your data from foo.dataset.
 * -----------------------
 *
 * Throws:
 *  $(TABLE
 *      $(TR $(TH Exception) $(TH Thrown when))
 *      $(TR $(TD MTFileIOException) $(TD An error occurs while opening the file))
 *      $(TR $(TD MTFileFormatException) $(TD The file doesn't start with a recognised header/version))
 *      $(TR $(TD MTSyntaxException) $(TD A file syntax error occurs))
 *      $(TR $(TD MTException) $(TD An unexpected error occurs))
 *  )
 * Note that all exceptions extend MTException and when any exception is thrown the class is
 * rendered unusable: any subsequent calls to read will be ignored.
 *
 * Threading: Separate instances of Reader should be thread-safe provided access to the same
 * dataset is synchronized; i.e. no two readers referring to the same dataset should run
 * simultaneously. (The Reader class could be made thread-safe w.r.t. datasets, but
 * performance-wise I doubt it would be worth it.)
 * Do not run a single instance of Reader in multiple threads simultaneously.
 */
class MTTReader : IReader
{
    //BEGIN DATA
    /** Get or set the DataSet
     *
     * A container for all read data.
     *
     * This may be accessed from here; however it may be preferable to use an external reference
     * (passed to the class on initialisation).
     */
    DataSet dataset () { return _dataset; }
    void dataset (DataSet ds) /// ditto
    { _dataset = ds; }

    /** A delegate for creating new DataSections within the dataset.
     *
     * Allows a user-made class to be used in the DataSet instead of DefaultData (used if no
     * dataSecCreator exists). Also allows an existing class instance to be used instead of a new
     * one.
     *
     * This works by supplying a function which returns a reference to an instance of a class
     * implementing IDataSection. The function is passed the ID of the new section and may use this
     * to use different IDataSection classes for different sections.
     *
     * The function may also return null, in which case the section will be skipped. In the version
     * of read taking a set of sections to read, the section will not be marked as read and may
     * still be read later (assuming dataSecCreator returns non-null). However, in the version of
     * read not taking the set argument, all sections are set as read regardless, and the section
     * cannot be read later.
     */
    void dataSecCreator (IDataSection delegate (ID) dSC) {
        _dataSecCreator = dSC;
    }

private:
    static Logger logger;

    // Non-static symbols:
    final char[] ErrFile;       // added after ErrInFile to do the same without the "in " bit.
    final char[] ErrInFile;     // something like "in \"path/file.mtt\""

    final char[] fbuf;          // file is read into this
    MTFormatVersion.VERS fileVer = MTFormatVersion.VERS.INVALID;    // Remains INVALID until set otherwise by CTOR.

    IDataSection delegate (ID) _dataSecCreator = null;  // see property setter above

    size_t endOfHeader;         // position returned by parsing/skipping the header in the CTOR
    bool allRead = false;       // set by read() once every section has been parsed
    bool fatal = false;         // a fatal file error occurred; don't try to recover
    /* If the file is scanned for sections, the starting position of all sections are stored
     * in secTable. If this is empty, either no sections exist (and endOfHeader == fbuf.length)
     * or a section scan has not been run (read() with no section names doesn't need to do so).
     */
    struct SecMD {  // sec meta data
        static SecMD opCall (size_t _pos, bool _read) {
            SecMD ret;
            ret.pos = _pos;
            ret.read = _read;
            return ret;
        }
        size_t pos;     // position to start reading
        bool read;      // true if already read
    }
    SecMD [ID] secTable;

    DataSet _dataset;
    //END DATA

    //BEGIN METHODS: CTOR / DTOR
    static this () {
        logger = Log.getLogger ("mde.mergetag.read.Reader");
    }

    /** Tries to open file path and read it into a buffer.
     *
     * Params:
     *  path     = The name or FilePath of the file to open.
     *             Standard extensions are .mtt and .mtb for text and binary files respectively.
     *  ds       = If null create a new DataSet, else use existing DataSet ds and merge read
     *             data into it.
     *  rdHeader = If true, read the header like a standard section. Doesn't read the header by
     *             default since if it's not requested it's likely not wanted.
     *
     * Memory:
     * This currently works by loading the whole file into memory at once. This should be fine most
     * of the time, but could potentially be a problem. Changing this would mean significant
     * changes to the way the code works.
     */
    /* Ideas for implementing a partial-loading memory model:
     *  Use a conduit directly.
     *  Use a fiber to do the parsing; let it switch back when it runs out of memory.
     *  Redesign the code so it never needs to look backwards in the buffer?
     *
     * Major problem: reading only some sections and keeping references to other sections
     * would no longer be possible.
     */
    public this (char[] path, DataSet ds = null, bool rdHeader = false) {
        this (new FilePath (path), ds, rdHeader);
    }
    /** ditto */
    public this (FilePath path, DataSet ds = null, bool rdHeader = false) {
        // Create a dataset or use an existing one
        if (ds !is null) _dataset = ds;
        else _dataset = new DataSet();

        // Open & read the file
        try {   // Supports unicode files with a BOM; defaults to UTF8 when there isn't a BOM:
            scope file = new UnicodeFile!(char) (path, Encoding.Unknown);
            fbuf = cast(char[]) file.read();
        } catch (Exception e) {
            throwMTErr ("Error reading file: " ~ e.msg, new MTFileIOException);
        }
        // Remember the file name so that we can report errors (somewhat) informatively:
        ErrFile = path.path ~ path.file;
        ErrInFile = " in \"" ~ ErrFile ~ '"';

        // Version checking & matching header section tag:
        // the file must begin with "{MTxx}" where xx is the two-character version string.
        if (fbuf.length < 6 || fbuf[0] != '{' || fbuf[1] != 'M' || fbuf[2] != 'T' || fbuf[5] != '}')
            throwMTErr("Not a valid MergeTag text file" ~ ErrInFile, new MTFileFormatException);
        fileVer = MTFormatVersion.parseString (fbuf[3..5]);
        if (fileVer == MTFormatVersion.VERS.INVALID)
            throwMTErr("Unrecognised MergeTag version: MT" ~ fbuf[3..5] ~ ErrInFile, new MTFileFormatException);

        // Header reading/skipping:
        if (rdHeader) { // only bother actually reading it if it was requested
            // If already existing, merge; else create a new DefaultData.
            if (!_dataset.header) _dataset.header = new DefaultData;
            endOfHeader = parseSection (6, cast(IDataSection) _dataset.header);
        }
        else endOfHeader = parseSection (6,null);
    }
    //END METHODS: CTOR / DTOR

    //BEGIN METHODS: PUBLIC
    /** Scans for sections if not already done and returns a list of IDs.
     *
     * Won't work (will return an empty array) if all sections have already been read without
     * scanning for sections. Also returns an empty array after a fatal error.
     */
    public ID[] getSectionNames () {
        if (fatal) return [];
        if (!secTable.length) read([]);     // scan for sections
        return secTable.keys;
    }

    /** Reads (some) sections of the file into data. Note that sections will never be _read twice.
     *
     * To be more accurate, the file is copied into a buffer by this(). read() then parses the
     * contents of this buffer, and stores the contents in dataset.
     *
     * Each section read is stored in a DataSection class. By default this is an instance of
     * DefaultData; this can be customised (see dataSecCreator).
     *
     * If secSet is provided, reading is restricted to sections given in secSet, otherwise all
     * sections are read. Sections given in secSet but not found in the file are not reported as an
     * error. Suggested: supply a HashSet!(uint) as the View!(ID). An ArrayBag!(ID) as used is not a
     * good choice, except that in this case it's empty.
     *
     * All versions do nothing if every section has already been read by read() or if a fatal
     * error has previously occurred in this reader.
     *
     * Merging:
     * Where a section already exists in the DataSet (when either the section is given more than
     * once in the file, or it was read from a different file by another reader) it is merged.
     * Entries already in the DataSet take priority.
     *
     * Performance:
     * Note that loading only desired sections like this still parses the sections not
     * read (although it does not try to understand the type or data fields), so there is only a
     * small performance advantage to this where other sections do exist in the file. There is also
     * some overhead in only partially reading the file to keep track of where other sections are so
     * that the entire file need not be re-read if further (or all remaining) sections are read
     * later.
     */
    public void read () {
        // Same guard as read(View!(ID)): without it a second call would re-parse the whole
        // buffer, and a call after a fatal error would carry on with a broken reader.
        if (allRead || fatal) return;

        if (secTable.length) {
            // A section scan has been run: parse every section not yet marked as read.
            foreach (ID id, ref SecMD smd; secTable) {
                if (!smd.read) {
                    IDataSection ds = getOrCreateSec (id);
                    parseSection (smd.pos, ds);
                    // allRead is set true so there's no point setting smd.read = true
                }
            }
        } else {                    // this time we don't need to use secTable
            for (size_t pos = endOfHeader; pos < fbuf.length;) {
                ID id = fbufReadSecMarker (pos);
                IDataSection ds = getOrCreateSec (id);
                pos = parseSection (pos, ds);
            }
        }

        allRead = true;
    }
    /** ditto */
    public void read (ID[] secSet) {
        // Copy the IDs into a HashSet and defer to the View!(ID) version.
        HashSet!(ID) hs = new HashSet!(ID);
        foreach (id; secSet) hs.add(id);
        read (hs);
    }
    /** ditto */
    public void read (View!(ID) secSet) {
        if (allRead || fatal) return;   // never do anything in either case

        if (secTable.length) {
            // Section positions are already known: jump straight to the requested ones.
            foreach (ID id; secSet) {
                SecMD* psmd = id in secTable;
                if (psmd && !psmd.read) {                   // may not exist
                    IDataSection ds = getOrCreateSec (id);
                    parseSection (psmd.pos, ds);
                    if (ds !is null) psmd.read = true;      // getOrCreateSec may return null
                }
            }
        } else {
            // First pass over the file: record every section's position while reading the
            // requested sections and skipping the rest.
            for (size_t pos = endOfHeader; pos < fbuf.length;) {
                ID id = fbufReadSecMarker (pos);
                secTable[id] = SecMD(pos,false);    // add to table
                if (secSet.contains(id)) {
                    IDataSection ds = getOrCreateSec (id);
                    pos = parseSection (pos, ds);
                    if (ds !is null) secTable[id].read = true;
                } else {
                    pos = parseSection (pos, null);     // skip section
                }
            }
        }
    }
    //END METHODS: PUBLIC

    //BEGIN METHODS: PRIVATE
    /* Utility function for read
     * Look for a section; return it if it exists otherwise create a new section:
     *  use _dataSecCreator if it exists or just create a DefaultData if not.
     * However if _dataSecCreator returns null don't add it to the dataset.
     */
    private IDataSection getOrCreateSec (ID id) {
        IDataSection* i = id in _dataset.sec;
        if (i) return *i;
        else {
            IDataSection s;
            if (_dataSecCreator !is null) s = _dataSecCreator(id);
            else s = new DefaultData;
            if (s !is null) _dataset.sec[id] = s;
            return s;
        }
    }

    /* Reads a section, starting from index pos, finishing at the next section marker (returning
    the position of the start of the marker). pos should start after the section marker.

    After analysing tags, the function passes the type, ID and data to addTag.
    If dsec is null the section is parsed but no tags are passed on (the section is skipped).

    NOTE: from performance tests on indexing char[]'s and dereferencing char*'s, the char*'s are
    slightly faster, but a tiny difference isn't worth the extra effort/risk of using char*'s.
    */
    private size_t parseSection (size_t pos, IDataSection dsec) {
        debug scope (failure)
            logger.trace ("MTTReader.parseSection: failure");
        /* Searches fbuf starting from start to find one of <=>| and stops at its index.

        If quotable then be quote-aware for single and double quotes.
        Note: there's no length restriction for the content of the quote since it could be a single
        non-ascii UTF-8 char which would look like several chars.
        */
        void fbufLocateDataTagChar (ref size_t pos, bool quotable) {
            while (true) {
                fbufIncrement (pos);

                if ((fbuf[pos] >= '<' && fbuf[pos] <= '>') || fbuf[pos] == '|') return;
                else if (quotable) {
                    char c = fbuf[pos];
                    if (c == '\'' || c == '"') {
                        fbufIncrement(pos);
                        while (fbuf[pos] != c) {
                            // Skip the escaped character; fbufIncrement below still
                            // bounds-checks the resulting position.
                            if (fbuf[pos] == '\\') ++pos;   // escape seq.
                            fbufIncrement(pos);
                        }
                    }
                }
            }
        }

        // Used to ignore a tag (if it starts !< or !{ or should otherwise be ignored):
        bool comment = false;
        for (; pos < fbuf.length; ++pos) {
            if (Util.isSpace(fbuf[pos])) continue;  // whitespace
            else if (fbuf[pos] == '<') {            // data tag
                char[] ErrDTAG = "Bad data tag format: not <type|id=data>" ~ ErrInFile;

                // Type section of tag:
                size_t pos_s = pos + 1;
                fbufLocateDataTagChar (pos, false); // find end of type section
                if (fbuf[pos] != '|') throwMTErr (ErrDTAG, new MTSyntaxException);
                char[] type = fbuf[pos_s..pos];

                // ID section of tag:
                pos_s = pos + 1;
                fbufLocateDataTagChar (pos, false); // find end of ID section
                if (fbuf[pos] != '=') throwMTErr (ErrDTAG, new MTSyntaxException);
                ID tagID = cast(ID) fbuf[pos_s..pos];

                // Data section of tag:
                pos_s = pos + 1;
                fbufLocateDataTagChar (pos, true);  // find end of data section
                if (fbuf[pos] != '>') throwMTErr (ErrDTAG, new MTSyntaxException);
                char[] data = fbuf[pos_s..pos];

                if (!comment && dsec !is null) {
                    type = Util.trim(type);
                    try {
                        dsec.addTag (type, tagID, data);
                    }
                    catch (TextException e) {
                        logger.error ("TextException while reading " ~ ErrFile ~ ":");  // following a parse error
                        logger.error (e.msg);
                        logger.error ("Tag ignored: <"~type~"|"~tagID~"="~data~">");
                        // No throw: tag is just ignored
                    }
                    catch (Exception e) {
                        logger.error ("Unknown error occured" ~ ErrInFile ~ ':');
                        logger.error (e.msg);
                        throwMTErr (e.msg);             // Fatal to Reader
                    }
                } else comment = false;         // cancel comment status now
            }
            else if (fbuf[pos] == '{') {
                if (comment) {                  // simple block comment
                    uint depth = 0;             // depth of embedded comment blocks
                    while (true) {
                        fbufIncrement (pos);
                        if (fbuf[pos] == '}') {
                            if (depth == 0) break;
                            else --depth;
                        } else if (fbuf[pos] == '{')
                            ++depth;
                    }
                    comment = false;            // end of this comment
                } else {
                    return pos;                 // next section coming up; we are done
                }
            }
            else if (fbuf[pos] == '!') {        // possibly a comment; check next char
                comment = true;                 // starting a comment (or an error)
                                                // variable is reset at end of comment
            } else                              // must be an error
                throwMTErr ("Invalid character (or sequence starting \"!\") outside of tag" ~ ErrInFile, new MTSyntaxException);
        }
        // if code execution reaches here, we're at EOF
        // possible error: last character was ! (but don't bother checking since it's inconsequential)
        return pos;
    }

    /* Parses fbuf for a section marker. Already knows fbuf[pos] == '{'.
     * Returns the ID between the braces and leaves pos just after the closing '}'.
     */
    private ID fbufReadSecMarker (ref size_t pos) {
        // at this point pos is whatever a parseSection run returned
        // since we haven't hit EOF, fbuf[pos] MUST be '{' so no need to check
        fbufIncrement(pos);

        size_t start = pos;
        for (; pos < fbuf.length; ++pos)
            if (fbuf[pos] == '}' || fbuf[pos] == '{') break;

        if (pos >= fbuf.length || fbuf[pos] != '}')
            throwMTErr ("Bad section tag format: not {id}" ~ ErrInFile, new MTSyntaxException);

        ID id = cast(ID) fbuf[start..pos];
        fbufIncrement(pos);
        return id;
    }

    /* Increments pos and checks it hasn't hit fbuf.length . */
    private void fbufIncrement(ref size_t pos) {
        ++pos;
        if (pos >= fbuf.length) throwMTErr("Unexpected EOF" ~ ErrInFile, new MTSyntaxException);
    }

    /* Marks the reader as fatally broken, logs msg and throws exc. */
    private void throwMTErr (char[] msg, MTException exc = new MTException) {
        fatal = true;       // if anyone catches the error and tries to do anything --- we're dead now
        logger.error (msg); // report the error
        throw exc;          // and signal our error
    }
    //END METHODS: PRIVATE
}
497 | |
498 | |
/**
 * Class for reading a mergetag binary file.
 *
 * Currently only a dummy class: a MTNotImplementedException will be thrown if created.
 * The remaining members exist only to satisfy IReader and do nothing.
 */
class MTBReader : IReader
{
    /// Wraps path in a FilePath and forwards to the constructor below (which throws).
    public this (char[] path, DataSet ds = null, bool rdHeader = false)
    {
        this (new FilePath (path), ds, rdHeader);
    }

    /// Always throws MTNotImplementedException; all arguments are ignored.
    public this (PathView path, DataSet ds = null, bool rdHeader = false)
    {
        throw new MTNotImplementedException;
    }

    /// Get the DataSet (always null for this dummy class)
    DataSet dataset ()
    {
        return null;
    }

    /// Set the DataSet (ignored)
    void dataset (DataSet) {}

    /// Set the dataSecCreator (ignored)
    void dataSecCreator (IDataSection delegate (ID)) {}

    /// Get identifiers for all sections (always an empty array)
    ID[] getSectionNames ()
    {
        return [];
    }

    /// Commence reading (does nothing)
    void read () {}

    /// ditto
    void read (ID[] secSet) {}

    /// ditto
    void read (View!(ID) secSet) {}
}