Mercurial > projects > mde
annotate mde/mergetag/read.d @ 3:485c98ecbd91
text.parse: fixed a small bug with char[]'s.
committer: Diggory Hardy <diggory.hardy@gmail.com>
author | Diggory Hardy <diggory.hardy@gmail.com> |
---|---|
date | Sat, 03 Nov 2007 16:06:06 +0000 |
parents | 78eb491bd642 |
children | 9a990644948c |
rev | line source |
---|---|
0 | 1 /************************************************************************************************** |
2 * This module contains all reading functions, for both binary and text MergeTag files. | |
3 * | |
4 * It publicly imports mde.mergetag.dataset. | |
5 *************************************************************************************************/ | |
6 | |
7 module mde.mergetag.read; | |
8 | |
9 // package imports | |
10 public import mde.mergetag.dataset; | |
11 import mde.mergetag.exception; | |
12 | |
13 // tango imports | |
14 import tango.io.UnicodeFile; | |
15 import Util = tango.text.Util; | |
16 import ConvInt = tango.text.convert.Integer; | |
17 import tango.util.collection.model.View : View; | |
18 import tango.util.collection.ArrayBag : ArrayBag; | |
19 import tango.util.log.Log : Log, Logger; | |
20 | |
21 // TODO: allow compressing with zlib for both binary and text? (.mtz, .mtt, .mtb extensions) | |
22 | |
23 // For now, all section & data tag IDs are uints. | |
24 // TODO: allow a lookup table or function to find a uint ID from a string ID | |
25 | |
26 /** | |
27 * Class for reading a file. | |
28 * | |
29 * Use as: | |
30 * ----------------------- | |
31 * auto foo = new Reader ("foo.mtt"); | |
32 * foo.read(); | |
33 * // get your data from foo.dataset. | |
34 * ----------------------- | |
35 */ | |
class Reader
{
//BEGIN DATA
    /**
     * A container for all read data.
     *
     * This may be accessed from here; however it may be preferable to use an external reference
     * (passed to the class on initialisation).
     */
    DataSet dataset;

    /** A table which, if filled, allows items in a text file to have a string ID.
     *
     * If a quoted string is given as a section or tag identifier and that string is a key in this
     * table, the corresponding ID is used. If the string is not a key, the section or tag carrying
     * it is skipped (internally this is signalled with an MTStringIDException, which is caught
     * within this class).
     */
    ID[char[]] indexTable;

    /** A function for creating new DataSections within the dataset.
     *
     * Allows a user-made class to be used in the DataSet instead of DefaultData.
     *
     * This works by supplying a function which returns a reference to an instance of a class
     * implementing DataSection. The function is passed the ID of the new section and may use this
     * to use different DataSection classes for different sections.
     */
    DataSection function (ID) dataSecCreator = null;

private:
    // Static symbols:
    typedef void delegate (TypeInfo,ID,char[]) readDelg;    // Delegate for accepting tags.

    static bool initialised = false;        // true once init() has filled the statics below
    static TypeInfo[char[]] typeTable;      // maps a type name used in a data tag to its TypeInfo
    static Logger logger;

    // Error messages as const variables. Could be loaded from files to support other languages?
    static const char[] ERR_FILEREAD = "Error reading file: ";
    static const char[] ERR_MTHEAD = "Not a valid MergeTag text file";
    static const char[] ERR_MTVER = "Unrecognised MergeTag version: MT";
    static const char[] ERR_EOF = "Unexpected EOF";
    static const char[] ERR_STAG = "Bad section tag format: not {id}";
    static const char[] ERR_DTAG = "Bad data tag format: not <type|id=data>";
    static const char[] ERR_CHAR = "Invalid character (or sequence starting \"!\") outside of tag";
    static const char[] ERR_IDINT = "Tag has invalid integer ID: not a valid uint value";

    // Non-static symbols:
    final char[] ErrInFile;     // something like " in \"path/file.mtt\""; appended to messages

    final char[] fbuf;          // file is read into this

    MTFormatVersion.VERS fileVer = MTFormatVersion.VERS.INVALID;    // Remains INVALID until set otherwise by CTOR.

    uint endOfHeader;           // index in fbuf at which the header section ends
    bool allRead = false;       // set by read() once every section has been read
    bool fatal = false;         // a fatal file error occurred; don't try to recover

    /* Meta data for one section of the file. */
    struct SecMD {
        static SecMD opCall (uint _pos, bool _read) {
            SecMD ret;
            ret.pos = _pos;
            ret.read = _read;
            return ret;
        }
        uint pos;               // position to start reading
        bool read;              // true if already read
    }
    /* If the file is scanned for sections, the starting position of all sections are stored
     * in secTable. If this is empty, either no sections exist (and endOfHeader == fbuf.length)
     * or a section scan has not been run (read() with no section names doesn't need to do so).
     *
     * Known limitation: the table holds one entry per ID, so when the same section ID occurs
     * more than once in a file only the last occurrence is remembered here.
     */
    SecMD [ID] secTable;
//END DATA

//BEGIN METHODS: CTOR / DTOR
    // Could be a static this(), but this way it's only called if the class is used.
    private void init () {
        init_addType!(bool);
        init_addType!(byte);
        init_addType!(short);
        init_addType!(int);
        init_addType!(long);
        init_addType!(ubyte);
        init_addType!(ushort);
        init_addType!(uint);
        init_addType!(ulong);
        init_addType!(char);
        init_addType!(float);
        init_addType!(double);
        init_addType!(real);
        init_addType!(bool[]);
        init_addType!(byte[]);
        init_addType!(short[]);
        init_addType!(int[]);
        init_addType!(long[]);
        init_addType!(ubyte[]);
        init_addType!(ushort[]);
        init_addType!(uint[]);
        init_addType!(ulong[]);
        init_addType!(char[]);
        init_addType!(float[]);
        init_addType!(double[]);
        init_addType!(real[]);
        // aliases:
        typeTable["string"] = typeid(char[]);
        typeTable["binary"] = typeid(ubyte[]);

        logger = Log.getLogger ("mde.mergetag.read.Reader");
        initialised = true;
    }
    // Registers T in typeTable under its own name (T.stringof).
    private static void init_addType(T) () {
        typeTable[T.stringof] = typeid(T);
    }

    /** Tries to open file path and read it into a buffer.
     *
     * Params:
     * path     = The name or FilePath of the file to open.
     *            Standard extensions are .mtt and .mtb for text and binary files respectively.
     * dataset_ = If null create a new DataSet, else use existing DataSet *dataset_ and merge read
     *            data into it.
     * rdHeader = If true, read the header like a standard section. Doesn't read the header by
     *            default since if it's not requested it's likely not wanted.
     *
     * Throws:
     * MTFileIOException if the file cannot be read; MTFileFormatException if the file does not
     * start with a valid {MTxx} marker of a recognised version; MTException on a malformed header.
     *
     * Memory:
     * This currently works by loading the whole file into memory at once. This should be fine most
     * of the time, but could potentially be a problem. Changing this would mean significant
     * changes to the way the code works.
     */
    /* Ideas for implementing a partial-loading memory model:
     *  Use a conduit directly.
     *  Use a fiber to do the parsing; let it switch back when it runs out of memory.
     *  Redesign the code so it never needs to look backwards in the buffer?
     *
     * Major problem: reading only some sections and keeping references to other sections
     * would no longer be possible.
     */
    public this (char[] path, DataSet* dataset_ = null, bool rdHeader = false) {
        this (new FilePath (path), dataset_, rdHeader);
    }
    /** ditto */
    public this (PathView path, DataSet* dataset_ = null, bool rdHeader = false) {
        if (!initialised) init();   // on-demand static this()

        // Create a dataset or use an existing one
        if (dataset_) dataset = *dataset_;
        else dataset = new DataSet();

        // Open & read the file
        try {   // Supports unicode files with a BOM; defaults to UTF8 when there isn't a BOM:
            scope file = new UnicodeFile!(char) (path, Encoding.Unknown);
            fbuf = cast(char[]) file.read();
        } catch (Exception e) {
            throwMTErr (ERR_FILEREAD ~ e.msg, new MTFileIOException);
        }
        // Remember the file name so that we can report errors (somewhat) informatively:
        ErrInFile = " in \"" ~ path.path ~ path.file ~ '"';

        // Version checking & matching header section tag:
        if (fbuf.length < 6 || fbuf[0] != '{' || fbuf[1] != 'M' || fbuf[2] != 'T' || fbuf[5] != '}')
            throwMTErr(ERR_MTHEAD ~ ErrInFile, new MTFileFormatException);
        fileVer = MTFormatVersion.parseString (fbuf[3..5]);
        if (fileVer == MTFormatVersion.VERS.INVALID)
            throwMTErr(ERR_MTVER ~ fbuf[3..5] ~ ErrInFile, new MTFileFormatException);

        // Header reading/skipping:
        if (rdHeader) {     // only bother actually reading it if it was requested
            // Merge into an existing header (e.g. from a DataSet passed in) rather than
            // discarding it; only create a new one when there is none.
            if (dataset.header is null) dataset.header = new DefaultData;
            endOfHeader = parseSection (6,&dataset.header.addTag);
        }
        else endOfHeader = parseSection (6,null);
    }
    // Was intended to close file, but file is closed within CTOR anyway.
    public ~this () {
    }
//END METHODS: CTOR / DTOR

//BEGIN METHODS: PUBLIC
    /** Scans for sections if not already done and returns a list of IDs.
     *
     * Sections whose string ID is not found in indexTable are not listed. Returns an empty list
     * if a fatal error has previously occurred.
     */
    public uint[] getSectionNames () {
        if (fatal) return [];
        if (!secTable.length) {
            for (uint pos = endOfHeader; pos < fbuf.length;) {
                try {
                    ID id = fbufReadSecMarker (pos);
                    secTable[id] = SecMD(pos,false);    // add to table
                } catch (MTStringIDException) {
                    // Unknown string ID: don't add to the table. pos has still been moved past
                    // the marker, so the section's content is skipped below like any other.
                }
                pos = parseSection (pos, null);
            }
        }
        return cast(uint[]) secTable.keys;
    }

    /** Reads (some) sections of the file into data. Note that sections will never be _read twice.
     *
     * To be more accurate, the file is copied into a buffer by this(). read() then parses the
     * contents of this buffer, and stores the contents in dataset.
     *
     * Each section read is stored in a DataSection class. By default this is an instance of
     * DefaultData; this can be customised (see dataSecCreator).
     *
     * If secSet is non-empty, reading is restricted to sections given in secSet, otherwise (secSet
     * empty or null) all sections are read. Sections given in secSet but not found in the file are
     * not reported as an error. Suggested: supply a HashSet!(uint) as the View!(ID). An
     * ArrayBag!(ID) as used is not a good choice, except that in this case it's empty.
     *
     * Does nothing if all sections have already been read or a fatal error has occurred.
     *
     * Merging:
     * Where a section already exists in the DataSet (when either the section is given more than
     * once in the file, or it was read from a different file by another reader) it is merged.
     * Entries already in the DataSet take priority.
     *
     * Performance:
     * Note that loading only desired sections like this still parses the sections not
     * read (although it does not try to understand the type or data fields), so there is only a
     * small performance advantage to this where other sections do exist in the file. There is also
     * some overhead in only partially reading the file to keep track of where other sections are so
     * that the entire file need not be re-read if further (or all remaining) sections are read
     * later.
     */
    public void read (View!(ID) secSet = new ArrayBag!(ID)) {
        if (allRead || fatal) return;   // never do anything in either case

        if (secSet !is null && secSet.size) {
            if (secTable.length) {
                // The file has been scanned already: jump straight to the wanted sections.
                foreach (ID id; secSet) {
                    SecMD* psmd = id in secTable;
                    if (psmd && !psmd.read) {   // may not exist
                        DataSection ds = getOrCreateSec (id);
                        parseSection (psmd.pos, &ds.addTag);
                        psmd.read = true;
                    }
                }
            } else {
                // Scan the whole file, filling secTable and reading wanted sections on the way.
                for (uint pos = endOfHeader; pos < fbuf.length;) {
                    ID id;
                    try {
                        id = fbufReadSecMarker (pos);
                    } catch (MTStringIDException) {
                        pos = parseSection (pos, null);     // unknown string ID: skip the section
                        continue;
                    }
                    secTable[id] = SecMD(pos,false);        // add to table
                    if (secSet.contains(id)) {
                        DataSection ds = getOrCreateSec (id);
                        pos = parseSection (pos, &ds.addTag);
                        secTable[id].read = true;
                    } else {
                        // Not wanted now: pos must still be advanced to the next section marker,
                        // otherwise the next iteration would misparse this section's content.
                        pos = parseSection (pos, null);
                    }
                }
            }
        } else {
            if (secTable.length) {
                foreach (ID id, ref SecMD smd; secTable) {
                    if (!smd.read) {
                        DataSection ds = getOrCreateSec (id);
                        parseSection (smd.pos, &ds.addTag);
                        smd.read = true;
                    }
                }
            } else {    // this time we don't need to use secTable
                for (uint pos = endOfHeader; pos < fbuf.length;) {
                    ID id;
                    try {
                        id = fbufReadSecMarker (pos);
                    } catch (MTStringIDException) {
                        pos = parseSection (pos, null);     // just skip the section
                        continue;
                    }
                    DataSection ds = getOrCreateSec (id);
                    pos = parseSection (pos, &ds.addTag);
                }
            }
            allRead = true;
        }
    }
//END METHODS: PUBLIC

//BEGIN METHODS: PRIVATE
    /* Reads a section, starting from index pos, finishing at the next section marker (returning
     * the position of the start of the marker) or at EOF (returning fbuf.length). pos should
     * start after the section marker.
     *
     * After analysing tags, the function passes the type, ID (possibly converted) and data to
     * addTag. If addTag is null the section is only scanned, not stored. Tags preceded by '!',
     * tags with a string ID not in indexTable and tags of an unknown type are not passed on.
     *
     * NOTE: from performance tests on indexing char[]'s and dereferencing char*'s, the char*'s are
     * slightly faster, but a tiny difference isn't worth the extra effort/risk of using char*'s.
     */
    private uint parseSection (uint pos, readDelg addTag) {
        bool comment = false;       // preceding char was !
        for (; pos < fbuf.length; ++pos) {
            if (Util.isSpace(fbuf[pos])) continue;      // whitespace
            else if (fbuf[pos] == '<') {                // data tag
                char[] type, data;
                ID tagID;

                // Type section of tag:
                fbufIncrement (pos);
                uint pos_s = pos;
                fbufLocateDataTagChar (pos, false);     // find end of type section
                if (fbuf[pos] != '|') throwMTErr (ERR_DTAG ~ ErrInFile);
                type = fbuf[pos_s..pos];
                // ID section of tag:
                fbufIncrement (pos);
                try {
                    tagID = fbufReadID (pos);   // read the ID, put pos at whatever's next
                } catch (MTStringIDException) {
                    comment = true;             // easiest way to ignore this tag
                }
                if (fbuf[pos] != '=') throwMTErr (ERR_DTAG ~ ErrInFile);
                // Data section of tag:
                fbufIncrement (pos);
                pos_s = pos;
                fbufLocateDataTagChar (pos, true);      // find end of data section
                if (fbuf[pos] != '>') throwMTErr (ERR_DTAG ~ ErrInFile);
                data = fbuf[pos_s..pos];

                if (!comment && addTag != null) {
                    TypeInfo* ti_p = Util.trim(type) in typeTable;
                    if (ti_p) addTag (*ti_p, tagID, data);
                    else logger.warn ("Type not supported: " ~ type);
                } else comment = false;         // cancel comment status now
            }
            else if (fbuf[pos] == '{') {
                if (comment) {                  // simple block comment
                    uint depth = 0;             // depth of embedded comment blocks
                    while (true) {
                        fbufIncrement (pos);
                        if (fbuf[pos] == '}') {
                            if (depth == 0) break;
                            else --depth;
                        } else if (fbuf[pos] == '{')
                            ++depth;
                    }
                    comment = false;            // end of this comment
                } else {
                    return pos;                 // next section coming up; we are done
                }
            }
            else if (fbuf[pos] == '!') {        // possibly a comment; check next char
                comment = true;                 // starting a comment (or an error)
                // variable is reset at end of comment
            } else                              // must be an error
                throwMTErr (ERR_CHAR ~ ErrInFile);
        }
        // if code execution reaches here, we're at EOF
        // possible error: last character was ! (but don't bother checking since it's inconsequential)
        return pos;
    }

    /* Look for a section; return it if it exists otherwise create a new section:
     * use dataSecCreator if it exists or just create a DefaultData if not.
     */
    DataSection getOrCreateSec (ID id) {
        DataSection* i = id in dataset.sec;
        if (i) return *i;
        return (dataset.sec[id] = (dataSecCreator != null) ? dataSecCreator(id) : new DefaultData);
    }

    /* Parses fbuf for a section marker. Already knows fbuf[pos] == '{'.
     *
     * On return, and also when MTStringIDException is thrown, pos is the index just after the
     * closing '}' (which may equal fbuf.length when the file ends with an empty section), so the
     * caller can always continue with parseSection (pos, ...).
     *
     * Throws: MTStringIDException if the marker holds a string ID not in indexTable;
     * MTException (via throwMTErr) if the marker is malformed.
     */
    private ID fbufReadSecMarker (inout uint pos) {
        // at this point pos is whatever a parseSection run returned
        // since we haven't hit EOF, fbuf[pos] MUST be '{' so no need to check
        fbufIncrement(pos);

        MTStringIDException unknownID = null;
        ID id;
        try {
            id = fbufReadID (pos);
        } catch (MTStringIDException e) {
            unknownID = e;      // finish consuming the marker first, then re-throw
        }
        if (fbuf[pos] != '}') throwMTErr (ERR_STAG ~ ErrInFile);
        ++pos;                  // no EOF check: an empty final section is valid

        if (unknownID !is null) throw unknownID;
        return id;
    }

    /* Parses fbuf from pos to read an ID: either an integer or a quoted string which is looked up
     * in indexTable.
     *
     * On return, and also when MTStringIDException is thrown, pos is the index of the first
     * non-space character following the ID.
     *
     * Throws: MTStringIDException if a string ID is not in indexTable; MTException (via
     * throwMTErr) on EOF or if an integer ID is missing or not within uint range.
     */
    private ID fbufReadID (inout uint pos) {
        while (Util.isSpace(fbuf[pos])) fbufIncrement(pos);     // skip any space
        if (fbuf[pos] == '"') {
            fbufIncrement(pos);
            uint start = pos;
            while (fbuf[pos] != '"') fbufIncrement(pos);
            ID* i_p = fbuf[start..pos] in indexTable;
            fbufIncrement(pos);     // step past the closing quote
            while (Util.isSpace(fbuf[pos])) fbufIncrement(pos); // skip any space
            if (i_p != null) return *i_p;       // looked-up value
            // FIXME: log a warning
            throw new MTStringIDException ();   // string not in look-up table
        } else {
            uint ate;
            long x = ConvInt.parse (fbuf[pos..$], 0, &ate);
            // ate == 0 means no characters were consumed, i.e. there is no number here at all.
            if (ate == 0 || x < 0L || x > 0xFFFF_FFFFL) throwMTErr (ERR_IDINT ~ ErrInFile);
            pos += ate;             // this is where ConvInt.parse stopped
            if (pos >= fbuf.length) throwMTErr (ERR_EOF ~ ErrInFile);   // number ran up to EOF
            while (Util.isSpace(fbuf[pos])) fbufIncrement(pos); // skip any space
            return cast(ID) x;
        }
    }

    /* Searches fbuf starting from pos to find one of <=>| and stops at its index.
     *
     * If quotable then be quote-aware for single and double quotes: the characters searched for
     * are ignored between a pair of matching quotes.
     * Note: there's no length restriction for the content of the quote since it could be a single
     * non-ascii UTF-8 char which would look like several chars.
     *
     * Throws: MTException (via throwMTErr) if EOF is reached first, so on return fbuf[pos] is
     * always valid.
     */
    private void fbufLocateDataTagChar (inout uint pos, bool quotable) {
        for (; pos < fbuf.length; ++pos) {
            char c = fbuf[pos];
            if ((c >= '<' && c <= '>') || c == '|') return;     // '<', '=', '>' are contiguous
            if (quotable && (c == '\'' || c == '"')) {
                // Skip to the matching closing quote (the loop's ++pos then steps past it).
                fbufIncrement(pos);
                while (fbuf[pos] != c) fbufIncrement(pos);
            }
        }
        throwMTErr (ERR_EOF ~ ErrInFile);       // ran off the end without finding a tag char
    }

    /* Increments pos and checks it hasn't hit fbuf.length . */
    private void fbufIncrement(inout uint pos) {
        ++pos;
        if (pos >= fbuf.length) throwMTErr(ERR_EOF ~ ErrInFile);
    }

    /* Marks the reader as unusable, logs msg as an error and throws exc. */
    private void throwMTErr (char[] msg, Exception exc = new MTException) {
        fatal = true;       // if anyone catches the error and tries to do anything --- we're dead now
        logger.error (msg); // report the error
        throw exc;          // and signal our error
    }
//END METHODS: PRIVATE

    invariant {
        // Check secTable is valid, but not if it's complete.
        // This is something I really wouldn't expect to fail.
        /+ um... this causes a lot of linker errors. Shouldn't be necessary anyway..
        foreach (ID id, SecMD smd; secTable) {
            uint pos = smd.pos;
            for (; true; --pos) {
                assert (pos);   // we should never reach 0
                if (fbuf[pos] == '{') break;
            }
            ++pos;
            assert (fbufReadID(pos) == id);
        }+/
    }
    /+ A unittest here is really not practical since a file must be read from. Suggestion: Involve
    + both reading and writing functions in a single unittest for the entire package mergetag.
    + This is just here to point anyone looking in the right direction...
    unittest {}
    +/
}