132
|
1 /*******************************************************************************
|
|
2
|
|
3 copyright: Copyright (c) 2004 Kris Bell. All rights reserved
|
|
4
|
|
5 license: BSD style: $(LICENSE)
|
|
6
|
|
7 version: Initial release: December 2005
|
|
8
|
|
9 author: Kris
|
|
10
|
|
11 *******************************************************************************/
|
|
12
|
|
13 module tango.text.stream.StreamIterator;
|
|
14
|
|
15 public import tango.io.Buffer;
|
|
16
|
|
17 private import Text = tango.text.Util;
|
|
18
|
|
19 private import tango.io.model.IConduit;
|
|
20
|
|
21 /*******************************************************************************
|
|
22
|
|
23 The base class for a set of stream iterators. These operate
|
|
24 upon a buffered input stream, and are designed to deal with
|
|
25 partial content. That is, stream iterators go to work the
|
|
26 moment any data becomes available in the buffer. Contrast
|
|
27 this behaviour with the tango.text.Util iterators, which
|
|
28 operate upon the extent of an array.
|
|
29
|
|
30 There are two types of iterators supported; exclusive and
|
|
31 inclusive. The former are the more common kind, where a token
|
|
32 is delimited by elements that are considered foreign. Examples
|
|
33 include space, comma, and end-of-line delineation. Inclusive
|
|
34 tokens are just the opposite: they look for patterns in the
|
|
35 text that should be part of the token itself - everything else
|
|
36 is considered foreign. Currently tango.text.stream includes the
|
|
37 exclusive variety only.
|
|
38
|
|
39 Each pattern is exposed to the client as a slice of the original
|
|
40 content, where the slice is transient. If you need to retain the
|
|
41 exposed content, then you should .dup it appropriately.
|
|
42
|
|
43 The content provided to these iterators is intended to be fully
|
|
44 read-only. All current tokenizers abide by this rule, but it is
|
|
45 possible a user could mutate the content through a token slice.
|
|
46 To enforce the desired read-only aspect, the code would have to
|
|
47 introduce redundant copying or the compiler would have to support
|
|
48 read-only arrays.
|
|
49
|
|
50 See LineIterator, CharIterator, RegexIterator, QuotedIterator,
|
|
51 and SimpleIterator
|
|
52
|
|
53 *******************************************************************************/
|
|
54
|
|
class StreamIterator(T) : InputStream, Buffered
{
        protected T[]           slice,          // the current (most recently located) token
                                pushed;         // token handed back via push(); null .ptr when empty
        private IBuffer         input;          // buffer being scanned; assigned by set()

        /***********************************************************************

                The pattern scanner, implemented via subclasses.

                A delegate to this method is handed to the buffer by
                consume(), so it is invoked with the content the buffer
                exposes. Implementations report their outcome through
                found() or notFound(), and record the located token via
                the protected set (content, start, end) helper.

        ***********************************************************************/

        abstract protected uint scan (void[] data);

        /***********************************************************************

                Instantiate with an optional input stream.

                When no stream is provided here, one must be attached
                via set() before any token is requested

        ***********************************************************************/

        this (InputStream stream = null)
        {
                if (stream)
                    set (stream);
        }

        /***********************************************************************

                Set the provided stream as the scanning source. The
                buffer to scan is obtained through Buffer.share().

                The stream must not be null. Returns this instance, to
                support call chaining

        ***********************************************************************/

        final StreamIterator set (InputStream stream)
        {
                assert (stream);
                input = Buffer.share (stream);
                return this;
        }

        /***********************************************************************

                Return the current token as a slice of the content.

                The slice is transient: .dup it if it must be retained

        ***********************************************************************/

        final T[] get ()
        {
                return slice;
        }

        /***********************************************************************

                Push one token back into the stream, to be returned by a
                subsequent call to next()

                Push null to cancel a prior assignment

                Only a single token is retained: pushing again replaces
                any token that has not yet been returned. Returns this
                instance, to support call chaining

        ***********************************************************************/

        final StreamIterator push (T[] token)
        {
                pushed = token;
                return this;
        }

        /**********************************************************************

                Iterate over the set of tokens. This should really
                provide read-only access to the tokens, but D does
                not support that at this time

                The delegate is invoked once per consume() call, which
                includes the trailing content exposed when no further
                match is found (that final token may be empty).
                Iteration stops early when the delegate returns a
                non-zero value, which is then returned to the caller.

                Note that a token set via push() is not consulted here

        **********************************************************************/

        int opApply (int delegate(inout T[]) dg)
        {
                bool more;
                int  result;

                do {
                   more = consume;
                   result = dg (slice);
                   } while (more && !result);
                return result;
        }

        /**********************************************************************

                Iterate over a set of tokens, exposing a token count
                starting at zero

                Behaves as the single-argument opApply, with the count
                incremented after each delegate invocation

        **********************************************************************/

        int opApply (int delegate(inout int, inout T[]) dg)
        {
                bool more;
                int  result,
                     tokens;

                do {
                   more = consume;
                   result = dg (tokens, slice);
                   ++tokens;
                   } while (more && !result);
                return result;
        }

        /***********************************************************************

                Locate the next token. Returns the token if found, null
                otherwise. Null indicates an end of stream condition. To
                sweep a conduit for lines using method next():
                ---
                auto lines = new LineIterator!(char) (new FileConduit("myfile"));
                while (lines.next)
                       Cout (lines.get).newline;
                ---

                Alternatively, we can extract one line from a conduit:
                ---
                auto line = (new LineIterator!(char) (new FileConduit("myfile"))).next;
                ---

                The difference between next() and foreach() is that the
                latter processes all tokens in one go, whereas the former
                processes in a piecemeal fashion. To wit:
                ---
                foreach (line; new LineIterator!(char) (new FileConduit("myfile")))
                         Cout(line).newline;
                ---

                Note that tokens exposed via push() are returned immediately
                when available, taking priority over the input stream itself.
                A pushed token is returned exactly once: it becomes the
                current token (so get() exposes it also) and the push-back
                slot is emptied, after which scanning of the input resumes

        ***********************************************************************/

        final T[] next ()
        {
                if (pushed.ptr)
                   {
                   // consume the pushed token; leaving it in place would
                   // make every subsequent call return it again, and the
                   // stream itself would never be consulted
                   slice = pushed;
                   pushed = null;
                   return slice;
                   }

                // a failed consume() may still have exposed trailing
                // content, which is returned as the final token
                if (consume() || slice.length)
                    return slice;
                return null;
        }

        /***********************************************************************

                Set the content of the current slice to the elements
                content [start .. end]. Returns end, unchanged

        ***********************************************************************/

        protected final uint set (T* content, uint start, uint end)
        {
                slice = content [start .. end];
                return end;
        }

        /***********************************************************************

                Called when a scanner fails to find a matching pattern.
                This may cause more content to be loaded, and a rescan
                initiated

                Returns IConduit.Eof, for the scanner to hand back to
                the buffer

        ***********************************************************************/

        protected final uint notFound ()
        {
                return IConduit.Eof;
        }

        /***********************************************************************

                Invoked when a scanner matches a pattern. The provided
                value should be the index of the last element of the
                matching pattern, which is converted back to a void[]
                index.

                The result is (i + 1) scaled by T.sizeof; that is, the
                byte offset just beyond the matching pattern

        ***********************************************************************/

        protected final uint found (uint i)
        {
                return (i + 1) * T.sizeof;
        }

        /***********************************************************************

                See if set of characters holds a particular instance.

                Returns true when any element of set is identical to
                match, false otherwise (including for an empty set)

        ***********************************************************************/

        protected final bool has (T[] set, T match)
        {
                foreach (T c; set)
                         if (match is c)
                             return true;
                return false;
        }

        /***********************************************************************

                Consume the next token and place it in 'slice'. Returns
                true when there are potentially more tokens

                A source must have been attached, via the constructor
                or set(), before this is invoked

        ***********************************************************************/

        private bool consume ()
        {
                // the constructor permits a null stream, so catch use
                // of an iterator which never had a source attached
                assert (input !is null);

                if (input.next (&scan))
                    return true;

                // no further match: expose whatever the buffer returns
                // for its readable extent as the last token, converting
                // the void[] length into a count of T elements.
                // NOTE(review): presumably slice() also consumes that
                // content from the buffer -- confirm against IBuffer
                auto tmp = input.slice (input.readable);
                slice = (cast(T*) tmp.ptr) [0 .. tmp.length/T.sizeof];
                return false;
        }


        /**********************************************************************/
        /************************ Buffered Interface **************************/
        /**********************************************************************/


        /***********************************************************************

                Return the associated buffer. This is null until a
                stream has been attached

        ***********************************************************************/

        final IBuffer buffer ()
        {
                return input;
        }

        /**********************************************************************/
        /********************** InputStream Interface *************************/
        /**********************************************************************/


        /***********************************************************************

                Return the host conduit, as reported by the buffer

        ***********************************************************************/

        final IConduit conduit ()
        {
                return input.conduit;
        }

        /***********************************************************************

                Read from conduit into a target array. The provided dst
                will be populated with content from the conduit.

                Returns the number of bytes read, which may be less than
                requested in dst

                The request is forwarded to the buffer as is

        ***********************************************************************/

        uint read (void[] dst)
        {
                return input.read (dst);
        }

        /***********************************************************************

                Clear any buffered content. The request is forwarded to
                the buffer, and its result is returned

        ***********************************************************************/

        final InputStream clear ()
        {
                return input.clear;
        }

        /***********************************************************************

                Close the input, by forwarding the request to the buffer

        ***********************************************************************/

        final void close ()
        {
                input.close;
        }
}
|
|
340
|
|
341
|