comparison tango/tango/text/stream/StreamIterator.d @ 132:1700239cab2e trunk

[svn r136] MAJOR UNSTABLE UPDATE!!! Initial commit after moving to Tango instead of Phobos. Lots of bugfixes... This build is not suitable for most things.
author lindquist
date Fri, 11 Jan 2008 17:57:40 +0100
parents
children
comparison
equal deleted inserted replaced
131:5825d48b27d1 132:1700239cab2e
1 /*******************************************************************************
2
3 copyright: Copyright (c) 2004 Kris Bell. All rights reserved
4
5 license: BSD style: $(LICENSE)
6
7 version: Initial release: December 2005
8
9 author: Kris
10
11 *******************************************************************************/
12
13 module tango.text.stream.StreamIterator;
14
15 public import tango.io.Buffer;
16
17 private import Text = tango.text.Util;
18
19 private import tango.io.model.IConduit;
20
21 /*******************************************************************************
22
23 The base class for a set of stream iterators. These operate
24 upon a buffered input stream, and are designed to deal with
25 partial content. That is, stream iterators go to work the
26 moment any data becomes available in the buffer. Contrast
27 this behaviour with the tango.text.Util iterators, which
28 operate upon the extent of an array.
29
30 There are two types of iterators supported; exclusive and
31 inclusive. The former are the more common kind, where a token
32 is delimited by elements that are considered foreign. Examples
33 include space, comma, and end-of-line delineation. Inclusive
34 tokens are just the opposite: they look for patterns in the
35 text that should be part of the token itself - everything else
36 is considered foreign. Currently tango.text.stream includes the
37 exclusive variety only.
38
39 Each pattern is exposed to the client as a slice of the original
40 content, where the slice is transient. If you need to retain the
41 exposed content, then you should .dup it appropriately.
42
43 The content provided to these iterators is intended to be fully
44 read-only. All current tokenizers abide by this rule, but it is
45 possible a user could mutate the content through a token slice.
46 To enforce the desired read-only aspect, the code would have to
47 introduce redundant copying or the compiler would have to support
48 read-only arrays.
49
50 See LineIterator, CharIterator, RegexIterator, QuotedIterator,
51 and SimpleIterator
52
53 *******************************************************************************/
54
class StreamIterator(T) : InputStream, Buffered
{
        protected T[]   slice,          // the current token, as returned by get()
                        pushed;         // token handed back via push(); null when none
        private IBuffer input;          // buffered source the scanner operates upon

        /***********************************************************************

                The pattern scanner, implemented via subclasses.

                It is handed to the buffer by consume(). Implementations
                are expected to report their outcome via notFound() or
                found(), and to record a matched token through the
                protected set (content, start, end) helper

        ***********************************************************************/

        abstract protected uint scan (void[] data);

        /***********************************************************************

                Instantiate with an optional stream. When no stream is
                provided here, set() must be called before the iterator
                is used, since every other operation relies upon the
                underlying buffer

        ***********************************************************************/

        this (InputStream stream = null)
        {
                if (stream)
                    set (stream);
        }

        /***********************************************************************

                Set the provided stream as the scanning source. The
                stream must not be null. Returns this iterator, to
                support call chaining

        ***********************************************************************/

        final StreamIterator set (InputStream stream)
        {
                assert (stream);
                input = Buffer.share (stream);
                return this;
        }

        /***********************************************************************

                Return the current token as a slice of the content.
                The slice is transient: .dup it if it must be retained

        ***********************************************************************/

        final T[] get ()
        {
                return slice;
        }

        /***********************************************************************

                Push one token back into the stream, to be returned by
                the subsequent call to next(). The token is returned
                once only, after which scanning of the input resumes

                Push null to cancel a prior assignment

        ***********************************************************************/

        final StreamIterator push (T[] token)
        {
                pushed = token;
                return this;
        }

        /**********************************************************************

                Iterate over the set of tokens. This should really
                provide read-only access to the tokens, but D does
                not support that at this time

                The delegate is invoked at least once: the content
                remaining at end of stream (possibly empty) is exposed
                as the final token. Iteration stops early when the
                delegate returns non-zero, and that value is returned

        **********************************************************************/

        int opApply (int delegate(inout T[]) dg)
        {
                bool more;
                int  result;

                do {
                   more = consume;
                   result = dg (slice);
                   } while (more && !result);
                return result;
        }

        /**********************************************************************

                Iterate over a set of tokens, exposing a token count
                starting at zero

                Behaves as the single-argument opApply(), with the
                count incremented after each delegate invocation

        **********************************************************************/

        int opApply (int delegate(inout int, inout T[]) dg)
        {
                bool more;
                int  result,
                     tokens;

                do {
                   more = consume;
                   result = dg (tokens, slice);
                   ++tokens;
                   } while (more && !result);
                return result;
        }

        /***********************************************************************

                Locate the next token. Returns the token if found, null
                otherwise. Null indicates an end of stream condition. To
                sweep a conduit for lines using method next():
                ---
                auto lines = new LineIterator!(char) (new FileConduit("myfile"));
                while (lines.next)
                       Cout (lines.get).newline;
                ---

                Alternatively, we can extract one line from a conduit:
                ---
                auto line = (new LineIterator!(char) (new FileConduit("myfile"))).next;
                ---

                The difference between next() and foreach() is that the
                latter processes all tokens in one go, whereas the former
                processes in a piecemeal fashion. To wit:
                ---
                foreach (line; new LineIterator!(char) (new FileConduit("myfile")))
                         Cout(line).newline;
                ---

                Note that a token exposed via push() is returned immediately
                when available, taking priority over the input stream itself.
                It is consumed by this call, and becomes the current token
                so that get() agrees with the value returned here

        ***********************************************************************/

        final T[] next ()
        {
                if (pushed.ptr)
                   {
                   // hand the pushed token back exactly once; without the
                   // reset every later call would return it again, and the
                   // input would never be consumed
                   slice = pushed;
                   pushed = null;
                   return slice;
                   }

                if (consume() || slice.length)
                    return slice;
                return null;
        }

        /***********************************************************************

                Set the content of the current slice to the elements
                content [start .. end]. Returns end, unchanged

        ***********************************************************************/

        protected final uint set (T* content, uint start, uint end)
        {
                slice = content [start .. end];
                return end;
        }

        /***********************************************************************

                Called when a scanner fails to find a matching pattern.
                This may cause more content to be loaded, and a rescan
                initiated

                Returns IConduit.Eof, which the scanner hands back to
                the buffer

        ***********************************************************************/

        protected final uint notFound ()
        {
                return IConduit.Eof;
        }

        /***********************************************************************

                Invoked when a scanner matches a pattern. The provided
                value should be the index of the last element of the
                matching pattern, which is converted back to a void[]
                index.

                The result is the byte offset one element beyond i,
                hence the scaling by T.sizeof

        ***********************************************************************/

        protected final uint found (uint i)
        {
                return (i + 1) * T.sizeof;
        }

        /***********************************************************************

                See if set of characters holds a particular instance.
                Returns true when match is present within set, and
                false otherwise (including when set is empty)

        ***********************************************************************/

        protected final bool has (T[] set, T match)
        {
                foreach (T c; set)
                         if (match is c)
                             return true;
                return false;
        }

        /***********************************************************************

                Consume the next token and place it in 'slice'. Returns
                true when there are potentially more tokens

                When the buffer reports that the scanner found nothing
                further, whatever content remains readable is exposed
                as the final token and false is returned

        ***********************************************************************/

        private bool consume ()
        {
                if (input.next (&scan))
                    return true;

                // no further match: expose the remaining content, with
                // the byte length converted into a count of T elements
                auto tmp = input.slice (input.readable);
                slice = (cast(T*) tmp.ptr) [0 .. tmp.length/T.sizeof];
                return false;
        }


        /**********************************************************************/
        /************************ Buffered Interface **************************/
        /**********************************************************************/


        /***********************************************************************

                Return the associated buffer. This is null until a
                stream has been assigned via the constructor or set()

        ***********************************************************************/

        final IBuffer buffer ()
        {
                return input;
        }

        /**********************************************************************/
        /********************** InputStream Interface *************************/
        /**********************************************************************/


        /***********************************************************************

                Return the host conduit, as reported by the buffer

        ***********************************************************************/

        final IConduit conduit ()
        {
                return input.conduit;
        }

        /***********************************************************************

                Read from conduit into a target array. The provided dst
                will be populated with content from the conduit.

                Returns the number of bytes read, which may be less than
                requested in dst

                The request is forwarded to the buffer as is; it does
                not affect the current token

        ***********************************************************************/

        uint read (void[] dst)
        {
                return input.read (dst);
        }

        /***********************************************************************

                Clear any buffered content. Returns whatever the
                buffer's own clear() returns

        ***********************************************************************/

        final InputStream clear ()
        {
                return input.clear;
        }

        /***********************************************************************

                Close the input

        ***********************************************************************/

        final void close ()
        {
                input.close;
        }
}
340
341