Mercurial > projects > ldc
comparison tango/tango/text/stream/StreamIterator.d @ 132:1700239cab2e trunk
[svn r136] MAJOR UNSTABLE UPDATE!!!
Initial commit after moving to Tango instead of Phobos.
Lots of bugfixes...
This build is not suitable for most things.
author | lindquist |
---|---|
date | Fri, 11 Jan 2008 17:57:40 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
131:5825d48b27d1 | 132:1700239cab2e |
---|---|
1 /******************************************************************************* | |
2 | |
3 copyright: Copyright (c) 2004 Kris Bell. All rights reserved | |
4 | |
5 license: BSD style: $(LICENSE) | |
6 | |
7 version: Initial release: December 2005 | |
8 | |
9 author: Kris | |
10 | |
11 *******************************************************************************/ | |
12 | |
13 module tango.text.stream.StreamIterator; | |
14 | |
15 public import tango.io.Buffer; | |
16 | |
17 private import Text = tango.text.Util; | |
18 | |
19 private import tango.io.model.IConduit; | |
20 | |
21 /******************************************************************************* | |
22 | |
23 The base class for a set of stream iterators. These operate | |
24 upon a buffered input stream, and are designed to deal with | |
25 partial content. That is, stream iterators go to work the | |
26 moment any data becomes available in the buffer. Contrast | |
27 this behaviour with the tango.text.Util iterators, which | |
28 operate upon the extent of an array. | |
29 | |
30 There are two types of iterators supported; exclusive and | |
31 inclusive. The former are the more common kind, where a token | |
32 is delimited by elements that are considered foreign. Examples | |
33 include space, comma, and end-of-line delineation. Inclusive | |
34 tokens are just the opposite: they look for patterns in the | |
35 text that should be part of the token itself - everything else | |
36 is considered foreign. Currently tango.text.stream includes the | |
37 exclusive variety only. | |
38 | |
39 Each pattern is exposed to the client as a slice of the original | |
40 content, where the slice is transient. If you need to retain the | |
41 exposed content, then you should .dup it appropriately. | |
42 | |
43 The content provided to these iterators is intended to be fully | |
44 read-only. All current tokenizers abide by this rule, but it is | |
45 possible a user could mutate the content through a token slice. | |
46 To enforce the desired read-only aspect, the code would have to | |
47 introduce redundant copying or the compiler would have to support | |
48 read-only arrays. | |
49 | |
50 See LineIterator, CharIterator, RegexIterator, QuotedIterator, | |
51 and SimpleIterator | |
52 | |
53 *******************************************************************************/ | |
54 | |
55 class StreamIterator(T) : InputStream, Buffered | |
56 { | |
57 protected T[] slice, | |
58 pushed; | |
59 private IBuffer input; | |
60 | |
61 /*********************************************************************** | |
62 | |
63 The pattern scanner, implemented via subclasses. consume() | |
64 hands it to the buffer as the delegate that looks for a token | |
65 ***********************************************************************/ | |
66 | |
67 abstract protected uint scan (void[] data); | |
68 | |
69 /*********************************************************************** | |
70 | |
71 Construct an iterator, optionally attaching an input stream. | |
72 When omitted, no source is attached until set() is called | |
73 ***********************************************************************/ | |
74 | |
75 this (InputStream stream = null) | |
76 { | |
77 if (stream !is null) | |
78 set (stream); | |
79 } | |
80 | |
81 /*********************************************************************** | |
82 | |
83 Set the provided stream as the scanning source. The buffer used | |
84 for scanning becomes Buffer.share (stream); returns this iterator | |
85 ***********************************************************************/ | |
86 | |
87 final StreamIterator set (InputStream stream) | |
88 { | |
89 assert (stream); | |
90 input = Buffer.share (stream); | |
91 return this; | |
92 } | |
93 | |
94 /*********************************************************************** | |
95 | |
96 Return the current token as a slice of the content. The slice | |
97 is transient: .dup it if the content must be retained | |
98 ***********************************************************************/ | |
99 | |
100 final T[] get () | |
101 { | |
102 return slice; | |
103 } | |
104 | |
105 /*********************************************************************** | |
106 | |
107 Push one token back into the stream, to be returned by a | |
108 subsequent call to next(). Only one token is held: each push overwrites the last | |
109 | |
110 Push null to cancel a prior assignment | |
111 | |
112 ***********************************************************************/ | |
113 | |
114 final StreamIterator push (T[] token) | |
115 { | |
116 pushed = token; | |
117 return this; | |
118 } | |
119 | |
120 /********************************************************************** | |
121 | |
122 Support foreach: pass each token to the delegate in turn, | |
123 until consume() reports no more or the delegate returns non-zero. | |
124 Tokens are not read-only, as D does not support that at this time | |
125 | |
126 **********************************************************************/ | |
127 | |
128 int opApply (int delegate(inout T[]) dg) | |
129 { | |
130 int status = 0; | |
131 bool pending = true; | |
132 while (pending && !status) | |
133 { | |
134 pending = consume (); | |
135 status = dg (slice); | |
136 } | |
137 return status; | |
138 } | |
139 | |
140 /********************************************************************** | |
141 | |
142 Support foreach with a counter: each token is passed to the | |
143 delegate along with a token count starting at zero | |
144 | |
145 **********************************************************************/ | |
146 | |
147 int opApply (int delegate(inout int, inout T[]) dg) | |
148 { | |
149 int status = 0; | |
150 int index = 0; | |
151 bool pending = true; | |
152 while (pending && !status) | |
153 { | |
154 pending = consume (); | |
155 status = dg (index, slice); | |
156 ++index; | |
157 } | |
158 return status; | |
159 } | |
160 | |
161 /*********************************************************************** | |
162 | |
163 Locate the next token. Returns the token if found, null | |
164 otherwise. Null indicates an end of stream condition. To | |
165 sweep a conduit for lines using method next(): | |
166 --- | |
167 auto lines = new LineIterator!(char) (new FileConduit("myfile")); | |
168 while (lines.next) | |
169 Cout (lines.get).newline; | |
170 --- | |
171 | |
172 Alternatively, we can extract one line from a conduit: | |
173 --- | |
174 auto line = (new LineIterator!(char) (new FileConduit("myfile"))).next; | |
175 --- | |
176 | |
177 The difference between next() and foreach() is that the | |
178 latter processes all tokens in one go, whereas the former | |
179 processes in a piecemeal fashion. To wit: | |
180 --- | |
181 foreach (line; new LineIterator!(char) (new FileConduit("myfile"))) | |
182 Cout(line).newline; | |
183 --- | |
184 | |
185 Note that a token exposed via push() takes priority over the | |
186 input stream: it is returned once, and becomes the get() token | |
187 | |
188 ***********************************************************************/ | |
189 | |
190 final T[] next () | |
191 { | |
192 auto token = pushed; | |
193 pushed = null; // hand a pushed token out once only, else next() would repeat it forever | |
194 if (token.ptr) | |
195 return slice = token; // keep get() in step with the token being returned | |
196 | |
197 return (consume() || slice.length) ? slice : null; | |
198 } | |
199 | |
200 /*********************************************************************** | |
201 | |
202 Set the content of the current slice to content [start .. end], | |
203 and return the provided end index | |
204 ***********************************************************************/ | |
205 | |
206 protected final uint set (T* content, uint start, uint end) | |
207 { | |
208 slice = content [start .. end]; | |
209 return end; | |
210 } | |
211 | |
212 /*********************************************************************** | |
213 | |
214 Called when a scanner fails to find a matching pattern. | |
215 Returns IConduit.Eof, which may cause more content to be | |
216 loaded and a rescan initiated | |
217 | |
218 ***********************************************************************/ | |
219 | |
220 protected final uint notFound () | |
221 { | |
222 return IConduit.Eof; | |
223 } | |
224 | |
225 /*********************************************************************** | |
226 | |
227 Invoked when a scanner matches a pattern. The provided | |
228 value should be the index of the last element of the | |
229 matching pattern, which is converted back to a void[] | |
230 index: (i + 1) elements, scaled by T.sizeof | |
231 | |
232 ***********************************************************************/ | |
233 | |
234 protected final uint found (uint i) | |
235 { | |
236 return (i + 1) * T.sizeof; | |
237 } | |
238 | |
239 /*********************************************************************** | |
240 | |
241 Linear search: true when 'match' occurs anywhere within 'set' | |
242 | |
243 ***********************************************************************/ | |
244 | |
245 protected final bool has (T[] set, T match) | |
246 { | |
247 for (size_t i = 0; i < set.length; ++i) | |
248 if (set[i] is match) | |
249 return true; | |
250 return false; | |
251 } | |
252 | |
253 /*********************************************************************** | |
254 | |
255 Consume the next token and place it in 'slice'. Returns | |
256 true when there are potentially more tokens. On false, 'slice' | |
257 is set from the content sliced out of the buffer instead | |
258 ***********************************************************************/ | |
259 | |
260 private bool consume () | |
261 { | |
262 if (input.next (&scan)) | |
263 return true; | |
264 // no match: take buffer.readable worth of content (presumably all that remains) as the slice | |
265 auto tmp = input.slice (buffer.readable); | |
266 slice = (cast(T*) tmp.ptr) [0 .. tmp.length/T.sizeof]; | |
267 return false; | |
268 } | |
269 | |
270 | |
271 /**********************************************************************/ | |
272 /************************ Buffered Interface **************************/ | |
273 /**********************************************************************/ | |
274 | |
275 | |
276 /*********************************************************************** | |
277 | |
278 Return the associated buffer, i.e. the one obtained in set (InputStream) | |
279 | |
280 ***********************************************************************/ | |
281 | |
282 final IBuffer buffer () | |
283 { | |
284 return input; | |
285 } | |
286 | |
287 /**********************************************************************/ | |
288 /********************** InputStream Interface *************************/ | |
289 /**********************************************************************/ | |
290 | |
291 | |
292 /*********************************************************************** | |
293 | |
294 Return the host conduit, as reported by the associated buffer | |
295 | |
296 ***********************************************************************/ | |
297 | |
298 final IConduit conduit () | |
299 { | |
300 return input.conduit; | |
301 } | |
302 | |
303 /*********************************************************************** | |
304 | |
305 Read into a target array, by way of the associated buffer. | |
306 The provided dst will be populated with content. | |
307 | |
308 Returns the number of bytes read, which may be less than | |
309 requested in dst | |
310 | |
311 ***********************************************************************/ | |
312 | |
313 uint read (void[] dst) | |
314 { | |
315 return input.read (dst); | |
316 } | |
317 | |
318 /*********************************************************************** | |
319 | |
320 Clear any buffered content, by way of the associated buffer's clear | |
321 | |
322 ***********************************************************************/ | |
323 | |
324 final InputStream clear () | |
325 { | |
326 return input.clear; | |
327 } | |
328 | |
329 /*********************************************************************** | |
330 | |
331 Close the input, by way of the associated buffer's close | |
332 | |
333 ***********************************************************************/ | |
334 | |
335 final void close () | |
336 { | |
337 input.close; | |
338 } | |
339 } | |
340 | |
341 |