Mercurial > projects > ldc
comparison tango/tango/io/compress/BzipStream.d @ 132:1700239cab2e trunk
[svn r136] MAJOR UNSTABLE UPDATE!!!
Initial commit after moving to Tango instead of Phobos.
Lots of bugfixes...
This build is not suitable for most things.
author | lindquist |
---|---|
date | Fri, 11 Jan 2008 17:57:40 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
131:5825d48b27d1 | 132:1700239cab2e |
---|---|
1 /******************************************************************************* | |
2 | |
3 copyright: Copyright (C) 2007 Daniel Keep. All rights reserved. | |
4 | |
5 license: BSD style: $(LICENSE) | |
6 | |
7 version: Initial release: July 2007 | |
8 | |
9 author: Daniel Keep | |
10 | |
11 *******************************************************************************/ | |
12 | |
13 module tango.io.compress.BzipStream; | |
14 | |
15 private import tango.io.compress.c.bzlib; | |
16 | |
17 private import tango.core.Exception : IOException; | |
18 | |
19 private import tango.io.Conduit : InputFilter, OutputFilter; | |
20 | |
21 private import tango.io.model.IConduit : InputStream, OutputStream, IConduit; | |
22 | |
/* Size, in bytes, of the scratch buffers the filters allocate for moving
 * data to and from bzip2. The value is arbitrary; nobody has benchmarked
 * alternatives yet.
 */
private const BUFFER_SIZE = 4 * 1024;

// Default bzip2 block size, in hundreds of kilobytes.
private const DEFAULT_BLOCKSIZE = 9;

// Work factor handed to BZ2_bzCompressInit by BzipOutput's constructor.
private const DEFAULT_WORKFACTOR = 0;
34 | |
35 /******************************************************************************* | |
36 | |
37 This output filter can be used to perform compression of data into a bzip2 | |
38 stream. | |
39 | |
40 *******************************************************************************/ | |
41 | |
class BzipOutput : OutputFilter
{
    /***************************************************************************

        This enumeration represents several pre-defined compression block
        sizes, measured in hundreds of kilobytes.  See the documentation for
        the BzipOutput class' constructor for more details.

    ***************************************************************************/

    enum BlockSize : int
    {
        Normal = 9,
        Fast = 1,
        Best = 9,
    }

    private
    {
        // True while bzs holds live compressor state that must be released.
        bool bzs_valid = false;
        bz_stream bzs;
        // Scratch buffer bzip2 compresses into before we forward to host.
        ubyte[] out_chunk;
        // Running total of compressed bytes accepted by host.
        size_t _written = 0;
    }

    /***************************************************************************

        Constructs a new bzip2 compression filter.  You need to pass in the
        stream that the compression filter will write to.  If you are using
        this filter with a conduit, the idiom to use is:

        ---
        auto output = new BzipOutput(myConduit.output);
        output.write(myContent);
        ---

        blockSize relates to the size of the window bzip2 uses when
        compressing data and determines how much memory is required to
        decompress a stream.  It is measured in hundreds of kilobytes.

        According to the bzip2 documentation, there is no dramatic difference
        between the various block sizes, so the default should suffice in most
        cases.

        BlockSize.Normal (the default) is the same as BlockSize.Best
        (or 9).  The blockSize may be any integer between 1 and 9 inclusive.

        Throws a BzipException if blockSize is out of range or if bzip2
        fails to initialise.

    ***************************************************************************/

    this(OutputStream stream, int blockSize = BlockSize.Normal)
    {
        if( blockSize < 1 || blockSize > 9 )
            throw new BzipException("bzip2 block size must be between"
                    " 1 and 9");

        super(stream);
        out_chunk = new ubyte[BUFFER_SIZE];

        auto ret = BZ2_bzCompressInit(&bzs, blockSize, 0, DEFAULT_WORKFACTOR);
        if( ret != BZ_OK )
            throw new BzipException(ret);

        bzs_valid = true;
    }

    ~this()
    {
        // Release the compressor state if commit/close never got to it.
        if( bzs_valid )
            kill_bzs();
    }

    /***************************************************************************

        Compresses the given data to the underlying conduit.

        Returns the number of bytes from src that were compressed, or
        IConduit.Eof if the underlying stream stopped accepting data.
        Writing an empty array is a no-op that returns 0.

        Throws a BzipClosedException if the filter has already been
        committed, and a BzipException if bzip2 reports an error (in which
        case the filter is invalidated).

    ***************************************************************************/

    uint write(void[] src)
    {
        check_valid();

        // libbzip2 answers BZ_PARAM_ERROR to a BZ_RUN request that cannot
        // make any progress, which would needlessly kill the stream below.
        if( src.length == 0 )
            return 0;

        scope(failure) kill_bzs();

        bzs.avail_in = src.length;
        bzs.next_in = cast(ubyte*)src.ptr;

        do
        {
            bzs.avail_out = out_chunk.length;
            bzs.next_out = out_chunk.ptr;

            auto ret = BZ2_bzCompress(&bzs, BZ_RUN);
            if( ret != BZ_RUN_OK )
                throw new BzipException(ret);

            // Push the compressed bytes out to the stream, until it's either
            // written them all, or choked.
            if( !drain_chunk(out_chunk.length - bzs.avail_out) )
                return IConduit.Eof;
        }
        // Loop while we are still using up the whole output buffer
        while( bzs.avail_out == 0 );

        assert( bzs.avail_in == 0, "failed to compress all provided data" );

        return src.length;
    }

    /***************************************************************************

        This read-only property returns the number of compressed bytes that
        have been written to the underlying stream.  Following a call to
        either close or commit, this will contain the total compressed size of
        the input data stream.

    ***************************************************************************/

    size_t written()
    {
        return _written;
    }

    /***************************************************************************

        Commits any pending output (if the filter is still valid) and then
        closes the underlying stream.

    ***************************************************************************/

    void close()
    {
        if( bzs_valid ) commit;
        super.close;
    }

    /***************************************************************************

        Purge any buffered content.  Calling this will implicitly end the
        bzip2 stream, so it should not be called until you are finished
        compressing data.  Any calls to either write or commit after a
        compression filter has been committed will throw an exception.

        If the underlying stream reports Eof before the final bytes have
        been accepted, this returns early without finalising the filter.

    ***************************************************************************/

    void commit()
    {
        check_valid();
        scope(failure) kill_bzs();

        bzs.avail_in = 0;
        bzs.next_in = null;

        bool finished = false;

        do
        {
            bzs.avail_out = out_chunk.length;
            bzs.next_out = out_chunk.ptr;

            auto ret = BZ2_bzCompress(&bzs, BZ_FINISH);
            switch( ret )
            {
                case BZ_FINISH_OK:
                    // More trailing data is still pending; go around again.
                    break;

                case BZ_STREAM_END:
                    finished = true;
                    break;

                default:
                    throw new BzipException(ret);
            }

            // The host choked: nothing more can be delivered.  The state is
            // left for the destructor to release.
            if( !drain_chunk(out_chunk.length - bzs.avail_out) )
                return;
        }
        while( !finished );

        kill_bzs();
    }

    // Forwards the first `have` bytes of out_chunk to the host stream and
    // adds every accepted byte to _written.  Returns false if the host
    // reported Eof before the whole chunk was accepted; an empty chunk is
    // skipped without touching the host.
    private bool drain_chunk(size_t have)
    {
        auto pending = out_chunk[0..have];
        while( pending.length > 0 )
        {
            auto w = host.write(pending);
            if( w == IConduit.Eof )
                return false;

            pending = pending[w..$];
            _written += w;
        }
        return true;
    }

    // This function kills the stream: it deallocates the internal state, and
    // unsets the bzs_valid flag.
    private void kill_bzs()
    {
        check_valid();

        BZ2_bzCompressEnd(&bzs);
        bzs_valid = false;
    }

    // Asserts that the stream is still valid and usable (except that this
    // check doesn't get elided with -release).
    private void check_valid()
    {
        if( !bzs_valid )
            throw new BzipClosedException;
    }
}
265 | |
266 /******************************************************************************* | |
267 | |
268 This input filter can be used to perform decompression of bzip2 streams. | |
269 | |
270 *******************************************************************************/ | |
271 | |
class BzipInput : InputFilter
{
    private
    {
        // True while bzs holds live decompressor state that must be released.
        bool bzs_valid = false;
        bz_stream bzs;
        // Scratch buffer holding compressed bytes read from host.
        ubyte[] in_chunk;
    }

    /***************************************************************************

        Constructs a new bzip2 decompression filter.  You need to pass in the
        stream that the decompression filter will read from.  If you are using
        this filter with a conduit, the idiom to use is:

        ---
        auto input = new BzipInput(myConduit.input);
        input.read(myContent);
        ---

        The small argument, if set to true, instructs bzip2 to perform
        decompression using half the regular amount of memory, at the cost of
        running at half speed.

        Throws a BzipException if bzip2 fails to initialise.

    ***************************************************************************/

    this(InputStream stream, bool small=false)
    {
        super(stream);
        in_chunk = new ubyte[BUFFER_SIZE];

        auto ret = BZ2_bzDecompressInit(&bzs, 0, small?1:0);
        if( ret != BZ_OK )
            throw new BzipException(ret);

        bzs_valid = true;
    }

    ~this()
    {
        // Release the decompressor state if the stream never reached its end.
        if( bzs_valid )
            kill_bzs();
    }

    /***************************************************************************

        Decompresses data from the underlying conduit into a target array.

        Returns the number of bytes stored into dst, which may be less than
        requested.  IConduit.Eof is returned only when the underlying
        conduit is exhausted and nothing was stored into dst by this call.

        Once the end of the bzip2 stream has been decoded the filter is
        invalidated, so later calls throw a BzipClosedException.  A
        BzipException is thrown (and the filter invalidated) if bzip2
        reports an error.

    ***************************************************************************/

    uint read(void[] dst)
    {
        check_valid();
        scope(failure) kill_bzs();

        bool finished = false;

        bzs.avail_out = dst.length;
        bzs.next_out = cast(ubyte*)dst.ptr;

        do
        {
            // Refill the input buffer only once bzip2 has consumed it all.
            if( bzs.avail_in == 0 )
            {
                auto len = host.read(in_chunk);
                if( len == IConduit.Eof )
                {
                    // Hand back whatever was already decoded during this
                    // call instead of discarding it; the next call will
                    // report Eof.
                    if( bzs.avail_out < dst.length )
                        return dst.length - bzs.avail_out;

                    return IConduit.Eof;
                }

                bzs.avail_in = len;
                bzs.next_in = in_chunk.ptr;
            }

            auto ret = BZ2_bzDecompress(&bzs);
            if( ret == BZ_STREAM_END )
            {
                kill_bzs();
                finished = true;
            }
            else if( ret != BZ_OK )
                throw new BzipException(ret);
        }
        while( !finished && bzs.avail_out > 0 );

        return dst.length - bzs.avail_out;
    }

    /***************************************************************************

        Invalidates this filter and clears the underlying stream.  Any
        further read or clear call throws a BzipClosedException.

    ***************************************************************************/

    InputStream clear()
    {
        check_valid();

        // TODO: What should this method do?  We don't do any heap allocation,
        // so there's really nothing to clear...  For now, just invalidate the
        // stream...
        kill_bzs();
        super.clear();
        return this;
    }

    // This function kills the stream: it deallocates the internal state, and
    // unsets the bzs_valid flag.
    private void kill_bzs()
    {
        check_valid();

        BZ2_bzDecompressEnd(&bzs);
        bzs_valid = false;
    }

    // Asserts that the stream is still valid and usable (except that this
    // check doesn't get elided with -release).
    private void check_valid()
    {
        if( !bzs_valid )
            throw new BzipClosedException;
    }
}
397 | |
398 /******************************************************************************* | |
399 | |
400 This exception is thrown when an error occurs in the underlying bzip2 | |
401 library. | |
402 | |
403 *******************************************************************************/ | |
404 | |
class BzipException : IOException
{
    /// Builds an exception whose message is the symbolic name of the given
    /// bzip2 return code ("BZ_UNKNOWN" for codes not listed in codeName).
    this(in int code)
    {
        super(codeName(code));
    }

    /// Builds an exception carrying a free-form message.
    this(char[] msg)
    {
        super(msg);
    }

    // Maps a bzip2 return code to the name of its BZ_* constant.
    private char[] codeName(in int code)
    {
        char[] name;

        switch( code )
        {
            case BZ_OK:                 name = "BZ_OK";                 break;
            case BZ_RUN_OK:             name = "BZ_RUN_OK";             break;
            case BZ_FLUSH_OK:           name = "BZ_FLUSH_OK";           break;
            case BZ_FINISH_OK:          name = "BZ_FINISH_OK";          break;
            case BZ_STREAM_END:         name = "BZ_STREAM_END";         break;
            case BZ_SEQUENCE_ERROR:     name = "BZ_SEQUENCE_ERROR";     break;
            case BZ_PARAM_ERROR:        name = "BZ_PARAM_ERROR";        break;
            case BZ_MEM_ERROR:          name = "BZ_MEM_ERROR";          break;
            case BZ_DATA_ERROR:         name = "BZ_DATA_ERROR";         break;
            case BZ_DATA_ERROR_MAGIC:   name = "BZ_DATA_ERROR_MAGIC";   break;
            case BZ_IO_ERROR:           name = "BZ_IO_ERROR";           break;
            case BZ_UNEXPECTED_EOF:     name = "BZ_UNEXPECTED_EOF";     break;
            case BZ_OUTBUFF_FULL:       name = "BZ_OUTBUFF_FULL";       break;
            case BZ_CONFIG_ERROR:       name = "BZ_CONFIG_ERROR";       break;
            default:                    name = "BZ_UNKNOWN";
        }

        return name;
    }
}
442 | |
443 /******************************************************************************* | |
444 | |
445 This exception is thrown if you attempt to perform a read, write or flush | |
446 operation on a closed bzip2 filter stream. This can occur if the input | |
447 stream has finished, or an output stream was flushed. | |
448 | |
449 *******************************************************************************/ | |
450 | |
class BzipClosedException : IOException
{
    /// Creates the exception with its fixed "closed stream" message.
    this()
    {
        super("cannot operate on closed bzip2 stream");
    }
}
458 | |
459 /* ***************************************************************************** | |
460 | |
461 This section contains a simple unit test for this module. It is hidden | |
462 behind a debug(UnitTest) condition because it introduces additional dependencies. | |
463 | |
464 ***************************************************************************** */ | |
465 | |
466 debug(UnitTest): | |
467 | |
468 import tango.io.GrowBuffer : GrowBuffer; | |
469 | |
unittest
{
    // Plain-text sample that gets compressed and then decompressed again.
    const char[] message =
        "All dwarfs are by nature dutiful, serious, literate, obedient "
        "and thoughtful people whose only minor failing is a tendency, "
        "after one drink, to rush at enemies screaming \"Arrrrrrgh!\" and "
        "axing their legs off at the knee.";

    // The bzip2 stream the compressor is expected to emit for `message`.
    const ubyte[] message_z = [
        0x42, 0x5a, 0x68, 0x39, 0x31, 0x41, 0x59, 0x26,
        0x53, 0x59, 0x40, 0x98, 0xbe, 0xaa, 0x00, 0x00,
        0x16, 0xd5, 0x80, 0x10, 0x00, 0x70, 0x05, 0x20,
        0x00, 0x3f, 0xef, 0xde, 0xe0, 0x30, 0x00, 0xac,
        0xd8, 0x8a, 0x3d, 0x34, 0x6a, 0x6d, 0x4c, 0x4f,
        0x24, 0x31, 0x0d, 0x08, 0x98, 0x9b, 0x48, 0x9a,
        0x7a, 0x80, 0x00, 0x06, 0xa6, 0xd2, 0xa7, 0xe9,
        0xaa, 0x37, 0xa8, 0xd4, 0xf5, 0x3f, 0x54, 0x63,
        0x51, 0xe9, 0x2d, 0x4b, 0x99, 0xe1, 0xcc, 0xca,
        0xda, 0x75, 0x04, 0x42, 0x14, 0xc8, 0x6a, 0x8e,
        0x23, 0xc1, 0x3e, 0xb1, 0x8a, 0x16, 0xd2, 0x55,
        0x9a, 0x3e, 0x56, 0x1a, 0xb1, 0x83, 0x11, 0xa6,
        0x50, 0x4f, 0xd3, 0xed, 0x21, 0x40, 0xaa, 0xd1,
        0x95, 0x2c, 0xda, 0xcb, 0xb7, 0x0e, 0xce, 0x65,
        0xfc, 0x63, 0xf2, 0x88, 0x5b, 0x36, 0xda, 0xf0,
        0xf5, 0xd2, 0x9c, 0xe6, 0xf1, 0x87, 0x12, 0x87,
        0xce, 0x56, 0x0c, 0xf5, 0x65, 0x4d, 0x2e, 0xd6,
        0x27, 0x61, 0x2b, 0x74, 0xcd, 0x5e, 0x3b, 0x02,
        0x42, 0x4e, 0x0b, 0x80, 0xa8, 0x70, 0x04, 0x48,
        0xfb, 0x93, 0x4c, 0x41, 0xa8, 0x2a, 0xdf, 0xf2,
        0x67, 0x37, 0x28, 0xad, 0x38, 0xd4, 0x5c, 0xd6,
        0x34, 0x8b, 0x49, 0x5e, 0x90, 0xb2, 0x06, 0xce,
        0x0a, 0x83, 0x29, 0x84, 0x20, 0xd7, 0x5f, 0xc5,
        0xdc, 0x91, 0x4e, 0x14, 0x24, 0x10, 0x26, 0x2f,
        0xaa, 0x80];

    // Compress into an in-memory buffer and finalise the stream.
    scope sink = new GrowBuffer;
    scope packer = new BzipOutput(sink);
    packer.write(message);
    packer.close;

    // Both the reported size and the emitted bytes must match the reference.
    assert( packer.written == message_z.length );

    assert( message_z == cast(ubyte[])(sink.slice) );

    // Round-trip: decompress from the same buffer and compare to the input.
    scope unpacker = new BzipInput(sink);
    auto scratch = new ubyte[256];
    auto got = unpacker.read(scratch);
    scratch = scratch[0 .. got];

    assert( cast(ubyte[])message == scratch );
}
520 |