comparison tango/tango/io/compress/BzipStream.d @ 132:1700239cab2e trunk

[svn r136] MAJOR UNSTABLE UPDATE!!! Initial commit after moving to Tango instead of Phobos. Lots of bugfixes... This build is not suitable for most things.
author lindquist
date Fri, 11 Jan 2008 17:57:40 +0100
parents
children
comparison
equal deleted inserted replaced
131:5825d48b27d1 132:1700239cab2e
1 /*******************************************************************************
2
3 copyright: Copyright (C) 2007 Daniel Keep. All rights reserved.
4
5 license: BSD style: $(LICENSE)
6
7 version: Initial release: July 2007
8
9 author: Daniel Keep
10
11 *******************************************************************************/
12
13 module tango.io.compress.BzipStream;
14
15 private import tango.io.compress.c.bzlib;
16
17 private import tango.core.Exception : IOException;
18
19 private import tango.io.Conduit : InputFilter, OutputFilter;
20
21 private import tango.io.model.IConduit : InputStream, OutputStream, IConduit;
22
/* Size, in bytes, of the scratch buffers the filters in this module allocate
 * for talking to bzip2.  The value is arbitrary; nobody has benchmarked it
 * yet, so a better number may well exist.
 */
private const BUFFER_SIZE = 4 * 1024;

/* Defaults for the bzip2 block size and work factor. */
private const DEFAULT_BLOCKSIZE = 9;
private const DEFAULT_WORKFACTOR = 0;
34
35 /*******************************************************************************
36
37 This output filter can be used to perform compression of data into a bzip2
38 stream.
39
40 *******************************************************************************/
41
class BzipOutput : OutputFilter
{
    /***************************************************************************

        This enumeration represents several pre-defined compression block
        sizes, measured in hundreds of kilobytes.  See the documentation for
        the BzipOutput class' constructor for more details.

    ***************************************************************************/

    enum BlockSize : int
    {
        Normal = 9,
        Fast = 1,
        Best = 9,
    }

    private
    {
        // True while bzs holds initialised bzip2 state that still has to be
        // released with BZ2_bzCompressEnd.
        bool bzs_valid = false;
        bz_stream bzs;
        // Scratch buffer bzip2 compresses into before it is pushed to host.
        ubyte[] out_chunk;
        // Running count of compressed bytes handed to the host stream.
        size_t _written = 0;
    }

    /***************************************************************************

        Constructs a new bzip2 compression filter.  You need to pass in the
        stream that the compression filter will write to.  If you are using
        this filter with a conduit, the idiom to use is:

        ---
        auto output = new BzipOutput(myConduit.output);
        output.write(myContent);
        ---

        blockSize relates to the size of the window bzip2 uses when
        compressing data and determines how much memory is required to
        decompress a stream.  It is measured in hundreds of kilobytes.

        According to the bzip2 documentation, there is no dramatic difference
        between the various block sizes, so the default should suffice in most
        cases.

        BlockSize.Normal (the default) is the same as BlockSize.Best
        (or 9).  The blockSize may be any integer between 1 and 9 inclusive.

        Throws a BzipException if blockSize is out of range or if bzip2
        fails to initialise.

    ***************************************************************************/

    this(OutputStream stream, int blockSize = BlockSize.Normal)
    {
        if( blockSize < 1 || blockSize > 9 )
            throw new BzipException("bzip2 block size must be between"
                    " 1 and 9");

        super(stream);
        out_chunk = new ubyte[BUFFER_SIZE];

        // Third argument is bzip2's verbosity level; 0 keeps it silent.
        auto ret = BZ2_bzCompressInit(&bzs, blockSize, 0, DEFAULT_WORKFACTOR);
        if( ret != BZ_OK )
            throw new BzipException(ret);

        bzs_valid = true;
    }

    ~this()
    {
        if( bzs_valid )
            kill_bzs();
    }

    /***************************************************************************

        Compresses the given data to the underlying conduit.

        Returns the number of bytes from src that were compressed, or
        IConduit.Eof if the underlying stream refused further output.
        Writing an empty array is a no-op that returns 0.

        Throws a BzipClosedException if the filter has already been
        committed, and a BzipException if bzip2 reports an error (in which
        case the filter is invalidated).

    ***************************************************************************/

    uint write(void[] src)
    {
        check_valid();

        // bzip2 answers BZ_PARAM_ERROR when a BZ_RUN call can make no
        // progress at all, so an empty write must never reach the library.
        if( src.length == 0 )
            return 0;

        scope(failure) kill_bzs();

        bzs.avail_in = src.length;
        bzs.next_in = cast(ubyte*)src.ptr;

        do
        {
            bzs.avail_out = out_chunk.length;
            bzs.next_out = out_chunk.ptr;

            auto ret = BZ2_bzCompress(&bzs, BZ_RUN);
            if( ret != BZ_RUN_OK )
                throw new BzipException(ret);

            if( !push_chunk(out_chunk.length - bzs.avail_out) )
                return IConduit.Eof;
        }
        // Loop on remaining input rather than on a full output buffer: with
        // the input exhausted and nothing left to emit, another BZ_RUN call
        // would make no progress and fail.  Any compressed bytes bzip2 is
        // still holding are emitted by the next write or by commit.
        while( bzs.avail_in > 0 );

        return src.length;
    }

    /***************************************************************************

        This read-only property returns the number of compressed bytes that
        have been written to the underlying stream.  Following a call to
        either close or commit, this will contain the total compressed size of
        the input data stream.

    ***************************************************************************/

    size_t written()
    {
        return _written;
    }

    /***************************************************************************

        Closes the filter.  If the bzip2 stream has not been ended yet it is
        committed first, then the close is forwarded to the superclass.

    ***************************************************************************/

    void close()
    {
        if( bzs_valid ) commit;
        super.close;
    }

    /***************************************************************************

        Purge any buffered content.  Calling this will implicitly end the
        bzip2 stream, so it should not be called until you are finished
        compressing data.  Any calls to either write or commit after a
        compression filter has been committed will throw an exception.

    ***************************************************************************/

    void commit()
    {
        check_valid();
        scope(failure) kill_bzs();

        bzs.avail_in = 0;
        bzs.next_in = null;

        bool finished = false;

        do
        {
            bzs.avail_out = out_chunk.length;
            bzs.next_out = out_chunk.ptr;

            auto ret = BZ2_bzCompress(&bzs, BZ_FINISH);
            switch( ret )
            {
                case BZ_FINISH_OK:
                    // More output is pending; go round again.
                    break;

                case BZ_STREAM_END:
                    finished = true;
                    break;

                default:
                    throw new BzipException(ret);
            }

            // The host hit Eof: give up quietly.  The bzip2 state is left
            // valid here, so the destructor still releases it.
            if( !push_chunk(out_chunk.length - bzs.avail_out) )
                return;
        }
        while( !finished );

        kill_bzs();
    }

    // Pushes the first `have` bytes of out_chunk to the host stream,
    // retrying until the host has accepted all of them, and adds what was
    // accepted to _written.  Does nothing when have is zero.  Returns false
    // if the host reported Eof before everything was written.
    private bool push_chunk(size_t have)
    {
        auto pending = out_chunk[0 .. have];
        while( pending.length > 0 )
        {
            auto w = host.write(pending);
            if( w == IConduit.Eof )
                return false;

            pending = pending[w .. $];
            _written += w;
        }
        return true;
    }

    // This function kills the stream: it deallocates the internal state, and
    // unsets the bzs_valid flag.
    private void kill_bzs()
    {
        check_valid();

        BZ2_bzCompressEnd(&bzs);
        bzs_valid = false;
    }

    // Asserts that the stream is still valid and usable (except that this
    // check doesn't get elided with -release).
    private void check_valid()
    {
        if( !bzs_valid )
            throw new BzipClosedException;
    }
}
265
266 /*******************************************************************************
267
268 This input filter can be used to perform decompression of bzip2 streams.
269
270 *******************************************************************************/
271
class BzipInput : InputFilter
{
    private
    {
        // True while bzs holds initialised bzip2 state that still has to be
        // released with BZ2_bzDecompressEnd.
        bool bzs_valid = false;
        bz_stream bzs;
        // Scratch buffer that compressed bytes are read into from host.
        ubyte[] in_chunk;
    }

    /***************************************************************************

        Constructs a new bzip2 decompression filter.  You need to pass in the
        stream that the decompression filter will read from.  If you are using
        this filter with a conduit, the idiom to use is:

        ---
        auto input = new BzipInput(myConduit.input);
        input.read(myContent);
        ---

        The small argument, if set to true, instructs bzip2 to perform
        decompression using half the regular amount of memory, at the cost of
        running at half speed.

        Throws a BzipException if bzip2 fails to initialise.

    ***************************************************************************/

    this(InputStream stream, bool small=false)
    {
        super(stream);
        in_chunk = new ubyte[BUFFER_SIZE];

        // Second argument is bzip2's verbosity level; 0 keeps it silent.
        auto ret = BZ2_bzDecompressInit(&bzs, 0, small?1:0);
        if( ret != BZ_OK )
            throw new BzipException(ret);

        bzs_valid = true;
    }

    ~this()
    {
        if( bzs_valid )
            kill_bzs();
    }

    /***************************************************************************

        Decompresses data from the underlying conduit into a target array.

        Returns the number of bytes stored into dst, which may be less than
        requested.  If the underlying stream reports Eof before the end of
        the bzip2 stream is seen, the bytes already decoded by this call are
        returned; IConduit.Eof is returned only when nothing was decoded.

        Once the end of the bzip2 stream has been reached the filter is
        invalidated, and further reads throw a BzipClosedException.  A
        BzipException is thrown (and the filter invalidated) if bzip2
        reports an error.

    ***************************************************************************/

    uint read(void[] dst)
    {
        check_valid();
        scope(failure) kill_bzs();

        bool finished = false;

        bzs.avail_out = dst.length;
        bzs.next_out = cast(ubyte*)dst.ptr;

        do
        {
            // Refill the input chunk only once bzip2 has consumed all of the
            // previous one; leftover input carries over between calls.
            if( bzs.avail_in == 0 )
            {
                auto len = host.read(in_chunk);
                if( len == IConduit.Eof )
                {
                    // Do not throw away output that an earlier iteration of
                    // this call already placed in dst.
                    auto got = dst.length - bzs.avail_out;
                    if( got > 0 )
                        return got;

                    return IConduit.Eof;
                }

                bzs.avail_in = len;
                bzs.next_in = in_chunk.ptr;
            }

            auto ret = BZ2_bzDecompress(&bzs);
            if( ret == BZ_STREAM_END )
            {
                kill_bzs();
                finished = true;
            }
            else if( ret != BZ_OK )
                throw new BzipException(ret);
        }
        while( !finished && bzs.avail_out > 0 );

        return dst.length - bzs.avail_out;
    }

    /***************************************************************************

        Clear any buffered content.  Note that this currently invalidates
        the decompression state: after calling clear, further reads throw a
        BzipClosedException.

    ***************************************************************************/

    InputStream clear()
    {
        check_valid();

        // TODO: What should this method do?  We don't do any heap allocation,
        // so there's really nothing to clear...  For now, just invalidate the
        // stream...
        kill_bzs();
        super.clear();
        return this;
    }

    // This function kills the stream: it deallocates the internal state, and
    // unsets the bzs_valid flag.
    private void kill_bzs()
    {
        check_valid();

        BZ2_bzDecompressEnd(&bzs);
        bzs_valid = false;
    }

    // Asserts that the stream is still valid and usable (except that this
    // check doesn't get elided with -release).
    private void check_valid()
    {
        if( !bzs_valid )
            throw new BzipClosedException;
    }
}
397
398 /*******************************************************************************
399
400 This exception is thrown when an error occurs in the underlying bzip2
401 library.
402
403 *******************************************************************************/
404
class BzipException : IOException
{
    /// Builds an exception whose message is the symbolic name of the given
    /// bzip2 return code (or "BZ_UNKNOWN" for an unrecognised value).
    this(in int code)
    {
        super(codeName(code));
    }

    /// Builds an exception carrying an arbitrary message.
    this(char[] msg)
    {
        super(msg);
    }

    // Maps a bzip2 return code onto its symbolic name.  Static because it
    // reads no instance state and is evaluated as an argument to super().
    private static char[] codeName(in int code)
    {
        char[] name;

        switch( code )
        {
            case BZ_OK:                 name = "BZ_OK";                 break;
            case BZ_RUN_OK:             name = "BZ_RUN_OK";             break;
            case BZ_FLUSH_OK:           name = "BZ_FLUSH_OK";           break;
            case BZ_FINISH_OK:          name = "BZ_FINISH_OK";          break;
            case BZ_STREAM_END:         name = "BZ_STREAM_END";         break;
            case BZ_SEQUENCE_ERROR:     name = "BZ_SEQUENCE_ERROR";     break;
            case BZ_PARAM_ERROR:        name = "BZ_PARAM_ERROR";        break;
            case BZ_MEM_ERROR:          name = "BZ_MEM_ERROR";          break;
            case BZ_DATA_ERROR:         name = "BZ_DATA_ERROR";         break;
            case BZ_DATA_ERROR_MAGIC:   name = "BZ_DATA_ERROR_MAGIC";   break;
            case BZ_IO_ERROR:           name = "BZ_IO_ERROR";           break;
            case BZ_UNEXPECTED_EOF:     name = "BZ_UNEXPECTED_EOF";     break;
            case BZ_OUTBUFF_FULL:       name = "BZ_OUTBUFF_FULL";       break;
            case BZ_CONFIG_ERROR:       name = "BZ_CONFIG_ERROR";       break;
            default:                    name = "BZ_UNKNOWN";
        }

        return name;
    }
}
442
443 /*******************************************************************************
444
445 This exception is thrown if you attempt to perform a read, write or flush
446 operation on a closed bzip2 filter stream. This can occur if the input
447 stream has finished, or an output stream was flushed.
448
449 *******************************************************************************/
450
class BzipClosedException : IOException
{
    // The one fixed diagnostic this exception ever carries.
    private const char[] closedMessage = "cannot operate on closed bzip2 stream";

    /// Builds the exception with its fixed message.
    this()
    {
        super(closedMessage);
    }
}
458
459 /* *****************************************************************************
460
461 This section contains a simple unit test for this module. It is hidden
462 behind a debug(UnitTest) condition because it introduces additional dependencies.
463
464 ***************************************************************************** */
465
466 debug(UnitTest):
467
468 import tango.io.GrowBuffer : GrowBuffer;
469
// Round-trip check: compress a known sentence, compare the result against a
// reference bzip2 stream byte for byte, then decompress it and compare with
// the original text.
unittest
{
    const char[] message =
        "All dwarfs are by nature dutiful, serious, literate, obedient "
        "and thoughtful people whose only minor failing is a tendency, "
        "after one drink, to rush at enemies screaming \"Arrrrrrgh!\" and "
        "axing their legs off at the knee.";

    // Reference compressed form of `message`.
    const ubyte[] message_z = [
        0x42, 0x5a, 0x68, 0x39, 0x31, 0x41, 0x59, 0x26,
        0x53, 0x59, 0x40, 0x98, 0xbe, 0xaa, 0x00, 0x00,
        0x16, 0xd5, 0x80, 0x10, 0x00, 0x70, 0x05, 0x20,
        0x00, 0x3f, 0xef, 0xde, 0xe0, 0x30, 0x00, 0xac,
        0xd8, 0x8a, 0x3d, 0x34, 0x6a, 0x6d, 0x4c, 0x4f,
        0x24, 0x31, 0x0d, 0x08, 0x98, 0x9b, 0x48, 0x9a,
        0x7a, 0x80, 0x00, 0x06, 0xa6, 0xd2, 0xa7, 0xe9,
        0xaa, 0x37, 0xa8, 0xd4, 0xf5, 0x3f, 0x54, 0x63,
        0x51, 0xe9, 0x2d, 0x4b, 0x99, 0xe1, 0xcc, 0xca,
        0xda, 0x75, 0x04, 0x42, 0x14, 0xc8, 0x6a, 0x8e,
        0x23, 0xc1, 0x3e, 0xb1, 0x8a, 0x16, 0xd2, 0x55,
        0x9a, 0x3e, 0x56, 0x1a, 0xb1, 0x83, 0x11, 0xa6,
        0x50, 0x4f, 0xd3, 0xed, 0x21, 0x40, 0xaa, 0xd1,
        0x95, 0x2c, 0xda, 0xcb, 0xb7, 0x0e, 0xce, 0x65,
        0xfc, 0x63, 0xf2, 0x88, 0x5b, 0x36, 0xda, 0xf0,
        0xf5, 0xd2, 0x9c, 0xe6, 0xf1, 0x87, 0x12, 0x87,
        0xce, 0x56, 0x0c, 0xf5, 0x65, 0x4d, 0x2e, 0xd6,
        0x27, 0x61, 0x2b, 0x74, 0xcd, 0x5e, 0x3b, 0x02,
        0x42, 0x4e, 0x0b, 0x80, 0xa8, 0x70, 0x04, 0x48,
        0xfb, 0x93, 0x4c, 0x41, 0xa8, 0x2a, 0xdf, 0xf2,
        0x67, 0x37, 0x28, 0xad, 0x38, 0xd4, 0x5c, 0xd6,
        0x34, 0x8b, 0x49, 0x5e, 0x90, 0xb2, 0x06, 0xce,
        0x0a, 0x83, 0x29, 0x84, 0x20, 0xd7, 0x5f, 0xc5,
        0xdc, 0x91, 0x4e, 0x14, 0x24, 0x10, 0x26, 0x2f,
        0xaa, 0x80];

    // Compress into an in-memory buffer.
    scope sink = new GrowBuffer;
    scope packer = new BzipOutput(sink);
    packer.write(message);
    packer.close();

    assert( packer.written == message_z.length );

    auto packed = cast(ubyte[])(sink.slice);
    assert( message_z == packed );

    // Decompress straight back out of the same buffer.
    scope unpacker = new BzipInput(sink);
    auto scratch = new ubyte[256];
    auto got = unpacker.read(scratch);
    auto unpacked = scratch[0 .. got];

    assert( cast(ubyte[])message == unpacked );
}
520