132
|
1 /*******************************************************************************
|
|
2
|
|
3 copyright: Copyright (C) 2007 Daniel Keep. All rights reserved.
|
|
4
|
|
5 license: BSD style: $(LICENSE)
|
|
6
|
|
7 version: Initial release: July 2007
|
|
8
|
|
9 author: Daniel Keep
|
|
10
|
|
11 *******************************************************************************/
|
|
12
|
|
13 module tango.io.compress.BzipStream;
|
|
14
|
|
15 private import tango.io.compress.c.bzlib;
|
|
16
|
|
17 private import tango.core.Exception : IOException;
|
|
18
|
|
19 private import tango.io.Conduit : InputFilter, OutputFilter;
|
|
20
|
|
21 private import tango.io.model.IConduit : InputStream, OutputStream, IConduit;
|
|
22
|
|
/* Length, in bytes, of the staging buffers the filters in this module
 * allocate (the compressor's output chunk and the decompressor's input
 * chunk).  The figure is arbitrary; benchmarking might suggest a better one.
 */
private const BUFFER_SIZE = 4*1024;

/* Default bzip2 block size.  NOTE(review): nothing in this module appears to
 * reference it; BzipOutput's constructor defaults to BlockSize.Normal instead.
 */
private const DEFAULT_BLOCKSIZE = 9;

/* Work factor handed to BZ2_bzCompressInit by BzipOutput's constructor. */
private const DEFAULT_WORKFACTOR = 0;
|
|
34
|
|
/*******************************************************************************

    This output filter can be used to perform compression of data into a bzip2
    stream.

*******************************************************************************/

class BzipOutput : OutputFilter
{
    /***************************************************************************

        This enumeration represents several pre-defined compression block
        sizes, measured in hundreds of kilobytes.  See the documentation for
        the BzipOutput class' constructor for more details.

    ***************************************************************************/

    enum BlockSize : int
    {
        Normal = 9,
        Fast = 1,
        Best = 9,
    }

    private
    {
        // True from successful construction until kill_bzs() has run.
        bool bzs_valid = false;
        // libbzip2 compressor state.
        bz_stream bzs;
        // Staging buffer the compressor writes into before the bytes are
        // forwarded to the host stream.
        ubyte[] out_chunk;
        // Running total of compressed bytes accepted by the host stream.
        size_t _written = 0;
    }

    /***************************************************************************

        Constructs a new bzip2 compression filter.  You need to pass in the
        stream that the compression filter will write to.  If you are using
        this filter with a conduit, the idiom to use is:

        ---
        auto output = new BzipOutput(myConduit.output);
        output.write(myContent);
        ---

        blockSize relates to the size of the window bzip2 uses when
        compressing data and determines how much memory is required to
        decompress a stream.  It is measured in hundreds of kilobytes.

        According to the bzip2 documentation, there is no dramatic difference
        between the various block sizes, so the default should suffice in most
        cases.

        BlockSize.Normal (the default) is the same as BlockSize.Best
        (or 9).  The blockSize may be any integer between 1 and 9 inclusive.

        Throws BzipException if blockSize is out of range or if the
        compressor cannot be initialised.

    ***************************************************************************/

    this(OutputStream stream, int blockSize = BlockSize.Normal)
    {
        if( blockSize < 1 || blockSize > 9 )
            throw new BzipException("bzip2 block size must be between"
                    " 1 and 9");

        super(stream);
        out_chunk = new ubyte[BUFFER_SIZE];

        auto ret = BZ2_bzCompressInit(&bzs, blockSize, 0, DEFAULT_WORKFACTOR);
        if( ret != BZ_OK )
            throw new BzipException(ret);

        bzs_valid = true;
    }

    ~this()
    {
        if( bzs_valid )
            kill_bzs();
    }

    /***************************************************************************

        Compresses the given data to the underlying conduit.

        Returns src.length once all of src has been handed to the
        compressor, or IConduit.Eof if the underlying stream reported Eof
        while compressed bytes were being forwarded to it.

        Throws BzipClosedException if the filter has already been committed
        or killed, and BzipException if the compressor reports an error (in
        which case the filter is invalidated).

    ***************************************************************************/

    uint write(void[] src)
    {
        check_valid();
        scope(failure) kill_bzs();

        bzs.avail_in = src.length;
        bzs.next_in = cast(ubyte*)src.ptr;

        do
        {
            bzs.avail_out = out_chunk.length;
            bzs.next_out = out_chunk.ptr;

            auto ret = BZ2_bzCompress(&bzs, BZ_RUN);
            if( ret != BZ_RUN_OK )
                throw new BzipException(ret);

            // Forward whatever the compressor produced.  A call may well
            // produce nothing, in which case the host is not touched at all.
            if( !drain_chunk() )
                return IConduit.Eof;
        }
        // Loop while we are still using up the whole output buffer
        while( bzs.avail_out == 0 );

        assert( bzs.avail_in == 0, "failed to compress all provided data" );

        return src.length;
    }

    /***************************************************************************

        This read-only property returns the number of compressed bytes that
        have been written to the underlying stream.  Following a call to
        either close or commit, this will contain the total compressed size of
        the input data stream.

    ***************************************************************************/

    size_t written()
    {
        return _written;
    }

    /***************************************************************************

        Commits the output (if the filter is still valid) and then closes
        the filter via the superclass.

    ***************************************************************************/

    void close()
    {
        if( bzs_valid )
            commit();
        super.close();
    }

    /***************************************************************************

        Purge any buffered content.  Calling this will implicitly end the
        bzip2 stream, so it should not be called until you are finished
        compressing data.  Any calls to either write or commit after a
        compression filter has been committed will throw an exception.

        If the underlying stream reports Eof while the final bytes are being
        forwarded, this method returns early and leaves the filter
        uncommitted.

    ***************************************************************************/

    void commit()
    {
        check_valid();
        scope(failure) kill_bzs();

        bzs.avail_in = 0;
        bzs.next_in = null;

        bool finished = false;

        do
        {
            bzs.avail_out = out_chunk.length;
            bzs.next_out = out_chunk.ptr;

            auto ret = BZ2_bzCompress(&bzs, BZ_FINISH);
            switch( ret )
            {
                case BZ_FINISH_OK:
                    // More output is pending; go around again.
                    break;

                case BZ_STREAM_END:
                    finished = true;
                    break;

                default:
                    throw new BzipException(ret);
            }

            if( !drain_chunk() )
                return;
        }
        while( !finished );

        kill_bzs();
    }

    // Forwards the bytes the compressor has placed in out_chunk (everything
    // before the bzs.avail_out unused tail) to the host stream, retrying on
    // short writes and adding each accepted count to _written.  When the
    // compressor produced nothing the host is not called.
    //
    // Returns false if the host reported IConduit.Eof, true otherwise.
    private bool drain_chunk()
    {
        auto pending = out_chunk[0 .. out_chunk.length - bzs.avail_out];

        while( pending.length > 0 )
        {
            auto w = host.write(pending);
            if( w == IConduit.Eof )
                return false;

            pending = pending[w..$];
            _written += w;
        }

        return true;
    }

    // This function kills the stream: it deallocates the internal state, and
    // unsets the bzs_valid flag.
    private void kill_bzs()
    {
        check_valid();

        BZ2_bzCompressEnd(&bzs);
        bzs_valid = false;
    }

    // Asserts that the stream is still valid and usable (except that this
    // check doesn't get elided with -release).
    private void check_valid()
    {
        if( !bzs_valid )
            throw new BzipClosedException;
    }
}
|
|
265
|
|
/*******************************************************************************

    This input filter can be used to perform decompression of bzip2 streams.

*******************************************************************************/

class BzipInput : InputFilter
{
    private
    {
        // True from successful construction until kill_bzs() has run.
        bool bzs_valid = false;
        // libbzip2 decompressor state.
        bz_stream bzs;
        // Staging buffer that compressed bytes are read into from the host
        // stream before being fed to the decompressor.
        ubyte[] in_chunk;
    }

    /***************************************************************************

        Constructs a new bzip2 decompression filter.  You need to pass in the
        stream that the decompression filter will read from.  If you are using
        this filter with a conduit, the idiom to use is:

        ---
        auto input = new BzipInput(myConduit.input);
        input.read(myContent);
        ---

        The small argument, if set to true, instructs bzip2 to perform
        decompression using half the regular amount of memory, at the cost of
        running at half speed.

        Throws BzipException if the decompressor cannot be initialised.

    ***************************************************************************/

    this(InputStream stream, bool small=false)
    {
        super(stream);
        in_chunk = new ubyte[BUFFER_SIZE];

        auto ret = BZ2_bzDecompressInit(&bzs, 0, small?1:0);
        if( ret != BZ_OK )
            throw new BzipException(ret);

        bzs_valid = true;
    }

    ~this()
    {
        if( bzs_valid )
            kill_bzs();
    }

    /***************************************************************************

        Decompresses data from the underlying conduit into a target array.

        Returns the number of bytes stored into dst, which may be less than
        requested.  IConduit.Eof is returned only when the underlying stream
        reports Eof before any byte has been stored into dst during this
        call; if some bytes were already produced, their count is returned
        instead so that they are not lost.

        Once the end of the bzip2 stream has been reached the filter is
        invalidated, and further calls throw BzipClosedException.  A
        decompressor error throws BzipException and also invalidates the
        filter.

    ***************************************************************************/

    uint read(void[] dst)
    {
        check_valid();
        scope(failure) kill_bzs();

        bool finished = false;

        bzs.avail_out = dst.length;
        bzs.next_out = cast(ubyte*)dst.ptr;

        do
        {
            if( bzs.avail_in == 0 )
            {
                // The decompressor has used up everything it was given;
                // fetch another chunk from the host stream.
                auto len = host.read(in_chunk);
                if( len == IConduit.Eof )
                {
                    // Nothing produced during this call: report Eof.
                    if( bzs.avail_out == dst.length )
                        return IConduit.Eof;

                    // Otherwise hand back what has already been written
                    // into dst rather than discarding it.
                    break;
                }

                bzs.avail_in = len;
                bzs.next_in = in_chunk.ptr;
            }

            auto ret = BZ2_bzDecompress(&bzs);
            if( ret == BZ_STREAM_END )
            {
                kill_bzs();
                finished = true;
            }
            else if( ret != BZ_OK )
                throw new BzipException(ret);
        }
        while( !finished && bzs.avail_out > 0 );

        return dst.length - bzs.avail_out;
    }

    /***************************************************************************

        Clear any buffered content.  In the current implementation this
        invalidates the decompressor (subsequent reads will throw
        BzipClosedException) and then forwards to the superclass' clear.

        Throws BzipClosedException if the filter is already invalid.

    ***************************************************************************/

    InputStream clear()
    {
        check_valid();

        // TODO: What should this method do?  We don't do any heap allocation,
        // so there's really nothing to clear...  For now, just invalidate the
        // stream...
        kill_bzs();
        super.clear();
        return this;
    }

    // This function kills the stream: it deallocates the internal state, and
    // unsets the bzs_valid flag.
    private void kill_bzs()
    {
        check_valid();

        BZ2_bzDecompressEnd(&bzs);
        bzs_valid = false;
    }

    // Asserts that the stream is still valid and usable (except that this
    // check doesn't get elided with -release).
    private void check_valid()
    {
        if( !bzs_valid )
            throw new BzipClosedException;
    }
}
|
|
397
|
|
/*******************************************************************************

    This exception is thrown when an error occurs in the underlying bzip2
    library.  It can be built either from a libbzip2 status code, in which
    case the message is the symbolic name of that code, or from an arbitrary
    message.

*******************************************************************************/

class BzipException : IOException
{
    /// Builds the exception from a libbzip2 status code.
    this(in int code)
    {
        super(codeName(code));
    }

    /// Builds the exception from a caller-supplied message.
    this(char[] msg)
    {
        super(msg);
    }

    // Maps a libbzip2 status code to its symbolic name; codes not listed
    // here yield "BZ_UNKNOWN".  Static because it is evaluated while the
    // arguments to super() are being built and needs no instance state.
    private static char[] codeName(in int code)
    {
        switch( code )
        {
            case BZ_OK:                 return "BZ_OK";
            case BZ_RUN_OK:             return "BZ_RUN_OK";
            case BZ_FLUSH_OK:           return "BZ_FLUSH_OK";
            case BZ_FINISH_OK:          return "BZ_FINISH_OK";
            case BZ_STREAM_END:         return "BZ_STREAM_END";
            case BZ_SEQUENCE_ERROR:     return "BZ_SEQUENCE_ERROR";
            case BZ_PARAM_ERROR:        return "BZ_PARAM_ERROR";
            case BZ_MEM_ERROR:          return "BZ_MEM_ERROR";
            case BZ_DATA_ERROR:         return "BZ_DATA_ERROR";
            case BZ_DATA_ERROR_MAGIC:   return "BZ_DATA_ERROR_MAGIC";
            case BZ_IO_ERROR:           return "BZ_IO_ERROR";
            case BZ_UNEXPECTED_EOF:     return "BZ_UNEXPECTED_EOF";
            case BZ_OUTBUFF_FULL:       return "BZ_OUTBUFF_FULL";
            case BZ_CONFIG_ERROR:       return "BZ_CONFIG_ERROR";
            default:                    return "BZ_UNKNOWN";
        }
    }
}
|
|
442
|
|
/*******************************************************************************

    Raised when an operation is attempted on a bzip2 filter whose internal
    state has already been released: for example reading from an input
    filter after its stream has ended, or writing to or committing an output
    filter that has already been committed.

*******************************************************************************/

class BzipClosedException : IOException
{
    /// Creates the exception with its fixed explanatory message.
    this()
    {
        char[] reason = "cannot operate on closed bzip2 stream";
        super(reason);
    }
}
|
|
458
|
|
459 /* *****************************************************************************
|
|
460
|
|
461 This section contains a simple unit test for this module. It is hidden
|
|
462 behind a debug(UnitTest) condition because it introduces additional dependencies.
|
|
463
|
|
464 ***************************************************************************** */
|
|
465
|
|
466 debug(UnitTest):
|
|
467
|
|
468 import tango.io.GrowBuffer : GrowBuffer;
|
|
469
|
|
// Round-trip check: compress a known sentence, compare the result byte for
// byte against a reference bzip2 stream, then decompress it again and compare
// with the original text.
unittest
{
    // Plain text fed to the compressor.
    const char[] plain =
        "All dwarfs are by nature dutiful, serious, literate, obedient "
        "and thoughtful people whose only minor failing is a tendency, "
        "after one drink, to rush at enemies screaming \"Arrrrrrgh!\" and "
        "axing their legs off at the knee.";

    // Reference bzip2 stream the compressor is expected to emit for `plain`.
    const ubyte[] expected_bz2 = [
        0x42, 0x5a, 0x68, 0x39, 0x31, 0x41, 0x59, 0x26,
        0x53, 0x59, 0x40, 0x98, 0xbe, 0xaa, 0x00, 0x00,
        0x16, 0xd5, 0x80, 0x10, 0x00, 0x70, 0x05, 0x20,
        0x00, 0x3f, 0xef, 0xde, 0xe0, 0x30, 0x00, 0xac,
        0xd8, 0x8a, 0x3d, 0x34, 0x6a, 0x6d, 0x4c, 0x4f,
        0x24, 0x31, 0x0d, 0x08, 0x98, 0x9b, 0x48, 0x9a,
        0x7a, 0x80, 0x00, 0x06, 0xa6, 0xd2, 0xa7, 0xe9,
        0xaa, 0x37, 0xa8, 0xd4, 0xf5, 0x3f, 0x54, 0x63,
        0x51, 0xe9, 0x2d, 0x4b, 0x99, 0xe1, 0xcc, 0xca,
        0xda, 0x75, 0x04, 0x42, 0x14, 0xc8, 0x6a, 0x8e,
        0x23, 0xc1, 0x3e, 0xb1, 0x8a, 0x16, 0xd2, 0x55,
        0x9a, 0x3e, 0x56, 0x1a, 0xb1, 0x83, 0x11, 0xa6,
        0x50, 0x4f, 0xd3, 0xed, 0x21, 0x40, 0xaa, 0xd1,
        0x95, 0x2c, 0xda, 0xcb, 0xb7, 0x0e, 0xce, 0x65,
        0xfc, 0x63, 0xf2, 0x88, 0x5b, 0x36, 0xda, 0xf0,
        0xf5, 0xd2, 0x9c, 0xe6, 0xf1, 0x87, 0x12, 0x87,
        0xce, 0x56, 0x0c, 0xf5, 0x65, 0x4d, 0x2e, 0xd6,
        0x27, 0x61, 0x2b, 0x74, 0xcd, 0x5e, 0x3b, 0x02,
        0x42, 0x4e, 0x0b, 0x80, 0xa8, 0x70, 0x04, 0x48,
        0xfb, 0x93, 0x4c, 0x41, 0xa8, 0x2a, 0xdf, 0xf2,
        0x67, 0x37, 0x28, 0xad, 0x38, 0xd4, 0x5c, 0xd6,
        0x34, 0x8b, 0x49, 0x5e, 0x90, 0xb2, 0x06, 0xce,
        0x0a, 0x83, 0x29, 0x84, 0x20, 0xd7, 0x5f, 0xc5,
        0xdc, 0x91, 0x4e, 0x14, 0x24, 0x10, 0x26, 0x2f,
        0xaa, 0x80];

    // Compress into an in-memory buffer; close() commits the bzip2 stream.
    scope store = new GrowBuffer;
    scope packer = new BzipOutput(store);
    packer.write(plain);
    packer.close();

    assert( packer.written() == expected_bz2.length );

    assert( expected_bz2 == cast(ubyte[]) store.slice() );

    // Decompress from the same buffer and compare with the input text.
    scope unpacker = new BzipInput(store);
    auto scratch = new ubyte[256];
    auto got = unpacker.read(scratch);
    auto unpacked = scratch[0 .. got];

    assert( cast(ubyte[])plain == unpacked );
}
|
|
520
|