comparison lphobos/std/zip.d @ 473:373489eeaf90

Applied downs' lphobos update
author Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
date Mon, 04 Aug 2008 19:28:49 +0200
parents
children
comparison
equal deleted inserted replaced
472:15c804b6ce77 473:373489eeaf90
1
2 /**
3 * Read/write data in the $(LINK2 http://www.info-_zip.org, zip archive) format.
4 * Makes use of the etc.c.zlib compression library.
5 *
6 * Bugs:
7 * $(UL
8 * $(LI Multi-disk zips not supported.)
9 * $(LI Only Zip version 20 formats are supported.)
10 * $(LI Only supports compression modes 0 (no compression) and 8 (deflate).)
11 * $(LI Does not support encryption.)
12 * )
13 *
14 * Macros:
15 * WIKI = Phobos/StdZip
16 */
17
18 module std.zip;
19
20 private import std.zlib;
21 private import std.date;
22 private import std.intrinsic;
23
24 //debug=print;
25
/**
 * Thrown on error while processing a zip archive.
 *
 * The message passed to the constructor is stored with the prefix
 * "ZipException: " prepended.
 */
class ZipException : Exception
{
    /// Params: msg = description of the failure (without the prefix).
    this(char[] msg)
    {
        char[] prefixed = "ZipException: " ~ msg;
        super(prefixed);
    }
}
35
/**
 * A single member (entry) of a ZipArchive.
 *
 * Holds the header fields stored for the member together with its name,
 * extra data, comment and its data in compressed and expanded form.
 */
class ArchiveMember
{
    /// Read Only: "version made by" header field; defaults to 20.
    ushort madeVersion = 20;

    /// Read Only: "version needed to extract" header field; defaults to 20.
    ushort extractVersion = 20;

    /// Read/Write: general purpose flags, normally set to 0.
    ushort flags;

    /// Read/Write: 0 for no compression (stored), 8 for deflate.
    ushort compressionMethod;

    /// Read/Write: last modified time of the member, in the DOS date/time format.
    std.date.DosFileTime time;

    /// Read Only: cyclic redundancy check (CRC) value of the expanded data.
    uint crc32;

    /// Read Only: size of the member's data in compressed form.
    uint compressedSize;

    /// Read Only: size of the member's data in expanded form.
    uint expandedSize;

    /// Read Only: should be 0.
    ushort diskNumber;

    /// Read/Write
    ushort internalAttributes;

    /// Read/Write
    uint externalAttributes;

    // Position of this member's local header inside the archive data;
    // maintained by ZipArchive.
    private uint offset;

    /**
     * Read/Write: Usually the file name of the archive member; it is used to
     * index the archive directory for the member. Each member must have a
     * unique name[]. Do not change it without removing the member from the
     * directory first.
     */
    char[] name;

    /// Read/Write: extra data for this member.
    ubyte[] extra;

    /// Read/Write: comment associated with this member.
    char[] comment;

    /// Read Only: data of the member in compressed form.
    ubyte[] compressedData;

    /// Read/Write: data of the member in uncompressed form.
    ubyte[] expandedData;

    debug(print)
    {
        /// Dump the member's header fields to stdout (debug builds only).
        void print()
        {
            int nameLen = cast(int) name.length;
            int commentLen = cast(int) comment.length;

            printf("name = '%.*s'\n", nameLen, name.ptr);
            printf("\tcomment = '%.*s'\n", commentLen, comment.ptr);
            printf("\tmadeVersion = x%04x\n", madeVersion);
            printf("\textractVersion = x%04x\n", extractVersion);
            printf("\tflags = x%04x\n", flags);
            printf("\tcompressionMethod = %d\n", compressionMethod);
            printf("\ttime = %d\n", time);
            printf("\tcrc32 = x%08x\n", crc32);
            printf("\texpandedSize = %d\n", expandedSize);
            printf("\tcompressedSize = %d\n", compressedSize);
            printf("\tinternalAttributes = x%04x\n", internalAttributes);
            printf("\texternalAttributes = x%08x\n", externalAttributes);
        }
    }
}
86
/**
 * Object representing the entire archive.
 * ZipArchives are collections of ArchiveMembers.
 */
class ZipArchive
{
    ubyte[] data;       /// Read Only: array representing the entire contents of the archive.
    uint endrecOffset;  /// Read Only: offset of the end record ("PK\x05\x06") within data[].

    uint diskNumber;    /// Read Only: 0 since multi-disk zip archives are not supported.
    uint diskStartDir;  /// Read Only: 0 since multi-disk zip archives are not supported.
    uint numEntries;    /// Read Only: number of ArchiveMembers in the directory.
    uint totalEntries;  /// Read Only: same as numEntries.
    char[] comment;     /// Read/Write: the archive comment. Must be less than 65536 bytes in length.

    /**
     * Read Only: array indexed by the name of each member of the archive.
     * Example:
     *  All the members of the archive can be accessed with a foreach loop:
     * --------------------
     * ZipArchive archive = new ZipArchive(data);
     * foreach (ArchiveMember am; archive.directory)
     * {
     *     writefln("member name is '%s'", am.name);
     * }
     * --------------------
     */
    ArchiveMember[char[]] directory;

    debug (print)
    {
        /// Dump the end record fields to stdout (debug builds only).
        void print()
        {
            printf("\tdiskNumber = %u\n", diskNumber);
            printf("\tdiskStartDir = %u\n", diskStartDir);
            printf("\tnumEntries = %u\n", numEntries);
            printf("\ttotalEntries = %u\n", totalEntries);
            printf("\tcomment = '%.*s'\n", cast(int) comment.length, comment.ptr);
        }
    }

    /* ============ Creating a new archive =================== */

    /** Constructor to use when creating a new archive.
     */
    this()
    {
    }

    /** Add de to the archive. A member already stored under the same
     * name is replaced.
     */
    void addMember(ArchiveMember de)
    {
        directory[de.name] = de;
    }

    /** Delete de from the archive (looked up by de.name).
     */
    void deleteMember(ArchiveMember de)
    {
        directory.remove(de.name);
    }

    /**
     * Construct an archive out of the current members of the archive.
     *
     * Fills in the properties data[], endrecOffset, numEntries and
     * totalEntries.
     * For each ArchiveMember, fills in properties crc32, compressedSize,
     * expandedSize and compressedData[].
     *
     * Returns: array representing the entire archive.
     *
     * Throws: ZipException if the archive comment, a member's name, extra
     * data or comment is longer than 65535 bytes, if there are more than
     * 65535 members, or if a member uses an unsupported compression method.
     */
    void[] build()
    {
        // Every length and count below is stored in a 16 bit field; reject
        // values that would be silently truncated and corrupt the archive.
        if (comment.length > 0xFFFF)
            throw new ZipException("archive comment longer than 65535");
        if (directory.length > 0xFFFF)
            throw new ZipException("more than 65535 archive members");

        // Compress each member; compute size
        uint archiveSize = 0;
        uint directorySize = 0;
        foreach (ArchiveMember de; directory)
        {
            if (de.name.length > 0xFFFF)
                throw new ZipException("member name longer than 65535");
            if (de.extra.length > 0xFFFF)
                throw new ZipException("member extra data longer than 65535");
            if (de.comment.length > 0xFFFF)
                throw new ZipException("member comment longer than 65535");

            de.expandedSize = de.expandedData.length;
            switch (de.compressionMethod)
            {
                case 0:
                    de.compressedData = de.expandedData;
                    break;

                case 8:
                    de.compressedData = cast(ubyte[])std.zlib.compress(cast(void[])de.expandedData);
                    // Drop the first 2 and the last 4 bytes of the compressed output.
                    de.compressedData = de.compressedData[2 .. de.compressedData.length - 4];
                    break;

                default:
                    throw new ZipException("unsupported compression method");
            }
            de.compressedSize = de.compressedData.length;
            de.crc32 = std.zlib.crc32(0, cast(void[])de.expandedData);

            // 30 = fixed part of a local header, 46 = fixed part of a directory entry
            archiveSize += 30 + de.name.length +
                                de.extra.length +
                                de.compressedSize;
            directorySize += 46 + de.name.length +
                                de.extra.length +
                                de.comment.length;
        }

        // 22 = fixed part of the end record
        data = new ubyte[archiveSize + directorySize + 22 + comment.length];

        // Populate the data[]

        // Store each archive member
        uint i = 0;
        foreach (ArchiveMember de; directory)
        {
            de.offset = i;
            data[i .. i + 4] = cast(ubyte[])"PK\x03\x04";
            putUshort(i + 4,  de.extractVersion);
            putUshort(i + 6,  de.flags);
            putUshort(i + 8,  de.compressionMethod);
            putUint  (i + 10, cast(uint)de.time);
            putUint  (i + 14, de.crc32);
            putUint  (i + 18, de.compressedSize);
            putUint  (i + 22, de.expandedSize);
            putUshort(i + 26, cast(ushort)de.name.length);
            putUshort(i + 28, cast(ushort)de.extra.length);
            i += 30;

            data[i .. i + de.name.length] = cast(ubyte[])de.name[];
            i += de.name.length;
            data[i .. i + de.extra.length] = cast(ubyte[])de.extra[];
            i += de.extra.length;
            data[i .. i + de.compressedSize] = de.compressedData[];
            i += de.compressedSize;
        }

        // Write directory
        uint directoryOffset = i;
        numEntries = 0;
        foreach (ArchiveMember de; directory)
        {
            data[i .. i + 4] = cast(ubyte[])"PK\x01\x02";
            putUshort(i + 4,  de.madeVersion);
            putUshort(i + 6,  de.extractVersion);
            putUshort(i + 8,  de.flags);
            putUshort(i + 10, de.compressionMethod);
            putUint  (i + 12, cast(uint)de.time);
            putUint  (i + 16, de.crc32);
            putUint  (i + 20, de.compressedSize);
            putUint  (i + 24, de.expandedSize);
            putUshort(i + 28, cast(ushort)de.name.length);
            putUshort(i + 30, cast(ushort)de.extra.length);
            putUshort(i + 32, cast(ushort)de.comment.length);
            putUshort(i + 34, de.diskNumber);
            putUshort(i + 36, de.internalAttributes);
            putUint  (i + 38, de.externalAttributes);
            putUint  (i + 42, de.offset);
            i += 46;

            data[i .. i + de.name.length] = cast(ubyte[])de.name[];
            i += de.name.length;
            data[i .. i + de.extra.length] = cast(ubyte[])de.extra[];
            i += de.extra.length;
            data[i .. i + de.comment.length] = cast(ubyte[])de.comment[];
            i += de.comment.length;
            numEntries++;
        }
        totalEntries = numEntries;

        // Write end record
        endrecOffset = i;
        data[i .. i + 4] = cast(ubyte[])"PK\x05\x06";
        putUshort(i + 4,  cast(ushort)diskNumber);
        putUshort(i + 6,  cast(ushort)diskStartDir);
        putUshort(i + 8,  cast(ushort)numEntries);
        putUshort(i + 10, cast(ushort)totalEntries);
        putUint  (i + 12, directorySize);
        putUint  (i + 16, directoryOffset);
        putUshort(i + 20, cast(ushort)comment.length);
        i += 22;

        // Write archive comment
        assert(i + comment.length == data.length);
        data[i .. data.length] = cast(ubyte[])comment[];

        return cast(void[])data;
    }

    /* ============ Reading an existing archive =================== */

    /**
     * Constructor to use when reading an existing archive.
     *
     * Fills in the properties data[], endrecOffset, diskNumber, diskStartDir,
     * numEntries, totalEntries, comment[], and directory[].
     * For each ArchiveMember, fills in
     * properties madeVersion, extractVersion, flags, compressionMethod, time,
     * crc32, compressedSize, expandedSize, diskNumber,
     * internalAttributes, externalAttributes, name[], extra[], comment[].
     * Use expand() to get the compressed and expanded data for each
     * ArchiveMember.
     *
     * Params:
     *  buffer = the entire contents of the archive. The archive keeps
     *           slices into this buffer; it is not copied.
     *
     * Throws: ZipException if no end record is found or the directory is
     * malformed.
     */
    this(void[] buffer)
    {
        this.data = cast(ubyte[]) buffer;

        // The end record alone takes 22 bytes; anything shorter cannot be
        // an archive (and would make the search index below negative).
        if (data.length < 22)
            throw new ZipException("no end record");

        // Find 'end record index' by searching backwards for signature.
        // The search window is limited to the last 66000 bytes.
        int iend = data.length - 66000;
        if (iend < 0)
            iend = 0;

        int i;
        for (i = data.length - 22; 1; i--)
        {
            if (i < iend)
                throw new ZipException("no end record");

            if (data[i .. i + 4] == cast(ubyte[])"PK\x05\x06")
            {
                int endcommentlength = getUshort(i + 20);
                // A signature whose comment would run past the buffer is
                // not the real end record; keep searching.
                if (i + 22 + endcommentlength > data.length)
                    continue;
                comment = cast(char[])data[i + 22 .. i + 22 + endcommentlength];
                endrecOffset = i;
                break;
            }
        }

        // Read end record data
        diskNumber = getUshort(i + 4);
        diskStartDir = getUshort(i + 6);

        numEntries = getUshort(i + 8);
        totalEntries = getUshort(i + 10);

        if (numEntries != totalEntries)
            throw new ZipException("multiple disk zips not supported");

        uint directorySize = getUint(i + 12);
        uint directoryOffset = getUint(i + 16);

        // The directory must lie entirely before the end record. Written
        // with a subtraction so a huge offset/size cannot wrap around.
        if (directoryOffset > endrecOffset ||
            directorySize > endrecOffset - directoryOffset)
            throw new ZipException("corrupted directory");

        uint directoryEnd = directoryOffset + directorySize;
        uint pos = directoryOffset;     // invariant: pos <= directoryEnd
        for (uint n = 0; n < numEntries; n++)
        {
            /* The format of an entry is:
             *  'PK' 1, 2
             *  directory info
             *  path
             *  extra data
             *  comment
             */

            // The fixed part of an entry is 46 bytes and must fit inside
            // the directory before any of its fields are read.
            if (directoryEnd - pos < 46)
                throw new ZipException("invalid directory entry 1");
            if (data[pos .. pos + 4] != cast(ubyte[])"PK\x01\x02")
                throw new ZipException("invalid directory entry 1");

            ArchiveMember de = new ArchiveMember();
            de.madeVersion = getUshort(pos + 4);
            de.extractVersion = getUshort(pos + 6);
            de.flags = getUshort(pos + 8);
            de.compressionMethod = getUshort(pos + 10);
            de.time = cast(DosFileTime)getUint(pos + 12);
            de.crc32 = getUint(pos + 16);
            de.compressedSize = getUint(pos + 20);
            de.expandedSize = getUint(pos + 24);
            uint namelen = getUshort(pos + 28);
            uint extralen = getUshort(pos + 30);
            uint commentlen = getUshort(pos + 32);
            de.diskNumber = getUshort(pos + 34);
            de.internalAttributes = getUshort(pos + 36);
            de.externalAttributes = getUint(pos + 38);
            de.offset = getUint(pos + 42);
            pos += 46;

            // Each length is at most 65535, so the sum cannot overflow.
            if (namelen + extralen + commentlen > directoryEnd - pos)
                throw new ZipException("invalid directory entry 2");

            de.name = cast(char[])data[pos .. pos + namelen];
            pos += namelen;
            de.extra = data[pos .. pos + extralen];
            pos += extralen;
            de.comment = cast(char[])data[pos .. pos + commentlen];
            pos += commentlen;

            directory[de.name] = de;
        }
        if (pos != directoryEnd)
            throw new ZipException("invalid directory entry 3");
    }

    /*****
     * Decompress the contents of archive member de and return the expanded
     * data.
     *
     * Fills in properties extractVersion, flags, compressionMethod, time,
     * crc32, compressedSize, expandedSize, compressedData[] and
     * expandedData[] from the member's local header and data.
     *
     * Throws: ZipException if the local header or the member data does not
     * lie inside the archive, if the member is encrypted, or if its
     * compression method is not supported.
     */
    ubyte[] expand(ArchiveMember de)
    {
        // The 30 byte fixed part of the local header must lie before the
        // end record; check before reading any of it.
        if (de.offset > endrecOffset || endrecOffset - de.offset < 30)
            throw new ZipException("invalid directory entry 4");

        if (data[de.offset .. de.offset + 4] != cast(ubyte[])"PK\x03\x04")
            throw new ZipException("invalid directory entry 4");

        // These values should match what is in the main zip archive directory
        de.extractVersion = getUshort(de.offset + 4);
        de.flags = getUshort(de.offset + 6);
        de.compressionMethod = getUshort(de.offset + 8);
        de.time = cast(DosFileTime)getUint(de.offset + 10);
        de.crc32 = getUint(de.offset + 14);
        de.compressedSize = getUint(de.offset + 18);
        de.expandedSize = getUint(de.offset + 22);
        uint namelen = getUshort(de.offset + 26);
        uint extralen = getUshort(de.offset + 28);

        debug(print)
        {
            printf("\t\texpandedSize = %d\n", de.expandedSize);
            printf("\t\tcompressedSize = %d\n", de.compressedSize);
            printf("\t\tnamelen = %d\n", namelen);
            printf("\t\textralen = %d\n", extralen);
        }

        if (de.flags & 1)
            throw new ZipException("encryption not supported");

        // Name, extra data and compressed data must all end before the end
        // record. Compared against the remaining room so that a huge
        // compressedSize cannot wrap the arithmetic.
        uint start = de.offset + 30;
        uint room = endrecOffset - start;
        uint skip = namelen + extralen;
        if (skip > room || de.compressedSize > room - skip)
            throw new ZipException("invalid directory entry 5");
        start += skip;

        de.compressedData = data[start .. start + de.compressedSize];
        debug(print) arrayPrint(de.compressedData);

        switch (de.compressionMethod)
        {
            case 0:
                de.expandedData = de.compressedData;
                return de.expandedData;

            case 8:
                // -15 is a magic value used to decompress zip files.
                // It has the effect of not requiring the 2 byte header
                // and 4 byte trailer.
                de.expandedData = cast(ubyte[])std.zlib.uncompress(cast(void[])de.compressedData, de.expandedSize, -15);
                return de.expandedData;

            default:
                throw new ZipException("unsupported compression method");
        }
        assert(0);
    }

    /* ============ Utility =================== */

    /// Read the little-endian 16 bit value stored at data[i .. i + 2].
    ushort getUshort(int i)
    {
        // Assembled byte by byte: no alignment requirement, no dependence
        // on host byte order, and every byte access is bounds checked.
        return cast(ushort)(data[i] | (data[i + 1] << 8));
    }

    /// Read the little-endian 32 bit value stored at data[i .. i + 4].
    uint getUint(int i)
    {
        return  cast(uint)data[i]            |
               (cast(uint)data[i + 1] << 8)  |
               (cast(uint)data[i + 2] << 16) |
               (cast(uint)data[i + 3] << 24);
    }

    /// Store us as a little-endian 16 bit value at data[i .. i + 2].
    void putUshort(int i, ushort us)
    {
        data[i]     = cast(ubyte)us;
        data[i + 1] = cast(ubyte)(us >> 8);
    }

    /// Store ui as a little-endian 32 bit value at data[i .. i + 4].
    void putUint(int i, uint ui)
    {
        data[i]     = cast(ubyte)ui;
        data[i + 1] = cast(ubyte)(ui >> 8);
        data[i + 2] = cast(ubyte)(ui >> 16);
        data[i + 3] = cast(ubyte)(ui >> 24);
    }
}
506
debug(print)
{
    /// Hex dump of array to stdout, 16 bytes per line (debug builds only).
    void arrayPrint(ubyte[] array)
    {
        printf("array %p,%d\n", cast(void*)array, array.length);
        foreach (size_t idx, ubyte b; array)
        {
            printf("%02x ", b);
            // Break the line after every 16th byte.
            bool endOfRow = ((idx + 1) & 15) == 0;
            if (endOfRow)
                printf("\n");
        }
        printf("\n");
    }
}