diff lphobos/std/zip.d @ 473:373489eeaf90

Applied downs' lphobos update
author Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
date Mon, 04 Aug 2008 19:28:49 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lphobos/std/zip.d	Mon Aug 04 19:28:49 2008 +0200
@@ -0,0 +1,520 @@
+
+/**
+ * Read/write data in the $(LINK2 http://www.info-_zip.org, zip archive) format.
+ * Makes use of the etc.c.zlib compression library.
+ *
+ * Bugs: 
+ *	$(UL
+ *	$(LI Multi-disk zips not supported.)
+ *	$(LI Only Zip version 20 formats are supported.)
+ *	$(LI Only supports compression modes 0 (no compression) and 8 (deflate).)
+ *	$(LI Does not support encryption.)
+ *	)
+ *
+ * Macros:
+ *	WIKI = Phobos/StdZip
+ */
+
+module std.zip;
+
+private import std.zlib;
+private import std.date;
+private import std.intrinsic;
+
+//debug=print;
+
+/**
+ * Exception thrown by this module on error.
+ * The text "ZipException: " is prepended to the supplied message.
+ */
+class ZipException : Exception
+{
+    this(char[] msg)
+    {
+	// Build the prefixed text first, then hand it to the base class.
+	char[] text = "ZipException: " ~ msg;
+	super(text);
+    }
+}
+
+/**
+ * A member of the ZipArchive.
+ *
+ * Holds the header fields, name, extra data and comment of one archive
+ * entry together with its data in compressed and/or expanded form.
+ */
+class ArchiveMember
+{
+    ushort madeVersion = 20;	/// Read Only
+    ushort extractVersion = 20;	/// Read Only
+    ushort flags;		/// Read/Write: normally set to 0
+    ushort compressionMethod;	/// Read/Write: 0 for no compression, 8 for deflate
+    std.date.DosFileTime time;	/// Read/Write: Last modified time of the member. It's in the DOS date/time format.
+    uint crc32;			/// Read Only: cyclic redundancy check (CRC) value
+    uint compressedSize;	/// Read Only: size of data of member in compressed form.
+    uint expandedSize;		/// Read Only: size of data of member in expanded form.
+    ushort diskNumber;		/// Read Only: should be 0.
+    ushort internalAttributes;	/// Read/Write
+    uint externalAttributes;	/// Read/Write
+
+    // Offset within ZipArchive.data[] of this member's local file header
+    // (the "PK\x03\x04" record). Set by ZipArchive when building or reading.
+    private uint offset;
+
+    /**
+     * Read/Write: Usually the file name of the archive member; it is used to
+     * index the archive directory for the member. Each member must have a unique
+     * name[]. Do not change without removing member from the directory first.
+     */
+    char[] name;
+
+    ubyte[] extra;		/// Read/Write: extra data for this member.
+    char[] comment;		/// Read/Write: comment associated with this member.
+    ubyte[] compressedData;	/// Read Only: data of member in compressed form.
+    ubyte[] expandedData;	/// Read/Write: data of member in uncompressed form.
+
+    // Debug aid, compiled only when the 'print' debug identifier is enabled:
+    // dumps the member's name, comment and header fields using printf.
+    debug(print)
+    {
+    void print()
+    {
+	printf("name = '%.*s'\n", cast(int) name.length, name.ptr);
+	printf("\tcomment = '%.*s'\n", cast(int) comment.length, comment.ptr);
+	printf("\tmadeVersion = x%04x\n", madeVersion);
+	printf("\textractVersion = x%04x\n", extractVersion);
+	printf("\tflags = x%04x\n", flags);
+	printf("\tcompressionMethod = %d\n", compressionMethod);
+	printf("\ttime = %d\n", time);
+	printf("\tcrc32 = x%08x\n", crc32);
+	printf("\texpandedSize = %d\n", expandedSize);
+	printf("\tcompressedSize = %d\n", compressedSize);
+	printf("\tinternalAttributes = x%04x\n", internalAttributes);
+	printf("\texternalAttributes = x%08x\n", externalAttributes);
+    }
+    }
+}
+
+/**
+ * Object representing the entire archive.
+ * ZipArchives are collections of ArchiveMembers.
+ */
+class ZipArchive
+{
+    ubyte[] data;	/// Read Only: array representing the entire contents of the archive.
+    uint endrecOffset;	/// Read Only: offset in data[] of the end record (the "PK\x05\x06" signature).
+
+    uint diskNumber;	/// Read Only: 0 since multi-disk zip archives are not supported.
+    uint diskStartDir;	/// Read Only: 0 since multi-disk zip archives are not supported.
+    uint numEntries;	/// Read Only: number of ArchiveMembers in the directory.
+    uint totalEntries;	/// Read Only: same as numEntries.
+    char[] comment;	/// Read/Write: the archive comment. Must be less than 65536 bytes in length.
+
+    /**
+     * Read Only: array indexed by the name of each member of the archive.
+     * Example:
+     *  All the members of the archive can be accessed with a foreach loop:
+     * --------------------
+     * ZipArchive archive = new ZipArchive(data);
+     * foreach (ArchiveMember am; archive.directory)
+     * {
+     *     writefln("member name is '%s'", am.name);
+     * }
+     * --------------------
+     */
+    ArchiveMember[char[]] directory;
+
+    // Debug aid, compiled only when the 'print' debug identifier is enabled:
+    // dumps the end record fields and the archive comment using printf.
+    debug (print)
+    {
+    void print()
+    {
+	printf("\tdiskNumber = %u\n", diskNumber);
+	printf("\tdiskStartDir = %u\n", diskStartDir);
+	printf("\tnumEntries = %u\n", numEntries);
+	printf("\ttotalEntries = %u\n", totalEntries);
+	// %.*s takes an int length followed by the pointer; comment is not 0-terminated.
+	printf("\tcomment = '%.*s'\n", cast(int) comment.length, comment.ptr);
+    }
+    }
+
+    /* ============ Creating a new archive =================== */
+
+    /** Constructor to use when creating a new archive.
+     *
+     * Leaves every field at its default value. Populate the archive with
+     * addMember() and produce the archive bytes with build().
+     */
+    this()
+    {
+    }
+
+    /** Register de in directory[], keyed by de.name.
+     * An existing entry stored under the same name is replaced.
+     */
+    void addMember(ArchiveMember de)
+    {
+	char[] key = de.name;
+	directory[key] = de;
+    }
+
+    /** Remove the directory[] entry stored under de.name.
+     */
+    void deleteMember(ArchiveMember de)
+    {
+	char[] key = de.name;
+	directory.remove(key);
+    }
+
+    /**
+     * Construct an archive out of the current members of the archive.
+     *
+     * Fills in the properties data[], endrecOffset, numEntries and
+     * totalEntries; diskNumber and diskStartDir are written to the end
+     * record as they currently are.
+     * For each ArchiveMember, fills in properties crc32, compressedSize,
+     * expandedSize and compressedData[], and records where its local
+     * header was placed.
+     *
+     * Throws: ZipException if the archive comment, the number of members, or
+     * a member's name, extra data or comment does not fit the 16 bit field
+     * that stores it, or if a member uses an unsupported compression method.
+     *
+     * Returns: array representing the entire archive.
+     */
+    void[] build()
+    {	uint i;
+	uint directoryOffset;
+
+	if (comment.length > 0xFFFF)
+	    throw new ZipException("archive comment longer than 65535");
+
+	// The end record stores the entry counts in 16 bit fields.
+	if (directory.length > 0xFFFF)
+	    throw new ZipException("more than 65535 archive members");
+
+	// Compress each member; compute size
+	uint archiveSize = 0;
+	uint directorySize = 0;
+	foreach (ArchiveMember de; directory)
+	{
+	    // These lengths are stored as 16 bit values in the headers while the
+	    // bytes themselves are written in full; reject anything that would
+	    // be silently truncated and yield a corrupt archive.
+	    if (de.name.length > 0xFFFF)
+		throw new ZipException("member name longer than 65535");
+	    if (de.extra.length > 0xFFFF)
+		throw new ZipException("member extra data longer than 65535");
+	    if (de.comment.length > 0xFFFF)
+		throw new ZipException("member comment longer than 65535");
+
+	    de.expandedSize = de.expandedData.length;
+	    switch (de.compressionMethod)
+	    {
+		case 0:
+		    // Stored: the compressed form is the data itself.
+		    de.compressedData = de.expandedData;
+		    break;
+
+		case 8:
+		    de.compressedData = cast(ubyte[])std.zlib.compress(cast(void[])de.expandedData);
+		    // Drop the first 2 and last 4 bytes of the zlib output: zip
+		    // members hold the deflate data without that header and
+		    // trailer (see the matching note in expand()).
+		    de.compressedData = de.compressedData[2 .. de.compressedData.length - 4];
+		    break;
+
+		default:
+		    throw new ZipException("unsupported compression method");
+	    }
+	    de.compressedSize = de.compressedData.length;
+	    de.crc32 = std.zlib.crc32(0, cast(void[])de.expandedData);
+
+	    // 30 = fixed size of a local header, 46 = fixed size of a directory entry
+	    archiveSize += 30 + de.name.length +
+				de.extra.length +
+				de.compressedSize;
+	    directorySize += 46 + de.name.length +
+				de.extra.length +
+				de.comment.length;
+	}
+
+	// 22 = fixed size of the end record
+	data = new ubyte[archiveSize + directorySize + 22 + comment.length];
+
+	// Populate the data[]
+
+	// Store each archive member
+	i = 0;
+	foreach (ArchiveMember de; directory)
+	{
+	    de.offset = i;
+	    data[i .. i + 4] = cast(ubyte[])"PK\x03\x04";
+	    putUshort(i + 4,  de.extractVersion);
+	    putUshort(i + 6,  de.flags);
+	    putUshort(i + 8,  de.compressionMethod);
+	    putUint  (i + 10, cast(uint)de.time);
+	    putUint  (i + 14, de.crc32);
+	    putUint  (i + 18, de.compressedSize);
+	    putUint  (i + 22, de.expandedSize);
+	    putUshort(i + 26, cast(ushort)de.name.length);
+	    putUshort(i + 28, cast(ushort)de.extra.length);
+	    i += 30;
+
+	    data[i .. i + de.name.length] = cast(ubyte[])de.name[];
+	    i += de.name.length;
+	    data[i .. i + de.extra.length] = cast(ubyte[])de.extra[];
+	    i += de.extra.length;
+	    data[i .. i + de.compressedSize] = de.compressedData[];
+	    i += de.compressedSize;
+	}
+
+	// Write directory
+	directoryOffset = i;
+	numEntries = 0;
+	foreach (ArchiveMember de; directory)
+	{
+	    data[i .. i + 4] = cast(ubyte[])"PK\x01\x02";
+	    putUshort(i + 4,  de.madeVersion);
+	    putUshort(i + 6,  de.extractVersion);
+	    putUshort(i + 8,  de.flags);
+	    putUshort(i + 10, de.compressionMethod);
+	    putUint  (i + 12, cast(uint)de.time);
+	    putUint  (i + 16, de.crc32);
+	    putUint  (i + 20, de.compressedSize);
+	    putUint  (i + 24, de.expandedSize);
+	    putUshort(i + 28, cast(ushort)de.name.length);
+	    putUshort(i + 30, cast(ushort)de.extra.length);
+	    putUshort(i + 32, cast(ushort)de.comment.length);
+	    putUshort(i + 34, de.diskNumber);
+	    putUshort(i + 36, de.internalAttributes);
+	    putUint  (i + 38, de.externalAttributes);
+	    putUint  (i + 42, de.offset);
+	    i += 46;
+
+	    data[i .. i + de.name.length] = cast(ubyte[])de.name[];
+	    i += de.name.length;
+	    data[i .. i + de.extra.length] = cast(ubyte[])de.extra[];
+	    i += de.extra.length;
+	    data[i .. i + de.comment.length] = cast(ubyte[])de.comment[];
+	    i += de.comment.length;
+	    numEntries++;
+	}
+	totalEntries = numEntries;
+
+	// Write end record
+	endrecOffset = i;
+	data[i .. i + 4] = cast(ubyte[])"PK\x05\x06";
+	putUshort(i + 4,  cast(ushort)diskNumber);
+	putUshort(i + 6,  cast(ushort)diskStartDir);
+	putUshort(i + 8,  cast(ushort)numEntries);
+	putUshort(i + 10, cast(ushort)totalEntries);
+	putUint  (i + 12, directorySize);
+	putUint  (i + 16, directoryOffset);
+	putUshort(i + 20, cast(ushort)comment.length);
+	i += 22;
+
+	// Write archive comment
+	assert(i + comment.length == data.length);
+	data[i .. data.length] = cast(ubyte[])comment[];
+
+	return cast(void[])data;
+    }
+
+    /* ============ Reading an existing archive =================== */
+
+    /**
+     * Constructor to use when reading an existing archive.
+     *
+     * Fills in the properties data[], endrecOffset, diskNumber, diskStartDir,
+     * numEntries, totalEntries, comment[], and directory[].
+     * For each ArchiveMember, fills in
+     * properties madeVersion, extractVersion, flags, compressionMethod, time,
+     * crc32, compressedSize, expandedSize, diskNumber,
+     * internalAttributes, externalAttributes, name[], extra[], comment[].
+     * Use expand() to get the compressed and expanded data for each
+     * ArchiveMember.
+     *
+     * The buffer is not copied: data[] and the name[], extra[] and comment[]
+     * arrays are slices of it.
+     *
+     * Params:
+     *	buffer = the entire contents of the archive.
+     *
+     * Throws: ZipException if no end record is found, if the entry counts
+     * in the end record differ, or if the directory is inconsistent.
+     */
+
+    this(void[] buffer)
+    {	int iend;
+	int i;
+	int endcommentlength;
+	uint directorySize;
+	uint directoryOffset;
+	uint directoryEnd;
+
+	this.data = cast(ubyte[]) buffer;
+
+	// Find 'end record index' by searching backwards for signature.
+	// The end record is 22 bytes plus a comment of at most 65535 bytes,
+	// so only the last 66000 bytes of the buffer need to be searched.
+	iend = data.length - 66000;
+	if (iend < 0)
+	    iend = 0;
+	for (i = data.length - 22; 1; i--)
+	{
+	    if (i < iend)
+		throw new ZipException("no end record");
+
+	    if (data[i .. i + 4] == cast(ubyte[])"PK\x05\x06")
+	    {
+		endcommentlength = getUshort(i + 20);
+		// A signature whose comment would run past the buffer is not
+		// the real end record; keep searching.
+		if (i + 22 + endcommentlength > data.length)
+		    continue;
+		comment = cast(char[])data[i + 22 .. i + 22 + endcommentlength];
+		endrecOffset = i;
+		break;
+	    }
+	}
+
+	// Read end record data
+	diskNumber = getUshort(i + 4);
+	diskStartDir = getUshort(i + 6);
+
+	numEntries = getUshort(i + 8);
+	totalEntries = getUshort(i + 10);
+
+	if (numEntries != totalEntries)
+	    throw new ZipException("multiple disk zips not supported");
+
+	directorySize = getUint(i + 12);
+	directoryOffset = getUint(i + 16);
+
+	// The directory must end at or before the end record. Compare without
+	// adding the two 32 bit values, since their sum could wrap around and
+	// make a corrupt directory look valid.
+	if (directoryOffset > i || directorySize > i - directoryOffset)
+	    throw new ZipException("corrupted directory");
+	directoryEnd = directoryOffset + directorySize;
+
+	i = directoryOffset;
+	for (int n = 0; n < numEntries; n++)
+	{
+	    /* The format of an entry is:
+	     *	'PK' 1, 2
+	     *	directory info
+	     *	path
+	     *	extra data
+	     *	comment
+	     */
+
+	    uint namelen;
+	    uint extralen;
+	    uint commentlen;
+
+	    // The fixed part of an entry is 46 bytes; make sure all of it lies
+	    // inside the directory before reading any of its fields.
+	    if (cast(uint)i > directoryEnd || directoryEnd - cast(uint)i < 46)
+		throw new ZipException("invalid directory entry 1");
+
+	    if (data[i .. i + 4] != cast(ubyte[])"PK\x01\x02")
+		throw new ZipException("invalid directory entry 1");
+	    ArchiveMember de = new ArchiveMember();
+	    de.madeVersion = getUshort(i + 4);
+	    de.extractVersion = getUshort(i + 6);
+	    de.flags = getUshort(i + 8);
+	    de.compressionMethod = getUshort(i + 10);
+	    de.time = cast(DosFileTime)getUint(i + 12);
+	    de.crc32 = getUint(i + 16);
+	    de.compressedSize = getUint(i + 20);
+	    de.expandedSize = getUint(i + 24);
+	    namelen = getUshort(i + 28);
+	    extralen = getUshort(i + 30);
+	    commentlen = getUshort(i + 32);
+	    de.diskNumber = getUshort(i + 34);
+	    de.internalAttributes = getUshort(i + 36);
+	    de.externalAttributes = getUint(i + 38);
+	    de.offset = getUint(i + 42);
+	    i += 46;
+
+	    if (i + namelen + extralen + commentlen > directoryEnd)
+		throw new ZipException("invalid directory entry 2");
+
+	    de.name = cast(char[])data[i .. i + namelen];
+	    i += namelen;
+	    de.extra = data[i .. i + extralen];
+	    i += extralen;
+	    de.comment = cast(char[])data[i .. i + commentlen];
+	    i += commentlen;
+
+	    directory[de.name] = de;
+	}
+	// The entries must account for exactly the advertised directory size.
+	if (i != directoryEnd)
+	    throw new ZipException("invalid directory entry 3");
+    }
+
+    /*****
+     * Decompress the contents of archive member de and return the expanded
+     * data.
+     *
+     * Fills in properties extractVersion, flags, compressionMethod, time,
+     * compressedData[] and expandedData[] from the member's local header
+     * and data. crc32, compressedSize and expandedSize are also taken from
+     * the local header, unless bit 3 of the flags is set, in which case the
+     * values already stored in de are kept.
+     *
+     * Throws: ZipException if the local header or the member data does not
+     * lie before the end record, if the member is encrypted, or if its
+     * compression method is not 0 or 8.
+     */
+    ubyte[] expand(ArchiveMember de)
+    {	uint namelen;
+	uint extralen;
+
+	// The fixed part of a local header is 30 bytes; it must lie entirely
+	// before the end record or the reads below would leave the archive.
+	if (de.offset > endrecOffset || endrecOffset - de.offset < 30)
+	    throw new ZipException("invalid directory entry 4");
+
+	if (data[de.offset .. de.offset + 4] != cast(ubyte[])"PK\x03\x04")
+	    throw new ZipException("invalid directory entry 4");
+
+	// These values should match what is in the main zip archive directory
+	de.extractVersion = getUshort(de.offset + 4);
+	de.flags = getUshort(de.offset + 6);
+	de.compressionMethod = getUshort(de.offset + 8);
+	de.time = cast(DosFileTime)getUint(de.offset + 10);
+	if (!(de.flags & 8))
+	{
+	    de.crc32 = getUint(de.offset + 14);
+	    de.compressedSize = getUint(de.offset + 18);
+	    de.expandedSize = getUint(de.offset + 22);
+	}
+	// else: flag bit 3 means the crc and sizes follow the data in a data
+	// descriptor and the local header fields are zero, so the values read
+	// from the archive directory are kept.
+	namelen = getUshort(de.offset + 26);
+	extralen = getUshort(de.offset + 28);
+
+	debug(print)
+	{
+	    printf("\t\texpandedSize = %d\n", de.expandedSize);
+	    printf("\t\tcompressedSize = %d\n", de.compressedSize);
+	    printf("\t\tnamelen = %d\n", namelen);
+	    printf("\t\textralen = %d\n", extralen);
+	}
+
+	if (de.flags & 1)
+	    throw new ZipException("encryption not supported");
+
+	// Do the range check in 64 bits so that a huge compressedSize cannot
+	// wrap the sum around and slip past it.
+	ulong dataStart = cast(ulong)de.offset + 30 + namelen + extralen;
+	if (dataStart + de.compressedSize > endrecOffset)
+	    throw new ZipException("invalid directory entry 5");
+
+	uint i = cast(uint)dataStart;
+	de.compressedData = data[i .. i + de.compressedSize];
+	debug(print) arrayPrint(de.compressedData);
+
+	switch (de.compressionMethod)
+	{
+	    case 0:
+		// Stored: the expanded data is the same slice of data[].
+		de.expandedData = de.compressedData;
+		return de.expandedData;
+
+	    case 8:
+		// -15 is a magic value used to decompress zip files.
+		// It has the effect of not requiring the 2 byte header
+		// and 4 byte trailer.
+		de.expandedData = cast(ubyte[])std.zlib.uncompress(cast(void[])de.compressedData, de.expandedSize, -15);
+		return de.expandedData;
+
+	    default:
+		throw new ZipException("unsupported compression method");
+	}
+	assert(0);
+    }
+
+    /* ============ Utility =================== */
+
+    /* Read the little-endian 16 bit value stored at data[i .. i + 2].
+     *
+     * The value is assembled from individual bytes, so every byte read is a
+     * regular array access, no unaligned multi-byte load is performed, and
+     * the result does not depend on the byte order of the host.
+     */
+    ushort getUshort(int i)
+    {
+	uint lo = data[i];
+	uint hi = data[i + 1];
+	return cast(ushort)((hi << 8) | lo);
+    }
+
+    /* Read the little-endian 32 bit value stored at data[i .. i + 4].
+     *
+     * The value is assembled from individual bytes, so every byte read is a
+     * regular array access, no unaligned multi-byte load is performed, and
+     * the result does not depend on the byte order of the host.
+     */
+    uint getUint(int i)
+    {
+	uint b0 = data[i];
+	uint b1 = data[i + 1];
+	uint b2 = data[i + 2];
+	uint b3 = data[i + 3];
+	return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
+    }
+
+    /* Store us as a little-endian 16 bit value at data[i .. i + 2].
+     *
+     * The value is written one byte at a time, so every byte written is a
+     * regular array access, no unaligned multi-byte store is performed, and
+     * the layout does not depend on the byte order of the host.
+     */
+    void putUshort(int i, ushort us)
+    {
+	data[i] = cast(ubyte)us;
+	data[i + 1] = cast(ubyte)(us >> 8);
+    }
+
+    /* Store ui as a little-endian 32 bit value at data[i .. i + 4].
+     *
+     * The value is written one byte at a time, so every byte written is a
+     * regular array access, no unaligned multi-byte store is performed, and
+     * the layout does not depend on the byte order of the host.
+     */
+    void putUint(int i, uint ui)
+    {
+	data[i] = cast(ubyte)ui;
+	data[i + 1] = cast(ubyte)(ui >> 8);
+	data[i + 2] = cast(ubyte)(ui >> 16);
+	data[i + 3] = cast(ubyte)(ui >> 24);
+    }
+}
+
+debug(print)
+{
+    /* Debug aid, compiled only when the 'print' debug identifier is enabled:
+     * prints the address and length of array, then its bytes in hex,
+     * 16 per line.
+     */
+    void arrayPrint(ubyte[] array)
+    {
+	// %d expects an int; the length is cast because it is wider than int
+	// on 64 bit targets.
+	printf("array %p,%d\n", array.ptr, cast(int) array.length);
+	for (size_t i = 0; i < array.length; i++)
+	{
+	    printf("%02x ", array[i]);
+	    // Break the line after every 16th byte.
+	    if (((i + 1) & 15) == 0)
+		printf("\n");
+	}
+	printf("\n");
+    }
+}