view lphobos/std/zip.d @ 650:aa6a0b7968f7

Added test case for bug #100 Removed dubious check for not emitting static private global in other modules without access. This should be handled properly somewhere else, it's causing unresolved global errors for stuff that should work (in MiniD)
author Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
date Sun, 05 Oct 2008 17:28:15 +0200
parents 373489eeaf90
children
line wrap: on
line source


/**
 * Read/write data in the $(LINK2 http://www.info-_zip.org, zip archive) format.
 * Makes use of the etc.c.zlib compression library.
 *
 * Bugs: 
 *	$(UL
 *	$(LI Multi-disk zips not supported.)
 *	$(LI Only Zip version 20 formats are supported.)
 *	$(LI Only supports compression modes 0 (no compression) and 8 (deflate).)
 *	$(LI Does not support encryption.)
 *	)
 *
 * Macros:
 *	WIKI = Phobos/StdZip
 */

module std.zip;

private import std.zlib;
private import std.date;
private import std.intrinsic;

//debug=print;

/** Thrown on error.
 */
class ZipException : Exception
{
    this(char[] msg)
    {
	super("ZipException: " ~ msg);
    }
}

/**
 * A member of the ZipArchive.
 */
class ArchiveMember
{
    ushort madeVersion = 20;	/// Read Only
    ushort extractVersion = 20;	/// Read Only
    ushort flags;		/// Read/Write: normally set to 0
    ushort compressionMethod;	/// Read/Write: 0 for compression, 8 for deflate
    std.date.DosFileTime time;	/// Read/Write: Last modified time of the member. It's in the DOS date/time format.
    uint crc32;			/// Read Only: cyclic redundancy check (CRC) value
    uint compressedSize;	/// Read Only: size of data of member in compressed form.
    uint expandedSize;		/// Read Only: size of data of member in expanded form.
    ushort diskNumber;		/// Read Only: should be 0.
    ushort internalAttributes;	/// Read/Write
    uint externalAttributes;	/// Read/Write

    private uint offset;

    /**
     * Read/Write: Usually the file name of the archive member; it is used to
     * index the archive directory for the member. Each member must have a unique
     * name[]. Do not change without removing member from the directory first.
     */
    char[] name;

    ubyte[] extra;		/// Read/Write: extra data for this member.
    char[] comment;		/// Read/Write: comment associated with this member.
    ubyte[] compressedData;	/// Read Only: data of member in compressed form.
    ubyte[] expandedData;	/// Read/Write: data of member in uncompressed form.

    debug(print)
    {
    void print()
    {
	printf("name = '%.*s'\n", cast(int) name.length, name.ptr);
	printf("\tcomment = '%.*s'\n", cast(int) comment.length, comment.ptr);
	printf("\tmadeVersion = x%04x\n", madeVersion);
	printf("\textractVersion = x%04x\n", extractVersion);
	printf("\tflags = x%04x\n", flags);
	printf("\tcompressionMethod = %d\n", compressionMethod);
	printf("\ttime = %d\n", time);
	printf("\tcrc32 = x%08x\n", crc32);
	printf("\texpandedSize = %d\n", expandedSize);
	printf("\tcompressedSize = %d\n", compressedSize);
	printf("\tinternalAttributes = x%04x\n", internalAttributes);
	printf("\texternalAttributes = x%08x\n", externalAttributes);
    }
    }
}

/**
 * Object representing the entire archive.
 * ZipArchives are collections of ArchiveMembers.
 */
class ZipArchive
{
    ubyte[] data;	/// Read Only: array representing the entire contents of the archive.
    uint endrecOffset;

    uint diskNumber;	/// Read Only: 0 since multi-disk zip archives are not supported.
    uint diskStartDir;	/// Read Only: 0 since multi-disk zip archives are not supported.
    uint numEntries;	/// Read Only: number of ArchiveMembers in the directory.
    uint totalEntries;	/// Read Only: same as totalEntries.
    char[] comment;	/// Read/Write: the archive comment. Must be less than 65536 bytes in length.

    /**
     * Read Only: array indexed by the name of each member of the archive.
     * Example:
     *  All the members of the archive can be accessed with a foreach loop:
     * --------------------
     * ZipArchive archive = new ZipArchive(data);
     * foreach (ArchiveMember am; archive.directory)
     * {
     *     writefln("member name is '%s'", am.name);
     * }
     * --------------------
     */
    ArchiveMember[char[]] directory;

    debug (print)
    {
    void print()
    {
	printf("\tdiskNumber = %u\n", diskNumber);
	printf("\tdiskStartDir = %u\n", diskStartDir);
	printf("\tnumEntries = %u\n", numEntries);
	printf("\ttotalEntries = %u\n", totalEntries);
	printf("\tcomment = '%.*s'\n", cast(int) comment.length, comment.ptr);
    }
    }

    /* ============ Creating a new archive =================== */

    /** Constructor to use when creating a new archive.
     */
    this()
    {
    }

    /** Add de to the archive.
     */
    void addMember(ArchiveMember de)
    {
	directory[de.name] = de;
    }

    /** Delete de from the archive.
     */
    void deleteMember(ArchiveMember de)
    {
	directory.remove(de.name);
    }

    /**
     * Construct an archive out of the current members of the archive.
     *
     * Fills in the properties data[], diskNumber, diskStartDir, numEntries,
     * totalEntries, and directory[].
     * For each ArchiveMember, fills in properties crc32, compressedSize,
     * compressedData[].
     *
     * Returns: array representing the entire archive.
     */
    void[] build()
    {	uint i;
	uint directoryOffset;

	if (comment.length > 0xFFFF)
	    throw new ZipException("archive comment longer than 65535");

	// Compress each member; compute size
	uint archiveSize = 0;
	uint directorySize = 0;
	foreach (ArchiveMember de; directory)
	{
	    de.expandedSize = de.expandedData.length;
	    switch (de.compressionMethod)
	    {
		case 0:
		    de.compressedData = de.expandedData;
		    break;

		case 8:
		    de.compressedData = cast(ubyte[])std.zlib.compress(cast(void[])de.expandedData);
		    de.compressedData = de.compressedData[2 .. de.compressedData.length - 4];
		    break;

		default:
		    throw new ZipException("unsupported compression method");
	    }
	    de.compressedSize = de.compressedData.length;
	    de.crc32 = std.zlib.crc32(0, cast(void[])de.expandedData);

	    archiveSize += 30 + de.name.length +
				de.extra.length +
				de.compressedSize;
	    directorySize += 46 + de.name.length +
				de.extra.length +
				de.comment.length;
	}

	data = new ubyte[archiveSize + directorySize + 22 + comment.length];

	// Populate the data[]

	// Store each archive member
	i = 0;
	foreach (ArchiveMember de; directory)
	{
	    de.offset = i;
	    data[i .. i + 4] = cast(ubyte[])"PK\x03\x04";
	    putUshort(i + 4,  de.extractVersion);
	    putUshort(i + 6,  de.flags);
	    putUshort(i + 8,  de.compressionMethod);
	    putUint  (i + 10, cast(uint)de.time);
	    putUint  (i + 14, de.crc32);
	    putUint  (i + 18, de.compressedSize);
	    putUint  (i + 22, de.expandedData.length);
	    putUshort(i + 26, cast(ushort)de.name.length);
	    putUshort(i + 28, cast(ushort)de.extra.length);
	    i += 30;

	    data[i .. i + de.name.length] = cast(ubyte[])de.name[];
	    i += de.name.length;
	    data[i .. i + de.extra.length] = cast(ubyte[])de.extra[];
	    i += de.extra.length;
	    data[i .. i + de.compressedSize] = de.compressedData[];
	    i += de.compressedSize;
	}

	// Write directory
	directoryOffset = i;
	numEntries = 0;
	foreach (ArchiveMember de; directory)
	{
	    data[i .. i + 4] = cast(ubyte[])"PK\x01\x02";
	    putUshort(i + 4,  de.madeVersion);
	    putUshort(i + 6,  de.extractVersion);
	    putUshort(i + 8,  de.flags);
	    putUshort(i + 10, de.compressionMethod);
	    putUint  (i + 12, cast(uint)de.time);
	    putUint  (i + 16, de.crc32);
	    putUint  (i + 20, de.compressedSize);
	    putUint  (i + 24, de.expandedSize);
	    putUshort(i + 28, cast(ushort)de.name.length);
	    putUshort(i + 30, cast(ushort)de.extra.length);
	    putUshort(i + 32, cast(ushort)de.comment.length);
	    putUshort(i + 34, de.diskNumber);
	    putUshort(i + 36, de.internalAttributes);
	    putUint  (i + 38, de.externalAttributes);
	    putUint  (i + 42, de.offset);
	    i += 46;

	    data[i .. i + de.name.length] = cast(ubyte[])de.name[];
	    i += de.name.length;
	    data[i .. i + de.extra.length] = cast(ubyte[])de.extra[];
	    i += de.extra.length;
	    data[i .. i + de.comment.length] = cast(ubyte[])de.comment[];
	    i += de.comment.length;
	    numEntries++;
	}
	totalEntries = numEntries;

	// Write end record
	endrecOffset = i;
	data[i .. i + 4] = cast(ubyte[])"PK\x05\x06";
	putUshort(i + 4,  cast(ushort)diskNumber);
	putUshort(i + 6,  cast(ushort)diskStartDir);
	putUshort(i + 8,  cast(ushort)numEntries);
	putUshort(i + 10, cast(ushort)totalEntries);
	putUint  (i + 12, directorySize);
	putUint  (i + 16, directoryOffset);
	putUshort(i + 20, cast(ushort)comment.length);
	i += 22;

	// Write archive comment
	assert(i + comment.length == data.length);
	data[i .. data.length] = cast(ubyte[])comment[];

	return cast(void[])data;
    }

    /* ============ Reading an existing archive =================== */

    /**
     * Constructor to use when reading an existing archive.
     *
     * Fills in the properties data[], diskNumber, diskStartDir, numEntries,
     * totalEntries, comment[], and directory[].
     * For each ArchiveMember, fills in
     * properties madeVersion, extractVersion, flags, compressionMethod, time,
     * crc32, compressedSize, expandedSize, compressedData[], diskNumber,
     * internalAttributes, externalAttributes, name[], extra[], comment[].
     * Use expand() to get the expanded data for each ArchiveMember.
     *
     * Params:
     *	buffer = the entire contents of the archive.
     */

    this(void[] buffer)
    {	int iend;
	int i;
	int endcommentlength;
	uint directorySize;
	uint directoryOffset;

	this.data = cast(ubyte[]) buffer;

	// Find 'end record index' by searching backwards for signature
	iend = data.length - 66000;
	if (iend < 0)
	    iend = 0;
	for (i = data.length - 22; 1; i--)
	{
	    if (i < iend)
		throw new ZipException("no end record");

	    if (data[i .. i + 4] == cast(ubyte[])"PK\x05\x06")
	    {
		endcommentlength = getUshort(i + 20);
		if (i + 22 + endcommentlength > data.length)
		    continue;
		comment = cast(char[])data[i + 22 .. i + 22 + endcommentlength];
		endrecOffset = i;
		break;
	    }
	}

	// Read end record data
	diskNumber = getUshort(i + 4);
	diskStartDir = getUshort(i + 6);

	numEntries = getUshort(i + 8);
	totalEntries = getUshort(i + 10);

	if (numEntries != totalEntries)
	    throw new ZipException("multiple disk zips not supported");

	directorySize = getUint(i + 12);
	directoryOffset = getUint(i + 16);

	if (directoryOffset + directorySize > i)
	    throw new ZipException("corrupted directory");

	i = directoryOffset;
	for (int n = 0; n < numEntries; n++)
	{
	    /* The format of an entry is:
	     *	'PK' 1, 2
	     *	directory info
	     *	path
	     *	extra data
	     *	comment
	     */

	    uint offset;
	    uint namelen;
	    uint extralen;
	    uint commentlen;

	    if (data[i .. i + 4] != cast(ubyte[])"PK\x01\x02")
		throw new ZipException("invalid directory entry 1");
	    ArchiveMember de = new ArchiveMember();
	    de.madeVersion = getUshort(i + 4);
	    de.extractVersion = getUshort(i + 6);
	    de.flags = getUshort(i + 8);
	    de.compressionMethod = getUshort(i + 10);
	    de.time = cast(DosFileTime)getUint(i + 12);
	    de.crc32 = getUint(i + 16);
	    de.compressedSize = getUint(i + 20);
	    de.expandedSize = getUint(i + 24);
	    namelen = getUshort(i + 28);
	    extralen = getUshort(i + 30);
	    commentlen = getUshort(i + 32);
	    de.diskNumber = getUshort(i + 34);
	    de.internalAttributes = getUshort(i + 36);
	    de.externalAttributes = getUint(i + 38);
	    de.offset = getUint(i + 42);
	    i += 46;

	    if (i + namelen + extralen + commentlen > directoryOffset + directorySize)
		throw new ZipException("invalid directory entry 2");

	    de.name = cast(char[])data[i .. i + namelen];
	    i += namelen;
	    de.extra = data[i .. i + extralen];
	    i += extralen;
	    de.comment = cast(char[])data[i .. i + commentlen];
	    i += commentlen;

	    directory[de.name] = de;
	}
	if (i != directoryOffset + directorySize)
	    throw new ZipException("invalid directory entry 3");
    }

    /*****
     * Decompress the contents of archive member de and return the expanded
     * data.
     *
     * Fills in properties extractVersion, flags, compressionMethod, time,
     * crc32, compressedSize, expandedSize, expandedData[], name[], extra[].
     */
    ubyte[] expand(ArchiveMember de)
    {	uint namelen;
	uint extralen;

	if (data[de.offset .. de.offset + 4] != cast(ubyte[])"PK\x03\x04")
	    throw new ZipException("invalid directory entry 4");

	// These values should match what is in the main zip archive directory
	de.extractVersion = getUshort(de.offset + 4);
	de.flags = getUshort(de.offset + 6);
	de.compressionMethod = getUshort(de.offset + 8);
	de.time = cast(DosFileTime)getUint(de.offset + 10);
	de.crc32 = getUint(de.offset + 14);
	de.compressedSize = getUint(de.offset + 18);
	de.expandedSize = getUint(de.offset + 22);
	namelen = getUshort(de.offset + 26);
	extralen = getUshort(de.offset + 28);

	debug(print)
	{
	    printf("\t\texpandedSize = %d\n", de.expandedSize);
	    printf("\t\tcompressedSize = %d\n", de.compressedSize);
	    printf("\t\tnamelen = %d\n", namelen);
	    printf("\t\textralen = %d\n", extralen);
	}

	if (de.flags & 1)
	    throw new ZipException("encryption not supported");

	int i;
	i = de.offset + 30 + namelen + extralen;
	if (i + de.compressedSize > endrecOffset)
	    throw new ZipException("invalid directory entry 5");

	de.compressedData = data[i .. i + de.compressedSize];
	debug(print) arrayPrint(de.compressedData);

	switch (de.compressionMethod)
	{
	    case 0:
		de.expandedData = de.compressedData;
		return de.expandedData;

	    case 8:
		// -15 is a magic value used to decompress zip files.
		// It has the effect of not requiring the 2 byte header
		// and 4 byte trailer.
		de.expandedData = cast(ubyte[])std.zlib.uncompress(cast(void[])de.compressedData, de.expandedSize, -15);
		return de.expandedData;

	    default:
		throw new ZipException("unsupported compression method");
	}
	assert(0);
    }

    /* ============ Utility =================== */

    ushort getUshort(int i)
    {
	version (LittleEndian)
	{
	    return *cast(ushort *)&data[i];
	}
	else
	{
	    ubyte b0 = data[i];
	    ubyte b1 = data[i + 1];
	    return (b1 << 8) | b0;
	}
    }

    uint getUint(int i)
    {
	version (LittleEndian)
	{
	    return *cast(uint *)&data[i];
	}
	else
	{
	    return bswap(*cast(uint *)&data[i]);
	}
    }

    void putUshort(int i, ushort us)
    {
	version (LittleEndian)
	{
	    *cast(ushort *)&data[i] = us;
	}
	else
	{
	    data[i] = cast(ubyte)us;
	    data[i + 1] = cast(ubyte)(us >> 8);
	}
    }

    void putUint(int i, uint ui)
    {
	version (BigEndian)
	{
	    ui = bswap(ui);
	}
	*cast(uint *)&data[i] = ui;
    }
}

debug(print)
{
    void arrayPrint(ubyte[] array)
    {
	printf("array %p,%d\n", cast(void*)array, array.length);
	for (int i = 0; i < array.length; i++)
	{
	    printf("%02x ", array[i]);
	    if (((i + 1) & 15) == 0)
		printf("\n");
	}
	printf("\n");
    }
}