From: Tim Kientzle Date: Sat, 28 Dec 2013 09:45:32 +0000 (-0800) Subject: Test for large Zip archives, following code for large Tar test. X-Git-Tag: v3.1.900a~327^2~24 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a25a3a67758e0bea566169b7cf3dac16471fc9c9;p=thirdparty%2Flibarchive.git Test for large Zip archives, following code for large Tar test. Fix several bugs: * comparison function for ordering entries in reader was wrong * writer wasn't including 64-bit sizes for entries of exactly 0xffffffff bytes Also, add options to suppress CRC calculations and checks (otherwise, this test spends a *lot* of time in CRC routines). --- diff --git a/Makefile.am b/Makefile.am index 40cd09b45..251f32846 100644 --- a/Makefile.am +++ b/Makefile.am @@ -512,11 +512,12 @@ libarchive_test_SOURCES= \ libarchive/test/test_write_format_xar.c \ libarchive/test/test_write_format_xar_empty.c \ libarchive/test/test_write_format_zip.c \ + libarchive/test/test_write_format_zip_compression_store.c \ libarchive/test/test_write_format_zip_empty.c \ libarchive/test/test_write_format_zip_empty_zip64.c \ libarchive/test/test_write_format_zip_file.c \ libarchive/test/test_write_format_zip_file_zip64.c \ - libarchive/test/test_write_format_zip_compression_store.c \ + libarchive/test/test_write_format_zip_large.c \ libarchive/test/test_write_open_memory.c \ libarchive/test/test_zip_filename_encoding.c diff --git a/libarchive/archive_read_support_format_zip.c b/libarchive/archive_read_support_format_zip.c index f895b1923..65060508a 100644 --- a/libarchive/archive_read_support_format_zip.c +++ b/libarchive/archive_read_support_format_zip.c @@ -97,6 +97,8 @@ struct zip { /* Running CRC32 of the decompressed data */ unsigned long entry_crc32; + unsigned long (*crc32func)(unsigned long, const void *, size_t); + char ignore_crc32; /* Flags to mark progress of decompression. */ char decompress_init; @@ -162,6 +164,21 @@ static time_t zip_time(const char *); static const char *compression_name(int compression); static void process_extra(const char *, size_t, struct zip_entry *); +static unsigned long +real_crc32(unsigned long crc, const void *buff, size_t len) +{ + return crc32(crc, buff, len); +} + +static unsigned long +fake_crc32(unsigned long crc, const void *buff, size_t len) +{ + (void)crc; /* UNUSED */ + (void)buff; /* UNUSED */ + (void)len; /* UNUSED */ + return 0; +} + int archive_read_support_format_zip_streamable(struct archive *_a) { @@ -432,7 +449,11 @@ cmp_node(const struct archive_rb_node *n1, const struct archive_rb_node *n2) const struct zip_entry *e1 = (const struct zip_entry *)n1; const struct zip_entry *e2 = (const struct zip_entry *)n2; - return ((int)(e2->local_header_offset - e1->local_header_offset)); + if (e1->local_header_offset > e2->local_header_offset) + return -1; + if (e1->local_header_offset < e2->local_header_offset) + return 1; + return 0; } static int @@ -444,6 +465,10 @@ cmp_key(const struct archive_rb_node *n, const void *key) return 1; } +static const struct archive_rb_tree_ops rb_ops = { + &cmp_node, &cmp_key +}; + static int rsrc_cmp_node(const struct archive_rb_node *n1, const struct archive_rb_node *n2) @@ -461,6 +486,10 @@ rsrc_cmp_key(const struct archive_rb_node *n, const void *key) return (strcmp((const char *)key, e->rsrcname.s)); } +static const struct archive_rb_tree_ops rb_rsrc_ops = { + &rsrc_cmp_node, &rsrc_cmp_key +}; + static const char * rsrc_basename(const char *name, size_t name_length) { @@ -522,12 +551,6 @@ slurp_central_directory(struct archive_read *a, struct zip *zip) int64_t correction; ssize_t bytes_avail; const char *p; - static const struct archive_rb_tree_ops rb_ops = { - &cmp_node, &cmp_key - }; - static const struct archive_rb_tree_ops rb_rsrc_ops = { - &rsrc_cmp_node, &rsrc_cmp_key - }; /* * Find the start of the central directory. The end-of-CD @@ -1018,6 +1041,16 @@ archive_read_format_zip_options(struct archive_read *a, ret = ARCHIVE_FATAL; } return (ret); + } else if (strcmp(key, "ignorecrc32") == 0) { + /* Mostly useful for testing. */ + if (val == NULL || val[0] == 0) { + zip->crc32func = real_crc32; + zip->ignore_crc32 = 0; + } else { + zip->crc32func = fake_crc32; + zip->ignore_crc32 = 1; + } + return (ARCHIVE_OK); } /* Note: The "warn" return is just to inform the options @@ -1145,7 +1178,7 @@ zip_read_local_file_header(struct archive_read *a, struct archive_entry *entry, zip->end_of_entry = 0; zip->entry_uncompressed_bytes_read = 0; zip->entry_compressed_bytes_read = 0; - zip->entry_crc32 = crc32(0, NULL, 0); + zip->entry_crc32 = zip->crc32func(0, NULL, 0); /* Setup default conversion. */ if (zip->sconv == NULL && !zip->init_default_conversion) { @@ -1276,7 +1309,8 @@ zip_read_local_file_header(struct archive_read *a, struct archive_entry *entry, writers always put zero in the local header; don't bother warning about that. */ if (zip_entry->crc32 != 0 - && zip_entry->crc32 != zip_entry_original.crc32) { + && zip_entry->crc32 != zip_entry_original.crc32 + && !zip->ignore_crc32) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Inconsistent CRC32 values"); @@ -1454,7 +1488,7 @@ archive_read_format_zip_read_data(struct archive_read *a, return (r); /* Update checksum */ if (*size) - zip->entry_crc32 = crc32(zip->entry_crc32, *buff, + zip->entry_crc32 = zip->crc32func(zip->entry_crc32, *buff, (unsigned)*size); /* If we hit the end, swallow any end-of-data marker. */ if (zip->end_of_entry) { @@ -1480,7 +1514,8 @@ archive_read_format_zip_read_data(struct archive_read *a, return (ARCHIVE_WARN); } /* Check computed CRC against header */ - if (zip->entry->crc32 != zip->entry_crc32) { + if (zip->entry->crc32 != zip->entry_crc32 + && !zip->ignore_crc32) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "ZIP bad CRC: 0x%lx should be 0x%lx", (unsigned long)zip->entry_crc32, @@ -1548,7 +1583,8 @@ zip_read_data_none(struct archive_read *a, const void **_buff, p = buff; if (p[0] == 'P' && p[1] == 'K' && p[2] == '\007' && p[3] == '\010' - && archive_le32dec(p + 4) == zip->entry_crc32) { + && (archive_le32dec(p + 4) == zip->entry_crc32 + || zip->ignore_crc32)) { if (zip->entry->have_zip64) { zip->entry->crc32 = archive_le32dec(p + 4); zip->entry->compressed_size = archive_le64dec(p + 8); diff --git a/libarchive/archive_write_set_format_zip.c b/libarchive/archive_write_set_format_zip.c index 96b502523..2d9acfe0b 100644 --- a/libarchive/archive_write_set_format_zip.c +++ b/libarchive/archive_write_set_format_zip.c @@ -98,6 +98,7 @@ struct zip { unsigned char *file_header; size_t file_header_extra_offset; + unsigned long (*crc32func)(unsigned long crc, const void *buff, size_t len); struct cd_segment *central_directory; struct cd_segment *central_directory_last; @@ -173,6 +174,21 @@ cd_alloc(struct zip *zip, size_t length) return (p); } +static unsigned long +real_crc32(unsigned long crc, const void *buff, size_t len) +{ + return crc32(crc, buff, len); +} + +static unsigned long +fake_crc32(unsigned long crc, const void *buff, size_t len) +{ + (void)crc; /* UNUSED */ + (void)buff; /* UNUSED */ + (void)len; /* UNUSED */ + return 0; +} + static int archive_write_zip_options(struct archive_write *a, const char *key, const char *val) @@ -198,6 +214,14 @@ archive_write_zip_options(struct archive_write *a, const char *key, ret = ARCHIVE_OK; } return (ret); + } else if (strcmp(key, "fakecrc32") == 0) { + /* FOR TESTING ONLY: turn off CRC calculator to speed up + * certain complex tests. */ + if (val == NULL || val[0] == 0) { + zip->crc32func = real_crc32; + } else { + zip->crc32func = fake_crc32; + } } else if (strcmp(key, "hdrcharset") == 0) { if (val == NULL || val[0] == 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, @@ -296,6 +320,7 @@ archive_write_set_format_zip(struct archive *_a) /* "Unspecified" lets us choose the appropriate compression. */ zip->requested_compression = COMPRESSION_UNSPECIFIED; + zip->crc32func = real_crc32; #ifdef HAVE_ZLIB_H zip->len_buf = 65536; @@ -373,7 +398,7 @@ archive_write_zip_header(struct archive_write *a, struct archive_entry *entry) zip->entry_uncompressed_written = 0; zip->entry_flags = 0; zip->entry_uses_zip64 = 0; - zip->entry_crc32 = crc32(0, NULL, 0); + zip->entry_crc32 = zip->crc32func(0, NULL, 0); if (zip->entry != NULL) { archive_entry_free(zip->entry); zip->entry = NULL; @@ -457,7 +482,7 @@ archive_write_zip_header(struct archive_write *a, struct archive_entry *entry) zip->entry_uncompressed_limit = symlink_size; zip->entry_compressed_size = symlink_size; zip->entry_uncompressed_size = symlink_size; - zip->entry_crc32 = crc32(zip->entry_crc32, + zip->entry_crc32 = zip->crc32func(zip->entry_crc32, (const unsigned char *)symlink, symlink_size); zip->entry_compression = COMPRESSION_STORE; version_needed = 20; @@ -702,7 +727,7 @@ archive_write_zip_data(struct archive_write *a, const void *buff, size_t s) } zip->entry_uncompressed_limit -= s; - zip->entry_crc32 = crc32(zip->entry_crc32, buff, (unsigned)s); + zip->entry_crc32 = zip->crc32func(zip->entry_crc32, buff, (unsigned)s); return (s); } @@ -763,15 +788,15 @@ archive_write_zip_finish_entry(struct archive_write *a) unsigned char *z = zip64, *zd; memcpy(z, "\001\000\000\000", 4); z += 4; - if (zip->entry_uncompressed_written > 0xffffffffLL) { + if (zip->entry_uncompressed_written >= 0xffffffffLL) { archive_le64enc(z, zip->entry_uncompressed_written); z += 8; } - if (zip->entry_compressed_written > 0xffffffffLL) { + if (zip->entry_compressed_written >= 0xffffffffLL) { archive_le64enc(z, zip->entry_compressed_written); z += 8; } - if (zip->entry_offset > 0xffffffffLL) { + if (zip->entry_offset >= 0xffffffffLL) { archive_le64enc(z, zip->entry_offset); z += 8; } diff --git a/libarchive/test/CMakeLists.txt b/libarchive/test/CMakeLists.txt index 83608708b..d637bd3ec 100644 --- a/libarchive/test/CMakeLists.txt +++ b/libarchive/test/CMakeLists.txt @@ -225,11 +225,12 @@ IF(ENABLE_TEST) test_write_format_xar.c test_write_format_xar_empty.c test_write_format_zip.c + test_write_format_zip_compression_store.c test_write_format_zip_empty.c test_write_format_zip_empty_zip64.c test_write_format_zip_file.c test_write_format_zip_file_zip64.c - test_write_format_zip_compression_store.c + test_write_format_zip_large.c test_write_open_memory.c test_zip_filename_encoding.c ) diff --git a/libarchive/test/test_write_format_zip_large.c b/libarchive/test/test_write_format_zip_large.c new file mode 100644 index 000000000..f30291790 --- /dev/null +++ b/libarchive/test/test_write_format_zip_large.c @@ -0,0 +1,350 @@ +/*- + * Copyright (c) 2003-2007,2013 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "test.h" +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +/* + * This is a somewhat tricky test that verifies the ability to + * write and read very large entries to zip archives. + * + * See test_tar_large.c for more information about the machinery + * being used here. + */ + +static size_t nullsize; +static void *nulldata; + +struct fileblock { + struct fileblock *next; + int size; + void *buff; + int64_t gap_size; /* Size of following gap */ +}; + +struct fileblocks { + int64_t filesize; + int64_t fileposition; + int64_t gap_remaining; + void *buff; + struct fileblock *first; + struct fileblock *current; + struct fileblock *last; +}; + +/* The following size definitions simplify things below. */ +#define KB ((int64_t)1024) +#define MB ((int64_t)1024 * KB) +#define GB ((int64_t)1024 * MB) +#define TB ((int64_t)1024 * GB) + +static int64_t memory_read_skip(struct archive *, void *, int64_t request); +static ssize_t memory_read(struct archive *, void *, const void **buff); +static ssize_t memory_write(struct archive *, void *, const void *, size_t); + +static ssize_t +memory_write(struct archive *a, void *_private, const void *buff, size_t size) +{ + struct fileblocks *private = _private; + struct fileblock *block; + + (void)a; + + if ((const char *)nulldata <= (const char *)buff + && (const char *)buff < (const char *)nulldata + nullsize) { + /* We don't need to store a block of gap data. */ + private->last->gap_size += (int64_t)size; + } else { + /* Yes, we're assuming the very first write is metadata. */ + /* It's header or metadata, copy and save it. */ + block = (struct fileblock *)malloc(sizeof(*block)); + memset(block, 0, sizeof(*block)); + block->size = size; + block->buff = malloc(size); + memcpy(block->buff, buff, size); + if (private->last == NULL) { + private->first = private->last = block; + } else { + private->last->next = block; + private->last = block; + } + block->next = NULL; + } + private->filesize += size; + return ((long)size); +} + +static ssize_t +memory_read(struct archive *a, void *_private, const void **buff) +{ + struct fileblocks *private = _private; + ssize_t size; + + (void)a; + + while (private->current != NULL && private->buff == NULL && private->gap_remaining == 0) { + private->current = private->current->next; + if (private->current != NULL) { + private->buff = private->current->buff; + private->gap_remaining = private->current->gap_size; + } + } + + if (private->current == NULL) + return (ARCHIVE_EOF); + + /* If there's real data, return that. */ + if (private->buff != NULL) { + *buff = private->buff; + size = (private->current->buff + private->current->size) + - private->buff; + private->buff = NULL; + private->fileposition += size; + return (size); + } + + /* Big gap: too big to return all at once, so just return some. */ + if (private->gap_remaining > (int64_t)nullsize) { + private->gap_remaining -= nullsize; + *buff = nulldata; + private->fileposition += nullsize; + return (nullsize); + } + + /* Small gap: finish the gap and prep for next block. */ + if (private->gap_remaining > 0) { + size = (ssize_t)private->gap_remaining; + *buff = nulldata; + private->gap_remaining = 0; + private->fileposition += size; + + private->current = private->current->next; + if (private->current != NULL) { + private->buff = private->current->buff; + private->gap_remaining = private->current->gap_size; + } + + return (size); + } + fprintf(stderr, "\n\n\nInternal failure\n\n\n"); + exit(1); +} + +static int +memory_read_open(struct archive *a, void *_private) +{ + struct fileblocks *private = _private; + + (void)a; /* UNUSED */ + + private->current = private->first; + private->fileposition = 0; + if (private->current != NULL) { + private->buff = private->current->buff; + private->gap_remaining = private->current->gap_size; + } + return (ARCHIVE_OK); +} + +static int64_t +memory_read_seek(struct archive *a, void *_private, int64_t offset, int whence) +{ + struct fileblocks *private = _private; + + (void)a; + if (whence == SEEK_END) { + offset = private->filesize + offset; + whence = SEEK_SET; + } else if (whence == SEEK_CUR) { + offset = private->fileposition + offset; + whence = SEEK_SET; + } + + if (offset < 0) { + fprintf(stderr, "\n\n\nInternal failure: negative seek\n\n\n"); + exit(1); + } + + /* We've converted the request into a SEEK_SET. */ + private->fileposition = offset; + + /* Walk the block list to find the new position. */ + offset = 0; + private->current = private->first; + while (private->current != NULL) { + if (offset + private->current->size > private->fileposition) { + /* Position is in this block. */ + private->buff = private->current->buff + + private->fileposition - offset; + private->gap_remaining = private->current->gap_size; + return private->fileposition; + } + offset += private->current->size; + if (offset + private->current->gap_size > private->fileposition) { + /* Position is in this gap. */ + private->buff = NULL; + private->gap_remaining = private->current->gap_size + - (private->fileposition - offset); + return private->fileposition; + } + offset += private->current->gap_size; + /* Skip to next block. */ + private->current = private->current->next; + } + if (private->fileposition == private->filesize) { + return private->fileposition; + } + fprintf(stderr, "\n\n\nInternal failure: over-sized seek\n\n\n"); + exit(1); +} + +static int64_t +memory_read_skip(struct archive *a, void *_private, int64_t skip) +{ + struct fileblocks *private = _private; + int64_t old_position = private->fileposition; + int64_t new_position = memory_read_seek(a, _private, skip, SEEK_CUR); + return (new_position - old_position); +} + +DEFINE_TEST(test_write_format_zip_large) +{ + /* The sizes of the entries we're going to generate. */ + static int64_t tests[] = { + /* Test for 32-bit signed overflow. */ + 2 * GB - 1, 2 * GB, 2 * GB + 1, + /* Test for 32-bit unsigned overflow. */ + 4 * GB - 1, 4 * GB, 4 * GB + 1, + /* And beyond ... because we can. */ + 8 * GB - 1, 8 * GB, 8 * GB + 1, + 64 * GB - 1, 64 * GB, 64 * GB + 1, + 256 * GB, 1 * TB, 0 }; + int i; + char namebuff[64]; + struct fileblocks fileblocks; + struct archive_entry *ae; + struct archive *a; + int64_t filesize; + size_t writesize; + + nullsize = (size_t)(1 * MB); + nulldata = malloc(nullsize); + memset(nulldata, 0xAA, nullsize); + memset(&fileblocks, 0, sizeof(fileblocks)); + + /* + * Open an archive for writing. + */ + a = archive_write_new(); + archive_write_set_format_zip(a); + archive_write_set_options(a, "zip:compression=store"); + archive_write_set_options(a, "zip:fakecrc32"); + archive_write_set_bytes_per_block(a, 0); /* No buffering. */ + archive_write_open(a, &fileblocks, NULL, memory_write, NULL); + + /* + * Write a series of large files to it. + */ + for (i = 0; tests[i] != 0; i++) { + assert((ae = archive_entry_new()) != NULL); + sprintf(namebuff, "file_%d", i); + archive_entry_copy_pathname(ae, namebuff); + archive_entry_set_mode(ae, S_IFREG | 0755); + filesize = tests[i]; + + archive_entry_set_size(ae, filesize); + + assertA(0 == archive_write_header(a, ae)); + archive_entry_free(ae); + + /* + * Write the actual data to the archive. + */ + while (filesize > 0) { + writesize = nullsize; + if ((int64_t)writesize > filesize) + writesize = (size_t)filesize; + assertA((int)writesize + == archive_write_data(a, nulldata, writesize)); + filesize -= writesize; + } + } + + assert((ae = archive_entry_new()) != NULL); + archive_entry_copy_pathname(ae, "lastfile"); + archive_entry_set_mode(ae, S_IFREG | 0755); + assertA(0 == archive_write_header(a, ae)); + archive_entry_free(ae); + + + /* Close out the archive. */ + assertEqualIntA(a, ARCHIVE_OK, archive_write_close(a)); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + + /* + * Open the same archive for reading. + */ + a = archive_read_new(); + assertEqualIntA(a, ARCHIVE_OK, + archive_read_support_format_zip_seekable(a)); + assertEqualIntA(a, ARCHIVE_OK, + archive_read_set_options(a, "zip:ignorecrc32")); + assertEqualIntA(a, ARCHIVE_OK, + archive_read_set_open_callback(a, memory_read_open)); + assertEqualIntA(a, ARCHIVE_OK, + archive_read_set_read_callback(a, memory_read)); + assertEqualIntA(a, ARCHIVE_OK, + archive_read_set_skip_callback(a, memory_read_skip)); + assertEqualIntA(a, ARCHIVE_OK, + archive_read_set_seek_callback(a, memory_read_seek)); + assertEqualIntA(a, ARCHIVE_OK, + archive_read_set_callback_data(a, &fileblocks)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_open1(a)); + + /* + * Read entries back. + */ + for (i = 0; tests[i] > 0; i++) { + assertEqualIntA(a, ARCHIVE_OK, + archive_read_next_header(a, &ae)); + sprintf(namebuff, "file_%d", i); + assertEqualString(namebuff, archive_entry_pathname(ae)); + assertEqualInt(tests[i], archive_entry_size(ae)); + } + assertEqualIntA(a, 0, archive_read_next_header(a, &ae)); + assertEqualString("lastfile", archive_entry_pathname(ae)); + + assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); + + /* Close out the archive. */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); + assertEqualInt(ARCHIVE_OK, archive_read_free(a)); + + free(fileblocks.buff); + free(nulldata); +}