From 8acb738db6bc7087a5e7cdd328bbfb6e673e5bd8 Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Sat, 23 Mar 2024 17:07:08 -0700 Subject: [PATCH] Overhaul Zip end-of-data marker parsing (#2042) This significantly changes how end-of-data markers are parsed. In particular, the spec allows the end-of-data marker to have either 32-bit or 64-bit size values, and there is basically no indication which is being used. (The spec mentions "Zip64 mode" in many places, but there is no definitive way for a Zip reader to know whether the writer is using this mode or not. My mis-reading of another part of the spec caused me to believe that the Zip64 Extra Data field was such a marker, but I've been patiently corrected. ;-) So a Zip reader just has to guess: Try every possible end-of-data marker format and accept it if any of the four possible forms is correct. In libarchive's case, this required some non-trivial additional refactoring to ensure that the CRC32, compressed size, and uncompressed size statistics are always updated _before_ we need to look for an end-of-data marker. This generally follows the strategy outlined by Mark Adler for his `sunzip` streaming unzip implementation. While testing this, I played with pmqs/zipdetails which pointed out a discrepancy in how libarchive writes the `UT` extra field. I folded a fix for that in here as well. Resolves #1834 TODO: It would be nice to augment the test suite with some static files created by Java's implementation to verify that we can read those when they hold entries of +/- 4GiB. The existing `test_write_format_zip_large` uses an ad hoc RLE encoding trick to exercise writing and reading back multi-gigabyte entries. I wonder if that could be generalized to support deflate-compressed Zip data stored in test files? 
--- Makefile.am | 4 +- libarchive/archive_entry.c | 29 + libarchive/archive_entry.h | 4 + libarchive/archive_entry_private.h | 4 + libarchive/archive_read_support_format_zip.c | 498 ++++++++++++------ libarchive/archive_write_set_format_zip.c | 188 ++++--- libarchive/test/CMakeLists.txt | 2 + libarchive/test/test_read_format_zip.c | 2 +- libarchive/test/test_write_format_zip.c | 49 +- .../test/test_write_format_zip64_stream.c | 276 ++++++++++ .../test_write_format_zip_compression_store.c | 65 +-- .../test_write_format_zip_entry_size_unset.c | 12 +- libarchive/test/test_write_format_zip_file.c | 42 +- .../test/test_write_format_zip_file_zip64.c | 46 +- libarchive/test/test_write_format_zip_large.c | 30 +- .../test/test_write_format_zip_stream.c | 247 +++++++++ libarchive/test/test_write_read_format_zip.c | 55 +- test_utils/test_common.h | 3 + test_utils/test_main.c | 15 + 19 files changed, 1196 insertions(+), 375 deletions(-) create mode 100644 libarchive/test/test_write_format_zip64_stream.c create mode 100644 libarchive/test/test_write_format_zip_stream.c diff --git a/Makefile.am b/Makefile.am index c7fbd1439..286f08694 100644 --- a/Makefile.am +++ b/Makefile.am @@ -633,13 +633,15 @@ libarchive_test_SOURCES= \ libarchive/test/test_write_format_xar.c \ libarchive/test/test_write_format_xar_empty.c \ libarchive/test/test_write_format_zip.c \ + libarchive/test/test_write_format_zip64_stream.c \ libarchive/test/test_write_format_zip_compression_store.c \ - libarchive/test/test_write_format_zip_entry_size_unset.c \ libarchive/test/test_write_format_zip_empty.c \ libarchive/test/test_write_format_zip_empty_zip64.c \ + libarchive/test/test_write_format_zip_entry_size_unset.c \ libarchive/test/test_write_format_zip_file.c \ libarchive/test/test_write_format_zip_file_zip64.c \ libarchive/test/test_write_format_zip_large.c \ + libarchive/test/test_write_format_zip_stream.c \ libarchive/test/test_write_format_zip_zip64.c \ libarchive/test/test_write_open_memory.c \ 
libarchive/test/test_write_read_format_zip.c \ diff --git a/libarchive/archive_entry.c b/libarchive/archive_entry.c index 33fa9ffef..9463233e6 100644 --- a/libarchive/archive_entry.c +++ b/libarchive/archive_entry.c @@ -371,6 +371,12 @@ archive_entry_filetype(struct archive_entry *entry) return (AE_IFMT & entry->acl.mode); } +int +archive_entry_filetype_is_set(struct archive_entry *entry) +{ + return (entry->ae_set & AE_SET_FILETYPE); +} + void archive_entry_fflags(struct archive_entry *entry, unsigned long *set, unsigned long *clear) @@ -424,6 +430,12 @@ archive_entry_gid(struct archive_entry *entry) return (entry->ae_stat.aest_gid); } +int +archive_entry_gid_is_set(struct archive_entry *entry) +{ + return (entry->ae_set & AE_SET_GID); +} + const char * archive_entry_gname(struct archive_entry *entry) { @@ -630,6 +642,12 @@ archive_entry_perm(struct archive_entry *entry) return (~AE_IFMT & entry->acl.mode); } +int +archive_entry_perm_is_set(struct archive_entry *entry) +{ + return (entry->ae_set & AE_SET_PERM); +} + dev_t archive_entry_rdev(struct archive_entry *entry) { @@ -758,6 +776,12 @@ archive_entry_uid(struct archive_entry *entry) return (entry->ae_stat.aest_uid); } +int +archive_entry_uid_is_set(struct archive_entry *entry) +{ + return (entry->ae_set & AE_SET_UID); +} + const char * archive_entry_uname(struct archive_entry *entry) { @@ -826,6 +850,7 @@ archive_entry_set_filetype(struct archive_entry *entry, unsigned int type) entry->stat_valid = 0; entry->acl.mode &= ~AE_IFMT; entry->acl.mode |= AE_IFMT & type; + entry->ae_set |= AE_SET_FILETYPE; } void @@ -860,6 +885,7 @@ archive_entry_set_gid(struct archive_entry *entry, la_int64_t g) { entry->stat_valid = 0; entry->ae_stat.aest_gid = g; + entry->ae_set |= AE_SET_GID; } void @@ -1144,6 +1170,7 @@ archive_entry_set_mode(struct archive_entry *entry, mode_t m) { entry->stat_valid = 0; entry->acl.mode = m; + entry->ae_set |= AE_SET_PERM | AE_SET_FILETYPE; } void @@ -1219,6 +1246,7 @@ 
archive_entry_set_perm(struct archive_entry *entry, mode_t p) entry->stat_valid = 0; entry->acl.mode &= AE_IFMT; entry->acl.mode |= ~AE_IFMT & p; + entry->ae_set |= AE_SET_PERM; } void @@ -1353,6 +1381,7 @@ archive_entry_set_uid(struct archive_entry *entry, la_int64_t u) { entry->stat_valid = 0; entry->ae_stat.aest_uid = u; + entry->ae_set |= AE_SET_UID; } void diff --git a/libarchive/archive_entry.h b/libarchive/archive_entry.h index 6638f5abd..df9cb765f 100644 --- a/libarchive/archive_entry.h +++ b/libarchive/archive_entry.h @@ -249,11 +249,13 @@ __LA_DECL int archive_entry_dev_is_set(struct archive_entry *); __LA_DECL dev_t archive_entry_devmajor(struct archive_entry *); __LA_DECL dev_t archive_entry_devminor(struct archive_entry *); __LA_DECL __LA_MODE_T archive_entry_filetype(struct archive_entry *); +__LA_DECL int archive_entry_filetype_is_set(struct archive_entry *); __LA_DECL void archive_entry_fflags(struct archive_entry *, unsigned long * /* set */, unsigned long * /* clear */); __LA_DECL const char *archive_entry_fflags_text(struct archive_entry *); __LA_DECL la_int64_t archive_entry_gid(struct archive_entry *); +__LA_DECL int archive_entry_gid_is_set(struct archive_entry *); __LA_DECL const char *archive_entry_gname(struct archive_entry *); __LA_DECL const char *archive_entry_gname_utf8(struct archive_entry *); __LA_DECL const wchar_t *archive_entry_gname_w(struct archive_entry *); @@ -272,6 +274,7 @@ __LA_DECL const char *archive_entry_pathname(struct archive_entry *); __LA_DECL const char *archive_entry_pathname_utf8(struct archive_entry *); __LA_DECL const wchar_t *archive_entry_pathname_w(struct archive_entry *); __LA_DECL __LA_MODE_T archive_entry_perm(struct archive_entry *); +__LA_DECL int archive_entry_perm_is_set(struct archive_entry *); __LA_DECL dev_t archive_entry_rdev(struct archive_entry *); __LA_DECL dev_t archive_entry_rdevmajor(struct archive_entry *); __LA_DECL dev_t archive_entry_rdevminor(struct archive_entry *); @@ -285,6 +288,7 @@ 
__LA_DECL const char *archive_entry_symlink_utf8(struct archive_entry *); __LA_DECL int archive_entry_symlink_type(struct archive_entry *); __LA_DECL const wchar_t *archive_entry_symlink_w(struct archive_entry *); __LA_DECL la_int64_t archive_entry_uid(struct archive_entry *); +__LA_DECL int archive_entry_uid_is_set(struct archive_entry *); __LA_DECL const char *archive_entry_uname(struct archive_entry *); __LA_DECL const char *archive_entry_uname_utf8(struct archive_entry *); __LA_DECL const wchar_t *archive_entry_uname_w(struct archive_entry *); diff --git a/libarchive/archive_entry_private.h b/libarchive/archive_entry_private.h index 7e61dcf45..3423966c6 100644 --- a/libarchive/archive_entry_private.h +++ b/libarchive/archive_entry_private.h @@ -145,6 +145,10 @@ struct archive_entry { #define AE_SET_SIZE 64 #define AE_SET_INO 128 #define AE_SET_DEV 256 +#define AE_SET_PERM 512 +#define AE_SET_FILETYPE 1024 +#define AE_SET_UID 2048 +#define AE_SET_GID 4096 /* * Use aes here so that we get transparent mbs<->wcs conversions. diff --git a/libarchive/archive_read_support_format_zip.c b/libarchive/archive_read_support_format_zip.c index 0f528b28f..6722b0515 100644 --- a/libarchive/archive_read_support_format_zip.c +++ b/libarchive/archive_read_support_format_zip.c @@ -118,7 +118,7 @@ struct trad_enc_ctx { /* Bits used in zip_flags. 
*/ #define ZIP_ENCRYPTED (1 << 0) -#define ZIP_LENGTH_AT_END (1 << 3) +#define ZIP_LENGTH_AT_END (1 << 3) /* Also called "Streaming bit" */ #define ZIP_STRONG_ENCRYPTED (1 << 6) #define ZIP_UTF8_NAME (1 << 11) /* See "7.2 Single Password Symmetric Encryption Method" @@ -165,8 +165,8 @@ struct zip { int64_t entry_compressed_bytes_read; int64_t entry_uncompressed_bytes_read; - /* Running CRC32 of the decompressed data */ - unsigned long entry_crc32; + /* Running CRC32 of the decompressed and decrypted data */ + unsigned long computed_crc32; unsigned long (*crc32func)(unsigned long, const void *, size_t); char ignore_crc32; @@ -944,7 +944,7 @@ zip_read_local_file_header(struct archive_read *a, struct archive_entry *entry, zip->end_of_entry = 0; zip->entry_uncompressed_bytes_read = 0; zip->entry_compressed_bytes_read = 0; - zip->entry_crc32 = zip->crc32func(0, NULL, 0); + zip->computed_crc32 = zip->crc32func(0, NULL, 0); /* Setup default conversion. */ if (zip->sconv == NULL && !zip->init_default_conversion) { @@ -1139,7 +1139,8 @@ zip_read_local_file_header(struct archive_read *a, struct archive_entry *entry, "Inconsistent CRC32 values"); ret = ARCHIVE_WARN; } - if (zip_entry->compressed_size == 0) { + if (zip_entry->compressed_size == 0 + || zip_entry->compressed_size == 0xffffffff) { zip_entry->compressed_size = zip_entry_central_dir.compressed_size; } else if (zip_entry->compressed_size @@ -1283,7 +1284,8 @@ zip_read_local_file_header(struct archive_read *a, struct archive_entry *entry, return ARCHIVE_FATAL; } } else if (0 == (zip_entry->zip_flags & ZIP_LENGTH_AT_END) - || zip_entry->uncompressed_size > 0) { + || (zip_entry->uncompressed_size > 0 + && zip_entry->uncompressed_size != 0xffffffff)) { /* Set the size only if it's meaningful. */ archive_entry_set_size(entry, zip_entry->uncompressed_size); } @@ -1342,25 +1344,267 @@ check_authentication_code(struct archive_read *a, const void *_p) } /* - * Read "uncompressed" data. 
There are three cases: - * 1) We know the size of the data. This is always true for the - * seeking reader (we've examined the Central Directory already). - * 2) ZIP_LENGTH_AT_END was set, but only the CRC was deferred. - * Info-ZIP seems to do this; we know the size but have to grab - * the CRC from the data descriptor afterwards. - * 3) We're streaming and ZIP_LENGTH_AT_END was specified and - * we have no size information. In this case, we can do pretty - * well by watching for the data descriptor record. The data - * descriptor is 16 bytes and includes a computed CRC that should - * provide a strong check. + * The Zip end-of-file marker is inherently ambiguous. The specification + * in APPNOTE.TXT allows any of four possible formats, and there is no + * guaranteed-correct way for a reader to know a priori which one the writer + * will have used. The four formats are: + * 1. 32-bit format with an initial PK78 marker + * 2. 32-bit format without that marker + * 3. 64-bit format with the marker + * 4. 64-bit format without the marker * - * TODO: Technically, the PK\007\010 signature is optional. - * In the original spec, the data descriptor contained CRC - * and size fields but had no leading signature. In practice, - * newer writers seem to provide the signature pretty consistently. + * Mark Adler's `sunzip` streaming unzip program solved this ambiguity + * by just looking at every possible combination and accepting the + * longest one that matches the expected values. His approach always + * consumes the longest possible matching EOF marker, based on an + * analysis of all the possible failures and how the values could + * overlap. * - * For uncompressed data, the PK\007\010 marker seems essential - * to be sure we've actually seen the end of the entry. + * For example, suppose both of the first two formats listed + * above match. 
In that case, we know the next four + * 32-bit words match this pattern: + * ``` + * [PK\07\08] [CRC32] [compressed size] [uncompressed size] + * ``` + * but we know they must also match this pattern: + * ``` + * [CRC32] [compressed size] [uncompressed size] [other PK marker] + * ``` + * + * Since the first word here matches both the PK78 signature in the + * first form and the CRC32 in the second, we know those two values + * are equal, the CRC32 must be exactly 0x08074b50. Similarly, the + * compressed and uncompressed size must also be exactly this value. + * So we know these four words are all 0x08074b50. If we were to + * accept the shorter pattern, it would be immediately followed by + * another PK78 marker, which is not possible in a well-formed ZIP + * archive unless there is garbage between entries. This implies we + * should not accept the shorter form in such a case; we should accept + * the longer form. + * + * If the second and third possibilities above both match, we + * have a slightly different situation. The following words + * must match both the 32-bit format + * ``` + * [CRC32] [compressed size] [uncompressed size] [other PK marker] + * ``` + * and the 64-bit format + * ``` + * [CRC32] [compressed low] [compressed high] [uncompressed low] [uncompressed high] [other PK marker] + * ``` + * Since the 32-bit and 64-bit compressed sizes both match, the + * actualy size must fit in 32 bits, which implies the high-order + * word of the compressed size is zero. So we know the uncompressed + * low word is zero, which again implies that if we accept the shorter + * format, there will not be a valid PK marker following it. + * + * Similar considerations rule out the shorter form in every other + * possibly-ambiguous pair. So if two of the four possible formats + * match, we should accept the longer option. + * + * If none of the four formats matches, we know the archive must be + * corrupted in some fashion. 
In particular, it's possible that the + * length-at-end bit was incorrect and we should not really be looking + * for an EOF marker at all. To allow for this possibility, we + * evaluate the following words to collect data for a later error + * report but do not consume any bytes. We instead rely on the later + * search for a new PK marker to re-sync to the next well-formed + * entry. + */ +static void +consume_end_of_file_marker(struct archive_read *a, struct zip *zip) +{ + const char *marker; + const char *p; + uint64_t compressed32, uncompressed32; + uint64_t compressed64, uncompressed64; + uint64_t compressed_actual, uncompressed_actual; + uint32_t crc32_actual; + const uint32_t PK78 = 0x08074B50ULL; + uint8_t crc32_ignored, crc32_may_be_zero; + + /* If there shouldn't be a marker, don't consume it. */ + if ((zip->entry->zip_flags & ZIP_LENGTH_AT_END) == 0) { + return; + } + + /* The longest Zip end-of-file record is 24 bytes. Since an + * end-of-file record can never appear at the end of the + * archive, we know 24 bytes will be available unless + * the archive is severely truncated. */ + if (NULL == (marker = __archive_read_ahead(a, 24, NULL))) { + return; + } + p = marker; + + /* The end-of-file record comprises: + * = Optional PK\007\010 marker + * = 4-byte CRC32 + * = Compressed size + * = Uncompressed size + * + * The last two fields are either both 32 bits or both 64 + * bits. We check all possible layouts and accept any one + * that gives us a complete match, else we make a best-effort + * attempt to parse out the pieces. 
+ */ + + /* CRC32 checking can be tricky: + * * Test suites sometimes ignore the CRC32 + * * AES AE-2 always writes zero for the CRC32 + * * AES AE-1 sometimes writes zero for the CRC32 + */ + crc32_ignored = zip->ignore_crc32; + crc32_may_be_zero = 0; + crc32_actual = zip->computed_crc32; + if (zip->hctx_valid) { + switch (zip->entry->aes_extra.vendor) { + case AES_VENDOR_AE_2: + crc32_actual = 0; + break; + case AES_VENDOR_AE_1: + default: + crc32_may_be_zero = 1; + break; + } + } + + /* Values computed from the actual data in the archive. */ + compressed_actual = (uint64_t)zip->entry_compressed_bytes_read; + uncompressed_actual = (uint64_t)zip->entry_uncompressed_bytes_read; + + + /* Longest: PK78 marker, all 64-bit fields (24 bytes total) */ + if (archive_le32dec(p) == PK78 + && ((archive_le32dec(p + 4) == crc32_actual) + || (crc32_may_be_zero && (archive_le32dec(p + 4) == 0)) + || crc32_ignored) + && (archive_le64dec(p + 8) == compressed_actual) + && (archive_le64dec(p + 16) == uncompressed_actual)) { + if (!crc32_ignored) { + zip->entry->crc32 = crc32_actual; + } + zip->entry->compressed_size = compressed_actual; + zip->entry->uncompressed_size = uncompressed_actual; + zip->unconsumed += 24; + return; + } + + /* No PK78 marker, 64-bit fields (20 bytes total); CRC32 is at p */ + if (((archive_le32dec(p) == crc32_actual) + || (crc32_may_be_zero && (archive_le32dec(p) == 0)) + || crc32_ignored) + && (archive_le64dec(p + 4) == compressed_actual) + && (archive_le64dec(p + 12) == uncompressed_actual)) { + if (!crc32_ignored) { + zip->entry->crc32 = crc32_actual; + } + zip->entry->compressed_size = compressed_actual; + zip->entry->uncompressed_size = uncompressed_actual; + zip->unconsumed += 20; + return; + } + + /* PK78 marker and 32-bit fields (16 bytes total) */ + if (archive_le32dec(p) == PK78 + && ((archive_le32dec(p + 4) == crc32_actual) + || (crc32_may_be_zero && (archive_le32dec(p + 4) == 0)) + || crc32_ignored) + && (archive_le32dec(p + 8) == compressed_actual) + && 
(archive_le32dec(p + 12) == uncompressed_actual)) { + if (!crc32_ignored) { + zip->entry->crc32 = crc32_actual; + } + zip->entry->compressed_size = compressed_actual; + zip->entry->uncompressed_size = uncompressed_actual; + zip->unconsumed += 16; + return; + } + + /* Shortest: No PK78 marker, all 32-bit fields (12 bytes total); CRC32 is at p */ + if (((archive_le32dec(p) == crc32_actual) + || (crc32_may_be_zero && (archive_le32dec(p) == 0)) + || crc32_ignored) + && (archive_le32dec(p + 4) == compressed_actual) + && (archive_le32dec(p + 8) == uncompressed_actual)) { + if (!crc32_ignored) { + zip->entry->crc32 = crc32_actual; + } + zip->entry->compressed_size = compressed_actual; + zip->entry->uncompressed_size = uncompressed_actual; + zip->unconsumed += 12; + return; + } + + /* If none of the above patterns gives us a full exact match, + * then there's something definitely amiss. The fallback code + * below will parse out some plausible values for error + * reporting purposes. Note that this won't actually + * consume anything: + * + * = If there really is a marker here, the logic to resync to + * the next entry will suffice to skip it. + * + * = There might not really be a marker: Corruption or bugs + * may have set the length-at-end bit without a marker ever + * having actually been written. In this case, we + * explicitly should not consume any bytes, since that would + * prevent us from correctly reading the next entry. + */ + if (archive_le32dec(p) == PK78) { + p += 4; /* Ignore PK78 if it appears to be present */ + } + zip->entry->crc32 = archive_le32dec(p); /* Parse CRC32 */ + p += 4; + + /* Consider both 32- and 64-bit interpretations */ + compressed32 = archive_le32dec(p); + uncompressed32 = archive_le32dec(p + 4); + compressed64 = archive_le64dec(p); + uncompressed64 = archive_le64dec(p + 8); + + /* The earlier patterns may have failed because of CRC32 + * mismatch, so it's still possible that both sizes match. + * Try to match as many as we can... 
+ */ + if (compressed32 == compressed_actual + && uncompressed32 == uncompressed_actual) { + /* Both 32-bit fields match */ + zip->entry->compressed_size = compressed32; + zip->entry->uncompressed_size = uncompressed32; + } else if (compressed64 == compressed_actual + || uncompressed64 == uncompressed_actual) { + /* One or both 64-bit fields match */ + zip->entry->compressed_size = compressed64; + zip->entry->uncompressed_size = uncompressed64; + } else { + /* Zero or one 32-bit fields match */ + zip->entry->compressed_size = compressed32; + zip->entry->uncompressed_size = uncompressed32; + } +} + +/* + * Read "uncompressed" data. + * + * This is straightforward if we know the size of the data. This is + * always true for the seeking reader (we've examined the Central + * Directory already), and will often be true for the streaming reader + * (the writer was writing uncompressed so probably knows the size). + * + * If we don't know the size, then life is more interesting. Note + * that a careful reading of the Zip specification says that a writer + * must use ZIP_LENGTH_AT_END if it cannot write the CRC into the + * local header. And if it uses ZIP_LENGTH_AT_END, then it is + * prohibited from storing the sizes in the local header. This + * prevents fully-compliant streaming writers from providing any size + * clues to a streaming reader. In this case, we have to scan the + * data as we read to try to locate the end-of-file marker. + * + * We assume here that the end-of-file marker always has the + * PK\007\010 signature. Although it's technically optional, newer + * writers seem to provide it pretty consistently, and it's not clear + * how to efficiently recognize an end-of-file marker that lacks it. * * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets * zip->end_of_entry if it consumes all of the data. 
@@ -1372,18 +1616,18 @@ zip_read_data_none(struct archive_read *a, const void **_buff, struct zip *zip; const char *buff; ssize_t bytes_avail; + ssize_t trailing_extra; int r; (void)offset; /* UNUSED */ zip = (struct zip *)(a->format->data); + trailing_extra = zip->hctx_valid ? AUTH_CODE_SIZE : 0; if (zip->entry->zip_flags & ZIP_LENGTH_AT_END) { const char *p; - ssize_t grabbing_bytes = 24; + ssize_t grabbing_bytes = 24 + trailing_extra; - if (zip->hctx_valid) - grabbing_bytes += AUTH_CODE_SIZE; /* Grab at least 24 bytes. */ buff = __archive_read_ahead(a, grabbing_bytes, &bytes_avail); if (bytes_avail < grabbing_bytes) { @@ -1398,44 +1642,19 @@ zip_read_data_none(struct archive_read *a, const void **_buff, } /* Check for a complete PK\007\010 signature, followed * by the correct 4-byte CRC. */ - p = buff; - if (zip->hctx_valid) - p += AUTH_CODE_SIZE; + p = buff + trailing_extra; if (p[0] == 'P' && p[1] == 'K' && p[2] == '\007' && p[3] == '\010' - && (archive_le32dec(p + 4) == zip->entry_crc32 + && (archive_le32dec(p + 4) == zip->computed_crc32 || zip->ignore_crc32 || (zip->hctx_valid && zip->entry->aes_extra.vendor == AES_VENDOR_AE_2))) { - if (zip->entry->flags & LA_USED_ZIP64) { - uint64_t compressed, uncompressed; - zip->entry->crc32 = archive_le32dec(p + 4); - compressed = archive_le64dec(p + 8); - uncompressed = archive_le64dec(p + 16); - if (compressed > INT64_MAX || uncompressed > - INT64_MAX) { - archive_set_error(&a->archive, - ARCHIVE_ERRNO_FILE_FORMAT, - "Overflow of 64-bit file sizes"); - return ARCHIVE_FAILED; - } - zip->entry->compressed_size = compressed; - zip->entry->uncompressed_size = uncompressed; - zip->unconsumed = 24; - } else { - zip->entry->crc32 = archive_le32dec(p + 4); - zip->entry->compressed_size = - archive_le32dec(p + 8); - zip->entry->uncompressed_size = - archive_le32dec(p + 12); - zip->unconsumed = 16; - } + zip->end_of_entry = 1; if (zip->hctx_valid) { r = check_authentication_code(a, buff); if (r != ARCHIVE_OK) return (r); } - 
zip->end_of_entry = 1; return (ARCHIVE_OK); } /* If not at EOF, ensure we consume at least one byte. */ @@ -1451,11 +1670,10 @@ zip_read_data_none(struct archive_read *a, const void **_buff, else if (p[3] == '\007') { p += 1; } else if (p[3] == '\010' && p[2] == '\007' && p[1] == 'K' && p[0] == 'P') { - if (zip->hctx_valid) - p -= AUTH_CODE_SIZE; break; } else { p += 4; } } + p -= trailing_extra; bytes_avail = p - buff; } else { if (zip->entry_bytes_remaining == 0) { @@ -1498,59 +1716,15 @@ zip_read_data_none(struct archive_read *a, const void **_buff, bytes_avail = dec_size; buff = (const char *)zip->decrypted_buffer; } - *size = bytes_avail; zip->entry_bytes_remaining -= bytes_avail; zip->entry_uncompressed_bytes_read += bytes_avail; zip->entry_compressed_bytes_read += bytes_avail; zip->unconsumed += bytes_avail; + *size = bytes_avail; *_buff = buff; return (ARCHIVE_OK); } -static int -consume_optional_marker(struct archive_read *a, struct zip *zip) -{ - if (zip->end_of_entry && (zip->entry->zip_flags & ZIP_LENGTH_AT_END)) { - const char *p; - - if (NULL == (p = __archive_read_ahead(a, 24, NULL))) { - archive_set_error(&a->archive, - ARCHIVE_ERRNO_FILE_FORMAT, - "Truncated ZIP end-of-file record"); - return (ARCHIVE_FATAL); - } - /* Consume the optional PK\007\010 marker. 
*/ - if (p[0] == 'P' && p[1] == 'K' && - p[2] == '\007' && p[3] == '\010') { - p += 4; - zip->unconsumed = 4; - } - if (zip->entry->flags & LA_USED_ZIP64) { - uint64_t compressed, uncompressed; - zip->entry->crc32 = archive_le32dec(p); - compressed = archive_le64dec(p + 4); - uncompressed = archive_le64dec(p + 12); - if (compressed > INT64_MAX || - uncompressed > INT64_MAX) { - archive_set_error(&a->archive, - ARCHIVE_ERRNO_FILE_FORMAT, - "Overflow of 64-bit file sizes"); - return ARCHIVE_FAILED; - } - zip->entry->compressed_size = compressed; - zip->entry->uncompressed_size = uncompressed; - zip->unconsumed += 20; - } else { - zip->entry->crc32 = archive_le32dec(p); - zip->entry->compressed_size = archive_le32dec(p + 4); - zip->entry->uncompressed_size = archive_le32dec(p + 8); - zip->unconsumed += 12; - } - } - - return (ARCHIVE_OK); -} - #if HAVE_LZMA_H && HAVE_LIBLZMA static int zipx_xz_init(struct archive_read *a, struct zip *zip) @@ -1802,10 +1976,6 @@ zip_read_data_zipx_xz(struct archive_read *a, const void **buff, *size = zip->zipx_lzma_stream.total_out; *buff = zip->uncompressed_buffer; - ret = consume_optional_marker(a, zip); - if (ret != ARCHIVE_OK) - return (ret); - return (ARCHIVE_OK); } @@ -1870,8 +2040,6 @@ zip_read_data_zipx_lzma_alone(struct archive_read *a, const void **buff, /* This case is optional in lzma alone format. It can happen, * but most of the files don't have it. (GitHub #1257) */ case LZMA_STREAM_END: - lzma_end(&zip->zipx_lzma_stream); - zip->zipx_lzma_valid = 0; if((int64_t) zip->zipx_lzma_stream.total_in != zip->entry_bytes_remaining) { @@ -1905,21 +2073,18 @@ zip_read_data_zipx_lzma_alone(struct archive_read *a, const void **buff, zip->end_of_entry = 1; } - /* Return values. */ - *size = zip->zipx_lzma_stream.total_out; - *buff = zip->uncompressed_buffer; - - /* Behave the same way as during deflate decompression. 
*/ - ret = consume_optional_marker(a, zip); - if (ret != ARCHIVE_OK) - return (ret); - /* Free lzma decoder handle because we'll no longer need it. */ + /* This cannot be folded into LZMA_STREAM_END handling above + * because the stream end marker is not required in this format. */ if(zip->end_of_entry) { lzma_end(&zip->zipx_lzma_stream); zip->zipx_lzma_valid = 0; } + /* Return values. */ + *size = zip->zipx_lzma_stream.total_out; + *buff = zip->uncompressed_buffer; + /* If we're here, then we're good! */ return (ARCHIVE_OK); } @@ -2077,10 +2242,6 @@ zip_read_data_zipx_ppmd(struct archive_read *a, const void **buff, ++consumed_bytes; } while(consumed_bytes < zip->uncompressed_buffer_size); - /* Update pointers for libarchive. */ - *buff = zip->uncompressed_buffer; - *size = consumed_bytes; - /* Update pointers so we can continue decompression in another call. */ zip->entry_bytes_remaining -= zip->zipx_ppmd_read_compressed; zip->entry_compressed_bytes_read += zip->zipx_ppmd_read_compressed; @@ -2092,10 +2253,9 @@ zip_read_data_zipx_ppmd(struct archive_read *a, const void **buff, zip->ppmd8_valid = 0; } - /* Seek for optional marker, same way as in each zip entry. */ - ret = consume_optional_marker(a, zip); - if (ret != ARCHIVE_OK) - return ret; + /* Update pointers for libarchive. */ + *buff = zip->uncompressed_buffer; + *size = consumed_bytes; return ARCHIVE_OK; } @@ -2237,11 +2397,6 @@ zip_read_data_zipx_bzip2(struct archive_read *a, const void **buff, *size = total_out; *buff = zip->uncompressed_buffer; - /* Seek for optional marker, like in other entries. */ - r = consume_optional_marker(a, zip); - if(r != ARCHIVE_OK) - return r; - return ARCHIVE_OK; } @@ -2372,11 +2527,6 @@ zip_read_data_zipx_zstd(struct archive_read *a, const void **buff, *size = total_out; *buff = zip->uncompressed_buffer; - /* Seek for optional marker, like in other entries. 
*/ - r = consume_optional_marker(a, zip); - if(r != ARCHIVE_OK) - return r; - return ARCHIVE_OK; } #endif @@ -2412,7 +2562,7 @@ zip_read_data_deflate(struct archive_read *a, const void **buff, size_t *size, int64_t *offset) { struct zip *zip; - ssize_t bytes_avail; + ssize_t bytes_avail, to_consume = 0; const void *compressed_buff, *sp; int r; @@ -2533,34 +2683,33 @@ zip_read_data_deflate(struct archive_read *a, const void **buff, } /* Consume as much as the compressor actually used. */ - bytes_avail = zip->stream.total_in; + to_consume = zip->stream.total_in; + __archive_read_consume(a, to_consume); + zip->entry_bytes_remaining -= to_consume; + zip->entry_compressed_bytes_read += to_consume; + zip->entry_uncompressed_bytes_read += zip->stream.total_out; + if (zip->tctx_valid || zip->cctx_valid) { - zip->decrypted_bytes_remaining -= bytes_avail; + zip->decrypted_bytes_remaining -= to_consume; if (zip->decrypted_bytes_remaining == 0) zip->decrypted_ptr = zip->decrypted_buffer; else - zip->decrypted_ptr += bytes_avail; + zip->decrypted_ptr += to_consume; } - /* Calculate compressed data as much as we used.*/ if (zip->hctx_valid) - archive_hmac_sha1_update(&zip->hctx, sp, bytes_avail); - __archive_read_consume(a, bytes_avail); - zip->entry_bytes_remaining -= bytes_avail; - zip->entry_compressed_bytes_read += bytes_avail; - - *size = zip->stream.total_out; - zip->entry_uncompressed_bytes_read += zip->stream.total_out; - *buff = zip->uncompressed_buffer; + archive_hmac_sha1_update(&zip->hctx, sp, to_consume); - if (zip->end_of_entry && zip->hctx_valid) { - r = check_authentication_code(a, NULL); - if (r != ARCHIVE_OK) - return (r); + if (zip->end_of_entry) { + if (zip->hctx_valid) { + r = check_authentication_code(a, NULL); + if (r != ARCHIVE_OK) { + return (r); + } + } } - r = consume_optional_marker(a, zip); - if (r != ARCHIVE_OK) - return (r); + *size = zip->stream.total_out; + *buff = zip->uncompressed_buffer; return (ARCHIVE_OK); } @@ -3028,13 +3177,27 @@ 
archive_read_format_zip_read_data(struct archive_read *a, } if (r != ARCHIVE_OK) return (r); - /* Update checksum */ - if (*size) - zip->entry_crc32 = zip->crc32func(zip->entry_crc32, *buff, - (unsigned)*size); - /* If we hit the end, swallow any end-of-data marker. */ + if (*size > 0) { + zip->computed_crc32 = zip->crc32func(zip->computed_crc32, *buff, + (unsigned)*size); + } + /* If we hit the end, swallow any end-of-data marker and + * verify the final check values. */ if (zip->end_of_entry) { - /* Check file size, CRC against these values. */ + consume_end_of_file_marker(a, zip); + + /* Check computed CRC against header */ + if ((!zip->hctx_valid || + zip->entry->aes_extra.vendor != AES_VENDOR_AE_2) && + zip->entry->crc32 != zip->computed_crc32 + && !zip->ignore_crc32) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "ZIP bad CRC: 0x%lx should be 0x%lx", + (unsigned long)zip->computed_crc32, + (unsigned long)zip->entry->crc32); + return (ARCHIVE_FAILED); + } + /* Check file size against header. */ if (zip->entry->compressed_size != zip->entry_compressed_bytes_read) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, @@ -3042,7 +3205,7 @@ archive_read_format_zip_read_data(struct archive_read *a, "(read %jd, expected %jd)", (intmax_t)zip->entry_compressed_bytes_read, (intmax_t)zip->entry->compressed_size); - return (ARCHIVE_WARN); + return (ARCHIVE_FAILED); } /* Size field only stores the lower 32 bits of the actual * size. 
*/ @@ -3053,18 +3216,7 @@ archive_read_format_zip_read_data(struct archive_read *a, "(read %jd, expected %jd)\n", (intmax_t)zip->entry_uncompressed_bytes_read, (intmax_t)zip->entry->uncompressed_size); - return (ARCHIVE_WARN); - } - /* Check computed CRC against header */ - if ((!zip->hctx_valid || - zip->entry->aes_extra.vendor != AES_VENDOR_AE_2) && - zip->entry->crc32 != zip->entry_crc32 - && !zip->ignore_crc32) { - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "ZIP bad CRC: 0x%lx should be 0x%lx", - (unsigned long)zip->entry_crc32, - (unsigned long)zip->entry->crc32); - return (ARCHIVE_WARN); + return (ARCHIVE_FAILED); } } diff --git a/libarchive/archive_write_set_format_zip.c b/libarchive/archive_write_set_format_zip.c index 50f7cc6fd..e37e7b5ed 100644 --- a/libarchive/archive_write_set_format_zip.c +++ b/libarchive/archive_write_set_format_zip.c @@ -131,7 +131,6 @@ struct zip { enum compression entry_compression; enum encryption entry_encryption; int entry_flags; - int entry_uses_zip64; int experiments; struct trad_enc_ctx tctx; char tctx_valid; @@ -522,6 +521,7 @@ archive_write_zip_header(struct archive_write *a, struct archive_entry *entry) int ret, ret2 = ARCHIVE_OK; mode_t type; int version_needed = 10; +#define MIN_VERSION_NEEDED(x) do { if (version_needed < x) { version_needed = x; } } while (0) /* Ignore types of entries that we don't support. */ type = archive_entry_filetype(entry); @@ -556,12 +556,12 @@ archive_write_zip_header(struct archive_write *a, struct archive_entry *entry) /* Reset information from last entry. 
*/ zip->entry_offset = zip->written_bytes; zip->entry_uncompressed_limit = INT64_MAX; + /* Zero size values implies that we're using a trailing data descriptor */ zip->entry_compressed_size = 0; zip->entry_uncompressed_size = 0; zip->entry_compressed_written = 0; zip->entry_uncompressed_written = 0; zip->entry_flags = 0; - zip->entry_uses_zip64 = 0; zip->entry_crc32 = zip->crc32func(0, NULL, 0); zip->entry_encryption = 0; archive_entry_free(zip->entry); @@ -671,11 +671,11 @@ archive_write_zip_header(struct archive_write *a, struct archive_entry *entry) zip->entry_crc32 = zip->crc32func(zip->entry_crc32, (const unsigned char *)slink, slink_size); zip->entry_compression = COMPRESSION_STORE; - version_needed = 20; + MIN_VERSION_NEEDED(20); } else if (type != AE_IFREG) { zip->entry_compression = COMPRESSION_STORE; zip->entry_uncompressed_limit = 0; - version_needed = 20; + MIN_VERSION_NEEDED(20); } else if (archive_entry_size_is_set(zip->entry)) { int64_t size = archive_entry_size(zip->entry); int64_t additional_size = 0; @@ -688,27 +688,27 @@ archive_write_zip_header(struct archive_write *a, struct archive_entry *entry) if (zip->entry_compression == COMPRESSION_STORE) { zip->entry_compressed_size = size; zip->entry_uncompressed_size = size; - version_needed = 10; + MIN_VERSION_NEEDED(10); } else { zip->entry_uncompressed_size = size; - version_needed = 20; + MIN_VERSION_NEEDED(20); } if (zip->entry_flags & ZIP_ENTRY_FLAG_ENCRYPTED) { switch (zip->entry_encryption) { case ENCRYPTION_TRADITIONAL: additional_size = TRAD_HEADER_SIZE; - version_needed = 20; + MIN_VERSION_NEEDED(20); break; case ENCRYPTION_WINZIP_AES128: additional_size = WINZIP_AES128_HEADER_SIZE + AUTH_CODE_SIZE; - version_needed = 20; + MIN_VERSION_NEEDED(20); break; case ENCRYPTION_WINZIP_AES256: additional_size = WINZIP_AES256_HEADER_SIZE + AUTH_CODE_SIZE; - version_needed = 20; + MIN_VERSION_NEEDED(20); break; case ENCRYPTION_NONE: default: @@ -732,8 +732,7 @@ archive_write_zip_header(struct 
archive_write *a, struct archive_entry *entry) || (zip->entry_uncompressed_size + additional_size > ZIP_4GB_MAX) || (zip->entry_uncompressed_size > ZIP_4GB_MAX_UNCOMPRESSED && zip->entry_compression != COMPRESSION_STORE)) { - zip->entry_uses_zip64 = 1; - version_needed = 45; + MIN_VERSION_NEEDED(45); } /* We may know the size, but never the CRC. */ @@ -741,7 +740,6 @@ archive_write_zip_header(struct archive_write *a, struct archive_entry *entry) } else { /* We don't know the size. Use the default * compression unless specified otherwise. - * We enable Zip64 extensions unless we're told not to. */ zip->entry_compression = zip->requested_compression; @@ -751,12 +749,12 @@ archive_write_zip_header(struct archive_write *a, struct archive_entry *entry) zip->entry_flags |= ZIP_ENTRY_FLAG_LENGTH_AT_END; if ((zip->flags & ZIP_FLAG_AVOID_ZIP64) == 0) { - zip->entry_uses_zip64 = 1; - version_needed = 45; + /* We might use zip64 extensions, so require 4.5 */ + MIN_VERSION_NEEDED(45); } else if (zip->entry_compression == COMPRESSION_STORE) { - version_needed = 10; + MIN_VERSION_NEEDED(10); } else { - version_needed = 20; + MIN_VERSION_NEEDED(20); } if (zip->entry_flags & ZIP_ENTRY_FLAG_ENCRYPTED) { @@ -764,8 +762,7 @@ archive_write_zip_header(struct archive_write *a, struct archive_entry *entry) case ENCRYPTION_TRADITIONAL: case ENCRYPTION_WINZIP_AES128: case ENCRYPTION_WINZIP_AES256: - if (version_needed < 20) - version_needed = 20; + MIN_VERSION_NEEDED(20); break; case ENCRYPTION_NONE: default: @@ -786,16 +783,8 @@ archive_write_zip_header(struct archive_write *a, struct archive_entry *entry) archive_le16enc(local_header + 8, zip->entry_compression); archive_le32enc(local_header + 10, dos_time(archive_entry_mtime(zip->entry))); - archive_le32enc(local_header + 14, zip->entry_crc32); - if (zip->entry_uses_zip64) { - /* Zip64 data in the local header "must" include both - * compressed and uncompressed sizes AND those fields - * are included only if these are 0xffffffff; - * 
THEREFORE these must be set this way, even if we - * know one of them is smaller. */ - archive_le32enc(local_header + 18, ZIP_4GB_MAX); - archive_le32enc(local_header + 22, ZIP_4GB_MAX); - } else { + if ((zip->entry_flags & ZIP_ENTRY_FLAG_LENGTH_AT_END) == 0) { + archive_le32enc(local_header + 14, zip->entry_crc32); archive_le32enc(local_header + 18, (uint32_t)zip->entry_compressed_size); archive_le32enc(local_header + 22, (uint32_t)zip->entry_uncompressed_size); } @@ -841,42 +830,19 @@ archive_write_zip_header(struct archive_write *a, struct archive_entry *entry) * the local file header and the central directory. * We format them once and then duplicate them. */ - /* UT timestamp, length depends on what timestamps are set. */ - memcpy(e, "UT", 2); - archive_le16enc(e + 2, - 1 - + (archive_entry_mtime_is_set(entry) ? 4 : 0) - + (archive_entry_atime_is_set(entry) ? 4 : 0) - + (archive_entry_ctime_is_set(entry) ? 4 : 0)); - e += 4; - *e++ = - (archive_entry_mtime_is_set(entry) ? 1 : 0) - | (archive_entry_atime_is_set(entry) ? 2 : 0) - | (archive_entry_ctime_is_set(entry) ? 4 : 0); - if (archive_entry_mtime_is_set(entry)) { - archive_le32enc(e, (uint32_t)archive_entry_mtime(entry)); - e += 4; - } - if (archive_entry_atime_is_set(entry)) { - archive_le32enc(e, (uint32_t)archive_entry_atime(entry)); + /* ux Unix extra data, length 11, version 1 */ + if (archive_entry_uid_is_set(entry) || archive_entry_gid_is_set(entry)) { + /* TODO: If uid < 64k, use 2 bytes, ditto for gid. */ + memcpy(e, "ux\013\000\001", 5); + e += 5; + *e++ = 4; /* Length of following UID */ + archive_le32enc(e, (uint32_t)archive_entry_uid(entry)); e += 4; - } - if (archive_entry_ctime_is_set(entry)) { - archive_le32enc(e, (uint32_t)archive_entry_ctime(entry)); + *e++ = 4; /* Length of following GID */ + archive_le32enc(e, (uint32_t)archive_entry_gid(entry)); e += 4; } - /* ux Unix extra data, length 11, version 1 */ - /* TODO: If uid < 64k, use 2 bytes, ditto for gid. 
*/ - memcpy(e, "ux\013\000\001", 5); - e += 5; - *e++ = 4; /* Length of following UID */ - archive_le32enc(e, (uint32_t)archive_entry_uid(entry)); - e += 4; - *e++ = 4; /* Length of following GID */ - archive_le32enc(e, (uint32_t)archive_entry_gid(entry)); - e += 4; - /* AES extra data field: WinZIP AES information, ID=0x9901 */ if ((zip->entry_flags & ZIP_ENTRY_FLAG_ENCRYPTED) && (zip->entry_encryption == ENCRYPTION_WINZIP_AES128 @@ -903,7 +869,7 @@ archive_write_zip_header(struct archive_write *a, struct archive_entry *entry) e += 2; } - /* Copy UT ,ux, and AES-extra into central directory as well. */ + /* Copy ux, AES-extra into central directory as well. */ zip->file_header_extra_offset = zip->central_directory_bytes; cd_extra = cd_alloc(zip, e - local_extra); memcpy(cd_extra, local_extra, e - local_extra); @@ -915,17 +881,50 @@ archive_write_zip_header(struct archive_write *a, struct archive_entry *entry) * archive_write_zip_finish_entry() below. */ - /* "[Zip64 entry] in the local header MUST include BOTH - * original [uncompressed] and compressed size fields." */ - if (zip->entry_uses_zip64) { - unsigned char *zip64_start = e; - memcpy(e, "\001\000\020\000", 4); + /* UT timestamp: length depends on what timestamps are set. + * This header appears in the Central Directory also, but + * according to Info-Zip specification, the CD form + * only holds mtime, so we format it separately. */ + if (archive_entry_mtime_is_set(entry) + || archive_entry_atime_is_set(entry) + || archive_entry_ctime_is_set(entry)) { + unsigned char *ut = e; + memcpy(e, "UT\000\000", 4); + e += 4; + *e++ = (archive_entry_mtime_is_set(entry) ? 1 : 0) + | (archive_entry_atime_is_set(entry) ? 2 : 0) + | (archive_entry_ctime_is_set(entry) ? 
4 : 0); + if (archive_entry_mtime_is_set(entry)) { + archive_le32enc(e, (uint32_t)archive_entry_mtime(entry)); + e += 4; + } + if (archive_entry_atime_is_set(entry)) { + archive_le32enc(e, (uint32_t)archive_entry_atime(entry)); + e += 4; + } + if (archive_entry_ctime_is_set(entry)) { + archive_le32enc(e, (uint32_t)archive_entry_ctime(entry)); + e += 4; + } + archive_le16enc(ut + 2, e - ut - 4); + } + + /* + * Note about Zip64 Extended Information Extra Field: + * Because libarchive always writes in a streaming + * fashion, we never know the CRC when we're writing + * the local header. So we have to use length-at-end, which + * prevents us from putting size information into a Zip64 + * extra field. However, apparently some readers find it + * a helpful clue to have an empty such field so they + * can expect a 64-bit length-at-end marker. + */ + if (archive_entry_size_is_set(zip->entry) + && (zip->entry_uncompressed_size > ZIP_4GB_MAX + || zip->entry_compressed_size > ZIP_4GB_MAX)) { + /* Header ID 0x0001, size 0 */ + memcpy(e, "\001\000\000\000", 4); e += 4; - archive_le64enc(e, zip->entry_uncompressed_size); - e += 8; - archive_le64enc(e, zip->entry_compressed_size); - e += 8; - archive_le16enc(zip64_start + 2, (uint16_t)(e - (zip64_start + 4))); } if (zip->flags & ZIP_FLAG_EXPERIMENT_xl) { @@ -1204,7 +1203,9 @@ archive_write_zip_finish_entry(struct archive_write *a) archive_le32enc(d + 4, 0);/* no CRC.*/ else archive_le32enc(d + 4, zip->entry_crc32); - if (zip->entry_uses_zip64) { + if (zip->entry_compressed_written > ZIP_4GB_MAX + || zip->entry_uncompressed_written > ZIP_4GB_MAX + || zip->flags & ZIP_FLAG_FORCE_ZIP64) { archive_le64enc(d + 8, (uint64_t)zip->entry_compressed_written); archive_le64enc(d + 16, @@ -1223,23 +1224,60 @@ archive_write_zip_finish_entry(struct archive_write *a) return (ARCHIVE_FATAL); } - /* Append Zip64 extra data to central directory information. 
*/ - if (zip->entry_compressed_written > ZIP_4GB_MAX - || zip->entry_uncompressed_written > ZIP_4GB_MAX + /* UT timestamp: Info-Zip specifies that _only_ the mtime should + * be recorded here; ctime and atime are also included in the + * local file descriptor. */ + if (archive_entry_mtime_is_set(zip->entry)) { + unsigned char ut[9]; + unsigned char *u = ut, *ud; + memcpy(u, "UT\005\000\001", 5); + u += 5; + archive_le32enc(u, (uint32_t)archive_entry_mtime(zip->entry)); + u += 4; + ud = cd_alloc(zip, u - ut); + if (ud == NULL) { + archive_set_error(&a->archive, ENOMEM, + "Can't allocate zip data"); + return (ARCHIVE_FATAL); + } + memcpy(ud, ut, u - ut); + } + + /* Fill in size information in the central directory entry. */ + /* Fix up central directory file header. */ + if (zip->cctx_valid && zip->aes_vendor == AES_VENDOR_AE_2) + archive_le32enc(zip->file_header + 16, 0);/* no CRC.*/ + else + archive_le32enc(zip->file_header + 16, zip->entry_crc32); + /* Truncate to 32 bits; we'll fix up below. */ + archive_le32enc(zip->file_header + 20, (uint32_t)zip->entry_compressed_written); + archive_le32enc(zip->file_header + 24, (uint32_t)zip->entry_uncompressed_written); + archive_le16enc(zip->file_header + 30, + (uint16_t)(zip->central_directory_bytes - zip->file_header_extra_offset)); + archive_le32enc(zip->file_header + 42, (uint32_t)zip->entry_offset); + + /* If any of the values immediately above are too large, we'll + * need to put the corresponding value in a Zip64 extra field + * and set the central directory value to 0xffffffff as a flag. 
*/ + if (zip->entry_compressed_written >= ZIP_4GB_MAX + || zip->entry_uncompressed_written >= ZIP_4GB_MAX || zip->entry_offset > ZIP_4GB_MAX) { unsigned char zip64[32]; unsigned char *z = zip64, *zd; memcpy(z, "\001\000\000\000", 4); z += 4; if (zip->entry_uncompressed_written >= ZIP_4GB_MAX) { + archive_le32enc(zip->file_header + 24, ZIP_4GB_MAX); archive_le64enc(z, zip->entry_uncompressed_written); z += 8; } if (zip->entry_compressed_written >= ZIP_4GB_MAX) { + archive_le32enc(zip->file_header + 20, ZIP_4GB_MAX); archive_le64enc(z, zip->entry_compressed_written); z += 8; } if (zip->entry_offset >= ZIP_4GB_MAX) { + archive_le32enc(zip->file_header + 42, ZIP_4GB_MAX); archive_le64enc(z, zip->entry_offset); z += 8; } diff --git a/libarchive/test/CMakeLists.txt b/libarchive/test/CMakeLists.txt index 5e17d62fd..8209c25a5 100644 --- a/libarchive/test/CMakeLists.txt +++ b/libarchive/test/CMakeLists.txt @@ -277,6 +277,7 @@ IF(ENABLE_TEST) test_write_format_xar.c test_write_format_xar_empty.c test_write_format_zip.c + test_write_format_zip64_stream.c test_write_format_zip_compression_store.c test_write_format_zip_empty.c test_write_format_zip_empty_zip64.c @@ -284,6 +285,7 @@ IF(ENABLE_TEST) test_write_format_zip_file.c test_write_format_zip_file_zip64.c test_write_format_zip_large.c + test_write_format_zip_stream.c test_write_format_zip_zip64.c test_write_open_memory.c test_write_read_format_zip.c diff --git a/libarchive/test/test_read_format_zip.c b/libarchive/test/test_read_format_zip.c index 4ffbade82..9e820f509 100644 --- a/libarchive/test/test_read_format_zip.c +++ b/libarchive/test/test_read_format_zip.c @@ -156,7 +156,7 @@ verify_basic(struct archive *a, int seek_checks) if (archive_zlib_version() != NULL) { failure("file2 has a bad CRC, so read should fail and not change buff"); memset(buff, 'a', 19); - assertEqualInt(ARCHIVE_WARN, archive_read_data(a, buff, 19)); + assertEqualInt(ARCHIVE_FAILED, archive_read_data(a, buff, 19)); assertEqualMem(buff, 
"aaaaaaaaaaaaaaaaaaa", 19); } else { assertEqualInt(ARCHIVE_FAILED, archive_read_data(a, buff, 19)); diff --git a/libarchive/test/test_write_format_zip.c b/libarchive/test/test_write_format_zip.c index 1c00ed465..54240eeb7 100644 --- a/libarchive/test/test_write_format_zip.c +++ b/libarchive/test/test_write_format_zip.c @@ -289,9 +289,11 @@ verify_contents(struct archive *a, int seeking, int content) assertEqualString("file", archive_entry_pathname(ae)); if (seeking) { assertEqualInt(AE_IFREG | 0755, archive_entry_mode(ae)); + assert(archive_entry_size_is_set(ae)); + assertEqualInt(8, archive_entry_size(ae)); + } else { + assertEqualInt(0, archive_entry_size_is_set(ae)); } - assert(archive_entry_size_is_set(ae)); - assertEqualInt(8, archive_entry_size(ae)); if (content) { assertEqualIntA(a, 8, archive_read_data(a, filedata, sizeof(filedata))); @@ -307,9 +309,11 @@ verify_contents(struct archive *a, int seeking, int content) assertEqualString("file2", archive_entry_pathname(ae)); if (seeking) { assertEqualInt(AE_IFREG | 0755, archive_entry_mode(ae)); + assertEqualInt(4, archive_entry_size(ae)); + assert(archive_entry_size_is_set(ae)); + } else { + assertEqualInt(0, archive_entry_size_is_set(ae)); } - assertEqualInt(4, archive_entry_size(ae)); - assert(archive_entry_size_is_set(ae)); if (content) { assertEqualIntA(a, 4, archive_read_data(a, filedata, sizeof(filedata))); @@ -373,10 +377,13 @@ verify_contents(struct archive *a, int seeking, int content) assertEqualInt(0, archive_entry_atime(ae)); assertEqualInt(0, archive_entry_ctime(ae)); assertEqualString("file_deflate", archive_entry_pathname(ae)); - if (seeking) + if (seeking) { assertEqualInt(AE_IFREG | 0755, archive_entry_mode(ae)); - assertEqualInt(8, archive_entry_size(ae)); - assert(archive_entry_size_is_set(ae)); + assertEqualInt(8, archive_entry_size(ae)); + assert(archive_entry_size_is_set(ae)); + } else { + assertEqualInt(0, archive_entry_size_is_set(ae)); + } if (content) { assertEqualIntA(a, 8, 
archive_read_data(a, filedata, sizeof(filedata))); @@ -391,10 +398,13 @@ verify_contents(struct archive *a, int seeking, int content) assertEqualInt(0, archive_entry_atime(ae)); assertEqualInt(0, archive_entry_ctime(ae)); assertEqualString("file2_deflate", archive_entry_pathname(ae)); - if (seeking) + if (seeking) { assertEqualInt(AE_IFREG | 0755, archive_entry_mode(ae)); - assertEqualInt(4, archive_entry_size(ae)); - assert(archive_entry_size_is_set(ae)); + assertEqualInt(4, archive_entry_size(ae)); + assert(archive_entry_size_is_set(ae)); + } else { + assertEqualInt(0, archive_entry_size_is_set(ae)); + } if (content) { assertEqualIntA(a, 4, archive_read_data(a, filedata, sizeof(filedata))); @@ -409,6 +419,7 @@ verify_contents(struct archive *a, int seeking, int content) assertEqualInt(0, archive_entry_ctime(ae)); assertEqualString("file3_deflate", archive_entry_pathname(ae)); if (seeking) { + assert(archive_entry_size_is_set(ae)); assertEqualInt(5, archive_entry_size(ae)); assertEqualInt(AE_IFREG | 0621, archive_entry_mode(ae)); } else { @@ -459,10 +470,13 @@ verify_contents(struct archive *a, int seeking, int content) assertEqualInt(0, archive_entry_atime(ae)); assertEqualInt(0, archive_entry_ctime(ae)); assertEqualString("file_stored", archive_entry_pathname(ae)); - if (seeking) + if (seeking) { assertEqualInt(AE_IFREG | 0755, archive_entry_mode(ae)); - assert(archive_entry_size_is_set(ae)); - assertEqualInt(8, archive_entry_size(ae)); + assert(archive_entry_size_is_set(ae)); + assertEqualInt(8, archive_entry_size(ae)); + } else { + assertEqualInt(0, archive_entry_size_is_set(ae)); + } if (content) { assertEqualIntA(a, 8, archive_read_data(a, filedata, sizeof(filedata))); @@ -477,10 +491,13 @@ verify_contents(struct archive *a, int seeking, int content) assertEqualInt(0, archive_entry_atime(ae)); assertEqualInt(0, archive_entry_ctime(ae)); assertEqualString("file2_stored", archive_entry_pathname(ae)); - if (seeking) + if (seeking) { assertEqualInt(AE_IFREG | 
0755, archive_entry_mode(ae)); - assertEqualInt(4, archive_entry_size(ae)); - assert(archive_entry_size_is_set(ae)); + assertEqualInt(4, archive_entry_size(ae)); + assert(archive_entry_size_is_set(ae)); + } else { + assertEqualInt(0, archive_entry_size_is_set(ae)); + } if (content) { assertEqualIntA(a, 4, archive_read_data(a, filedata, sizeof(filedata))); diff --git a/libarchive/test/test_write_format_zip64_stream.c b/libarchive/test/test_write_format_zip64_stream.c new file mode 100644 index 000000000..bed97894e --- /dev/null +++ b/libarchive/test/test_write_format_zip64_stream.c @@ -0,0 +1,276 @@ +/*- + * Copyright (c) 2003-2023 Tim Kientzle + * Copyright (c) 2008 Anselm Strauss + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "test.h" + +/* + * Detailed byte-for-byte verification of the format of a zip archive + * written in streaming mode with Zip64 extensions enabled. + */ + +static unsigned long +bitcrc32(unsigned long c, void *_p, size_t s) +{ + /* This is a drop-in replacement for crc32() from zlib. + * Libarchive should be able to correctly generate + * uncompressed zip archives (including correct CRCs) even + * when zlib is unavailable, and this function helps us verify + * that. Yes, this is very, very slow and unsuitable for + * production use, but it's correct, compact, and works well + * enough for this particular usage. Libarchive internally + * uses a much more efficient implementation. */ + const unsigned char *p = _p; + int bitctr; + + if (p == NULL) + return (0); + + for (; s > 0; --s) { + c ^= *p++; + for (bitctr = 8; bitctr > 0; --bitctr) { + if (c & 1) c = (c >> 1); + else c = (c >> 1) ^ 0xedb88320; + c ^= 0x80000000; + } + } + return (c); +} + +/* Quick and dirty: Read 2-byte and 4-byte integers from Zip file. */ +static unsigned i2(const unsigned char *p) { return ((p[0] & 0xff) | ((p[1] & 0xff) << 8)); } +static unsigned i4(const unsigned char *p) { return (i2(p) | (i2(p + 2) << 16)); } +/* We're only working with small values here; ignore the 4 high bytes. 
*/ +static unsigned i8(const unsigned char *p) { return (i4(p)); } + +DEFINE_TEST(test_write_format_zip64_stream) +{ + struct archive *a; + struct archive_entry *ae; + size_t used, buffsize = 1000000; + unsigned long crc; + unsigned long compressed_size = 0; + int file_perm = 00644; + int zip_version = 45; + int zip_compression = 8; + short file_uid = 10, file_gid = 20; + unsigned char *buff, *buffend, *p; + unsigned char *central_header, *local_header, *eocd, *eocd_record; + unsigned char *extension_start, *extension_end; + unsigned char *data_start, *data_end; + char file_data[] = {'1', '2', '3', '4', '5', '6', '7', '8'}; + const char *file_name = "file"; + +#ifndef HAVE_ZLIB_H + zip_compression = 0; +#endif + + buff = malloc(buffsize); + + /* Create a new archive in memory. */ + assert((a = archive_write_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_format_zip(a)); + assertEqualIntA(a, ARCHIVE_OK, + archive_write_set_options(a, "zip:zip64")); + assertEqualIntA(a, ARCHIVE_OK, + archive_write_set_options(a, "zip:experimental")); + assertEqualIntA(a, ARCHIVE_OK, + archive_write_open_memory(a, buff, buffsize, &used)); + + assert((ae = archive_entry_new()) != NULL); + archive_entry_copy_pathname(ae, file_name); + archive_entry_set_mode(ae, AE_IFREG | file_perm); + archive_entry_set_uid(ae, file_uid); + archive_entry_set_gid(ae, file_gid); + archive_entry_set_mtime(ae, 0, 0); + assertEqualInt(0, archive_write_header(a, ae)); + archive_entry_free(ae); + assertEqualInt(8, archive_write_data(a, file_data, sizeof(file_data))); + assertEqualIntA(a, ARCHIVE_OK, archive_write_close(a)); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + buffend = buff + used; + dumpfile("constructed.zip", buff, used); + + /* Verify "End of Central Directory" record. */ + /* Get address of end-of-central-directory record. */ + eocd_record = p = buffend - 22; /* Assumes there is no zip comment field. 
*/ + failure("End-of-central-directory begins with PK\\005\\006 signature"); + assertEqualMem(p, "PK\005\006", 4); + failure("This must be disk 0"); + assertEqualInt(i2(p + 4), 0); + failure("Central dir must start on disk 0"); + assertEqualInt(i2(p + 6), 0); + failure("All central dir entries are on this disk"); + assertEqualInt(i2(p + 8), i2(p + 10)); + eocd = buff + i4(p + 12) + i4(p + 16); + failure("no zip comment"); + assertEqualInt(i2(p + 20), 0); + + /* Get address of first entry in central directory. */ + central_header = p = buff + i4(buffend - 6); + failure("Central file record at offset %d should begin with" + " PK\\001\\002 signature", + i4(buffend - 10)); + + /* Verify file entry in central directory. */ + assertEqualMem(p, "PK\001\002", 4); /* Signature */ + assertEqualInt(i2(p + 4), 3 * 256 + zip_version); /* Version made by */ + assertEqualInt(i2(p + 6), zip_version); /* Version needed to extract */ + assertEqualInt(i2(p + 8), 8); /* Flags */ + assertEqualInt(i2(p + 10), zip_compression); /* Compression method */ + assertEqualInt(i2(p + 12), 0); /* File time */ + assertEqualInt(i2(p + 14), 33); /* File date */ + crc = bitcrc32(0, file_data, sizeof(file_data)); + assertEqualInt(i4(p + 16), crc); /* CRC-32 */ + compressed_size = i4(p + 20); /* Compressed size */ + assertEqualInt(i4(p + 24), sizeof(file_data)); /* Uncompressed size */ + assertEqualInt(i2(p + 28), strlen(file_name)); /* Pathname length */ + /* assertEqualInt(i2(p + 30), 28); */ /* Extra field length: See below */ + assertEqualInt(i2(p + 32), 0); /* File comment length */ + assertEqualInt(i2(p + 34), 0); /* Disk number start */ + assertEqualInt(i2(p + 36), 0); /* Internal file attrs */ + assertEqualInt(i4(p + 38) >> 16 & 01777, file_perm); /* External file attrs */ + assertEqualInt(i4(p + 42), 0); /* Offset of local header */ + assertEqualMem(p + 46, file_name, strlen(file_name)); /* Pathname */ + p = extension_start = central_header + 46 + strlen(file_name); + extension_end = 
extension_start + i2(central_header + 30); + + assertEqualInt(i2(p), 0x7875); /* 'ux' extension header */ + assertEqualInt(i2(p + 2), 11); /* 'ux' size */ + /* TODO: verify 'ux' contents */ + p += 4 + i2(p + 2); + + assertEqualInt(i2(p), 0x5455); /* 'UT' extension header */ + assertEqualInt(i2(p + 2), 5); /* 'UT' size */ + assertEqualInt(p[4], 1); /* 'UT' flags */ + assertEqualInt(i4(p + 5), 0); /* 'UT' mtime */ + p += 4 + i2(p + 2); + + /* Note: We don't expect to see zip64 extension in the central + * directory, since the writer knows the actual full size by + * the time it is ready to write the central directory and has + * no reason to insert it then. Info-Zip seems to do the same + * thing. */ + + /* Just in case: Report any extra extensions. */ + while (p < extension_end) { + failure("Unexpected extension 0x%04X", i2(p)); + assert(0); + p += 4 + i2(p + 2); + } + + /* Should have run exactly to end of extra data. */ + assertEqualAddress(p, extension_end); + + assertEqualAddress(p, eocd); + + /* After Central dir, we find Zip64 eocd and Zip64 eocd locator. 
*/ + assertEqualMem(p, "PK\006\006", 4); /* Zip64 eocd */ + assertEqualInt(i8(p + 4), 44); /* We're using v1 Zip64 eocd */ + assertEqualInt(i2(p + 12), 45); /* Written by Version 4.5 */ + assertEqualInt(i2(p + 14), 45); /* Needs version 4.5 to extract */ + assertEqualInt(i4(p + 16), 0); /* This is disk #0 */ + assertEqualInt(i4(p + 20), 0); /* Dir starts on disk #0 */ + assertEqualInt(i8(p + 24), 1); /* 1 entry on this disk */ + assertEqualInt(i8(p + 32), 1); /* 1 entry total */ + assertEqualInt(i8(p + 40), eocd - central_header); /* size of cd */ + assertEqualInt(i8(p + 48), central_header - buff); /* start of cd */ + p += 12 + i8(p + 4); + + assertEqualMem(p, "PK\006\007", 4); /* Zip64 eocd locator */ + assertEqualInt(i4(p + 4), 0); /* Zip64 eocd is on disk #0 */ + assertEqualInt(i8(p + 8), eocd - buff); /* Offset of Zip64 eocd */ + assertEqualInt(i4(p + 16), 1); /* 1 disk */ + p += 20; + + /* Regular EOCD immediately follows Zip64 records. */ + assertEqualAddress(p, eocd_record); + + /* Verify local header of file entry. 
*/ + p = local_header = buff; + assertEqualMem(p, "PK\003\004", 4); /* Signature */ + assertEqualInt(i2(p + 4), zip_version); /* Version needed to extract */ + assertEqualInt(i2(p + 6), 8); /* Flags: bit 3 = length-at-end */ + assertEqualInt(i2(p + 8), zip_compression); /* Compression method */ + assertEqualInt(i2(p + 10), 0); /* File time */ + assertEqualInt(i2(p + 12), 33); /* File date */ + assertEqualInt(i4(p + 14), 0); /* CRC-32 */ + assertEqualInt(i4(p + 18), 0); /* Compressed size must be zero for length-at-end */ + assertEqualInt(i4(p + 22), 0); /* Uncompressed size must be zero for length-at-end */ + assertEqualInt(i2(p + 26), strlen(file_name)); /* Pathname length */ + assertEqualInt(i2(p + 28), 37); /* Extra field length */ + assertEqualMem(p + 30, file_name, strlen(file_name)); /* Pathname */ + p = extension_start = local_header + 30 + strlen(file_name); + extension_end = extension_start + i2(local_header + 28); + + assertEqualInt(i2(p), 0x7875); /* 'ux' extension header */ + assertEqualInt(i2(p + 2), 11); /* 'ux' size */ + assertEqualInt(p[4], 1); /* 'ux' version */ + assertEqualInt(p[5], 4); /* 'ux' uid size */ + assertEqualInt(i4(p + 6), file_uid); /* 'Ux' UID */ + assertEqualInt(p[10], 4); /* 'ux' gid size */ + assertEqualInt(i4(p + 11), file_gid); /* 'Ux' GID */ + p += 4 + i2(p + 2); + + assertEqualInt(i2(p), 0x5455); /* 'UT' extension header */ + assertEqualInt(i2(p + 2), 5); /* 'UT' size */ + assertEqualInt(p[4], 1); /* 'UT' flags */ + assertEqualInt(i4(p + 5), 0); /* 'UT' mtime */ + p += 4 + i2(p + 2); + + assertEqualInt(i2(p), 0x6c78); /* 'xl' experimental extension header */ + assertEqualInt(i2(p + 2), 9); /* size */ + assertEqualInt(p[4], 7); /* bitmap of included fields */ + assertEqualInt(i2(p + 5) >> 8, 3); /* system & version made by */ + assertEqualInt(i2(p + 7), 0); /* internal file attributes */ + assertEqualInt(i4(p + 9) >> 16 & 01777, file_perm); /* external file attributes */ + p += 4 + i2(p + 2); + + /* Just in case: Report any 
extra extensions. */ + while (p < extension_end) { + failure("Unexpected extension 0x%04X", i2(p)); + assert(0); + p += 4 + i2(p + 2); + } + + /* Should have run exactly to end of extra data. */ + assertEqualAddress(p, extension_end); + data_start = p; + + /* Data descriptor should follow compressed data. */ + while (p < central_header && memcmp(p, "PK\007\010", 4) != 0) + ++p; + data_end = p; + assertEqualInt(data_end - data_start, compressed_size); + assertEqualMem(p, "PK\007\010", 4); + assertEqualInt(i4(p + 4), crc); /* CRC-32 */ + assertEqualInt(i8(p + 8), compressed_size); /* compressed size */ + assertEqualInt(i8(p + 16), sizeof(file_data)); /* uncompressed size */ + + /* Central directory should immediately follow the only entry. */ + assertEqualAddress(p + 24, central_header); + + free(buff); +} diff --git a/libarchive/test/test_write_format_zip_compression_store.c b/libarchive/test/test_write_format_zip_compression_store.c index a8c80e977..fc764ac7a 100644 --- a/libarchive/test/test_write_format_zip_compression_store.c +++ b/libarchive/test/test_write_format_zip_compression_store.c @@ -183,7 +183,7 @@ static void verify_uncompressed_contents(const char *buff, size_t used) assertEqualInt(i4(p + 20), sizeof(file_data1) + sizeof(file_data2)); /* Compressed size */ assertEqualInt(i4(p + 24), sizeof(file_data1) + sizeof(file_data2)); /* Uncompressed size */ assertEqualInt(i2(p + 28), strlen(file_name)); /* Pathname length */ - assertEqualInt(i2(p + 30), 28); /* Extra field length */ + assertEqualInt(i2(p + 30), 24); /* Extra field length */ assertEqualInt(i2(p + 32), 0); /* File comment length */ assertEqualInt(i2(p + 34), 0); /* Disk number start */ assertEqualInt(i2(p + 36), 0); /* Internal file attrs */ @@ -191,38 +191,33 @@ static void verify_uncompressed_contents(const char *buff, size_t used) assertEqualInt(i4(p + 42), 0); /* Offset of local header */ assertEqualMem(p + 46, file_name, strlen(file_name)); /* Pathname */ p = p + 46 + strlen(file_name); - 
assertEqualInt(i2(p), 0x5455); /* 'UT' extension header */ - assertEqualInt(i2(p + 2), 9); /* 'UT' size */ - assertEqualInt(p[4], 3); /* 'UT' flags */ - assertEqualInt(i4(p + 5), now); /* 'UT' mtime */ - assertEqualInt(i4(p + 9), now + 3); /* 'UT' atime */ - p = p + 4 + i2(p + 2); + assertEqualInt(i2(p), 0x7875); /* 'ux' extension header */ assertEqualInt(i2(p + 2), 11); /* 'ux' size */ /* TODO */ p = p + 4 + i2(p + 2); + assertEqualInt(i2(p), 0x5455); /* 'UT' extension header */ + assertEqualInt(i2(p + 2), 5); /* 'UT' size */ + assertEqualInt(p[4], 1); /* 'UT' flags */ + assertEqualInt(i4(p + 5), now); /* 'UT' mtime */ + p = p + 4 + i2(p + 2); + /* Verify local header of file entry. */ local_header = q = buff; assertEqualMem(q, "PK\003\004", 4); /* Signature */ assertEqualInt(i2(q + 4), 10); /* Version needed to extract */ - assertEqualInt(i2(q + 6), 8); /* Flags */ + assertEqualInt(i2(q + 6), 8); /* Flags: bit 3 = length-at-end. Required because CRC32 is unknown */ assertEqualInt(i2(q + 8), 0); /* Compression method */ assertEqualInt(i2(q + 10), (tm->tm_hour * 2048) + (tm->tm_min * 32) + (tm->tm_sec / 2)); /* File time */ assertEqualInt(i2(q + 12), ((tm->tm_year - 80) * 512) + ((tm->tm_mon + 1) * 32) + tm->tm_mday); /* File date */ assertEqualInt(i4(q + 14), 0); /* CRC-32 */ - assertEqualInt(i4(q + 18), sizeof(file_data1) + sizeof(file_data2)); /* Compressed size */ - assertEqualInt(i4(q + 22), sizeof(file_data1) + sizeof(file_data2)); /* Uncompressed size */ + assertEqualInt(i4(q + 18), 0); /* Compressed size, must be zero because of length-at-end */ + assertEqualInt(i4(q + 22), 0); /* Uncompressed size, must be zero because of length-at-end */ assertEqualInt(i2(q + 26), strlen(file_name)); /* Pathname length */ assertEqualInt(i2(q + 28), 41); /* Extra field length */ assertEqualMem(q + 30, file_name, strlen(file_name)); /* Pathname */ extra_start = q = q + 30 + strlen(file_name); - assertEqualInt(i2(q), 0x5455); /* 'UT' extension header */ - assertEqualInt(i2(q 
+ 2), 9); /* 'UT' size */ - assertEqualInt(q[4], 3); /* 'UT' flags */ - assertEqualInt(i4(q + 5), now); /* 'UT' mtime */ - assertEqualInt(i4(q + 9), now + 3); /* 'UT' atime */ - q = q + 4 + i2(q + 2); assertEqualInt(i2(q), 0x7875); /* 'ux' extension header */ assertEqualInt(i2(q + 2), 11); /* 'ux' size */ @@ -233,6 +228,13 @@ static void verify_uncompressed_contents(const char *buff, size_t used) assertEqualInt(i4(q + 11), file_gid); /* 'Ux' GID */ q = q + 4 + i2(q + 2); + assertEqualInt(i2(q), 0x5455); /* 'UT' extension header */ + assertEqualInt(i2(q + 2), 9); /* 'UT' size */ + assertEqualInt(q[4], 3); /* 'UT' flags */ + assertEqualInt(i4(q + 5), now); /* 'UT' mtime */ + assertEqualInt(i4(q + 9), now + 3); /* 'UT' atime */ + q = q + 4 + i2(q + 2); + assertEqualInt(i2(q), 0x6c78); /* 'xl' experimental extension header */ assertEqualInt(i2(q + 2), 9); /* size */ assertEqualInt(q[4], 7); /* Bitmap of fields included. */ @@ -269,7 +271,7 @@ static void verify_uncompressed_contents(const char *buff, size_t used) assertEqualInt(i4(p + 20), 0); /* Compressed size */ assertEqualInt(i4(p + 24), 0); /* Uncompressed size */ assertEqualInt(i2(p + 28), strlen(folder_name)); /* Pathname length */ - assertEqualInt(i2(p + 30), 28); /* Extra field length */ + assertEqualInt(i2(p + 30), 24); /* Extra field length */ assertEqualInt(i2(p + 32), 0); /* File comment length */ assertEqualInt(i2(p + 34), 0); /* Disk number start */ assertEqualInt(i2(p + 36), 0); /* Internal file attrs */ @@ -277,12 +279,7 @@ static void verify_uncompressed_contents(const char *buff, size_t used) assertEqualInt(i4(p + 42), q - buff); /* Offset of local header */ assertEqualMem(p + 46, folder_name, strlen(folder_name)); /* Pathname */ p = p + 46 + strlen(folder_name); - assertEqualInt(i2(p), 0x5455); /* 'UT' extension header */ - assertEqualInt(i2(p + 2), 9); /* 'UT' size */ - assertEqualInt(p[4], 5); /* 'UT' flags */ - assertEqualInt(i4(p + 5), now); /* 'UT' mtime */ - assertEqualInt(i4(p + 9), now + 5); 
/* 'UT' atime */ - p = p + 4 + i2(p + 2); + assertEqualInt(i2(p), 0x7875); /* 'ux' extension header */ assertEqualInt(i2(p + 2), 11); /* 'ux' size */ assertEqualInt(p[4], 1); /* 'ux' version */ @@ -290,7 +287,13 @@ static void verify_uncompressed_contents(const char *buff, size_t used) assertEqualInt(i4(p + 6), folder_uid); /* 'ux' UID */ assertEqualInt(p[10], 4); /* 'ux' gid size */ assertEqualInt(i4(p + 11), folder_gid); /* 'ux' GID */ - /*p = p + 4 + i2(p + 2);*/ + p = p + 4 + i2(p + 2); + + assertEqualInt(i2(p), 0x5455); /* 'UT' extension header */ + assertEqualInt(i2(p + 2), 5); /* 'UT' size */ + assertEqualInt(p[4], 1); /* 'UT' flags */ + assertEqualInt(i4(p + 5), now); /* 'UT' mtime */ + p = p + 4 + i2(p + 2); /* Verify local header of folder entry. */ local_header = q; @@ -307,12 +310,7 @@ static void verify_uncompressed_contents(const char *buff, size_t used) assertEqualInt(i2(q + 28), 41); /* Extra field length */ assertEqualMem(q + 30, folder_name, strlen(folder_name)); /* Pathname */ extra_start = q = q + 30 + strlen(folder_name); - assertEqualInt(i2(q), 0x5455); /* 'UT' extension header */ - assertEqualInt(i2(q + 2), 9); /* 'UT' size */ - assertEqualInt(q[4], 5); /* 'UT' flags */ - assertEqualInt(i4(q + 5), now); /* 'UT' mtime */ - assertEqualInt(i4(q + 9), now + 5); /* 'UT' atime */ - q = q + 4 + i2(q + 2); + assertEqualInt(i2(q), 0x7875); /* 'ux' extension header */ assertEqualInt(i2(q + 2), 11); /* 'ux' size */ assertEqualInt(q[4], 1); /* 'ux' version */ @@ -322,6 +320,13 @@ static void verify_uncompressed_contents(const char *buff, size_t used) assertEqualInt(i4(q + 11), folder_gid); /* 'ux' GID */ q = q + 4 + i2(q + 2); + assertEqualInt(i2(q), 0x5455); /* 'UT' extension header */ + assertEqualInt(i2(q + 2), 9); /* 'UT' size */ + assertEqualInt(q[4], 5); /* 'UT' flags */ + assertEqualInt(i4(q + 5), now); /* 'UT' mtime */ + assertEqualInt(i4(q + 9), now + 5); /* 'UT' atime */ + q = q + 4 + i2(q + 2); + assertEqualInt(i2(q), 0x6c78); /* 'xl' 
experimental extension header */ assertEqualInt(i2(q + 2), 9); /* size */ assertEqualInt(q[4], 7); /* bitmap of fields */ diff --git a/libarchive/test/test_write_format_zip_entry_size_unset.c b/libarchive/test/test_write_format_zip_entry_size_unset.c index ac888670f..10c191f7c 100644 --- a/libarchive/test/test_write_format_zip_entry_size_unset.c +++ b/libarchive/test/test_write_format_zip_entry_size_unset.c @@ -151,7 +151,7 @@ static void verify_contents(const char *zip_buff, size_t size) /* Check file name length */ assertEqualInt(i2(central_directory + 28), strlen(file_name)); /* Check extra field length */ - assertEqualInt(i2(central_directory + 30), 20); + assertEqualInt(i2(central_directory + 30), 15); /* Check file comment length */ assertEqualInt(i2(central_directory + 32), 0); /* Check disk number where file starts */ @@ -187,7 +187,7 @@ static void verify_contents(const char *zip_buff, size_t size) /* Check pathname length */ assertEqualInt(i2(local_file_header + 26), strlen(file_name)); /* Check extra field length */ - assertEqualInt(i2(local_file_header + 28), 20); + assertEqualInt(i2(local_file_header + 28), 15); /* Check path name match */ assertEqualMem(local_file_header + 30, file_name, strlen(file_name)); @@ -209,7 +209,7 @@ static void verify_contents(const char *zip_buff, size_t size) assertEqualInt(i4(data_descriptor + 12), sizeof(file_data1) + sizeof(file_data2)); /* Get folder entry in central directory */ - const char *central_directory_folder_entry = central_directory + 46 + 20 + strlen(file_name); + const char *central_directory_folder_entry = central_directory + 46 + i2(local_file_header + 28) + strlen(file_name); /* Get start of folder entry */ const char *local_folder_header = data_descriptor + 16; @@ -233,7 +233,7 @@ static void verify_contents(const char *zip_buff, size_t size) /* Check path name length */ assertEqualInt(i2(central_directory_folder_entry + 28), strlen(folder_name)); /* Check extra field length */ - 
assertEqualInt(i2(central_directory_folder_entry + 30), 20); + assertEqualInt(i2(central_directory_folder_entry + 30), 15); /* Check file comment length */ assertEqualInt(i2(central_directory_folder_entry + 32), 0); /* Check disk number start */ @@ -264,11 +264,11 @@ static void verify_contents(const char *zip_buff, size_t size) /* Check path name length */ assertEqualInt(i2(local_folder_header + 26), strlen(folder_name)); /* Check extra field length */ - assertEqualInt(i2(local_folder_header + 28), 20); + assertEqualInt(i2(local_folder_header + 28), 15); /* Check path name */ assertEqualMem(local_folder_header + 30, folder_name, strlen(folder_name)); - const char *post_local_folder = local_folder_header + 30 + strlen(folder_name) + 20; + const char *post_local_folder = local_folder_header + 30 + i2(local_folder_header + 28) + strlen(folder_name); assertEqualMem(post_local_folder, central_directory, 4); } diff --git a/libarchive/test/test_write_format_zip_file.c b/libarchive/test/test_write_format_zip_file.c index 7363e502c..d4f1b9b22 100644 --- a/libarchive/test/test_write_format_zip_file.c +++ b/libarchive/test/test_write_format_zip_file.c @@ -169,17 +169,17 @@ DEFINE_TEST(test_write_format_zip_file) p = extension_start = central_header + 46 + strlen(file_name); extension_end = extension_start + i2(central_header + 30); + assertEqualInt(i2(p), 0x7875); /* 'ux' extension header */ + assertEqualInt(i2(p + 2), 11); /* 'ux' size */ + /* TODO: verify 'ux' contents */ + p += 4 + i2(p + 2); + assertEqualInt(i2(p), 0x5455); /* 'UT' extension header */ assertEqualInt(i2(p + 2), 5); /* 'UT' size */ assertEqualInt(p[4], 1); /* 'UT' flags */ assertEqualInt(i4(p + 5), t); /* 'UT' mtime */ p += 4 + i2(p + 2); - assertEqualInt(i2(p), 0x7875); /* 'ux' extension header */ - assertEqualInt(i2(p + 2), 11); /* 'ux' size */ - /* TODO: verify 'ux' contents */ - p += 4 + i2(p + 2); - /* Just in case: Report any extra extensions. 
*/ while (p < extension_end) { failure("Unexpected extension 0x%04X", i2(p)); @@ -188,36 +188,30 @@ DEFINE_TEST(test_write_format_zip_file) } /* Should have run exactly to end of extra data. */ - assert(p == extension_end); + assertEqualAddress(p, extension_end); - assert(p == eocd); + assertEqualAddress(p, eocd); /* Regular EOCD immediately follows central directory. */ - assert(p == eocd_record); + assertEqualAddress(p, eocd_record); /* Verify local header of file entry. */ p = local_header = buff; assertEqualMem(p, "PK\003\004", 4); /* Signature */ assertEqualInt(i2(p + 4), zip_version); /* Version needed to extract */ - assertEqualInt(i2(p + 6), 8); /* Flags */ + assertEqualInt(i2(p + 6), 8); /* Flags: bit 3 = length-at-end */ assertEqualInt(i2(p + 8), zip_compression); /* Compression method */ assertEqualInt(i2(p + 10), (tm->tm_hour * 2048) + (tm->tm_min * 32) + (tm->tm_sec / 2)); /* File time */ assertEqualInt(i2(p + 12), ((tm->tm_year - 80) * 512) + ((tm->tm_mon + 1) * 32) + tm->tm_mday); /* File date */ - assertEqualInt(i4(p + 14), 0); /* CRC-32 */ - /* assertEqualInt(i4(p + 18), sizeof(file_data)); */ /* Compressed size */ - /* assertEqualInt(i4(p + 22), sizeof(file_data)); */ /* Uncompressed size not stored because we're using length-at-end. */ + assertEqualInt(i4(p + 14), 0); /* CRC-32 stored as zero because we're using length-at-end */ + assertEqualInt(i4(p + 18), 0); /* Compressed size stored as zero because we're using length-at-end. */ + assertEqualInt(i4(p + 22), 0); /* Uncompressed size stored as zero because we're using length-at-end. 
*/ assertEqualInt(i2(p + 26), strlen(file_name)); /* Pathname length */ assertEqualInt(i2(p + 28), 37); /* Extra field length */ assertEqualMem(p + 30, file_name, strlen(file_name)); /* Pathname */ p = extension_start = local_header + 30 + strlen(file_name); extension_end = extension_start + i2(local_header + 28); - assertEqualInt(i2(p), 0x5455); /* 'UT' extension header */ - assertEqualInt(i2(p + 2), 5); /* size */ - assertEqualInt(p[4], 1); /* 'UT' flags */ - assertEqualInt(i4(p + 5), t); /* 'UT' mtime */ - p += 4 + i2(p + 2); - assertEqualInt(i2(p), 0x7875); /* 'ux' extension header */ assertEqualInt(i2(p + 2), 11); /* size */ assertEqualInt(p[4], 1); /* 'ux' version */ @@ -227,6 +221,12 @@ DEFINE_TEST(test_write_format_zip_file) assertEqualInt(i4(p + 11), file_gid); /* 'Ux' GID */ p += 4 + i2(p + 2); + assertEqualInt(i2(p), 0x5455); /* 'UT' extension header */ + assertEqualInt(i2(p + 2), 5); /* size */ + assertEqualInt(p[4], 1); /* 'UT' flags */ + assertEqualInt(i4(p + 5), t); /* 'UT' mtime */ + p += 4 + i2(p + 2); + assertEqualInt(i2(p), 0x6c78); /* 'xl' experimental extension block */ assertEqualInt(i2(p + 2), 9); /* size */ assertEqualInt(p[4], 7); /* bitmap of fields in this block */ @@ -243,18 +243,18 @@ DEFINE_TEST(test_write_format_zip_file) } /* Should have run exactly to end of extra data. */ - assert(p == extension_end); + assertEqualAddress(p, extension_end); /* Data descriptor should follow compressed data. */ while (p < central_header && memcmp(p, "PK\007\010", 4) != 0) ++p; assertEqualMem(p, "PK\007\010", 4); assertEqualInt(i4(p + 4), crc); /* CRC-32 */ - /* assertEqualInt(i4(p + 8), ???); */ /* compressed size */ + assertEqualInt(i4(p + 8), p - extension_end); /* compressed size */ assertEqualInt(i4(p + 12), sizeof(file_data)); /* uncompressed size */ /* Central directory should immediately follow the only entry. 
*/ - assert(p + 16 == central_header); + assertEqualAddress(p + 16, central_header); free(buff); } diff --git a/libarchive/test/test_write_format_zip_file_zip64.c b/libarchive/test/test_write_format_zip_file_zip64.c index 29bb3bfd3..f06f2aad3 100644 --- a/libarchive/test/test_write_format_zip_file_zip64.c +++ b/libarchive/test/test_write_format_zip_file_zip64.c @@ -172,17 +172,17 @@ DEFINE_TEST(test_write_format_zip_file_zip64) p = extension_start = central_header + 46 + strlen(file_name); extension_end = extension_start + i2(central_header + 30); + assertEqualInt(i2(p), 0x7875); /* 'ux' extension header */ + assertEqualInt(i2(p + 2), 11); /* 'ux' size */ + /* TODO: verify 'ux' contents */ + p += 4 + i2(p + 2); + assertEqualInt(i2(p), 0x5455); /* 'UT' extension header */ assertEqualInt(i2(p + 2), 5); /* 'UT' size */ assertEqualInt(p[4], 1); /* 'UT' flags */ assertEqualInt(i4(p + 5), t); /* 'UT' mtime */ p += 4 + i2(p + 2); - assertEqualInt(i2(p), 0x7875); /* 'ux' extension header */ - assertEqualInt(i2(p + 2), 11); /* 'ux' size */ - /* TODO: verify 'ux' contents */ - p += 4 + i2(p + 2); - /* Note: We don't expect to see zip64 extension in the central * directory, since the writer knows the actual full size by * the time it is ready to write the central directory and has @@ -197,9 +197,9 @@ DEFINE_TEST(test_write_format_zip_file_zip64) } /* Should have run exactly to end of extra data. */ - assert(p == extension_end); + assertEqualAddress(p, extension_end); - assert(p == eocd); + assertEqualAddress(p, eocd); /* After Central dir, we find Zip64 eocd and Zip64 eocd locator. */ assertEqualMem(p, "PK\006\006", 4); /* Zip64 eocd */ @@ -221,31 +221,25 @@ DEFINE_TEST(test_write_format_zip_file_zip64) p += 20; /* Regular EOCD immediately follows Zip64 records. */ - assert(p == eocd_record); + assertEqualAddress(p, eocd_record); /* Verify local header of file entry. 
*/ p = local_header = buff; assertEqualMem(p, "PK\003\004", 4); /* Signature */ assertEqualInt(i2(p + 4), zip_version); /* Version needed to extract */ - assertEqualInt(i2(p + 6), 8); /* Flags */ + assertEqualInt(i2(p + 6), 8); /* Flags: bit 3 = length-at-end */ assertEqualInt(i2(p + 8), zip_compression); /* Compression method */ assertEqualInt(i2(p + 10), (tm->tm_hour * 2048) + (tm->tm_min * 32) + (tm->tm_sec / 2)); /* File time */ assertEqualInt(i2(p + 12), ((tm->tm_year - 80) * 512) + ((tm->tm_mon + 1) * 32) + tm->tm_mday); /* File date */ - assertEqualInt(i4(p + 14), 0); /* CRC-32 */ - /* assertEqualInt(i4(p + 18), sizeof(file_data)); */ /* Compressed size */ - /* assertEqualInt(i4(p + 22), sizeof(file_data)); */ /* Uncompressed size not stored because we're using length-at-end. */ + assertEqualInt(i4(p + 14), 0); /* CRC-32 must be 0 because of length-at-end */ + assertEqualInt(i4(p + 18), 0); /* Compressed size must be 0 because of length-at-end */ + assertEqualInt(i4(p + 22), 0); /* Uncompressed size must be 0 because of length-at-end. 
*/ assertEqualInt(i2(p + 26), strlen(file_name)); /* Pathname length */ - assertEqualInt(i2(p + 28), 57); /* Extra field length */ + assertEqualInt(i2(p + 28), 37); /* Extra field length */ assertEqualMem(p + 30, file_name, strlen(file_name)); /* Pathname */ p = extension_start = local_header + 30 + strlen(file_name); extension_end = extension_start + i2(local_header + 28); - assertEqualInt(i2(p), 0x5455); /* 'UT' extension header */ - assertEqualInt(i2(p + 2), 5); /* 'UT' size */ - assertEqualInt(p[4], 1); /* 'UT' flags */ - assertEqualInt(i4(p + 5), t); /* 'UT' mtime */ - p += 4 + i2(p + 2); - assertEqualInt(i2(p), 0x7875); /* 'ux' extension header */ assertEqualInt(i2(p + 2), 11); /* 'ux' size */ assertEqualInt(p[4], 1); /* 'ux' version */ @@ -255,10 +249,10 @@ DEFINE_TEST(test_write_format_zip_file_zip64) assertEqualInt(i4(p + 11), file_gid); /* 'Ux' GID */ p += 4 + i2(p + 2); - assertEqualInt(i2(p), 0x0001); /* Zip64 extension header */ - assertEqualInt(i2(p + 2), 16); /* size */ - assertEqualInt(i8(p + 4), 8); /* uncompressed file size */ - /* compressed file size we can't verify here */ + assertEqualInt(i2(p), 0x5455); /* 'UT' extension header */ + assertEqualInt(i2(p + 2), 5); /* 'UT' size */ + assertEqualInt(p[4], 1); /* 'UT' flags */ + assertEqualInt(i4(p + 5), t); /* 'UT' mtime */ p += 4 + i2(p + 2); assertEqualInt(i2(p), 0x6c78); /* 'xl' experimental extension header */ @@ -277,18 +271,18 @@ DEFINE_TEST(test_write_format_zip_file_zip64) } /* Should have run exactly to end of extra data. */ - assert(p == extension_end); + assertEqualAddress(p, extension_end); /* Data descriptor should follow compressed data. 
*/ while (p < central_header && memcmp(p, "PK\007\010", 4) != 0) ++p; assertEqualMem(p, "PK\007\010", 4); assertEqualInt(i4(p + 4), crc); /* CRC-32 */ - /* assertEqualInt(i8(p + 8), ???); */ /* compressed size */ + assertEqualInt(i8(p + 8), p - extension_end); /* compressed size */ assertEqualInt(i8(p + 16), sizeof(file_data)); /* uncompressed size */ /* Central directory should immediately follow the only entry. */ - assert(p + 24 == central_header); + assertEqualAddress(p + 24, central_header); free(buff); } diff --git a/libarchive/test/test_write_format_zip_large.c b/libarchive/test/test_write_format_zip_large.c index 20613d157..90bd16aaa 100644 --- a/libarchive/test/test_write_format_zip_large.c +++ b/libarchive/test/test_write_format_zip_large.c @@ -272,22 +272,21 @@ static int64_t test_sizes[] = { 2 * GB - 1, 2 * GB, 2 * GB + 1, /* Test for 32-bit unsigned overflow. */ 4 * GB - 1, 4 * GB, 4 * GB + 1, - /* And beyond ... because we can. */ - 16 * GB - 1, 16 * GB, 16 * GB + 1, - 64 * GB - 1, 64 * GB, 64 * GB + 1, - 256 * GB - 1, 256 * GB, 256 * GB + 1, - 1 * TB, + /* And one larger sample */ + 5 * GB, 0 }; static void -verify_large_zip(struct archive *a, struct fileblocks *fileblocks) +verify_large_zip(struct archive *a, struct fileblocks *fileblocks, int seeking) { char namebuff[64]; struct archive_entry *ae; int i; + (void)seeking; /* UNUSED */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_set_options(a, "zip:ignorecrc32")); assertEqualIntA(a, ARCHIVE_OK, @@ -310,7 +309,13 @@ verify_large_zip(struct archive *a, struct fileblocks *fileblocks) archive_read_next_header(a, &ae)); snprintf(namebuff, sizeof(namebuff), "file_%d", i); assertEqualString(namebuff, archive_entry_pathname(ae)); - assertEqualInt(test_sizes[i], archive_entry_size(ae)); + if (seeking) { + assert(archive_entry_size_is_set(ae)); + assertEqualInt(test_sizes[i], archive_entry_size(ae)); + } else { + assertEqualInt(0, archive_entry_size_is_set(ae)); + } + /* TODO: Read to end of data, verify 
length */ } assertEqualIntA(a, 0, archive_read_next_header(a, &ae)); assertEqualString("lastfile", archive_entry_pathname(ae)); @@ -344,6 +349,7 @@ DEFINE_TEST(test_write_format_zip_large) */ a = archive_write_new(); archive_write_set_format_zip(a); + /* TODO: Repeat this entire test suite with default compression */ assertEqualIntA(a, ARCHIVE_OK, archive_write_set_options(a, "zip:compression=store")); assertEqualIntA(a, ARCHIVE_OK, @@ -397,7 +403,7 @@ DEFINE_TEST(test_write_format_zip_large) a = archive_read_new(); assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_zip_seekable(a)); - verify_large_zip(a, fileblocks); + verify_large_zip(a, fileblocks, 1); assertEqualInt(ARCHIVE_OK, archive_read_free(a)); /* @@ -406,7 +412,7 @@ DEFINE_TEST(test_write_format_zip_large) a = archive_read_new(); assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_zip_streamable(a)); - verify_large_zip(a, fileblocks); + verify_large_zip(a, fileblocks, 0); assertEqualInt(ARCHIVE_OK, archive_read_free(a)); /* @@ -429,7 +435,7 @@ DEFINE_TEST(test_write_format_zip_large) /* Verify regular end-of-central-directory record */ eocd = p - 22; assertEqualMem(eocd, "PK\005\006\0\0\0\0", 8); - assertEqualMem(eocd + 8, "\021\0\021\0", 4); /* 17 entries total */ + assertEqualMem(eocd + 8, "\010\0\010\0", 4); /* 8 entries total */ cd_size = le32(eocd + 12); /* Start of CD offset should be 0xffffffff */ assertEqualMem(eocd + 16, "\xff\xff\xff\xff", 4); @@ -449,8 +455,8 @@ DEFINE_TEST(test_write_format_zip_large) assertEqualMem(zip64_eocd + 14, "\055\0", 2); // Requires version: 45 assertEqualMem(zip64_eocd + 16, "\0\0\0\0", 4); // This disk assertEqualMem(zip64_eocd + 20, "\0\0\0\0", 4); // Total disks - assertEqualInt(17, le64(zip64_eocd + 24)); // Entries on this disk - assertEqualInt(17, le64(zip64_eocd + 32)); // Total entries + assertEqualInt(8, le64(zip64_eocd + 24)); // Entries on this disk + assertEqualInt(8, le64(zip64_eocd + 32)); // Total entries cd_size = le64(zip64_eocd + 
40); cd_start = p - (fileblocks->filesize - le64(zip64_eocd + 48)); diff --git a/libarchive/test/test_write_format_zip_stream.c b/libarchive/test/test_write_format_zip_stream.c new file mode 100644 index 000000000..aff6a31ae --- /dev/null +++ b/libarchive/test/test_write_format_zip_stream.c @@ -0,0 +1,247 @@ +/*- + * Copyright (c) 2003-2023 Tim Kientzle + * Copyright (c) 2008 Anselm Strauss + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "test.h" + +/* + * Detailed byte-for-byte verification of the format of a zip archive + * written in streaming mode WITHOUT Zip64 extensions enabled. + */ + +static unsigned long +bitcrc32(unsigned long c, void *_p, size_t s) +{ + /* This is a drop-in replacement for crc32() from zlib. 
+ * Libarchive should be able to correctly generate + * uncompressed zip archives (including correct CRCs) even + * when zlib is unavailable, and this function helps us verify + * that. Yes, this is very, very slow and unsuitable for + * production use, but it's correct, compact, and works well + * enough for this particular usage. Libarchive internally + * uses a much more efficient implementation. */ + const unsigned char *p = _p; + int bitctr; + + if (p == NULL) + return (0); + + for (; s > 0; --s) { + c ^= *p++; + for (bitctr = 8; bitctr > 0; --bitctr) { + if (c & 1) c = (c >> 1); + else c = (c >> 1) ^ 0xedb88320; + c ^= 0x80000000; + } + } + return (c); +} + +/* Quick and dirty: Read 2-byte and 4-byte integers from Zip file. */ +static unsigned i2(const unsigned char *p) { return ((p[0] & 0xff) | ((p[1] & 0xff) << 8)); } +static unsigned i4(const unsigned char *p) { return (i2(p) | (i2(p + 2) << 16)); } + +DEFINE_TEST(test_write_format_zip_stream) +{ + struct archive *a; + struct archive_entry *ae; + size_t used, buffsize = 1000000; + unsigned long crc; + unsigned long compressed_size = 0; + int file_perm = 00644; + int zip_version = 20; + int zip_compression = 8; + short file_uid = 10, file_gid = 20; + unsigned char *buff, *buffend, *p; + unsigned char *central_header, *local_header, *eocd, *eocd_record; + unsigned char *extension_start, *extension_end; + unsigned char *data_start, *data_end; + char file_data[] = {'1', '2', '3', '4', '5', '6', '7', '8'}; + const char *file_name = "file"; + +#ifndef HAVE_ZLIB_H + zip_compression = 0; +#endif + + buff = malloc(buffsize); + + /* Create a new archive in memory. 
*/ + assert((a = archive_write_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_format_zip(a)); + assertEqualIntA(a, ARCHIVE_OK, + archive_write_set_options(a, "zip:!zip64")); + assertEqualIntA(a, ARCHIVE_OK, + archive_write_open_memory(a, buff, buffsize, &used)); + + assert((ae = archive_entry_new()) != NULL); + archive_entry_copy_pathname(ae, file_name); + archive_entry_set_mode(ae, AE_IFREG | file_perm); + archive_entry_set_uid(ae, file_uid); + archive_entry_set_gid(ae, file_gid); + archive_entry_set_mtime(ae, 0, 0); + assertEqualInt(0, archive_write_header(a, ae)); + archive_entry_free(ae); + assertEqualInt(8, archive_write_data(a, file_data, sizeof(file_data))); + assertEqualIntA(a, ARCHIVE_OK, archive_write_close(a)); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + buffend = buff + used; + dumpfile("constructed.zip", buff, used); + + /* Verify "End of Central Directory" record. */ + /* Get address of end-of-central-directory record. */ + eocd_record = p = buffend - 22; /* Assumes there is no zip comment field. */ + failure("End-of-central-directory begins with PK\\005\\006 signature"); + assertEqualMem(p, "PK\005\006", 4); + failure("This must be disk 0"); + assertEqualInt(i2(p + 4), 0); + failure("Central dir must start on disk 0"); + assertEqualInt(i2(p + 6), 0); + failure("All central dir entries are on this disk"); + assertEqualInt(i2(p + 8), i2(p + 10)); + eocd = buff + i4(p + 12) + i4(p + 16); + failure("no zip comment"); + assertEqualInt(i2(p + 20), 0); + + /* Get address of first entry in central directory. */ + central_header = p = buff + i4(buffend - 6); + failure("Central file record at offset %d should begin with" + " PK\\001\\002 signature", + i4(buffend - 10)); + + /* Verify file entry in central directory. 
*/ + assertEqualMem(p, "PK\001\002", 4); /* Signature */ + assertEqualInt(i2(p + 4), 3 * 256 + zip_version); /* Version made by */ + assertEqualInt(i2(p + 6), zip_version); /* Version needed to extract */ + assertEqualInt(i2(p + 8), 8); /* Flags */ + assertEqualInt(i2(p + 10), zip_compression); /* Compression method */ + assertEqualInt(i2(p + 12), 0); /* File time */ + assertEqualInt(i2(p + 14), 33); /* File date */ + crc = bitcrc32(0, file_data, sizeof(file_data)); + assertEqualInt(i4(p + 16), crc); /* CRC-32 */ + compressed_size = i4(p + 20); /* Compressed size */ + assertEqualInt(i4(p + 24), sizeof(file_data)); /* Uncompressed size */ + assertEqualInt(i2(p + 28), strlen(file_name)); /* Pathname length */ + /* assertEqualInt(i2(p + 30), 28); */ /* Extra field length: See below */ + assertEqualInt(i2(p + 32), 0); /* File comment length */ + assertEqualInt(i2(p + 34), 0); /* Disk number start */ + assertEqualInt(i2(p + 36), 0); /* Internal file attrs */ + assertEqualInt(i4(p + 38) >> 16 & 01777, file_perm); /* External file attrs */ + assertEqualInt(i4(p + 42), 0); /* Offset of local header */ + assertEqualMem(p + 46, file_name, strlen(file_name)); /* Pathname */ + p = extension_start = central_header + 46 + strlen(file_name); + extension_end = extension_start + i2(central_header + 30); + + assertEqualInt(i2(p), 0x7875); /* 'ux' extension header */ + assertEqualInt(i2(p + 2), 11); /* 'ux' size */ + assertEqualInt(p[4], 1); /* 'ux' version */ + assertEqualInt(p[5], 4); /* 'ux' uid size */ + assertEqualInt(i4(p + 6), file_uid); /* 'Ux' UID */ + assertEqualInt(p[10], 4); /* 'ux' gid size */ + assertEqualInt(i4(p + 11), file_gid); /* 'Ux' GID */ + p += 4 + i2(p + 2); + + assertEqualInt(i2(p), 0x5455); /* 'UT' extension header */ + assertEqualInt(i2(p + 2), 5); /* 'UT' size */ + assertEqualInt(p[4], 1); /* 'UT' flags */ + assertEqualInt(i4(p + 5), 0); /* 'UT' mtime */ + p += 4 + i2(p + 2); + + /* Note: We don't expect to see zip64 extension in the central + * directory, 
since the writer knows the actual full size by + * the time it is ready to write the central directory and has + * no reason to insert it then. Info-Zip seems to do the same + * thing. */ + + /* Just in case: Report any extra extensions. */ + while (p < extension_end) { + failure("Unexpected extension 0x%04X", i2(p)); + assert(0); + p += 4 + i2(p + 2); + } + + /* Should have run exactly to end of extra data. */ + assert(p == extension_end); + + assert(p == eocd); + assert(p == eocd_record); + + /* Verify local header of file entry. */ + p = local_header = buff; + assertEqualMem(p, "PK\003\004", 4); /* Signature */ + assertEqualInt(i2(p + 4), zip_version); /* Version needed to extract */ + assertEqualInt(i2(p + 6), 8); /* Flags */ + assertEqualInt(i2(p + 8), zip_compression); /* Compression method */ + assertEqualInt(i2(p + 10), 0); /* File time */ + assertEqualInt(i2(p + 12), 33); /* File date */ + assertEqualInt(i4(p + 14), 0); /* CRC-32 */ + assertEqualInt(i4(p + 18), 0); /* Compressed size */ + assertEqualInt(i4(p + 22), 0); /* Uncompressed size */ + assertEqualInt(i2(p + 26), strlen(file_name)); /* Pathname length */ + assertEqualInt(i2(p + 28), 24); /* Extra field length */ + assertEqualMem(p + 30, file_name, strlen(file_name)); /* Pathname */ + p = extension_start = local_header + 30 + strlen(file_name); + extension_end = extension_start + i2(local_header + 28); + + assertEqualInt(i2(p), 0x7875); /* 'ux' extension header */ + assertEqualInt(i2(p + 2), 11); /* 'ux' size */ + assertEqualInt(p[4], 1); /* 'ux' version */ + assertEqualInt(p[5], 4); /* 'ux' uid size */ + assertEqualInt(i4(p + 6), file_uid); /* 'Ux' UID */ + assertEqualInt(p[10], 4); /* 'ux' gid size */ + assertEqualInt(i4(p + 11), file_gid); /* 'Ux' GID */ + p += 4 + i2(p + 2); + + assertEqualInt(i2(p), 0x5455); /* 'UT' extension header */ + assertEqualInt(i2(p + 2), 5); /* 'UT' size */ + assertEqualInt(p[4], 1); /* 'UT' flags */ + assertEqualInt(i4(p + 5), 0); /* 'UT' mtime */ + p += 4 + i2(p + 
2); + + /* Just in case: Report any extra extensions. */ + while (p < extension_end) { + failure("Unexpected extension 0x%04X", i2(p)); + assert(0); + p += 4 + i2(p + 2); + } + + /* Should have run exactly to end of extra data. */ + assert(p == extension_end); + data_start = p; + + /* Data descriptor should follow compressed data. */ + while (p < central_header && memcmp(p, "PK\007\010", 4) != 0) + ++p; + data_end = p; + assertEqualInt(data_end - data_start, compressed_size); + assertEqualMem(p, "PK\007\010", 4); + assertEqualInt(i4(p + 4), crc); /* CRC-32 */ + assertEqualInt(i4(p + 8), compressed_size); /* compressed size */ + assertEqualInt(i4(p + 12), sizeof(file_data)); /* uncompressed size */ + + /* Central directory should immediately follow the data descriptor. */ + assert(p + 16 == central_header); + + free(buff); +} diff --git a/libarchive/test/test_write_read_format_zip.c b/libarchive/test/test_write_read_format_zip.c index 0c7ac328a..828b092c7 100644 --- a/libarchive/test/test_write_read_format_zip.c +++ b/libarchive/test/test_write_read_format_zip.c @@ -287,8 +287,12 @@ verify_contents(struct archive *a, int seeking, int improved_streaming) if (seeking || improved_streaming) { assertEqualInt(AE_IFREG | 0755, archive_entry_mode(ae)); } - assertEqualInt(8, archive_entry_size(ae)); - assert(archive_entry_size_is_set(ae)); + if (seeking) { + assertEqualInt(8, archive_entry_size(ae)); + assert(archive_entry_size_is_set(ae)); + } else { + assertEqualInt(0, archive_entry_size_is_set(ae)); + } assertEqualIntA(a, 8, archive_read_data(a, filedata, sizeof(filedata))); assertEqualMem(filedata, "12345678", 8); @@ -304,8 +308,12 @@ verify_contents(struct archive *a, int seeking, int improved_streaming) if (seeking || improved_streaming) { assertEqualInt(AE_IFREG | 0755, archive_entry_mode(ae)); } - assertEqualInt(4, archive_entry_size(ae)); - assert(archive_entry_size_is_set(ae)); + if (seeking) { + assertEqualInt(4, archive_entry_size(ae)); + 
assert(archive_entry_size_is_set(ae)); + } else { + assertEqualInt(0, archive_entry_size_is_set(ae)); + } assertEqualIntA(a, 4, archive_read_data(a, filedata, sizeof(filedata))); assertEqualMem(filedata, "1234", 4); @@ -322,6 +330,7 @@ verify_contents(struct archive *a, int seeking, int improved_streaming) } if (seeking) { assertEqualInt(5, archive_entry_size(ae)); + assert(archive_entry_size_is_set(ae)); } else { assertEqualInt(0, archive_entry_size_is_set(ae)); } @@ -355,8 +364,9 @@ verify_contents(struct archive *a, int seeking, int improved_streaming) assertEqualInt(0, archive_entry_atime(ae)); assertEqualInt(0, archive_entry_ctime(ae)); assertEqualString("dir/", archive_entry_pathname(ae)); - if (seeking || improved_streaming) + if (seeking || improved_streaming) { assertEqualInt(AE_IFDIR | 0755, archive_entry_mode(ae)); + } assertEqualInt(0, archive_entry_size(ae)); assert(archive_entry_size_is_set(ae)); assertEqualIntA(a, 0, archive_read_data(a, filedata, 10)); @@ -377,8 +387,12 @@ verify_contents(struct archive *a, int seeking, int improved_streaming) if (seeking || improved_streaming) { assertEqualInt(AE_IFREG | 0755, archive_entry_mode(ae)); } - assertEqualInt(8, archive_entry_size(ae)); - assert(archive_entry_size_is_set(ae)); + if (seeking) { + assertEqualInt(8, archive_entry_size(ae)); + assert(archive_entry_size_is_set(ae)); + } else { + assertEqualInt(0, archive_entry_size_is_set(ae)); + } assertEqualIntA(a, 8, archive_read_data(a, filedata, sizeof(filedata))); assertEqualMem(filedata, "12345678", 8); @@ -394,8 +408,12 @@ verify_contents(struct archive *a, int seeking, int improved_streaming) if (seeking || improved_streaming) { assertEqualInt(AE_IFREG | 0755, archive_entry_mode(ae)); } - assertEqualInt(4, archive_entry_size(ae)); - assert(archive_entry_size_is_set(ae)); + if (seeking) { + assertEqualInt(4, archive_entry_size(ae)); + assert(archive_entry_size_is_set(ae)); + } else { + assertEqualInt(0, archive_entry_size_is_set(ae)); + } 
assertEqualIntA(a, 4, archive_read_data(a, filedata, sizeof(filedata))); assertEqualMem(filedata, "1234", 4); @@ -412,6 +430,7 @@ verify_contents(struct archive *a, int seeking, int improved_streaming) } if (seeking) { assertEqualInt(5, archive_entry_size(ae)); + assert(archive_entry_size_is_set(ae)); } else { assertEqualInt(0, archive_entry_size_is_set(ae)); } @@ -467,9 +486,12 @@ verify_contents(struct archive *a, int seeking, int improved_streaming) if (seeking || improved_streaming) { assertEqualInt(AE_IFREG | 0755, archive_entry_mode(ae)); } - assert(archive_entry_size_is_set(ae)); - assert(archive_entry_size_is_set(ae)); - assertEqualInt(8, archive_entry_size(ae)); + if (seeking) { + assert(archive_entry_size_is_set(ae)); + assertEqualInt(8, archive_entry_size(ae)); + } else { + assertEqualInt(0, archive_entry_size_is_set(ae)); + } assertEqualIntA(a, 8, archive_read_data(a, filedata, sizeof(filedata))); assertEqualMem(filedata, "12345678", 8); @@ -485,8 +507,12 @@ verify_contents(struct archive *a, int seeking, int improved_streaming) if (seeking || improved_streaming) { assertEqualInt(AE_IFREG | 0755, archive_entry_mode(ae)); } - assertEqualInt(4, archive_entry_size(ae)); - assert(archive_entry_size_is_set(ae)); + if (seeking) { + assertEqualInt(4, archive_entry_size(ae)); + assert(archive_entry_size_is_set(ae)); + } else { + assertEqualInt(0, archive_entry_size_is_set(ae)); + } assertEqualIntA(a, 4, archive_read_data(a, filedata, sizeof(filedata))); assertEqualMem(filedata, "ACEG", 4); @@ -502,6 +528,7 @@ verify_contents(struct archive *a, int seeking, int improved_streaming) assertEqualInt(AE_IFREG | 0621, archive_entry_mode(ae)); if (seeking) { assertEqualInt(5, archive_entry_size(ae)); + assert(archive_entry_size_is_set(ae)); } else { assertEqualInt(0, archive_entry_size_is_set(ae)); } diff --git a/test_utils/test_common.h b/test_utils/test_common.h index 62df8b84e..8e1ec8243 100644 --- a/test_utils/test_common.h +++ b/test_utils/test_common.h @@ -166,6 
+166,8 @@ /* Assert two integers are the same. Reports value of each one if not. */ #define assertEqualInt(v1,v2) \ assertion_equal_int(__FILE__, __LINE__, (v1), #v1, (v2), #v2, NULL) +#define assertEqualAddress(v1,v2) \ + assertion_equal_address(__FILE__, __LINE__, (v1), #v1, (v2), #v2, NULL) /* Assert two strings are the same. Reports value of each one if not. */ #define assertEqualString(v1,v2) \ assertion_equal_string(__FILE__, __LINE__, (v1), #v1, (v2), #v2, NULL, 0) @@ -279,6 +281,7 @@ int assertion_compare_fflags(const char *, int, const char *, const char *, int assertion_empty_file(const char *, int, const char *); int assertion_equal_file(const char *, int, const char *, const char *); int assertion_equal_int(const char *, int, long long, const char *, long long, const char *, void *); +int assertion_equal_address(const char *, int, const void *, const char *, const void *, const char *, void *); int assertion_equal_mem(const char *, int, const void *, const char *, const void *, const char *, size_t, const char *, void *); int assertion_memory_filled_with(const char *, int, const void *, const char *, size_t, const char *, char, const char *, void *); int assertion_equal_string(const char *, int, const char *v1, const char *, const char *v2, const char *, void *, int); diff --git a/test_utils/test_main.c b/test_utils/test_main.c index 16b677a94..6617732a3 100644 --- a/test_utils/test_main.c +++ b/test_utils/test_main.c @@ -625,6 +625,21 @@ assertion_equal_int(const char *file, int line, return (0); } +/* Verify two pointers are equal. 
*/ +int +assertion_equal_address(const char *file, int line, + const void *v1, const char *e1, const void *v2, const char *e2, void *extra) +{ + assertion_count(file, line); + if (v1 == v2) /* identical addresses: report success with 1 */ + return (1); + failure_start(file, line, "%s != %s", e1, e2); /* e1/e2 are the text labels printed for v1/v2 */ + logprintf(" %s=0x%llx\n", e1, (unsigned long long)(uintptr_t)v1); /* each pointer is logged in hex, converted via uintptr_t */ + logprintf(" %s=0x%llx\n", e2, (unsigned long long)(uintptr_t)v2); + failure_finish(extra); + return (0); /* addresses differ: report failure with 0 */ +} + /* * Utility to convert a single UTF-8 sequence. */ -- 2.47.2