From: ljdarj Date: Tue, 22 Oct 2024 08:58:22 +0000 (+0200) Subject: Adding XZ, LZMA, ZSTD and BZIP2 support to ZIP writer (#2284) X-Git-Tag: v3.8.0~133 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=15c686c55fc7dcac8452f6d6a7683d846ea1600c;p=thirdparty%2Flibarchive.git Adding XZ, LZMA, ZSTD and BZIP2 support to ZIP writer (#2284) PPMD may come later but I'd rather first iron out style issues with the ones needing only to wire up libraries already-used in Libarchive before going at the ones possibly requiring implementing algorithms as well. Closes #1046 and resolves #1179. --- diff --git a/Makefile.am b/Makefile.am index a36126c47..c4537beab 100644 --- a/Makefile.am +++ b/Makefile.am @@ -644,6 +644,9 @@ libarchive_test_SOURCES= \ libarchive/test/test_write_format_zip.c \ libarchive/test/test_write_format_zip64_stream.c \ libarchive/test/test_write_format_zip_compression_store.c \ + libarchive/test/test_write_format_zip_compression_zstd.c \ + libarchive/test/test_write_format_zip_compression_bzip2.c \ + libarchive/test/test_write_format_zip_compression_lzmaxz.c \ libarchive/test/test_write_format_zip_empty.c \ libarchive/test/test_write_format_zip_empty_zip64.c \ libarchive/test/test_write_format_zip_entry_size_unset.c \ diff --git a/README.md b/README.md index 933de6986..28a491b00 100644 --- a/README.md +++ b/README.md @@ -120,6 +120,7 @@ The library can create archives in any of the following formats: * PWB binary cpio * shar archives * ZIP archives (with uncompressed or "deflate" compressed entries) + * ZIPX archives (with bzip2, zstd, lzma or xz compressed entries) * GNU and BSD 'ar' archives * 'mtree' format * ISO9660 format @@ -240,5 +241,4 @@ questions we are asked about libarchive: format that should be used when the older _ustar_ format is not appropriate. It has many advantages over other tar formats (including the legacy GNU tar format) and is widely supported by - current tar implementations. - + current tar implementations. 
\ No newline at end of file diff --git a/libarchive/archive.h b/libarchive/archive.h index 04601a79a..20626b966 100644 --- a/libarchive/archive.h +++ b/libarchive/archive.h @@ -828,6 +828,10 @@ __LA_DECL int archive_write_set_format_filter_by_ext(struct archive *a, const ch __LA_DECL int archive_write_set_format_filter_by_ext_def(struct archive *a, const char *filename, const char * def_ext); __LA_DECL int archive_write_zip_set_compression_deflate(struct archive *); __LA_DECL int archive_write_zip_set_compression_store(struct archive *); +__LA_DECL int archive_write_zip_set_compression_lzma(struct archive *); +__LA_DECL int archive_write_zip_set_compression_xz(struct archive *); +__LA_DECL int archive_write_zip_set_compression_bzip2(struct archive *); +__LA_DECL int archive_write_zip_set_compression_zstd(struct archive *); /* Deprecated; use archive_write_open2 instead */ __LA_DECL int archive_write_open(struct archive *, void *, archive_open_callback *, archive_write_callback *, diff --git a/libarchive/archive_write_set_format_zip.c b/libarchive/archive_write_set_format_zip.c index b4723ed13..b5409beac 100644 --- a/libarchive/archive_write_set_format_zip.c +++ b/libarchive/archive_write_set_format_zip.c @@ -43,9 +43,24 @@ #ifdef HAVE_STRING_H #include #endif +#ifdef HAVE_LIMITS_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif #ifdef HAVE_ZLIB_H #include #endif +#ifdef HAVE_LZMA_H +#include +#endif +#ifdef HAVE_BZLIB_H +#include +#endif +#ifdef HAVE_ZSTD_H +#include +#endif #include "archive.h" #include "archive_cryptor_private.h" @@ -62,8 +77,12 @@ #include "archive_crc32.h" #endif -#define ZIP_ENTRY_FLAG_ENCRYPTED (1<<0) -#define ZIP_ENTRY_FLAG_LENGTH_AT_END (1<<3) +#define ZIP_ENTRY_FLAG_ENCRYPTED (1 << 0) +#define ZIP_ENTRY_FLAG_LZMA_EOPM (1 << 1) +#define ZIP_ENTRY_FLAG_DEFLATE_MAX (1 << 1) /* i.e. compression levels 8 & 9 */ +#define ZIP_ENTRY_FLAG_DEFLATE_FAST (1 << 2) /* i.e. 
compression levels 3 & 4 */ +#define ZIP_ENTRY_FLAG_DEFLATE_SUPER_FAST (1 << 1) | (1 << 2) /* i.e. compression levels 1 & 2 */ +#define ZIP_ENTRY_FLAG_LENGTH_AT_END (1 << 3) #define ZIP_ENTRY_FLAG_UTF8_NAME (1 << 11) #define ZIP_4GB_MAX ARCHIVE_LITERAL_LL(0xffffffff) @@ -72,7 +91,11 @@ enum compression { COMPRESSION_UNSPECIFIED = -1, COMPRESSION_STORE = 0, - COMPRESSION_DEFLATE = 8 + COMPRESSION_DEFLATE = 8, + COMPRESSION_BZIP2 = 12, + COMPRESSION_LZMA = 14, + COMPRESSION_ZSTD = 93, + COMPRESSION_XZ = 95 }; #ifdef HAVE_ZLIB_H @@ -119,7 +142,6 @@ struct trad_enc_ctx { }; struct zip { - int64_t entry_offset; int64_t entry_compressed_size; int64_t entry_uncompressed_size; @@ -155,17 +177,45 @@ struct zip { struct archive_string_conv *opt_sconv; struct archive_string_conv *sconv_default; enum compression requested_compression; - int deflate_compression_level; + short compression_level; int init_default_conversion; - enum encryption encryption_type; + enum encryption encryption_type; + short threads; #define ZIP_FLAG_AVOID_ZIP64 1 #define ZIP_FLAG_FORCE_ZIP64 2 #define ZIP_FLAG_EXPERIMENT_xl 4 int flags; - +#if defined(HAVE_LZMA_H) || defined(HAVE_ZLIB_H) || defined(HAVE_BZLIB_H) || defined(HAVE_ZSTD_H) + union { +#ifdef HAVE_LZMA_H + /* ZIP's XZ format (id 95) is easy enough: copy Deflate, mutatis + * mutandis the library changes. ZIP's LZMA format (id 14), + * however, is rather more involved, starting here: it being a + * modified LZMA Alone format requires a bit more + * book-keeping. */ + struct { + char headers_to_write; + lzma_options_lzma options; + lzma_stream context; + } lzma; +#endif #ifdef HAVE_ZLIB_H - z_stream stream; + z_stream deflate; +#endif +#ifdef HAVE_BZLIB_H + bz_stream bzip2; +#endif +#if defined(HAVE_ZSTD_H) && HAVE_ZSTD_compressStream + struct { + /* Libzstd's init function gives a pointer to a memory area + * it manages rather than asking for memory to initialise. 
*/ + ZSTD_CStream* context; + ZSTD_inBuffer in; + ZSTD_outBuffer out; + } zstd; +#endif + } stream; #endif size_t len_buf; unsigned char *buf; @@ -197,6 +247,44 @@ static int is_traditional_pkware_encryption_supported(void); static int init_winzip_aes_encryption(struct archive_write *); static int is_winzip_aes_encryption_supported(int encryption); +#ifdef HAVE_LZMA_H +/* ZIP's LZMA format requires a function that, alas, LibLZMA does not + * expose in order to write the ZIP header. Given our internal version never + * fails, no need for a non-void return type. */ +static void +lzma_lzma_props_encode(const lzma_options_lzma* options, uint8_t* out) +{ + out[0] = (options->pb * 5 + options->lp) * 9 + options->lc; + archive_le32enc(out + 1, options->dict_size); +} +#endif + +#if defined(HAVE_LZMA_H) && !defined(HAVE_LZMA_STREAM_ENCODER_MT) +/* Dummy mt declarations, to avoid spaghetti includes below. Defined with + * macros being renamed afterwards to shadow liblzma's types in order to + * avoid some compiler errors. 
*/ +#define lzma_stream_encoder_mt(str, opt) dummy_mt(str, opt) +#define lzma_mt dummy_options + +typedef struct { + void* filters; + uint32_t preset; + lzma_check check; + short threads; + char flags; + char block_size; + char timeout; +} dummy_options; + +static inline lzma_ret +dummy_mt(lzma_stream* stream, const lzma_mt* options) +{ + (void)stream; /* UNUSED */ + (void)options; /* UNUSED */ + return LZMA_PROG_ERROR; +} +#endif + static unsigned char * cd_alloc(struct zip *zip, size_t length) { @@ -274,26 +362,103 @@ archive_write_zip_options(struct archive_write *a, const char *key, } else if (strcmp(val, "store") == 0) { zip->requested_compression = COMPRESSION_STORE; ret = ARCHIVE_OK; + } else if (strcmp(val, "bzip2") == 0) { +#ifdef HAVE_BZLIB_H + zip->requested_compression = COMPRESSION_BZIP2; + ret = ARCHIVE_OK; +#else + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "bzip2 compression not supported"); +#endif + } else if (strcmp(val, "lzma") == 0) { +#ifdef HAVE_LZMA_H + zip->requested_compression = COMPRESSION_LZMA; + ret = ARCHIVE_OK; +#else + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "lzma compression not supported"); +#endif + } else if (strcmp(val, "xz") == 0) { +#ifdef HAVE_LZMA_H + zip->requested_compression = COMPRESSION_XZ; + ret = ARCHIVE_OK; +#else + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "xz compression not supported"); +#endif + } else if (strcmp(val, "zstd") == 0) { +#if defined(HAVE_ZSTD_H) && HAVE_ZSTD_compressStream + zip->requested_compression = COMPRESSION_ZSTD; + ret = ARCHIVE_OK; +#else + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "zstd compression not supported"); +#endif } return (ret); } else if (strcmp(key, "compression-level") == 0) { - if (val == NULL || !(val[0] >= '0' && val[0] <= '9') || val[1] != '\0') { - return ARCHIVE_WARN; + char *endptr; + + if (val == NULL) + return (ARCHIVE_WARN); + errno = 0; + zip->compression_level = (short)strtoul(val, &endptr, 10); + if (errno != 0 || 
*endptr != '\0' || zip->compression_level < 0 || + zip->compression_level > 9) { + zip->compression_level = 6; // set to default + return (ARCHIVE_WARN); } - if (val[0] == '0') { + if (zip->compression_level == 0) { zip->requested_compression = COMPRESSION_STORE; return ARCHIVE_OK; } else { +#if defined(HAVE_ZLIB_H) || defined(HAVE_LZMA_H) || defined(HAVE_BZLIB_H) || (defined(HAVE_ZSTD_H) && HAVE_ZSTD_compressStream) + // Not forcing an already specified compression algorithm + if (zip->requested_compression == COMPRESSION_UNSPECIFIED) { #ifdef HAVE_ZLIB_H - zip->requested_compression = COMPRESSION_DEFLATE; - zip->deflate_compression_level = val[0] - '0'; + zip->requested_compression = COMPRESSION_DEFLATE; +#elif defined(HAVE_BZLIB_H) + zip->requested_compression = COMPRESSION_BZIP2; +#elif defined(HAVE_LZMA_H) + // Arbitrarily choosing LZMA of the two LZMA methods + zip->requested_compression = COMPRESSION_LZMA; +#else + zip->requested_compression = COMPRESSION_ZSTD; +#endif + } return ARCHIVE_OK; #else archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "deflate compression not supported"); + "compression not supported"); #endif } + } else if (strcmp(key, "threads") == 0) { + char *endptr; + + if (val == NULL) + return (ARCHIVE_FAILED); + errno = 0; + zip->threads = (short)strtoul(val, &endptr, 10); + if (errno != 0 || *endptr != '\0') { + zip->threads = 1; + archive_set_error(&(a->archive), ARCHIVE_ERRNO_MISC, + "Illegal value `%s'", val); + return (ARCHIVE_FAILED); + } + if (zip->threads == 0) { +#ifdef HAVE_LZMA_STREAM_ENCODER_MT + zip->threads = lzma_cputhreads(); +#elif defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN) + zip->threads = sysconf(_SC_NPROCESSORS_ONLN); +#elif !defined(__CYGWIN__) && defined(_WIN32_WINNT) && _WIN32_WINNT >= 0x0601 + /* Windows 7 and up */ + zip->threads = GetActiveProcessorCount(ALL_PROCESSOR_GROUPS); +#else + zip->threads = 1; +#endif + } + return (ARCHIVE_OK); } else if (strcmp(key, "encryption") == 0) { if (val == NULL) 
{ zip->encryption_type = ENCRYPTION_NONE; @@ -305,8 +470,7 @@ archive_write_zip_options(struct archive_write *a, const char *key, zip->encryption_type = ENCRYPTION_TRADITIONAL; ret = ARCHIVE_OK; } else { - archive_set_error(&a->archive, - ARCHIVE_ERRNO_MISC, + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "encryption not supported"); } } else if (strcmp(val, "aes128") == 0) { @@ -315,8 +479,7 @@ archive_write_zip_options(struct archive_write *a, const char *key, zip->encryption_type = ENCRYPTION_WINZIP_AES128; ret = ARCHIVE_OK; } else { - archive_set_error(&a->archive, - ARCHIVE_ERRNO_MISC, + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "encryption not supported"); } } else if (strcmp(val, "aes256") == 0) { @@ -325,14 +488,12 @@ archive_write_zip_options(struct archive_write *a, const char *key, zip->encryption_type = ENCRYPTION_WINZIP_AES256; ret = ARCHIVE_OK; } else { - archive_set_error(&a->archive, - ARCHIVE_ERRNO_MISC, + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "encryption not supported"); } } else { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "%s: unknown encryption '%s'", - a->format_name, val); + "%s: unknown encryption '%s'", a->format_name, val); } return (ret); } else if (strcmp(key, "experimental") == 0) { @@ -421,6 +582,118 @@ archive_write_zip_set_compression_deflate(struct archive *_a) return (ret); } +int +archive_write_zip_set_compression_bzip2(struct archive *_a) +{ + struct archive_write *a = (struct archive_write *)_a; + int ret = ARCHIVE_FAILED; + + archive_check_magic(_a, ARCHIVE_WRITE_MAGIC, + ARCHIVE_STATE_NEW | ARCHIVE_STATE_HEADER | ARCHIVE_STATE_DATA, + "archive_write_zip_set_compression_bzip2"); + if (a->archive.archive_format != ARCHIVE_FORMAT_ZIP) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "Can only use archive_write_zip_set_compression_bzip2" + " with zip format"); + ret = ARCHIVE_FATAL; + } else { +#ifdef HAVE_BZLIB_H + struct zip *zip = a->format_data; + zip->requested_compression = 
COMPRESSION_BZIP2; + ret = ARCHIVE_OK; +#else + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "bzip2 compression not supported"); + ret = ARCHIVE_FAILED; +#endif + } + return (ret); +} + +int +archive_write_zip_set_compression_zstd(struct archive *_a) +{ + struct archive_write *a = (struct archive_write *)_a; + int ret = ARCHIVE_FAILED; + + archive_check_magic(_a, ARCHIVE_WRITE_MAGIC, + ARCHIVE_STATE_NEW | ARCHIVE_STATE_HEADER | ARCHIVE_STATE_DATA, + "archive_write_zip_set_compression_zstd"); + if (a->archive.archive_format != ARCHIVE_FORMAT_ZIP) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "Can only use archive_write_zip_set_compression_zstd" + " with zip format"); + ret = ARCHIVE_FATAL; + } else { +#if defined(HAVE_ZSTD_H) && HAVE_ZSTD_compressStream + struct zip *zip = a->format_data; + zip->requested_compression = COMPRESSION_ZSTD; + ret = ARCHIVE_OK; +#else + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "zstd compression not supported"); + ret = ARCHIVE_FAILED; +#endif + } + return (ret); +} + +int +archive_write_zip_set_compression_lzma(struct archive *_a) +{ + struct archive_write *a = (struct archive_write *)_a; + int ret = ARCHIVE_FAILED; + + archive_check_magic(_a, ARCHIVE_WRITE_MAGIC, + ARCHIVE_STATE_NEW | ARCHIVE_STATE_HEADER | ARCHIVE_STATE_DATA, + "archive_write_zip_set_compression_lzma"); + if (a->archive.archive_format != ARCHIVE_FORMAT_ZIP) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "Can only use archive_write_zip_set_compression_lzma" + " with zip format"); + ret = ARCHIVE_FATAL; + } else { +#ifdef HAVE_LZMA_H + struct zip *zip = a->format_data; + zip->requested_compression = COMPRESSION_LZMA; + ret = ARCHIVE_OK; +#else + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "lzma compression not supported"); + ret = ARCHIVE_FAILED; +#endif + } + return (ret); +} + +int +archive_write_zip_set_compression_xz(struct archive *_a) +{ + struct archive_write *a = (struct archive_write *)_a; + int ret = 
ARCHIVE_FAILED; + + archive_check_magic(_a, ARCHIVE_WRITE_MAGIC, + ARCHIVE_STATE_NEW | ARCHIVE_STATE_HEADER | ARCHIVE_STATE_DATA, + "archive_write_zip_set_compression_xz"); + if (a->archive.archive_format != ARCHIVE_FORMAT_ZIP) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "Can only use archive_write_zip_set_compression_xz" + " with zip format"); + ret = ARCHIVE_FATAL; + } else { +#ifdef HAVE_LZMA_H + struct zip *zip = a->format_data; + zip->requested_compression = COMPRESSION_XZ; + ret = ARCHIVE_OK; +#else + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "xz compression not supported"); + ret = ARCHIVE_FAILED; +#endif + } + return (ret); +} + int archive_write_zip_set_compression_store(struct archive *_a) { @@ -430,7 +703,7 @@ archive_write_zip_set_compression_store(struct archive *_a) archive_check_magic(_a, ARCHIVE_WRITE_MAGIC, ARCHIVE_STATE_NEW | ARCHIVE_STATE_HEADER | ARCHIVE_STATE_DATA, - "archive_write_zip_set_compression_deflate"); + "archive_write_zip_set_compression_store"); if (a->archive.archive_format != ARCHIVE_FORMAT_ZIP) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Can only use archive_write_zip_set_compression_store" @@ -465,9 +738,14 @@ archive_write_set_format_zip(struct archive *_a) /* "Unspecified" lets us choose the appropriate compression. */ zip->requested_compression = COMPRESSION_UNSPECIFIED; -#ifdef HAVE_ZLIB_H - zip->deflate_compression_level = Z_DEFAULT_COMPRESSION; -#endif + /* Following the 7-zip write support's lead, setting the default + * compression level explicitly to 6 no matter what. */ + zip->compression_level = 6; + /* Following the xar write support's lead, the default number of + * threads is 1 (i.e. the xz compression, the only one caring about + * that, not being multi-threaded even if the multi-threaded encoder + * were available) */ + zip->threads = 1; zip->crc32func = real_crc32; /* A buffer used for both compression and encryption. 
*/ @@ -563,7 +841,6 @@ archive_write_zip_header(struct archive_write *a, struct archive_entry *entry) if (type != AE_IFREG) archive_entry_set_size(entry, 0); - /* Reset information from last entry. */ zip->entry_offset = zip->written_bytes; zip->entry_uncompressed_limit = INT64_MAX; @@ -600,7 +877,6 @@ archive_write_zip_header(struct archive_write *a, struct archive_entry *entry) } } - #if defined(_WIN32) && !defined(__CYGWIN__) /* Make sure the path separators in pathname, hardlink and symlink * are all slash '/', not the Windows path separator '\'. */ @@ -696,13 +972,49 @@ archive_write_zip_header(struct archive_write *a, struct archive_entry *entry) if (zip->entry_compression == COMPRESSION_UNSPECIFIED) { zip->entry_compression = COMPRESSION_DEFAULT; } - if (zip->entry_compression == COMPRESSION_STORE) { + switch (zip->entry_compression) { + case COMPRESSION_STORE: zip->entry_compressed_size = size; zip->entry_uncompressed_size = size; MIN_VERSION_NEEDED(10); - } else { + break; + case COMPRESSION_ZSTD: + zip->entry_uncompressed_size = size; + MIN_VERSION_NEEDED(63); + break; + case COMPRESSION_LZMA: + zip->entry_uncompressed_size = size; + zip->entry_flags |= ZIP_ENTRY_FLAG_LZMA_EOPM; + MIN_VERSION_NEEDED(63); + break; + case COMPRESSION_XZ: + zip->entry_uncompressed_size = size; + MIN_VERSION_NEEDED(63); + break; + case COMPRESSION_BZIP2: + zip->entry_uncompressed_size = size; + MIN_VERSION_NEEDED(46); + break; + default: // i.e. 
deflate compression zip->entry_uncompressed_size = size; + switch (zip->compression_level) { + case 1: + case 2: + zip->entry_flags |= ZIP_ENTRY_FLAG_DEFLATE_SUPER_FAST; + break; + case 3: + case 4: + zip->entry_flags |= ZIP_ENTRY_FLAG_DEFLATE_FAST; + break; + case 8: + case 9: + zip->entry_flags |= ZIP_ENTRY_FLAG_DEFLATE_MAX; + break; + default: + break; + } MIN_VERSION_NEEDED(20); + break; } if (zip->entry_flags & ZIP_ENTRY_FLAG_ENCRYPTED) { @@ -752,9 +1064,8 @@ archive_write_zip_header(struct archive_write *a, struct archive_entry *entry) /* We don't know the size. Use the default * compression unless specified otherwise. */ - zip->entry_compression = zip->requested_compression; - if(zip->entry_compression == COMPRESSION_UNSPECIFIED){ + if (zip->entry_compression == COMPRESSION_UNSPECIFIED) { zip->entry_compression = COMPRESSION_DEFAULT; } @@ -762,10 +1073,43 @@ archive_write_zip_header(struct archive_write *a, struct archive_entry *entry) if ((zip->flags & ZIP_FLAG_AVOID_ZIP64) == 0) { /* We might use zip64 extensions, so require 4.5 */ MIN_VERSION_NEEDED(45); - } else if (zip->entry_compression == COMPRESSION_STORE) { + } + switch (zip->entry_compression) { + case COMPRESSION_STORE: MIN_VERSION_NEEDED(10); - } else { + break; + case COMPRESSION_ZSTD: + MIN_VERSION_NEEDED(63); + break; + case COMPRESSION_LZMA: + zip->entry_flags |= ZIP_ENTRY_FLAG_LZMA_EOPM; + MIN_VERSION_NEEDED(63); + break; + case COMPRESSION_XZ: + MIN_VERSION_NEEDED(63); + break; + case COMPRESSION_BZIP2: + MIN_VERSION_NEEDED(46); + break; + default: // i.e. 
deflate compression + switch (zip->compression_level) { + case 1: + case 2: + zip->entry_flags |= ZIP_ENTRY_FLAG_DEFLATE_SUPER_FAST; + break; + case 3: + case 4: + zip->entry_flags |= ZIP_ENTRY_FLAG_DEFLATE_FAST; + break; + case 8: + case 9: + zip->entry_flags |= ZIP_ENTRY_FLAG_DEFLATE_MAX; + break; + default: + break; + } MIN_VERSION_NEEDED(20); + break; } if (zip->entry_flags & ZIP_ENTRY_FLAG_ENCRYPTED) { @@ -994,21 +1338,147 @@ archive_write_zip_header(struct archive_write *a, struct archive_entry *entry) zip->written_bytes += slink_size; } + switch (zip->entry_compression) { #ifdef HAVE_ZLIB_H - if (zip->entry_compression == COMPRESSION_DEFLATE) { - zip->stream.zalloc = Z_NULL; - zip->stream.zfree = Z_NULL; - zip->stream.opaque = Z_NULL; - zip->stream.next_out = zip->buf; - zip->stream.avail_out = (uInt)zip->len_buf; - if (deflateInit2(&zip->stream, zip->deflate_compression_level, + case COMPRESSION_DEFLATE: + zip->stream.deflate.zalloc = Z_NULL; + zip->stream.deflate.zfree = Z_NULL; + zip->stream.deflate.opaque = Z_NULL; + zip->stream.deflate.next_out = zip->buf; + zip->stream.deflate.avail_out = (uInt)zip->len_buf; + if (deflateInit2(&zip->stream.deflate, zip->compression_level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY) != Z_OK) { archive_set_error(&a->archive, ENOMEM, "Can't init deflate compressor"); return (ARCHIVE_FATAL); } - } + break; +#endif +#ifdef HAVE_BZLIB_H + case COMPRESSION_BZIP2: + memset(&zip->stream.bzip2, 0, sizeof(bz_stream)); + zip->stream.bzip2.next_out = (char*)zip->buf; + zip->stream.bzip2.avail_out = (unsigned int)zip->len_buf; + if (BZ2_bzCompressInit(&zip->stream.bzip2, zip->compression_level, 0, 0) != BZ_OK) { + archive_set_error(&a->archive, ENOMEM, + "Can't init bzip2 compressor"); + return (ARCHIVE_FATAL); + } + break; #endif +#if defined(HAVE_ZSTD_H) && HAVE_ZSTD_compressStream + case COMPRESSION_ZSTD: + {/* Libzstd, contrary to many compression libraries, doesn't use + * zlib's 0 to 9 scale and its negative scale is way bigger 
than + * its positive one. So setting 1 as the lowest allowed compression + * level and rescaling to 2 to 9 to libzstd's positive scale. */ + int zstd_compression_level = zip->compression_level == 1 + ? ZSTD_minCLevel() // ZSTD_minCLevel is negative ! + : (zip->compression_level - 1) * ZSTD_maxCLevel() / 8; + zip->stream.zstd.context = ZSTD_createCStream(); + ret = ZSTD_initCStream(zip->stream.zstd.context, zstd_compression_level); + if (ZSTD_isError(ret)) { + archive_set_error(&a->archive, ENOMEM, + "Can't init zstd compressor"); + return (ARCHIVE_FATAL); + } + /* Asking for the multi-threaded compressor is a no-op in zstd if + * it's not supported, so no need to explicitly check for it */ + ZSTD_CCtx_setParameter(zip->stream.zstd.context, ZSTD_c_nbWorkers, zip->threads); + zip->stream.zstd.out.dst = zip->buf; + zip->stream.zstd.out.size = zip->len_buf; + zip->stream.zstd.out.pos = 0; + break;} +#endif +#ifdef HAVE_LZMA_H + case COMPRESSION_LZMA: + {/* Set compression level 9 as the no-holds barred one */ + uint32_t lzma_compression_level = zip->compression_level == 9 + ? LZMA_PRESET_EXTREME | zip->compression_level + : (uint32_t)zip->compression_level; + /* Forcibly setting up the encoder to use the LZMA1 variant, as + * it is the one LZMA Alone uses. 
*/ + lzma_filter filters[2] = { + { + .id = LZMA_FILTER_LZMA1, + .options = &zip->stream.lzma.options + }, + { + .id = LZMA_VLI_UNKNOWN + } + }; + memset(&zip->stream.lzma.context, 0, sizeof(lzma_stream)); + lzma_lzma_preset(&zip->stream.lzma.options, lzma_compression_level); + zip->stream.lzma.headers_to_write = 1; + /* We'll be writing the headers ourselves, so using the raw + * encoder */ + if (lzma_raw_encoder(&zip->stream.lzma.context, filters) != LZMA_OK) { + archive_set_error(&a->archive, ENOMEM, + "Can't init lzma compressor"); + return (ARCHIVE_FATAL); + } + zip->stream.lzma.context.next_out = zip->buf; + zip->stream.lzma.context.avail_out = (unsigned int)zip->len_buf; + break;} + case COMPRESSION_XZ: + {/* Set compression level 9 as the no-holds barred one */ + uint32_t lzma_compression_level = zip->compression_level == 9 + ? LZMA_PRESET_EXTREME | zip->compression_level + : (uint32_t)zip->compression_level; + lzma_ret retval; +#ifndef HAVE_LZMA_STREAM_ENCODER_MT + /* Force the number of threads to one, and thus to a mono-threaded + * encoder in case we don't have the multi-threaded one */ + zip->threads = 1; +#endif + memset(&zip->stream.lzma.context, 0, sizeof(lzma_stream)); + /* The XZ check will be arbitrarily set to none: ZIP already has + * a CRC-32 check of its own */ + if (zip->threads == 1) { + /* XZ uses LZMA2. 
*/ + lzma_filter filters[2] = { + { + .id = LZMA_FILTER_LZMA2, + .options = &zip->stream.lzma.options + }, + { + .id = LZMA_VLI_UNKNOWN + } + }; + /* Might as well use the lzma_options we already allocated, + * even if we'll never use it after the initialisation */ + lzma_lzma_preset(&zip->stream.lzma.options, lzma_compression_level); + /* 1 thread requested, so non multi-threaded encoder */ + retval = lzma_stream_encoder(&zip->stream.lzma.context, + filters, LZMA_CHECK_NONE); + } + else { + lzma_mt options = { + .flags = 0, + .block_size = 0, + .timeout = 0, + .filters = NULL, + .check = LZMA_CHECK_NONE, + .preset = lzma_compression_level, + .threads = zip->threads + }; + /* More than 1 thread requested, so multi-threaded encoder + * which always outputs XZ */ + retval = lzma_stream_encoder_mt(&zip->stream.lzma.context, + &options); + } + if (retval != LZMA_OK) { + archive_set_error(&a->archive, ENOMEM, + "Can't init xz compressor"); + return (ARCHIVE_FATAL); + } + zip->stream.lzma.context.next_out = zip->buf; + zip->stream.lzma.context.avail_out = (unsigned int)zip->len_buf; + break;} +#endif + default: + break; + } return (ret2); } @@ -1093,15 +1563,15 @@ archive_write_zip_data(struct archive_write *a, const void *buff, size_t s) zip->entry_compressed_written += s; } break; -#if HAVE_ZLIB_H +#ifdef HAVE_ZLIB_H case COMPRESSION_DEFLATE: - zip->stream.next_in = (unsigned char*)(uintptr_t)buff; - zip->stream.avail_in = (uInt)s; + zip->stream.deflate.next_in = (unsigned char*)(uintptr_t)buff; + zip->stream.deflate.avail_in = (uInt)s; do { - ret = deflate(&zip->stream, Z_NO_FLUSH); + ret = deflate(&zip->stream.deflate, Z_NO_FLUSH); if (ret == Z_STREAM_ERROR) return (ARCHIVE_FATAL); - if (zip->stream.avail_out == 0) { + if (zip->stream.deflate.avail_out == 0) { if (zip->tctx_valid) { trad_enc_encrypt_update(&zip->tctx, zip->buf, zip->len_buf, @@ -1127,13 +1597,223 @@ archive_write_zip_data(struct archive_write *a, const void *buff, size_t s) return (ret); 
zip->entry_compressed_written += zip->len_buf; zip->written_bytes += zip->len_buf; - zip->stream.next_out = zip->buf; - zip->stream.avail_out = (uInt)zip->len_buf; + zip->stream.deflate.next_out = zip->buf; + zip->stream.deflate.avail_out = (uInt)zip->len_buf; } - } while (zip->stream.avail_in != 0); + } while (zip->stream.deflate.avail_in != 0); + break; +#endif +#if defined(HAVE_ZSTD_H) && HAVE_ZSTD_compressStream + case COMPRESSION_ZSTD: + zip->stream.zstd.in.src = buff; + zip->stream.zstd.in.size = s; + zip->stream.zstd.in.pos = 0; + do { + ret = ZSTD_compressStream(zip->stream.zstd.context, + &zip->stream.zstd.out, &zip->stream.zstd.in); + if (ZSTD_isError(ret)) + return (ARCHIVE_FATAL); + if (zip->stream.zstd.out.pos == zip->stream.zstd.out.size) { + if (zip->tctx_valid) { + trad_enc_encrypt_update(&zip->tctx, + zip->buf, zip->len_buf, + zip->buf, zip->len_buf); + } else if (zip->cctx_valid) { + size_t outl = zip->len_buf; + ret = archive_encrypto_aes_ctr_update( + &zip->cctx, + zip->buf, zip->len_buf, + zip->buf, &outl); + if (ret < 0) { + archive_set_error(&a->archive, + ARCHIVE_ERRNO_MISC, + "Failed to encrypt file"); + return (ARCHIVE_FAILED); + } + archive_hmac_sha1_update(&zip->hctx, + zip->buf, zip->len_buf); + } + ret = __archive_write_output(a, zip->buf, + zip->len_buf); + if (ret != ARCHIVE_OK) + return (ret); + zip->entry_compressed_written += zip->len_buf; + zip->written_bytes += zip->len_buf; + zip->stream.zstd.out.dst = zip->buf; + zip->stream.zstd.out.size = zip->len_buf; + zip->stream.zstd.out.pos = 0; + } + } while (zip->stream.zstd.in.pos != zip->stream.zstd.in.size); + break; +#endif +#ifdef HAVE_BZLIB_H + case COMPRESSION_BZIP2: + zip->stream.bzip2.next_in = (char*)(uintptr_t)buff; + zip->stream.bzip2.avail_in = (unsigned int)s; + do { + ret = BZ2_bzCompress(&zip->stream.bzip2, BZ_RUN); + if (ret != BZ_RUN_OK) + return (ARCHIVE_FATAL); + if (zip->stream.bzip2.avail_out == 0) { + if (zip->tctx_valid) { + trad_enc_encrypt_update(&zip->tctx, 
+ zip->buf, zip->len_buf, + zip->buf, zip->len_buf); + } else if (zip->cctx_valid) { + size_t outl = zip->len_buf; + ret = archive_encrypto_aes_ctr_update( + &zip->cctx, + zip->buf, zip->len_buf, + zip->buf, &outl); + if (ret < 0) { + archive_set_error(&a->archive, + ARCHIVE_ERRNO_MISC, + "Failed to encrypt file"); + return (ARCHIVE_FAILED); + } + archive_hmac_sha1_update(&zip->hctx, + zip->buf, zip->len_buf); + } + ret = __archive_write_output(a, zip->buf, + zip->len_buf); + if (ret != ARCHIVE_OK) + return (ret); + zip->entry_compressed_written += zip->len_buf; + zip->written_bytes += zip->len_buf; + zip->stream.bzip2.next_out = (char*)zip->buf; + zip->stream.bzip2.avail_out = (unsigned int)zip->len_buf; + } + } while (zip->stream.bzip2.avail_in != 0); + break; +#endif +#ifdef HAVE_LZMA_H + case COMPRESSION_LZMA: + if (zip->stream.lzma.headers_to_write) { + /* LZMA Alone and ZIP's LZMA format (i.e. id 14) are almost + * the same. Here's an example of a structure of LZMA Alone: + * + * $ cat /bin/ls | lzma | xxd | head -n 1 + * 00000000: 5d00 0080 00ff ffff ffff ffff ff00 2814 + * + * 5 bytes 8 bytes n bytes + * <lzma_params><uncompressed_size><data...> + * + * lzma_params is a 5-byte blob that has to be decoded to + * extract parameters of this LZMA stream. The + * uncompressed_size field is an uint64_t value that contains + * information about the size of the uncompressed file, or + * UINT64_MAX if this value is unknown. The <data...> part is + * the actual LZMA-compressed data stream. + * + * Now here's the structure of ZIP's LZMA format: + * + * $ cat stream_inside_zipx | xxd | head -n 1 + * 00000000: 0914 0500 5d00 8000 0000 2814 .... .... + * + * 2byte 2byte 5 bytes n bytes + * <magic1><magic2><lzma_params><data...> + * + * This means that ZIP's LZMA format contains an additional + * magic1 and magic2 headers, the lzma_params field contains + * the same parameter set as in LZMA Alone, and the <data...> + * field is the same as in LZMA Alone as well. However, note + * that ZIP's format is missing the uncompressed_size field. 
+ * + * So we need to write a raw LZMA stream, set up for LZMA1 + * (i.e. the algorithm variant LZMA Alone uses), which was + * done above in the initialisation but first we need to + * write ZIP's LZMA header, as if it were Stored data. Then + * we can use the raw stream as if it were any other. magic1 + * being version numbers and magic2 being lzma_params's size, + * they get written in without further ado but lzma_params + * requires to use other functions than the usual lzma_stream + * manipulating ones, hence the additional book-keeping + * required alongside the lzma_stream. + */ + uint8_t buf[9] = { LZMA_VERSION_MAJOR, LZMA_VERSION_MINOR, 5, 0 }; + lzma_lzma_props_encode(&zip->stream.lzma.options, buf + 4); + const size_t sh = 9; + if (zip->tctx_valid || zip->cctx_valid) { + uint8_t* header = buf; + const uint8_t * const rh = header + sh; + + while (header < rh) { + size_t l; + + if (zip->tctx_valid) { + l = trad_enc_encrypt_update(&zip->tctx, + header, rh - header, + zip->buf, zip->len_buf); + } else { + l = zip->len_buf; + ret = archive_encrypto_aes_ctr_update( + &zip->cctx, + header, rh - header, zip->buf, &l); + if (ret < 0) { + archive_set_error(&a->archive, + ARCHIVE_ERRNO_MISC, + "Failed to encrypt file"); + return (ARCHIVE_FAILED); + } + archive_hmac_sha1_update(&zip->hctx, + zip->buf, l); + } + ret = __archive_write_output(a, zip->buf, l); + if (ret != ARCHIVE_OK) + return (ret); + zip->entry_compressed_written += l; + zip->written_bytes += l; + header += l; + } + } else { + ret = __archive_write_output(a, buf, sh); + if (ret != ARCHIVE_OK) + return (ret); + zip->written_bytes += sh; + zip->entry_compressed_written += sh; + } + zip->stream.lzma.headers_to_write = 0; + } + /* FALLTHROUGH */ + case COMPRESSION_XZ: + zip->stream.lzma.context.next_in = (unsigned char*)(uintptr_t)buff; + zip->stream.lzma.context.avail_in = (unsigned int)s; + do { + ret = lzma_code(&zip->stream.lzma.context, LZMA_RUN); + if (ret == LZMA_MEM_ERROR) + return (ARCHIVE_FATAL); 
+ if (zip->stream.lzma.context.avail_out == 0) { + if (zip->tctx_valid) { + trad_enc_encrypt_update(&zip->tctx, + zip->buf, zip->len_buf, + zip->buf, zip->len_buf); + } else if (zip->cctx_valid) { + size_t outl = zip->len_buf; + ret = archive_encrypto_aes_ctr_update( + &zip->cctx, + zip->buf, zip->len_buf, + zip->buf, &outl); + if (ret < 0) { + archive_set_error(&a->archive, + ARCHIVE_ERRNO_MISC, + "Failed to encrypt file"); + return (ARCHIVE_FAILED); + } + archive_hmac_sha1_update(&zip->hctx, + zip->buf, zip->len_buf); + } + ret = __archive_write_output(a, zip->buf, + zip->len_buf); + if (ret != ARCHIVE_OK) + return (ret); + zip->entry_compressed_written += zip->len_buf; + zip->written_bytes += zip->len_buf; + zip->stream.lzma.context.next_out = zip->buf; + zip->stream.lzma.context.avail_out = (unsigned int)zip->len_buf; + } + } while (zip->stream.lzma.context.avail_in != 0); break; #endif - case COMPRESSION_UNSPECIFIED: default: archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, @@ -1146,7 +1826,6 @@ archive_write_zip_data(struct archive_write *a, const void *buff, size_t s) zip->entry_crc32 = zip->crc32func(zip->entry_crc32, buff, (unsigned)s); return (s); - } static int @@ -1154,16 +1833,18 @@ archive_write_zip_finish_entry(struct archive_write *a) { struct zip *zip = a->format_data; int ret; + char finishing; -#if HAVE_ZLIB_H - if (zip->entry_compression == COMPRESSION_DEFLATE) { + switch (zip->entry_compression) { +#ifdef HAVE_ZLIB_H + case COMPRESSION_DEFLATE: for (;;) { size_t remainder; - ret = deflate(&zip->stream, Z_FINISH); + ret = deflate(&zip->stream.deflate, Z_FINISH); if (ret == Z_STREAM_ERROR) return (ARCHIVE_FATAL); - remainder = zip->len_buf - zip->stream.avail_out; + remainder = zip->len_buf - zip->stream.deflate.avail_out; if (zip->tctx_valid) { trad_enc_encrypt_update(&zip->tctx, zip->buf, remainder, zip->buf, remainder); @@ -1186,14 +1867,145 @@ archive_write_zip_finish_entry(struct archive_write *a) return (ret); 
zip->entry_compressed_written += remainder; zip->written_bytes += remainder; - zip->stream.next_out = zip->buf; - if (zip->stream.avail_out != 0) + zip->stream.deflate.next_out = zip->buf; + if (zip->stream.deflate.avail_out != 0) break; - zip->stream.avail_out = (uInt)zip->len_buf; + zip->stream.deflate.avail_out = (uInt)zip->len_buf; } - deflateEnd(&zip->stream); - } + deflateEnd(&zip->stream.deflate); + break; +#endif +#ifdef HAVE_BZLIB_H + case COMPRESSION_BZIP2: + finishing = 1; + do { + size_t remainder; + + ret = BZ2_bzCompress(&zip->stream.bzip2, BZ_FINISH); + if (ret == BZ_STREAM_END) + finishing = 0; + else if (ret != BZ_RUN_OK && ret != BZ_FINISH_OK) + return (ARCHIVE_FATAL); + remainder = zip->len_buf - zip->stream.bzip2.avail_out; + if (zip->tctx_valid) { + trad_enc_encrypt_update(&zip->tctx, + zip->buf, remainder, zip->buf, remainder); + } else if (zip->cctx_valid) { + size_t outl = remainder; + ret = archive_encrypto_aes_ctr_update( + &zip->cctx, zip->buf, remainder, + zip->buf, &outl); + if (ret < 0) { + archive_set_error(&a->archive, + ARCHIVE_ERRNO_MISC, + "Failed to encrypt file"); + return (ARCHIVE_FAILED); + } + archive_hmac_sha1_update(&zip->hctx, + zip->buf, remainder); + } + ret = __archive_write_output(a, zip->buf, remainder); + if (ret != ARCHIVE_OK) + return (ret); + zip->entry_compressed_written += remainder; + zip->written_bytes += remainder; + zip->stream.bzip2.next_out = (char*)zip->buf; + if (zip->stream.bzip2.avail_out != 0) + finishing = 0; + zip->stream.bzip2.avail_out = (unsigned int)zip->len_buf; + } while (finishing); + BZ2_bzCompressEnd(&zip->stream.bzip2); + break; +#endif +#if defined(HAVE_ZSTD_H) && HAVE_ZSTD_compressStream + case COMPRESSION_ZSTD: + finishing = 1; + do { + size_t remainder; + + ret = ZSTD_endStream(zip->stream.zstd.context, &zip->stream.zstd.out); + if (ret == 0) + finishing = 0; + else if (ZSTD_isError(ret)) + return (ARCHIVE_FATAL); + remainder = zip->len_buf - (zip->stream.zstd.out.size - 
zip->stream.zstd.out.pos); + if (zip->tctx_valid) { + trad_enc_encrypt_update(&zip->tctx, + zip->buf, remainder, zip->buf, remainder); + } else if (zip->cctx_valid) { + size_t outl = remainder; + ret = archive_encrypto_aes_ctr_update( + &zip->cctx, zip->buf, remainder, + zip->buf, &outl); + if (ret < 0) { + archive_set_error(&a->archive, + ARCHIVE_ERRNO_MISC, + "Failed to encrypt file"); + return (ARCHIVE_FAILED); + } + archive_hmac_sha1_update(&zip->hctx, + zip->buf, remainder); + } + ret = __archive_write_output(a, zip->buf, remainder); + if (ret != ARCHIVE_OK) + return (ret); + zip->entry_compressed_written += remainder; + zip->written_bytes += remainder; + zip->stream.zstd.out.dst = zip->buf; + if (zip->stream.zstd.out.pos != zip->stream.zstd.out.size) + finishing = 0; + zip->stream.zstd.out.size = zip->len_buf; + } while (finishing); + ZSTD_freeCStream(zip->stream.zstd.context); + break; +#endif +#ifdef HAVE_LZMA_H + /* XZ and LZMA share clean-up code */ + case COMPRESSION_LZMA: + case COMPRESSION_XZ: + finishing = 1; + do { + size_t remainder; + + ret = lzma_code(&zip->stream.lzma.context, LZMA_FINISH); + if (ret == LZMA_STREAM_END) + finishing = 0; + else if (ret == LZMA_MEM_ERROR) + return (ARCHIVE_FATAL); + remainder = zip->len_buf - zip->stream.lzma.context.avail_out; + if (zip->tctx_valid) { + trad_enc_encrypt_update(&zip->tctx, + zip->buf, remainder, zip->buf, remainder); + } else if (zip->cctx_valid) { + size_t outl = remainder; + ret = archive_encrypto_aes_ctr_update( + &zip->cctx, zip->buf, remainder, + zip->buf, &outl); + if (ret < 0) { + archive_set_error(&a->archive, + ARCHIVE_ERRNO_MISC, + "Failed to encrypt file"); + return (ARCHIVE_FAILED); + } + archive_hmac_sha1_update(&zip->hctx, + zip->buf, remainder); + } + ret = __archive_write_output(a, zip->buf, remainder); + if (ret != ARCHIVE_OK) + return (ret); + zip->entry_compressed_written += remainder; + zip->written_bytes += remainder; + zip->stream.lzma.context.next_out = zip->buf; + if 
(zip->stream.lzma.context.avail_out != 0) + finishing = 0; + zip->stream.lzma.context.avail_out = (unsigned int)zip->len_buf; + } while (finishing); + lzma_end(&zip->stream.lzma.context); + break; #endif + default: + break; + } if (zip->hctx_valid) { uint8_t hmac[20]; size_t hmac_len = 20; @@ -1377,7 +2189,6 @@ archive_write_zip_close(struct archive_write *a) if (ret != ARCHIVE_OK) return (ARCHIVE_FATAL); zip->written_bytes += 20; - } /* Format and write end of central directory. */ @@ -1529,7 +2340,6 @@ copy_path(struct archive_entry *entry, unsigned char *p) p[pathlen] = '/'; } - static struct archive_string_conv * get_sconv(struct archive_write *a, struct zip *zip) { @@ -1587,7 +2397,6 @@ trad_enc_encrypt_update(struct trad_enc_ctx *ctx, const uint8_t *in, static int trad_enc_init(struct trad_enc_ctx *ctx, const char *pw, size_t pw_len) { - ctx->keys[0] = 305419896L; ctx->keys[1] = 591751049L; ctx->keys[2] = 878082192L; diff --git a/libarchive/archive_write_set_options.3 b/libarchive/archive_write_set_options.3 index 454c79671..2e784d872 100644 --- a/libarchive/archive_write_set_options.3 +++ b/libarchive/archive_write_set_options.3 @@ -263,7 +263,7 @@ decimal integer specifying log2 window size in bytes. Values from The value is interpreted as a decimal integer specifying the number of threads for multi-threaded zstd compression. If set to 0, zstd will attempt to detect and use the number -of physical CPU cores. +of active physical CPU cores. .El .It Format 7zip .Bl -tag -compact -width indent @@ -632,9 +632,13 @@ and .Bl -tag -compact -width indent .It Cm compression The value is either -.Dq store +.Dq store , +.Dq deflate , +.Dq bzip2 , +.Dq lzma , +.Dq xz , or -.Dq deflate +.Dq zstd to indicate how the following entries should be compressed. Note that this setting is ignored for directories, symbolic links, and other special entries. @@ -645,8 +649,23 @@ Values between 0 and 9 are supported. 
A compression level of 0 switches the compression method to .Dq store , other values will enable -.Dq deflate -compression with the given level. +.Dq deflate , +.Dq bzip2 , +.Dq lzma , +or +.Dq zstd +compression (in order of priority, depending on what libraries +are linked) with the given level. +.It Cm threads +The value is interpreted as a decimal integer specifying the +number of threads to use for compression. +It is supported only for +.Dq xz +or +.Dq zstd +compression and ignored for any other. +A threads value of 0 is a special one requesting to detect and use as +many threads as the number of active physical CPU cores. .It Cm encryption Enable encryption using traditional zip encryption. .It Cm encryption Ns = Ns Ar type diff --git a/libarchive/libarchive-formats.5 b/libarchive/libarchive-formats.5 index fab2f8660..1b057b4da 100644 --- a/libarchive/libarchive-formats.5 +++ b/libarchive/libarchive-formats.5 @@ -327,10 +327,26 @@ by the usual environment variables. Libarchive can read and write zip format archives that have uncompressed entries and entries compressed with the .Dq deflate +, +.Dq LZMA +, +.Dq XZ +, +.Dq BZIP2 +and +.Dq ZSTD +algorithms. +Libarchive can also read, but not write, zip format archives that +have entries compressed with the +.Dq PPMd algorithm. Other zip compression algorithms are not supported. -It can extract jar archives, archives that use Zip64 extensions and -self-extracting zip archives. +The extensions supported by libarchive are Zip64, Libarchive's +extensions to better support streaming, PKZIP's traditional +ZIP encryption, Info-ZIP's Unix extra fields, extra time, and +Unicode path, as well as WinZIP's AES encryption. +It can extract jar archives, __MACOSX resource forks extension +for OS X, and self-extracting zip archives. 
Libarchive can use either of two different strategies for reading Zip archives: a streaming strategy which is fast and can handle extremely diff --git a/libarchive/test/CMakeLists.txt b/libarchive/test/CMakeLists.txt index 314c972d2..e11f1a147 100644 --- a/libarchive/test/CMakeLists.txt +++ b/libarchive/test/CMakeLists.txt @@ -288,6 +288,9 @@ IF(ENABLE_TEST) test_write_format_zip.c test_write_format_zip64_stream.c test_write_format_zip_compression_store.c + test_write_format_zip_compression_bzip2.c + test_write_format_zip_compression_lzmaxz.c + test_write_format_zip_compression_zstd.c test_write_format_zip_empty.c test_write_format_zip_empty_zip64.c test_write_format_zip_entry_size_unset.c diff --git a/libarchive/test/test_write_format_zip_compression_bzip2.c b/libarchive/test/test_write_format_zip_compression_bzip2.c new file mode 100644 index 000000000..ebc2b740b --- /dev/null +++ b/libarchive/test/test_write_format_zip_compression_bzip2.c @@ -0,0 +1,402 @@ +/*-SPDX-License-Identifier: BSD-2-Clause + * Copyright (c) 2024 ARJANEN Loïc Jean David + * All rights reserved. + */ + +#include "test.h" +#ifdef HAVE_BZLIB_H +#include + +static unsigned long +bitcrc32(unsigned long c, const void *_p, size_t s) +{ + /* This is a drop-in replacement for crc32() from zlib, given that + * libbzip2 doesn't expose its CRC32 function, as far as I'm aware. + * Libarchive should be able to correctly generate BZIP2-compressed + * zip archives (including correct CRCs) even when zlib is + * unavailable, and this function helps us verify that. Yes, this is + * very, very slow and unsuitable for production use, but it's very, + * very obviously correct, compact, and works well for this + * particular usage. 
Libarchive internally uses a much more + * efficient implementation when zlib is unavailable */ + const unsigned char *p = _p; + int bitctr; + + if (p == NULL) + return (0); + + for (; s > 0; --s) { + c ^= *p++; + for (bitctr = 8; bitctr > 0; --bitctr) { + if (c & 1) c = (c >> 1); + else c = (c >> 1) ^ 0xedb88320; + c ^= 0x80000000; + } + } + return (c); +} + +/* File data */ +static const char file_name[] = "file"; +static const char file_data1[] = {'1', '2', '3', '4', '5', '6', '7', '8'}; +static const char file_data2[] = {'9', '0', 'A', 'B', 'C', 'D', 'E', 'F'}; +static const int file_perm = 00644; +static const short file_uid = 10; +static const short file_gid = 20; + +/* Folder data */ +static const char folder_name[] = "folder/"; +static const int folder_perm = 00755; +static const short folder_uid = 30; +static const short folder_gid = 40; + +static time_t now; + +static void verify_write_bzip2(struct archive *a) +{ + struct archive_entry *entry; + + /* Write entries. */ + + /* Regular file */ + assert((entry = archive_entry_new()) != NULL); + archive_entry_set_pathname(entry, file_name); + archive_entry_set_mode(entry, S_IFREG | 0644); + archive_entry_set_size(entry, sizeof(file_data1) + sizeof(file_data2)); + archive_entry_set_uid(entry, file_uid); + archive_entry_set_gid(entry, file_gid); + archive_entry_set_mtime(entry, now, 0); + archive_entry_set_atime(entry, now + 3, 0); + assertEqualIntA(a, 0, archive_write_header(a, entry)); + assertEqualIntA(a, sizeof(file_data1), archive_write_data(a, file_data1, sizeof(file_data1))); + assertEqualIntA(a, sizeof(file_data2), archive_write_data(a, file_data2, sizeof(file_data2))); + archive_entry_free(entry); + + /* Folder */ + assert((entry = archive_entry_new()) != NULL); + archive_entry_set_pathname(entry, folder_name); + archive_entry_set_mode(entry, S_IFDIR | folder_perm); + archive_entry_set_size(entry, 0); + archive_entry_set_uid(entry, folder_uid); + archive_entry_set_gid(entry, folder_gid); + 
archive_entry_set_mtime(entry, now, 0); + archive_entry_set_ctime(entry, now + 5, 0); + assertEqualIntA(a, 0, archive_write_header(a, entry)); + archive_entry_free(entry); +} + +/* Quick and dirty: Read 2-byte and 4-byte integers from Zip file. */ +static unsigned int +i2(const void *p_) +{ + const unsigned char *p = p_; + return (p[0] | (p[1] << 8)); +} + +static unsigned int +i4(const void *p_) +{ + const unsigned char *p = p_; + return (i2(p) | (i2(p + 2) << 16)); +} + +static void verify_bzip2_contents(const char *buff, size_t used) +{ + const char *buffend; + struct archive* zip_archive; + struct archive_entry *ae; + char filedata[sizeof(file_data1) + sizeof(file_data2)]; + /* Misc variables */ + unsigned long crc; + struct tm *tm; +#if defined(HAVE_LOCALTIME_R) || defined(HAVE_LOCALTIME_S) + struct tm tmbuf; +#endif + /* p is the pointer to walk over the central directory, + * q walks over the local headers, the data and the data descriptors. */ + const char *p, *q, *local_header, *extra_start; + +#if defined(HAVE_LOCALTIME_S) + tm = localtime_s(&tmbuf, &now) ? NULL : &tmbuf; +#elif defined(HAVE_LOCALTIME_R) + tm = localtime_r(&now, &tmbuf); +#else + tm = localtime(&now); +#endif + + /* Open archive from memory, we'll need it for checking the file + * value */ + assert((zip_archive = archive_read_new()) != NULL); + assertEqualIntA(zip_archive, ARCHIVE_OK, archive_read_support_format_zip(zip_archive)); + assertEqualIntA(zip_archive, ARCHIVE_OK, archive_read_support_filter_all(zip_archive)); + assertEqualIntA(zip_archive, ARCHIVE_OK, archive_read_open_memory(zip_archive, buff, used)); + + /* Remember the end of the archive in memory. */ + buffend = buff + used; + + /* Verify "End of Central Directory" record. */ + /* Get address of end-of-central-directory record. */ + p = buffend - 22; /* Assumes there is no zip comment field. 
*/ + failure("End-of-central-directory begins with PK\\005\\006 signature"); + assertEqualMem(p, "PK\005\006", 4); + failure("This must be disk 0"); + assertEqualInt(i2(p + 4), 0); + failure("Central dir must start on disk 0"); + assertEqualInt(i2(p + 6), 0); + failure("All central dir entries are on this disk"); + assertEqualInt(i2(p + 8), i2(p + 10)); + failure("CD start (%d) + CD length (%d) should == archive size - 22", + i4(p + 12), i4(p + 16)); + assertEqualInt(i4(p + 12) + i4(p + 16), used - 22); + failure("no zip comment"); + assertEqualInt(i2(p + 20), 0); + + /* Get address of first entry in central directory. */ + p = buff + i4(buffend - 6); + failure("Central file record at offset %d should begin with" + " PK\\001\\002 signature", + i4(buffend - 10)); + + /* Verify file entry in central directory, except compressed size (offset 20). */ + assertEqualMem(p, "PK\001\002", 4); /* Signature */ + assertEqualInt(i2(p + 4), 3 * 256 + 46); /* Version made by */ + assertEqualInt(i2(p + 6), 46); /* Version needed to extract */ + assertEqualInt(i2(p + 8), 8); /* Flags */ + assertEqualInt(i2(p + 10), 12); /* Compression method */ + assertEqualInt(i2(p + 12), (tm->tm_hour * 2048) + (tm->tm_min * 32) + (tm->tm_sec / 2)); /* File time */ + assertEqualInt(i2(p + 14), ((tm->tm_year - 80) * 512) + ((tm->tm_mon + 1) * 32) + tm->tm_mday); /* File date */ + crc = bitcrc32(0, file_data1, sizeof(file_data1)); + crc = bitcrc32(crc, file_data2, sizeof(file_data2)); + assertEqualInt(i4(p + 16), crc); /* CRC-32 */ + assertEqualInt(i4(p + 24), sizeof(file_data1) + sizeof(file_data2)); /* Uncompressed size */ + assertEqualInt(i2(p + 28), strlen(file_name)); /* Pathname length */ + assertEqualInt(i2(p + 30), 24); /* Extra field length */ + assertEqualInt(i2(p + 32), 0); /* File comment length */ + assertEqualInt(i2(p + 34), 0); /* Disk number start */ + assertEqualInt(i2(p + 36), 0); /* Internal file attrs */ + assertEqualInt(i4(p + 38) >> 16 & 01777, file_perm); /* External file 
attrs */ + assertEqualInt(i4(p + 42), 0); /* Offset of local header */ + assertEqualMem(p + 46, file_name, strlen(file_name)); /* Pathname */ + p = p + 46 + strlen(file_name); + + assertEqualInt(i2(p), 0x7875); /* 'ux' extension header */ + assertEqualInt(i2(p + 2), 11); /* 'ux' size */ +/* TODO */ + p = p + 4 + i2(p + 2); + + assertEqualInt(i2(p), 0x5455); /* 'UT' extension header */ + assertEqualInt(i2(p + 2), 5); /* 'UT' size */ + assertEqualInt(p[4], 1); /* 'UT' flags */ + assertEqualInt(i4(p + 5), now); /* 'UT' mtime */ + p = p + 4 + i2(p + 2); + + /* Verify local header of file entry. */ + local_header = q = buff; + assertEqualMem(q, "PK\003\004", 4); /* Signature */ + assertEqualInt(i2(q + 4), 46); /* Version needed to extract */ + assertEqualInt(i2(q + 6), 8); /* Flags: bit 3 = length-at-end (required because CRC32 is unknown) */ + assertEqualInt(i2(q + 8), 12); /* Compression method */ + assertEqualInt(i2(q + 10), (tm->tm_hour * 2048) + (tm->tm_min * 32) + (tm->tm_sec / 2)); /* File time */ + assertEqualInt(i2(q + 12), ((tm->tm_year - 80) * 512) + ((tm->tm_mon + 1) * 32) + tm->tm_mday); /* File date */ + assertEqualInt(i4(q + 14), 0); /* CRC-32 */ + assertEqualInt(i4(q + 18), 0); /* Compressed size, must be zero because of length-at-end */ + assertEqualInt(i4(q + 22), 0); /* Uncompressed size, must be zero because of length-at-end */ + assertEqualInt(i2(q + 26), strlen(file_name)); /* Pathname length */ + assertEqualInt(i2(q + 28), 41); /* Extra field length */ + assertEqualMem(q + 30, file_name, strlen(file_name)); /* Pathname */ + extra_start = q = q + 30 + strlen(file_name); + + assertEqualInt(i2(q), 0x7875); /* 'ux' extension header */ + assertEqualInt(i2(q + 2), 11); /* 'ux' size */ + assertEqualInt(q[4], 1); /* 'ux' version */ + assertEqualInt(q[5], 4); /* 'ux' uid size */ + assertEqualInt(i4(q + 6), file_uid); /* 'Ux' UID */ + assertEqualInt(q[10], 4); /* 'ux' gid size */ + assertEqualInt(i4(q + 11), file_gid); /* 'Ux' GID */ + q = q + 4 + i2(q + 
2); + + assertEqualInt(i2(q), 0x5455); /* 'UT' extension header */ + assertEqualInt(i2(q + 2), 9); /* 'UT' size */ + assertEqualInt(q[4], 3); /* 'UT' flags */ + assertEqualInt(i4(q + 5), now); /* 'UT' mtime */ + assertEqualInt(i4(q + 9), now + 3); /* 'UT' atime */ + q = q + 4 + i2(q + 2); + + assertEqualInt(i2(q), 0x6c78); /* 'xl' experimental extension header */ + assertEqualInt(i2(q + 2), 9); /* size */ + assertEqualInt(q[4], 7); /* Bitmap of fields included. */ + assertEqualInt(i2(q + 5) >> 8, 3); /* system & version made by */ + assertEqualInt(i2(q + 7), 0); /* internal file attributes */ + assertEqualInt(i4(q + 9) >> 16 & 01777, file_perm); /* external file attributes */ + q = q + 4 + i2(q + 2); + + assert(q == extra_start + i2(local_header + 28)); + q = extra_start + i2(local_header + 28); + + /* Verify data of file entry, using our own zip reader to test. */ + assertEqualIntA(zip_archive, ARCHIVE_OK, archive_read_next_header(zip_archive, &ae)); + assertEqualString("file", archive_entry_pathname(ae)); + assertEqualIntA(zip_archive, sizeof(filedata), archive_read_data(zip_archive, filedata, sizeof(filedata))); + assertEqualMem(filedata, file_data1, sizeof(file_data1)); + assertEqualMem(filedata + sizeof(file_data1), file_data2, + sizeof(file_data2)); + + /* Skip data of file entry in q */ + while (q < buffend - 3) { + if (memcmp(q, "PK\007\010", 4) == 0) { + break; + } + q++; + } + + /* Verify data descriptor of file entry, except compressed size (offset 8). */ + assertEqualMem(q, "PK\007\010", 4); /* Signature */ + assertEqualInt(i4(q + 4), crc); /* CRC-32 */ + assertEqualInt(i4(q + 12), sizeof(file_data1) + sizeof(file_data2)); /* Uncompressed size */ + q = q + 16; + + /* Verify folder entry in central directory. 
*/ + assertEqualMem(p, "PK\001\002", 4); /* Signature */ + assertEqualInt(i2(p + 4), 3 * 256 + 20); /* Version made by */ + assertEqualInt(i2(p + 6), 20); /* Version needed to extract */ + assertEqualInt(i2(p + 8), 0); /* Flags */ + assertEqualInt(i2(p + 10), 0); /* Compression method */ + assertEqualInt(i2(p + 12), (tm->tm_hour * 2048) + (tm->tm_min * 32) + (tm->tm_sec / 2)); /* File time */ + assertEqualInt(i2(p + 14), ((tm->tm_year - 80) * 512) + ((tm->tm_mon + 1) * 32) + tm->tm_mday); /* File date */ + crc = 0; + assertEqualInt(i4(p + 16), crc); /* CRC-32 */ + assertEqualInt(i4(p + 20), 0); /* Compressed size */ + assertEqualInt(i4(p + 24), 0); /* Uncompressed size */ + assertEqualInt(i2(p + 28), strlen(folder_name)); /* Pathname length */ + assertEqualInt(i2(p + 30), 24); /* Extra field length */ + assertEqualInt(i2(p + 32), 0); /* File comment length */ + assertEqualInt(i2(p + 34), 0); /* Disk number start */ + assertEqualInt(i2(p + 36), 0); /* Internal file attrs */ + assertEqualInt(i4(p + 38) >> 16 & 01777, folder_perm); /* External file attrs */ + assertEqualInt(i4(p + 42), q - buff); /* Offset of local header */ + assertEqualMem(p + 46, folder_name, strlen(folder_name)); /* Pathname */ + p = p + 46 + strlen(folder_name); + + assertEqualInt(i2(p), 0x7875); /* 'ux' extension header */ + assertEqualInt(i2(p + 2), 11); /* 'ux' size */ + assertEqualInt(p[4], 1); /* 'ux' version */ + assertEqualInt(p[5], 4); /* 'ux' uid size */ + assertEqualInt(i4(p + 6), folder_uid); /* 'ux' UID */ + assertEqualInt(p[10], 4); /* 'ux' gid size */ + assertEqualInt(i4(p + 11), folder_gid); /* 'ux' GID */ + p = p + 4 + i2(p + 2); + + assertEqualInt(i2(p), 0x5455); /* 'UT' extension header */ + assertEqualInt(i2(p + 2), 5); /* 'UT' size */ + assertEqualInt(p[4], 1); /* 'UT' flags */ + assertEqualInt(i4(p + 5), now); /* 'UT' mtime */ + p = p + 4 + i2(p + 2); + + /* Verify local header of folder entry. 
*/ + local_header = q; + assertEqualMem(q, "PK\003\004", 4); /* Signature */ + assertEqualInt(i2(q + 4), 20); /* Version needed to extract */ + assertEqualInt(i2(q + 6), 0); /* Flags */ + assertEqualInt(i2(q + 8), 0); /* Compression method */ + assertEqualInt(i2(q + 10), (tm->tm_hour * 2048) + (tm->tm_min * 32) + (tm->tm_sec / 2)); /* File time */ + assertEqualInt(i2(q + 12), ((tm->tm_year - 80) * 512) + ((tm->tm_mon + 1) * 32) + tm->tm_mday); /* File date */ + assertEqualInt(i4(q + 14), 0); /* CRC-32 */ + assertEqualInt(i4(q + 18), 0); /* Compressed size */ + assertEqualInt(i4(q + 22), 0); /* Uncompressed size */ + assertEqualInt(i2(q + 26), strlen(folder_name)); /* Pathname length */ + assertEqualInt(i2(q + 28), 41); /* Extra field length */ + assertEqualMem(q + 30, folder_name, strlen(folder_name)); /* Pathname */ + extra_start = q = q + 30 + strlen(folder_name); + + assertEqualInt(i2(q), 0x7875); /* 'ux' extension header */ + assertEqualInt(i2(q + 2), 11); /* 'ux' size */ + assertEqualInt(q[4], 1); /* 'ux' version */ + assertEqualInt(q[5], 4); /* 'ux' uid size */ + assertEqualInt(i4(q + 6), folder_uid); /* 'ux' UID */ + assertEqualInt(q[10], 4); /* 'ux' gid size */ + assertEqualInt(i4(q + 11), folder_gid); /* 'ux' GID */ + q = q + 4 + i2(q + 2); + + assertEqualInt(i2(q), 0x5455); /* 'UT' extension header */ + assertEqualInt(i2(q + 2), 9); /* 'UT' size */ + assertEqualInt(q[4], 5); /* 'UT' flags */ + assertEqualInt(i4(q + 5), now); /* 'UT' mtime */ + assertEqualInt(i4(q + 9), now + 5); /* 'UT' ctime */ + q = q + 4 + i2(q + 2); + + assertEqualInt(i2(q), 0x6c78); /* 'xl' experimental extension header */ + assertEqualInt(i2(q + 2), 9); /* size */ + assertEqualInt(q[4], 7); /* bitmap of fields */ + assertEqualInt(i2(q + 5) >> 8, 3); /* system & version made by */ + assertEqualInt(i2(q + 7), 0); /* internal file attributes */ + assertEqualInt(i4(q + 9) >> 16 & 01777, folder_perm); /* external file attributes */ + q = q + 4 + i2(q + 2); + + assert(q == extra_start + 
i2(local_header + 28)); + q = extra_start + i2(local_header + 28); + + /* There should not be any data in the folder entry, + * so the first central directory entry should be next: */ + assertEqualMem(q, "PK\001\002", 4); /* Signature */ + + /* Close archive, in case. */ + archive_read_free(zip_archive); +} + +DEFINE_TEST(test_write_format_zip_compression_bzip2) +{ + /* Buffer data */ + struct archive *a; + char buff[100000]; + size_t used; + + /* Time data */ + now = time(NULL); + + /* Create new ZIP archive in memory without padding. */ + /* Use the setter function to use BZIP2 compression. */ + assert((a = archive_write_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_format_zip(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_zip_set_compression_bzip2(a)); + assertEqualIntA(a, ARCHIVE_OK, + archive_write_set_options(a, "zip:experimental")); + assertEqualIntA(a, ARCHIVE_OK, archive_write_add_filter_none(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_bytes_per_block(a, 1)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_bytes_in_last_block(a, 1)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_open_memory(a, buff, sizeof(buff), &used)); + + verify_write_bzip2(a); + + /* Close the archive. */ + assertEqualIntA(a, ARCHIVE_OK, archive_write_close(a)); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + dumpfile("constructed.zip", buff, used); + + verify_bzip2_contents(buff, used); + + /* Create new ZIP archive in memory without padding. */ + /* Use compression-level=3 to check that compression + * levels are somewhat supported. 
*/ + assert((a = archive_write_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_format_zip(a)); + assertEqualIntA(a, ARCHIVE_OK, + archive_write_set_options(a, "zip:compression=bzip2")); + assertEqualIntA(a, ARCHIVE_OK, + archive_write_set_options(a, "zip:compression-level=3")); + assertEqualIntA(a, ARCHIVE_OK, + archive_write_set_options(a, "zip:experimental")); + assertEqualIntA(a, ARCHIVE_OK, archive_write_add_filter_none(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_bytes_per_block(a, 1)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_bytes_in_last_block(a, 1)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_open_memory(a, buff, sizeof(buff), &used)); + + verify_write_bzip2(a); + + /* Close the archive. */ + assertEqualIntA(a, ARCHIVE_OK, archive_write_close(a)); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + dumpfile("constructed.zip", buff, used); + + verify_bzip2_contents(buff, used); +} +#endif diff --git a/libarchive/test/test_write_format_zip_compression_lzmaxz.c b/libarchive/test/test_write_format_zip_compression_lzmaxz.c new file mode 100644 index 000000000..72b9faf1a --- /dev/null +++ b/libarchive/test/test_write_format_zip_compression_lzmaxz.c @@ -0,0 +1,438 @@ +/*-SPDX-License-Identifier: BSD-2-Clause + * Copyright (c) 2024 ARJANEN Loïc Jean David + * All rights reserved. 
+ */ + +#include "test.h" +#ifdef HAVE_LZMA_H +#include + +/* File data */ +static const char file_name[] = "file"; +/* We have liblzma's lzma_crc32() so no need to rely on a handmade + * CRC-32...but it requires a uint8_t* for its data, hence the change + * compared to the usual */ +static const uint8_t file_data1[] = {'.', ';', ':', '!', '?', ',', '"', '\'', ')', '(', '*'}; +static const uint8_t file_data2[] = {'-', '/', '>', '$', '\\', '#', '@', '+', '=', '{', ']', '[', '}', '&', '<', '%'}; +static const int file_perm = 00644; +static const short file_uid = 10; +static const short file_gid = 20; + +/* Folder data */ +static const char folder_name[] = "folder/"; +static const int folder_perm = 00755; +static const short folder_uid = 30; +static const short folder_gid = 40; + +static time_t now; + +static void verify_write_lzma(struct archive *a) +{ + struct archive_entry *entry; + + /* Write entries. */ + + /* Regular file */ + assert((entry = archive_entry_new()) != NULL); + archive_entry_set_pathname(entry, file_name); + archive_entry_set_mode(entry, S_IFREG | 0644); + archive_entry_set_size(entry, sizeof(file_data1) + sizeof(file_data2)); + archive_entry_set_uid(entry, file_uid); + archive_entry_set_gid(entry, file_gid); + archive_entry_set_mtime(entry, now, 0); + archive_entry_set_atime(entry, now + 3, 0); + assertEqualIntA(a, 0, archive_write_header(a, entry)); + assertEqualIntA(a, sizeof(file_data1), archive_write_data(a, file_data1, sizeof(file_data1))); + assertEqualIntA(a, sizeof(file_data2), archive_write_data(a, file_data2, sizeof(file_data2))); + archive_entry_free(entry); + + /* Folder */ + assert((entry = archive_entry_new()) != NULL); + archive_entry_set_pathname(entry, folder_name); + archive_entry_set_mode(entry, S_IFDIR | folder_perm); + archive_entry_set_size(entry, 0); + archive_entry_set_uid(entry, folder_uid); + archive_entry_set_gid(entry, folder_gid); + archive_entry_set_mtime(entry, now, 0); + archive_entry_set_ctime(entry, now + 5, 0); + 
assertEqualIntA(a, 0, archive_write_header(a, entry)); + archive_entry_free(entry); +} + +/* Quick and dirty: Read 2-byte and 4-byte integers from Zip file. */ +static unsigned int +i2(const void *p_) +{ + const unsigned char *p = p_; + return (p[0] | (p[1] << 8)); +} + +static unsigned int +i4(const void *p_) +{ + const unsigned char *p = p_; + return (i2(p) | (i2(p + 2) << 16)); +} + +static void verify_xz_lzma(const char *buff, size_t used, uint16_t id, + uint16_t flags) +{ + const char *buffend; + struct archive* zip_archive; + struct archive_entry *ae; + char filedata[sizeof(file_data1) + sizeof(file_data2)]; + /* Misc variables */ + unsigned long crc; + struct tm *tm; +#if defined(HAVE_LOCALTIME_R) || defined(HAVE_LOCALTIME_S) + struct tm tmbuf; +#endif + /* p is the pointer to walk over the central directory, + * q walks over the local headers, the data and the data descriptors. */ + const char *p, *q, *local_header, *extra_start; + +#if defined(HAVE_LOCALTIME_S) + tm = localtime_s(&tmbuf, &now) ? NULL : &tmbuf; +#elif defined(HAVE_LOCALTIME_R) + tm = localtime_r(&now, &tmbuf); +#else + tm = localtime(&now); +#endif + + /* Open archive from memory, we'll need it for checking the file + * value */ + assert((zip_archive = archive_read_new()) != NULL); + assertEqualIntA(zip_archive, ARCHIVE_OK, archive_read_support_format_all(zip_archive)); + assertEqualIntA(zip_archive, ARCHIVE_OK, archive_read_support_filter_all(zip_archive)); + assertEqualIntA(zip_archive, ARCHIVE_OK, archive_read_open_memory(zip_archive, buff, used)); + + /* Remember the end of the archive in memory. */ + buffend = buff + used; + + /* Verify "End of Central Directory" record. */ + /* Get address of end-of-central-directory record. */ + p = buffend - 22; /* Assumes there is no zip comment field. 
*/ + failure("End-of-central-directory begins with PK\\005\\006 signature"); + assertEqualMem(p, "PK\005\006", 4); + failure("This must be disk 0"); + assertEqualInt(i2(p + 4), 0); + failure("Central dir must start on disk 0"); + assertEqualInt(i2(p + 6), 0); + failure("All central dir entries are on this disk"); + assertEqualInt(i2(p + 8), i2(p + 10)); + failure("CD start (%d) + CD length (%d) should == archive size - 22", + i4(p + 12), i4(p + 16)); + assertEqualInt(i4(p + 12) + i4(p + 16), used - 22); + failure("no zip comment"); + assertEqualInt(i2(p + 20), 0); + + /* Get address of first entry in central directory. */ + p = buff + i4(buffend - 6); + failure("Central file record at offset %d should begin with" + " PK\\001\\002 signature", + i4(buffend - 10)); + + /* Verify file entry in central directory, except compressed size (offset 20). */ + assertEqualMem(p, "PK\001\002", 4); /* Signature */ + assertEqualInt(i2(p + 4), 3 * 256 + 63); /* Version made by */ + assertEqualInt(i2(p + 6), 63); /* Version needed to extract */ + assertEqualInt(i2(p + 8), flags); /* Flags */ + assertEqualInt(i2(p + 10), id); /* Compression method */ + assertEqualInt(i2(p + 12), (tm->tm_hour * 2048) + (tm->tm_min * 32) + (tm->tm_sec / 2)); /* File time */ + assertEqualInt(i2(p + 14), ((tm->tm_year - 80) * 512) + ((tm->tm_mon + 1) * 32) + tm->tm_mday); /* File date */ + crc = lzma_crc32(file_data1, sizeof(file_data1), 0); + crc = lzma_crc32(file_data2, sizeof(file_data2), crc); + assertEqualInt(i4(p + 16), crc); /* CRC-32 */ + assertEqualInt(i4(p + 24), sizeof(file_data1) + sizeof(file_data2)); /* Uncompressed size */ + assertEqualInt(i2(p + 28), strlen(file_name)); /* Pathname length */ + assertEqualInt(i2(p + 30), 24); /* Extra field length */ + assertEqualInt(i2(p + 32), 0); /* File comment length */ + assertEqualInt(i2(p + 34), 0); /* Disk number start */ + assertEqualInt(i2(p + 36), 0); /* Internal file attrs */ + assertEqualInt(i4(p + 38) >> 16 & 01777, file_perm); /* External 
file attrs */ + assertEqualInt(i4(p + 42), 0); /* Offset of local header */ + assertEqualMem(p + 46, file_name, strlen(file_name)); /* Pathname */ + p = p + 46 + strlen(file_name); + + assertEqualInt(i2(p), 0x7875); /* 'ux' extension header */ + assertEqualInt(i2(p + 2), 11); /* 'ux' size */ +/* TODO */ + p = p + 4 + i2(p + 2); + + assertEqualInt(i2(p), 0x5455); /* 'UT' extension header */ + assertEqualInt(i2(p + 2), 5); /* 'UT' size */ + assertEqualInt(p[4], 1); /* 'UT' flags */ + assertEqualInt(i4(p + 5), now); /* 'UT' mtime */ + p = p + 4 + i2(p + 2); + + /* Verify local header of file entry. */ + local_header = q = buff; + assertEqualMem(q, "PK\003\004", 4); /* Signature */ + assertEqualInt(i2(q + 4), 63); /* Version needed to extract */ + assertEqualInt(i2(q + 6), flags); /* Flags: bit 3 = length-at-end (required because CRC32 is unknown) and bit 1 = EOPM (because we always write it) */ + assertEqualInt(i2(q + 8), id); /* Compression method */ + assertEqualInt(i2(q + 10), (tm->tm_hour * 2048) + (tm->tm_min * 32) + (tm->tm_sec / 2)); /* File time */ + assertEqualInt(i2(q + 12), ((tm->tm_year - 80) * 512) + ((tm->tm_mon + 1) * 32) + tm->tm_mday); /* File date */ + assertEqualInt(i4(q + 14), 0); /* CRC-32 */ + assertEqualInt(i4(q + 18), 0); /* Compressed size, must be zero because of length-at-end */ + assertEqualInt(i4(q + 22), 0); /* Uncompressed size, must be zero because of length-at-end */ + assertEqualInt(i2(q + 26), strlen(file_name)); /* Pathname length */ + assertEqualInt(i2(q + 28), 41); /* Extra field length */ + assertEqualMem(q + 30, file_name, strlen(file_name)); /* Pathname */ + extra_start = q = q + 30 + strlen(file_name); + + assertEqualInt(i2(q), 0x7875); /* 'ux' extension header */ + assertEqualInt(i2(q + 2), 11); /* 'ux' size */ + assertEqualInt(q[4], 1); /* 'ux' version */ + assertEqualInt(q[5], 4); /* 'ux' uid size */ + assertEqualInt(i4(q + 6), file_uid); /* 'Ux' UID */ + assertEqualInt(q[10], 4); /* 'ux' gid size */ + assertEqualInt(i4(q + 
11), file_gid); /* 'Ux' GID */ + q = q + 4 + i2(q + 2); + + assertEqualInt(i2(q), 0x5455); /* 'UT' extension header */ + assertEqualInt(i2(q + 2), 9); /* 'UT' size */ + assertEqualInt(q[4], 3); /* 'UT' flags */ + assertEqualInt(i4(q + 5), now); /* 'UT' mtime */ + assertEqualInt(i4(q + 9), now + 3); /* 'UT' atime */ + q = q + 4 + i2(q + 2); + + assertEqualInt(i2(q), 0x6c78); /* 'xl' experimental extension header */ + assertEqualInt(i2(q + 2), 9); /* size */ + assertEqualInt(q[4], 7); /* Bitmap of fields included. */ + assertEqualInt(i2(q + 5) >> 8, 3); /* system & version made by */ + assertEqualInt(i2(q + 7), 0); /* internal file attributes */ + assertEqualInt(i4(q + 9) >> 16 & 01777, file_perm); /* external file attributes */ + q = q + 4 + i2(q + 2); + + assert(q == extra_start + i2(local_header + 28)); + q = extra_start + i2(local_header + 28); + + /* Verify data of file entry, using our own zip reader to test. */ + assertEqualIntA(zip_archive, ARCHIVE_OK, archive_read_next_header(zip_archive, &ae)); + assertEqualString("file", archive_entry_pathname(ae)); + assertEqualIntA(zip_archive, sizeof(filedata), archive_read_data(zip_archive, filedata, sizeof(filedata))); + assertEqualMem(filedata, file_data1, sizeof(file_data1)); + assertEqualMem(filedata + sizeof(file_data1), file_data2, + sizeof(file_data2)); + + /* Skip data of file entry in q */ + while (q < buffend - 3) { + if (memcmp(q, "PK\007\010", 4) == 0) { + break; + } + q++; + } + + /* Verify data descriptor of file entry, except compressed size (offset 8). */ + assertEqualMem(q, "PK\007\010", 4); /* Signature */ + assertEqualInt(i4(q + 4), crc); /* CRC-32 */ + assertEqualInt(i4(q + 12), sizeof(file_data1) + sizeof(file_data2)); /* Uncompressed size */ + q = q + 16; + + /* Verify folder entry in central directory. 
*/ + assertEqualMem(p, "PK\001\002", 4); /* Signature */ + assertEqualInt(i2(p + 4), 3 * 256 + 20); /* Version made by */ + assertEqualInt(i2(p + 6), 20); /* Version needed to extract */ + assertEqualInt(i2(p + 8), 0); /* Flags */ + assertEqualInt(i2(p + 10), 0); /* Compression method */ + assertEqualInt(i2(p + 12), (tm->tm_hour * 2048) + (tm->tm_min * 32) + (tm->tm_sec / 2)); /* File time */ + assertEqualInt(i2(p + 14), ((tm->tm_year - 80) * 512) + ((tm->tm_mon + 1) * 32) + tm->tm_mday); /* File date */ + crc = 0; + assertEqualInt(i4(p + 16), crc); /* CRC-32 */ + assertEqualInt(i4(p + 20), 0); /* Compressed size */ + assertEqualInt(i4(p + 24), 0); /* Uncompressed size */ + assertEqualInt(i2(p + 28), strlen(folder_name)); /* Pathname length */ + assertEqualInt(i2(p + 30), 24); /* Extra field length */ + assertEqualInt(i2(p + 32), 0); /* File comment length */ + assertEqualInt(i2(p + 34), 0); /* Disk number start */ + assertEqualInt(i2(p + 36), 0); /* Internal file attrs */ + assertEqualInt(i4(p + 38) >> 16 & 01777, folder_perm); /* External file attrs */ + assertEqualInt(i4(p + 42), q - buff); /* Offset of local header */ + assertEqualMem(p + 46, folder_name, strlen(folder_name)); /* Pathname */ + p = p + 46 + strlen(folder_name); + + assertEqualInt(i2(p), 0x7875); /* 'ux' extension header */ + assertEqualInt(i2(p + 2), 11); /* 'ux' size */ + assertEqualInt(p[4], 1); /* 'ux' version */ + assertEqualInt(p[5], 4); /* 'ux' uid size */ + assertEqualInt(i4(p + 6), folder_uid); /* 'ux' UID */ + assertEqualInt(p[10], 4); /* 'ux' gid size */ + assertEqualInt(i4(p + 11), folder_gid); /* 'ux' GID */ + p = p + 4 + i2(p + 2); + + assertEqualInt(i2(p), 0x5455); /* 'UT' extension header */ + assertEqualInt(i2(p + 2), 5); /* 'UT' size */ + assertEqualInt(p[4], 1); /* 'UT' flags */ + assertEqualInt(i4(p + 5), now); /* 'UT' mtime */ + p = p + 4 + i2(p + 2); + + /* Verify local header of folder entry. 
*/ + local_header = q; + assertEqualMem(q, "PK\003\004", 4); /* Signature */ + assertEqualInt(i2(q + 4), 20); /* Version needed to extract */ + assertEqualInt(i2(q + 6), 0); /* Flags */ + assertEqualInt(i2(q + 8), 0); /* Compression method */ + assertEqualInt(i2(q + 10), (tm->tm_hour * 2048) + (tm->tm_min * 32) + (tm->tm_sec / 2)); /* File time */ + assertEqualInt(i2(q + 12), ((tm->tm_year - 80) * 512) + ((tm->tm_mon + 1) * 32) + tm->tm_mday); /* File date */ + assertEqualInt(i4(q + 14), 0); /* CRC-32 */ + assertEqualInt(i4(q + 18), 0); /* Compressed size */ + assertEqualInt(i4(q + 22), 0); /* Uncompressed size */ + assertEqualInt(i2(q + 26), strlen(folder_name)); /* Pathname length */ + assertEqualInt(i2(q + 28), 41); /* Extra field length */ + assertEqualMem(q + 30, folder_name, strlen(folder_name)); /* Pathname */ + extra_start = q = q + 30 + strlen(folder_name); + + assertEqualInt(i2(q), 0x7875); /* 'ux' extension header */ + assertEqualInt(i2(q + 2), 11); /* 'ux' size */ + assertEqualInt(q[4], 1); /* 'ux' version */ + assertEqualInt(q[5], 4); /* 'ux' uid size */ + assertEqualInt(i4(q + 6), folder_uid); /* 'ux' UID */ + assertEqualInt(q[10], 4); /* 'ux' gid size */ + assertEqualInt(i4(q + 11), folder_gid); /* 'ux' GID */ + q = q + 4 + i2(q + 2); + + assertEqualInt(i2(q), 0x5455); /* 'UT' extension header */ + assertEqualInt(i2(q + 2), 9); /* 'UT' size */ + assertEqualInt(q[4], 5); /* 'UT' flags */ + assertEqualInt(i4(q + 5), now); /* 'UT' mtime */ + assertEqualInt(i4(q + 9), now + 5); /* 'UT' ctime */ + q = q + 4 + i2(q + 2); + + assertEqualInt(i2(q), 0x6c78); /* 'xl' experimental extension header */ + assertEqualInt(i2(q + 2), 9); /* size */ + assertEqualInt(q[4], 7); /* bitmap of fields */ + assertEqualInt(i2(q + 5) >> 8, 3); /* system & version made by */ + assertEqualInt(i2(q + 7), 0); /* internal file attributes */ + assertEqualInt(i4(q + 9) >> 16 & 01777, folder_perm); /* external file attributes */ + q = q + 4 + i2(q + 2); + + assert(q == extra_start +
i2(local_header + 28)); + q = extra_start + i2(local_header + 28); + + /* There should not be any data in the folder entry, + * so the first central directory entry should be next: */ + assertEqualMem(q, "PK\001\002", 4); /* Signature */ + + /* Close archive, in case. */ + archive_read_free(zip_archive); +} + +static void verify_xz_contents(const char *buff, size_t used) +{ + verify_xz_lzma(buff, used, 95, 0x8); +} + +static void verify_lzma_contents(const char *buff, size_t used) +{ + verify_xz_lzma(buff, used, 14, 0xA); +} + +DEFINE_TEST(test_write_format_zip_compression_lzmaxz) +{ + /* Buffer data */ + struct archive *a; + char buff[100000]; + size_t used; + + /* Time data */ + now = time(NULL); + + /* Create new ZIP archive in memory without padding. */ + /* Use the setter function to use LZMA Alone compression. */ + assert((a = archive_write_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_format_zip(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_zip_set_compression_lzma(a)); + assertEqualIntA(a, ARCHIVE_OK, + archive_write_set_options(a, "zip:experimental")); + assertEqualIntA(a, ARCHIVE_OK, archive_write_add_filter_none(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_bytes_per_block(a, 1)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_bytes_in_last_block(a, 1)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_open_memory(a, buff, sizeof(buff), &used)); + + verify_write_lzma(a); + + /* Close the archive . */ + assertEqualIntA(a, ARCHIVE_OK, archive_write_close(a)); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + dumpfile("constructed.zip", buff, used); + + verify_lzma_contents(buff, used); + + /* Create new ZIP archive in memory without padding. */ + /* Use compression-level=9 to check that compression + * levels are somewhat supported. 
*/ + assert((a = archive_write_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_format_zip(a)); + assertEqualIntA(a, ARCHIVE_OK, + archive_write_set_options(a, "zip:compression=lzma")); + assertEqualIntA(a, ARCHIVE_OK, + archive_write_set_options(a, "zip:compression-level=9")); + assertEqualIntA(a, ARCHIVE_OK, + archive_write_set_options(a, "zip:experimental")); + assertEqualIntA(a, ARCHIVE_OK, archive_write_add_filter_none(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_bytes_per_block(a, 1)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_bytes_in_last_block(a, 1)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_open_memory(a, buff, sizeof(buff), &used)); + + verify_write_lzma(a); + + /* Close the archive. */ + assertEqualIntA(a, ARCHIVE_OK, archive_write_close(a)); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + dumpfile("constructed.zip", buff, used); + + verify_lzma_contents(buff, used); + + /* Same song and dance, but for XZ */ + + /* Create new ZIP archive in memory without padding. */ + /* Use the setter function to use XZ compression. */ + assert((a = archive_write_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_format_zip(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_zip_set_compression_xz(a)); + assertEqualIntA(a, ARCHIVE_OK, + archive_write_set_options(a, "zip:experimental")); + assertEqualIntA(a, ARCHIVE_OK, archive_write_add_filter_none(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_bytes_per_block(a, 1)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_bytes_in_last_block(a, 1)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_open_memory(a, buff, sizeof(buff), &used)); + + verify_write_lzma(a); + + /* Close the archive . */ + assertEqualIntA(a, ARCHIVE_OK, archive_write_close(a)); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + dumpfile("constructed.zip", buff, used); + + verify_xz_contents(buff, used); + + /* Create new ZIP archive in memory without padding. 
*/ + /* Use compression-level=9 to check that compression levels are + * somewhat supported as well as threads=2 to check the multi-threaded + * encoder, if available. */ + assert((a = archive_write_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_format_zip(a)); + assertEqualIntA(a, ARCHIVE_OK, + archive_write_set_options(a, "zip:compression=xz")); + assertEqualIntA(a, ARCHIVE_OK, + archive_write_set_options(a, "zip:compression-level=9")); + assertEqualIntA(a, ARCHIVE_OK, + archive_write_set_options(a, "zip:threads=2")); + assertEqualIntA(a, ARCHIVE_OK, + archive_write_set_options(a, "zip:experimental")); + assertEqualIntA(a, ARCHIVE_OK, archive_write_add_filter_none(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_bytes_per_block(a, 1)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_bytes_in_last_block(a, 1)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_open_memory(a, buff, sizeof(buff), &used)); + + verify_write_lzma(a); + + /* Close the archive. */ + assertEqualIntA(a, ARCHIVE_OK, archive_write_close(a)); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + dumpfile("constructed.zip", buff, used); + + verify_xz_contents(buff, used); +} +#endif diff --git a/libarchive/test/test_write_format_zip_compression_zstd.c b/libarchive/test/test_write_format_zip_compression_zstd.c new file mode 100644 index 000000000..22c30ef24 --- /dev/null +++ b/libarchive/test/test_write_format_zip_compression_zstd.c @@ -0,0 +1,405 @@ +/*-SPDX-License-Identifier: BSD-2-Clause + * Copyright (c) 2024 ARJANEN Loïc Jean David + * All rights reserved. + */ + +#include "test.h" +#ifdef HAVE_ZSTD_H +#include + +static unsigned long +bitcrc32(unsigned long c, const void *_p, size_t s) +{ + /* This is a drop-in replacement for crc32() from zlib, given that + * libzstd doesn't use CRC32 in the first place, let alone has a + * function for it. 
Libarchive should be able to correctly generate + * ZSTD-compressed zip archives (including correct CRCs) even when + * zlib is unavailable, and this function helps us verify that. Yes, + * this is very, very slow and unsuitable for production use, but + * it's very, very obviously correct, compact, and works well for + * this particular usage. Libarchive internally uses a much more + * efficient implementation when zlib is unavailable */ + const unsigned char *p = _p; + int bitctr; + + if (p == NULL) + return (0); + + for (; s > 0; --s) { + c ^= *p++; + for (bitctr = 8; bitctr > 0; --bitctr) { + if (c & 1) c = (c >> 1); + else c = (c >> 1) ^ 0xedb88320; + c ^= 0x80000000; + } + } + return (c); +} + +/* File data */ +static const char file_name[] = "file"; +static const char file_data1[] = {'~', 'Z', '`', '^', 'Y', 'X', 'N', 'W', 'V', 'G', 'H', 'I', 'J'}; +static const char file_data2[] = {'U', 'T', 'S', 'M', 'R', 'Q', 'P', 'O', 'K', 'L'}; +static const int file_perm = 00644; +static const short file_uid = 10; +static const short file_gid = 20; + +/* Folder data */ +static const char folder_name[] = "folder/"; +static const int folder_perm = 00755; +static const short folder_uid = 30; +static const short folder_gid = 40; + +static time_t now; + +static void verify_write_zstd(struct archive *a) +{ + struct archive_entry *entry; + + /* Write entries. 
*/ + + /* Regular file */ + assert((entry = archive_entry_new()) != NULL); + archive_entry_set_pathname(entry, file_name); + archive_entry_set_mode(entry, S_IFREG | 0644); + archive_entry_set_size(entry, sizeof(file_data1) + sizeof(file_data2)); + archive_entry_set_uid(entry, file_uid); + archive_entry_set_gid(entry, file_gid); + archive_entry_set_mtime(entry, now, 0); + archive_entry_set_atime(entry, now + 3, 0); + assertEqualIntA(a, 0, archive_write_header(a, entry)); + assertEqualIntA(a, sizeof(file_data1), archive_write_data(a, file_data1, sizeof(file_data1))); + assertEqualIntA(a, sizeof(file_data2), archive_write_data(a, file_data2, sizeof(file_data2))); + archive_entry_free(entry); + + /* Folder */ + assert((entry = archive_entry_new()) != NULL); + archive_entry_set_pathname(entry, folder_name); + archive_entry_set_mode(entry, S_IFDIR | folder_perm); + archive_entry_set_size(entry, 0); + archive_entry_set_uid(entry, folder_uid); + archive_entry_set_gid(entry, folder_gid); + archive_entry_set_mtime(entry, now, 0); + archive_entry_set_ctime(entry, now + 5, 0); + assertEqualIntA(a, 0, archive_write_header(a, entry)); + archive_entry_free(entry); +} + +/* Quick and dirty: Read 2-byte and 4-byte integers from Zip file. */ +static unsigned int +i2(const void *p_) +{ + const unsigned char *p = p_; + return (p[0] | (p[1] << 8)); +} + +static unsigned int +i4(const void *p_) +{ + const unsigned char *p = p_; + return (i2(p) | (i2(p + 2) << 16)); +} + +static void verify_zstd_contents(const char *buff, size_t used) +{ + const char *buffend; + struct archive* zip_archive; + struct archive_entry *ae; + char filedata[sizeof(file_data1) + sizeof(file_data2)]; + /* Misc variables */ + unsigned long crc; + struct tm *tm; +#if defined(HAVE_LOCALTIME_R) || defined(HAVE_LOCALTIME_S) + struct tm tmbuf; +#endif + /* p is the pointer to walk over the central directory, + * q walks over the local headers, the data and the data descriptors. 
*/ + const char *p, *q, *local_header, *extra_start; + +#if defined(HAVE_LOCALTIME_S) + tm = localtime_s(&tmbuf, &now) ? NULL : &tmbuf; +#elif defined(HAVE_LOCALTIME_R) + tm = localtime_r(&now, &tmbuf); +#else + tm = localtime(&now); +#endif + + /* Open archive from memory, we'll need it for checking the file + * value */ + assert((zip_archive = archive_read_new()) != NULL); + assertEqualIntA(zip_archive, ARCHIVE_OK, archive_read_support_format_zip(zip_archive)); + assertEqualIntA(zip_archive, ARCHIVE_OK, archive_read_support_filter_all(zip_archive)); + assertEqualIntA(zip_archive, ARCHIVE_OK, archive_read_open_memory(zip_archive, buff, used)); + + /* Remember the end of the archive in memory. */ + buffend = buff + used; + + /* Verify "End of Central Directory" record. */ + /* Get address of end-of-central-directory record. */ + p = buffend - 22; /* Assumes there is no zip comment field. */ + failure("End-of-central-directory begins with PK\\005\\006 signature"); + assertEqualMem(p, "PK\005\006", 4); + failure("This must be disk 0"); + assertEqualInt(i2(p + 4), 0); + failure("Central dir must start on disk 0"); + assertEqualInt(i2(p + 6), 0); + failure("All central dir entries are on this disk"); + assertEqualInt(i2(p + 8), i2(p + 10)); + failure("CD start (%d) + CD length (%d) should == archive size - 22", + i4(p + 12), i4(p + 16)); + assertEqualInt(i4(p + 12) + i4(p + 16), used - 22); + failure("no zip comment"); + assertEqualInt(i2(p + 20), 0); + + /* Get address of first entry in central directory. */ + p = buff + i4(buffend - 6); + failure("Central file record at offset %d should begin with" + " PK\\001\\002 signature", + i4(buffend - 10)); + + /* Verify file entry in central directory, except compressed size (offset 20). 
*/ + assertEqualMem(p, "PK\001\002", 4); /* Signature */ + assertEqualInt(i2(p + 4), 3 * 256 + 63); /* Version made by */ + assertEqualInt(i2(p + 6), 63); /* Version needed to extract */ + assertEqualInt(i2(p + 8), 8); /* Flags */ + assertEqualInt(i2(p + 10), 93); /* Compression method */ + assertEqualInt(i2(p + 12), (tm->tm_hour * 2048) + (tm->tm_min * 32) + (tm->tm_sec / 2)); /* File time */ + assertEqualInt(i2(p + 14), ((tm->tm_year - 80) * 512) + ((tm->tm_mon + 1) * 32) + tm->tm_mday); /* File date */ + crc = bitcrc32(0, file_data1, sizeof(file_data1)); + crc = bitcrc32(crc, file_data2, sizeof(file_data2)); + assertEqualInt(i4(p + 16), crc); /* CRC-32 */ + assertEqualInt(i4(p + 24), sizeof(file_data1) + sizeof(file_data2)); /* Uncompressed size */ + assertEqualInt(i2(p + 28), strlen(file_name)); /* Pathname length */ + assertEqualInt(i2(p + 30), 24); /* Extra field length */ + assertEqualInt(i2(p + 32), 0); /* File comment length */ + assertEqualInt(i2(p + 34), 0); /* Disk number start */ + assertEqualInt(i2(p + 36), 0); /* Internal file attrs */ + assertEqualInt(i4(p + 38) >> 16 & 01777, file_perm); /* External file attrs */ + assertEqualInt(i4(p + 42), 0); /* Offset of local header */ + assertEqualMem(p + 46, file_name, strlen(file_name)); /* Pathname */ + p = p + 46 + strlen(file_name); + + assertEqualInt(i2(p), 0x7875); /* 'ux' extension header */ + assertEqualInt(i2(p + 2), 11); /* 'ux' size */ +/* TODO */ + p = p + 4 + i2(p + 2); + + assertEqualInt(i2(p), 0x5455); /* 'UT' extension header */ + assertEqualInt(i2(p + 2), 5); /* 'UT' size */ + assertEqualInt(p[4], 1); /* 'UT' flags */ + assertEqualInt(i4(p + 5), now); /* 'UT' mtime */ + p = p + 4 + i2(p + 2); + + /* Verify local header of file entry. 
*/ + local_header = q = buff; + assertEqualMem(q, "PK\003\004", 4); /* Signature */ + assertEqualInt(i2(q + 4), 63); /* Version needed to extract */ + assertEqualInt(i2(q + 6), 8); /* Flags: bit 3 = length-at-end (required because CRC32 is unknown) */ + assertEqualInt(i2(q + 8), 93); /* Compression method */ + assertEqualInt(i2(q + 10), (tm->tm_hour * 2048) + (tm->tm_min * 32) + (tm->tm_sec / 2)); /* File time */ + assertEqualInt(i2(q + 12), ((tm->tm_year - 80) * 512) + ((tm->tm_mon + 1) * 32) + tm->tm_mday); /* File date */ + assertEqualInt(i4(q + 14), 0); /* CRC-32 */ + assertEqualInt(i4(q + 18), 0); /* Compressed size, must be zero because of length-at-end */ + assertEqualInt(i4(q + 22), 0); /* Uncompressed size, must be zero because of length-at-end */ + assertEqualInt(i2(q + 26), strlen(file_name)); /* Pathname length */ + assertEqualInt(i2(q + 28), 41); /* Extra field length */ + assertEqualMem(q + 30, file_name, strlen(file_name)); /* Pathname */ + extra_start = q = q + 30 + strlen(file_name); + + assertEqualInt(i2(q), 0x7875); /* 'ux' extension header */ + assertEqualInt(i2(q + 2), 11); /* 'ux' size */ + assertEqualInt(q[4], 1); /* 'ux' version */ + assertEqualInt(q[5], 4); /* 'ux' uid size */ + assertEqualInt(i4(q + 6), file_uid); /* 'Ux' UID */ + assertEqualInt(q[10], 4); /* 'ux' gid size */ + assertEqualInt(i4(q + 11), file_gid); /* 'Ux' GID */ + q = q + 4 + i2(q + 2); + + assertEqualInt(i2(q), 0x5455); /* 'UT' extension header */ + assertEqualInt(i2(q + 2), 9); /* 'UT' size */ + assertEqualInt(q[4], 3); /* 'UT' flags */ + assertEqualInt(i4(q + 5), now); /* 'UT' mtime */ + assertEqualInt(i4(q + 9), now + 3); /* 'UT' atime */ + q = q + 4 + i2(q + 2); + + assertEqualInt(i2(q), 0x6c78); /* 'xl' experimental extension header */ + assertEqualInt(i2(q + 2), 9); /* size */ + assertEqualInt(q[4], 7); /* Bitmap of fields included. 
*/ + assertEqualInt(i2(q + 5) >> 8, 3); /* system & version made by */ + assertEqualInt(i2(q + 7), 0); /* internal file attributes */ + assertEqualInt(i4(q + 9) >> 16 & 01777, file_perm); /* external file attributes */ + q = q + 4 + i2(q + 2); + + assert(q == extra_start + i2(local_header + 28)); + q = extra_start + i2(local_header + 28); + + /* Verify data of file entry, using our own zip reader to test. */ + assertEqualIntA(zip_archive, ARCHIVE_OK, archive_read_next_header(zip_archive, &ae)); + assertEqualString("file", archive_entry_pathname(ae)); + assertEqualIntA(zip_archive, sizeof(filedata), archive_read_data(zip_archive, filedata, sizeof(filedata))); + assertEqualMem(filedata, file_data1, sizeof(file_data1)); + assertEqualMem(filedata + sizeof(file_data1), file_data2, + sizeof(file_data2)); + + /* Skip data of file entry in q */ + while (q < buffend - 3) { + if (memcmp(q, "PK\007\010", 4) == 0) { + break; + } + q++; + } + + /* Verify data descriptor of file entry, except compressed size (offset 8). */ + assertEqualMem(q, "PK\007\010", 4); /* Signature */ + assertEqualInt(i4(q + 4), crc); /* CRC-32 */ + assertEqualInt(i4(q + 12), sizeof(file_data1) + sizeof(file_data2)); /* Uncompressed size */ + q = q + 16; + + /* Verify folder entry in central directory. 
*/ + assertEqualMem(p, "PK\001\002", 4); /* Signature */ + assertEqualInt(i2(p + 4), 3 * 256 + 20); /* Version made by */ + assertEqualInt(i2(p + 6), 20); /* Version needed to extract */ + assertEqualInt(i2(p + 8), 0); /* Flags */ + assertEqualInt(i2(p + 10), 0); /* Compression method */ + assertEqualInt(i2(p + 12), (tm->tm_hour * 2048) + (tm->tm_min * 32) + (tm->tm_sec / 2)); /* File time */ + assertEqualInt(i2(p + 14), ((tm->tm_year - 80) * 512) + ((tm->tm_mon + 1) * 32) + tm->tm_mday); /* File date */ + crc = 0; + assertEqualInt(i4(p + 16), crc); /* CRC-32 */ + assertEqualInt(i4(p + 20), 0); /* Compressed size */ + assertEqualInt(i4(p + 24), 0); /* Uncompressed size */ + assertEqualInt(i2(p + 28), strlen(folder_name)); /* Pathname length */ + assertEqualInt(i2(p + 30), 24); /* Extra field length */ + assertEqualInt(i2(p + 32), 0); /* File comment length */ + assertEqualInt(i2(p + 34), 0); /* Disk number start */ + assertEqualInt(i2(p + 36), 0); /* Internal file attrs */ + assertEqualInt(i4(p + 38) >> 16 & 01777, folder_perm); /* External file attrs */ + assertEqualInt(i4(p + 42), q - buff); /* Offset of local header */ + assertEqualMem(p + 46, folder_name, strlen(folder_name)); /* Pathname */ + p = p + 46 + strlen(folder_name); + + assertEqualInt(i2(p), 0x7875); /* 'ux' extension header */ + assertEqualInt(i2(p + 2), 11); /* 'ux' size */ + assertEqualInt(p[4], 1); /* 'ux' version */ + assertEqualInt(p[5], 4); /* 'ux' uid size */ + assertEqualInt(i4(p + 6), folder_uid); /* 'ux' UID */ + assertEqualInt(p[10], 4); /* 'ux' gid size */ + assertEqualInt(i4(p + 11), folder_gid); /* 'ux' GID */ + p = p + 4 + i2(p + 2); + + assertEqualInt(i2(p), 0x5455); /* 'UT' extension header */ + assertEqualInt(i2(p + 2), 5); /* 'UT' size */ + assertEqualInt(p[4], 1); /* 'UT' flags */ + assertEqualInt(i4(p + 5), now); /* 'UT' mtime */ + p = p + 4 + i2(p + 2); + + /* Verify local header of folder entry. 
*/ + local_header = q; + assertEqualMem(q, "PK\003\004", 4); /* Signature */ + assertEqualInt(i2(q + 4), 20); /* Version needed to extract */ + assertEqualInt(i2(q + 6), 0); /* Flags */ + assertEqualInt(i2(q + 8), 0); /* Compression method */ + assertEqualInt(i2(q + 10), (tm->tm_hour * 2048) + (tm->tm_min * 32) + (tm->tm_sec / 2)); /* File time */ + assertEqualInt(i2(q + 12), ((tm->tm_year - 80) * 512) + ((tm->tm_mon + 1) * 32) + tm->tm_mday); /* File date */ + assertEqualInt(i4(q + 14), 0); /* CRC-32 */ + assertEqualInt(i4(q + 18), 0); /* Compressed size */ + assertEqualInt(i4(q + 22), 0); /* Uncompressed size */ + assertEqualInt(i2(q + 26), strlen(folder_name)); /* Pathname length */ + assertEqualInt(i2(q + 28), 41); /* Extra field length */ + assertEqualMem(q + 30, folder_name, strlen(folder_name)); /* Pathname */ + extra_start = q = q + 30 + strlen(folder_name); + + assertEqualInt(i2(q), 0x7875); /* 'ux' extension header */ + assertEqualInt(i2(q + 2), 11); /* 'ux' size */ + assertEqualInt(q[4], 1); /* 'ux' version */ + assertEqualInt(q[5], 4); /* 'ux' uid size */ + assertEqualInt(i4(q + 6), folder_uid); /* 'ux' UID */ + assertEqualInt(q[10], 4); /* 'ux' gid size */ + assertEqualInt(i4(q + 11), folder_gid); /* 'ux' GID */ + q = q + 4 + i2(q + 2); + + assertEqualInt(i2(q), 0x5455); /* 'UT' extension header */ + assertEqualInt(i2(q + 2), 9); /* 'UT' size */ + assertEqualInt(q[4], 5); /* 'UT' flags */ + assertEqualInt(i4(q + 5), now); /* 'UT' mtime */ + assertEqualInt(i4(q + 9), now + 5); /* 'UT' ctime */ + q = q + 4 + i2(q + 2); + + assertEqualInt(i2(q), 0x6c78); /* 'xl' experimental extension header */ + assertEqualInt(i2(q + 2), 9); /* size */ + assertEqualInt(q[4], 7); /* bitmap of fields */ + assertEqualInt(i2(q + 5) >> 8, 3); /* system & version made by */ + assertEqualInt(i2(q + 7), 0); /* internal file attributes */ + assertEqualInt(i4(q + 9) >> 16 & 01777, folder_perm); /* external file attributes */ + q = q + 4 + i2(q + 2); + + assert(q == extra_start +
i2(local_header + 28)); + q = extra_start + i2(local_header + 28); + + /* There should not be any data in the folder entry, + * so the first central directory entry should be next: */ + assertEqualMem(q, "PK\001\002", 4); /* Signature */ + + /* Close archive, in case. */ + archive_read_free(zip_archive); +} + +DEFINE_TEST(test_write_format_zip_compression_zstd) +{ + /* Buffer data */ + struct archive *a; + char buff[100000]; + size_t used; + + /* Time data */ + now = time(NULL); + + /* Create new ZIP archive in memory without padding. */ + /* Use the setter function to use ZSTD compression. */ + assert((a = archive_write_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_format_zip(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_zip_set_compression_zstd(a)); + assertEqualIntA(a, ARCHIVE_OK, + archive_write_set_options(a, "zip:experimental")); + assertEqualIntA(a, ARCHIVE_OK, archive_write_add_filter_none(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_bytes_per_block(a, 1)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_bytes_in_last_block(a, 1)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_open_memory(a, buff, sizeof(buff), &used)); + + verify_write_zstd(a); + + /* Close the archive. */ + assertEqualIntA(a, ARCHIVE_OK, archive_write_close(a)); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + dumpfile("constructed.zip", buff, used); + + verify_zstd_contents(buff, used); + + /* Create new ZIP archive in memory without padding. */ + /* Use compression-level=1 to check that compression levels are + * somewhat supported as well as threads=2 to check the multi-threaded + * encoder, if available.
*/ + assert((a = archive_write_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_format_zip(a)); + assertEqualIntA(a, ARCHIVE_OK, + archive_write_set_options(a, "zip:compression=zstd")); + assertEqualIntA(a, ARCHIVE_OK, + archive_write_set_options(a, "zip:compression-level=1")); + assertEqualIntA(a, ARCHIVE_OK, + archive_write_set_options(a, "zip:threads=2")); + assertEqualIntA(a, ARCHIVE_OK, + archive_write_set_options(a, "zip:experimental")); + assertEqualIntA(a, ARCHIVE_OK, archive_write_add_filter_none(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_bytes_per_block(a, 1)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_bytes_in_last_block(a, 1)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_open_memory(a, buff, sizeof(buff), &used)); + + verify_write_zstd(a); + + /* Close the archive. */ + assertEqualIntA(a, ARCHIVE_OK, archive_write_close(a)); + assertEqualInt(ARCHIVE_OK, archive_write_free(a)); + dumpfile("constructed.zip", buff, used); + + verify_zstd_contents(buff, used); +} +#endif