From: Mostyn Bramley-Moore Date: Tue, 22 Oct 2024 09:01:55 +0000 (+0200) Subject: 7zip writer: initial support for zstandard compression (#2137) X-Git-Tag: v3.8.0~132 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ae23713ceb5bb1c3c162c605a16aece8a8b1dcbc;p=thirdparty%2Flibarchive.git 7zip writer: initial support for zstandard compression (#2137) This is intended to be compatible with: * https://github.com/mcmilk/7-Zip-zstd * https://github.com/tehmul/p7zip-zstd --- diff --git a/CMakeLists.txt b/CMakeLists.txt index a7b74dcb7..8f38dd064 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -682,6 +682,7 @@ IF(ZSTD_FOUND) SET(CMAKE_REQUIRED_INCLUDES ${ZSTD_INCLUDE_DIR}) CHECK_FUNCTION_EXISTS(ZSTD_decompressStream HAVE_LIBZSTD) CHECK_FUNCTION_EXISTS(ZSTD_compressStream HAVE_ZSTD_compressStream) + CHECK_FUNCTION_EXISTS(ZSTD_minCLevel HAVE_ZSTD_minCLevel) # # TODO: test for static library. # diff --git a/README.md b/README.md index 28a491b00..4b05b0d14 100644 --- a/README.md +++ b/README.md @@ -124,7 +124,7 @@ The library can create archives in any of the following formats: * GNU and BSD 'ar' archives * 'mtree' format * ISO9660 format - * 7-Zip archives + * 7-Zip archives (including archives that use zstandard compression) * XAR archives When creating archives, the result can be filtered with any of the following: diff --git a/build/cmake/config.h.in b/build/cmake/config.h.in index d6e548792..c3c227e6d 100644 --- a/build/cmake/config.h.in +++ b/build/cmake/config.h.in @@ -777,6 +777,9 @@ typedef uint64_t uintmax_t; /* Define to 1 if you have the ZSTD_compressStream function. */ #cmakedefine HAVE_ZSTD_compressStream 1 +/* Define to 1 if you have the ZSTD_minCLevel function. */ +#cmakedefine HAVE_ZSTD_minCLevel 1 + /* Define to 1 if you have the header file. */ #cmakedefine HAVE_LIMITS_H 1 diff --git a/configure.ac b/configure.ac index e794758b1..0fdfaf06a 100644 --- a/configure.ac +++ b/configure.ac @@ -483,6 +483,8 @@ if test "x$with_zstd" != "xno"; then AC_CHECK_LIB(zstd,ZSTD_decompressStream) AC_CHECK_LIB(zstd,ZSTD_compressStream, AC_DEFINE([HAVE_ZSTD_compressStream], [1], [Define to 1 if you have the `zstd' library (-lzstd) with compression support.])) + AC_CHECK_LIB(zstd,ZSTD_minCLevel, + AC_DEFINE([HAVE_ZSTD_minCLevel], [1], [Define to 1 if you have a `zstd' library version with ZSTD_minCLevel().])) fi AC_ARG_WITH([lzma], diff --git a/libarchive/archive_write_set_format_7zip.c b/libarchive/archive_write_set_format_7zip.c index b870338fc..e9b4fa95d 100644 --- a/libarchive/archive_write_set_format_7zip.c +++ b/libarchive/archive_write_set_format_7zip.c @@ -28,7 +28,9 @@ #ifdef HAVE_ERRNO_H #include #endif +#ifdef HAVE_STDLIB_H #include +#endif #ifdef HAVE_BZLIB_H #include #endif @@ -38,6 +40,9 @@ #ifdef HAVE_ZLIB_H #include #endif +#ifdef HAVE_ZSTD_H +#include +#endif #include "archive.h" #ifndef HAVE_ZLIB_H @@ -63,6 +68,8 @@ #define _7Z_BZIP2 0x040202 #define _7Z_PPMD 0x030401 +#define _7Z_ZSTD 0x4F71101 /* Copied from https://github.com/mcmilk/7-Zip-zstd.git */ + /* * 7-Zip header property IDs. */ @@ -110,6 +117,9 @@ // the attr field along with the unix permissions. #define FILE_ATTRIBUTE_UNIX_EXTENSION 0x8000 +// Many systems define min or MIN, but not all. +#define sevenzipmin(a,b) ((a) < (b) ? (a) : (b)) + enum la_zaction { ARCHIVE_Z_FINISH, ARCHIVE_Z_RUN @@ -209,7 +219,9 @@ struct _7zip { #define ENCODED_CRC32 2 unsigned opt_compression; + int opt_compression_level; + int opt_zstd_compression_level; // This requires a different default value. struct la_zstream stream; struct coder coder; @@ -291,6 +303,13 @@ static int compression_code_ppmd(struct archive *, static int compression_end_ppmd(struct archive *, struct la_zstream *); static int _7z_compression_init_encoder(struct archive_write *, unsigned, int); +static int compression_init_encoder_zstd(struct archive *, + struct la_zstream *, int); +#if defined(HAVE_ZSTD_H) +static int compression_code_zstd(struct archive *, + struct la_zstream *, enum la_zaction); +static int compression_end_zstd(struct archive *, struct la_zstream *); +#endif static int compression_code(struct archive *, struct la_zstream *, enum la_zaction); static int compression_end(struct archive *, @@ -338,8 +357,17 @@ archive_write_set_format_7zip(struct archive *_a) #else zip->opt_compression = _7Z_COPY; #endif + zip->opt_compression_level = 6; +#ifdef ZSTD_CLEVEL_DEFAULT + // Zstandard compression needs a different default + // value than other encoders. + zip->opt_zstd_compression_level = ZSTD_CLEVEL_DEFAULT; +#else + zip->opt_zstd_compression_level = 3; +#endif + a->format_data = zip; a->format_name = "7zip"; @@ -397,6 +425,13 @@ _7z_options(struct archive_write *a, const char *key, const char *value) zip->opt_compression = _7Z_LZMA2; #else name = "lzma2"; +#endif + else if (strcmp(value, "zstd") == 0 || + strcmp(value, "ZSTD") == 0) +#if HAVE_ZSTD_H + zip->opt_compression = _7Z_ZSTD; +#else + name = "zstd"; #endif else if (strcmp(value, "ppmd") == 0 || strcmp(value, "PPMD") == 0 || @@ -420,16 +455,44 @@ _7z_options(struct archive_write *a, const char *key, const char *value) return (ARCHIVE_OK); } if (strcmp(key, "compression-level") == 0) { - if (value == NULL || - !(value[0] >= '0' && value[0] <= '9') || - value[1] != '\0') { - archive_set_error(&(a->archive), - ARCHIVE_ERRNO_MISC, - "Illegal value `%s'", - value); + if (value == NULL || *value == '\0') { + archive_set_error(&(a->archive), ARCHIVE_ERRNO_MISC, + "Invalid compression-level option value `%s'", value); return (ARCHIVE_FAILED); } - zip->opt_compression_level = value[0] - '0'; + + char *end = NULL; + long lvl = strtol(value, &end, 10); + if (end == NULL || *end != '\0') { + archive_set_error(&(a->archive), ARCHIVE_ERRNO_MISC, + "parsing compression-level option value failed `%s'", value); + return (ARCHIVE_FAILED); + } + +#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream && HAVE_ZSTD_minCLevel + int min_level = sevenzipmin(0, ZSTD_minCLevel()); +#else + const int min_level = 0; +#endif + +#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream + int max_level = ZSTD_maxCLevel(); +#else + const int max_level = 9; +#endif + + if (lvl < min_level || lvl > max_level) { + archive_set_error(&(a->archive), ARCHIVE_ERRNO_MISC, + "compression-level option value `%ld' out of range", lvl); + return (ARCHIVE_FAILED); + } + + // Note: we don't know here if this value is for zstd (negative to ~22), + // or zlib-style 0-9. If zstd is enabled but not in use, we will need to + // validate opt_compression_level before use. + zip->opt_compression_level = (int)lvl; + + zip->opt_zstd_compression_level = (int)lvl; return (ARCHIVE_OK); } @@ -495,8 +558,19 @@ _7z_write_header(struct archive_write *a, struct archive_entry *entry) * Init compression. */ if ((zip->total_number_entry - zip->total_number_empty_entry) == 1) { - r = _7z_compression_init_encoder(a, zip->opt_compression, - zip->opt_compression_level); + + int level = zip->opt_compression_level; +#if HAVE_ZSTD_H + if (zip->opt_compression == _7Z_ZSTD) { + level = zip->opt_zstd_compression_level; + } else if (level < 0 || level > 9) { + archive_set_error(&(a->archive), ARCHIVE_ERRNO_MISC, + "compression-level option value `%d' out of range 0-9", level); + return (ARCHIVE_FATAL); + } +#endif + + r = _7z_compression_init_encoder(a, zip->opt_compression, level); if (r < 0) { file_free(file); return (ARCHIVE_FATAL); @@ -785,8 +859,12 @@ _7z_close(struct archive_write *a) #else header_compression = _7Z_COPY; #endif - r = _7z_compression_init_encoder(a, header_compression, - zip->opt_compression_level); + + int level = zip->opt_compression_level; + if (level < 0) level = 0; + else if (level > 9) level = 9; + + r = _7z_compression_init_encoder(a, header_compression, level); if (r < 0) return (r); zip->crc32flg = PRECODE_CRC32; @@ -844,7 +922,7 @@ _7z_close(struct archive_write *a) header_offset = header_size = 0; header_crc32 = 0; } - + length = zip->temp_offset; /* @@ -1504,7 +1582,7 @@ file_cmp_node(const struct archive_rb_node *n1, return (memcmp(f1->utf16name, f2->utf16name, f1->name_len)); return (f1->name_len > f2->name_len)?1:-1; } - + static int file_cmp_key(const struct archive_rb_node *n, const void *key) { @@ -1646,7 +1724,8 @@ file_init_register_empty(struct _7zip *zip) } #if !defined(HAVE_ZLIB_H) || !defined(HAVE_BZLIB_H) ||\ - !defined(BZ_CONFIG_ERROR) || !defined(HAVE_LZMA_H) + !defined(BZ_CONFIG_ERROR) || !defined(HAVE_LZMA_H) ||\ + !(HAVE_ZSTD_H && HAVE_ZSTD_compressStream) static int compression_unsupported_encoder(struct archive *a, struct la_zstream *lastrm, const char *name) @@ -2279,6 +2358,117 @@ compression_end_ppmd(struct archive *a, struct la_zstream *lastrm) return (ARCHIVE_OK); } +#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream +static int +compression_init_encoder_zstd(struct archive *a, struct la_zstream *lastrm, int level) +{ + if (lastrm->valid) + compression_end(a, lastrm); + + ZSTD_CStream *strm = ZSTD_createCStream(); + if (strm == NULL) { + archive_set_error(a, ENOMEM, + "Can't allocate memory for zstd stream"); + return (ARCHIVE_FATAL); + } + + if (ZSTD_isError(ZSTD_initCStream(strm, level))) { + ZSTD_freeCStream(strm); + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Internal error initializing zstd compressor object"); + return (ARCHIVE_FATAL); + } + + // TODO: enable multiple threads? + // ZSTD_CCtx_setParameter(strm, ZSTD_c_nbWorkers, NUM_THREADS_GOES_HERE); + + // p7zip-zstd fails to unpack archives that don't have prop_size 5. + // 7-Zip-zstd fails to unpack archives that don't have prop_size 3 or 5. + // So let's use 5... + lastrm->prop_size = 5; + lastrm->props = calloc(5, 1); + if (lastrm->props == NULL) { + ZSTD_freeCStream(strm); + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "Internal error initializing zstd compressor properties"); + return (ARCHIVE_FATAL); + } + + // Refer to the DProps struct in 7-Zip-zstd's ZstdDecoder.h: + // https://github.com/mcmilk/7-Zip-zstd/blob/79b2c78e9e7735ddf90147129b75cf2797ff6522/CPP/7zip/Compress/ZstdDecoder.h#L34S + lastrm->props[0] = ZSTD_VERSION_MAJOR; + lastrm->props[1] = ZSTD_VERSION_MINOR; + lastrm->props[2] = level; + // lastrm->props[3] and lastrm->props[4] are reserved. Leave them as 0. + + lastrm->real_stream = strm; + lastrm->valid = 1; + lastrm->code = compression_code_zstd; + lastrm->end = compression_end_zstd; + + return (ARCHIVE_OK); +} + +static int +compression_code_zstd(struct archive *a, + struct la_zstream *lastrm, enum la_zaction action) +{ + ZSTD_CStream *strm = (ZSTD_CStream *)lastrm->real_stream; + + ZSTD_outBuffer out = { .dst = lastrm->next_out, .size = lastrm->avail_out, .pos = 0 }; + ZSTD_inBuffer in = { .src = lastrm->next_in, .size = lastrm->avail_in, .pos = 0 }; + + size_t zret; + + ZSTD_EndDirective mode = (action == ARCHIVE_Z_RUN) ? ZSTD_e_continue : ZSTD_e_end; + + zret = ZSTD_compressStream2(strm, &out, &in, mode); + if (ZSTD_isError(zret)) { + archive_set_error(a, ARCHIVE_ERRNO_MISC, + "zstd compression failed, ZSTD_compressStream2 returned: %s", + ZSTD_getErrorName(zret)); + return (ARCHIVE_FATAL); + } + + lastrm->next_in += in.pos; + lastrm->avail_in -= in.pos; + lastrm->total_in += in.pos; + + lastrm->next_out += out.pos; + lastrm->avail_out -= out.pos; + lastrm->total_out += out.pos; + + if (action == ARCHIVE_Z_FINISH && zret == 0) + return (ARCHIVE_EOF); // All done. + + return (ARCHIVE_OK); // More work to do. +} + +static int +compression_end_zstd(struct archive *a, struct la_zstream *lastrm) +{ + ZSTD_CStream *strm; + + (void)a; /* UNUSED */ + strm = (ZSTD_CStream *)lastrm->real_stream; + ZSTD_freeCStream(strm); + lastrm->valid = 0; + lastrm->real_stream = NULL; + return (ARCHIVE_OK); +} + +#else + +static int +compression_init_encoder_zstd(struct archive *a, struct la_zstream *lastrm, int level) +{ + (void) level; /* UNUSED */ + if (lastrm->valid) + compression_end(a, lastrm); + return (compression_unsupported_encoder(a, lastrm, "zstd")); +} +#endif + /* * Universal compressor initializer. */ @@ -2316,6 +2506,11 @@ _7z_compression_init_encoder(struct archive_write *a, unsigned compression, &(a->archive), &(zip->stream), PPMD7_DEFAULT_ORDER, PPMD7_DEFAULT_MEM_SIZE); break; + case _7Z_ZSTD: + r = compression_init_encoder_zstd( + &(a->archive), &(zip->stream), + compression_level); + break; case _7Z_COPY: default: r = compression_init_encoder_copy( diff --git a/libarchive/archive_write_set_options.3 b/libarchive/archive_write_set_options.3 index 2e784d872..6f02cff1f 100644 --- a/libarchive/archive_write_set_options.3 +++ b/libarchive/archive_write_set_options.3 @@ -274,9 +274,10 @@ The value is one of .Dq deflate , .Dq bzip2 , .Dq lzma1 , -.Dq lzma2 +.Dq lzma2 , +.Dq ppmd , or -.Dq ppmd +.Dq zstd to indicate how the following entries should be compressed. The values .Dq store @@ -289,7 +290,9 @@ and other special entries. The value is interpreted as a decimal integer specifying the compression level. Values between 0 and 9 are supported, with the exception of bzip2 -which only supports values between 1 and 9. +which only supports values between 1 and 9, and zstd which may +support negative values depending on the library version and +commonly used values 1 through 22. The interpretation of the compression level depends on the chosen compression method. .El diff --git a/libarchive/test/test_write_format_7zip.c b/libarchive/test/test_write_format_7zip.c index d91e88d8f..4b69cff40 100644 --- a/libarchive/test/test_write_format_7zip.c +++ b/libarchive/test/test_write_format_7zip.c @@ -567,3 +567,9 @@ DEFINE_TEST(test_write_format_7zip_basic_ppmd) /* Test that making a 7-Zip archive file with PPMd compression. */ test_basic("ppmd"); } + +DEFINE_TEST(test_write_format_7zip_basic_zstd) +{ + /* Test that making a 7-Zip archive file with zstandard compression. */ + test_basic("zstd"); +} diff --git a/libarchive/test/test_write_format_7zip_large.c b/libarchive/test/test_write_format_7zip_large.c index ac2fa08b4..307b1f13c 100644 --- a/libarchive/test/test_write_format_7zip_large.c +++ b/libarchive/test/test_write_format_7zip_large.c @@ -169,3 +169,9 @@ DEFINE_TEST(test_write_format_7zip_large_ppmd) /* Test that making a 7-Zip archive file with PPMd compression. */ test_large("ppmd"); } + +DEFINE_TEST(test_write_format_7zip_large_zstd) +{ + /* Test that making a 7-Zip archive file with zstd compression. */ + test_large("zstd"); +}