git.ipfire.org Git - thirdparty/libarchive.git/commitdiff
7zip writer: initial support for zstandard compression (#2137)
author Mostyn Bramley-Moore <mostyn@antipode.se>
Tue, 22 Oct 2024 09:01:55 +0000 (11:01 +0200)
committer GitHub <noreply@github.com>
Tue, 22 Oct 2024 09:01:55 +0000 (11:01 +0200)
This is intended to be compatible with:
* https://github.com/mcmilk/7-Zip-zstd
* https://github.com/tehmul/p7zip-zstd

CMakeLists.txt
README.md
build/cmake/config.h.in
configure.ac
libarchive/archive_write_set_format_7zip.c
libarchive/archive_write_set_options.3
libarchive/test/test_write_format_7zip.c
libarchive/test/test_write_format_7zip_large.c

index a7b74dcb71bd0b8512da6fc6bed2b78d3edaca30..8f38dd06449c2b6d0ab0e7d0bb2a0c9f52456682 100644 (file)
@@ -682,6 +682,7 @@ IF(ZSTD_FOUND)
   SET(CMAKE_REQUIRED_INCLUDES ${ZSTD_INCLUDE_DIR})
   CHECK_FUNCTION_EXISTS(ZSTD_decompressStream HAVE_LIBZSTD)
   CHECK_FUNCTION_EXISTS(ZSTD_compressStream HAVE_ZSTD_compressStream)
+  CHECK_FUNCTION_EXISTS(ZSTD_minCLevel HAVE_ZSTD_minCLevel)
   #
   # TODO: test for static library.
   #
index 28a491b0010eb39165c0cd68b7de201fcdbc9724..4b05b0d144343f5862ee1d3cc818e52385f1e5e2 100644 (file)
--- a/README.md
+++ b/README.md
@@ -124,7 +124,7 @@ The library can create archives in any of the following formats:
   * GNU and BSD 'ar' archives
   * 'mtree' format
   * ISO9660 format
-  * 7-Zip archives
+  * 7-Zip archives (including archives that use zstandard compression)
   * XAR archives
 
 When creating archives, the result can be filtered with any of the following:
index d6e54879219257871f55157733153ed8d8aba227..c3c227e6d5f1de108ed89e00a61af74d1d6a65ed 100644 (file)
@@ -777,6 +777,9 @@ typedef uint64_t uintmax_t;
 /* Define to 1 if you have the ZSTD_compressStream function. */
 #cmakedefine HAVE_ZSTD_compressStream 1
 
+/* Define to 1 if you have the ZSTD_minCLevel function. */
+#cmakedefine HAVE_ZSTD_minCLevel 1
+
 /* Define to 1 if you have the <limits.h> header file. */
 #cmakedefine HAVE_LIMITS_H 1
 
index e794758b12c330d70ec1f76073088ca8ed79b6a8..0fdfaf06a508979f35ff914f6e7b45ad154bf1d3 100644 (file)
@@ -483,6 +483,8 @@ if test "x$with_zstd" != "xno"; then
   AC_CHECK_LIB(zstd,ZSTD_decompressStream)
   AC_CHECK_LIB(zstd,ZSTD_compressStream,
     AC_DEFINE([HAVE_ZSTD_compressStream], [1], [Define to 1 if you have the `zstd' library (-lzstd) with compression support.]))
+  AC_CHECK_LIB(zstd,ZSTD_minCLevel,
+    AC_DEFINE([HAVE_ZSTD_minCLevel], [1], [Define to 1 if you have a `zstd' library version with ZSTD_minCLevel().]))
 fi
 
 AC_ARG_WITH([lzma],
index b870338fc0256cd131afbb824cf4d545b5e1d000..e9b4fa95dee945fe02ab36542cfca03575fe8238 100644 (file)
@@ -28,7 +28,9 @@
 #ifdef HAVE_ERRNO_H
 #include <errno.h>
 #endif
+#ifdef HAVE_STDLIB_H
 #include <stdlib.h>
+#endif
 #ifdef HAVE_BZLIB_H
 #include <bzlib.h>
 #endif
@@ -38,6 +40,9 @@
 #ifdef HAVE_ZLIB_H
 #include <zlib.h>
 #endif
+#ifdef HAVE_ZSTD_H
+#include <zstd.h>
+#endif
 
 #include "archive.h"
 #ifndef HAVE_ZLIB_H
@@ -63,6 +68,8 @@
 #define _7Z_BZIP2      0x040202
 #define _7Z_PPMD       0x030401
 
+#define _7Z_ZSTD       0x4F71101 /* Copied from https://github.com/mcmilk/7-Zip-zstd.git */
+
 /*
  * 7-Zip header property IDs.
  */
 // the attr field along with the unix permissions.
 #define FILE_ATTRIBUTE_UNIX_EXTENSION 0x8000
 
+// Many systems define min or MIN, but not all.
+#define sevenzipmin(a,b) ((a) < (b) ? (a) : (b))
+
 enum la_zaction {
        ARCHIVE_Z_FINISH,
        ARCHIVE_Z_RUN
@@ -209,7 +219,9 @@ struct _7zip {
 #define        ENCODED_CRC32   2
 
        unsigned                 opt_compression;
+
        int                      opt_compression_level;
+       int                      opt_zstd_compression_level; // This requires a different default value.
 
        struct la_zstream        stream;
        struct coder             coder;
@@ -291,6 +303,13 @@ static int compression_code_ppmd(struct archive *,
 static int     compression_end_ppmd(struct archive *, struct la_zstream *);
 static int     _7z_compression_init_encoder(struct archive_write *, unsigned,
                    int);
+static int     compression_init_encoder_zstd(struct archive *,
+                   struct la_zstream *, int);
+#if defined(HAVE_ZSTD_H)
+static int     compression_code_zstd(struct archive *,
+                   struct la_zstream *, enum la_zaction);
+static int     compression_end_zstd(struct archive *, struct la_zstream *);
+#endif
 static int     compression_code(struct archive *,
                    struct la_zstream *, enum la_zaction);
 static int     compression_end(struct archive *,
@@ -338,8 +357,17 @@ archive_write_set_format_7zip(struct archive *_a)
 #else
        zip->opt_compression = _7Z_COPY;
 #endif
+
        zip->opt_compression_level = 6;
 
+#ifdef ZSTD_CLEVEL_DEFAULT
+       // Zstandard compression needs a different default
+       // value than other encoders.
+       zip->opt_zstd_compression_level = ZSTD_CLEVEL_DEFAULT;
+#else
+       zip->opt_zstd_compression_level = 3;
+#endif
+
        a->format_data = zip;
 
        a->format_name = "7zip";
@@ -397,6 +425,13 @@ _7z_options(struct archive_write *a, const char *key, const char *value)
                        zip->opt_compression = _7Z_LZMA2;
 #else
                        name = "lzma2";
+#endif
+               else if (strcmp(value, "zstd") == 0 ||
+                   strcmp(value, "ZSTD") == 0)
+#if HAVE_ZSTD_H
+                       zip->opt_compression = _7Z_ZSTD;
+#else
+                       name = "zstd";
 #endif
                else if (strcmp(value, "ppmd") == 0 ||
                    strcmp(value, "PPMD") == 0 ||
@@ -420,16 +455,44 @@ _7z_options(struct archive_write *a, const char *key, const char *value)
                return (ARCHIVE_OK);
        }
        if (strcmp(key, "compression-level") == 0) {
-               if (value == NULL ||
-                   !(value[0] >= '0' && value[0] <= '9') ||
-                   value[1] != '\0') {
-                       archive_set_error(&(a->archive),
-                           ARCHIVE_ERRNO_MISC,
-                           "Illegal value `%s'",
-                           value);
+               if (value == NULL || *value == '\0') {
+                       archive_set_error(&(a->archive), ARCHIVE_ERRNO_MISC,
+                               "Invalid compression-level option value `%s'", value);
                        return (ARCHIVE_FAILED);
                }
-               zip->opt_compression_level = value[0] - '0';
+
+               char *end = NULL;
+               long lvl = strtol(value, &end, 10);
+               if (end == NULL || *end != '\0') {
+                       archive_set_error(&(a->archive), ARCHIVE_ERRNO_MISC,
+                               "parsing compression-level option value failed `%s'", value);
+                       return (ARCHIVE_FAILED);
+               }
+
+#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream && HAVE_ZSTD_minCLevel
+               int min_level = sevenzipmin(0, ZSTD_minCLevel());
+#else
+               const int min_level = 0;
+#endif
+
+#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream
+               int max_level = ZSTD_maxCLevel();
+#else
+               const int max_level = 9;
+#endif
+
+               if (lvl < min_level || lvl > max_level) {
+                       archive_set_error(&(a->archive), ARCHIVE_ERRNO_MISC,
+                               "compression-level option value `%ld' out of range", lvl);
+                       return (ARCHIVE_FAILED);
+               }
+
+               // Note: we don't know here if this value is for zstd (negative to ~22),
+               // or zlib-style 0-9. If zstd is enabled but not in use, we will need to
+               // validate opt_compression_level before use.
+               zip->opt_compression_level = (int)lvl;
+
+               zip->opt_zstd_compression_level = (int)lvl;
                return (ARCHIVE_OK);
        }
 
@@ -495,8 +558,19 @@ _7z_write_header(struct archive_write *a, struct archive_entry *entry)
         * Init compression.
         */
        if ((zip->total_number_entry - zip->total_number_empty_entry) == 1) {
-               r = _7z_compression_init_encoder(a, zip->opt_compression,
-                       zip->opt_compression_level);
+
+               int level = zip->opt_compression_level;
+#if HAVE_ZSTD_H
+               if (zip->opt_compression == _7Z_ZSTD) {
+                       level = zip->opt_zstd_compression_level;
+               } else if (level < 0 || level > 9) {
+                       archive_set_error(&(a->archive), ARCHIVE_ERRNO_MISC,
+                               "compression-level option value `%d' out of range 0-9", level);
+                       return (ARCHIVE_FATAL);
+               }
+#endif
+
+               r = _7z_compression_init_encoder(a, zip->opt_compression, level);
                if (r < 0) {
                        file_free(file);
                        return (ARCHIVE_FATAL);
@@ -785,8 +859,12 @@ _7z_close(struct archive_write *a)
 #else
                header_compression = _7Z_COPY;
 #endif
-               r = _7z_compression_init_encoder(a, header_compression,
-                                                zip->opt_compression_level);
+
+               int level = zip->opt_compression_level;
+               if (level < 0) level = 0;
+               else if (level > 9) level = 9;
+
+               r = _7z_compression_init_encoder(a, header_compression, level);
                if (r < 0)
                        return (r);
                zip->crc32flg = PRECODE_CRC32;
@@ -844,7 +922,7 @@ _7z_close(struct archive_write *a)
                header_offset = header_size = 0;
                header_crc32 = 0;
        }
-       
+
        length = zip->temp_offset;
 
        /*
@@ -1504,7 +1582,7 @@ file_cmp_node(const struct archive_rb_node *n1,
                return (memcmp(f1->utf16name, f2->utf16name, f1->name_len));
        return (f1->name_len > f2->name_len)?1:-1;
 }
-        
+
 static int
 file_cmp_key(const struct archive_rb_node *n, const void *key)
 {
@@ -1646,7 +1724,8 @@ file_init_register_empty(struct _7zip *zip)
 }
 
 #if !defined(HAVE_ZLIB_H) || !defined(HAVE_BZLIB_H) ||\
-        !defined(BZ_CONFIG_ERROR) || !defined(HAVE_LZMA_H)
+        !defined(BZ_CONFIG_ERROR) || !defined(HAVE_LZMA_H) ||\
+        !(HAVE_ZSTD_H && HAVE_ZSTD_compressStream)
 static int
 compression_unsupported_encoder(struct archive *a,
     struct la_zstream *lastrm, const char *name)
@@ -2279,6 +2358,117 @@ compression_end_ppmd(struct archive *a, struct la_zstream *lastrm)
        return (ARCHIVE_OK);
 }
 
+#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream
+static int
+compression_init_encoder_zstd(struct archive *a, struct la_zstream *lastrm, int level)
+{
+       if (lastrm->valid)
+               compression_end(a, lastrm);
+
+       ZSTD_CStream *strm = ZSTD_createCStream();
+       if (strm == NULL) {
+               archive_set_error(a, ENOMEM,
+                       "Can't allocate memory for zstd stream");
+               return (ARCHIVE_FATAL);
+       }
+
+       if (ZSTD_isError(ZSTD_initCStream(strm, level))) {
+               ZSTD_freeCStream(strm);
+               archive_set_error(a, ARCHIVE_ERRNO_MISC,
+                       "Internal error initializing zstd compressor object");
+               return (ARCHIVE_FATAL);
+       }
+
+       // TODO: enable multiple threads?
+       // ZSTD_CCtx_setParameter(strm, ZSTD_c_nbWorkers, NUM_THREADS_GOES_HERE);
+
+       // p7zip-zstd fails to unpack archives that don't have prop_size 5.
+       // 7-Zip-zstd fails to unpack archives that don't have prop_size 3 or 5.
+       // So let's use 5...
+       lastrm->prop_size = 5;
+       lastrm->props = calloc(5, 1);
+       if (lastrm->props == NULL) {
+               ZSTD_freeCStream(strm);
+               archive_set_error(a, ARCHIVE_ERRNO_MISC,
+                       "Internal error initializing zstd compressor properties");
+               return (ARCHIVE_FATAL);
+       }
+
+       // Refer to the DProps struct in 7-Zip-zstd's ZstdDecoder.h:
+       // https://github.com/mcmilk/7-Zip-zstd/blob/79b2c78e9e7735ddf90147129b75cf2797ff6522/CPP/7zip/Compress/ZstdDecoder.h#L34
+       lastrm->props[0] = ZSTD_VERSION_MAJOR;
+       lastrm->props[1] = ZSTD_VERSION_MINOR;
+       lastrm->props[2] = level;
+       // lastrm->props[3] and lastrm->props[4] are reserved. Leave them as 0.
+
+       lastrm->real_stream = strm;
+       lastrm->valid = 1;
+       lastrm->code = compression_code_zstd;
+       lastrm->end = compression_end_zstd;
+
+       return (ARCHIVE_OK);
+}
+
+static int
+compression_code_zstd(struct archive *a,
+    struct la_zstream *lastrm, enum la_zaction action)
+{
+       ZSTD_CStream *strm = (ZSTD_CStream *)lastrm->real_stream;
+
+       ZSTD_outBuffer out = { .dst = lastrm->next_out, .size = lastrm->avail_out, .pos = 0 };
+       ZSTD_inBuffer  in  = { .src = lastrm->next_in,  .size = lastrm->avail_in,  .pos = 0 };
+
+       size_t zret;
+
+       ZSTD_EndDirective mode = (action == ARCHIVE_Z_RUN) ? ZSTD_e_continue : ZSTD_e_end;
+
+       zret = ZSTD_compressStream2(strm, &out, &in, mode);
+       if (ZSTD_isError(zret)) {
+               archive_set_error(a, ARCHIVE_ERRNO_MISC,
+                       "zstd compression failed, ZSTD_compressStream2 returned: %s",
+                       ZSTD_getErrorName(zret));
+               return (ARCHIVE_FATAL);
+       }
+
+       lastrm->next_in += in.pos;
+       lastrm->avail_in -= in.pos;
+       lastrm->total_in += in.pos;
+
+       lastrm->next_out += out.pos;
+       lastrm->avail_out -= out.pos;
+       lastrm->total_out += out.pos;
+
+       if (action == ARCHIVE_Z_FINISH && zret == 0)
+               return (ARCHIVE_EOF); // All done.
+
+       return (ARCHIVE_OK); // More work to do.
+}
+
+static int
+compression_end_zstd(struct archive *a, struct la_zstream *lastrm)
+{
+       ZSTD_CStream *strm;
+
+       (void)a; /* UNUSED */
+       strm = (ZSTD_CStream *)lastrm->real_stream;
+       ZSTD_freeCStream(strm);
+       lastrm->valid = 0;
+       lastrm->real_stream = NULL;
+       return (ARCHIVE_OK);
+}
+
+#else
+
+static int
+compression_init_encoder_zstd(struct archive *a, struct la_zstream *lastrm, int level)
+{
+       (void) level; /* UNUSED */
+       if (lastrm->valid)
+               compression_end(a, lastrm);
+       return (compression_unsupported_encoder(a, lastrm, "zstd"));
+}
+#endif
+
 /*
  * Universal compressor initializer.
  */
@@ -2316,6 +2506,11 @@ _7z_compression_init_encoder(struct archive_write *a, unsigned compression,
                    &(a->archive), &(zip->stream),
                    PPMD7_DEFAULT_ORDER, PPMD7_DEFAULT_MEM_SIZE);
                break;
+       case _7Z_ZSTD:
+               r = compression_init_encoder_zstd(
+                   &(a->archive), &(zip->stream),
+                   compression_level);
+               break;
        case _7Z_COPY:
        default:
                r = compression_init_encoder_copy(
index 2e784d872cb312d50665e7a4217cc1bc2973e3c3..6f02cff1fbf0a246357badb1a63ffee20d0e2fc9 100644 (file)
@@ -274,9 +274,10 @@ The value is one of
 .Dq deflate ,
 .Dq bzip2 ,
 .Dq lzma1 ,
-.Dq lzma2
+.Dq lzma2 ,
+.Dq ppmd ,
 or
-.Dq ppmd
+.Dq zstd
 to indicate how the following entries should be compressed.
 The values
 .Dq store
@@ -289,7 +290,9 @@ and other special entries.
 The value is interpreted as a decimal integer specifying the
 compression level.
 Values between 0 and 9 are supported, with the exception of bzip2
-which only supports values between 1 and 9.
+which only supports values between 1 and 9, and zstd which may
+support negative values depending on the library version, in
+addition to the commonly used values 1 through 22.
 The interpretation of the compression level depends on the chosen
 compression method.
 .El
index d91e88d8f40fa229b4f1a75c597aa7b98c64d563..4b69cff40474360d742e702e6c2f53300463aa39 100644 (file)
@@ -567,3 +567,9 @@ DEFINE_TEST(test_write_format_7zip_basic_ppmd)
        /* Test that making a 7-Zip archive file with PPMd compression. */
        test_basic("ppmd");
 }
+
+DEFINE_TEST(test_write_format_7zip_basic_zstd)
+{
+       /* Test making a 7-Zip archive file with zstandard compression. */
+       test_basic("zstd");
+}
index ac2fa08b4125af3ee5b91ee0984fc053f1b60bb7..307b1f13cdd0c94993b9ceba8240210e8b6fd345 100644 (file)
@@ -169,3 +169,9 @@ DEFINE_TEST(test_write_format_7zip_large_ppmd)
        /* Test that making a 7-Zip archive file with PPMd compression. */
        test_large("ppmd");
 }
+
+DEFINE_TEST(test_write_format_7zip_large_zstd)
+{
+       /* Test making a 7-Zip archive file with zstd compression. */
+       test_large("zstd");
+}