git.ipfire.org Git - thirdparty/libarchive.git/commitdiff
Support producing multi-fragment zstd archives.
author Dag-Erling Smørgrav <des@des.no>
Tue, 22 Nov 2022 02:52:43 +0000 (02:52 +0000)
committer Martin Matuška <martin@matuska.de>
Mon, 9 Jan 2023 14:23:53 +0000 (15:23 +0100)
When the `zstd:frame-per-file` option is specified, the zstd filter will start a new frame when flushed, i.e. for each file in the archive.

The `zstd:min-frame-size=N` option modifies the `zstd:frame-per-file` option in that it will not start a new frame unless the current one exceeds `N` bytes.

When the `zstd:max-frame-size=N` option is specified, the zstd filter will start a new frame any time the compressed size of the previous one exceeds `N` bytes.

These options decrease compression efficiency by a varying amount (depending on the exact composition of the archive's contents) but render the tarball seekable to a certain extent.

libarchive/archive_write_add_filter_zstd.c

index 1d194b1962ff504d2284ce08c722f3545954527c..37c5e741ebeee18545bd7f3a52ad8555158b7afe 100644 (file)
@@ -55,8 +55,19 @@ struct private_data {
        int              compression_level;
        int              threads;
 #if HAVE_ZSTD_H && HAVE_LIBZSTD_COMPRESSOR
+       enum {
+               running,
+               finishing,
+               resetting,
+       } state;
+       int              frame_per_file;
+       size_t           min_frame_size;
+       size_t           max_frame_size;
+       size_t           cur_frame;
+       size_t           cur_frame_in;
+       size_t           cur_frame_out;
+       size_t           total_in;
        ZSTD_CStream    *cstream;
-       int64_t          total_in;
        ZSTD_outBuffer   out;
 #else
        struct archive_write_program_data *pdata;
@@ -78,6 +89,7 @@ static int archive_compressor_zstd_options(struct archive_write_filter *,
 static int archive_compressor_zstd_open(struct archive_write_filter *);
 static int archive_compressor_zstd_write(struct archive_write_filter *,
                    const void *, size_t);
+static int archive_compressor_zstd_flush(struct archive_write_filter *);
 static int archive_compressor_zstd_close(struct archive_write_filter *);
 static int archive_compressor_zstd_free(struct archive_write_filter *);
 #if HAVE_ZSTD_H && HAVE_LIBZSTD_COMPRESSOR
@@ -106,6 +118,7 @@ archive_write_add_filter_zstd(struct archive *_a)
        f->data = data;
        f->open = &archive_compressor_zstd_open;
        f->options = &archive_compressor_zstd_options;
+       f->flush = &archive_compressor_zstd_flush;
        f->close = &archive_compressor_zstd_close;
        f->free = &archive_compressor_zstd_free;
        f->code = ARCHIVE_FILTER_ZSTD;
@@ -113,6 +126,11 @@ archive_write_add_filter_zstd(struct archive *_a)
        data->compression_level = CLEVEL_DEFAULT;
        data->threads = 0;
 #if HAVE_ZSTD_H && HAVE_LIBZSTD_COMPRESSOR
+       data->frame_per_file = 0;
+       data->min_frame_size = 0;
+       data->max_frame_size = SIZE_MAX;
+       data->cur_frame_in = 0;
+       data->cur_frame_out = 0;
        data->cstream = ZSTD_createCStream();
        if (data->cstream == NULL) {
                free(data);
@@ -154,6 +172,8 @@ static int string_to_number(const char *string, intmax_t *numberp)
 {
        char *end;
 
+       if (string == NULL || *string == '\0')
+               return (ARCHIVE_WARN);
        *numberp = strtoimax(string, &end, 10);
        if (end == string || *end != '\0' || errno == EOVERFLOW) {
                *numberp = 0;
@@ -206,6 +226,31 @@ archive_compressor_zstd_options(struct archive_write_filter *f, const char *key,
                }
                data->threads = threads;
                return (ARCHIVE_OK);
+#if HAVE_ZSTD_H && HAVE_LIBZSTD_COMPRESSOR
+       } else if (strcmp(key, "frame-per-file") == 0) {
+               data->frame_per_file = 1;
+               return (ARCHIVE_OK);
+       } else if (strcmp(key, "min-frame-size") == 0) {
+               intmax_t min_frame_size;
+               if (string_to_number(value, &min_frame_size) != ARCHIVE_OK) {
+                       return (ARCHIVE_WARN);
+               }
+               if (min_frame_size < 0) {
+                       return (ARCHIVE_WARN);
+               }
+               data->min_frame_size = min_frame_size;
+               return (ARCHIVE_OK);
+       } else if (strcmp(key, "max-frame-size") == 0) {
+               intmax_t max_frame_size;
+               if (string_to_number(value, &max_frame_size) != ARCHIVE_OK) {
+                       return (ARCHIVE_WARN);
+               }
+               if (max_frame_size < 1024) {
+                       return (ARCHIVE_WARN);
+               }
+               data->max_frame_size = max_frame_size;
+               return (ARCHIVE_OK);
+#endif
        }
 
        /* Note: The "warn" return is just to inform the options
@@ -267,15 +312,22 @@ archive_compressor_zstd_write(struct archive_write_filter *f, const void *buff,
     size_t length)
 {
        struct private_data *data = (struct private_data *)f->data;
-       int ret;
 
-       /* Update statistics */
-       data->total_in += length;
+       return (drive_compressor(f, data, 0, buff, length));
+}
 
-       if ((ret = drive_compressor(f, data, 0, buff, length)) != ARCHIVE_OK)
-               return (ret);
+/*
+ * Flush the compressed stream.
+ */
+static int
+archive_compressor_zstd_flush(struct archive_write_filter *f)
+{
+       struct private_data *data = (struct private_data *)f->data;
 
-       return (ARCHIVE_OK);
+       if (data->frame_per_file && data->state == running &&
+           data->cur_frame_out > data->min_frame_size)
+               data->state = finishing;
+       return (drive_compressor(f, data, 1, NULL, 0));
 }
 
 /*
@@ -286,56 +338,72 @@ archive_compressor_zstd_close(struct archive_write_filter *f)
 {
        struct private_data *data = (struct private_data *)f->data;
 
-       /* Finish zstd frame */
-       return drive_compressor(f, data, 1, NULL, 0);
+       if (data->state == running)
+               data->state = finishing;
+       return (drive_compressor(f, data, 1, NULL, 0));
 }
 
 /*
  * Utility function to push input data through compressor,
  * writing full output blocks as necessary.
- *
- * Note that this handles both the regular write case (finishing ==
- * false) and the end-of-archive case (finishing == true).
  */
 static int
 drive_compressor(struct archive_write_filter *f,
-    struct private_data *data, int finishing, const void *src, size_t length)
+    struct private_data *data, int flush, const void *src, size_t length)
 {
        ZSTD_inBuffer in = { .src = src, .size = length, .pos = 0 };
-       size_t zstdret;
+       size_t ipos, opos, zstdret = 0;
        int ret;
 
        for (;;) {
-               if (data->out.pos == data->out.size) {
-                       ret = __archive_write_filter(f->next_filter,
-                           data->out.dst, data->out.pos);
-                       if (ret != ARCHIVE_OK)
-                               return (ARCHIVE_FATAL);
-                       data->out.pos = 0;
+               ipos = in.pos;
+               opos = data->out.pos;
+               switch (data->state) {
+               case running:
+                       if (in.pos == in.size)
+                               return (ARCHIVE_OK);
+                       zstdret = ZSTD_compressStream(data->cstream,
+                           &data->out, &in);
+                       if (ZSTD_isError(zstdret))
+                               goto zstd_fatal;
+                       break;
+               case finishing:
+                       zstdret = ZSTD_endStream(data->cstream, &data->out);
+                       if (ZSTD_isError(zstdret))
+                               goto zstd_fatal;
+                       if (zstdret == 0)
+                               data->state = resetting;
+                       break;
+               case resetting:
+                       ZSTD_CCtx_reset(data->cstream, ZSTD_reset_session_only);
+                       data->cur_frame++;
+                       data->cur_frame_in = 0;
+                       data->cur_frame_out = 0;
+                       data->state = running;
+                       break;
                }
-
-               /* If there's nothing to do, we're done. */
-               if (!finishing && in.pos == in.size)
-                       return (ARCHIVE_OK);
-
-               zstdret = !finishing ?
-                   ZSTD_compressStream(data->cstream, &data->out, &in) :
-                   ZSTD_endStream(data->cstream, &data->out);
-
-               if (ZSTD_isError(zstdret)) {
-                       archive_set_error(f->archive, ARCHIVE_ERRNO_MISC,
-                           "Zstd compression failed: %s",
-                           ZSTD_getErrorName(zstdret));
-                       return (ARCHIVE_FATAL);
+               data->total_in += in.pos - ipos;
+               data->cur_frame_in += in.pos - ipos;
+               data->cur_frame_out += data->out.pos - opos;
+               if (data->state == running &&
+                   data->cur_frame_in >= data->max_frame_size) {
+                       data->state = finishing;
                }
-
-               /* If we're finishing, 0 means nothing left to flush */
-               if (finishing && zstdret == 0) {
+               if (data->out.pos == data->out.size ||
+                   (flush && data->out.pos > 0)) {
                        ret = __archive_write_filter(f->next_filter,
                            data->out.dst, data->out.pos);
-                       return (ret);
+                       if (ret != ARCHIVE_OK)
+                               goto fatal;
+                       data->out.pos = 0;
                }
        }
+zstd_fatal:
+       archive_set_error(f->archive, ARCHIVE_ERRNO_MISC,
+           "Zstd compression failed: %s",
+           ZSTD_getErrorName(zstdret));
+fatal:
+       return (ARCHIVE_FATAL);
 }
 
 #else /* HAVE_ZSTD_H && HAVE_LIBZSTD_COMPRESSOR */
@@ -380,6 +448,13 @@ archive_compressor_zstd_write(struct archive_write_filter *f, const void *buff,
        return __archive_write_program_write(f, data->pdata, buff, length);
 }
 
+static int
+archive_compressor_zstd_flush(struct archive_write_filter *f)
+{
+
+       return (ARCHIVE_OK);
+}
+
 static int
 archive_compressor_zstd_close(struct archive_write_filter *f)
 {