]> git.ipfire.org Git - thirdparty/libarchive.git/commitdiff
Improved control over frame size in zstd filter. (#2081)
authorDag-Erling Smørgrav <des@des.no>
Sat, 23 Mar 2024 00:35:29 +0000 (01:35 +0100)
committerGitHub <noreply@github.com>
Sat, 23 Mar 2024 00:35:29 +0000 (17:35 -0700)
Instead of just `min-frame-size` and `max-frame-size`, we now have four
separate options:

* `min-frame-in` delays the creation of a new frame on flush until the
uncompressed size of the current frame passes a certain threshold.

* `min-frame-out` delays the creation of a new frame on flush until the
compressed size of the current frame passes a certain threshold.

* `max-frame-in` forces the creation of a new frame as soon as possible
after the uncompressed size of the current frame reaches a certain
limit.

* `max-frame-out` forces the creation of a new frame as soon as possible
after the compressed size of the current frame reaches a certain limit.

We now also support `k`, `kB`, `M`, `MB`, `G` and `GB` suffixes for all
four options.

The old options are retained as aliases for the corresponding new
options.

libarchive/archive_write_add_filter_zstd.c
libarchive/test/test_write_filter_zstd.c
tar/bsdtar.1

index 9a7b7f22cd5d34e0b9eb0a34fcb3167356d03b7f..9a74085b5b7525e5de60db54cbe18642ba474bf1 100644 (file)
@@ -1,5 +1,6 @@
 /*-
  * Copyright (c) 2017 Sean Purcell
+ * Copyright (c) 2023-2024 Klara, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -59,8 +60,10 @@ struct private_data {
                resetting,
        } state;
        int              frame_per_file;
-       size_t           min_frame_size;
-       size_t           max_frame_size;
+       size_t           min_frame_in;
+       size_t           max_frame_in;
+       size_t           min_frame_out;
+       size_t           max_frame_out;
        size_t           cur_frame;
        size_t           cur_frame_in;
        size_t           cur_frame_out;
@@ -129,8 +132,10 @@ archive_write_add_filter_zstd(struct archive *_a)
        data->long_distance = 0;
 #if HAVE_ZSTD_H && HAVE_LIBZSTD_COMPRESSOR
        data->frame_per_file = 0;
-       data->min_frame_size = 0;
-       data->max_frame_size = SIZE_MAX;
+       data->min_frame_in = 0;
+       data->max_frame_in = SIZE_MAX;
+       data->min_frame_out = 0;
+       data->max_frame_out = SIZE_MAX;
        data->cur_frame_in = 0;
        data->cur_frame_out = 0;
        data->cstream = ZSTD_createCStream();
@@ -170,7 +175,8 @@ archive_compressor_zstd_free(struct archive_write_filter *f)
        return (ARCHIVE_OK);
 }
 
-static int string_to_number(const char *string, intmax_t *numberp)
+static int
+string_to_number(const char *string, intmax_t *numberp)
 {
        char *end;
 
@@ -184,6 +190,41 @@ static int string_to_number(const char *string, intmax_t *numberp)
        return (ARCHIVE_OK);
 }
 
+static int
+string_to_size(const char *string, size_t *numberp)
+{
+       uintmax_t number;
+       char *end;
+       unsigned int shift = 0;
+
+       if (string == NULL || *string == '\0' || *string == '-')
+               return (ARCHIVE_WARN);
+       number = strtoumax(string, &end, 10);
+       if (end > string) {
+               if (*end == 'K' || *end == 'k') {
+                       shift = 10;
+                       end++;
+               } else if (*end == 'M' || *end == 'm') {
+                       shift = 20;
+                       end++;
+               } else if (*end == 'G' || *end == 'g') {
+                       shift = 30;
+                       end++;
+               }
+               if (*end == 'B' || *end == 'b') {
+                       end++;
+               }
+       }
+       if (end == string || *end != '\0' || errno == EOVERFLOW) {
+               return (ARCHIVE_WARN);
+       }
+       if (number > (uintmax_t)SIZE_MAX >> shift) {
+               return (ARCHIVE_WARN);
+       }
+       *numberp = (size_t)(number << shift);
+       return (ARCHIVE_OK);
+}
+
 /*
  * Set write options.
  */
@@ -232,25 +273,29 @@ archive_compressor_zstd_options(struct archive_write_filter *f, const char *key,
        } else if (strcmp(key, "frame-per-file") == 0) {
                data->frame_per_file = 1;
                return (ARCHIVE_OK);
-       } else if (strcmp(key, "min-frame-size") == 0) {
-               intmax_t min_frame_size;
-               if (string_to_number(value, &min_frame_size) != ARCHIVE_OK) {
+       } else if (strcmp(key, "min-frame-in") == 0) {
+               if (string_to_size(value, &data->min_frame_in) != ARCHIVE_OK) {
                        return (ARCHIVE_WARN);
                }
-               if (min_frame_size < 0) {
+               return (ARCHIVE_OK);
+       } else if (strcmp(key, "min-frame-out") == 0 ||
+           strcmp(key, "min-frame-size") == 0) {
+               if (string_to_size(value, &data->min_frame_out) != ARCHIVE_OK) {
                        return (ARCHIVE_WARN);
                }
-               data->min_frame_size = min_frame_size;
                return (ARCHIVE_OK);
-       } else if (strcmp(key, "max-frame-size") == 0) {
-               intmax_t max_frame_size;
-               if (string_to_number(value, &max_frame_size) != ARCHIVE_OK) {
+       } else if (strcmp(key, "max-frame-in") == 0 ||
+           strcmp(key, "max-frame-size") == 0) {
+               if (string_to_size(value, &data->max_frame_in) != ARCHIVE_OK ||
+                   data->max_frame_in < 1024) {
                        return (ARCHIVE_WARN);
                }
-               if (max_frame_size < 1024) {
+               return (ARCHIVE_OK);
+       } else if (strcmp(key, "max-frame-out") == 0) {
+               if (string_to_size(value, &data->max_frame_out) != ARCHIVE_OK ||
+                   data->max_frame_out < 1024) {
                        return (ARCHIVE_WARN);
                }
-               data->max_frame_size = max_frame_size;
                return (ARCHIVE_OK);
 #endif
        }
@@ -353,9 +398,12 @@ archive_compressor_zstd_flush(struct archive_write_filter *f)
 {
        struct private_data *data = (struct private_data *)f->data;
 
-       if (data->frame_per_file && data->state == running &&
-           data->cur_frame_out > data->min_frame_size)
-               data->state = finishing;
+       if (data->frame_per_file && data->state == running) {
+               if (data->cur_frame_in > data->min_frame_in &&
+                   data->cur_frame_out > data->min_frame_out) {
+                       data->state = finishing;
+               }
+       }
        return (drive_compressor(f, data, 1, NULL, 0));
 }
 
@@ -414,9 +462,11 @@ drive_compressor(struct archive_write_filter *f,
                data->total_in += in.pos - ipos;
                data->cur_frame_in += in.pos - ipos;
                data->cur_frame_out += data->out.pos - opos;
-               if (data->state == running &&
-                   data->cur_frame_in >= data->max_frame_size) {
-                       data->state = finishing;
+               if (data->state == running) {
+                       if (data->cur_frame_in >= data->max_frame_in ||
+                           data->cur_frame_out >= data->max_frame_out) {
+                               data->state = finishing;
+                       }
                }
                if (data->out.pos == data->out.size ||
                    (flush && data->out.pos > 0)) {
index 04c0baa8954080fb612ee0f1bca59fc23d9d0c91..c68074e87b2002b2932765637b02582970e9f73c 100644 (file)
@@ -136,28 +136,98 @@ DEFINE_TEST(test_write_filter_zstd)
        /* frame-per-file: boolean */
        assertEqualIntA(a, ARCHIVE_OK,
            archive_write_set_filter_option(a, NULL, "frame-per-file", ""));
-       /* min-frame-size: >= 0 */
+       /* min-frame-in: >= 0 */
        assertEqualIntA(a, ARCHIVE_FAILED,
-           archive_write_set_filter_option(a, NULL, "min-frame-size", ""));
+           archive_write_set_filter_option(a, NULL, "min-frame-out", ""));
        assertEqualIntA(a, ARCHIVE_FAILED,
-           archive_write_set_filter_option(a, NULL, "min-frame-size", "-1"));
+           archive_write_set_filter_option(a, NULL, "min-frame-out", "-1"));
        assertEqualIntA(a, ARCHIVE_OK,
-           archive_write_set_filter_option(a, NULL, "min-frame-size", "0"));
+           archive_write_set_filter_option(a, NULL, "min-frame-out", "0"));
        assertEqualIntA(a, ARCHIVE_OK,
-           archive_write_set_filter_option(a, NULL, "min-frame-size", "1048576"));
-       /* max-frame-size: >= 1024 */
+           archive_write_set_filter_option(a, NULL, "min-frame-out", "1048576"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "min-frame-out", "1k"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "min-frame-out", "1kB"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "min-frame-out", "1M"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "min-frame-out", "1MB"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "min-frame-out", "1G"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "min-frame-out", "1GB"));
+       /* min-frame-out: >= 0 */
+       assertEqualIntA(a, ARCHIVE_FAILED,
+           archive_write_set_filter_option(a, NULL, "min-frame-in", ""));
+       assertEqualIntA(a, ARCHIVE_FAILED,
+           archive_write_set_filter_option(a, NULL, "min-frame-in", "-1"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "min-frame-in", "0"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "min-frame-in", "1048576"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "min-frame-in", "1k"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "min-frame-in", "1kB"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "min-frame-in", "1M"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "min-frame-in", "1MB"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "min-frame-in", "1G"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "min-frame-in", "1GB"));
+       /* max-frame-in: >= 1024 */
+       assertEqualIntA(a, ARCHIVE_FAILED,
+           archive_write_set_filter_option(a, NULL, "max-frame-in", ""));
        assertEqualIntA(a, ARCHIVE_FAILED,
-           archive_write_set_filter_option(a, NULL, "max-frame-size", ""));
+           archive_write_set_filter_option(a, NULL, "max-frame-in", "-1"));
        assertEqualIntA(a, ARCHIVE_FAILED,
-           archive_write_set_filter_option(a, NULL, "max-frame-size", "-1"));
+           archive_write_set_filter_option(a, NULL, "max-frame-in", "0"));
        assertEqualIntA(a, ARCHIVE_FAILED,
-           archive_write_set_filter_option(a, NULL, "max-frame-size", "0"));
+           archive_write_set_filter_option(a, NULL, "max-frame-in", "1023"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "max-frame-in", "1024"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "max-frame-in", "1048576"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "max-frame-in", "1k"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "max-frame-in", "1kB"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "max-frame-in", "1M"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "max-frame-in", "1MB"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "max-frame-in", "1G"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "max-frame-in", "1GB"));
+       /* max-frame-out: >= 1024 */
+       assertEqualIntA(a, ARCHIVE_FAILED,
+           archive_write_set_filter_option(a, NULL, "max-frame-out", ""));
        assertEqualIntA(a, ARCHIVE_FAILED,
-           archive_write_set_filter_option(a, NULL, "max-frame-size", "1023"));
+           archive_write_set_filter_option(a, NULL, "max-frame-out", "-1"));
+       assertEqualIntA(a, ARCHIVE_FAILED,
+           archive_write_set_filter_option(a, NULL, "max-frame-out", "0"));
+       assertEqualIntA(a, ARCHIVE_FAILED,
+           archive_write_set_filter_option(a, NULL, "max-frame-out", "1023"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "max-frame-out", "1024"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "max-frame-out", "1048576"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "max-frame-out", "1k"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "max-frame-out", "1kB"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "max-frame-out", "1M"));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_write_set_filter_option(a, NULL, "max-frame-out", "1MB"));
        assertEqualIntA(a, ARCHIVE_OK,
-           archive_write_set_filter_option(a, NULL, "max-frame-size", "1024"));
+           archive_write_set_filter_option(a, NULL, "max-frame-out", "1G"));
        assertEqualIntA(a, ARCHIVE_OK,
-           archive_write_set_filter_option(a, NULL, "max-frame-size", "1048576"));
+           archive_write_set_filter_option(a, NULL, "max-frame-out", "1GB"));
 #endif
 #if ZSTD_VERSION_NUMBER >= MINVER_LONG
        if ((int)(sizeof(size_t) == 4))
index ae84e00fcd3cdfed5403624593ca5ec9d3b323fe..e570d2a48a017931e58c2b7d6de782177292e503 100644 (file)
@@ -23,7 +23,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd December 10, 2023
+.Dd March 1, 2024
 .Dt TAR 1
 .Os
 .Sh NAME
@@ -655,16 +655,56 @@ use as many threads as there are CPU cores on the system.
 .It Cm zstd:frame-per-file
 Start a new compression frame at the beginning of each file in the
 archive.
-.It Cm zstd:min-frame-size Ns = Ns Ar N
+.It Cm zstd:min-frame-in Ns = Ns Ar N
 In combination with
 .Cm zstd:frame-per-file ,
-do not start a new compression frame unless the current frame is at least
+do not start a new compression frame unless the uncompressed size of
+the current frame is at least
 .Ar N
 bytes.
-.It Cm zstd:max-frame-size Ns = Ns Ar N
-Start a new compression frame as soon as the current frame exceeds
+The number may be followed by
+.Li k / Li kB ,
+.Li M / Li MB ,
+or
+.Li G / Li GB
+to indicate kilobytes, megabytes or gigabytes respectively.
+.It Cm zstd:min-frame-out Ns = Ns Ar N , Cm zstd:min-frame-size Ns = Ns Ar N
+In combination with
+.Cm zstd:frame-per-file ,
+do not start a new compression frame unless the compressed size of the
+current frame is at least
+.Ar N
+bytes.
+The number may be followed by
+.Li k / Li kB ,
+.Li M / Li MB ,
+or
+.Li G / Li GB
+to indicate kilobytes, megabytes or gigabytes respectively.
+.It Cm zstd:max-frame-in Ns = Ns Ar N , Cm zstd:max-frame-size Ns = Ns Ar N
+Start a new compression frame as soon as possible after the
+uncompressed size of the current frame exceeds
 .Ar N
 bytes.
+The number may be followed by
+.Li k / Li kB ,
+.Li M / Li MB ,
+or
+.Li G / Li GB
+to indicate kilobytes, megabytes or gigabytes respectively.
+Values less than 1,024 will be rejected.
+.It Cm zstd:max-frame-out Ns = Ns Ar N
+Start a new compression frame as soon as possible after the compressed
+size of the current frame exceeds
+.Ar N
+bytes.
+The number may be followed by
+.Li k / Li kB ,
+.Li M / Li MB ,
+or
+.Li G / Li GB
+to indicate kilobytes, megabytes or gigabytes respectively.
+Values less than 1,024 will be rejected.
 .It Cm lzop:compression-level
 A decimal integer from 1 to 9 specifying the lzop compression level.
 .It Cm xz:compression-level