git.ipfire.org Git - thirdparty/linux.git/commitdiff
btrfs: zstd: enable negative compression levels mount option
author: Daniel Vacek <neelx@suse.com>
Thu, 30 Jan 2025 17:58:19 +0000 (18:58 +0100)
committer: David Sterba <dsterba@suse.com>
Tue, 18 Mar 2025 19:35:41 +0000 (20:35 +0100)
Allow using the fast modes (negative compression levels) of zstd as a
mount option.

As per the results, the compression ratio is (expectedly) lower:

for level in {-15..-1} 1 2 3; \
do printf "level %3d\n" $level; \
  mount -o compress=zstd:$level /dev/sdb /mnt/test/; \
  grep sdb /proc/mounts; \
  cp -r /usr/bin       /mnt/test/; sync; compsize /mnt/test/bin; \
  cp -r /usr/share/doc /mnt/test/; sync; compsize /mnt/test/doc; \
  cp    enwik9         /mnt/test/; sync; compsize /mnt/test/enwik9; \
  cp    linux-6.13.tar /mnt/test/; sync; compsize /mnt/test/linux-6.13.tar; \
  rm -r /mnt/test/{bin,doc,enwik9,linux-6.13.tar}; \
  umount /mnt/test/; \
done |& tee results | \
awk '/^level/{print}/^TOTAL/{print$3"\t"$2"  |"}' | paste - - - - -

266M bin  | 45M doc  | 953M wiki | 1.4G source
=============================+===============+===============+===============+
level -15 180M 67%  | 30M 68%  | 694M 72%  | 598M 40%  |
level -14 180M 67%  | 30M 67%  | 683M 71%  | 581M 39%  |
level -13 177M 66%  | 29M 66%  | 671M 70%  | 566M 38%  |
level -12 174M 65%  | 29M 65%  | 658M 69%  | 548M 37%  |
level -11 174M 65%  | 28M 64%  | 645M 67%  | 530M 35%  |
level -10 171M 64%  | 28M 62%  | 631M 66%  | 512M 34%  |
level  -9 165M 62%  | 27M 61%  | 615M 64%  | 493M 33%  |
level  -8 161M 60%  | 27M 59%  | 598M 62%  | 475M 32%  |
level  -7 155M 58%  | 26M 58%  | 582M 61%  | 457M 30%  |
level  -6 151M 56%  | 25M 56%  | 565M 59%  | 437M 29%  |
level  -5 145M 54%  | 24M 55%  | 545M 57%  | 417M 28%  |
level  -4 139M 52%  | 23M 52%  | 520M 54%  | 391M 26%  |
level  -3 135M 50%  | 22M 50%  | 495M 51%  | 369M 24%  |
level  -2 127M 47%  | 22M 48%  | 470M 49%  | 349M 23%  |
level  -1 120M 45%  | 21M 47%  | 452M 47%  | 332M 22%  |
level   1 110M 41%  | 17M 39%  | 362M 38%  | 290M 19%  |
level   2 106M 40%  | 17M 38%  | 349M 36%  | 288M 19%  |
level   3 104M 39%  | 16M 37%  | 340M 35%  | 276M 18%  |

The samples represent some data sets that can be commonly found and show
approximate compressibility. The fast levels trade off speed for ratio
and are best suitable for highly compressible data.

As can be seen above, compared to the current default zstd level 3, the
compressed size at level -15 is roughly 2x larger, and the compression
ratio improves almost linearly with each step toward the positive levels.

Signed-off-by: Daniel Vacek <neelx@suse.com>
[ update changelog ]
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/compression.c
fs/btrfs/compression.h
fs/btrfs/fs.h
fs/btrfs/inode.c
fs/btrfs/super.c
fs/btrfs/zlib.c
fs/btrfs/zstd.c

index 0c4d486c3048da65f7d39dc535332a8f70e68aef..1fe154e7cc0283bb6033922207f85df0b7910143 100644 (file)
@@ -740,7 +740,7 @@ static const struct btrfs_compress_op * const btrfs_compress_op[] = {
        &btrfs_zstd_compress,
 };
 
-static struct list_head *alloc_workspace(int type, unsigned int level)
+static struct list_head *alloc_workspace(int type, int level)
 {
        switch (type) {
        case BTRFS_COMPRESS_NONE: return alloc_heuristic_ws();
@@ -818,7 +818,7 @@ static void btrfs_cleanup_workspace_manager(int type)
  * Preallocation makes a forward progress guarantees and we do not return
  * errors.
  */
-struct list_head *btrfs_get_workspace(int type, unsigned int level)
+struct list_head *btrfs_get_workspace(int type, int level)
 {
        struct workspace_manager *wsm;
        struct list_head *workspace;
@@ -968,14 +968,14 @@ static void put_workspace(int type, struct list_head *ws)
  * Adjust @level according to the limits of the compression algorithm or
  * fallback to default
  */
-static unsigned int btrfs_compress_set_level(int type, unsigned level)
+static int btrfs_compress_set_level(unsigned int type, int level)
 {
        const struct btrfs_compress_op *ops = btrfs_compress_op[type];
 
        if (level == 0)
                level = ops->default_level;
        else
-               level = min(level, ops->max_level);
+               level = min(max(level, ops->min_level), ops->max_level);
 
        return level;
 }
@@ -1023,12 +1023,10 @@ int btrfs_compress_filemap_get_folio(struct address_space *mapping, u64 start,
  * @total_out is an in/out parameter, must be set to the input length and will
  * be also used to return the total number of compressed bytes
  */
-int btrfs_compress_folios(unsigned int type_level, struct address_space *mapping,
+int btrfs_compress_folios(unsigned int type, int level, struct address_space *mapping,
                         u64 start, struct folio **folios, unsigned long *out_folios,
                         unsigned long *total_in, unsigned long *total_out)
 {
-       int type = btrfs_compress_type(type_level);
-       int level = btrfs_compress_level(type_level);
        const unsigned long orig_len = *total_out;
        struct list_head *workspace;
        int ret;
@@ -1590,18 +1588,19 @@ out:
 
 /*
  * Convert the compression suffix (eg. after "zlib" starting with ":") to
- * level, unrecognized string will set the default level
+ * level, unrecognized string will set the default level. Negative level
+ * numbers are allowed.
  */
-unsigned int btrfs_compress_str2level(unsigned int type, const char *str)
+int btrfs_compress_str2level(unsigned int type, const char *str)
 {
-       unsigned int level = 0;
+       int level = 0;
        int ret;
 
        if (!type)
                return 0;
 
        if (str[0] == ':') {
-               ret = kstrtouint(str + 1, 10, &level);
+               ret = kstrtoint(str + 1, 10, &level);
                if (ret)
                        level = 0;
        }
index 954034086d0d44b33d61bfd32fc6ee849505adcc..933178f03d8f8bc01a080bc2580ada04939c6fbf 100644 (file)
@@ -72,16 +72,6 @@ struct compressed_bio {
        struct btrfs_bio bbio;
 };
 
-static inline unsigned int btrfs_compress_type(unsigned int type_level)
-{
-       return (type_level & 0xF);
-}
-
-static inline unsigned int btrfs_compress_level(unsigned int type_level)
-{
-       return ((type_level & 0xF0) >> 4);
-}
-
 /* @range_end must be exclusive. */
 static inline u32 btrfs_calc_input_length(u64 range_end, u64 cur)
 {
@@ -93,7 +83,7 @@ static inline u32 btrfs_calc_input_length(u64 range_end, u64 cur)
 int __init btrfs_init_compress(void);
 void __cold btrfs_exit_compress(void);
 
-int btrfs_compress_folios(unsigned int type_level, struct address_space *mapping,
+int btrfs_compress_folios(unsigned int type, int level, struct address_space *mapping,
                          u64 start, struct folio **folios, unsigned long *out_folios,
                         unsigned long *total_in, unsigned long *total_out);
 int btrfs_decompress(int type, const u8 *data_in, struct folio *dest_folio,
@@ -107,7 +97,7 @@ void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
                                   bool writeback);
 void btrfs_submit_compressed_read(struct btrfs_bio *bbio);
 
-unsigned int btrfs_compress_str2level(unsigned int type, const char *str);
+int btrfs_compress_str2level(unsigned int type, const char *str);
 
 struct folio *btrfs_alloc_compr_folio(void);
 void btrfs_free_compr_folio(struct folio *folio);
@@ -131,14 +121,15 @@ struct workspace_manager {
        wait_queue_head_t ws_wait;
 };
 
-struct list_head *btrfs_get_workspace(int type, unsigned int level);
+struct list_head *btrfs_get_workspace(int type, int level);
 void btrfs_put_workspace(int type, struct list_head *ws);
 
 struct btrfs_compress_op {
        struct workspace_manager *workspace_manager;
        /* Maximum level supported by the compression algorithm */
-       unsigned int max_level;
-       unsigned int default_level;
+       int min_level;
+       int max_level;
+       int default_level;
 };
 
 /* The heuristic workspaces are managed via the 0th workspace manager */
@@ -187,9 +178,9 @@ int zstd_decompress(struct list_head *ws, const u8 *data_in,
                size_t destlen);
 void zstd_init_workspace_manager(void);
 void zstd_cleanup_workspace_manager(void);
-struct list_head *zstd_alloc_workspace(unsigned int level);
+struct list_head *zstd_alloc_workspace(int level);
 void zstd_free_workspace(struct list_head *ws);
-struct list_head *zstd_get_workspace(unsigned int level);
+struct list_head *zstd_get_workspace(int level);
 void zstd_put_workspace(struct list_head *ws);
 
 #endif
index b572d6b9730b2217f4b637b3bde96bcab34b6824..1ddce774b2761b2fb51dbf2201697ac96ff0688a 100644 (file)
@@ -486,7 +486,7 @@ struct btrfs_fs_info {
        unsigned long long mount_opt;
 
        unsigned long compress_type:4;
-       unsigned int compress_level;
+       int compress_level;
        u32 commit_interval;
        /*
         * It is a suggestive number, the read side is safe even it gets a
index f51451402b96f335c1bf9089791e20d69a037a24..bdf9cb93be2def73dba6e527555e664c4aae0b66 100644 (file)
@@ -974,7 +974,7 @@ again:
                compress_type = inode->prop_compress;
 
        /* Compression level is applied here. */
-       ret = btrfs_compress_folios(compress_type | (fs_info->compress_level << 4),
+       ret = btrfs_compress_folios(compress_type, fs_info->compress_level,
                                    mapping, start, folios, &nr_folios, &total_in,
                                    &total_compressed);
        if (ret)
index dc4fee519ca6c1fc5f2c1b55a3a983c1f2740aef..fdec546a87f399a46cf5e5b9514a87f2cdfae7d3 100644 (file)
@@ -84,7 +84,7 @@ struct btrfs_fs_context {
        u32 thread_pool_size;
        unsigned long long mount_opt;
        unsigned long compress_type:4;
-       unsigned int compress_level;
+       int compress_level;
        refcount_t refs;
 };
 
index 96e3b1e09bf6f215bc4600aa2483176792f43bdb..545f413d81fc2b126e463bfda8079fdaa47d103e 100644 (file)
@@ -489,6 +489,7 @@ out:
 
 const struct btrfs_compress_op btrfs_zlib_compress = {
        .workspace_manager      = &wsm,
+       .min_level              = 1,
        .max_level              = 9,
        .default_level          = BTRFS_ZLIB_DEFAULT_LEVEL,
 };
index 5232b56d5892599c0f48f5712fd62363acefe0e6..a7bfbf8bea7d081035baa9a3123475d7a0e7fc41 100644 (file)
 #define ZSTD_BTRFS_MAX_WINDOWLOG 17
 #define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG)
 #define ZSTD_BTRFS_DEFAULT_LEVEL 3
+#define ZSTD_BTRFS_MIN_LEVEL -15
 #define ZSTD_BTRFS_MAX_LEVEL 15
 /* 307s to avoid pathologically clashing with transaction commit */
 #define ZSTD_BTRFS_RECLAIM_JIFFIES (307 * HZ)
 
-static zstd_parameters zstd_get_btrfs_parameters(unsigned int level,
+static zstd_parameters zstd_get_btrfs_parameters(int level,
                                                 size_t src_len)
 {
        zstd_parameters params = zstd_get_params(level, src_len);
@@ -45,8 +46,8 @@ struct workspace {
        void *mem;
        size_t size;
        char *buf;
-       unsigned int level;
-       unsigned int req_level;
+       int level;
+       int req_level;
        unsigned long last_used; /* jiffies */
        struct list_head list;
        struct list_head lru_list;
@@ -93,8 +94,10 @@ static inline struct workspace *list_to_workspace(struct list_head *list)
        return container_of(list, struct workspace, list);
 }
 
-void zstd_free_workspace(struct list_head *ws);
-struct list_head *zstd_alloc_workspace(unsigned int level);
+static inline int clip_level(int level)
+{
+       return max(0, level - 1);
+}
 
 /*
  * Timer callback to free unused workspaces.
@@ -123,7 +126,7 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer)
        list_for_each_prev_safe(pos, next, &wsm.lru_list) {
                struct workspace *victim = container_of(pos, struct workspace,
                                                        lru_list);
-               unsigned int level;
+               int level;
 
                if (time_after(victim->last_used, reclaim_threshold))
                        break;
@@ -137,8 +140,8 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer)
                list_del(&victim->list);
                zstd_free_workspace(&victim->list);
 
-               if (list_empty(&wsm.idle_ws[level - 1]))
-                       clear_bit(level - 1, &wsm.active_map);
+               if (list_empty(&wsm.idle_ws[level]))
+                       clear_bit(level, &wsm.active_map);
 
        }
 
@@ -160,9 +163,11 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer)
 static void zstd_calc_ws_mem_sizes(void)
 {
        size_t max_size = 0;
-       unsigned int level;
+       int level;
 
-       for (level = 1; level <= ZSTD_BTRFS_MAX_LEVEL; level++) {
+       for (level = ZSTD_BTRFS_MIN_LEVEL; level <= ZSTD_BTRFS_MAX_LEVEL; level++) {
+               if (level == 0)
+                       continue;
                zstd_parameters params =
                        zstd_get_btrfs_parameters(level, ZSTD_BTRFS_MAX_INPUT);
                size_t level_size =
@@ -171,7 +176,8 @@ static void zstd_calc_ws_mem_sizes(void)
                              zstd_dstream_workspace_bound(ZSTD_BTRFS_MAX_INPUT));
 
                max_size = max_t(size_t, max_size, level_size);
-               zstd_ws_mem_sizes[level - 1] = max_size;
+               /* Use level 1 workspace size for all the fast mode negative levels. */
+               zstd_ws_mem_sizes[clip_level(level)] = max_size;
        }
 }
 
@@ -233,11 +239,11 @@ void zstd_cleanup_workspace_manager(void)
  * offer the opportunity to reclaim the workspace in favor of allocating an
  * appropriately sized one in the future.
  */
-static struct list_head *zstd_find_workspace(unsigned int level)
+static struct list_head *zstd_find_workspace(int level)
 {
        struct list_head *ws;
        struct workspace *workspace;
-       int i = level - 1;
+       int i = clip_level(level);
 
        spin_lock_bh(&wsm.lock);
        for_each_set_bit_from(i, &wsm.active_map, ZSTD_BTRFS_MAX_LEVEL) {
@@ -247,7 +253,7 @@ static struct list_head *zstd_find_workspace(unsigned int level)
                        list_del_init(ws);
                        /* keep its place if it's a lower level using this */
                        workspace->req_level = level;
-                       if (level == workspace->level)
+                       if (clip_level(level) == workspace->level)
                                list_del(&workspace->lru_list);
                        if (list_empty(&wsm.idle_ws[i]))
                                clear_bit(i, &wsm.active_map);
@@ -270,7 +276,7 @@ static struct list_head *zstd_find_workspace(unsigned int level)
  * attempt to allocate a new workspace.  If we fail to allocate one due to
  * memory pressure, go to sleep waiting for the max level workspace to free up.
  */
-struct list_head *zstd_get_workspace(unsigned int level)
+struct list_head *zstd_get_workspace(int level)
 {
        struct list_head *ws;
        unsigned int nofs_flag;
@@ -319,7 +325,7 @@ void zstd_put_workspace(struct list_head *ws)
        spin_lock_bh(&wsm.lock);
 
        /* A node is only taken off the lru if we are the corresponding level */
-       if (workspace->req_level == workspace->level) {
+       if (clip_level(workspace->req_level) == workspace->level) {
                /* Hide a max level workspace from reclaim */
                if (list_empty(&wsm.idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1])) {
                        INIT_LIST_HEAD(&workspace->lru_list);
@@ -332,13 +338,13 @@ void zstd_put_workspace(struct list_head *ws)
                }
        }
 
-       set_bit(workspace->level - 1, &wsm.active_map);
-       list_add(&workspace->list, &wsm.idle_ws[workspace->level - 1]);
+       set_bit(workspace->level, &wsm.active_map);
+       list_add(&workspace->list, &wsm.idle_ws[workspace->level]);
        workspace->req_level = 0;
 
        spin_unlock_bh(&wsm.lock);
 
-       if (workspace->level == ZSTD_BTRFS_MAX_LEVEL)
+       if (workspace->level == clip_level(ZSTD_BTRFS_MAX_LEVEL))
                cond_wake_up(&wsm.wait);
 }
 
@@ -351,7 +357,7 @@ void zstd_free_workspace(struct list_head *ws)
        kfree(workspace);
 }
 
-struct list_head *zstd_alloc_workspace(unsigned int level)
+struct list_head *zstd_alloc_workspace(int level)
 {
        struct workspace *workspace;
 
@@ -359,8 +365,9 @@ struct list_head *zstd_alloc_workspace(unsigned int level)
        if (!workspace)
                return ERR_PTR(-ENOMEM);
 
-       workspace->size = zstd_ws_mem_sizes[level - 1];
-       workspace->level = level;
+       /* Use level 1 workspace size for all the fast mode negative levels. */
+       workspace->size = zstd_ws_mem_sizes[clip_level(level)];
+       workspace->level = clip_level(level);
        workspace->req_level = level;
        workspace->last_used = jiffies;
        workspace->mem = kvmalloc(workspace->size, GFP_KERNEL | __GFP_NOWARN);
@@ -717,6 +724,7 @@ finish:
 const struct btrfs_compress_op btrfs_zstd_compress = {
        /* ZSTD uses own workspace manager */
        .workspace_manager = NULL,
+       .min_level      = ZSTD_BTRFS_MIN_LEVEL,
        .max_level      = ZSTD_BTRFS_MAX_LEVEL,
        .default_level  = ZSTD_BTRFS_DEFAULT_LEVEL,
 };