ext4: fix zombie groups in average fragment size lists
author	Baokun Li <libaokun1@huawei.com>
	Mon, 14 Jul 2025 13:03:20 +0000 (21:03 +0800)
committer	Theodore Ts'o <tytso@mit.edu>
	Fri, 25 Jul 2025 13:14:17 +0000 (09:14 -0400)
Groups with no free blocks shouldn't be in any average fragment size list.
However, when all blocks in a group are allocated (i.e., bb_fragments or
bb_free is 0), we currently skip updating the average fragment size, which
means the group isn't removed from its previous s_mb_avg_fragment_size[old]
list.

This creates "zombie" groups that are always skipped during traversal as
they cannot satisfy any block allocation request, negatively impacting
traversal efficiency.

Therefore, when a group becomes completely full, bb_avg_fragment_size_order
is now set to -1. If the old order was not -1, a removal operation is
performed; if the new order is not -1, an insertion is performed.

Fixes: 196e402adf2e ("ext4: improve cr 0 / cr 1 group scanning")
CC: stable@vger.kernel.org
Signed-off-by: Baokun Li <libaokun1@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Zhang Yi <yi.zhang@huawei.com>
Link: https://patch.msgid.link/20250714130327.1830534-11-libaokun1@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
fs/ext4/mballoc.c

index 6d98f2a5afc4df7e9e044c6fcebd56a1ea652afa..72b20fc52bbfeaddfaf0d357c14cf944348be5cd 100644 (file)
@@ -841,30 +841,30 @@ static void
 mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp)
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
-       int new_order;
+       int new, old;
 
-       if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_fragments == 0)
+       if (!test_opt2(sb, MB_OPTIMIZE_SCAN))
                return;
 
-       new_order = mb_avg_fragment_size_order(sb,
-                                       grp->bb_free / grp->bb_fragments);
-       if (new_order == grp->bb_avg_fragment_size_order)
+       old = grp->bb_avg_fragment_size_order;
+       new = grp->bb_fragments == 0 ? -1 :
+             mb_avg_fragment_size_order(sb, grp->bb_free / grp->bb_fragments);
+       if (new == old)
                return;
 
-       if (grp->bb_avg_fragment_size_order != -1) {
-               write_lock(&sbi->s_mb_avg_fragment_size_locks[
-                                       grp->bb_avg_fragment_size_order]);
+       if (old >= 0) {
+               write_lock(&sbi->s_mb_avg_fragment_size_locks[old]);
                list_del(&grp->bb_avg_fragment_size_node);
-               write_unlock(&sbi->s_mb_avg_fragment_size_locks[
-                                       grp->bb_avg_fragment_size_order]);
-       }
-       grp->bb_avg_fragment_size_order = new_order;
-       write_lock(&sbi->s_mb_avg_fragment_size_locks[
-                                       grp->bb_avg_fragment_size_order]);
-       list_add_tail(&grp->bb_avg_fragment_size_node,
-               &sbi->s_mb_avg_fragment_size[grp->bb_avg_fragment_size_order]);
-       write_unlock(&sbi->s_mb_avg_fragment_size_locks[
-                                       grp->bb_avg_fragment_size_order]);
+               write_unlock(&sbi->s_mb_avg_fragment_size_locks[old]);
+       }
+
+       grp->bb_avg_fragment_size_order = new;
+       if (new >= 0) {
+               write_lock(&sbi->s_mb_avg_fragment_size_locks[new]);
+               list_add_tail(&grp->bb_avg_fragment_size_node,
+                               &sbi->s_mb_avg_fragment_size[new]);
+               write_unlock(&sbi->s_mb_avg_fragment_size_locks[new]);
+       }
 }
 
 /*