git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
bcachefs: btree_gc no longer uses main in-memory bucket array
Author: Kent Overstreet <kent.overstreet@gmail.com>
        Sun, 26 Dec 2021 01:39:19 +0000 (20:39 -0500)
Committer: Kent Overstreet <kent.overstreet@linux.dev>
        Sun, 22 Oct 2023 21:09:23 +0000 (17:09 -0400)
This changes the btree_gc code to only use the second bucket array, the
one dedicated to GC. On completion, it compares what's in its in memory
bucket array to the allocation information in the btree and writes it
directly, instead of updating the main in-memory bucket array and
writing that.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
fs/bcachefs/alloc_background.c
fs/bcachefs/alloc_background.h
fs/bcachefs/bcachefs.h
fs/bcachefs/btree_gc.c
fs/bcachefs/recovery.c

index 55af41a63ff7e5dcfa5e212c22daf63f3903e2f9..700d1e00aaf9264c0faccffa531186595a8c17b0 100644 (file)
@@ -39,15 +39,6 @@ static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = {
 #undef x
 };
 
-struct bkey_alloc_buf {
-       struct bkey_i   k;
-       struct bch_alloc_v3 v;
-
-#define x(_name,  _bits)               + _bits / 8
-       u8              _pad[0 + BCH_ALLOC_FIELDS_V2()];
-#undef  x
-} __attribute__((packed, aligned(8)));
-
 /* Persistent alloc info: */
 
 static inline u64 alloc_field_v1_get(const struct bch_alloc *a,
@@ -254,24 +245,31 @@ struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
        return ret;
 }
 
-static void bch2_alloc_pack(struct bch_fs *c,
-                           struct bkey_alloc_buf *dst,
-                           const struct bkey_alloc_unpacked src)
+struct bkey_alloc_buf *bch2_alloc_pack(struct btree_trans *trans,
+                                      const struct bkey_alloc_unpacked src)
 {
-       bch2_alloc_pack_v3(dst, src);
+       struct bkey_alloc_buf *dst;
+
+       dst = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf));
+       if (!IS_ERR(dst))
+               bch2_alloc_pack_v3(dst, src);
+
+       return dst;
 }
 
 int bch2_alloc_write(struct btree_trans *trans, struct btree_iter *iter,
                     struct bkey_alloc_unpacked *u, unsigned trigger_flags)
 {
-       struct bkey_alloc_buf *a;
-
-       a = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf));
-       if (IS_ERR(a))
-               return PTR_ERR(a);
+       struct bkey_alloc_buf *a = bch2_alloc_pack(trans, *u);
 
-       bch2_alloc_pack(trans->c, a, *u);
-       return bch2_trans_update(trans, iter, &a->k, trigger_flags|
+       /*
+        * Without BTREE_UPDATE_NO_KEY_CACHE_COHERENCY, we may end up updating
+        * the btree instead of the key cache - this can cause the allocator to
+        * self-deadlock, since updating the btree may require allocating new
+        * btree nodes:
+        */
+       return PTR_ERR_OR_ZERO(a) ?:
+               bch2_trans_update(trans, iter, &a->k, trigger_flags|
                                 BTREE_UPDATE_NO_KEY_CACHE_COHERENCY);
 }
 
@@ -342,7 +340,7 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
 #undef  x
 }
 
-int bch2_alloc_read(struct bch_fs *c)
+int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only)
 {
        struct btree_trans trans;
        struct btree_iter iter;
@@ -353,108 +351,43 @@ int bch2_alloc_read(struct bch_fs *c)
        int ret;
 
        bch2_trans_init(&trans, c, 0, 0);
-       down_read(&c->gc_lock);
 
        for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
                           BTREE_ITER_PREFETCH, k, ret) {
-               if (!bkey_is_alloc(k.k))
-                       continue;
-
                ca = bch_dev_bkey_exists(c, k.k->p.inode);
-               g = bucket(ca, k.k->p.offset);
+               g = __bucket(ca, k.k->p.offset, gc);
                u = bch2_alloc_unpack(k);
 
-               *bucket_gen(ca, k.k->p.offset) = u.gen;
+               if (!gc)
+                       *bucket_gen(ca, k.k->p.offset) = u.gen;
+
                g->_mark.gen            = u.gen;
-               g->_mark.data_type      = u.data_type;
-               g->_mark.dirty_sectors  = u.dirty_sectors;
-               g->_mark.cached_sectors = u.cached_sectors;
-               g->_mark.stripe         = u.stripe != 0;
-               g->stripe               = u.stripe;
-               g->stripe_redundancy    = u.stripe_redundancy;
                g->io_time[READ]        = u.read_time;
                g->io_time[WRITE]       = u.write_time;
-               g->oldest_gen           = u.oldest_gen;
+               g->oldest_gen           = !gc ? u.oldest_gen : u.gen;
                g->gen_valid            = 1;
-       }
-       bch2_trans_iter_exit(&trans, &iter);
 
-       up_read(&c->gc_lock);
-       bch2_trans_exit(&trans);
+               if (!gc ||
+                   (metadata_only &&
+                    (u.data_type == BCH_DATA_user ||
+                     u.data_type == BCH_DATA_cached ||
+                     u.data_type == BCH_DATA_parity))) {
+                       g->_mark.data_type      = u.data_type;
+                       g->_mark.dirty_sectors  = u.dirty_sectors;
+                       g->_mark.cached_sectors = u.cached_sectors;
+                       g->_mark.stripe         = u.stripe != 0;
+                       g->stripe               = u.stripe;
+                       g->stripe_redundancy    = u.stripe_redundancy;
+               }
 
-       if (ret) {
-               bch_err(c, "error reading alloc info: %i", ret);
-               return ret;
        }
+       bch2_trans_iter_exit(&trans, &iter);
 
-       return 0;
-}
-
-static int bch2_alloc_write_key(struct btree_trans *trans,
-                               struct btree_iter *iter,
-                               unsigned flags)
-{
-       struct bch_fs *c = trans->c;
-       struct bkey_s_c k;
-       struct bkey_alloc_unpacked old_u, new_u;
-       int ret;
-retry:
-       bch2_trans_begin(trans);
-
-       ret = bch2_btree_key_cache_flush(trans,
-                       BTREE_ID_alloc, iter->pos);
-       if (ret)
-               goto err;
+       bch2_trans_exit(&trans);
 
-       k = bch2_btree_iter_peek_slot(iter);
-       ret = bkey_err(k);
        if (ret)
-               goto err;
-
-       old_u   = bch2_alloc_unpack(k);
-       new_u   = alloc_mem_to_key(c, iter);
-
-       if (!bkey_alloc_unpacked_cmp(old_u, new_u))
-               return 0;
-
-       ret   = bch2_alloc_write(trans, iter, &new_u,
-                                 BTREE_TRIGGER_NORUN) ?:
-               bch2_trans_commit(trans, NULL, NULL,
-                               BTREE_INSERT_NOFAIL|flags);
-err:
-       if (ret == -EINTR)
-               goto retry;
-       return ret;
-}
-
-int bch2_alloc_write_all(struct bch_fs *c, unsigned flags)
-{
-       struct btree_trans trans;
-       struct btree_iter iter;
-       struct bch_dev *ca;
-       unsigned i;
-       int ret = 0;
-
-       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
-       bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc, POS_MIN,
-                            BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
-
-       for_each_member_device(ca, c, i) {
-               bch2_btree_iter_set_pos(&iter,
-                       POS(ca->dev_idx, ca->mi.first_bucket));
+               bch_err(c, "error reading alloc info: %i", ret);
 
-               while (iter.pos.offset < ca->mi.nbuckets) {
-                       ret = bch2_alloc_write_key(&trans, &iter, flags);
-                       if (ret) {
-                               percpu_ref_put(&ca->ref);
-                               goto err;
-                       }
-                       bch2_btree_iter_advance(&iter);
-               }
-       }
-err:
-       bch2_trans_iter_exit(&trans, &iter);
-       bch2_trans_exit(&trans);
        return ret;
 }
 
index 86b64177b3d0bc2a378e7961aff478363ce0c210..98c7866e20b57ded9f8d629d8427d5966f97bfb5 100644 (file)
@@ -38,40 +38,23 @@ static inline bool bkey_alloc_unpacked_cmp(struct bkey_alloc_unpacked l,
        ;
 }
 
+struct bkey_alloc_buf {
+       struct bkey_i   k;
+       struct bch_alloc_v3 v;
+
+#define x(_name,  _bits)               + _bits / 8
+       u8              _pad[0 + BCH_ALLOC_FIELDS_V2()];
+#undef  x
+} __attribute__((packed, aligned(8)));
+
 struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c);
+struct bkey_alloc_buf *bch2_alloc_pack(struct btree_trans *,
+                                      const struct bkey_alloc_unpacked);
 int bch2_alloc_write(struct btree_trans *, struct btree_iter *,
                     struct bkey_alloc_unpacked *, unsigned);
 
 int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int);
 
-static inline struct bkey_alloc_unpacked
-alloc_mem_to_key(struct bch_fs *c, struct btree_iter *iter)
-{
-       struct bch_dev *ca;
-       struct bucket *g;
-       struct bkey_alloc_unpacked ret;
-
-       percpu_down_read(&c->mark_lock);
-       ca      = bch_dev_bkey_exists(c, iter->pos.inode);
-       g       = bucket(ca, iter->pos.offset);
-       ret     = (struct bkey_alloc_unpacked) {
-               .dev            = iter->pos.inode,
-               .bucket         = iter->pos.offset,
-               .gen            = g->mark.gen,
-               .oldest_gen     = g->oldest_gen,
-               .data_type      = g->mark.data_type,
-               .dirty_sectors  = g->mark.dirty_sectors,
-               .cached_sectors = g->mark.cached_sectors,
-               .read_time      = g->io_time[READ],
-               .write_time     = g->io_time[WRITE],
-               .stripe         = g->stripe,
-               .stripe_redundancy = g->stripe_redundancy,
-       };
-       percpu_up_read(&c->mark_lock);
-
-       return ret;
-}
-
 #define ALLOC_SCAN_BATCH(ca)           max_t(size_t, 1, (ca)->mi.nbuckets >> 9)
 
 const char *bch2_alloc_v1_invalid(const struct bch_fs *, struct bkey_s_c);
@@ -101,7 +84,7 @@ static inline bool bkey_is_alloc(const struct bkey *k)
                k->type == KEY_TYPE_alloc_v3;
 }
 
-int bch2_alloc_read(struct bch_fs *);
+int bch2_alloc_read(struct bch_fs *, bool, bool);
 
 static inline void bch2_wake_allocator(struct bch_dev *ca)
 {
@@ -139,7 +122,6 @@ void bch2_dev_allocator_quiesce(struct bch_fs *, struct bch_dev *);
 void bch2_dev_allocator_stop(struct bch_dev *);
 int bch2_dev_allocator_start(struct bch_dev *);
 
-int bch2_alloc_write_all(struct bch_fs *, unsigned);
 void bch2_fs_allocator_background_init(struct bch_fs *);
 
 #endif /* _BCACHEFS_ALLOC_BACKGROUND_H */
index 55db3c00f8dcb3fd61e23b1a005367a55796118c..91514365d72b9bf17eb9e63c4fe5f4294b7c3d6c 100644 (file)
@@ -536,7 +536,6 @@ enum {
        /* misc: */
        BCH_FS_NEED_ANOTHER_GC,
        BCH_FS_DELETED_NODES,
-       BCH_FS_NEED_ALLOC_WRITE,
        BCH_FS_REBUILD_REPLICAS,
        BCH_FS_HOLD_BTREE_WRITES,
 };
index e92769e010c15361da100cf0e6340388c87b4fc8..d4b2d2657340007343e381da446029c2b878c11e 100644 (file)
@@ -9,6 +9,7 @@
 #include "alloc_foreground.h"
 #include "bkey_methods.h"
 #include "bkey_buf.h"
+#include "btree_key_cache.h"
 #include "btree_locking.h"
 #include "btree_update_interior.h"
 #include "btree_io.h"
@@ -533,7 +534,6 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
        bkey_for_each_ptr_decode(k->k, ptrs, p, entry) {
                struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
                struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
-               struct bucket *g2 = PTR_BUCKET(ca, &p.ptr);
                enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry->ptr);
 
                if (fsck_err_on(!g->gen_valid, c,
@@ -544,9 +544,8 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
                                p.ptr.gen,
                                (bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) {
                        if (!p.ptr.cached) {
-                               g2->_mark.gen   = g->_mark.gen          = p.ptr.gen;
-                               g2->gen_valid   = g->gen_valid          = true;
-                               set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
+                               g->_mark.gen            = p.ptr.gen;
+                               g->gen_valid            = true;
                        } else {
                                do_update = true;
                        }
@@ -560,13 +559,12 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
                                p.ptr.gen, g->mark.gen,
                                (bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) {
                        if (!p.ptr.cached) {
-                               g2->_mark.gen   = g->_mark.gen  = p.ptr.gen;
-                               g2->gen_valid   = g->gen_valid  = true;
-                               g2->_mark.data_type             = 0;
-                               g2->_mark.dirty_sectors         = 0;
-                               g2->_mark.cached_sectors        = 0;
+                               g->_mark.gen            = p.ptr.gen;
+                               g->gen_valid            = true;
+                               g->_mark.data_type      = 0;
+                               g->_mark.dirty_sectors  = 0;
+                               g->_mark.cached_sectors = 0;
                                set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
-                               set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
                        } else {
                                do_update = true;
                        }
@@ -603,8 +601,7 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
                                bch2_data_types[data_type],
                                (bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) {
                        if (data_type == BCH_DATA_btree) {
-                               g2->_mark.data_type     = g->_mark.data_type    = data_type;
-                               set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
+                               g->_mark.data_type      = data_type;
                                set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
                        } else {
                                do_update = true;
@@ -1169,13 +1166,14 @@ static int bch2_gc_done(struct bch_fs *c,
        unsigned i, dev;
        int ret = 0;
 
+       percpu_down_write(&c->mark_lock);
+
 #define copy_field(_f, _msg, ...)                                      \
        if (dst->_f != src->_f) {                                       \
                if (verify)                                             \
                        fsck_err(c, _msg ": got %llu, should be %llu"   \
                                , ##__VA_ARGS__, dst->_f, src->_f);     \
                dst->_f = src->_f;                                      \
-               set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);            \
        }
 #define copy_stripe_field(_f, _msg, ...)                               \
        if (dst->_f != src->_f) {                                       \
@@ -1185,18 +1183,6 @@ static int bch2_gc_done(struct bch_fs *c,
                                iter.pos, ##__VA_ARGS__,                \
                                dst->_f, src->_f);                      \
                dst->_f = src->_f;                                      \
-               set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);            \
-       }
-#define copy_bucket_field(_f)                                          \
-       if (dst->b[b]._f != src->b[b]._f) {                             \
-               if (verify)                                             \
-                       fsck_err(c, "bucket %u:%zu gen %u data type %s has wrong " #_f  \
-                               ": got %u, should be %u", dev, b,       \
-                               dst->b[b].mark.gen,                     \
-                               bch2_data_types[dst->b[b].mark.data_type],\
-                               dst->b[b]._f, src->b[b]._f);            \
-               dst->b[b]._f = src->b[b]._f;                            \
-               set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);            \
        }
 #define copy_dev_field(_f, _msg, ...)                                  \
        copy_field(_f, "dev %u has wrong " _msg, dev, ##__VA_ARGS__)
@@ -1207,36 +1193,18 @@ static int bch2_gc_done(struct bch_fs *c,
                bch2_fs_usage_acc_to_base(c, i);
 
        for_each_member_device(ca, c, dev) {
-               struct bucket_array *dst = __bucket_array(ca, 0);
-               struct bucket_array *src = __bucket_array(ca, 1);
-               size_t b;
-
-               for (b = 0; b < src->nbuckets; b++) {
-                       copy_bucket_field(_mark.gen);
-                       copy_bucket_field(_mark.data_type);
-                       copy_bucket_field(_mark.stripe);
-                       copy_bucket_field(_mark.dirty_sectors);
-                       copy_bucket_field(_mark.cached_sectors);
-                       copy_bucket_field(stripe_redundancy);
-                       copy_bucket_field(stripe);
-
-                       dst->b[b].oldest_gen = src->b[b].oldest_gen;
-               }
-
-               {
-                       struct bch_dev_usage *dst = ca->usage_base;
-                       struct bch_dev_usage *src = (void *)
-                               bch2_acc_percpu_u64s((void *) ca->usage_gc,
-                                                    dev_usage_u64s());
-
-                       copy_dev_field(buckets_ec,              "buckets_ec");
-                       copy_dev_field(buckets_unavailable,     "buckets_unavailable");
-
-                       for (i = 0; i < BCH_DATA_NR; i++) {
-                               copy_dev_field(d[i].buckets,    "%s buckets", bch2_data_types[i]);
-                               copy_dev_field(d[i].sectors,    "%s sectors", bch2_data_types[i]);
-                               copy_dev_field(d[i].fragmented, "%s fragmented", bch2_data_types[i]);
-                       }
+               struct bch_dev_usage *dst = ca->usage_base;
+               struct bch_dev_usage *src = (void *)
+                       bch2_acc_percpu_u64s((void *) ca->usage_gc,
+                                            dev_usage_u64s());
+
+               copy_dev_field(buckets_ec,              "buckets_ec");
+               copy_dev_field(buckets_unavailable,     "buckets_unavailable");
+
+               for (i = 0; i < BCH_DATA_NR; i++) {
+                       copy_dev_field(d[i].buckets,    "%s buckets", bch2_data_types[i]);
+                       copy_dev_field(d[i].sectors,    "%s sectors", bch2_data_types[i]);
+                       copy_dev_field(d[i].fragmented, "%s fragmented", bch2_data_types[i]);
                }
        };
 
@@ -1278,7 +1246,6 @@ static int bch2_gc_done(struct bch_fs *c,
 
 #undef copy_fs_field
 #undef copy_dev_field
-#undef copy_bucket_field
 #undef copy_stripe_field
 #undef copy_field
 fsck_err:
@@ -1286,6 +1253,8 @@ fsck_err:
                percpu_ref_put(&ca->ref);
        if (ret)
                bch_err(c, "%s: ret %i", __func__, ret);
+
+       percpu_up_write(&c->mark_lock);
        return ret;
 }
 
@@ -1308,15 +1277,6 @@ static int bch2_gc_start(struct bch_fs *c,
                BUG_ON(ca->buckets[1]);
                BUG_ON(ca->usage_gc);
 
-               ca->buckets[1] = kvpmalloc(sizeof(struct bucket_array) +
-                               ca->mi.nbuckets * sizeof(struct bucket),
-                               GFP_KERNEL|__GFP_ZERO);
-               if (!ca->buckets[1]) {
-                       percpu_ref_put(&ca->ref);
-                       bch_err(c, "error allocating ca->buckets[gc]");
-                       return -ENOMEM;
-               }
-
                ca->usage_gc = alloc_percpu(struct bch_dev_usage);
                if (!ca->usage_gc) {
                        bch_err(c, "error allocating ca->usage_gc");
@@ -1325,33 +1285,151 @@ static int bch2_gc_start(struct bch_fs *c,
                }
        }
 
-       percpu_down_write(&c->mark_lock);
+       return 0;
+}
+
+static int bch2_alloc_write_key(struct btree_trans *trans,
+                               struct btree_iter *iter,
+                               bool initial, bool metadata_only)
+{
+       struct bch_fs *c = trans->c;
+       struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode);
+       struct bucket *g;
+       struct bkey_s_c k;
+       struct bkey_alloc_unpacked old_u, new_u, gc_u;
+       struct bkey_alloc_buf *a;
+       int ret;
+
+       k = bch2_btree_iter_peek_slot(iter);
+       ret = bkey_err(k);
+       if (ret)
+               return ret;
+
+       old_u = new_u = bch2_alloc_unpack(k);
+
+       percpu_down_read(&c->mark_lock);
+       g       = gc_bucket(ca, iter->pos.offset);
+       gc_u = (struct bkey_alloc_unpacked) {
+               .dev            = iter->pos.inode,
+               .bucket         = iter->pos.offset,
+               .gen            = g->mark.gen,
+               .oldest_gen     = g->oldest_gen,
+               .data_type      = g->mark.data_type,
+               .dirty_sectors  = g->mark.dirty_sectors,
+               .cached_sectors = g->mark.cached_sectors,
+               .read_time      = g->io_time[READ],
+               .write_time     = g->io_time[WRITE],
+               .stripe         = g->stripe,
+               .stripe_redundancy = g->stripe_redundancy,
+       };
+       percpu_up_read(&c->mark_lock);
+
+       if (metadata_only &&
+           gc_u.data_type != BCH_DATA_sb &&
+           gc_u.data_type != BCH_DATA_journal &&
+           gc_u.data_type != BCH_DATA_btree)
+               return 0;
+
+       if (!bkey_alloc_unpacked_cmp(old_u, gc_u) ||
+           gen_after(old_u.gen, gc_u.gen))
+               return 0;
+
+#define copy_bucket_field(_f)                                          \
+       if (fsck_err_on(new_u._f != gc_u._f, c,                         \
+                       "bucket %llu:%llu gen %u data type %s has wrong " #_f   \
+                       ": got %u, should be %u",                       \
+                       iter->pos.inode, iter->pos.offset,              \
+                       new_u.gen,                                      \
+                       bch2_data_types[new_u.data_type],               \
+                       new_u._f, gc_u._f))                             \
+               new_u._f = gc_u._f;                                     \
+
+       copy_bucket_field(gen);
+       copy_bucket_field(data_type);
+       copy_bucket_field(stripe);
+       copy_bucket_field(dirty_sectors);
+       copy_bucket_field(cached_sectors);
+       copy_bucket_field(stripe_redundancy);
+       copy_bucket_field(stripe);
+#undef copy_bucket_field
+
+       new_u.oldest_gen = gc_u.oldest_gen;
+
+       if (!bkey_alloc_unpacked_cmp(old_u, new_u))
+               return 0;
+
+       a = bch2_alloc_pack(trans, new_u);
+       if (IS_ERR(a))
+               return PTR_ERR(a);
+
+       ret = initial
+               ? bch2_journal_key_insert(c, BTREE_ID_alloc, 0, &a->k)
+               : bch2_trans_update(trans, iter, &a->k, BTREE_TRIGGER_NORUN);
+fsck_err:
+       return ret;
+}
+
+static int bch2_gc_alloc_done(struct bch_fs *c, bool initial, bool metadata_only)
+{
+       struct btree_trans trans;
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       struct bch_dev *ca;
+       unsigned i;
+       int ret = 0;
+
+       bch2_trans_init(&trans, c, 0, 0);
 
        for_each_member_device(ca, c, i) {
-               struct bucket_array *dst = __bucket_array(ca, 1);
-               struct bucket_array *src = __bucket_array(ca, 0);
-               size_t b;
+               for_each_btree_key(&trans, iter, BTREE_ID_alloc,
+                                  POS(ca->dev_idx, ca->mi.first_bucket),
+                                  BTREE_ITER_SLOTS|
+                                  BTREE_ITER_PREFETCH, k, ret) {
+                       if (bkey_cmp(iter.pos, POS(ca->dev_idx, ca->mi.nbuckets)) >= 0)
+                               break;
 
-               dst->first_bucket       = src->first_bucket;
-               dst->nbuckets           = src->nbuckets;
+                       ret = __bch2_trans_do(&trans, NULL, NULL,
+                                             BTREE_INSERT_LAZY_RW,
+                                       bch2_alloc_write_key(&trans, &iter,
+                                                            initial, metadata_only));
+                       if (ret)
+                               break;
+               }
+               bch2_trans_iter_exit(&trans, &iter);
 
-               for (b = 0; b < src->nbuckets; b++) {
-                       struct bucket *d = &dst->b[b];
-                       struct bucket *s = &src->b[b];
+               if (ret) {
+                       bch_err(c, "error writing alloc info: %i", ret);
+                       percpu_ref_put(&ca->ref);
+                       break;
+               }
+       }
 
-                       d->_mark.gen = dst->b[b].oldest_gen = s->mark.gen;
-                       d->gen_valid = s->gen_valid;
+       bch2_trans_exit(&trans);
+       return ret;
+}
 
-                       if (metadata_only &&
-                           (s->mark.data_type == BCH_DATA_user ||
-                            s->mark.data_type == BCH_DATA_cached))
-                               d->_mark = s->mark;
+static int bch2_gc_alloc_start(struct bch_fs *c, bool initial, bool metadata_only)
+{
+       struct bch_dev *ca;
+       unsigned i;
+
+       for_each_member_device(ca, c, i) {
+               struct bucket_array *buckets = kvpmalloc(sizeof(struct bucket_array) +
+                               ca->mi.nbuckets * sizeof(struct bucket),
+                               GFP_KERNEL|__GFP_ZERO);
+               if (!buckets) {
+                       percpu_ref_put(&ca->ref);
+                       percpu_up_write(&c->mark_lock);
+                       bch_err(c, "error allocating ca->buckets[gc]");
+                       return -ENOMEM;
                }
-       };
 
-       percpu_up_write(&c->mark_lock);
+               buckets->first_bucket   = ca->mi.first_bucket;
+               buckets->nbuckets       = ca->mi.nbuckets;
+               rcu_assign_pointer(ca->buckets[1], buckets);
+       };
 
-       return 0;
+       return bch2_alloc_read(c, true, metadata_only);
 }
 
 static void bch2_gc_alloc_reset(struct bch_fs *c, bool initial, bool metadata_only)
@@ -1598,6 +1676,7 @@ int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only)
                           !bch2_btree_interior_updates_nr_pending(c));
 
        ret   = bch2_gc_start(c, metadata_only) ?:
+               bch2_gc_alloc_start(c, initial, metadata_only) ?:
                bch2_gc_reflink_start(c, initial, metadata_only);
        if (ret)
                goto out;
@@ -1665,16 +1744,15 @@ out:
        if (!ret) {
                bch2_journal_block(&c->journal);
 
-               percpu_down_write(&c->mark_lock);
-               ret   = bch2_gc_reflink_done(c, initial, metadata_only) ?:
-                       bch2_gc_stripes_done(c, initial, metadata_only) ?:
+               ret   = bch2_gc_stripes_done(c, initial, metadata_only) ?:
+                       bch2_gc_reflink_done(c, initial, metadata_only) ?:
+                       bch2_gc_alloc_done(c, initial, metadata_only) ?:
                        bch2_gc_done(c, initial, metadata_only);
 
                bch2_journal_unblock(&c->journal);
-       } else {
-               percpu_down_write(&c->mark_lock);
        }
 
+       percpu_down_write(&c->mark_lock);
        /* Indicates that gc is no longer in progress: */
        __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
 
index 383838d66edff59026ff29c97ba55914ced1b3af..feafb7296ddf753985bcf67b4c01602235f47f3b 100644 (file)
@@ -1113,7 +1113,11 @@ use_clean:
 
        bch_verbose(c, "starting alloc read");
        err = "error reading allocation information";
-       ret = bch2_alloc_read(c);
+
+       down_read(&c->gc_lock);
+       ret = bch2_alloc_read(c, false, false);
+       up_read(&c->gc_lock);
+
        if (ret)
                goto err;
        bch_verbose(c, "alloc read done");
@@ -1171,23 +1175,6 @@ use_clean:
        if (c->opts.verbose || !c->sb.clean)
                bch_info(c, "journal replay done");
 
-       if (test_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags) &&
-           !c->opts.nochanges) {
-               /*
-                * note that even when filesystem was clean there might be work
-                * to do here, if we ran gc (because of fsck) which recalculated
-                * oldest_gen:
-                */
-               bch_verbose(c, "writing allocation info");
-               err = "error writing out alloc info";
-               ret = bch2_alloc_write_all(c, BTREE_INSERT_LAZY_RW);
-               if (ret) {
-                       bch_err(c, "error writing alloc info");
-                       goto err;
-               }
-               bch_verbose(c, "alloc write done");
-       }
-
        if (c->sb.version < bcachefs_metadata_version_snapshot_2) {
                bch2_fs_lazy_rw(c);