bcachefs: Erasure coding fixes
author Kent Overstreet <kent.overstreet@gmail.com>
Mon, 29 Nov 2021 21:38:27 +0000 (16:38 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:18 +0000 (17:09 -0400)
When we added the stripe and stripe_redundancy fields to alloc keys, we
neglected to add them to the functions that convert back and forth with
the in-memory types.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
fs/bcachefs/alloc_background.c
fs/bcachefs/alloc_background.h
fs/bcachefs/btree_gc.c
fs/bcachefs/buckets.c
fs/bcachefs/ec.c
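
For illustration only (not part of the commit): the mismatch the message describes can be sketched with simplified stand-in types. The demo_bucket and demo_alloc_unpacked structs below are invented for this sketch; the real code converts between struct bucket (with its bucket_mark) and struct bkey_alloc_unpacked, as in the alloc_background.c and alloc_background.h hunks that follow. The point is that both conversion directions must carry the new stripe and stripe_redundancy fields, or stripe membership is silently dropped on a round trip.

/*
 * Sketch only: simplified stand-ins for the real bcachefs types; the
 * layouts below are invented for illustration.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

struct demo_alloc_unpacked {            /* stand-in for bkey_alloc_unpacked */
        uint8_t  gen;
        uint8_t  data_type;
        uint16_t dirty_sectors;
        uint16_t cached_sectors;
        uint32_t stripe;                /* the newly added fields */
        uint8_t  stripe_redundancy;
};

struct demo_bucket {                    /* stand-in for struct bucket */
        uint8_t  gen;
        uint8_t  data_type;
        uint16_t dirty_sectors;
        uint16_t cached_sectors;
        bool     mark_stripe;           /* like g->_mark.stripe */
        uint32_t stripe;
        uint8_t  stripe_redundancy;
};

/* key -> in-memory direction, mirroring the bch2_alloc_read_fn() hunk */
static void demo_key_to_mem(struct demo_bucket *g,
                            const struct demo_alloc_unpacked *u)
{
        g->gen               = u->gen;
        g->data_type         = u->data_type;
        g->dirty_sectors     = u->dirty_sectors;
        g->cached_sectors    = u->cached_sectors;
        g->mark_stripe       = u->stripe != 0;
        g->stripe            = u->stripe;
        g->stripe_redundancy = u->stripe_redundancy;
}

/* in-memory -> key direction, mirroring the alloc_mem_to_key() hunk */
static struct demo_alloc_unpacked demo_mem_to_key(const struct demo_bucket *g)
{
        return (struct demo_alloc_unpacked) {
                .gen               = g->gen,
                .data_type         = g->data_type,
                .dirty_sectors     = g->dirty_sectors,
                .cached_sectors    = g->cached_sectors,
                .stripe            = g->stripe,
                .stripe_redundancy = g->stripe_redundancy,
        };
}

int main(void)
{
        struct demo_alloc_unpacked u = {
                .gen = 3, .dirty_sectors = 128,
                .stripe = 42, .stripe_redundancy = 2,
        };
        struct demo_bucket g;

        /* Round trip: without the new fields in *both* helpers, stripe
         * membership would silently be lost on one of these steps. */
        demo_key_to_mem(&g, &u);
        struct demo_alloc_unpacked v = demo_mem_to_key(&g);

        assert(v.stripe == u.stripe);
        assert(v.stripe_redundancy == u.stripe_redundancy);
        return 0;
}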

index 10514476cffe2d6de1390b50a30935aab3552c64..dc1e09b138b622c3d6c91dd4756ccc8abc48e69e 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -336,6 +336,9 @@ static int bch2_alloc_read_fn(struct btree_trans *trans, struct bkey_s_c k)
        g->_mark.data_type      = u.data_type;
        g->_mark.dirty_sectors  = u.dirty_sectors;
        g->_mark.cached_sectors = u.cached_sectors;
+       g->_mark.stripe         = u.stripe != 0;
+       g->stripe               = u.stripe;
+       g->stripe_redundancy    = u.stripe_redundancy;
        g->io_time[READ]        = u.read_time;
        g->io_time[WRITE]       = u.write_time;
        g->oldest_gen           = u.oldest_gen;
index 370573f8e05d7f94585210402129dcd8e5db6d94..b1efc1494dc4f84e45dd22dc4999280247ee0d6f 100644
--- a/fs/bcachefs/alloc_background.h
+++ b/fs/bcachefs/alloc_background.h
@@ -65,6 +65,8 @@ alloc_mem_to_key(struct btree_iter *iter,
                .cached_sectors = m.cached_sectors,
                .read_time      = g->io_time[READ],
                .write_time     = g->io_time[WRITE],
+               .stripe         = g->stripe,
+               .stripe_redundancy = g->stripe_redundancy,
        };
 }
 
index b692451f91b58e8560569972b03c6f65895e7bdc..6cde4234f5e94f1c62df7c6ab7ac5d996c2ceffb 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -1176,14 +1176,14 @@ static int bch2_gc_done(struct bch_fs *c,
                set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);            \
        }
 #define copy_bucket_field(_f)                                          \
-       if (dst->b[b].mark._f != src->b[b].mark._f) {                   \
+       if (dst->b[b]._f != src->b[b]._f) {                             \
                if (verify)                                             \
                        fsck_err(c, "bucket %u:%zu gen %u data type %s has wrong " #_f  \
                                ": got %u, should be %u", dev, b,       \
                                dst->b[b].mark.gen,                     \
                                bch2_data_types[dst->b[b].mark.data_type],\
-                               dst->b[b].mark._f, src->b[b].mark._f);  \
-               dst->b[b]._mark._f = src->b[b].mark._f;                 \
+                               dst->b[b]._f, src->b[b]._f);            \
+               dst->b[b]._f = src->b[b]._f;                            \
                set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);            \
        }
 #define copy_dev_field(_f, _msg, ...)                                  \
@@ -1229,11 +1229,13 @@ static int bch2_gc_done(struct bch_fs *c,
                size_t b;
 
                for (b = 0; b < src->nbuckets; b++) {
-                       copy_bucket_field(gen);
-                       copy_bucket_field(data_type);
+                       copy_bucket_field(_mark.gen);
+                       copy_bucket_field(_mark.data_type);
+                       copy_bucket_field(_mark.stripe);
+                       copy_bucket_field(_mark.dirty_sectors);
+                       copy_bucket_field(_mark.cached_sectors);
+                       copy_bucket_field(stripe_redundancy);
                        copy_bucket_field(stripe);
-                       copy_bucket_field(dirty_sectors);
-                       copy_bucket_field(cached_sectors);
 
                        dst->b[b].oldest_gen = src->b[b].oldest_gen;
                }
index c4d72a49995531f7e0dff06cb2e7cb2e94a48813..66f0729051733dfeb3564e43178cf725dddaf4b9 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -810,6 +810,8 @@ static int mark_stripe_bucket(struct btree_trans *trans,
        const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
        unsigned nr_data = s->nr_blocks - s->nr_redundant;
        bool parity = ptr_idx >= nr_data;
+       enum bch_data_type data_type = parity ? BCH_DATA_parity : 0;
+       s64 sectors = parity ? le16_to_cpu(s->sectors) : 0;
        const struct bch_extent_ptr *ptr = s->ptrs + ptr_idx;
        bool gc = flags & BTREE_TRIGGER_GC;
        struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
@@ -818,10 +820,13 @@ static int mark_stripe_bucket(struct btree_trans *trans,
        char buf[200];
        int ret = 0;
 
+       /* * XXX doesn't handle deletion */
+
        percpu_down_read(&c->mark_lock);
        g = PTR_BUCKET(ca, ptr, gc);
 
-       if (g->stripe && g->stripe != k.k->p.offset) {
+       if (g->mark.dirty_sectors ||
+           (g->stripe && g->stripe != k.k->p.offset)) {
                bch2_fs_inconsistent(c,
                              "bucket %u:%zu gen %u: multiple stripes using same bucket\n%s",
                              ptr->dev, PTR_BUCKET_NR(ca, ptr), g->mark.gen,
@@ -831,20 +836,22 @@ static int mark_stripe_bucket(struct btree_trans *trans,
        }
 
        old = bucket_cmpxchg(g, new, ({
-               ret = check_bucket_ref(c, k, ptr, 0, 0, new.gen, new.data_type,
+               ret = check_bucket_ref(c, k, ptr, sectors, data_type,
+                                      new.gen, new.data_type,
                                       new.dirty_sectors, new.cached_sectors);
                if (ret)
                        goto err;
 
-               if (parity) {
-                       new.data_type           = BCH_DATA_parity;
-                       new.dirty_sectors       = le16_to_cpu(s->sectors);
-               }
+               new.dirty_sectors += sectors;
+               if (data_type)
+                       new.data_type           = data_type;
 
                if (journal_seq) {
                        new.journal_seq_valid   = 1;
                        new.journal_seq         = journal_seq;
                }
+
+               new.stripe = true;
        }));
 
        g->stripe               = k.k->p.offset;
@@ -1124,6 +1131,11 @@ static int bch2_mark_stripe(struct btree_trans *trans,
        }
 
        if (gc) {
+               /*
+                * This will be wrong when we bring back runtime gc: we should
+                * be unmarking the old key and then marking the new key
+                */
+
                /*
                 * gc recalculates this field from stripe ptr
                 * references:
@@ -1656,50 +1668,75 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
        return 0;
 }
 
-static int bch2_trans_mark_stripe_alloc_ref(struct btree_trans *trans,
-                                           struct bkey_s_c_stripe s,
-                                           unsigned idx, bool deleting)
+static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
+                                        struct bkey_s_c_stripe s,
+                                        unsigned idx, bool deleting)
 {
        struct bch_fs *c = trans->c;
        const struct bch_extent_ptr *ptr = &s.v->ptrs[idx];
        struct bkey_alloc_buf *a;
        struct btree_iter iter;
        struct bkey_alloc_unpacked u;
-       bool parity = idx >= s.v->nr_blocks - s.v->nr_redundant;
+       enum bch_data_type data_type = idx >= s.v->nr_blocks - s.v->nr_redundant
+               ? BCH_DATA_parity : 0;
+       s64 sectors = data_type ? le16_to_cpu(s.v->sectors) : 0;
        int ret = 0;
 
+       if (deleting)
+               sectors = -sectors;
+
        a = bch2_trans_start_alloc_update(trans, &iter, ptr, &u);
        if (IS_ERR(a))
                return PTR_ERR(a);
 
-       if (parity) {
-               s64 sectors = le16_to_cpu(s.v->sectors);
-
-               if (deleting)
-                       sectors = -sectors;
-
-               u.dirty_sectors += sectors;
-               u.data_type = u.dirty_sectors
-                       ? BCH_DATA_parity
-                       : 0;
-       }
+       ret = check_bucket_ref(c, s.s_c, ptr, sectors, data_type,
+                              u.gen, u.data_type,
+                              u.dirty_sectors, u.cached_sectors);
+       if (ret)
+               goto err;
 
        if (!deleting) {
-               if (bch2_fs_inconsistent_on(u.stripe && u.stripe != s.k->p.offset, c,
-                               "bucket %llu:%llu gen %u: multiple stripes using same bucket (%u, %llu)",
+               if (bch2_fs_inconsistent_on(u.stripe ||
+                                           u.stripe_redundancy, c,
+                               "bucket %llu:%llu gen %u data type %s dirty_sectors %u: multiple stripes using same bucket (%u, %llu)",
                                iter.pos.inode, iter.pos.offset, u.gen,
+                               bch2_data_types[u.data_type],
+                               u.dirty_sectors,
                                u.stripe, s.k->p.offset)) {
                        ret = -EIO;
                        goto err;
                }
 
+               if (bch2_fs_inconsistent_on(data_type && u.dirty_sectors, c,
+                               "bucket %llu:%llu gen %u data type %s dirty_sectors %u: data already in stripe bucket %llu",
+                               iter.pos.inode, iter.pos.offset, u.gen,
+                               bch2_data_types[u.data_type],
+                               u.dirty_sectors,
+                               s.k->p.offset)) {
+                       ret = -EIO;
+                       goto err;
+               }
+
                u.stripe                = s.k->p.offset;
                u.stripe_redundancy     = s.v->nr_redundant;
        } else {
+               if (bch2_fs_inconsistent_on(u.stripe != s.k->p.offset ||
+                                           u.stripe_redundancy != s.v->nr_redundant, c,
+                               "bucket %llu:%llu gen %u: not marked as stripe when deleting stripe %llu (got %u)",
+                               iter.pos.inode, iter.pos.offset, u.gen,
+                               s.k->p.offset, u.stripe)) {
+                       ret = -EIO;
+                       goto err;
+               }
+
                u.stripe                = 0;
                u.stripe_redundancy     = 0;
        }
 
+       u.dirty_sectors += sectors;
+       if (data_type)
+               u.data_type = !deleting ? data_type : 0;
+
        bch2_alloc_pack(c, a, u);
        bch2_trans_update(trans, &iter, &a->k, 0);
 err:
@@ -1714,7 +1751,7 @@ static int bch2_trans_mark_stripe(struct btree_trans *trans,
        struct bkey_s_c_stripe old_s = { .k = NULL };
        struct bkey_s_c_stripe new_s = { .k = NULL };
        struct bch_replicas_padded r;
-       unsigned i;
+       unsigned i, nr_blocks;
        int ret = 0;
 
        if (old.k->type == KEY_TYPE_stripe)
@@ -1732,18 +1769,17 @@ static int bch2_trans_mark_stripe(struct btree_trans *trans,
                    new_s.v->nr_blocks * sizeof(struct bch_extent_ptr)))
                return 0;
 
+       BUG_ON(new_s.k && old_s.k &&
+              (new_s.v->nr_blocks      != old_s.v->nr_blocks ||
+               new_s.v->nr_redundant   != old_s.v->nr_redundant));
+
+       nr_blocks = new_s.k ? new_s.v->nr_blocks : old_s.v->nr_blocks;
+
        if (new_s.k) {
                s64 sectors = le16_to_cpu(new_s.v->sectors);
 
                bch2_bkey_to_replicas(&r.e, new);
                update_replicas_list(trans, &r.e, sectors * new_s.v->nr_redundant);
-
-               for (i = 0; i < new_s.v->nr_blocks; i++) {
-                       ret = bch2_trans_mark_stripe_alloc_ref(trans, new_s,
-                                                              i, false);
-                       if (ret)
-                               return ret;
-               }
        }
 
        if (old_s.k) {
@@ -1751,12 +1787,25 @@ static int bch2_trans_mark_stripe(struct btree_trans *trans,
 
                bch2_bkey_to_replicas(&r.e, old);
                update_replicas_list(trans, &r.e, sectors * old_s.v->nr_redundant);
+       }
+
+       for (i = 0; i < nr_blocks; i++) {
+               if (new_s.k && old_s.k &&
+                   !memcmp(&new_s.v->ptrs[i],
+                           &old_s.v->ptrs[i],
+                           sizeof(new_s.v->ptrs[i])))
+                       continue;
 
-               for (i = 0; i < old_s.v->nr_blocks; i++) {
-                       ret = bch2_trans_mark_stripe_alloc_ref(trans, old_s,
-                                                              i, true);
+               if (new_s.k) {
+                       ret = bch2_trans_mark_stripe_bucket(trans, new_s, i, false);
                        if (ret)
-                               return ret;
+                               break;
+               }
+
+               if (old_s.k) {
+                       ret = bch2_trans_mark_stripe_bucket(trans, old_s, i, true);
+                       if (ret)
+                               break;
                }
        }
 
index b8e9bc91bf0b4ddcdfae3e9b92a9ab3343d7314f..689602d1858961a286de229244a9257b1f0072ce 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -15,6 +15,7 @@
 #include "io.h"
 #include "keylist.h"
 #include "recovery.h"
+#include "replicas.h"
 #include "super-io.h"
 #include "util.h"
 
@@ -1635,17 +1636,41 @@ int bch2_stripes_write(struct bch_fs *c, unsigned flags)
 
 static int bch2_stripes_read_fn(struct btree_trans *trans, struct bkey_s_c k)
 {
-       struct bkey deleted = KEY(0, 0, 0);
-       struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL };
+       const struct bch_stripe *s;
        struct bch_fs *c = trans->c;
+       struct stripe *m;
+       unsigned i;
        int ret = 0;
 
-       deleted.p = k.k->p;
+       if (k.k->type != KEY_TYPE_stripe)
+               return 0;
+
+       ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL);
+       if (ret)
+               return ret;
+
+       s = bkey_s_c_to_stripe(k).v;
+
+       m = genradix_ptr(&c->stripes[0], k.k->p.offset);
+       m->alive        = true;
+       m->sectors      = le16_to_cpu(s->sectors);
+       m->algorithm    = s->algorithm;
+       m->nr_blocks    = s->nr_blocks;
+       m->nr_redundant = s->nr_redundant;
+       m->blocks_nonempty = 0;
+
+       for (i = 0; i < s->nr_blocks; i++) {
+               m->block_sectors[i] =
+                       stripe_blockcount_get(s, i);
+               m->blocks_nonempty += !!m->block_sectors[i];
+               m->ptrs[i] = s->ptrs[i];
+       }
+
+       bch2_bkey_to_replicas(&m->r.e, k);
 
-       if (k.k->type == KEY_TYPE_stripe)
-               ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL) ?:
-                       bch2_mark_key(trans, old, k,
-                                     BTREE_TRIGGER_NOATOMIC);
+       spin_lock(&c->ec_stripes_heap_lock);
+       bch2_stripes_heap_update(c, m, k.k->p.offset);
+       spin_unlock(&c->ec_stripes_heap_lock);
 
        return ret;
 }