bcachefs: Kill struct bucket_mark
author     Kent Overstreet <kent.overstreet@gmail.com>
           Mon, 14 Feb 2022 05:07:38 +0000 (00:07 -0500)
committer  Kent Overstreet <kent.overstreet@linux.dev>
           Sun, 22 Oct 2023 21:09:29 +0000 (17:09 -0400)
This switches struct bucket to using a lock, instead of cmpxchg. And now
that the protected members no longer need to fit into a u64, we can
expand the sector counts to 32 bits.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
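
A note on the pattern this commit introduces: the old bucket_cmpxchg() loop packed
gen, data_type and two 16-bit sector counts into a single atomic64 so an update could
be retried with cmpxchg; the new code takes a one-byte per-bucket spinlock, mutates
the fields in place, and snapshots the bucket before and after for usage accounting.
A minimal sketch of that pattern, condensed from the bucket_lock()/bucket_unlock()
helpers added in buckets.h and the caller in bch2_mark_metadata_bucket() below
(declarations and error handling omitted; all names are taken from the patch):

	static inline void bucket_lock(struct bucket *b)
	{
		while (xchg(&b->lock, 1))	/* spin until we own the byte */
			cpu_relax();
	}

	static inline void bucket_unlock(struct bucket *b)
	{
		smp_store_release(&b->lock, 0);	/* pairs with the xchg() above */
	}

	bucket_lock(g);
	old = *g;				/* snapshot for usage accounting */

	g->data_type	  = data_type;		/* fields are plain members now */
	g->dirty_sectors += sectors;		/* u32: no longer capped at U16_MAX */

	new = *g;				/* snapshot of the updated bucket */
	bucket_unlock(g);

	bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
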
fs/bcachefs/bcachefs_format.h
fs/bcachefs/btree_gc.c
fs/bcachefs/buckets.c
fs/bcachefs/buckets.h
fs/bcachefs/buckets_types.h

diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index ee683d08e8aef39b777de527fbd125204a213f3f..5faa42baeeba135474514682a9cc001d4b86a42a 100644
@@ -903,8 +903,8 @@ struct bch_alloc_v2 {
 #define BCH_ALLOC_FIELDS_V2()                  \
        x(read_time,            64)             \
        x(write_time,           64)             \
-       x(dirty_sectors,        16)             \
-       x(cached_sectors,       16)             \
+       x(dirty_sectors,        32)             \
+       x(cached_sectors,       32)             \
        x(stripe,               32)             \
        x(stripe_redundancy,    8)
 
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index f66b2ef03c3ab5b673c2905da61978dbabb7062e..747667ce131dcb5a348e8e4e8f8dbf9f9fe18a1e 100644
@@ -571,37 +571,37 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
                                (printbuf_reset(&buf),
                                 bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) {
                        if (!p.ptr.cached) {
-                               g->_mark.gen            = p.ptr.gen;
                                g->gen_valid            = true;
+                               g->gen                  = p.ptr.gen;
                        } else {
                                do_update = true;
                        }
                }
 
-               if (fsck_err_on(gen_cmp(p.ptr.gen, g->mark.gen) > 0, c,
+               if (fsck_err_on(gen_cmp(p.ptr.gen, g->gen) > 0, c,
                                "bucket %u:%zu data type %s ptr gen in the future: %u > %u\n"
                                "while marking %s",
                                p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
                                bch2_data_types[ptr_data_type(k->k, &p.ptr)],
-                               p.ptr.gen, g->mark.gen,
+                               p.ptr.gen, g->gen,
                                (printbuf_reset(&buf),
                                 bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) {
                        if (!p.ptr.cached) {
-                               g->_mark.gen            = p.ptr.gen;
                                g->gen_valid            = true;
-                               g->_mark.data_type      = 0;
-                               g->_mark.dirty_sectors  = 0;
-                               g->_mark.cached_sectors = 0;
+                               g->gen                  = p.ptr.gen;
+                               g->data_type            = 0;
+                               g->dirty_sectors        = 0;
+                               g->cached_sectors       = 0;
                                set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
                        } else {
                                do_update = true;
                        }
                }
 
-               if (fsck_err_on(gen_cmp(g->mark.gen, p.ptr.gen) > BUCKET_GC_GEN_MAX, c,
+               if (fsck_err_on(gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX, c,
                                "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
                                "while marking %s",
-                               p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->mark.gen,
+                               p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen,
                                bch2_data_types[ptr_data_type(k->k, &p.ptr)],
                                p.ptr.gen,
                                (printbuf_reset(&buf),
@@ -609,30 +609,30 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
                        do_update = true;
 
                if (fsck_err_on(!p.ptr.cached &&
-                               gen_cmp(p.ptr.gen, g->mark.gen) < 0, c,
+                               gen_cmp(p.ptr.gen, g->gen) < 0, c,
                                "bucket %u:%zu data type %s stale dirty ptr: %u < %u\n"
                                "while marking %s",
                                p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
                                bch2_data_types[ptr_data_type(k->k, &p.ptr)],
-                               p.ptr.gen, g->mark.gen,
+                               p.ptr.gen, g->gen,
                                (printbuf_reset(&buf),
                                 bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))
                        do_update = true;
 
-               if (data_type != BCH_DATA_btree && p.ptr.gen != g->mark.gen)
+               if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen)
                        continue;
 
-               if (fsck_err_on(g->mark.data_type &&
-                               g->mark.data_type != data_type, c,
+               if (fsck_err_on(g->data_type &&
+                               g->data_type != data_type, c,
                                "bucket %u:%zu different types of data in same bucket: %s, %s\n"
                                "while marking %s",
                                p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
-                               bch2_data_types[g->mark.data_type],
+                               bch2_data_types[g->data_type],
                                bch2_data_types[data_type],
                                (printbuf_reset(&buf),
                                 bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) {
                        if (data_type == BCH_DATA_btree) {
-                               g->_mark.data_type      = data_type;
+                               g->data_type    = data_type;
                                set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
                        } else {
                                do_update = true;
@@ -692,7 +692,7 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
                                struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
                                struct bucket *g = PTR_GC_BUCKET(ca, ptr);
 
-                               ptr->gen = g->mark.gen;
+                               ptr->gen = g->gen;
                        }
                } else {
                        bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, ({
@@ -701,12 +701,12 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
                                enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, ptr);
 
                                (ptr->cached &&
-                                (!g->gen_valid || gen_cmp(ptr->gen, g->mark.gen) > 0)) ||
+                                (!g->gen_valid || gen_cmp(ptr->gen, g->gen) > 0)) ||
                                (!ptr->cached &&
-                                gen_cmp(ptr->gen, g->mark.gen) < 0) ||
-                               gen_cmp(g->mark.gen, ptr->gen) > BUCKET_GC_GEN_MAX ||
-                               (g->mark.data_type &&
-                                g->mark.data_type != data_type);
+                                gen_cmp(ptr->gen, g->gen) < 0) ||
+                               gen_cmp(g->gen, ptr->gen) > BUCKET_GC_GEN_MAX ||
+                               (g->data_type &&
+                                g->data_type != data_type);
                        }));
 again:
                        ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
@@ -1325,10 +1325,10 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
 {
        struct bch_fs *c = trans->c;
        struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode);
-       struct bucket *g;
+       struct bucket gc;
        struct bkey_s_c k;
        struct bkey_i_alloc_v4 *a;
-       struct bch_alloc_v4 old, new, gc;
+       struct bch_alloc_v4 old, new;
        int ret;
 
        k = bch2_btree_iter_peek_slot(iter);
@@ -1340,15 +1340,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
        new = old;
 
        percpu_down_read(&c->mark_lock);
-       g       = gc_bucket(ca, iter->pos.offset);
-       gc = (struct bch_alloc_v4) {
-               .gen            = g->mark.gen,
-               .data_type      = g->mark.data_type,
-               .dirty_sectors  = g->mark.dirty_sectors,
-               .cached_sectors = g->mark.cached_sectors,
-               .stripe         = g->stripe,
-               .stripe_redundancy = g->stripe_redundancy,
-       };
+       gc = *gc_bucket(ca, iter->pos.offset);
        percpu_up_read(&c->mark_lock);
 
        if (metadata_only &&
@@ -1365,8 +1357,8 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
                        "bucket %llu:%llu gen %u data type %s has wrong " #_f   \
                        ": got %u, should be %u",                       \
                        iter->pos.inode, iter->pos.offset,              \
-                       new.gen,                                        \
-                       bch2_data_types[new.data_type],                 \
+                       gc.gen,                                         \
+                       bch2_data_types[gc.data_type],                  \
                        new._f, gc._f))                                 \
                new._f = gc._f;                                         \
 
@@ -1467,17 +1459,16 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
 
                bch2_alloc_to_v4(k, &a);
 
-               g->_mark.gen            = a.gen;
-               g->gen_valid            = 1;
+               g->gen_valid    = 1;
+               g->gen          = a.gen;
 
                if (metadata_only &&
                    (a.data_type == BCH_DATA_user ||
                     a.data_type == BCH_DATA_cached ||
                     a.data_type == BCH_DATA_parity)) {
-                       g->_mark.data_type      = a.data_type;
-                       g->_mark.dirty_sectors  = a.dirty_sectors;
-                       g->_mark.cached_sectors = a.cached_sectors;
-                       g->_mark.stripe         = a.stripe != 0;
+                       g->data_type            = a.data_type;
+                       g->dirty_sectors        = a.dirty_sectors;
+                       g->cached_sectors       = a.cached_sectors;
                        g->stripe               = a.stripe;
                        g->stripe_redundancy    = a.stripe_redundancy;
                }
@@ -1503,12 +1494,12 @@ static void bch2_gc_alloc_reset(struct bch_fs *c, bool metadata_only)
 
                for_each_bucket(g, buckets) {
                        if (metadata_only &&
-                           (g->mark.data_type == BCH_DATA_user ||
-                            g->mark.data_type == BCH_DATA_cached ||
-                            g->mark.data_type == BCH_DATA_parity))
+                           (g->data_type == BCH_DATA_user ||
+                            g->data_type == BCH_DATA_cached ||
+                            g->data_type == BCH_DATA_parity))
                                continue;
-                       g->_mark.dirty_sectors = 0;
-                       g->_mark.cached_sectors = 0;
+                       g->dirty_sectors = 0;
+                       g->cached_sectors = 0;
                }
        };
 }
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 572d56676c697c9ccd70aec9ba38e958eccb4876..31de8035e86d20cd440cb8dd9846c9d6db1387ee 100644
@@ -349,7 +349,7 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
 }
 
 static void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca,
-                                   struct bucket_mark old, struct bucket_mark new,
+                                   struct bucket old, struct bucket new,
                                    u64 journal_seq, bool gc)
 {
        struct bch_alloc_v4 old_a = {
@@ -586,20 +586,19 @@ int bch2_mark_alloc(struct btree_trans *trans,
        bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, gc);
 
        if (gc) {
-               struct bucket_mark old_m, m;
                struct bucket *g = gc_bucket(ca, new.k->p.offset);
 
-               old_m = bucket_cmpxchg(g, m, ({
-                       m.gen                   = new_a.gen;
-                       m.data_type             = new_a.data_type;
-                       m.dirty_sectors         = new_a.dirty_sectors;
-                       m.cached_sectors        = new_a.cached_sectors;
-                       m.stripe                = new_a.stripe != 0;
-               }));
+               bucket_lock(g);
 
                g->gen_valid            = 1;
+               g->gen                  = new_a.gen;
+               g->data_type            = new_a.data_type;
                g->stripe               = new_a.stripe;
                g->stripe_redundancy    = new_a.stripe_redundancy;
+               g->dirty_sectors        = new_a.dirty_sectors;
+               g->cached_sectors       = new_a.cached_sectors;
+
+               bucket_unlock(g);
        }
        percpu_up_read(&c->mark_lock);
 
@@ -625,23 +624,12 @@ int bch2_mark_alloc(struct btree_trans *trans,
        return 0;
 }
 
-#define checked_add(a, b)                                      \
-({                                                             \
-       unsigned _res = (unsigned) (a) + (b);                   \
-       bool overflow = _res > U16_MAX;                         \
-       if (overflow)                                           \
-               _res = U16_MAX;                                 \
-       (a) = _res;                                             \
-       overflow;                                               \
-})
-
 void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
                               size_t b, enum bch_data_type data_type,
                               unsigned sectors, struct gc_pos pos,
                               unsigned flags)
 {
-       struct bucket *g;
-       struct bucket_mark old, new;
+       struct bucket old, new, *g;
        bool overflow;
 
        BUG_ON(!(flags & BTREE_TRIGGER_GC));
@@ -656,10 +644,16 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
 
        percpu_down_read(&c->mark_lock);
        g = gc_bucket(ca, b);
-       old = bucket_cmpxchg(g, new, ({
-               new.data_type   = data_type;
-               overflow = checked_add(new.dirty_sectors, sectors);
-       }));
+
+       bucket_lock(g);
+       old = *g;
+
+       g->data_type = data_type;
+       g->dirty_sectors += sectors;
+       overflow = g->dirty_sectors < sectors;
+
+       new = *g;
+       bucket_unlock(g);
 
        bch2_fs_inconsistent_on(old.data_type &&
                                old.data_type != data_type, c,
@@ -693,7 +687,7 @@ static int check_bucket_ref(struct bch_fs *c,
                            const struct bch_extent_ptr *ptr,
                            s64 sectors, enum bch_data_type ptr_data_type,
                            u8 b_gen, u8 bucket_data_type,
-                           u16 dirty_sectors, u16 cached_sectors)
+                           u32 dirty_sectors, u32 cached_sectors)
 {
        struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
        size_t bucket_nr = PTR_BUCKET_NR(ca, ptr);
@@ -761,7 +755,7 @@ static int check_bucket_ref(struct bch_fs *c,
                goto err;
        }
 
-       if ((unsigned) (bucket_sectors + sectors) > U16_MAX) {
+       if ((unsigned) (bucket_sectors + sectors) > U32_MAX) {
                bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
                        "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U16_MAX\n"
                        "while marking %s",
@@ -792,8 +786,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
        s64 sectors = parity ? le16_to_cpu(s->sectors) : 0;
        const struct bch_extent_ptr *ptr = s->ptrs + ptr_idx;
        struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-       struct bucket *g;
-       struct bucket_mark new, old;
+       struct bucket old, new, *g;
        struct printbuf buf = PRINTBUF;
        int ret = 0;
 
@@ -805,33 +798,37 @@ static int mark_stripe_bucket(struct btree_trans *trans,
        buf.atomic++;
        g = PTR_GC_BUCKET(ca, ptr);
 
-       if (g->mark.dirty_sectors ||
+       if (g->dirty_sectors ||
            (g->stripe && g->stripe != k.k->p.offset)) {
                bch2_fs_inconsistent(c,
                              "bucket %u:%zu gen %u: multiple stripes using same bucket\n%s",
-                             ptr->dev, PTR_BUCKET_NR(ca, ptr), g->mark.gen,
+                             ptr->dev, PTR_BUCKET_NR(ca, ptr), g->gen,
                              (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
                ret = -EINVAL;
                goto err;
        }
 
-       old = bucket_cmpxchg(g, new, ({
-               ret = check_bucket_ref(c, k, ptr, sectors, data_type,
-                                      new.gen, new.data_type,
-                                      new.dirty_sectors, new.cached_sectors);
-               if (ret)
-                       goto err;
+       bucket_lock(g);
+       old = *g;
 
-               new.dirty_sectors += sectors;
-               if (data_type)
-                       new.data_type           = data_type;
+       ret = check_bucket_ref(c, k, ptr, sectors, data_type,
+                              g->gen, g->data_type,
+                              g->dirty_sectors, g->cached_sectors);
+       if (ret) {
+               bucket_unlock(g);
+               goto err;
+       }
 
-               new.stripe = true;
-       }));
+       g->dirty_sectors += sectors;
+       if (data_type)
+               g->data_type = data_type;
 
        g->stripe               = k.k->p.offset;
        g->stripe_redundancy    = s->nr_redundant;
 
+       new = *g;
+       bucket_unlock(g);
+
        bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
 err:
        percpu_up_read(&c->mark_lock);
@@ -844,9 +841,9 @@ static int __mark_pointer(struct btree_trans *trans,
                          const struct bch_extent_ptr *ptr,
                          s64 sectors, enum bch_data_type ptr_data_type,
                          u8 bucket_gen, u8 *bucket_data_type,
-                         u16 *dirty_sectors, u16 *cached_sectors)
+                         u32 *dirty_sectors, u32 *cached_sectors)
 {
-       u16 *dst_sectors = !ptr->cached
+       u32 *dst_sectors = !ptr->cached
                ? dirty_sectors
                : cached_sectors;
        int ret = check_bucket_ref(trans->c, k, ptr, sectors, ptr_data_type,
@@ -870,11 +867,9 @@ static int bch2_mark_pointer(struct btree_trans *trans,
 {
        u64 journal_seq = trans->journal_res.seq;
        struct bch_fs *c = trans->c;
-       struct bucket_mark old, new;
        struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
-       struct bucket *g;
+       struct bucket old, new, *g;
        u8 bucket_data_type;
-       u64 v;
        int ret = 0;
 
        BUG_ON(!(flags & BTREE_TRIGGER_GC));
@@ -882,28 +877,25 @@ static int bch2_mark_pointer(struct btree_trans *trans,
        percpu_down_read(&c->mark_lock);
        g = PTR_GC_BUCKET(ca, &p.ptr);
 
-       v = atomic64_read(&g->_mark.v);
-       do {
-               new.v.counter = old.v.counter = v;
-               bucket_data_type = new.data_type;
-
-               ret = __mark_pointer(trans, k, &p.ptr, sectors,
-                                    data_type, new.gen,
-                                    &bucket_data_type,
-                                    &new.dirty_sectors,
-                                    &new.cached_sectors);
-               if (ret)
-                       goto err;
+       bucket_lock(g);
+       old = *g;
 
-               new.data_type = bucket_data_type;
+       bucket_data_type = g->data_type;
 
-               if (flags & BTREE_TRIGGER_NOATOMIC) {
-                       g->_mark = new;
-                       break;
-               }
-       } while ((v = atomic64_cmpxchg(&g->_mark.v,
-                             old.v.counter,
-                             new.v.counter)) != old.v.counter);
+       ret = __mark_pointer(trans, k, &p.ptr, sectors,
+                            data_type, g->gen,
+                            &bucket_data_type,
+                            &g->dirty_sectors,
+                            &g->cached_sectors);
+       if (ret) {
+               bucket_unlock(g);
+               goto err;
+       }
+
+       g->data_type = bucket_data_type;
+
+       new = *g;
+       bucket_unlock(g);
 
        bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
 err:
@@ -1404,25 +1396,18 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
 {
        struct btree_iter iter;
        struct bkey_i_alloc_v4 *a;
-       u16 dirty_sectors, cached_sectors;
        int ret;
 
        a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(trans->c, &p.ptr));
        if (IS_ERR(a))
                return PTR_ERR(a);
 
-       dirty_sectors   = a->v.dirty_sectors;
-       cached_sectors  = a->v.cached_sectors;
-
        ret = __mark_pointer(trans, k, &p.ptr, sectors, data_type,
                             a->v.gen, &a->v.data_type,
-                            &dirty_sectors, &cached_sectors);
+                            &a->v.dirty_sectors, &a->v.cached_sectors);
        if (ret)
                goto out;
 
-       a->v.dirty_sectors      = dirty_sectors;
-       a->v.cached_sectors     = cached_sectors;
-
        ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
        if (ret)
                goto out;
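
A note on the overflow handling in this file: with dirty_sectors and cached_sectors
now u32, the saturating checked_add() macro (which clamped at U16_MAX) is deleted and
bch2_mark_metadata_bucket() instead detects wrap-around after a plain addition. A
small standalone illustration of the two behaviours; checked_add_u16() is just the
deleted macro rewritten as a function for this sketch, and nothing here is kernel API:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* old behaviour: saturate a u16 counter at U16_MAX, report overflow */
	static bool checked_add_u16(uint16_t *a, unsigned b)
	{
		unsigned res = (unsigned) *a + b;
		bool overflow = res > UINT16_MAX;

		*a = overflow ? UINT16_MAX : res;
		return overflow;
	}

	/* new behaviour: plain u32 addition, overflow means unsigned wrap-around */
	static bool add_u32(uint32_t *a, unsigned b)
	{
		*a += b;
		return *a < b;	/* same test as g->dirty_sectors < sectors above */
	}

	int main(void)
	{
		uint16_t old_sectors = 65000;
		uint32_t new_sectors = 65000;

		printf("u16: overflow=%d value=%u\n",
		       checked_add_u16(&old_sectors, 1000), (unsigned) old_sectors);
		printf("u32: overflow=%d value=%u\n",
		       add_u32(&new_sectors, 1000), (unsigned) new_sectors);
		return 0;
	}
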
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 7ae1feadf4c0685d5bce99a8d02294e2c466200e..31a56f1f4fcaa3fddc96f474170cc571ab3a5dd8 100644
        for (_b = (_buckets)->b + (_buckets)->first_bucket;     \
             _b < (_buckets)->b + (_buckets)->nbuckets; _b++)
 
-#define bucket_cmpxchg(g, new, expr)                           \
-({                                                             \
-       struct bucket *_g = g;                                  \
-       u64 _v = atomic64_read(&(g)->_mark.v);                  \
-       struct bucket_mark _old;                                \
-                                                               \
-       do {                                                    \
-               (new).v.counter = _old.v.counter = _v;          \
-               expr;                                           \
-       } while ((_v = atomic64_cmpxchg(&(_g)->_mark.v,         \
-                              _old.v.counter,                  \
-                              (new).v.counter)) != _old.v.counter);\
-       _old;                                                   \
-})
+static inline void bucket_unlock(struct bucket *b)
+{
+       smp_store_release(&b->lock, 0);
+}
+
+static inline void bucket_lock(struct bucket *b)
+{
+       while (xchg(&b->lock, 1))
+               cpu_relax();
+}
 
 static inline struct bucket_array *gc_bucket_array(struct bch_dev *ca)
 {
diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h
index f7bf5c1d732f2da6029e45bf7065530b448de6aa..e79a33795bf936144675bda12f2659f5b49c90aa 100644
@@ -7,29 +7,15 @@
 
 #define BUCKET_JOURNAL_SEQ_BITS                16
 
-struct bucket_mark {
-       union {
-       atomic64_t      v;
-
-       struct {
-       u8              gen;
-       u8              data_type:3,
-                       stripe:1;
-       u16             dirty_sectors;
-       u16             cached_sectors;
-       };
-       };
-};
-
 struct bucket {
-       union {
-               struct bucket_mark      _mark;
-               const struct bucket_mark mark;
-       };
-
-       unsigned                        gen_valid:1;
-       u8                              stripe_redundancy;
-       u32                             stripe;
+       u8                      lock;
+       u8                      gen_valid:1;
+       u8                      data_type:7;
+       u8                      gen;
+       u8                      stripe_redundancy;
+       u32                     stripe;
+       u32                     dirty_sectors;
+       u32                     cached_sectors;
 };
 
 struct bucket_array {