git.ipfire.org Git - thirdparty/linux.git/commitdiff
bcachefs: Update data move path for snapshots
author Kent Overstreet <kent.overstreet@gmail.com>
Thu, 5 Aug 2021 04:41:41 +0000 (00:41 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:12 +0000 (17:09 -0400)
The data move path operates on existing extents, and not within a
subvolume as the regular IO paths do. It needs to change because it may
cause existing extents to be split, and when splitting an existing
extent in an ancestor snapshot we need to make sure the new split has
the same visibility in child snapshots as the existing extent.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
fs/bcachefs/btree_update_leaf.c
fs/bcachefs/fsck.c
fs/bcachefs/io.c
fs/bcachefs/migrate.c
fs/bcachefs/move.c
fs/bcachefs/subvolume.h

index 2fc134e3457251395ea8fb3f076358d0ff728e08..b4a2f2e322489584df60ce34205494c87b762a3b 100644 (file)
@@ -941,6 +941,43 @@ err:
        goto retry;
 }
 
+/* Returns 1 if a key at @pos exists in a snapshot that has pos.snapshot as an
+ * ancestor, 0 if none was found, or a negative error from the iterator. */
+static int check_pos_snapshot_overwritten(struct btree_trans *trans,
+                                         enum btree_id id,
+                                         struct bpos pos)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       int ret;
+
+       if (!btree_type_has_snapshots(id) ||    /* no snapshot table entry to consult */
+           !snapshot_t(c, pos.snapshot)->children[0])
+               return 0;
+
+       bch2_trans_iter_init(trans, &iter, id, pos,
+                            BTREE_ITER_NOT_EXTENTS|
+                            BTREE_ITER_ALL_SNAPSHOTS);
+       while (1) {
+               k = bch2_btree_iter_prev(&iter);
+               ret = bkey_err(k);
+               if (ret)
+                       break;
+
+               if (!k.k || bkey_cmp(pos, k.k->p))      /* out of keys, or left @pos */
+                       break;
+
+               if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, pos.snapshot)) {
+                       ret = 1;
+                       break;
+               }
+       }
+       bch2_trans_iter_exit(trans, &iter);
+
+       return ret;
+}
+
 static noinline int extent_front_merge(struct btree_trans *trans,
                                       struct btree_iter *iter,
                                       struct bkey_s_c k,
@@ -958,14 +995,40 @@ static noinline int extent_front_merge(struct btree_trans *trans,
 
        bkey_reassemble(update, k);
 
-       if (bch2_bkey_merge(c, bkey_i_to_s(update), bkey_i_to_s_c(*insert))) {
-               ret = bch2_btree_delete_at(trans, iter, flags);
-               if (ret)
-                       return ret;
+       if (!bch2_bkey_merge(c, bkey_i_to_s(update), bkey_i_to_s_c(*insert)))
+               return 0;
 
-               *insert = update;
-       }
+       ret =   check_pos_snapshot_overwritten(trans, iter->btree_id, k.k->p) ?:
+               check_pos_snapshot_overwritten(trans, iter->btree_id, (*insert)->k.p);
+       if (ret < 0)
+               return ret;
+       if (ret)
+               return 0;
+
+       ret = bch2_btree_delete_at(trans, iter, flags);
+       if (ret)
+               return ret;
+
+       *insert = update;
+       return 0;
+}
+
+static noinline int extent_back_merge(struct btree_trans *trans,
+                                     struct btree_iter *iter,
+                                     struct bkey_i *insert,
+                                     struct bkey_s_c k)
+{
+       struct bch_fs *c = trans->c;
+       int ret;
+       /* Merge @k into @insert unless either key's position is overwritten in a child snapshot. */
+       ret =   check_pos_snapshot_overwritten(trans, iter->btree_id, insert->k.p) ?:
+               check_pos_snapshot_overwritten(trans, iter->btree_id, k.k->p);
+       if (ret < 0)    /* error from the overwrite check */
+               return ret;
+       if (ret)        /* check returned 1: leave @insert unmerged, not an error */
+               return 0;
 
+       bch2_bkey_merge(c, bkey_i_to_s(insert), k);
        return 0;
 }
 
@@ -974,7 +1037,6 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
                                    struct bkey_i *insert,
                                    enum btree_update_flags flags)
 {
-       struct bch_fs *c = trans->c;
        struct btree_iter iter, update_iter;
        struct bpos start = bkey_start_pos(&insert->k);
        struct bkey_i *update;
@@ -1002,9 +1064,6 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
                goto next;
        }
 
-       if (!bkey_cmp(k.k->p, start))
-               goto next;
-
        while (bkey_cmp(insert->k.p, bkey_start_pos(k.k)) > 0) {
                bool front_split = bkey_cmp(bkey_start_pos(k.k), start) < 0;
                bool back_split  = bkey_cmp(k.k->p, insert->k.p) > 0;
@@ -1120,7 +1179,7 @@ next:
        }
 
        if (bch2_bkey_maybe_mergable(&insert->k, k.k))
-               bch2_bkey_merge(c, bkey_i_to_s(insert), k);
+               extent_back_merge(trans, &iter, insert, k);
 out:
        if (!bkey_deleted(&insert->k)) {
                /*
index b4a6b3d2ed077425e5230561d2763c62279a0fd1..f9a6a0b3ce7a57f0518debf3c67aacc84fcfdbe3 100644 (file)
@@ -473,24 +473,6 @@ out:
        return ret;
 }
 
-struct snapshots_seen {
-       struct bpos                     pos;
-       size_t                          nr;
-       size_t                          size;
-       u32                             *d;
-};
-
-static void snapshots_seen_exit(struct snapshots_seen *s)
-{
-       kfree(s->d);
-       s->d = NULL;
-}
-
-static void snapshots_seen_init(struct snapshots_seen *s)
-{
-       memset(s, 0, sizeof(*s));
-}
-
 static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, struct bpos pos)
 {
        pos.snapshot = snapshot_t(c, pos.snapshot)->equiv;
@@ -499,26 +481,11 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, str
                s->nr = 0;
        s->pos = pos;
 
-       if (s->nr == s->size) {
-               size_t new_size = max(s->size, 128UL) * 2;
-               u32 *d = krealloc(s->d, new_size * sizeof(s->d[0]), GFP_KERNEL);
-
-               if (!d) {
-                       bch_err(c, "error reallocating snapshots_seen table (new size %zu)",
-                               new_size);
-                       return -ENOMEM;
-               }
-
-               s->size = new_size;
-               s->d    = d;
-       }
-
        /* Might get called multiple times due to lock restarts */
        if (s->nr && s->d[s->nr - 1] == pos.snapshot)
                return 0;
 
-       s->d[s->nr++] = pos.snapshot;
-       return 0;
+       return snapshots_seen_add(c, s, pos.snapshot);
 }
 
 /**
index bd96c6bebe18476f750ed6bca39de9d5096e548d..002fd35e6bfea65bcf86046e77e6cd79a6db83cd 100644 (file)
@@ -1828,7 +1828,8 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
        if (!bch2_bkey_narrow_crcs(new, new_crc))
                goto out;
 
-       ret = bch2_trans_update(trans, &iter, new, 0);
+       ret = bch2_trans_update(trans, &iter, new,
+                               BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
 out:
        bch2_trans_iter_exit(trans, &iter);
        return ret;
index 1899326d9754eeebdf9c850ace262b2eb25fe16a..7c764ee4ea09c2182865081f3e560256725e6aa3 100644 (file)
@@ -48,7 +48,8 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
        bch2_trans_iter_init(&trans, &iter, btree_id, POS_MIN,
-                            BTREE_ITER_PREFETCH);
+                            BTREE_ITER_PREFETCH|
+                            BTREE_ITER_ALL_SNAPSHOTS);
 
        while ((k = bch2_btree_iter_peek(&iter)).k &&
               !(ret = bkey_err(k))) {
@@ -74,7 +75,8 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
                bch2_btree_iter_set_pos(&iter, bkey_start_pos(&sk.k->k));
 
                ret   = bch2_btree_iter_traverse(&iter) ?:
-                       bch2_trans_update(&trans, &iter, sk.k, 0) ?:
+                       bch2_trans_update(&trans, &iter, sk.k,
+                                         BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
                        bch2_trans_commit(&trans, NULL, NULL,
                                        BTREE_INSERT_NOFAIL);
 
index 9dc6684139de584c734563de2493fbcbc324298b..2e7d8e2fe331d364f4d078d6611ca596fe14b04a 100644 (file)
@@ -14,6 +14,7 @@
 #include "keylist.h"
 #include "move.h"
 #include "replicas.h"
+#include "subvolume.h"
 #include "super-io.h"
 #include "trace.h"
 
@@ -52,6 +53,81 @@ struct moving_context {
        wait_queue_head_t       wait;
 };
 
+/* For each key at @old_pos whose snapshot has old_pos.snapshot as an ancestor,
+ * queue a bkey_init()ed key at @new_pos in that snapshot; returns 0 or error. */
+static int insert_snapshot_whiteouts(struct btree_trans *trans,
+                                    enum btree_id id,
+                                    struct bpos old_pos,
+                                    struct bpos new_pos)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_iter iter, update_iter;
+       struct bkey_s_c k;
+       struct snapshots_seen s;
+       int ret;
+
+       if (!btree_type_has_snapshots(id))
+               return 0;
+
+       snapshots_seen_init(&s);
+
+       if (!bkey_cmp(old_pos, new_pos) ||      /* key end did not move: nothing to do */
+           !snapshot_t(c, old_pos.snapshot)->children[0])
+               return 0;
+
+       bch2_trans_iter_init(trans, &iter, id, old_pos,
+                            BTREE_ITER_NOT_EXTENTS|
+                            BTREE_ITER_ALL_SNAPSHOTS);
+       while (1) {
+next:
+               k = bch2_btree_iter_prev(&iter);
+               ret = bkey_err(k);
+               if (ret)
+                       break;
+
+               /* iter_prev may return no key at all; stop before touching k.k->p */
+               if (!k.k || bkey_cmp(old_pos, k.k->p))
+                       break;
+
+               if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, old_pos.snapshot)) {
+                       struct bkey_i *update;
+                       size_t i;
+
+                       for (i = 0; i < s.nr; i++)      /* skip: a recorded snapshot is an ancestor */
+                               if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, s.d[i]))
+                                       goto next;
+
+                       update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
+                       ret = PTR_ERR_OR_ZERO(update);
+                       if (ret)
+                               break;
+
+                       bkey_init(&update->k);
+                       update->k.p = new_pos;
+                       update->k.p.snapshot = k.k->p.snapshot;
+
+                       bch2_trans_iter_init(trans, &update_iter, id, update->k.p,
+                                            BTREE_ITER_NOT_EXTENTS|
+                                            BTREE_ITER_ALL_SNAPSHOTS|
+                                            BTREE_ITER_INTENT);
+                       ret   = bch2_btree_iter_traverse(&update_iter) ?:
+                               bch2_trans_update(trans, &update_iter, update,
+                                         BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+                       bch2_trans_iter_exit(trans, &update_iter);
+                       if (ret)
+                               break;
+
+                       ret = snapshots_seen_add(c, &s, k.k->p.snapshot);
+                       if (ret)
+                               break;
+               }
+       }
+       bch2_trans_iter_exit(trans, &iter);
+       snapshots_seen_exit(&s);
+
+       return ret;
+}
+
 int bch2_migrate_index_update(struct bch_write_op *op)
 {
        struct bch_fs *c = op->c;
@@ -165,7 +241,10 @@ int bch2_migrate_index_update(struct bch_write_op *op)
 
                next_pos = insert->k.p;
 
-               ret   = bch2_trans_update(&trans, &iter, insert, 0) ?:
+               ret   = insert_snapshot_whiteouts(&trans, m->btree_id,
+                                                 k.k->p, insert->k.p) ?:
+                       bch2_trans_update(&trans, &iter, insert,
+                               BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
                        bch2_trans_commit(&trans, &op->res,
                                op_journal_seq(op),
                                BTREE_INSERT_NOFAIL|
index cea4c665af327dcd5b0922b7a81b6dd966757ba4..0740c7b7f77288fe267a3880f3602c9475159f42 100644 (file)
@@ -54,6 +54,44 @@ static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ances
        return id == ancestor;
 }
 
+struct snapshots_seen {
+       struct bpos                     pos;    /* not touched by the helpers in this header */
+       size_t                          nr;     /* number of ids stored in d[] */
+       size_t                          size;   /* allocated capacity of d[], in entries */
+       u32                             *d;     /* krealloc()ed array of snapshot ids */
+};
+
+static inline void snapshots_seen_exit(struct snapshots_seen *s)
+{
+       kfree(s->d);    /* release the id array */
+       s->d = NULL;    /* no dangling pointer left behind; nr/size are not reset */
+}
+
+static inline void snapshots_seen_init(struct snapshots_seen *s)
+{
+       memset(s, 0, sizeof(*s));       /* zero every field: empty list, nothing allocated */
+}
+
+static inline int snapshots_seen_add(struct bch_fs *c, struct snapshots_seen *s, u32 id)
+{
+       if (s->nr == s->size) { /* full (or never allocated): grow before appending @id */
+               size_t new_size = max(s->size, 128UL) * 2;      /* 256 entries first, then doubling */
+               u32 *d = krealloc(s->d, new_size * sizeof(s->d[0]), GFP_KERNEL);
+
+               if (!d) {       /* s->d and s->size are left unchanged on failure */
+                       bch_err(c, "error reallocating snapshots_seen table (new size %zu)",
+                               new_size);
+                       return -ENOMEM;
+               }
+
+               s->size = new_size;
+               s->d    = d;
+       }
+       /* Append @id; returns 0 on success (-ENOMEM above if the array could not grow). */
+       s->d[s->nr++] = id;
+       return 0;
+}
+
 int bch2_fs_snapshots_check(struct bch_fs *);
 void bch2_fs_snapshots_exit(struct bch_fs *);
 int bch2_fs_snapshots_start(struct bch_fs *);