bcachefs: Whiteouts for snapshots
author    Kent Overstreet <kent.overstreet@gmail.com>
          Tue, 2 Feb 2021 22:09:10 +0000 (17:09 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
          Sun, 22 Oct 2023 21:09:12 +0000 (17:09 -0400)
This patch adds KEY_TYPE_whiteout, a new whiteout key type for snapshots,
emitted when we're deleting and the key being deleted is in an ancestor
snapshot - and updates the transaction update/commit path to use it.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
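
For context, the sketch below illustrates the decision this patch introduces: a
delete only needs to become a KEY_TYPE_whiteout when some ancestor snapshot
still holds a version of the key, so reads in the child snapshot don't fall
through to the ancestor's copy. The snapshot tree here (parent[] / has_key[])
is made up purely for illustration and is not bcachefs code; the real logic is
need_whiteout_for_snapshot() in the diff below.

#include <stdio.h>

/*
 * Illustrative sketch only (not the real bcachefs types or calls): a tiny
 * snapshot tree modelled as a parent[] array, showing why a delete in a
 * child snapshot must be recorded as a whiteout when an ancestor snapshot
 * still contains the key.
 */

#define NR_SNAPSHOTS 4

/* parent[s] == 0 means snapshot s has no parent */
static const unsigned parent[NR_SNAPSHOTS] = { 0, 0, 1, 2 };

/* which snapshots hold a visible version of the key being deleted */
static const int has_key[NR_SNAPSHOTS] = { 0, 1, 0, 0 };

static int key_in_ancestor(unsigned s)
{
	for (s = parent[s]; s; s = parent[s])
		if (has_key[s])
			return 1;
	return 0;
}

int main(void)
{
	unsigned s = 3;	/* deleting in leaf snapshot 3; ancestor 1 has the key */

	printf("delete in snapshot %u -> %s\n", s,
	       key_in_ancestor(s) ? "KEY_TYPE_whiteout" : "KEY_TYPE_deleted");
	return 0;
}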
fs/bcachefs/bcachefs_format.h
fs/bcachefs/bkey.h
fs/bcachefs/bkey_methods.c
fs/bcachefs/btree_update_leaf.c

index ae8f3a5bc787f941f142b3c65212056cc11c2bb9..f922302332ee137c9c206f8ad68a6a941b6fb188 100644 (file)
@@ -327,7 +327,7 @@ static inline void bkey_init(struct bkey *k)
 */
 #define BCH_BKEY_TYPES()                               \
        x(deleted,              0)                      \
-       x(discard,              1)                      \
+       x(whiteout,             1)                      \
        x(error,                2)                      \
        x(cookie,               3)                      \
        x(hash_whiteout,        4)                      \
@@ -361,7 +361,7 @@ struct bch_deleted {
        struct bch_val          v;
 };
 
-struct bch_discard {
+struct bch_whiteout {
        struct bch_val          v;
 };
 
index 904ceb67a0291d4f011ebc40384290364ab55eed..6a637a408a9f1a2c08985ba074e4e2d8ad2541e7 100644 (file)
@@ -63,7 +63,7 @@ static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes)
 #define bkey_deleted(_k)       ((_k)->type == KEY_TYPE_deleted)
 
 #define bkey_whiteout(_k)                              \
-       ((_k)->type == KEY_TYPE_deleted || (_k)->type == KEY_TYPE_discard)
+       ((_k)->type == KEY_TYPE_deleted || (_k)->type == KEY_TYPE_whiteout)
 
 enum bkey_lr_packed {
        BKEY_PACKED_BOTH,
index 42fdcc4487de6f1d6d50d9d8611fbaf4e1a503b1..3133db236b7b7a72129e7d0a476a00a8376d9da6 100644 (file)
@@ -31,7 +31,7 @@ static const char *deleted_key_invalid(const struct bch_fs *c,
        .key_invalid = deleted_key_invalid,             \
 }
 
-#define bch2_bkey_ops_discard (struct bkey_ops) {      \
+#define bch2_bkey_ops_whiteout (struct bkey_ops) {     \
        .key_invalid = deleted_key_invalid,             \
 }
 
@@ -101,6 +101,8 @@ const char *bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k)
 
 static unsigned bch2_key_types_allowed[] = {
        [BKEY_TYPE_extents] =
+               (1U << KEY_TYPE_deleted)|
+               (1U << KEY_TYPE_whiteout)|
                (1U << KEY_TYPE_error)|
                (1U << KEY_TYPE_cookie)|
                (1U << KEY_TYPE_extent)|
@@ -108,30 +110,43 @@ static unsigned bch2_key_types_allowed[] = {
                (1U << KEY_TYPE_reflink_p)|
                (1U << KEY_TYPE_inline_data),
        [BKEY_TYPE_inodes] =
+               (1U << KEY_TYPE_deleted)|
+               (1U << KEY_TYPE_whiteout)|
                (1U << KEY_TYPE_inode)|
                (1U << KEY_TYPE_inode_generation),
        [BKEY_TYPE_dirents] =
+               (1U << KEY_TYPE_deleted)|
+               (1U << KEY_TYPE_whiteout)|
                (1U << KEY_TYPE_hash_whiteout)|
                (1U << KEY_TYPE_dirent),
        [BKEY_TYPE_xattrs] =
+               (1U << KEY_TYPE_deleted)|
+               (1U << KEY_TYPE_whiteout)|
                (1U << KEY_TYPE_cookie)|
                (1U << KEY_TYPE_hash_whiteout)|
                (1U << KEY_TYPE_xattr),
        [BKEY_TYPE_alloc] =
+               (1U << KEY_TYPE_deleted)|
                (1U << KEY_TYPE_alloc)|
                (1U << KEY_TYPE_alloc_v2),
        [BKEY_TYPE_quotas] =
+               (1U << KEY_TYPE_deleted)|
                (1U << KEY_TYPE_quota),
        [BKEY_TYPE_stripes] =
+               (1U << KEY_TYPE_deleted)|
                (1U << KEY_TYPE_stripe),
        [BKEY_TYPE_reflink] =
+               (1U << KEY_TYPE_deleted)|
                (1U << KEY_TYPE_reflink_v)|
                (1U << KEY_TYPE_indirect_inline_data),
        [BKEY_TYPE_subvolumes] =
+               (1U << KEY_TYPE_deleted)|
                (1U << KEY_TYPE_subvolume),
        [BKEY_TYPE_snapshots] =
+               (1U << KEY_TYPE_deleted)|
                (1U << KEY_TYPE_snapshot),
        [BKEY_TYPE_btree] =
+               (1U << KEY_TYPE_deleted)|
                (1U << KEY_TYPE_btree_ptr)|
                (1U << KEY_TYPE_btree_ptr_v2),
 };
@@ -139,21 +154,18 @@ static unsigned bch2_key_types_allowed[] = {
 const char *__bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
                                enum btree_node_type type)
 {
-       unsigned key_types_allowed = (1U << KEY_TYPE_deleted)|
-               bch2_key_types_allowed[type] ;
-
        if (k.k->u64s < BKEY_U64s)
                return "u64s too small";
 
-       if (!(key_types_allowed & (1U << k.k->type)))
+       if (!(bch2_key_types_allowed[type] & (1U << k.k->type)))
                return "invalid key type for this btree";
 
        if (type == BKEY_TYPE_btree &&
            bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX)
                return "value too big";
 
-       if (btree_node_type_is_extents(type)) {
-               if ((k.k->size == 0) != bkey_deleted(k.k))
+       if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) {
+               if (k.k->size == 0)
                        return "bad size field";
 
                if (k.k->size > k.k->p.offset)
index 1922bf8236f7c212b8127528203407e97d179f6a..2fc134e3457251395ea8fb3f076358d0ff728e08 100644 (file)
@@ -1002,21 +1002,24 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
                goto next;
        }
 
-       if (!bkey_cmp(k.k->p, bkey_start_pos(&insert->k)))
+       if (!bkey_cmp(k.k->p, start))
                goto next;
 
        while (bkey_cmp(insert->k.p, bkey_start_pos(k.k)) > 0) {
+               bool front_split = bkey_cmp(bkey_start_pos(k.k), start) < 0;
+               bool back_split  = bkey_cmp(k.k->p, insert->k.p) > 0;
+
                /*
                 * If we're going to be splitting a compressed extent, note it
                 * so that __bch2_trans_commit() can increase our disk
                 * reservation:
                 */
-               if (bkey_cmp(bkey_start_pos(k.k), start) < 0 &&
-                   bkey_cmp(k.k->p, insert->k.p) > 0 &&
+               if (((front_split && back_split) ||
+                    ((front_split || back_split) && k.k->p.snapshot != insert->k.p.snapshot)) &&
                    (compressed_sectors = bch2_bkey_sectors_compressed(k)))
                        trans->extra_journal_res += compressed_sectors;
 
-               if (bkey_cmp(bkey_start_pos(k.k), start) < 0) {
+               if (front_split) {
                        update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
                        if ((ret = PTR_ERR_OR_ZERO(update)))
                                goto err;
@@ -1027,6 +1030,32 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
 
                        bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p,
                                             BTREE_ITER_NOT_EXTENTS|
+                                            BTREE_ITER_ALL_SNAPSHOTS|
+                                            BTREE_ITER_INTENT);
+                       ret   = bch2_btree_iter_traverse(&update_iter) ?:
+                               bch2_trans_update(trans, &update_iter, update,
+                                                 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
+                                                 flags);
+                       bch2_trans_iter_exit(trans, &update_iter);
+
+                       if (ret)
+                               goto err;
+               }
+
+               if (k.k->p.snapshot != insert->k.p.snapshot &&
+                   (front_split || back_split)) {
+                       update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+                       if ((ret = PTR_ERR_OR_ZERO(update)))
+                               goto err;
+
+                       bkey_reassemble(update, k);
+
+                       bch2_cut_front(start, update);
+                       bch2_cut_back(insert->k.p, update);
+
+                       bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p,
+                                            BTREE_ITER_NOT_EXTENTS|
+                                            BTREE_ITER_ALL_SNAPSHOTS|
                                             BTREE_ITER_INTENT);
                        ret   = bch2_btree_iter_traverse(&update_iter) ?:
                                bch2_trans_update(trans, &update_iter, update,
@@ -1038,12 +1067,32 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
                }
 
                if (bkey_cmp(k.k->p, insert->k.p) <= 0) {
-                       ret = bch2_btree_delete_at(trans, &iter, flags);
+                       update = bch2_trans_kmalloc(trans, sizeof(*update));
+                       if ((ret = PTR_ERR_OR_ZERO(update)))
+                               goto err;
+
+                       bkey_init(&update->k);
+                       update->k.p = k.k->p;
+
+                       if (insert->k.p.snapshot != k.k->p.snapshot) {
+                               update->k.p.snapshot = insert->k.p.snapshot;
+                               update->k.type = KEY_TYPE_whiteout;
+                       }
+
+                       bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p,
+                                            BTREE_ITER_NOT_EXTENTS|
+                                            BTREE_ITER_INTENT);
+                       ret   = bch2_btree_iter_traverse(&update_iter) ?:
+                               bch2_trans_update(trans, &update_iter, update,
+                                                 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
+                                                 flags);
+                       bch2_trans_iter_exit(trans, &update_iter);
+
                        if (ret)
                                goto err;
                }
 
-               if (bkey_cmp(k.k->p, insert->k.p) > 0) {
+               if (back_split) {
                        update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
                        if ((ret = PTR_ERR_OR_ZERO(update)))
                                goto err;
@@ -1051,10 +1100,15 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
                        bkey_reassemble(update, k);
                        bch2_cut_front(insert->k.p, update);
 
-                       ret = bch2_trans_update(trans, &iter, update, flags);
+                       bch2_trans_copy_iter(&update_iter, &iter);
+                       update_iter.pos = update->k.p;
+                       ret   = bch2_trans_update(trans, &update_iter, update,
+                                                 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
+                                                 flags);
+                       bch2_trans_iter_exit(trans, &update_iter);
+
                        if (ret)
                                goto err;
-
                        goto out;
                }
 next:
@@ -1086,6 +1140,39 @@ err:
        return ret;
 }
 
+/*
+ * When deleting, check if we need to emit a whiteout (because we're overwriting
+ * something in an ancestor snapshot)
+ */
+static int need_whiteout_for_snapshot(struct btree_trans *trans,
+                                     enum btree_id btree_id, struct bpos pos)
+{
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       u32 snapshot = pos.snapshot;
+       int ret;
+
+       if (!bch2_snapshot_parent(trans->c, pos.snapshot))
+               return 0;
+
+       pos.snapshot++;
+
+       for_each_btree_key(trans, iter, btree_id, pos,
+                          BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+               if (bkey_cmp(k.k->p, pos))
+                       break;
+
+               if (bch2_snapshot_is_ancestor(trans->c, snapshot,
+                                             k.k->p.snapshot)) {
+                       ret = !bkey_whiteout(k.k);
+                       break;
+               }
+       }
+       bch2_trans_iter_exit(trans, &iter);
+
+       return ret;
+}
+
 int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
                      struct bkey_i *k, enum btree_update_flags flags)
 {
@@ -1118,6 +1205,16 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
                       btree_insert_entry_cmp(i - 1, i) >= 0);
 #endif
 
+       if (bkey_deleted(&n.k->k) &&
+           (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)) {
+               int ret = need_whiteout_for_snapshot(trans, n.btree_id, n.k->k.p);
+               if (unlikely(ret < 0))
+                       return ret;
+
+               if (ret)
+                       n.k->k.type = KEY_TYPE_whiteout;
+       }
+
        /*
         * Pending updates are kept sorted: first, find position of new update,
         * then delete/trim any updates the new update overwrites: