git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
bcachefs: bch2_inode_or_descendents_is_open()
author Kent Overstreet <kent.overstreet@linux.dev>
Thu, 3 Oct 2024 01:23:41 +0000 (21:23 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Wed, 9 Oct 2024 20:42:53 +0000 (16:42 -0400)
fsck can now correctly check if inodes in interior snapshot nodes are
open/in use.

- Tweak the vfs inode rhashtable so that the subvolume ID isn't hashed,
  meaning the same inode number in different subvolumes will hash to the
  same slot. Note that this is a hack, and will cause problems if anyone
  ever has the same file in many different snapshots open all at the
  same time.

- Then check if the snapshot of any of those subvolumes is a descendent
  of the snapshot ID being checked.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/fs.c
fs/bcachefs/fs.h
fs/bcachefs/fsck.c
fs/bcachefs/inode.c

index 23cae92d313d592ab72e0143ff661bde516a940e..e9e32d21f82d39e0379886d27de599ffc4b38a33 100644 (file)
@@ -157,6 +157,20 @@ static bool subvol_inum_eq(subvol_inum a, subvol_inum b)
        return a.subvol == b.subvol && a.inum == b.inum;
 }
 
+static u32 bch2_vfs_inode_hash_fn(const void *data, u32 len, u32 seed)
+{
+       const subvol_inum *inum = data;
+
+       return jhash(&inum->inum, sizeof(inum->inum), seed);
+}
+
+static u32 bch2_vfs_inode_obj_hash_fn(const void *data, u32 len, u32 seed)
+{
+       const struct bch_inode_info *inode = data;
+
+       return bch2_vfs_inode_hash_fn(&inode->ei_inum, sizeof(inode->ei_inum), seed);
+}
+
 static int bch2_vfs_inode_cmp_fn(struct rhashtable_compare_arg *arg,
                                 const void *obj)
 {
@@ -170,32 +184,93 @@ static const struct rhashtable_params bch2_vfs_inodes_params = {
        .head_offset            = offsetof(struct bch_inode_info, hash),
        .key_offset             = offsetof(struct bch_inode_info, ei_inum),
        .key_len                = sizeof(subvol_inum),
+       .hashfn                 = bch2_vfs_inode_hash_fn,
+       .obj_hashfn             = bch2_vfs_inode_obj_hash_fn,
        .obj_cmpfn              = bch2_vfs_inode_cmp_fn,
        .automatic_shrinking    = true,
 };
 
-static struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)
+int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p)
 {
-       return rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params);
-}
+       struct bch_fs *c = trans->c;
+       struct rhashtable *ht = &c->vfs_inodes_table;
+       subvol_inum inum = (subvol_inum) { .inum = p.offset };
+       DARRAY(u32) subvols;
+       int ret = 0;
 
-bool bch2_inode_is_open(struct bch_fs *c, struct bpos p)
-{
        if (!test_bit(BCH_FS_started, &c->flags))
                return false;
 
-       subvol_inum inum = {
-               .subvol = snapshot_t(c, p.snapshot)->subvol,
-               .inum   = p.offset,
-       };
+       darray_init(&subvols);
+restart_from_top:
+
+       /*
+        * Tweaked version of __rhashtable_lookup(); we need to get a list of
+        * subvolumes in which the given inode number is open.
+        *
+        * For this to work, we don't include the subvolume ID in the key that
+        * we hash - all inodes with the same inode number regardless of
+        * subvolume will hash to the same slot.
+        *
+        * This will be less than ideal if the same file is ever open
+        * simultaneously in many different snapshots:
+        */
+       rcu_read_lock();
+       struct rhash_lock_head __rcu *const *bkt;
+       struct rhash_head *he;
+       unsigned int hash;
+       struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
+restart:
+       hash = rht_key_hashfn(ht, tbl, &inum, bch2_vfs_inodes_params);
+       bkt = rht_bucket(tbl, hash);
+       do {
+               struct bch_inode_info *inode;
+
+               rht_for_each_entry_rcu_from(inode, he, rht_ptr_rcu(bkt), tbl, hash, hash) {
+                       if (inode->ei_inum.inum == inum.inum) {
+                               ret = darray_push_gfp(&subvols, inode->ei_inum.subvol,
+                                                     GFP_NOWAIT|__GFP_NOWARN);
+                               if (ret) {
+                                       rcu_read_unlock();
+                                       ret = darray_make_room(&subvols, 1);
+                                       if (ret)
+                                               goto err;
+                                       subvols.nr = 0;
+                                       goto restart_from_top;
+                               }
+                       }
+               }
+               /* An object might have been moved to a different hash chain,
+                * while we walk along it - better check and retry.
+                */
+       } while (he != RHT_NULLS_MARKER(bkt));
+
+       /* Ensure we see any new tables. */
+       smp_rmb();
+
+       tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+       if (unlikely(tbl))
+               goto restart;
+       rcu_read_unlock();
+
+       darray_for_each(subvols, i) {
+               u32 snap;
+               ret = bch2_subvolume_get_snapshot(trans, *i, &snap);
+               if (ret)
+                       goto err;
 
-       /* snapshot tree interior node, can't safely delete while online (yet) */
-       if (!inum.subvol) {
-               bch_warn_ratelimited(c, "%s(): snapshot %u has no subvol, unlinked but can't safely delete", __func__, p.snapshot);
-               return true;
+               ret = bch2_snapshot_is_ancestor(c, snap, p.snapshot);
+               if (ret)
+                       break;
        }
+err:
+       darray_exit(&subvols);
+       return ret;
+}
 
-       return __bch2_inode_hash_find(c, inum) != NULL;
+static struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)
+{
+       return rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params);
 }
 
 static void __wait_on_freeing_inode(struct bch_fs *c,
@@ -271,7 +346,8 @@ static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c,
 
        set_bit(EI_INODE_HASHED, &inode->ei_flags);
 retry:
-       if (unlikely(rhashtable_lookup_insert_fast(&c->vfs_inodes_table,
+       if (unlikely(rhashtable_lookup_insert_key(&c->vfs_inodes_table,
+                                       &inode->ei_inum,
                                        &inode->hash,
                                        bch2_vfs_inodes_params))) {
                old = bch2_inode_hash_find(c, trans, inode->ei_inum);
index 40dbd5774d0b6c8dd6d80e8fdac5c6c4ec9b9ce1..59f9f7ae728d2a1f6cbf7516cd93c999b8005ad3 100644 (file)
@@ -146,6 +146,8 @@ struct bch_inode_info *
 __bch2_create(struct mnt_idmap *, struct bch_inode_info *,
              struct dentry *, umode_t, dev_t, subvol_inum, unsigned);
 
+int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p);
+
 int bch2_fs_quota_transfer(struct bch_fs *,
                           struct bch_inode_info *,
                           struct bch_qid,
@@ -179,8 +181,6 @@ void bch2_inode_update_after_write(struct btree_trans *,
 int __must_check bch2_write_inode(struct bch_fs *, struct bch_inode_info *,
                                  inode_set_fn, void *, unsigned);
 
-bool bch2_inode_is_open(struct bch_fs *c, struct bpos p);
-
 int bch2_setattr_nonsize(struct mnt_idmap *,
                         struct bch_inode_info *,
                         struct iattr *);
@@ -198,7 +198,7 @@ int bch2_vfs_init(void);
 
 #define bch2_inode_update_after_write(_trans, _inode, _inode_u, _fields)       ({ do {} while (0); })
 
-static inline bool bch2_inode_is_open(struct bch_fs *c, struct bpos p) { return false; }
+static inline int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p) { return 0; }
 
 static inline void bch2_evict_subvolume_inodes(struct bch_fs *c,
                                               snapshot_id_list *s) {}
index f00a36f62323998d3690b95f4afb377173416978..a1087fd292e475d9fa5bfbe929d028a17081e82f 100644 (file)
@@ -1213,7 +1213,11 @@ static int check_inode(struct btree_trans *trans,
                        if (ret)
                                goto err;
                } else {
-                       if (fsck_err_on(!bch2_inode_is_open(c, k.k->p),
+                       ret = bch2_inode_or_descendents_is_open(trans, k.k->p);
+                       if (ret < 0)
+                               goto err;
+
+                       if (fsck_err_on(!ret,
                                        trans, inode_unlinked_and_not_open,
                                      "inode %llu%u unlinked and not open",
                                      u.bi_inum, u.bi_snapshot)) {
@@ -1221,6 +1225,7 @@ static int check_inode(struct btree_trans *trans,
                                bch_err_msg(c, ret, "in fsck deleting inode");
                                goto err_noprint;
                        }
+                       ret = 0;
                }
        }
 
index 9d6040d4ba3992f1701af36aeb47adf46f926355..2c037e84fbaed43502caf88be221800a9788ebdd 100644 (file)
@@ -1244,8 +1244,9 @@ next_parent:
        if (!unlinked)
                return 0;
 
-       if (bch2_inode_is_open(trans->c, pos))
-               return 0;
+       ret = lockrestart_do(trans, bch2_inode_or_descendents_is_open(trans, pos));
+       if (ret)
+               return ret < 0 ? ret : 0;
 
        ret = __bch2_inode_rm_snapshot(trans, pos.offset, pos.snapshot);
        if (ret)