Merge tag 'bcachefs-2024-03-19' of https://evilpiepirate.org/git/bcachefs
author	Linus Torvalds <torvalds@linux-foundation.org>
Wed, 20 Mar 2024 00:27:25 +0000 (17:27 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
Wed, 20 Mar 2024 00:27:25 +0000 (17:27 -0700)
Pull bcachefs fixes from Kent Overstreet:
 "Assorted bugfixes.

  Most are fixes for simple assertion pops; the most significant fix is
  for a deadlock in recovery when we have to rewrite large numbers of
  btree nodes to fix errors. This was incorrectly running out of the
  same workqueue as the core interior btree update path - we now give it
  its own single threaded workqueue.

  This was visible to users as "bch2_btree_update_start(): error:
  BCH_ERR_journal_reclaim_would_deadlock" - and then recovery hanging"
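
In code terms, the deadlock fix boils down to the fs/bcachefs/btree_update_interior.c change further below: async btree node rewrites get their own dedicated ordered (single-threaded) workqueue rather than sharing btree_interior_update_worker with the core interior update path. A condensed sketch of the new code paths, with the surrounding functions and error handling elided (see the full hunks below for the actual change):

        /* allocated once at fs init (torn down with destroy_workqueue() at exit) */
        c->btree_node_rewrite_worker =
                alloc_ordered_workqueue("btree_node_rewrite", WQ_UNBOUND);
        if (!c->btree_node_rewrite_worker)
                return -BCH_ERR_ENOMEM_btree_interior_update_worker_init;

        /*
         * async node rewrites are queued here instead of on
         * btree_interior_update_worker, so a recovery-time flood of
         * rewrites can no longer starve interior btree updates and
         * deadlock journal reclaim
         */
        queue_work(c->btree_node_rewrite_worker, &a->work);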

* tag 'bcachefs-2024-03-19' of https://evilpiepirate.org/git/bcachefs:
  bcachefs: Fix lost wakeup on journal shutdown
  bcachefs: Fix deadlock in bch2_btree_update_start()
  bcachefs: ratelimit errors from async_btree_node_rewrite
  bcachefs: Run check_topology() first
  bcachefs: Improve bch2_fatal_error()
  bcachefs: Fix lost transaction restart error
  bcachefs: Don't corrupt journal keys gap buffer when dropping alloc info
  bcachefs: fix for building in userspace
  bcachefs: bch2_snapshot_is_ancestor() now safe to call in early recovery
  bcachefs: Fix nested transaction restart handling in bch2_bucket_gens_init()
  bcachefs: Improve sysfs internal/btree_updates
  bcachefs: Split out btree_node_rewrite_worker
  bcachefs: Fix locking in bch2_alloc_write_key()
  bcachefs: Avoid extent entry type assertions in .invalid()
  bcachefs: Fix spurious -BCH_ERR_transaction_restart_nested
  bcachefs: Fix check_key_has_snapshot() call
  bcachefs: Change "accounting overran journal reservation" to a warning

26 files changed:
fs/bcachefs/alloc_background.c
fs/bcachefs/alloc_foreground.c
fs/bcachefs/bcachefs.h
fs/bcachefs/btree_gc.c
fs/bcachefs/btree_io.c
fs/bcachefs/btree_key_cache.c
fs/bcachefs/btree_update_interior.c
fs/bcachefs/btree_update_interior.h
fs/bcachefs/btree_write_buffer.c
fs/bcachefs/buckets.c
fs/bcachefs/debug.c
fs/bcachefs/ec.c
fs/bcachefs/error.h
fs/bcachefs/extents.h
fs/bcachefs/fs.c
fs/bcachefs/fsck.c
fs/bcachefs/journal.c
fs/bcachefs/journal_io.c
fs/bcachefs/logged_ops.c
fs/bcachefs/movinggc.c
fs/bcachefs/recovery.c
fs/bcachefs/recovery_types.h
fs/bcachefs/snapshot.c
fs/bcachefs/super-io.c
fs/bcachefs/super.c
fs/bcachefs/util.h

diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index c47f72f2bd586f6c15bc42e019cc97edc0f6030a..893e38f9db807f4c6d819a470339b6af97f85804 100644
@@ -532,13 +532,13 @@ int bch2_bucket_gens_init(struct bch_fs *c)
                u8 gen = bch2_alloc_to_v4(k, &a)->gen;
                unsigned offset;
                struct bpos pos = alloc_gens_pos(iter.pos, &offset);
+               int ret2 = 0;
 
                if (have_bucket_gens_key && bkey_cmp(iter.pos, pos)) {
-                       ret = commit_do(trans, NULL, NULL,
-                                       BCH_TRANS_COMMIT_no_enospc,
-                               bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0));
-                       if (ret)
-                               break;
+                       ret2 =  bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0) ?:
+                               bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
+                       if (ret2)
+                               goto iter_err;
                        have_bucket_gens_key = false;
                }
 
@@ -549,7 +549,8 @@ int bch2_bucket_gens_init(struct bch_fs *c)
                }
 
                g.v.gens[offset] = gen;
-               0;
+iter_err:
+               ret2;
        }));
 
        if (have_bucket_gens_key && !ret)
@@ -852,7 +853,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
                                        bucket_journal_seq);
                        if (ret) {
                                bch2_fs_fatal_error(c,
-                                       "error setting bucket_needs_journal_commit: %i", ret);
+                                       "setting bucket_needs_journal_commit: %s", bch2_err_str(ret));
                                return ret;
                        }
                }
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index ca58193dd90279b6d6081f06954690f214ba3a42..214b15c84d1f3258d2ba23effacc5ad4e8852783 100644
@@ -1356,15 +1356,17 @@ retry:
 
                /* Don't retry from all devices if we're out of open buckets: */
                if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) {
-                       int ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
+                       int ret2 = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
                                              target, erasure_code,
                                              nr_replicas, &nr_effective,
                                              &have_cache, watermark,
                                              flags, cl);
-                       if (!ret ||
-                           bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
-                           bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
+                       if (!ret2 ||
+                           bch2_err_matches(ret2, BCH_ERR_transaction_restart) ||
+                           bch2_err_matches(ret2, BCH_ERR_open_buckets_empty)) {
+                               ret = ret2;
                                goto alloc_done;
+                       }
                }
 
                /*
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 339dc3e1dcd39939b5f021db2665190ea07ceee9..799aa32b6b4d990f913b0d5dfb98b6a47af1f0b2 100644
@@ -849,6 +849,8 @@ struct bch_fs {
        struct workqueue_struct *btree_interior_update_worker;
        struct work_struct      btree_interior_update_work;
 
+       struct workqueue_struct *btree_node_rewrite_worker;
+
        struct list_head        pending_node_rewrites;
        struct mutex            pending_node_rewrites_lock;
 
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 584aee7010deaa79df36edd60e5d7e59bcb7a7e3..bdaed29f084a4d558a160817e6fa6447c8547b0d 100644
@@ -1392,11 +1392,11 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
                                         *old,
                                         b->data_type);
        gc = *b;
-       percpu_up_read(&c->mark_lock);
 
        if (gc.data_type != old_gc.data_type ||
            gc.dirty_sectors != old_gc.dirty_sectors)
                bch2_dev_usage_update_m(c, ca, &old_gc, &gc);
+       percpu_up_read(&c->mark_lock);
 
        if (metadata_only &&
            gc.data_type != BCH_DATA_sb &&
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 624c8287deb43191d39b130c842875f7aa1a9ff1..34df8ccc5fecc2bfbad874e77f53fa1f5f068251 100644
@@ -1066,7 +1066,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 
                        ret = bset_encrypt(c, i, b->written << 9);
                        if (bch2_fs_fatal_err_on(ret, c,
-                                       "error decrypting btree node: %i", ret))
+                                       "decrypting btree node: %s", bch2_err_str(ret)))
                                goto fsck_err;
 
                        btree_err_on(btree_node_type_is_extents(btree_node_type(b)) &&
@@ -1107,7 +1107,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 
                        ret = bset_encrypt(c, i, b->written << 9);
                        if (bch2_fs_fatal_err_on(ret, c,
-                                       "error decrypting btree node: %i\n", ret))
+                                       "decrypting btree node: %s", bch2_err_str(ret)))
                                goto fsck_err;
 
                        sectors = vstruct_sectors(bne, c->block_bits);
@@ -1338,7 +1338,7 @@ start:
        if (saw_error && !btree_node_read_error(b)) {
                printbuf_reset(&buf);
                bch2_bpos_to_text(&buf, b->key.k.p);
-               bch_info(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
+               bch_err_ratelimited(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
                         __func__, bch2_btree_id_str(b->c.btree_id), b->c.level, buf.buf);
 
                bch2_btree_node_rewrite_async(c, b);
@@ -1874,8 +1874,8 @@ out:
        return;
 err:
        set_btree_node_noevict(b);
-       if (!bch2_err_matches(ret, EROFS))
-               bch2_fs_fatal_error(c, "fatal error writing btree node: %s", bch2_err_str(ret));
+       bch2_fs_fatal_err_on(!bch2_err_matches(ret, EROFS), c,
+                            "writing btree node: %s", bch2_err_str(ret));
        goto out;
 }
 
@@ -2131,7 +2131,7 @@ do_write:
 
        ret = bset_encrypt(c, i, b->written << 9);
        if (bch2_fs_fatal_err_on(ret, c,
-                       "error encrypting btree node: %i\n", ret))
+                       "encrypting btree node: %s", bch2_err_str(ret)))
                goto err;
 
        nonce = btree_nonce(i, b->written << 9);
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 8a71d43444b9425808ff1db86ec0fd296c9900ff..581edcb0911bfa39e9ec6242686bd213c47f352c 100644
@@ -676,7 +676,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
                             !bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
                             !bch2_err_matches(ret, BCH_ERR_journal_reclaim_would_deadlock) &&
                             !bch2_journal_error(j), c,
-                            "error flushing key cache: %s", bch2_err_str(ret));
+                            "flushing key cache: %s", bch2_err_str(ret));
        if (ret)
                goto out;
 
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 642213ef9f798e477bc902e4977be9cb813aab56..b2f5f2e50f7e19ccd59502a1471ee9bb6d14a988 100644
@@ -646,7 +646,7 @@ static void btree_update_nodes_written(struct btree_update *as)
        bch2_trans_unlock(trans);
 
        bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c,
-                            "%s(): error %s", __func__, bch2_err_str(ret));
+                            "%s", bch2_err_str(ret));
 err:
        if (as->b) {
 
@@ -1067,13 +1067,18 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
        flags &= ~BCH_WATERMARK_MASK;
        flags |= watermark;
 
-       if (!(flags & BCH_TRANS_COMMIT_journal_reclaim) &&
-           watermark < c->journal.watermark) {
+       if (watermark < c->journal.watermark) {
                struct journal_res res = { 0 };
+               unsigned journal_flags = watermark|JOURNAL_RES_GET_CHECK;
+
+               if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
+                   watermark != BCH_WATERMARK_reclaim)
+                       journal_flags |= JOURNAL_RES_GET_NONBLOCK;
 
                ret = drop_locks_do(trans,
-                       bch2_journal_res_get(&c->journal, &res, 1,
-                                            watermark|JOURNAL_RES_GET_CHECK));
+                       bch2_journal_res_get(&c->journal, &res, 1, journal_flags));
+               if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
+                       ret = -BCH_ERR_journal_reclaim_would_deadlock;
                if (ret)
                        return ERR_PTR(ret);
        }
@@ -1117,6 +1122,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
        closure_init(&as->cl, NULL);
        as->c           = c;
        as->start_time  = start_time;
+       as->ip_started  = _RET_IP_;
        as->mode        = BTREE_INTERIOR_NO_UPDATE;
        as->took_gc_lock = true;
        as->btree_id    = path->btree_id;
@@ -1192,7 +1198,8 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
 err:
        bch2_btree_update_free(as, trans);
        if (!bch2_err_matches(ret, ENOSPC) &&
-           !bch2_err_matches(ret, EROFS))
+           !bch2_err_matches(ret, EROFS) &&
+           ret != -BCH_ERR_journal_reclaim_would_deadlock)
                bch_err_fn_ratelimited(c, ret);
        return ERR_PTR(ret);
 }
@@ -2114,7 +2121,7 @@ static void async_btree_node_rewrite_work(struct work_struct *work)
 
        ret = bch2_trans_do(c, NULL, NULL, 0,
                      async_btree_node_rewrite_trans(trans, a));
-       bch_err_fn(c, ret);
+       bch_err_fn_ratelimited(c, ret);
        bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite);
        kfree(a);
 }
@@ -2161,7 +2168,7 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
                bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite);
        }
 
-       queue_work(c->btree_interior_update_worker, &a->work);
+       queue_work(c->btree_node_rewrite_worker, &a->work);
 }
 
 void bch2_do_pending_node_rewrites(struct bch_fs *c)
@@ -2173,7 +2180,7 @@ void bch2_do_pending_node_rewrites(struct bch_fs *c)
                list_del(&a->list);
 
                bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite);
-               queue_work(c->btree_interior_update_worker, &a->work);
+               queue_work(c->btree_node_rewrite_worker, &a->work);
        }
        mutex_unlock(&c->pending_node_rewrites_lock);
 }
@@ -2441,12 +2448,12 @@ void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
 
        mutex_lock(&c->btree_interior_update_lock);
        list_for_each_entry(as, &c->btree_interior_update_list, list)
-               prt_printf(out, "%p m %u w %u r %u j %llu\n",
-                      as,
-                      as->mode,
-                      as->nodes_written,
-                      closure_nr_remaining(&as->cl),
-                      as->journal.seq);
+               prt_printf(out, "%ps: mode=%u nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
+                          (void *) as->ip_started,
+                          as->mode,
+                          as->nodes_written,
+                          closure_nr_remaining(&as->cl),
+                          as->journal.seq);
        mutex_unlock(&c->btree_interior_update_lock);
 }
 
@@ -2510,6 +2517,8 @@ bch2_btree_roots_to_journal_entries(struct bch_fs *c,
 
 void bch2_fs_btree_interior_update_exit(struct bch_fs *c)
 {
+       if (c->btree_node_rewrite_worker)
+               destroy_workqueue(c->btree_node_rewrite_worker);
        if (c->btree_interior_update_worker)
                destroy_workqueue(c->btree_interior_update_worker);
        mempool_exit(&c->btree_interior_update_pool);
@@ -2534,6 +2543,11 @@ int bch2_fs_btree_interior_update_init(struct bch_fs *c)
        if (!c->btree_interior_update_worker)
                return -BCH_ERR_ENOMEM_btree_interior_update_worker_init;
 
+       c->btree_node_rewrite_worker =
+               alloc_ordered_workqueue("btree_node_rewrite", WQ_UNBOUND);
+       if (!c->btree_node_rewrite_worker)
+               return -BCH_ERR_ENOMEM_btree_interior_update_worker_init;
+
        if (mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1,
                                      sizeof(struct btree_update)))
                return -BCH_ERR_ENOMEM_btree_interior_update_pool_init;
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index 3439b03719c7b505bf43e5f76bf703f6778c9899..f651dd48aaa0496b7cf7c80eb183896b1d418044 100644
@@ -32,6 +32,7 @@ struct btree_update {
        struct closure                  cl;
        struct bch_fs                   *c;
        u64                             start_time;
+       unsigned long                   ip_started;
 
        struct list_head                list;
        struct list_head                unwritten_list;
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index b77e7b382b66660f8b02925c13657118dc28fed1..5cbad8445782c4006074365c13551b1c38b57849 100644
@@ -378,7 +378,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
                }
        }
 err:
-       bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret));
+       bch2_fs_fatal_err_on(ret, c, "%s", bch2_err_str(ret));
        trace_write_buffer_flush(trans, wb->flushing.keys.nr, skipped, fast, 0);
        bch2_journal_pin_drop(j, &wb->flushing.pin);
        wb->flushing.keys.nr = 0;
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index c2f46b267b3ad50c796690320f0a700411940931..96edf2c34d433d1c1ad41ec8da0c77b8d40afe9f 100644
@@ -990,8 +990,8 @@ static int __trigger_extent(struct btree_trans *trans,
                                ret = !gc
                                        ? bch2_update_cached_sectors_list(trans, p.ptr.dev, disk_sectors)
                                        : update_cached_sectors(c, k, p.ptr.dev, disk_sectors, 0, true);
-                               bch2_fs_fatal_err_on(ret && gc, c, "%s(): no replicas entry while updating cached sectors",
-                                                    __func__);
+                               bch2_fs_fatal_err_on(ret && gc, c, "%s: no replicas entry while updating cached sectors",
+                                                    bch2_err_str(ret));
                                if (ret)
                                        return ret;
                        }
@@ -1020,7 +1020,7 @@ static int __trigger_extent(struct btree_trans *trans,
                        struct printbuf buf = PRINTBUF;
 
                        bch2_bkey_val_to_text(&buf, c, k);
-                       bch2_fs_fatal_error(c, "%s(): no replicas entry for %s", __func__, buf.buf);
+                       bch2_fs_fatal_error(c, ": no replicas entry for %s", buf.buf);
                        printbuf_exit(&buf);
                }
                if (ret)
diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c
index b1f147e6be4d5cdd0ab491932db9c625b763e29e..208ce6f0fc4317d561582bae51785da2c016a1cd 100644
@@ -170,7 +170,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
                struct printbuf buf = PRINTBUF;
 
                bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
-               bch2_fs_fatal_error(c, "btree node verify failed for : %s\n", buf.buf);
+               bch2_fs_fatal_error(c, ": btree node verify failed for: %s\n", buf.buf);
                printbuf_exit(&buf);
        }
 out:
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index b98e2c2b8bf06f59fa70cfe23873e51529a917b8..082075244e16aedc824249b239ecec6efb1a07fa 100644
@@ -448,7 +448,7 @@ int bch2_trigger_stripe(struct btree_trans *trans,
                        struct printbuf buf = PRINTBUF;
 
                        bch2_bkey_val_to_text(&buf, c, new);
-                       bch2_fs_fatal_error(c, "no replicas entry for %s", buf.buf);
+                       bch2_fs_fatal_error(c, ": no replicas entry for %s", buf.buf);
                        printbuf_exit(&buf);
                        return ret;
                }
@@ -1868,10 +1868,10 @@ static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stri
                return -BCH_ERR_stripe_alloc_blocked;
 
        ret = get_stripe_key_trans(trans, idx, &h->s->existing_stripe);
+       bch2_fs_fatal_err_on(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart), c,
+                            "reading stripe key: %s", bch2_err_str(ret));
        if (ret) {
                bch2_stripe_close(c, h->s);
-               if (!bch2_err_matches(ret, BCH_ERR_transaction_restart))
-                       bch2_fs_fatal_error(c, "error reading stripe key: %s", bch2_err_str(ret));
                return ret;
        }
 
diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h
index 94491190e09e9d5085ca1ef87c5764c4192d6a24..ae1d6674c512d44521379f21d5872b2b79993f57 100644
@@ -191,9 +191,9 @@ do {                                                                        \
 
 void bch2_fatal_error(struct bch_fs *);
 
-#define bch2_fs_fatal_error(c, ...)                                    \
+#define bch2_fs_fatal_error(c, _msg, ...)                              \
 do {                                                                   \
-       bch_err(c, __VA_ARGS__);                                        \
+       bch_err(c, "%s(): fatal error " _msg, __func__, ##__VA_ARGS__); \
        bch2_fatal_error(c);                                            \
 } while (0)
 
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index 6219f2c08e4c737abd477588419c0f0dbeecbc38..fd2669cdd76f3b23861a9c0835253d3812a6de10 100644
@@ -108,17 +108,17 @@ static inline void extent_entry_drop(struct bkey_s k, union bch_extent_entry *en
 
 static inline bool extent_entry_is_ptr(const union bch_extent_entry *e)
 {
-       return extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr;
+       return __extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr;
 }
 
 static inline bool extent_entry_is_stripe_ptr(const union bch_extent_entry *e)
 {
-       return extent_entry_type(e) == BCH_EXTENT_ENTRY_stripe_ptr;
+       return __extent_entry_type(e) == BCH_EXTENT_ENTRY_stripe_ptr;
 }
 
 static inline bool extent_entry_is_crc(const union bch_extent_entry *e)
 {
-       switch (extent_entry_type(e)) {
+       switch (__extent_entry_type(e)) {
        case BCH_EXTENT_ENTRY_crc32:
        case BCH_EXTENT_ENTRY_crc64:
        case BCH_EXTENT_ENTRY_crc128:
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 3f073845bbd77391306a55c6ac7a87771f7e5890..0ccee05f6887b3b0aedf1e7c11f82665c7d38ba5 100644
@@ -108,7 +108,8 @@ retry:
                goto retry;
 
        bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c,
-                            "inode %u:%llu not found when updating",
+                            "%s: inode %u:%llu not found when updating",
+                            bch2_err_str(ret),
                             inode_inum(inode).subvol,
                             inode_inum(inode).inum);
 
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index f48033be3f6b1fda3da6105b982403b607158c32..47d4eefaba7ba05dc1a610ddc35a27bb10891b5d 100644
@@ -1114,10 +1114,9 @@ int bch2_check_inodes(struct bch_fs *c)
        return ret;
 }
 
-static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
+static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_walker *w)
 {
        struct bch_fs *c = trans->c;
-       u32 restart_count = trans->restart_count;
        int ret = 0;
        s64 count2;
 
@@ -1149,7 +1148,14 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
        }
 fsck_err:
        bch_err_fn(c, ret);
-       return ret ?: trans_was_restarted(trans, restart_count);
+       return ret;
+}
+
+static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
+{
+       u32 restart_count = trans->restart_count;
+       return check_i_sectors_notnested(trans, w) ?:
+               trans_was_restarted(trans, restart_count);
 }
 
 struct extent_end {
@@ -1533,7 +1539,7 @@ int bch2_check_extents(struct bch_fs *c)
                        check_extent(trans, &iter, k, &w, &s, &extent_ends) ?:
                        check_extent_overbig(trans, &iter, k);
                })) ?:
-               check_i_sectors(trans, &w));
+               check_i_sectors_notnested(trans, &w));
 
        bch2_disk_reservation_put(c, &res);
        extent_ends_exit(&extent_ends);
@@ -1563,10 +1569,9 @@ int bch2_check_indirect_extents(struct bch_fs *c)
        return ret;
 }
 
-static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
+static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_walker *w)
 {
        struct bch_fs *c = trans->c;
-       u32 restart_count = trans->restart_count;
        int ret = 0;
        s64 count2;
 
@@ -1598,7 +1603,14 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
        }
 fsck_err:
        bch_err_fn(c, ret);
-       return ret ?: trans_was_restarted(trans, restart_count);
+       return ret;
+}
+
+static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
+{
+       u32 restart_count = trans->restart_count;
+       return check_subdir_count_notnested(trans, w) ?:
+               trans_was_restarted(trans, restart_count);
 }
 
 static int check_dirent_inode_dirent(struct btree_trans *trans,
@@ -2003,7 +2015,8 @@ int bch2_check_dirents(struct bch_fs *c)
                                k,
                                NULL, NULL,
                                BCH_TRANS_COMMIT_no_enospc,
-                       check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)));
+                       check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)) ?:
+               check_subdir_count_notnested(trans, &dir));
 
        snapshots_seen_exit(&s);
        inode_walker_exit(&dir);
@@ -2022,8 +2035,10 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
        int ret;
 
        ret = check_key_has_snapshot(trans, iter, k);
-       if (ret)
+       if (ret < 0)
                return ret;
+       if (ret)
+               return 0;
 
        i = walk_inode(trans, inode, k);
        ret = PTR_ERR_OR_ZERO(i);
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index f314b2e78ec368718e671651a99752de374a838f..9c9a25dbd6137a6d51205c80cbf8d931fda918ef 100644
@@ -511,18 +511,18 @@ retry:
        if (journal_res_get_fast(j, res, flags))
                return 0;
 
+       if (bch2_journal_error(j))
+               return -BCH_ERR_erofs_journal_err;
+
+       if (j->blocked)
+               return -BCH_ERR_journal_res_get_blocked;
+
        if ((flags & BCH_WATERMARK_MASK) < j->watermark) {
                ret = JOURNAL_ERR_journal_full;
                can_discard = j->can_discard;
                goto out;
        }
 
-       if (j->blocked)
-               return -BCH_ERR_journal_res_get_blocked;
-
-       if (bch2_journal_error(j))
-               return -BCH_ERR_erofs_journal_err;
-
        if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf) && !journal_entry_is_open(j)) {
                ret = JOURNAL_ERR_max_in_flight;
                goto out;
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index d76c3c0c203f9eb8e39d391b28bf09430f919f6b..725fcf46f6312c267c2a7c05f1eaa6aed5fb83e7 100644
@@ -1082,9 +1082,7 @@ reread:
                ret = bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j),
                             j->encrypted_start,
                             vstruct_end(j) - (void *) j->encrypted_start);
-               bch2_fs_fatal_err_on(ret, c,
-                               "error decrypting journal entry: %s",
-                               bch2_err_str(ret));
+               bch2_fs_fatal_err_on(ret, c, "decrypting journal entry: %s", bch2_err_str(ret));
 
                mutex_lock(&jlist->lock);
                ret = journal_entry_add(c, ca, (struct journal_ptr) {
@@ -1820,7 +1818,8 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
                        jset_entry_for_each_key(i, k) {
                                ret = bch2_journal_key_to_wb(c, &wb, i->btree_id, k);
                                if (ret) {
-                                       bch2_fs_fatal_error(c, "-ENOMEM flushing journal keys to btree write buffer");
+                                       bch2_fs_fatal_error(c, "flushing journal keys to btree write buffer: %s",
+                                                           bch2_err_str(ret));
                                        bch2_journal_keys_to_write_buffer_end(c, &wb);
                                        return ret;
                                }
@@ -1848,7 +1847,8 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
 
        bch2_journal_super_entries_add_common(c, &end, seq);
        u64s    = (u64 *) end - (u64 *) start;
-       BUG_ON(u64s > j->entry_u64s_reserved);
+
+       WARN_ON(u64s > j->entry_u64s_reserved);
 
        le32_add_cpu(&jset->u64s, u64s);
 
@@ -1856,7 +1856,7 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
        bytes   = vstruct_bytes(jset);
 
        if (sectors > w->sectors) {
-               bch2_fs_fatal_error(c, "aieeee! journal write overran available space, %zu > %u (extra %u reserved %u/%u)",
+               bch2_fs_fatal_error(c, ": journal write overran available space, %zu > %u (extra %u reserved %u/%u)",
                                    vstruct_bytes(jset), w->sectors << 9,
                                    u64s, w->u64s_reserved, j->entry_u64s_reserved);
                return -EINVAL;
@@ -1884,8 +1884,7 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
        ret = bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
                    jset->encrypted_start,
                    vstruct_end(jset) - (void *) jset->encrypted_start);
-       if (bch2_fs_fatal_err_on(ret, c,
-                       "error decrypting journal entry: %i", ret))
+       if (bch2_fs_fatal_err_on(ret, c, "decrypting journal entry: %s", bch2_err_str(ret)))
                return ret;
 
        jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset),
diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c
index ad598105c587cc0354773b85461293099bbbe36d..9fac838d123e8e40fb836d895afccde634f2d54c 100644
@@ -101,8 +101,8 @@ void bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k)
                struct printbuf buf = PRINTBUF;
 
                bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
-               bch2_fs_fatal_error(c, "%s: error deleting logged operation %s: %s",
-                                    __func__, buf.buf, bch2_err_str(ret));
+               bch2_fs_fatal_error(c, "deleting logged operation %s: %s",
+                                   buf.buf, bch2_err_str(ret));
                printbuf_exit(&buf);
        }
 }
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index 69e06a84dad4094847e8c737860d5acc37b25d79..0d2b82d8d11f39efe6a65bee8678e8f2496bdbcf 100644
@@ -155,8 +155,7 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt,
        if (bch2_err_matches(ret, EROFS))
                return ret;
 
-       if (bch2_fs_fatal_err_on(ret, c, "%s: error %s from bch2_btree_write_buffer_tryflush()",
-                                __func__, bch2_err_str(ret)))
+       if (bch2_fs_fatal_err_on(ret, c, "%s: from bch2_btree_write_buffer_tryflush()", bch2_err_str(ret)))
                return ret;
 
        ret = for_each_btree_key_upto(trans, iter, BTREE_ID_lru,
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 2af219aedfdbefb36d37fe19423d8e26cabf0cdd..03f9d6afe467889b02a483561277b0d539a836f5 100644
@@ -90,10 +90,12 @@ static void do_reconstruct_alloc(struct bch_fs *c)
        struct journal_keys *keys = &c->journal_keys;
        size_t src, dst;
 
+       move_gap(keys, keys->nr);
+
        for (src = 0, dst = 0; src < keys->nr; src++)
                if (!btree_id_is_alloc(keys->data[src].btree_id))
                        keys->data[dst++] = keys->data[src];
-       keys->nr = dst;
+       keys->nr = keys->gap = dst;
 }
 
 /*
@@ -203,6 +205,8 @@ static int bch2_journal_replay(struct bch_fs *c)
 
        BUG_ON(!atomic_read(&keys->ref));
 
+       move_gap(keys, keys->nr);
+
        /*
         * First, attempt to replay keys in sorted order. This is more
         * efficient - better locality of btree access -  but some might fail if
diff --git a/fs/bcachefs/recovery_types.h b/fs/bcachefs/recovery_types.h
index 1361e34d4e64c2939fc0b7af8c9df9e5d9dfc7cf..4959e95e7c74654e8b3a6e78a0ea7778713bd8ba 100644
  * must never change:
  */
 #define BCH_RECOVERY_PASSES()                                                  \
+       x(check_topology,                        4, 0)                          \
        x(alloc_read,                            0, PASS_ALWAYS)                \
        x(stripes_read,                          1, PASS_ALWAYS)                \
        x(initialize_subvolumes,                 2, 0)                          \
        x(snapshots_read,                        3, PASS_ALWAYS)                \
-       x(check_topology,                        4, 0)                          \
        x(check_allocations,                     5, PASS_FSCK)                  \
        x(trans_mark_dev_sbs,                    6, PASS_ALWAYS|PASS_SILENT)    \
        x(fs_journal_alloc,                      7, PASS_ALWAYS|PASS_SILENT)    \
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index ac6ba04d5521714ece2e2cb00400fff60ec05eb6..39debe814bf392acb76c7cebe6752736d6c57cff 100644
@@ -91,18 +91,20 @@ static int bch2_snapshot_tree_create(struct btree_trans *trans,
 
 /* Snapshot nodes: */
 
-static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor)
+static bool __bch2_snapshot_is_ancestor_early(struct snapshot_table *t, u32 id, u32 ancestor)
 {
-       struct snapshot_table *t;
-
-       rcu_read_lock();
-       t = rcu_dereference(c->snapshots);
-
        while (id && id < ancestor)
                id = __snapshot_t(t, id)->parent;
+       return id == ancestor;
+}
+
+static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor)
+{
+       rcu_read_lock();
+       bool ret = __bch2_snapshot_is_ancestor_early(rcu_dereference(c->snapshots), id, ancestor);
        rcu_read_unlock();
 
-       return id == ancestor;
+       return ret;
 }
 
 static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor)
@@ -120,13 +122,15 @@ static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ances
 
 bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
 {
-       struct snapshot_table *t;
        bool ret;
 
-       EBUG_ON(c->recovery_pass_done <= BCH_RECOVERY_PASS_check_snapshots);
-
        rcu_read_lock();
-       t = rcu_dereference(c->snapshots);
+       struct snapshot_table *t = rcu_dereference(c->snapshots);
+
+       if (unlikely(c->recovery_pass_done <= BCH_RECOVERY_PASS_check_snapshots)) {
+               ret = __bch2_snapshot_is_ancestor_early(t, id, ancestor);
+               goto out;
+       }
 
        while (id && id < ancestor - IS_ANCESTOR_BITMAP)
                id = get_ancestor_below(t, id, ancestor);
@@ -134,11 +138,11 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
        if (id && id < ancestor) {
                ret = test_bit(ancestor - id - 1, __snapshot_t(t, id)->is_ancestor);
 
-               EBUG_ON(ret != bch2_snapshot_is_ancestor_early(c, id, ancestor));
+               EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor));
        } else {
                ret = id == ancestor;
        }
-
+out:
        rcu_read_unlock();
 
        return ret;
@@ -547,7 +551,7 @@ static int check_snapshot_tree(struct btree_trans *trans,
                        "snapshot tree points to missing subvolume:\n  %s",
                        (printbuf_reset(&buf),
                         bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
-           fsck_err_on(!bch2_snapshot_is_ancestor_early(c,
+           fsck_err_on(!bch2_snapshot_is_ancestor(c,
                                                le32_to_cpu(subvol.snapshot),
                                                root_id),
                        c, snapshot_tree_to_wrong_subvol,
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index bceac29f3d86272d884c8fa59ad8e7f7c8163318..ad28e370b6404c915ee8bf8743ed535366fc6a55 100644
@@ -985,7 +985,7 @@ int bch2_write_super(struct bch_fs *c)
                prt_str(&buf, " > ");
                bch2_version_to_text(&buf, bcachefs_metadata_version_current);
                prt_str(&buf, ")");
-               bch2_fs_fatal_error(c, "%s", buf.buf);
+               bch2_fs_fatal_error(c, ": %s", buf.buf);
                printbuf_exit(&buf);
                return -BCH_ERR_sb_not_downgraded;
        }
@@ -1005,7 +1005,7 @@ int bch2_write_super(struct bch_fs *c)
 
                if (le64_to_cpu(ca->sb_read_scratch->seq) < ca->disk_sb.seq) {
                        bch2_fs_fatal_error(c,
-                               "Superblock write was silently dropped! (seq %llu expected %llu)",
+                               ": Superblock write was silently dropped! (seq %llu expected %llu)",
                                le64_to_cpu(ca->sb_read_scratch->seq),
                                ca->disk_sb.seq);
                        percpu_ref_put(&ca->io_ref);
@@ -1015,7 +1015,7 @@ int bch2_write_super(struct bch_fs *c)
 
                if (le64_to_cpu(ca->sb_read_scratch->seq) > ca->disk_sb.seq) {
                        bch2_fs_fatal_error(c,
-                               "Superblock modified by another process (seq %llu expected %llu)",
+                               ": Superblock modified by another process (seq %llu expected %llu)",
                                le64_to_cpu(ca->sb_read_scratch->seq),
                                ca->disk_sb.seq);
                        percpu_ref_put(&ca->io_ref);
@@ -1066,7 +1066,7 @@ int bch2_write_super(struct bch_fs *c)
                                 !can_mount_with_written ||
                                 (can_mount_without_written &&
                                  !can_mount_with_written), c,
-               "Unable to write superblock to sufficient devices (from %ps)",
+               "Unable to write superblock to sufficient devices (from %ps)",
                (void *) _RET_IP_))
                ret = -1;
 out:
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 233f864ed8b07ff321b64993a821d08137b70ecd..1ad6e5cd9476c86f4b905feff6f727b4cdd94a4e 100644
@@ -87,20 +87,28 @@ const char * const bch2_fs_flag_strs[] = {
        NULL
 };
 
-void bch2_print_opts(struct bch_opts *opts, const char *fmt, ...)
+__printf(2, 0)
+static void bch2_print_maybe_redirect(struct stdio_redirect *stdio, const char *fmt, va_list args)
 {
-       struct stdio_redirect *stdio = (void *)(unsigned long)opts->stdio;
-
-       va_list args;
-       va_start(args, fmt);
-       if (likely(!stdio)) {
-               vprintk(fmt, args);
-       } else {
+#ifdef __KERNEL__
+       if (unlikely(stdio)) {
                if (fmt[0] == KERN_SOH[0])
                        fmt += 2;
 
                bch2_stdio_redirect_vprintf(stdio, true, fmt, args);
+               return;
        }
+#endif
+       vprintk(fmt, args);
+}
+
+void bch2_print_opts(struct bch_opts *opts, const char *fmt, ...)
+{
+       struct stdio_redirect *stdio = (void *)(unsigned long)opts->stdio;
+
+       va_list args;
+       va_start(args, fmt);
+       bch2_print_maybe_redirect(stdio, fmt, args);
        va_end(args);
 }
 
@@ -110,14 +118,7 @@ void __bch2_print(struct bch_fs *c, const char *fmt, ...)
 
        va_list args;
        va_start(args, fmt);
-       if (likely(!stdio)) {
-               vprintk(fmt, args);
-       } else {
-               if (fmt[0] == KERN_SOH[0])
-                       fmt += 2;
-
-               bch2_stdio_redirect_vprintf(stdio, true, fmt, args);
-       }
+       bch2_print_maybe_redirect(stdio, fmt, args);
        va_end(args);
 }
 
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 7ffbddb80400d7aed4bc1479c79cae32427e2595..175aee3074c7d539d40e7ec3ffc072a0e3d2d388 100644
@@ -683,6 +683,9 @@ static inline void __move_gap(void *array, size_t element_size,
 /* Move the gap in a gap buffer: */
 #define move_gap(_d, _new_gap)                                         \
 do {                                                                   \
+       BUG_ON(_new_gap > (_d)->nr);                                    \
+       BUG_ON((_d)->gap > (_d)->nr);                                   \
+                                                                       \
        __move_gap((_d)->data, sizeof((_d)->data[0]),                   \
                   (_d)->nr, (_d)->size, (_d)->gap, _new_gap);          \
        (_d)->gap = _new_gap;                                           \