From: Kent Overstreet Date: Wed, 27 Oct 2021 16:51:12 +0000 (-0400) Subject: bcachefs: Run insert triggers before overwrite triggers X-Git-Tag: v6.7-rc1~201^2~1286 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f0c3f88b35e1fac6e3b7cec5635e43d4e595cf7a;p=thirdparty%2Fkernel%2Flinux.git bcachefs: Run insert triggers before overwrite triggers Currently, btree triggers are run in natural key order, which presents a problem for fallocate in INSERT_RANGE mode: since we're moving existing extents to higher offsets, the trigger for deleting the old extent runs before the trigger that adds the new extent, potentially leading to indirect extents being deleted that shouldn't be when the delete causes the refcount to hit 0. This changes the order we run triggers so that for a givin btree, we run all insert triggers before overwrite triggers, nicely sidestepping this issue. Signed-off-by: Kent Overstreet --- diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 5331626e62a51..25b0df22366bf 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -338,7 +338,8 @@ struct btree_insert_entry { enum btree_id btree_id:8; u8 level; bool cached:1; - bool trans_triggers_run:1; + bool insert_trigger_run:1; + bool overwrite_trigger_run:1; struct bkey_i *k; struct btree_path *path; unsigned long ip_allocated; diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index 4e9f7e3b5a61a..61c87525e48db 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -816,10 +816,112 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans) return 0; } +static int bch2_trans_commit_run_triggers(struct btree_trans *trans) +{ + struct bkey _deleted = KEY(0, 0, 0); + struct bkey_s_c deleted = (struct bkey_s_c) { &_deleted, NULL }; + struct bkey_s_c old; + struct bkey unpacked; + struct btree_insert_entry *i = NULL, *btree_id_start = trans->updates; + bool trans_trigger_run; + unsigned btree_id = 0; + int ret = 0; + + /* + * + * For a given btree, this algorithm runs insert triggers before + * overwrite triggers: this is so that when extents are being moved + * (e.g. by FALLOCATE_FL_INSERT_RANGE), we don't drop references before + * they are re-added. + */ + for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) { + while (btree_id_start < trans->updates + trans->nr_updates && + btree_id_start->btree_id < btree_id) + btree_id_start++; + + /* + * Running triggers will append more updates to the list of updates as + * we're walking it: + */ + do { + trans_trigger_run = false; + + for (i = btree_id_start; + i < trans->updates + trans->nr_updates && i->btree_id <= btree_id; + i++) { + if (i->insert_trigger_run || + (i->flags & BTREE_TRIGGER_NORUN) || + !(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type))) + continue; + + BUG_ON(i->overwrite_trigger_run); + + i->insert_trigger_run = true; + trans_trigger_run = true; + + old = bch2_btree_path_peek_slot(i->path, &unpacked); + _deleted.p = i->path->pos; + + if (old.k->type == i->k->k.type && + ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) { + i->overwrite_trigger_run = true; + ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(i->k), + BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|i->flags); + } else { + ret = bch2_trans_mark_key(trans, deleted, bkey_i_to_s_c(i->k), + BTREE_TRIGGER_INSERT|i->flags); + } + + if (ret == -EINTR) + trace_trans_restart_mark(trans->ip, _RET_IP_, + i->btree_id, &i->path->pos); + if (ret) + return ret; + } + } while (trans_trigger_run); + + do { + trans_trigger_run = false; + + for (i = btree_id_start; + i < trans->updates + trans->nr_updates && i->btree_id <= btree_id; + i++) { + if (i->overwrite_trigger_run || + (i->flags & BTREE_TRIGGER_NORUN) || + !(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type))) + continue; + + BUG_ON(!i->insert_trigger_run); + + i->overwrite_trigger_run = true; + trans_trigger_run = true; + + old = bch2_btree_path_peek_slot(i->path, &unpacked); + _deleted.p = i->path->pos; + + ret = bch2_trans_mark_key(trans, old, deleted, + BTREE_TRIGGER_OVERWRITE|i->flags); + + if (ret == -EINTR) + trace_trans_restart_mark(trans->ip, _RET_IP_, + i->btree_id, &i->path->pos); + if (ret) + return ret; + } + } while (trans_trigger_run); + } + + trans_for_each_update(trans, i) + BUG_ON(!(i->flags & BTREE_TRIGGER_NORUN) && + (BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)) && + (!i->insert_trigger_run || !i->overwrite_trigger_run)); + + return 0; +} + int __bch2_trans_commit(struct btree_trans *trans) { struct btree_insert_entry *i = NULL; - bool trans_trigger_run; unsigned u64s; int ret = 0; @@ -854,30 +956,9 @@ int __bch2_trans_commit(struct btree_trans *trans) i->btree_id, i->k->k.p); #endif - /* - * Running triggers will append more updates to the list of updates as - * we're walking it: - */ - do { - trans_trigger_run = false; - - trans_for_each_update(trans, i) { - if ((BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)) && - !i->trans_triggers_run) { - i->trans_triggers_run = true; - trans_trigger_run = true; - - ret = bch2_trans_mark_update(trans, i->path, - i->k, i->flags); - if (unlikely(ret)) { - if (ret == -EINTR) - trace_trans_restart_mark(trans->ip, _RET_IP_, - i->btree_id, &i->path->pos); - goto out; - } - } - } - } while (trans_trigger_run); + ret = bch2_trans_commit_run_triggers(trans); + if (ret) + goto out; trans_for_each_update(trans, i) { BUG_ON(!i->path->should_be_locked); @@ -1297,7 +1378,7 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, if (i < trans->updates + trans->nr_updates && !btree_insert_entry_cmp(&n, i)) { - BUG_ON(i->trans_triggers_run); + BUG_ON(i->insert_trigger_run || i->overwrite_trigger_run); /* * This is a hack to ensure that inode creates update the btree, diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index c3387689fbb6c..2c0a385ace505 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1882,41 +1882,6 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c old, } } -int bch2_trans_mark_update(struct btree_trans *trans, - struct btree_path *path, - struct bkey_i *new, - unsigned flags) -{ - struct bkey _deleted = KEY(0, 0, 0); - struct bkey_s_c deleted = (struct bkey_s_c) { &_deleted, NULL }; - struct bkey_s_c old; - struct bkey unpacked; - int ret; - - _deleted.p = path->pos; - - if (unlikely(flags & BTREE_TRIGGER_NORUN)) - return 0; - - if (!btree_node_type_needs_gc(path->btree_id)) - return 0; - - old = bch2_btree_path_peek_slot(path, &unpacked); - - if (old.k->type == new->k.type && - ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) { - ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), - BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags); - } else { - ret = bch2_trans_mark_key(trans, deleted, bkey_i_to_s_c(new), - BTREE_TRIGGER_INSERT|flags) ?: - bch2_trans_mark_key(trans, old, deleted, - BTREE_TRIGGER_OVERWRITE|flags); - } - - return ret; -} - static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *ca, size_t b, enum bch_data_type type, diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 41374463710c8..54a29bf69d675 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -233,8 +233,6 @@ int bch2_mark_update(struct btree_trans *, struct btree_path *, int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned); -int bch2_trans_mark_update(struct btree_trans *, struct btree_path *, - struct bkey_i *, unsigned); void bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *); int bch2_trans_mark_metadata_bucket(struct btree_trans *, struct bch_dev *,