btrfs: stop reserving excessive space for block group item updates
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 9fe4ccca50a0604ad00cfa9ebb63f3f3db58f833..de843479591349153344b4a7026a46126a202702 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -57,16 +57,20 @@ bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info)
  * Release a ref head's reservation.
  *
  * @fs_info:  the filesystem
- * @nr:       number of items to drop
+ * @nr_refs:  number of delayed refs to drop
+ * @nr_csums: number of csum items to drop
  *
  * Drops the delayed ref head's count from the delayed refs rsv and frees any
  * excess reservation we had.
  */
-void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
+void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr_refs, int nr_csums)
 {
        struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
-       const u64 num_bytes = btrfs_calc_delayed_ref_bytes(fs_info, nr);
-       u64 released = 0;
+       u64 num_bytes;
+       u64 released;
+
+       num_bytes = btrfs_calc_delayed_ref_bytes(fs_info, nr_refs);
+       num_bytes += btrfs_calc_delayed_ref_csum_bytes(fs_info, nr_csums);
 
        released = btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL);
        if (released)
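
As a side note for following the math: the release path now sums two independent costs, one per delayed ref and one per csum leaf. Below is a minimal userspace sketch of that calculation, assuming both costs are simple per-item multiples; the placeholder constants stand in for btrfs_calc_delayed_ref_bytes() and btrfs_calc_delayed_ref_csum_bytes(), whose real values depend on the filesystem's nodesize.

#include <stdint.h>
#include <stdio.h>

/* Placeholder per-item costs; the real values are filesystem dependent. */
#define REF_RESERVE_BYTES   65536ULL
#define CSUM_RESERVE_BYTES  65536ULL

/* Mirrors the num_bytes computation in btrfs_delayed_refs_rsv_release(). */
static uint64_t release_bytes(int nr_refs, int nr_csums)
{
        return (uint64_t)nr_refs * REF_RESERVE_BYTES +
               (uint64_t)nr_csums * CSUM_RESERVE_BYTES;
}

int main(void)
{
        /* Example: one ref head whose extent also covered two csum leaves. */
        printf("%llu bytes released\n",
               (unsigned long long)release_bytes(1, 2));
        return 0;
}
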
@@ -77,26 +81,83 @@ void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
 /*
  * Adjust the size of the delayed refs rsv.
  *
- * This is to be called anytime we may have adjusted trans->delayed_ref_updates,
- * it'll calculate the additional size and add it to the delayed_refs_rsv.
+ * This is to be called anytime we may have adjusted trans->delayed_ref_updates
+ * or trans->delayed_ref_csum_deletions; it will calculate the additional size
+ * and add it to the delayed_refs_rsv.
  */
 void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
 {
        struct btrfs_fs_info *fs_info = trans->fs_info;
        struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
+       struct btrfs_block_rsv *local_rsv = &trans->delayed_rsv;
        u64 num_bytes;
+       u64 reserved_bytes;
+
+       num_bytes = btrfs_calc_delayed_ref_bytes(fs_info, trans->delayed_ref_updates);
+       num_bytes += btrfs_calc_delayed_ref_csum_bytes(fs_info,
+                                                      trans->delayed_ref_csum_deletions);
 
-       if (!trans->delayed_ref_updates)
+       if (num_bytes == 0)
                return;
 
-       num_bytes = btrfs_calc_delayed_ref_bytes(fs_info,
-                                                trans->delayed_ref_updates);
+       /*
+        * Try to take num_bytes from the transaction's local delayed reserve.
+        * If not possible, take as much as is available. If the local reserve
+        * doesn't have enough reserved space, the delayed refs reserve will be
+        * refilled the next time btrfs_delayed_refs_rsv_refill() is called by
+        * someone, or if a transaction commit is triggered before that, the
+        * global block reserve will be used. We want to minimize using the
+        * global block reserve for cases we can account for in advance, to
+        * avoid exhausting it and reaching -ENOSPC during a transaction commit.
+        */
+       spin_lock(&local_rsv->lock);
+       reserved_bytes = min(num_bytes, local_rsv->reserved);
+       local_rsv->reserved -= reserved_bytes;
+       local_rsv->full = (local_rsv->reserved >= local_rsv->size);
+       spin_unlock(&local_rsv->lock);
 
        spin_lock(&delayed_rsv->lock);
        delayed_rsv->size += num_bytes;
-       delayed_rsv->full = false;
+       delayed_rsv->reserved += reserved_bytes;
+       delayed_rsv->full = (delayed_rsv->reserved >= delayed_rsv->size);
        spin_unlock(&delayed_rsv->lock);
        trans->delayed_ref_updates = 0;
+       trans->delayed_ref_csum_deletions = 0;
+}
+
+/*
+ * Adjust the size of the delayed refs block reserve for 1 block group item
+ * update.
+ */
+void btrfs_inc_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info)
+{
+       struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
+
+       spin_lock(&delayed_rsv->lock);
+       /*
+        * Updating a block group item does not result in new nodes/leaves and
+        * does not require changing the free space tree, only the extent tree
+        * or the block group tree, so this is all we need.
+        */
+       delayed_rsv->size += btrfs_calc_metadata_size(fs_info, 1);
+       delayed_rsv->full = false;
+       spin_unlock(&delayed_rsv->lock);
+}
+
+/*
+ * Adjust the size of the delayed refs block reserve to release space for 1
+ * block group item update.
+ */
+void btrfs_dec_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info)
+{
+       struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
+       const u64 num_bytes = btrfs_calc_metadata_size(fs_info, 1);
+       u64 released;
+
+       released = btrfs_block_rsv_release(fs_info, delayed_rsv, num_bytes, NULL);
+       if (released > 0)
+               trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
+                                             0, released, 0);
 }
 
 /*
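
The reserve migration in btrfs_update_delayed_refs_rsv() above can be modelled on its own: grow the delayed refs reserve's size by the bytes the transaction now knows it needs, move over at most that much already-reserved space from the transaction's local reserve, and recompute both full flags. The sketch below makes those assumptions explicit; the struct is a stand-in for struct btrfs_block_rsv and all locking is omitted.

#include <stdbool.h>
#include <stdint.h>

/* Simplified stand-in for struct btrfs_block_rsv (size/reserved/full only). */
struct rsv_model {
        uint64_t size;      /* bytes we want to have reserved */
        uint64_t reserved;  /* bytes actually reserved */
        bool full;
};

static void migrate_to_delayed_refs_rsv(struct rsv_model *local,
                                        struct rsv_model *delayed,
                                        uint64_t num_bytes)
{
        uint64_t moved;

        if (num_bytes == 0)
                return;

        /* Take as much as the local reserve can give, capped at num_bytes. */
        moved = num_bytes < local->reserved ? num_bytes : local->reserved;
        local->reserved -= moved;
        local->full = local->reserved >= local->size;

        /* Any shortfall (num_bytes - moved) is left for a later refill. */
        delayed->size += num_bytes;
        delayed->reserved += moved;
        delayed->full = delayed->reserved >= delayed->size;
}

Any unreserved remainder is picked up later by btrfs_delayed_refs_rsv_refill() or, as a last resort, by the global block reserve at commit time, matching the comment in the hunk above.
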
@@ -154,6 +215,7 @@ int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
                                  enum btrfs_reserve_flush_enum flush)
 {
        struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
+       struct btrfs_space_info *space_info = block_rsv->space_info;
        u64 limit = btrfs_calc_delayed_ref_bytes(fs_info, 1);
        u64 num_bytes = 0;
        u64 refilled_bytes;
@@ -170,7 +232,7 @@ int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
        if (!num_bytes)
                return 0;
 
-       ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv, num_bytes, flush);
+       ret = btrfs_reserve_metadata_bytes(fs_info, space_info, num_bytes, flush);
        if (ret)
                return ret;
 
@@ -199,8 +261,7 @@ int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
        spin_unlock(&block_rsv->lock);
 
        if (to_free > 0)
-               btrfs_space_info_free_bytes_may_use(fs_info, block_rsv->space_info,
-                                                   to_free);
+               btrfs_space_info_free_bytes_may_use(fs_info, space_info, to_free);
 
        if (refilled_bytes > 0)
                trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv", 0,
@@ -422,7 +483,8 @@ int btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs,
        return 0;
 }
 
-static inline void drop_delayed_ref(struct btrfs_delayed_ref_root *delayed_refs,
+static inline void drop_delayed_ref(struct btrfs_fs_info *fs_info,
+                                   struct btrfs_delayed_ref_root *delayed_refs,
                                    struct btrfs_delayed_ref_head *head,
                                    struct btrfs_delayed_ref_node *ref)
 {
@@ -433,9 +495,11 @@ static inline void drop_delayed_ref(struct btrfs_delayed_ref_root *delayed_refs,
                list_del(&ref->add_list);
        btrfs_put_delayed_ref(ref);
        atomic_dec(&delayed_refs->num_entries);
+       btrfs_delayed_refs_rsv_release(fs_info, 1, 0);
 }
 
-static bool merge_ref(struct btrfs_delayed_ref_root *delayed_refs,
+static bool merge_ref(struct btrfs_fs_info *fs_info,
+                     struct btrfs_delayed_ref_root *delayed_refs,
                      struct btrfs_delayed_ref_head *head,
                      struct btrfs_delayed_ref_node *ref,
                      u64 seq)
@@ -464,10 +528,10 @@ static bool merge_ref(struct btrfs_delayed_ref_root *delayed_refs,
                        mod = -next->ref_mod;
                }
 
-               drop_delayed_ref(delayed_refs, head, next);
+               drop_delayed_ref(fs_info, delayed_refs, head, next);
                ref->ref_mod += mod;
                if (ref->ref_mod == 0) {
-                       drop_delayed_ref(delayed_refs, head, ref);
+                       drop_delayed_ref(fs_info, delayed_refs, head, ref);
                        done = true;
                } else {
                        /*
@@ -505,7 +569,7 @@ again:
                ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
                if (seq && ref->seq >= seq)
                        continue;
-               if (merge_ref(delayed_refs, head, ref, seq))
+               if (merge_ref(fs_info, delayed_refs, head, ref, seq))
                        goto again;
        }
 }
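
Because the per-ref reservation now travels with the ref nodes, every node removed by the merge path hands one unit back, which is why drop_delayed_ref() gained the fs_info argument. Here is a toy model of the opposite-action case shown in the hunk (the same-action branch is analogous but adds the modifier instead of negating it); it tracks released units only, not bytes.

#include <stdbool.h>

struct toy_ref {
        int ref_mod;   /* positive for additions, negated for removals */
        bool alive;
};

/* Each dropped node returns one per-ref reservation unit to the caller. */
static int toy_drop(struct toy_ref *ref)
{
        ref->alive = false;
        return 1;
}

/* Fold a later, opposite ref into an earlier one, as merge_ref() does. */
static int toy_merge_opposite(struct toy_ref *ref, struct toy_ref *next)
{
        int mod = -next->ref_mod;
        int released = 0;

        released += toy_drop(next);        /* 'next' is always consumed */
        ref->ref_mod += mod;
        if (ref->ref_mod == 0)
                released += toy_drop(ref); /* the two cancelled out entirely */

        return released;
}
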
@@ -584,10 +648,11 @@ void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
  * Return true if the ref was merged into an existing one (and therefore can be
  * freed by the caller).
  */
-static bool insert_delayed_ref(struct btrfs_delayed_ref_root *root,
+static bool insert_delayed_ref(struct btrfs_trans_handle *trans,
                               struct btrfs_delayed_ref_head *href,
                               struct btrfs_delayed_ref_node *ref)
 {
+       struct btrfs_delayed_ref_root *root = &trans->transaction->delayed_refs;
        struct btrfs_delayed_ref_node *exist;
        int mod;
 
@@ -598,6 +663,7 @@ static bool insert_delayed_ref(struct btrfs_delayed_ref_root *root,
                        list_add_tail(&ref->add_list, &href->ref_add_list);
                atomic_inc(&root->num_entries);
                spin_unlock(&href->lock);
+               trans->delayed_ref_updates++;
                return false;
        }
 
@@ -626,7 +692,7 @@ static bool insert_delayed_ref(struct btrfs_delayed_ref_root *root,
 
        /* remove existing tail if its ref_mod is zero */
        if (exist->ref_mod == 0)
-               drop_delayed_ref(root, href, exist);
+               drop_delayed_ref(trans->fs_info, root, href, exist);
        spin_unlock(&href->lock);
        return true;
 }
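
This is the counterpart of the release above: insert_delayed_ref() is now the place where a unit of per-ref reservation debt is recorded, and only when the node is genuinely new rather than merged into (and freed against) an existing one. A counter-only sketch of that rule, using hypothetical toy types rather than the real transaction handle:

#include <stdbool.h>

/* Just the two counters that btrfs_update_delayed_refs_rsv() later consumes. */
struct toy_trans {
        unsigned long delayed_ref_updates;
        unsigned long delayed_ref_csum_deletions;
};

/*
 * merged == true means the node was folded into an existing one and freed by
 * the caller, so it must not add to the reservation debt.
 */
static void toy_account_ref_insert(struct toy_trans *trans, bool merged)
{
        if (!merged)
                trans->delayed_ref_updates++;
}

Dropping or merging away a node later calls btrfs_delayed_refs_rsv_release(fs_info, 1, 0), keeping the two sides of the accounting balanced.
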
@@ -647,6 +713,15 @@ static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans,
        BUG_ON(existing->is_data != update->is_data);
 
        spin_lock(&existing->lock);
+
+       /*
+        * When freeing an extent, we may not know the owning root when we
+        * first create the head_ref. However, some deref before the last deref
+        * will know it, so we just need to update the head_ref accordingly.
+        */
+       if (!existing->owning_root)
+               existing->owning_root = update->owning_root;
+
        if (update->must_insert_reserved) {
                /* if the extent was freed and then
                 * reallocated before the delayed ref
@@ -656,6 +731,7 @@ static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans,
                 * Set it again here
                 */
                existing->must_insert_reserved = update->must_insert_reserved;
+               existing->owning_root = update->owning_root;
 
                /*
                 * update the num_bytes so we make sure the accounting
@@ -695,6 +771,8 @@ static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans,
        /*
         * If we are going from a positive ref mod to a negative or vice
         * versa we need to make sure to adjust pending_csums accordingly.
+        * We reserve bytes for csum deletion when adding or updating a ref
+        * head; see add_delayed_ref_head() for more details.
         */
        if (existing->is_data) {
                u64 csum_leaves =
@@ -703,11 +781,11 @@ static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans,
 
                if (existing->total_ref_mod >= 0 && old_ref_mod < 0) {
                        delayed_refs->pending_csums -= existing->num_bytes;
-                       btrfs_delayed_refs_rsv_release(fs_info, csum_leaves);
+                       btrfs_delayed_refs_rsv_release(fs_info, 0, csum_leaves);
                }
                if (existing->total_ref_mod < 0 && old_ref_mod >= 0) {
                        delayed_refs->pending_csums += existing->num_bytes;
-                       trans->delayed_ref_updates += csum_leaves;
+                       trans->delayed_ref_csum_deletions += csum_leaves;
                }
        }
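
The csum side of the accounting keys off the sign of total_ref_mod: a head that ends up as a net drop owes the deletion of every checksum item covering the extent, and a head that swings back to a net addition gives that reservation back. A rough, self-contained model of how an extent size maps to csum leaves follows; the per-leaf capacity is a placeholder, since the kernel derives the real value from the nodesize and checksum size via btrfs_csum_bytes_to_leaves().

#include <stdint.h>

#define TOY_SECTORSIZE      4096ULL
#define TOY_CSUMS_PER_LEAF  2048ULL   /* placeholder, fs dependent in reality */

/* One checksum per sector, packed into leaves of limited capacity. */
static uint64_t toy_csum_leaves(uint64_t extent_bytes)
{
        uint64_t csums = (extent_bytes + TOY_SECTORSIZE - 1) / TOY_SECTORSIZE;

        return (csums + TOY_CSUMS_PER_LEAF - 1) / TOY_CSUMS_PER_LEAF;
}

With the change above, a negative-to-non-negative flip releases that many leaves' worth of reservation directly, while the opposite flip charges them to trans->delayed_ref_csum_deletions instead of the generic delayed_ref_updates counter.
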
 
@@ -718,7 +796,7 @@ static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
                                  struct btrfs_qgroup_extent_record *qrecord,
                                  u64 bytenr, u64 num_bytes, u64 ref_root,
                                  u64 reserved, int action, bool is_data,
-                                 bool is_system)
+                                 bool is_system, u64 owning_root)
 {
        int count_mod = 1;
        bool must_insert_reserved = false;
@@ -758,7 +836,9 @@ static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
        head_ref->bytenr = bytenr;
        head_ref->num_bytes = num_bytes;
        head_ref->ref_mod = count_mod;
+       head_ref->reserved_bytes = reserved;
        head_ref->must_insert_reserved = must_insert_reserved;
+       head_ref->owning_root = owning_root;
        head_ref->is_data = is_data;
        head_ref->is_system = is_system;
        head_ref->ref_tree = RB_ROOT_CACHED;
@@ -819,16 +899,21 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
                kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
                head_ref = existing;
        } else {
+               /*
+                * We reserve the amount of bytes needed to delete csums when
+                * adding the ref head and not when adding individual drop refs
+                * since the csum items are deleted only after running the last
+                * delayed drop ref (the data extent's ref count drops to 0).
+                */
                if (head_ref->is_data && head_ref->ref_mod < 0) {
                        delayed_refs->pending_csums += head_ref->num_bytes;
-                       trans->delayed_ref_updates +=
+                       trans->delayed_ref_csum_deletions +=
                                btrfs_csum_bytes_to_leaves(trans->fs_info,
                                                           head_ref->num_bytes);
                }
                delayed_refs->num_heads++;
                delayed_refs->num_heads_ready++;
                atomic_inc(&delayed_refs->num_entries);
-               trans->delayed_ref_updates++;
        }
        if (qrecord_inserted_ret)
                *qrecord_inserted_ret = qrecord_inserted;
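
Putting the head-side pieces together: only a brand-new head that drops a data extent charges the csum-deletion leaves, and heads no longer bump delayed_ref_updates at all because the individual ref nodes now account for themselves in insert_delayed_ref(). A counter-only sketch under those assumptions (toy types, not the kernel structures):

#include <stdbool.h>
#include <stdint.h>

/* Counters that live on the transaction and delayed-refs root in real code. */
struct toy_head_counters {
        uint64_t pending_csums;
        uint64_t delayed_ref_csum_deletions;
};

static void toy_add_ref_head(struct toy_head_counters *c, bool head_is_new,
                             bool is_data, int ref_mod, uint64_t num_bytes,
                             uint64_t csum_leaves)
{
        if (head_is_new && is_data && ref_mod < 0) {
                c->pending_csums += num_bytes;
                c->delayed_ref_csum_deletions += csum_leaves;
        }
        /* Note: no per-head delayed_ref_updates increment anymore. */
}
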
@@ -837,8 +922,7 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
 }
 
 /*
- * init_delayed_ref_common - Initialize the structure which represents a
- *                          modification to a an extent.
+ * Initialize the structure which represents a modification to an extent.
  *
  * @fs_info:    Internal to the mounted filesystem mount structure.
  *
@@ -909,7 +993,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
        u64 parent = generic_ref->parent;
        u8 ref_type;
 
-       is_system = (generic_ref->tree_ref.owning_root == BTRFS_CHUNK_TREE_OBJECTID);
+       is_system = (generic_ref->tree_ref.ref_root == BTRFS_CHUNK_TREE_OBJECTID);
 
        ASSERT(generic_ref->type == BTRFS_REF_METADATA && generic_ref->action);
        ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
@@ -922,8 +1006,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
                return -ENOMEM;
        }
 
-       if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
-           !generic_ref->skip_qgroup) {
+       if (btrfs_qgroup_enabled(fs_info) && !generic_ref->skip_qgroup) {
                record = kzalloc(sizeof(*record), GFP_NOFS);
                if (!record) {
                        kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
@@ -938,15 +1021,15 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
                ref_type = BTRFS_TREE_BLOCK_REF_KEY;
 
        init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
-                               generic_ref->tree_ref.owning_root, action,
+                               generic_ref->tree_ref.ref_root, action,
                                ref_type);
-       ref->root = generic_ref->tree_ref.owning_root;
+       ref->root = generic_ref->tree_ref.ref_root;
        ref->parent = parent;
        ref->level = level;
 
        init_delayed_ref_head(head_ref, record, bytenr, num_bytes,
-                             generic_ref->tree_ref.owning_root, 0, action,
-                             false, is_system);
+                             generic_ref->tree_ref.ref_root, 0, action,
+                             false, is_system, generic_ref->owning_root);
        head_ref->extent_op = extent_op;
 
        delayed_refs = &trans->transaction->delayed_refs;
@@ -959,7 +1042,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
        head_ref = add_delayed_ref_head(trans, head_ref, record,
                                        action, &qrecord_inserted);
 
-       merged = insert_delayed_ref(delayed_refs, head_ref, &ref->node);
+       merged = insert_delayed_ref(trans, head_ref, &ref->node);
        spin_unlock(&delayed_refs->lock);
 
        /*
@@ -998,7 +1081,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
        u64 bytenr = generic_ref->bytenr;
        u64 num_bytes = generic_ref->len;
        u64 parent = generic_ref->parent;
-       u64 ref_root = generic_ref->data_ref.owning_root;
+       u64 ref_root = generic_ref->data_ref.ref_root;
        u64 owner = generic_ref->data_ref.ino;
        u64 offset = generic_ref->data_ref.offset;
        u8 ref_type;
@@ -1026,8 +1109,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
                return -ENOMEM;
        }
 
-       if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
-           !generic_ref->skip_qgroup) {
+       if (btrfs_qgroup_enabled(fs_info) && !generic_ref->skip_qgroup) {
                record = kzalloc(sizeof(*record), GFP_NOFS);
                if (!record) {
                        kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
@@ -1038,7 +1120,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
        }
 
        init_delayed_ref_head(head_ref, record, bytenr, num_bytes, ref_root,
-                             reserved, action, true, false);
+                             reserved, action, true, false, generic_ref->owning_root);
        head_ref->extent_op = NULL;
 
        delayed_refs = &trans->transaction->delayed_refs;
@@ -1051,7 +1133,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
        head_ref = add_delayed_ref_head(trans, head_ref, record,
                                        action, &qrecord_inserted);
 
-       merged = insert_delayed_ref(delayed_refs, head_ref, &ref->node);
+       merged = insert_delayed_ref(trans, head_ref, &ref->node);
        spin_unlock(&delayed_refs->lock);
 
        /*
@@ -1084,7 +1166,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
                return -ENOMEM;
 
        init_delayed_ref_head(head_ref, NULL, bytenr, num_bytes, 0, 0,
-                             BTRFS_UPDATE_DELAYED_HEAD, false, false);
+                             BTRFS_UPDATE_DELAYED_HEAD, false, false, 0);
        head_ref->extent_op = extent_op;
 
        delayed_refs = &trans->transaction->delayed_refs;