btrfs: add and use helpers for reading and writing fs_info->generation
index c780d37294636852144c8082899e1da4c1189b78..f5db3a483f40544500a0f2a9d54f4e6096cefff6 100644 (file)
@@ -386,7 +386,7 @@ loop:
                        IO_TREE_TRANS_DIRTY_PAGES);
        extent_io_tree_init(fs_info, &cur_trans->pinned_extents,
                        IO_TREE_FS_PINNED_EXTENTS);
-       fs_info->generation++;
+       btrfs_set_fs_generation(fs_info, fs_info->generation + 1);
        cur_trans->transid = fs_info->generation;
        fs_info->running_transaction = cur_trans;
        cur_trans->aborted = 0;
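
btrfs_set_fs_generation() above is one of the helpers named in the commit
subject. A minimal sketch of the pair, assuming they are plain READ_ONCE()/
WRITE_ONCE() wrappers in fs_info.h, so lockless readers of
fs_info->generation do not race with this increment:

static inline u64 btrfs_get_fs_generation(const struct btrfs_fs_info *fs_info)
{
	return READ_ONCE(fs_info->generation);
}

static inline void btrfs_set_fs_generation(struct btrfs_fs_info *fs_info, u64 gen)
{
	WRITE_ONCE(fs_info->generation, gen);
}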
@@ -561,6 +561,69 @@ static inline bool need_reserve_reloc_root(struct btrfs_root *root)
        return true;
 }
 
+static int btrfs_reserve_trans_metadata(struct btrfs_fs_info *fs_info,
+                                       enum btrfs_reserve_flush_enum flush,
+                                       u64 num_bytes,
+                                       u64 *delayed_refs_bytes)
+{
+       struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
+       struct btrfs_space_info *si = fs_info->trans_block_rsv.space_info;
+       u64 extra_delayed_refs_bytes = 0;
+       u64 bytes;
+       int ret;
+
+       /*
+        * If there's a gap between the size of the delayed refs reserve and
+        * its reserved space, then some tasks have added delayed refs or bumped
+        * its size otherwise (due to block group creation or removal, or block
+        * group item update). Also try to allocate that gap in order to prevent
+        * using (and possibly abusing) the global reserve when committing the
+        * transaction.
+        */
+       if (flush == BTRFS_RESERVE_FLUSH_ALL &&
+           !btrfs_block_rsv_full(delayed_refs_rsv)) {
+               spin_lock(&delayed_refs_rsv->lock);
+               if (delayed_refs_rsv->size > delayed_refs_rsv->reserved)
+                       extra_delayed_refs_bytes = delayed_refs_rsv->size -
+                               delayed_refs_rsv->reserved;
+               spin_unlock(&delayed_refs_rsv->lock);
+       }
+
+       bytes = num_bytes + *delayed_refs_bytes + extra_delayed_refs_bytes;
+
+       /*
+        * We want to reserve all the bytes we may need all at once, so we only
+        * do 1 enospc flushing cycle per transaction start.
+        */
+       ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush);
+       if (ret == 0) {
+               if (extra_delayed_refs_bytes > 0)
+                       btrfs_migrate_to_delayed_refs_rsv(fs_info,
+                                                         extra_delayed_refs_bytes);
+               return 0;
+       }
+
+       if (extra_delayed_refs_bytes > 0) {
+               bytes -= extra_delayed_refs_bytes;
+               ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush);
+               if (ret == 0)
+                       return 0;
+       }
+
+       /*
+        * If this is an emergency flush, which can steal from the global block
+        * reserve, then attempt to not reserve space for the delayed refs, as
+        * we will consume space for them from the global block reserve.
+        */
+       if (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL) {
+               bytes -= *delayed_refs_bytes;
+               *delayed_refs_bytes = 0;
+               ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush);
+       }
+
+       return ret;
+}
+
 static struct btrfs_trans_handle *
 start_transaction(struct btrfs_root *root, unsigned int num_items,
                  unsigned int type, enum btrfs_reserve_flush_enum flush,
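
A minimal sketch of the caller contract of btrfs_reserve_trans_metadata(),
mirroring how start_transaction() consumes it in the hunks below: on success,
@num_bytes has been reserved from the transaction's space_info, and
*delayed_refs_bytes holds the portion actually reserved for delayed refs
(zeroed when the BTRFS_RESERVE_FLUSH_ALL_STEAL fallback gave up on it):

	u64 num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items);
	u64 delayed_refs_bytes = btrfs_calc_delayed_ref_bytes(fs_info, num_items);
	int ret;

	ret = btrfs_reserve_trans_metadata(fs_info, flush, num_bytes,
					   &delayed_refs_bytes);
	if (ret == 0) {
		/* The transaction part is tracked in the trans block reserve. */
		btrfs_block_rsv_add_bytes(&fs_info->trans_block_rsv, num_bytes, true);
		/* Any remaining delayed_refs_bytes feeds the handle's local rsv. */
	}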
@@ -568,10 +631,12 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
+       struct btrfs_block_rsv *trans_rsv = &fs_info->trans_block_rsv;
        struct btrfs_trans_handle *h;
        struct btrfs_transaction *cur_trans;
        u64 num_bytes = 0;
        u64 qgroup_reserved = 0;
+       u64 delayed_refs_bytes = 0;
        bool reloc_reserved = false;
        bool do_chunk_alloc = false;
        int ret;
@@ -594,9 +659,6 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
         * the appropriate flushing if need be.
         */
        if (num_items && root != fs_info->chunk_root) {
-               struct btrfs_block_rsv *rsv = &fs_info->trans_block_rsv;
-               u64 delayed_refs_bytes = 0;
-
                qgroup_reserved = num_items * fs_info->nodesize;
                /*
                 * Use prealloc for now, as there might be a currently running
@@ -608,20 +670,16 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
                if (ret)
                        return ERR_PTR(ret);
 
+               num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items);
                /*
-                * We want to reserve all the bytes we may need all at once, so
-                * we only do 1 enospc flushing cycle per transaction start.  We
-                * accomplish this by simply assuming we'll do num_items worth
-                * of delayed refs updates in this trans handle, and refill that
-                * amount for whatever is missing in the reserve.
+                * If we plan to insert/update/delete "num_items" from a btree,
+                * we will also generate delayed refs for extent buffers in the
+                * respective btree paths, so reserve space for the delayed refs
+                * that will be generated by the caller as it modifies btrees.
+                * Try to reserve them to avoid excessive use of the global
+                * block reserve.
                 */
-               num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items);
-               if (flush == BTRFS_RESERVE_FLUSH_ALL &&
-                   !btrfs_block_rsv_full(delayed_refs_rsv)) {
-                       delayed_refs_bytes = btrfs_calc_delayed_ref_bytes(fs_info,
-                                                                         num_items);
-                       num_bytes += delayed_refs_bytes;
-               }
+               delayed_refs_bytes = btrfs_calc_delayed_ref_bytes(fs_info, num_items);
 
                /*
                 * Do the reservation for the relocation root creation
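
Both size helpers used above predate this change. A sketch of their
definitions, assuming the fs.h and delayed-ref.h versions from around this
time (the free space tree doubling in the second helper is an assumption):

static inline u64 btrfs_calc_insert_metadata_size(const struct btrfs_fs_info *fs_info,
						  unsigned num_items)
{
	/* Worst case: COW two full tree paths (BTRFS_MAX_LEVEL nodes) per item. */
	return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items;
}

static inline u64 btrfs_calc_delayed_ref_bytes(const struct btrfs_fs_info *fs_info,
					       int num_delayed_refs)
{
	u64 num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_delayed_refs);

	/* Free space tree updates may double the metadata needed per ref. */
	if (btrfs_test_opt(fs_info, FREE_SPACE_TREE))
		num_bytes *= 2;

	return num_bytes;
}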
@@ -631,16 +689,14 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
                        reloc_reserved = true;
                }
 
-               ret = btrfs_reserve_metadata_bytes(fs_info, rsv, num_bytes, flush);
+               ret = btrfs_reserve_trans_metadata(fs_info, flush, num_bytes,
+                                                  &delayed_refs_bytes);
                if (ret)
                        goto reserve_fail;
-               if (delayed_refs_bytes) {
-                       btrfs_migrate_to_delayed_refs_rsv(fs_info, delayed_refs_bytes);
-                       num_bytes -= delayed_refs_bytes;
-               }
-               btrfs_block_rsv_add_bytes(rsv, num_bytes, true);
 
-               if (rsv->space_info->force_alloc)
+               btrfs_block_rsv_add_bytes(trans_rsv, num_bytes, true);
+
+               if (trans_rsv->space_info->force_alloc)
                        do_chunk_alloc = true;
        } else if (num_items == 0 && flush == BTRFS_RESERVE_FLUSH_ALL &&
                   !btrfs_block_rsv_full(delayed_refs_rsv)) {
@@ -700,6 +756,7 @@ again:
 
        h->type = type;
        INIT_LIST_HEAD(&h->new_bgs);
+       btrfs_init_metadata_block_rsv(fs_info, &h->delayed_rsv, BTRFS_BLOCK_RSV_DELOPS);
 
        smp_mb();
        if (cur_trans->state >= TRANS_STATE_COMMIT_START &&
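
The handle's new delayed_rsv is an ordinary metadata block reserve.
btrfs_init_metadata_block_rsv() is an existing helper; a sketch assuming its
block-rsv.c definition, which initializes the rsv with the given type and
points it at the metadata space_info:

void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
				   struct btrfs_block_rsv *rsv,
				   enum btrfs_rsv_type type)
{
	btrfs_init_block_rsv(rsv, type);
	rsv->space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
}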
@@ -712,8 +769,17 @@ again:
        if (num_bytes) {
                trace_btrfs_space_reservation(fs_info, "transaction",
                                              h->transid, num_bytes, 1);
-               h->block_rsv = &fs_info->trans_block_rsv;
+               h->block_rsv = trans_rsv;
                h->bytes_reserved = num_bytes;
+               if (delayed_refs_bytes > 0) {
+                       trace_btrfs_space_reservation(fs_info,
+                                                     "local_delayed_refs_rsv",
+                                                     h->transid,
+                                                     delayed_refs_bytes, 1);
+                       h->delayed_refs_bytes_reserved = delayed_refs_bytes;
+                       btrfs_block_rsv_add_bytes(&h->delayed_rsv, delayed_refs_bytes, true);
+                       delayed_refs_bytes = 0;
+               }
                h->reloc_reserved = reloc_reserved;
        }
 
@@ -769,8 +835,10 @@ join_fail:
        kmem_cache_free(btrfs_trans_handle_cachep, h);
 alloc_fail:
        if (num_bytes)
-               btrfs_block_rsv_release(fs_info, &fs_info->trans_block_rsv,
-                                       num_bytes, NULL);
+               btrfs_block_rsv_release(fs_info, trans_rsv, num_bytes, NULL);
+       if (delayed_refs_bytes)
+               btrfs_space_info_free_bytes_may_use(fs_info, trans_rsv->space_info,
+                                                   delayed_refs_bytes);
 reserve_fail:
        btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
        return ERR_PTR(ret);
@@ -817,7 +885,7 @@ struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *roo
 }
 
 /*
- * btrfs_attach_transaction() - catch the running transaction
+ * Catch the running transaction.
  *
  * It is used when we want to commit the current transaction, but
  * don't want to start a new one.
@@ -836,7 +904,7 @@ struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root)
 }
 
 /*
- * btrfs_attach_transaction_barrier() - catch the running transaction
+ * Catch the running transaction.
  *
  * It is similar to the above function; the difference is that this one
  * will wait for all the inactive transactions until they fully
@@ -991,11 +1059,14 @@ static void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans)
 
        if (!trans->block_rsv) {
                ASSERT(!trans->bytes_reserved);
+               ASSERT(!trans->delayed_refs_bytes_reserved);
                return;
        }
 
-       if (!trans->bytes_reserved)
+       if (!trans->bytes_reserved) {
+               ASSERT(!trans->delayed_refs_bytes_reserved);
                return;
+       }
 
        ASSERT(trans->block_rsv == &fs_info->trans_block_rsv);
        trace_btrfs_space_reservation(fs_info, "transaction",
@@ -1003,6 +1074,16 @@ static void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans)
        btrfs_block_rsv_release(fs_info, trans->block_rsv,
                                trans->bytes_reserved, NULL);
        trans->bytes_reserved = 0;
+
+       if (!trans->delayed_refs_bytes_reserved)
+               return;
+
+       trace_btrfs_space_reservation(fs_info, "local_delayed_refs_rsv",
+                                     trans->transid,
+                                     trans->delayed_refs_bytes_reserved, 0);
+       btrfs_block_rsv_release(fs_info, &trans->delayed_rsv,
+                               trans->delayed_refs_bytes_reserved, NULL);
+       trans->delayed_refs_bytes_reserved = 0;
 }
 
 static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
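
The release path is now symmetric with the reservation in
start_transaction(): the transaction part drains through trans->block_rsv,
the local delayed refs part through trans->delayed_rsv. For reference, the
prototype of the release helper (assuming the current block-rsv.h one);
unused space returns to the owning space_info:

u64 btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
			    struct btrfs_block_rsv *block_rsv,
			    u64 num_bytes, u64 *qgroup_to_release);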
@@ -1334,7 +1415,7 @@ again:
        }
 
        /* Now flush any delayed refs generated by updating all of the roots */
-       ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
+       ret = btrfs_run_delayed_refs(trans, U64_MAX);
        if (ret)
                return ret;
 
@@ -1349,7 +1430,7 @@ again:
                 * so we want to keep this flushing in this loop to make sure
                 * everything gets run.
                 */
-               ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
+               ret = btrfs_run_delayed_refs(trans, U64_MAX);
                if (ret)
                        return ret;
        }
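
The two (unsigned long)-1 to U64_MAX conversions above, and the ones in
later hunks, match btrfs_run_delayed_refs() taking a u64 count from this
series on; presumably the prototype is along these lines, with U64_MAX
meaning run all pending delayed refs (the parameter name is an assumption):

int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, u64 count);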
@@ -1483,45 +1564,6 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
        return 0;
 }
 
-/*
- * defrag a given btree.
- * Every leaf in the btree is read and defragged.
- */
-int btrfs_defrag_root(struct btrfs_root *root)
-{
-       struct btrfs_fs_info *info = root->fs_info;
-       struct btrfs_trans_handle *trans;
-       int ret;
-
-       if (test_and_set_bit(BTRFS_ROOT_DEFRAG_RUNNING, &root->state))
-               return 0;
-
-       while (1) {
-               trans = btrfs_start_transaction(root, 0);
-               if (IS_ERR(trans)) {
-                       ret = PTR_ERR(trans);
-                       break;
-               }
-
-               ret = btrfs_defrag_leaves(trans, root);
-
-               btrfs_end_transaction(trans);
-               btrfs_btree_balance_dirty(info);
-               cond_resched();
-
-               if (btrfs_fs_closing(info) || ret != -EAGAIN)
-                       break;
-
-               if (btrfs_defrag_cancelled(info)) {
-                       btrfs_debug(info, "defrag_root cancelled");
-                       ret = -EAGAIN;
-                       break;
-               }
-       }
-       clear_bit(BTRFS_ROOT_DEFRAG_RUNNING, &root->state);
-       return ret;
-}
-
 /*
  * Do all special snapshot related qgroup dirty hack.
  *
@@ -1539,11 +1581,10 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
        int ret;
 
        /*
-        * Save some performance in the case that qgroups are not
-        * enabled. If this check races with the ioctl, rescan will
-        * kick in anyway.
+        * Save some performance in the case that qgroups are not enabled. If
+        * this check races with the ioctl, rescan will kick in anyway.
         */
-       if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
+       if (!btrfs_qgroup_full_accounting(fs_info))
                return 0;
 
        /*
@@ -1567,7 +1608,7 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
         * for now flush the delayed refs to narrow the race window where the
         * qgroup counters could end up wrong.
         */
-       ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
+       ret = btrfs_run_delayed_refs(trans, U64_MAX);
        if (ret) {
                btrfs_abort_transaction(trans, ret);
                return ret;
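
btrfs_qgroup_full_accounting() above, and the BTRFS_QGROUP_MODE_* checks in
the snapshot hunks below, come from the simple quotas work. A sketch of the
presumed qgroup helpers, where full accounting is what the old
BTRFS_FS_QUOTA_ENABLED test approximated:

enum btrfs_qgroup_mode btrfs_qgroup_mode(const struct btrfs_fs_info *fs_info)
{
	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
		return BTRFS_QGROUP_MODE_DISABLED;
	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE)
		return BTRFS_QGROUP_MODE_SIMPLE;
	return BTRFS_QGROUP_MODE_FULL;
}

bool btrfs_qgroup_full_accounting(const struct btrfs_fs_info *fs_info)
{
	return btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_FULL;
}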
@@ -1582,7 +1623,7 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
 
        /* Now qgroups are all updated, we can inherit them to the new qgroups */
        ret = btrfs_qgroup_inherit(trans, src->root_key.objectid, dst_objectid,
-                                  inherit);
+                                  parent->root_key.objectid, inherit);
        if (ret < 0)
                goto out;
 
@@ -1732,6 +1773,12 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
        }
        btrfs_release_path(path);
 
+       ret = btrfs_create_qgroup(trans, objectid);
+       if (ret) {
+               btrfs_abort_transaction(trans, ret);
+               goto fail;
+       }
+
        /*
         * pull in the delayed directory update
         * and the delayed inode item
@@ -1843,8 +1890,12 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
         * To co-operate with that hack, we apply the same hack again.
         * Otherwise the snapshot will be greatly slowed down by a subtree qgroup rescan
         */
-       ret = qgroup_account_snapshot(trans, root, parent_root,
-                                     pending->inherit, objectid);
+       if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_FULL)
+               ret = qgroup_account_snapshot(trans, root, parent_root,
+                                             pending->inherit, objectid);
+       else if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE)
+               ret = btrfs_qgroup_inherit(trans, root->root_key.objectid, objectid,
+                                          parent_root->root_key.objectid, pending->inherit);
        if (ret < 0)
                goto fail;
 
@@ -1861,7 +1912,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
        btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size +
                                                  fname.disk_name.len * 2);
        parent_inode->i_mtime = inode_set_ctime_current(parent_inode);
-       ret = btrfs_update_inode_fallback(trans, parent_root, BTRFS_I(parent_inode));
+       ret = btrfs_update_inode_fallback(trans, BTRFS_I(parent_inode));
        if (ret) {
                btrfs_abort_transaction(trans, ret);
                goto fail;
@@ -2084,7 +2135,7 @@ static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
        struct btrfs_block_group *block_group, *tmp;
 
        list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
-               btrfs_delayed_refs_rsv_release(fs_info, 1);
+               btrfs_dec_delayed_refs_rsv_bg_inserts(fs_info);
                list_del_init(&block_group->bg_list);
        }
 }
@@ -2403,7 +2454,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
        if (ret)
                goto unlock_reloc;
 
-       ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
+       ret = btrfs_run_delayed_refs(trans, U64_MAX);
        if (ret)
                goto unlock_reloc;
 
@@ -2654,18 +2705,18 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info)
  */
 void __cold __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
                                      const char *function,
-                                     unsigned int line, int errno, bool first_hit)
+                                     unsigned int line, int error, bool first_hit)
 {
        struct btrfs_fs_info *fs_info = trans->fs_info;
 
-       WRITE_ONCE(trans->aborted, errno);
-       WRITE_ONCE(trans->transaction->aborted, errno);
-       if (first_hit && errno == -ENOSPC)
+       WRITE_ONCE(trans->aborted, error);
+       WRITE_ONCE(trans->transaction->aborted, error);
+       if (first_hit && error == -ENOSPC)
                btrfs_dump_space_info_for_trans_abort(fs_info);
        /* Wake up anybody who may be waiting on this transaction */
        wake_up(&fs_info->transaction_wait);
        wake_up(&fs_info->transaction_blocked_wait);
-       __btrfs_handle_fs_error(fs_info, function, line, errno, NULL);
+       __btrfs_handle_fs_error(fs_info, function, line, error, NULL);
 }
 
 int __init btrfs_transaction_init(void)
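
The errno to error rename in __btrfs_abort_transaction() presumably avoids
clashing with the C library's errno when this code is shared with userspace
tools, where <errno.h> may define errno as a macro and the preprocessor then
rewrites every occurrence, including a parameter with that name. A minimal
userspace illustration (the function and value here are made up):

#include <errno.h>
#include <stdio.h>

/*
 * If this parameter were named "errno", the preprocessor would rewrite it
 * (with glibc, to (*__errno_location ())) before the compiler ever saw it.
 */
static void report_error(int error)
{
	printf("error=%d (libc errno is %d)\n", error, errno);
}

int main(void)
{
	errno = 0;
	report_error(-28);	/* e.g. -ENOSPC */
	return 0;
}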