From: Greg Kroah-Hartman
Date: Fri, 13 Aug 2021 10:18:12 +0000 (+0200)
Subject: 5.4-stable patches
X-Git-Tag: v4.4.281~10
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2aa70ccfdfec9ea9726ce4a9813dff4466359613;p=thirdparty%2Fkernel%2Fstable-queue.git

5.4-stable patches

added patches:
    btrfs-fix-lockdep-splat-when-enabling-and-disabling-qgroups.patch
    btrfs-make-btrfs_qgroup_reserve_data-take-btrfs_inode.patch
    btrfs-make-qgroup_free_reserved_data-take-btrfs_inode.patch
    btrfs-qgroup-allow-to-unreserve-range-without-releasing-other-ranges.patch
    btrfs-qgroup-remove-async_commit-mechanism-in-favor-of-reserve-retry-after-edquot.patch
    btrfs-qgroup-try-to-flush-qgroup-space-when-we-get-edquot.patch
    btrfs-transaction-cleanup-unused-trans_state_blocked.patch
---

diff --git a/queue-5.4/btrfs-fix-lockdep-splat-when-enabling-and-disabling-qgroups.patch b/queue-5.4/btrfs-fix-lockdep-splat-when-enabling-and-disabling-qgroups.patch
new file mode 100644
index 00000000000..7f91ac508d0
--- /dev/null
+++ b/queue-5.4/btrfs-fix-lockdep-splat-when-enabling-and-disabling-qgroups.patch
@@ -0,0 +1,277 @@
+From foo@baz Fri Aug 13 12:17:11 PM CEST 2021
+From: Anand Jain
+Date: Fri, 13 Aug 2021 17:55:30 +0800
+Subject: btrfs: fix lockdep splat when enabling and disabling qgroups
+To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
+Cc: linux-btrfs@vger.kernel.org, Filipe Manana , David Sterba , Anand Jain
+Message-ID: <60ff32a043f5315ee559d4d9e1222e0f40a93917.1628845854.git.anand.jain@oracle.com>
+
+From: Filipe Manana
+
+commit a855fbe69229078cd8aecd8974fb996a5ca651e6 upstream
+
+When running test case btrfs/017 from fstests, lockdep reported the
+following splat:
+
+ [ 1297.067385] ======================================================
+ [ 1297.067708] WARNING: possible circular locking dependency detected
+ [ 1297.068022] 5.10.0-rc4-btrfs-next-73 #1 Not tainted
+ [ 1297.068322] ------------------------------------------------------
+ [ 1297.068629] btrfs/189080 is trying to acquire lock:
+ [ 1297.068929] ffff9f2725731690 (sb_internal#2){.+.+}-{0:0}, at: btrfs_quota_enable+0xaf/0xa70 [btrfs]
+ [ 1297.069274]
+ but task is already holding lock:
+ [ 1297.069868] ffff9f2702b61a08 (&fs_info->qgroup_ioctl_lock){+.+.}-{3:3}, at: btrfs_quota_enable+0x3b/0xa70 [btrfs]
+ [ 1297.070219]
+ which lock already depends on the new lock.
+ + [ 1297.071131] + the existing dependency chain (in reverse order) is: + [ 1297.071721] + -> #1 (&fs_info->qgroup_ioctl_lock){+.+.}-{3:3}: + [ 1297.072375] lock_acquire+0xd8/0x490 + [ 1297.072710] __mutex_lock+0xa3/0xb30 + [ 1297.073061] btrfs_qgroup_inherit+0x59/0x6a0 [btrfs] + [ 1297.073421] create_subvol+0x194/0x990 [btrfs] + [ 1297.073780] btrfs_mksubvol+0x3fb/0x4a0 [btrfs] + [ 1297.074133] __btrfs_ioctl_snap_create+0x119/0x1a0 [btrfs] + [ 1297.074498] btrfs_ioctl_snap_create+0x58/0x80 [btrfs] + [ 1297.074872] btrfs_ioctl+0x1a90/0x36f0 [btrfs] + [ 1297.075245] __x64_sys_ioctl+0x83/0xb0 + [ 1297.075617] do_syscall_64+0x33/0x80 + [ 1297.075993] entry_SYSCALL_64_after_hwframe+0x44/0xa9 + [ 1297.076380] + -> #0 (sb_internal#2){.+.+}-{0:0}: + [ 1297.077166] check_prev_add+0x91/0xc60 + [ 1297.077572] __lock_acquire+0x1740/0x3110 + [ 1297.077984] lock_acquire+0xd8/0x490 + [ 1297.078411] start_transaction+0x3c5/0x760 [btrfs] + [ 1297.078853] btrfs_quota_enable+0xaf/0xa70 [btrfs] + [ 1297.079323] btrfs_ioctl+0x2c60/0x36f0 [btrfs] + [ 1297.079789] __x64_sys_ioctl+0x83/0xb0 + [ 1297.080232] do_syscall_64+0x33/0x80 + [ 1297.080680] entry_SYSCALL_64_after_hwframe+0x44/0xa9 + [ 1297.081139] + other info that might help us debug this: + + [ 1297.082536] Possible unsafe locking scenario: + + [ 1297.083510] CPU0 CPU1 + [ 1297.084005] ---- ---- + [ 1297.084500] lock(&fs_info->qgroup_ioctl_lock); + [ 1297.084994] lock(sb_internal#2); + [ 1297.085485] lock(&fs_info->qgroup_ioctl_lock); + [ 1297.085974] lock(sb_internal#2); + [ 1297.086454] + *** DEADLOCK *** + [ 1297.087880] 3 locks held by btrfs/189080: + [ 1297.088324] #0: ffff9f2725731470 (sb_writers#14){.+.+}-{0:0}, at: btrfs_ioctl+0xa73/0x36f0 [btrfs] + [ 1297.088799] #1: ffff9f2702b60cc0 (&fs_info->subvol_sem){++++}-{3:3}, at: btrfs_ioctl+0x1f4d/0x36f0 [btrfs] + [ 1297.089284] #2: ffff9f2702b61a08 (&fs_info->qgroup_ioctl_lock){+.+.}-{3:3}, at: btrfs_quota_enable+0x3b/0xa70 [btrfs] + [ 1297.089771] + stack backtrace: + [ 1297.090662] CPU: 5 PID: 189080 Comm: btrfs Not tainted 5.10.0-rc4-btrfs-next-73 #1 + [ 1297.091132] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 + [ 1297.092123] Call Trace: + [ 1297.092629] dump_stack+0x8d/0xb5 + [ 1297.093115] check_noncircular+0xff/0x110 + [ 1297.093596] check_prev_add+0x91/0xc60 + [ 1297.094076] ? kvm_clock_read+0x14/0x30 + [ 1297.094553] ? kvm_sched_clock_read+0x5/0x10 + [ 1297.095029] __lock_acquire+0x1740/0x3110 + [ 1297.095510] lock_acquire+0xd8/0x490 + [ 1297.095993] ? btrfs_quota_enable+0xaf/0xa70 [btrfs] + [ 1297.096476] start_transaction+0x3c5/0x760 [btrfs] + [ 1297.096962] ? btrfs_quota_enable+0xaf/0xa70 [btrfs] + [ 1297.097451] btrfs_quota_enable+0xaf/0xa70 [btrfs] + [ 1297.097941] ? btrfs_ioctl+0x1f4d/0x36f0 [btrfs] + [ 1297.098429] btrfs_ioctl+0x2c60/0x36f0 [btrfs] + [ 1297.098904] ? do_user_addr_fault+0x20c/0x430 + [ 1297.099382] ? kvm_clock_read+0x14/0x30 + [ 1297.099854] ? kvm_sched_clock_read+0x5/0x10 + [ 1297.100328] ? sched_clock+0x5/0x10 + [ 1297.100801] ? sched_clock_cpu+0x12/0x180 + [ 1297.101272] ? __x64_sys_ioctl+0x83/0xb0 + [ 1297.101739] __x64_sys_ioctl+0x83/0xb0 + [ 1297.102207] do_syscall_64+0x33/0x80 + [ 1297.102673] entry_SYSCALL_64_after_hwframe+0x44/0xa9 + [ 1297.103148] RIP: 0033:0x7f773ff65d87 + +This is because during the quota enable ioctl we lock first the mutex +qgroup_ioctl_lock and then start a transaction, and starting a transaction +acquires a fs freeze semaphore (at the VFS level). 
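[ Editor's sketch: to make the inversion concrete, here is a minimal
userspace model of the two acquisition orders, using plain pthreads.
"qgroup_ioctl_lock" and "sb_internal" are stand-ins for the kernel
locks; this is not kernel code. Built with cc -pthread, the two
threads can deadlock exactly as lockdep warns. ]

    #include <pthread.h>
    #include <stddef.h>

    static pthread_mutex_t qgroup_ioctl_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t sb_internal = PTHREAD_MUTEX_INITIALIZER;

    static void *quota_enable(void *arg)        /* old order: mutex, then transaction */
    {
        (void)arg;
        pthread_mutex_lock(&qgroup_ioctl_lock); /* ioctl takes the mutex... */
        pthread_mutex_lock(&sb_internal);       /* ...then start_transaction() takes the freeze sem */
        pthread_mutex_unlock(&sb_internal);
        pthread_mutex_unlock(&qgroup_ioctl_lock);
        return NULL;
    }

    static void *create_subvol(void *arg)       /* every other path: transaction, then mutex */
    {
        (void)arg;
        pthread_mutex_lock(&sb_internal);       /* transaction first... */
        pthread_mutex_lock(&qgroup_ioctl_lock); /* ...mutex second (btrfs_qgroup_inherit) */
        pthread_mutex_unlock(&qgroup_ioctl_lock);
        pthread_mutex_unlock(&sb_internal);
        return NULL;
    }

    int main(void)
    {
        pthread_t a, b;

        pthread_create(&a, NULL, quota_enable, NULL);
        pthread_create(&b, NULL, create_subvol, NULL);
        pthread_join(a, NULL);
        /* may never return: A holds the mutex and wants the freeze sem,
         * B holds the freeze sem and wants the mutex */
        pthread_join(b, NULL);
        return 0;
    }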
However, every other +code path, except for the quota disable ioctl path, we do the opposite: +we start a transaction and then lock the mutex. + +So fix this by making the quota enable and disable paths to start the +transaction without having the mutex locked, and then, after starting the +transaction, lock the mutex and check if some other task already enabled +or disabled the quotas, bailing with success if that was the case. + +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Anand Jain + + Conflicts: + fs/btrfs/qgroup.c +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ctree.h | 5 +++- + fs/btrfs/qgroup.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++-------- + 2 files changed, 52 insertions(+), 9 deletions(-) + +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -827,7 +827,10 @@ struct btrfs_fs_info { + */ + struct ulist *qgroup_ulist; + +- /* protect user change for quota operations */ ++ /* ++ * Protect user change for quota operations. If a transaction is needed, ++ * it must be started before locking this lock. ++ */ + struct mutex qgroup_ioctl_lock; + + /* list of dirty qgroups to be written at next commit */ +--- a/fs/btrfs/qgroup.c ++++ b/fs/btrfs/qgroup.c +@@ -886,6 +886,7 @@ int btrfs_quota_enable(struct btrfs_fs_i + struct btrfs_key found_key; + struct btrfs_qgroup *qgroup = NULL; + struct btrfs_trans_handle *trans = NULL; ++ struct ulist *ulist = NULL; + int ret = 0; + int slot; + +@@ -893,13 +894,28 @@ int btrfs_quota_enable(struct btrfs_fs_i + if (fs_info->quota_root) + goto out; + +- fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); +- if (!fs_info->qgroup_ulist) { ++ ulist = ulist_alloc(GFP_KERNEL); ++ if (!ulist) { + ret = -ENOMEM; + goto out; + } + + /* ++ * Unlock qgroup_ioctl_lock before starting the transaction. This is to ++ * avoid lock acquisition inversion problems (reported by lockdep) between ++ * qgroup_ioctl_lock and the vfs freeze semaphores, acquired when we ++ * start a transaction. ++ * After we started the transaction lock qgroup_ioctl_lock again and ++ * check if someone else created the quota root in the meanwhile. If so, ++ * just return success and release the transaction handle. ++ * ++ * Also we don't need to worry about someone else calling ++ * btrfs_sysfs_add_qgroups() after we unlock and getting an error because ++ * that function returns 0 (success) when the sysfs entries already exist. ++ */ ++ mutex_unlock(&fs_info->qgroup_ioctl_lock); ++ ++ /* + * 1 for quota root item + * 1 for BTRFS_QGROUP_STATUS item + * +@@ -908,12 +924,20 @@ int btrfs_quota_enable(struct btrfs_fs_i + * would be a lot of overkill. 
+ */ + trans = btrfs_start_transaction(tree_root, 2); ++ ++ mutex_lock(&fs_info->qgroup_ioctl_lock); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + trans = NULL; + goto out; + } + ++ if (fs_info->quota_root) ++ goto out; ++ ++ fs_info->qgroup_ulist = ulist; ++ ulist = NULL; ++ + /* + * initially create the quota tree + */ +@@ -1046,10 +1070,13 @@ out: + if (ret) { + ulist_free(fs_info->qgroup_ulist); + fs_info->qgroup_ulist = NULL; +- if (trans) +- btrfs_end_transaction(trans); + } + mutex_unlock(&fs_info->qgroup_ioctl_lock); ++ if (ret && trans) ++ btrfs_end_transaction(trans); ++ else if (trans) ++ ret = btrfs_end_transaction(trans); ++ ulist_free(ulist); + return ret; + } + +@@ -1062,19 +1089,29 @@ int btrfs_quota_disable(struct btrfs_fs_ + mutex_lock(&fs_info->qgroup_ioctl_lock); + if (!fs_info->quota_root) + goto out; ++ mutex_unlock(&fs_info->qgroup_ioctl_lock); + + /* + * 1 For the root item + * + * We should also reserve enough items for the quota tree deletion in + * btrfs_clean_quota_tree but this is not done. ++ * ++ * Also, we must always start a transaction without holding the mutex ++ * qgroup_ioctl_lock, see btrfs_quota_enable(). + */ + trans = btrfs_start_transaction(fs_info->tree_root, 1); ++ ++ mutex_lock(&fs_info->qgroup_ioctl_lock); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); ++ trans = NULL; + goto out; + } + ++ if (!fs_info->quota_root) ++ goto out; ++ + clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); + btrfs_qgroup_wait_for_completion(fs_info, false); + spin_lock(&fs_info->qgroup_lock); +@@ -1088,13 +1125,13 @@ int btrfs_quota_disable(struct btrfs_fs_ + ret = btrfs_clean_quota_tree(trans, quota_root); + if (ret) { + btrfs_abort_transaction(trans, ret); +- goto end_trans; ++ goto out; + } + + ret = btrfs_del_root(trans, "a_root->root_key); + if (ret) { + btrfs_abort_transaction(trans, ret); +- goto end_trans; ++ goto out; + } + + list_del("a_root->dirty_list); +@@ -1108,10 +1145,13 @@ int btrfs_quota_disable(struct btrfs_fs_ + free_extent_buffer(quota_root->commit_root); + kfree(quota_root); + +-end_trans: +- ret = btrfs_end_transaction(trans); + out: + mutex_unlock(&fs_info->qgroup_ioctl_lock); ++ if (ret && trans) ++ btrfs_end_transaction(trans); ++ else if (trans) ++ ret = btrfs_end_transaction(trans); ++ + return ret; + } + diff --git a/queue-5.4/btrfs-make-btrfs_qgroup_reserve_data-take-btrfs_inode.patch b/queue-5.4/btrfs-make-btrfs_qgroup_reserve_data-take-btrfs_inode.patch new file mode 100644 index 00000000000..d7a6bab4797 --- /dev/null +++ b/queue-5.4/btrfs-make-btrfs_qgroup_reserve_data-take-btrfs_inode.patch @@ -0,0 +1,112 @@ +From foo@baz Fri Aug 13 12:16:18 PM CEST 2021 +From: Anand Jain +Date: Fri, 13 Aug 2021 17:55:25 +0800 +Subject: btrfs: make btrfs_qgroup_reserve_data take btrfs_inode +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org +Cc: linux-btrfs@vger.kernel.org, Nikolay Borisov , David Sterba , Anand Jain +Message-ID: <7df1756cf877dc6c51330a5793008453a1ee730a.1628845854.git.anand.jain@oracle.com> + +From: Nikolay Borisov + +commit 7661a3e033ab782366e0e1f4b6aad0df3555fcbd upstream + +There's only a single use of vfs_inode in a tracepoint so let's take +btrfs_inode directly. 
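[ Editor's sketch of the shape of this conversion, with toy types. The
real BTRFS_I() is a container_of() on the embedded vfs_inode, as
modeled here; only the tracepoint still needs the VFS inode, so the
unwrap moves to the callers. ]

    #include <stddef.h>

    struct inode { int i_ino; };
    struct btrfs_inode {
        struct inode vfs_inode;   /* embedded VFS inode */
        int qgroup_state;         /* stand-in for btrfs-specific fields */
    };

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))
    #define BTRFS_I(vfs) container_of(vfs, struct btrfs_inode, vfs_inode)

    /* Before: took struct inode * and called BTRFS_I() internally.
     * After: takes struct btrfs_inode *; &inode->vfs_inode is used
     * only where the tracepoint needs the VFS inode. */
    static int qgroup_reserve_data(struct btrfs_inode *inode)
    {
        struct inode *vfs = &inode->vfs_inode;  /* for the tracepoint only */
        (void)vfs;
        return 0;
    }

    int main(void)
    {
        struct btrfs_inode bi = { { 1 }, 0 };
        struct inode *vfs = &bi.vfs_inode;

        return qgroup_reserve_data(BTRFS_I(vfs)); /* caller unwraps once */
    }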
+ +Signed-off-by: Nikolay Borisov +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Anand Jain +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/delalloc-space.c | 2 +- + fs/btrfs/file.c | 7 ++++--- + fs/btrfs/qgroup.c | 10 +++++----- + fs/btrfs/qgroup.h | 2 +- + 4 files changed, 11 insertions(+), 10 deletions(-) + +--- a/fs/btrfs/delalloc-space.c ++++ b/fs/btrfs/delalloc-space.c +@@ -151,7 +151,7 @@ int btrfs_check_data_free_space(struct i + return ret; + + /* Use new btrfs_qgroup_reserve_data to reserve precious data space. */ +- ret = btrfs_qgroup_reserve_data(inode, reserved, start, len); ++ ret = btrfs_qgroup_reserve_data(BTRFS_I(inode), reserved, start, len); + if (ret < 0) + btrfs_free_reserved_data_space_noquota(inode, start, len); + else +--- a/fs/btrfs/file.c ++++ b/fs/btrfs/file.c +@@ -3149,7 +3149,7 @@ reserve_space: + &cached_state); + if (ret) + goto out; +- ret = btrfs_qgroup_reserve_data(inode, &data_reserved, ++ ret = btrfs_qgroup_reserve_data(BTRFS_I(inode), &data_reserved, + alloc_start, bytes_to_reserve); + if (ret) { + unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, +@@ -3322,8 +3322,9 @@ static long btrfs_fallocate(struct file + free_extent_map(em); + break; + } +- ret = btrfs_qgroup_reserve_data(inode, &data_reserved, +- cur_offset, last_byte - cur_offset); ++ ret = btrfs_qgroup_reserve_data(BTRFS_I(inode), ++ &data_reserved, cur_offset, ++ last_byte - cur_offset); + if (ret < 0) { + cur_offset = last_byte; + free_extent_map(em); +--- a/fs/btrfs/qgroup.c ++++ b/fs/btrfs/qgroup.c +@@ -3425,11 +3425,11 @@ btrfs_qgroup_rescan_resume(struct btrfs_ + * same @reserved, caller must ensure when error happens it's OK + * to free *ALL* reserved space. + */ +-int btrfs_qgroup_reserve_data(struct inode *inode, ++int btrfs_qgroup_reserve_data(struct btrfs_inode *inode, + struct extent_changeset **reserved_ret, u64 start, + u64 len) + { +- struct btrfs_root *root = BTRFS_I(inode)->root; ++ struct btrfs_root *root = inode->root; + struct ulist_node *unode; + struct ulist_iterator uiter; + struct extent_changeset *reserved; +@@ -3452,12 +3452,12 @@ int btrfs_qgroup_reserve_data(struct ino + reserved = *reserved_ret; + /* Record already reserved space */ + orig_reserved = reserved->bytes_changed; +- ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start, ++ ret = set_record_extent_bits(&inode->io_tree, start, + start + len -1, EXTENT_QGROUP_RESERVED, reserved); + + /* Newly reserved space */ + to_reserve = reserved->bytes_changed - orig_reserved; +- trace_btrfs_qgroup_reserve_data(inode, start, len, ++ trace_btrfs_qgroup_reserve_data(&inode->vfs_inode, start, len, + to_reserve, QGROUP_RESERVE); + if (ret < 0) + goto cleanup; +@@ -3471,7 +3471,7 @@ cleanup: + /* cleanup *ALL* already reserved ranges */ + ULIST_ITER_INIT(&uiter); + while ((unode = ulist_next(&reserved->range_changed, &uiter))) +- clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val, ++ clear_extent_bit(&inode->io_tree, unode->val, + unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL); + /* Also free data bytes of already reserved one */ + btrfs_qgroup_free_refroot(root->fs_info, root->root_key.objectid, +--- a/fs/btrfs/qgroup.h ++++ b/fs/btrfs/qgroup.h +@@ -344,7 +344,7 @@ int btrfs_verify_qgroup_counts(struct bt + #endif + + /* New io_tree based accurate qgroup reserve API */ +-int btrfs_qgroup_reserve_data(struct inode *inode, ++int btrfs_qgroup_reserve_data(struct btrfs_inode *inode, + struct extent_changeset **reserved, u64 start, u64 len); + int btrfs_qgroup_release_data(struct 
inode *inode, u64 start, u64 len); + int btrfs_qgroup_free_data(struct inode *inode, diff --git a/queue-5.4/btrfs-make-qgroup_free_reserved_data-take-btrfs_inode.patch b/queue-5.4/btrfs-make-qgroup_free_reserved_data-take-btrfs_inode.patch new file mode 100644 index 00000000000..e3bdbd7060a --- /dev/null +++ b/queue-5.4/btrfs-make-qgroup_free_reserved_data-take-btrfs_inode.patch @@ -0,0 +1,59 @@ +From foo@baz Fri Aug 13 12:16:18 PM CEST 2021 +From: Anand Jain +Date: Fri, 13 Aug 2021 17:55:24 +0800 +Subject: btrfs: make qgroup_free_reserved_data take btrfs_inode +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org +Cc: linux-btrfs@vger.kernel.org, Nikolay Borisov , David Sterba , Anand Jain +Message-ID: <8a1f31bf0f35535bb38d906432d78a7de7fdff2c.1628845854.git.anand.jain@oracle.com> + +From: Nikolay Borisov + +commit df2cfd131fd33dbef1ce33be8b332b1f3d645f35 upstream + +It only uses btrfs_inode so can just as easily take it as an argument. + +Signed-off-by: Nikolay Borisov +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Anand Jain +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/qgroup.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/fs/btrfs/qgroup.c ++++ b/fs/btrfs/qgroup.c +@@ -3481,10 +3481,10 @@ cleanup: + } + + /* Free ranges specified by @reserved, normally in error path */ +-static int qgroup_free_reserved_data(struct inode *inode, ++static int qgroup_free_reserved_data(struct btrfs_inode *inode, + struct extent_changeset *reserved, u64 start, u64 len) + { +- struct btrfs_root *root = BTRFS_I(inode)->root; ++ struct btrfs_root *root = inode->root; + struct ulist_node *unode; + struct ulist_iterator uiter; + struct extent_changeset changeset; +@@ -3520,8 +3520,8 @@ static int qgroup_free_reserved_data(str + * EXTENT_QGROUP_RESERVED, we won't double free. + * So not need to rush. + */ +- ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, +- free_start, free_start + free_len - 1, ++ ret = clear_record_extent_bits(&inode->io_tree, free_start, ++ free_start + free_len - 1, + EXTENT_QGROUP_RESERVED, &changeset); + if (ret < 0) + goto out; +@@ -3550,7 +3550,8 @@ static int __btrfs_qgroup_release_data(s + /* In release case, we shouldn't have @reserved */ + WARN_ON(!free && reserved); + if (free && reserved) +- return qgroup_free_reserved_data(inode, reserved, start, len); ++ return qgroup_free_reserved_data(BTRFS_I(inode), reserved, ++ start, len); + extent_changeset_init(&changeset); + ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start, + start + len -1, EXTENT_QGROUP_RESERVED, &changeset); diff --git a/queue-5.4/btrfs-qgroup-allow-to-unreserve-range-without-releasing-other-ranges.patch b/queue-5.4/btrfs-qgroup-allow-to-unreserve-range-without-releasing-other-ranges.patch new file mode 100644 index 00000000000..d9f7fdc1a67 --- /dev/null +++ b/queue-5.4/btrfs-qgroup-allow-to-unreserve-range-without-releasing-other-ranges.patch @@ -0,0 +1,199 @@ +From foo@baz Fri Aug 13 12:16:18 PM CEST 2021 +From: Anand Jain +Date: Fri, 13 Aug 2021 17:55:26 +0800 +Subject: btrfs: qgroup: allow to unreserve range without releasing other ranges +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org +Cc: linux-btrfs@vger.kernel.org, Qu Wenruo , Josef Bacik , David Sterba , Anand Jain +Message-ID: + +From: Qu Wenruo + +commit 263da812e87bac4098a4778efaa32c54275641db upstream + +[PROBLEM] +Before this patch, when btrfs_qgroup_reserve_data() fails, we free all +reserved space of the changeset. 
+ +For example: + ret = btrfs_qgroup_reserve_data(inode, changeset, 0, SZ_1M); + ret = btrfs_qgroup_reserve_data(inode, changeset, SZ_1M, SZ_1M); + ret = btrfs_qgroup_reserve_data(inode, changeset, SZ_2M, SZ_1M); + +If the last btrfs_qgroup_reserve_data() failed, it will release the +entire [0, 3M) range. + +This behavior is kind of OK for now, as when we hit -EDQUOT, we normally +go error handling and need to release all reserved ranges anyway. + +But this also means the following call is not possible: + + ret = btrfs_qgroup_reserve_data(); + if (ret == -EDQUOT) { + /* Do something to free some qgroup space */ + ret = btrfs_qgroup_reserve_data(); + } + +As if the first btrfs_qgroup_reserve_data() fails, it will free all +reserved qgroup space. + +[CAUSE] +This is because we release all reserved ranges when +btrfs_qgroup_reserve_data() fails. + +[FIX] +This patch will implement a new function, qgroup_unreserve_range(), to +iterate through the ulist nodes, to find any nodes in the failure range, +and remove the EXTENT_QGROUP_RESERVED bits from the io_tree, and +decrease the extent_changeset::bytes_changed, so that we can revert to +previous state. + +This allows later patches to retry btrfs_qgroup_reserve_data() if EDQUOT +happens. + +Suggested-by: Josef Bacik +Reviewed-by: Josef Bacik +Signed-off-by: Qu Wenruo +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Anand Jain +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/qgroup.c | 92 +++++++++++++++++++++++++++++++++++++++++++++--------- + 1 file changed, 77 insertions(+), 15 deletions(-) + +--- a/fs/btrfs/qgroup.c ++++ b/fs/btrfs/qgroup.c +@@ -3411,6 +3411,73 @@ btrfs_qgroup_rescan_resume(struct btrfs_ + } + } + ++#define rbtree_iterate_from_safe(node, next, start) \ ++ for (node = start; node && ({ next = rb_next(node); 1;}); node = next) ++ ++static int qgroup_unreserve_range(struct btrfs_inode *inode, ++ struct extent_changeset *reserved, u64 start, ++ u64 len) ++{ ++ struct rb_node *node; ++ struct rb_node *next; ++ struct ulist_node *entry = NULL; ++ int ret = 0; ++ ++ node = reserved->range_changed.root.rb_node; ++ while (node) { ++ entry = rb_entry(node, struct ulist_node, rb_node); ++ if (entry->val < start) ++ node = node->rb_right; ++ else if (entry) ++ node = node->rb_left; ++ else ++ break; ++ } ++ ++ /* Empty changeset */ ++ if (!entry) ++ return 0; ++ ++ if (entry->val > start && rb_prev(&entry->rb_node)) ++ entry = rb_entry(rb_prev(&entry->rb_node), struct ulist_node, ++ rb_node); ++ ++ rbtree_iterate_from_safe(node, next, &entry->rb_node) { ++ u64 entry_start; ++ u64 entry_end; ++ u64 entry_len; ++ int clear_ret; ++ ++ entry = rb_entry(node, struct ulist_node, rb_node); ++ entry_start = entry->val; ++ entry_end = entry->aux; ++ entry_len = entry_end - entry_start + 1; ++ ++ if (entry_start >= start + len) ++ break; ++ if (entry_start + entry_len <= start) ++ continue; ++ /* ++ * Now the entry is in [start, start + len), revert the ++ * EXTENT_QGROUP_RESERVED bit. ++ */ ++ clear_ret = clear_extent_bits(&inode->io_tree, entry_start, ++ entry_end, EXTENT_QGROUP_RESERVED); ++ if (!ret && clear_ret < 0) ++ ret = clear_ret; ++ ++ ulist_del(&reserved->range_changed, entry->val, entry->aux); ++ if (likely(reserved->bytes_changed >= entry_len)) { ++ reserved->bytes_changed -= entry_len; ++ } else { ++ WARN_ON(1); ++ reserved->bytes_changed = 0; ++ } ++ } ++ ++ return ret; ++} ++ + /* + * Reserve qgroup space for range [start, start + len). 
+ * +@@ -3421,18 +3488,14 @@ btrfs_qgroup_rescan_resume(struct btrfs_ + * Return <0 for error (including -EQUOT) + * + * NOTE: this function may sleep for memory allocation. +- * if btrfs_qgroup_reserve_data() is called multiple times with +- * same @reserved, caller must ensure when error happens it's OK +- * to free *ALL* reserved space. + */ + int btrfs_qgroup_reserve_data(struct btrfs_inode *inode, + struct extent_changeset **reserved_ret, u64 start, + u64 len) + { + struct btrfs_root *root = inode->root; +- struct ulist_node *unode; +- struct ulist_iterator uiter; + struct extent_changeset *reserved; ++ bool new_reserved = false; + u64 orig_reserved; + u64 to_reserve; + int ret; +@@ -3445,6 +3508,7 @@ int btrfs_qgroup_reserve_data(struct btr + if (WARN_ON(!reserved_ret)) + return -EINVAL; + if (!*reserved_ret) { ++ new_reserved = true; + *reserved_ret = extent_changeset_alloc(); + if (!*reserved_ret) + return -ENOMEM; +@@ -3460,7 +3524,7 @@ int btrfs_qgroup_reserve_data(struct btr + trace_btrfs_qgroup_reserve_data(&inode->vfs_inode, start, len, + to_reserve, QGROUP_RESERVE); + if (ret < 0) +- goto cleanup; ++ goto out; + ret = qgroup_reserve(root, to_reserve, true, BTRFS_QGROUP_RSV_DATA); + if (ret < 0) + goto cleanup; +@@ -3468,15 +3532,13 @@ int btrfs_qgroup_reserve_data(struct btr + return ret; + + cleanup: +- /* cleanup *ALL* already reserved ranges */ +- ULIST_ITER_INIT(&uiter); +- while ((unode = ulist_next(&reserved->range_changed, &uiter))) +- clear_extent_bit(&inode->io_tree, unode->val, +- unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL); +- /* Also free data bytes of already reserved one */ +- btrfs_qgroup_free_refroot(root->fs_info, root->root_key.objectid, +- orig_reserved, BTRFS_QGROUP_RSV_DATA); +- extent_changeset_release(reserved); ++ qgroup_unreserve_range(inode, reserved, start, len); ++out: ++ if (new_reserved) { ++ extent_changeset_release(reserved); ++ kfree(reserved); ++ *reserved_ret = NULL; ++ } + return ret; + } + diff --git a/queue-5.4/btrfs-qgroup-remove-async_commit-mechanism-in-favor-of-reserve-retry-after-edquot.patch b/queue-5.4/btrfs-qgroup-remove-async_commit-mechanism-in-favor-of-reserve-retry-after-edquot.patch new file mode 100644 index 00000000000..07b6a74dbf8 --- /dev/null +++ b/queue-5.4/btrfs-qgroup-remove-async_commit-mechanism-in-favor-of-reserve-retry-after-edquot.patch @@ -0,0 +1,171 @@ +From foo@baz Fri Aug 13 12:17:11 PM CEST 2021 +From: Anand Jain +Date: Fri, 13 Aug 2021 17:55:29 +0800 +Subject: btrfs: qgroup: remove ASYNC_COMMIT mechanism in favor of reserve retry-after-EDQUOT +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org +Cc: linux-btrfs@vger.kernel.org, Qu Wenruo , Josef Bacik , David Sterba , Anand Jain +Message-ID: + +From: Qu Wenruo + +commit adca4d945c8dca28a85df45c5b117e6dac2e77f1 upstream + +commit a514d63882c3 ("btrfs: qgroup: Commit transaction in advance to +reduce early EDQUOT") tries to reduce the early EDQUOT problems by +checking the qgroup free against threshold and tries to wake up commit +kthread to free some space. + +The problem of that mechanism is, it can only free qgroup per-trans +metadata space, can't do anything to data, nor prealloc qgroup space. + +Now since we have the ability to flush qgroup space, and implemented +retry-after-EDQUOT behavior, such mechanism can be completely replaced. + +So this patch will cleanup such mechanism in favor of +retry-after-EDQUOT. 
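[ Editor's sketch of the control flow the series converges on: reserve,
and on -EDQUOT flush once and retry. qgroup_reserve() and
try_flush_qgroup() below are hypothetical userspace stubs standing in
for the kernel functions. ]

    #include <errno.h>
    #include <stdio.h>

    static int quota_left;          /* pretend one reservation can be reclaimed */

    static int qgroup_reserve(long bytes)
    {
        (void)bytes;
        return quota_left > 0 ? 0 : -EDQUOT;
    }

    static int try_flush_qgroup(void)
    {
        /* kernel: flush delalloc, wait for ordered extents, commit */
        quota_left = 1;
        return 0;
    }

    static int reserve_with_retry(long bytes)
    {
        int ret = qgroup_reserve(bytes);

        if (ret != -EDQUOT)
            return ret;               /* success, or a hard error: done */
        ret = try_flush_qgroup();     /* try to reclaim reserved space once */
        if (ret < 0)
            return ret;
        return qgroup_reserve(bytes); /* a second -EDQUOT is final */
    }

    int main(void)
    {
        quota_left = 0;               /* force the first attempt to fail */
        printf("ret = %d\n", reserve_with_retry(4096));
        return 0;
    }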
+ +Reviewed-by: Josef Bacik +Signed-off-by: Qu Wenruo +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Anand Jain +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ctree.h | 5 ----- + fs/btrfs/disk-io.c | 1 - + fs/btrfs/qgroup.c | 43 ++----------------------------------------- + fs/btrfs/transaction.c | 1 - + fs/btrfs/transaction.h | 14 -------------- + 5 files changed, 2 insertions(+), 62 deletions(-) + +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -505,11 +505,6 @@ enum { + */ + BTRFS_FS_EXCL_OP, + /* +- * To info transaction_kthread we need an immediate commit so it +- * doesn't need to wait for commit_interval +- */ +- BTRFS_FS_NEED_ASYNC_COMMIT, +- /* + * Indicate that balance has been set up from the ioctl and is in the + * main phase. The fs_info::balance_ctl is initialized. + * Set and cleared while holding fs_info::balance_mutex. +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -1749,7 +1749,6 @@ static int transaction_kthread(void *arg + + now = ktime_get_seconds(); + if (cur->state < TRANS_STATE_COMMIT_START && +- !test_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags) && + (now < cur->start_time || + now - cur->start_time < fs_info->commit_interval)) { + spin_unlock(&fs_info->trans_lock); +--- a/fs/btrfs/qgroup.c ++++ b/fs/btrfs/qgroup.c +@@ -11,7 +11,6 @@ + #include + #include + #include +-#include + + #include "ctree.h" + #include "transaction.h" +@@ -2840,20 +2839,8 @@ out: + return ret; + } + +-/* +- * Two limits to commit transaction in advance. +- * +- * For RATIO, it will be 1/RATIO of the remaining limit as threshold. +- * For SIZE, it will be in byte unit as threshold. +- */ +-#define QGROUP_FREE_RATIO 32 +-#define QGROUP_FREE_SIZE SZ_32M +-static bool qgroup_check_limits(struct btrfs_fs_info *fs_info, +- const struct btrfs_qgroup *qg, u64 num_bytes) ++static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes) + { +- u64 free; +- u64 threshold; +- + if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && + qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer) + return false; +@@ -2862,32 +2849,6 @@ static bool qgroup_check_limits(struct b + qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl) + return false; + +- /* +- * Even if we passed the check, it's better to check if reservation +- * for meta_pertrans is pushing us near limit. +- * If there is too much pertrans reservation or it's near the limit, +- * let's try commit transaction to free some, using transaction_kthread +- */ +- if ((qg->lim_flags & (BTRFS_QGROUP_LIMIT_MAX_RFER | +- BTRFS_QGROUP_LIMIT_MAX_EXCL))) { +- if (qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) { +- free = qg->max_excl - qgroup_rsv_total(qg) - qg->excl; +- threshold = min_t(u64, qg->max_excl / QGROUP_FREE_RATIO, +- QGROUP_FREE_SIZE); +- } else { +- free = qg->max_rfer - qgroup_rsv_total(qg) - qg->rfer; +- threshold = min_t(u64, qg->max_rfer / QGROUP_FREE_RATIO, +- QGROUP_FREE_SIZE); +- } +- +- /* +- * Use transaction_kthread to commit transaction, so we no +- * longer need to bother nested transaction nor lock context. 
+- */ +- if (free < threshold) +- btrfs_commit_transaction_locksafe(fs_info); +- } +- + return true; + } + +@@ -2937,7 +2898,7 @@ static int qgroup_reserve(struct btrfs_r + + qg = unode_aux_to_qgroup(unode); + +- if (enforce && !qgroup_check_limits(fs_info, qg, num_bytes)) { ++ if (enforce && !qgroup_check_limits(qg, num_bytes)) { + ret = -EDQUOT; + goto out; + } +--- a/fs/btrfs/transaction.c ++++ b/fs/btrfs/transaction.c +@@ -2297,7 +2297,6 @@ int btrfs_commit_transaction(struct btrf + */ + cur_trans->state = TRANS_STATE_COMPLETED; + wake_up(&cur_trans->commit_wait); +- clear_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags); + + spin_lock(&fs_info->trans_lock); + list_del_init(&cur_trans->list); +--- a/fs/btrfs/transaction.h ++++ b/fs/btrfs/transaction.h +@@ -207,20 +207,6 @@ int btrfs_clean_one_deleted_snapshot(str + int btrfs_commit_transaction(struct btrfs_trans_handle *trans); + int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, + int wait_for_unblock); +- +-/* +- * Try to commit transaction asynchronously, so this is safe to call +- * even holding a spinlock. +- * +- * It's done by informing transaction_kthread to commit transaction without +- * waiting for commit interval. +- */ +-static inline void btrfs_commit_transaction_locksafe( +- struct btrfs_fs_info *fs_info) +-{ +- set_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags); +- wake_up_process(fs_info->transaction_kthread); +-} + int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans); + int btrfs_should_end_transaction(struct btrfs_trans_handle *trans); + void btrfs_throttle(struct btrfs_fs_info *fs_info); diff --git a/queue-5.4/btrfs-qgroup-try-to-flush-qgroup-space-when-we-get-edquot.patch b/queue-5.4/btrfs-qgroup-try-to-flush-qgroup-space-when-we-get-edquot.patch new file mode 100644 index 00000000000..14d0922c75b --- /dev/null +++ b/queue-5.4/btrfs-qgroup-try-to-flush-qgroup-space-when-we-get-edquot.patch @@ -0,0 +1,243 @@ +From foo@baz Fri Aug 13 12:17:11 PM CEST 2021 +From: Anand Jain +Date: Fri, 13 Aug 2021 17:55:27 +0800 +Subject: btrfs: qgroup: try to flush qgroup space when we get -EDQUOT +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org +Cc: linux-btrfs@vger.kernel.org, Qu Wenruo , Josef Bacik , David Sterba , Anand Jain +Message-ID: <740e4978ebebfc08491db3f52264f7b5ba60ed96.1628845854.git.anand.jain@oracle.com> + +From: Qu Wenruo + +commit c53e9653605dbf708f5be02902de51831be4b009 upstream + +[PROBLEM] +There are known problem related to how btrfs handles qgroup reserved +space. One of the most obvious case is the the test case btrfs/153, +which do fallocate, then write into the preallocated range. + +# btrfs/153 1s ... - output mismatch (see xfstests-dev/results//btrfs/153.out.bad) +# --- tests/btrfs/153.out 2019-10-22 15:18:14.068965341 +0800 +# +++ xfstests-dev/results//btrfs/153.out.bad 2020-07-01 20:24:40.730000089 +0800 +# @@ -1,2 +1,5 @@ +# QA output created by 153 +# +pwrite: Disk quota exceeded +# +/mnt/scratch/testfile2: Disk quota exceeded +# +/mnt/scratch/testfile2: Disk quota exceeded +# Silence is golden +# ... +# (Run 'diff -u xfstests-dev/tests/btrfs/153.out xfstests-dev/results//btrfs/153.out.bad' to see the entire diff) + +[CAUSE] +Since commit c6887cd11149 ("Btrfs: don't do nocow check unless we have to"), +we always reserve space no matter if it's COW or not. + +Such behavior change is mostly for performance, and reverting it is not +a good idea anyway. 
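[ Editor's sketch of the failing pattern btrfs/153 exercises, reduced
to plain syscalls: preallocate, then buffered-write the same range.
The file name and sizes are made up, and a tight qgroup limit on the
filesystem is assumed. ]

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/types.h>
    #include <unistd.h>

    int main(void)
    {
        char buf[4096];
        off_t off;
        int err;
        int fd = open("testfile", O_CREAT | O_RDWR, 0644);

        if (fd < 0) { perror("open"); return 1; }
        err = posix_fallocate(fd, 0, 1 << 20);   /* reserves qgroup data space */
        if (err) {
            fprintf(stderr, "fallocate: %s\n", strerror(err));
            return 1;
        }
        memset(buf, 0xaa, sizeof(buf));
        for (off = 0; off < 1 << 20; off += sizeof(buf)) {
            /* the buffered write reserves qgroup space again, so the
             * range is charged twice; with a tight limit this pwrite
             * is where "Disk quota exceeded" used to appear */
            if (pwrite(fd, buf, sizeof(buf), off) < 0) {
                perror("pwrite");
                return 1;
            }
        }
        return close(fd);
    }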
+ +For preallcoated extent, we reserve qgroup data space for it already, +and since we also reserve data space for qgroup at buffered write time, +it needs twice the space for us to write into preallocated space. + +This leads to the -EDQUOT in buffered write routine. + +And we can't follow the same solution, unlike data/meta space check, +qgroup reserved space is shared between data/metadata. +The EDQUOT can happen at the metadata reservation, so doing NODATACOW +check after qgroup reservation failure is not a solution. + +[FIX] +To solve the problem, we don't return -EDQUOT directly, but every time +we got a -EDQUOT, we try to flush qgroup space: + +- Flush all inodes of the root + NODATACOW writes will free the qgroup reserved at run_dealloc_range(). + However we don't have the infrastructure to only flush NODATACOW + inodes, here we flush all inodes anyway. + +- Wait for ordered extents + This would convert the preallocated metadata space into per-trans + metadata, which can be freed in later transaction commit. + +- Commit transaction + This will free all per-trans metadata space. + +Also we don't want to trigger flush multiple times, so here we introduce +a per-root wait list and a new root status, to ensure only one thread +starts the flushing. + +Fixes: c6887cd11149 ("Btrfs: don't do nocow check unless we have to") +Reviewed-by: Josef Bacik +Signed-off-by: Qu Wenruo +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Anand Jain +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ctree.h | 3 + + fs/btrfs/disk-io.c | 1 + fs/btrfs/qgroup.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++----- + 3 files changed, 96 insertions(+), 8 deletions(-) + +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -945,6 +945,8 @@ enum { + BTRFS_ROOT_DEAD_TREE, + /* The root has a log tree. Used only for subvolume roots. */ + BTRFS_ROOT_HAS_LOG_TREE, ++ /* Qgroup flushing is in progress */ ++ BTRFS_ROOT_QGROUP_FLUSHING, + }; + + /* +@@ -1097,6 +1099,7 @@ struct btrfs_root { + spinlock_t qgroup_meta_rsv_lock; + u64 qgroup_meta_rsv_pertrans; + u64 qgroup_meta_rsv_prealloc; ++ wait_queue_head_t qgroup_flush_wait; + + /* Number of active swapfiles */ + atomic_t nr_swapfiles; +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -1154,6 +1154,7 @@ static void __setup_root(struct btrfs_ro + mutex_init(&root->log_mutex); + mutex_init(&root->ordered_extent_mutex); + mutex_init(&root->delalloc_mutex); ++ init_waitqueue_head(&root->qgroup_flush_wait); + init_waitqueue_head(&root->log_writer_wait); + init_waitqueue_head(&root->log_commit_wait[0]); + init_waitqueue_head(&root->log_commit_wait[1]); +--- a/fs/btrfs/qgroup.c ++++ b/fs/btrfs/qgroup.c +@@ -3479,17 +3479,58 @@ static int qgroup_unreserve_range(struct + } + + /* +- * Reserve qgroup space for range [start, start + len). ++ * Try to free some space for qgroup. + * +- * This function will either reserve space from related qgroups or doing +- * nothing if the range is already reserved. ++ * For qgroup, there are only 3 ways to free qgroup space: ++ * - Flush nodatacow write ++ * Any nodatacow write will free its reserved data space at run_delalloc_range(). ++ * In theory, we should only flush nodatacow inodes, but it's not yet ++ * possible, so we need to flush the whole root. 
+ * +- * Return 0 for successful reserve +- * Return <0 for error (including -EQUOT) ++ * - Wait for ordered extents ++ * When ordered extents are finished, their reserved metadata is finally ++ * converted to per_trans status, which can be freed by later commit ++ * transaction. + * +- * NOTE: this function may sleep for memory allocation. ++ * - Commit transaction ++ * This would free the meta_per_trans space. ++ * In theory this shouldn't provide much space, but any more qgroup space ++ * is needed. + */ +-int btrfs_qgroup_reserve_data(struct btrfs_inode *inode, ++static int try_flush_qgroup(struct btrfs_root *root) ++{ ++ struct btrfs_trans_handle *trans; ++ int ret; ++ ++ /* ++ * We don't want to run flush again and again, so if there is a running ++ * one, we won't try to start a new flush, but exit directly. ++ */ ++ if (test_and_set_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)) { ++ wait_event(root->qgroup_flush_wait, ++ !test_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)); ++ return 0; ++ } ++ ++ ret = btrfs_start_delalloc_snapshot(root); ++ if (ret < 0) ++ goto out; ++ btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1); ++ ++ trans = btrfs_join_transaction(root); ++ if (IS_ERR(trans)) { ++ ret = PTR_ERR(trans); ++ goto out; ++ } ++ ++ ret = btrfs_commit_transaction(trans); ++out: ++ clear_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state); ++ wake_up(&root->qgroup_flush_wait); ++ return ret; ++} ++ ++static int qgroup_reserve_data(struct btrfs_inode *inode, + struct extent_changeset **reserved_ret, u64 start, + u64 len) + { +@@ -3542,6 +3583,34 @@ out: + return ret; + } + ++/* ++ * Reserve qgroup space for range [start, start + len). ++ * ++ * This function will either reserve space from related qgroups or do nothing ++ * if the range is already reserved. ++ * ++ * Return 0 for successful reservation ++ * Return <0 for error (including -EQUOT) ++ * ++ * NOTE: This function may sleep for memory allocation, dirty page flushing and ++ * commit transaction. So caller should not hold any dirty page locked. 
++ */ ++int btrfs_qgroup_reserve_data(struct btrfs_inode *inode, ++ struct extent_changeset **reserved_ret, u64 start, ++ u64 len) ++{ ++ int ret; ++ ++ ret = qgroup_reserve_data(inode, reserved_ret, start, len); ++ if (ret <= 0 && ret != -EDQUOT) ++ return ret; ++ ++ ret = try_flush_qgroup(inode->root); ++ if (ret < 0) ++ return ret; ++ return qgroup_reserve_data(inode, reserved_ret, start, len); ++} ++ + /* Free ranges specified by @reserved, normally in error path */ + static int qgroup_free_reserved_data(struct btrfs_inode *inode, + struct extent_changeset *reserved, u64 start, u64 len) +@@ -3712,7 +3781,7 @@ static int sub_root_meta_rsv(struct btrf + return num_bytes; + } + +-int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, ++static int qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, + enum btrfs_qgroup_rsv_type type, bool enforce) + { + struct btrfs_fs_info *fs_info = root->fs_info; +@@ -3739,6 +3808,21 @@ int __btrfs_qgroup_reserve_meta(struct b + return ret; + } + ++int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, ++ enum btrfs_qgroup_rsv_type type, bool enforce) ++{ ++ int ret; ++ ++ ret = qgroup_reserve_meta(root, num_bytes, type, enforce); ++ if (ret <= 0 && ret != -EDQUOT) ++ return ret; ++ ++ ret = try_flush_qgroup(root); ++ if (ret < 0) ++ return ret; ++ return qgroup_reserve_meta(root, num_bytes, type, enforce); ++} ++ + void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root) + { + struct btrfs_fs_info *fs_info = root->fs_info; diff --git a/queue-5.4/btrfs-transaction-cleanup-unused-trans_state_blocked.patch b/queue-5.4/btrfs-transaction-cleanup-unused-trans_state_blocked.patch new file mode 100644 index 00000000000..fd44a178bab --- /dev/null +++ b/queue-5.4/btrfs-transaction-cleanup-unused-trans_state_blocked.patch @@ -0,0 +1,111 @@ +From foo@baz Fri Aug 13 12:17:11 PM CEST 2021 +From: Anand Jain +Date: Fri, 13 Aug 2021 17:55:28 +0800 +Subject: btrfs: transaction: Cleanup unused TRANS_STATE_BLOCKED +To: linux-kernel@vger.kernel.org, stable@vger.kernel.org +Cc: linux-btrfs@vger.kernel.org, Qu Wenruo , David Sterba , Anand Jain +Message-ID: <7c24989ec90962679cdcf9f6f2ba6fd39fc569ee.1628845854.git.anand.jain@oracle.com> + +From: Qu Wenruo + +commit 3296bf562443a8ca35aaad959a76a49e9b412760 upstream + +The state was introduced in commit 4a9d8bdee368 ("Btrfs: make the state +of the transaction more readable"), then in commit 302167c50b32 +("btrfs: don't end the transaction for delayed refs in throttle") the +state is completely removed. + +So we can just clean up the state since it's only compared but never +set. 
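[ Editor's note: the substitution of TRANS_STATE_COMMIT_START for
TRANS_STATE_BLOCKED in the comparisons is safe because the states are
compared ordinally and BLOCKED sat immediately before COMMIT_START yet
was never assigned, so the two tests differ only for a value that
never occurs. A compressed illustration with a stand-in copy of the
enum: ]

    #include <assert.h>

    enum trans_state {
        TRANS_STATE_RUNNING,
        /* TRANS_STATE_BLOCKED removed: compared against, never set */
        TRANS_STATE_COMMIT_START,
        TRANS_STATE_COMMIT_DOING,
        TRANS_STATE_UNBLOCKED,
        TRANS_STATE_COMPLETED,
        TRANS_STATE_MAX,
    };

    int main(void)
    {
        /* every reachable state keeps its side of the boundary */
        assert(TRANS_STATE_RUNNING < TRANS_STATE_COMMIT_START);
        assert(TRANS_STATE_COMMIT_DOING >= TRANS_STATE_COMMIT_START);
        return 0;
    }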
+ +Signed-off-by: Qu Wenruo +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Anand Jain +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/disk-io.c | 2 +- + fs/btrfs/transaction.c | 15 +++------------ + fs/btrfs/transaction.h | 1 - + 3 files changed, 4 insertions(+), 14 deletions(-) + +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -1748,7 +1748,7 @@ static int transaction_kthread(void *arg + } + + now = ktime_get_seconds(); +- if (cur->state < TRANS_STATE_BLOCKED && ++ if (cur->state < TRANS_STATE_COMMIT_START && + !test_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags) && + (now < cur->start_time || + now - cur->start_time < fs_info->commit_interval)) { +--- a/fs/btrfs/transaction.c ++++ b/fs/btrfs/transaction.c +@@ -27,7 +27,6 @@ + + static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = { + [TRANS_STATE_RUNNING] = 0U, +- [TRANS_STATE_BLOCKED] = __TRANS_START, + [TRANS_STATE_COMMIT_START] = (__TRANS_START | __TRANS_ATTACH), + [TRANS_STATE_COMMIT_DOING] = (__TRANS_START | + __TRANS_ATTACH | +@@ -388,7 +387,7 @@ int btrfs_record_root_in_trans(struct bt + + static inline int is_transaction_blocked(struct btrfs_transaction *trans) + { +- return (trans->state >= TRANS_STATE_BLOCKED && ++ return (trans->state >= TRANS_STATE_COMMIT_START && + trans->state < TRANS_STATE_UNBLOCKED && + !TRANS_ABORTED(trans)); + } +@@ -580,7 +579,7 @@ again: + INIT_LIST_HEAD(&h->new_bgs); + + smp_mb(); +- if (cur_trans->state >= TRANS_STATE_BLOCKED && ++ if (cur_trans->state >= TRANS_STATE_COMMIT_START && + may_wait_transaction(fs_info, type)) { + current->journal_info = h; + btrfs_commit_transaction(h); +@@ -797,7 +796,7 @@ int btrfs_should_end_transaction(struct + struct btrfs_transaction *cur_trans = trans->transaction; + + smp_mb(); +- if (cur_trans->state >= TRANS_STATE_BLOCKED || ++ if (cur_trans->state >= TRANS_STATE_COMMIT_START || + cur_trans->delayed_refs.flushing) + return 1; + +@@ -830,7 +829,6 @@ static int __btrfs_end_transaction(struc + { + struct btrfs_fs_info *info = trans->fs_info; + struct btrfs_transaction *cur_trans = trans->transaction; +- int lock = (trans->type != TRANS_JOIN_NOLOCK); + int err = 0; + + if (refcount_read(&trans->use_count) > 1) { +@@ -846,13 +844,6 @@ static int __btrfs_end_transaction(struc + + btrfs_trans_release_chunk_metadata(trans); + +- if (lock && READ_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { +- if (throttle) +- return btrfs_commit_transaction(trans); +- else +- wake_up_process(info->transaction_kthread); +- } +- + if (trans->type & __TRANS_FREEZABLE) + sb_end_intwrite(info->sb); + +--- a/fs/btrfs/transaction.h ++++ b/fs/btrfs/transaction.h +@@ -13,7 +13,6 @@ + + enum btrfs_trans_state { + TRANS_STATE_RUNNING, +- TRANS_STATE_BLOCKED, + TRANS_STATE_COMMIT_START, + TRANS_STATE_COMMIT_DOING, + TRANS_STATE_UNBLOCKED, diff --git a/queue-5.4/series b/queue-5.4/series index 139939da977..d9862cbf17b 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -14,3 +14,10 @@ usb-ehci-fix-kunpeng920-ehci-hardware-problem.patch alsa-hda-add-quirk-for-asus-flow-x13.patch ppp-fix-generating-ppp-unit-id-when-ifname-is-not-specified.patch ovl-prevent-private-clone-if-bind-mount-is-not-allowed.patch +btrfs-make-qgroup_free_reserved_data-take-btrfs_inode.patch +btrfs-make-btrfs_qgroup_reserve_data-take-btrfs_inode.patch +btrfs-qgroup-allow-to-unreserve-range-without-releasing-other-ranges.patch +btrfs-qgroup-try-to-flush-qgroup-space-when-we-get-edquot.patch +btrfs-transaction-cleanup-unused-trans_state_blocked.patch 
+btrfs-qgroup-remove-async_commit-mechanism-in-favor-of-reserve-retry-after-edquot.patch +btrfs-fix-lockdep-splat-when-enabling-and-disabling-qgroups.patch