From: Sasha Levin <sashal@kernel.org>
Date: Mon, 2 Aug 2021 22:03:22 +0000 (-0400)
Subject: Fixes for 5.4
X-Git-Tag: v4.4.278~15
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=80a86cca18f0e3a14afa36dc94b862585a4b741c;p=thirdparty%2Fkernel%2Fstable-queue.git

Fixes for 5.4

Signed-off-by: Sasha Levin <sashal@kernel.org>
---

diff --git a/queue-5.4/btrfs-delete-duplicated-words-other-fixes-in-comment.patch b/queue-5.4/btrfs-delete-duplicated-words-other-fixes-in-comment.patch
new file mode 100644
index 00000000000..1a425635d1d
--- /dev/null
+++ b/queue-5.4/btrfs-delete-duplicated-words-other-fixes-in-comment.patch
@@ -0,0 +1,131 @@
+From 7b436b4e8de8b8f0b89199957d21ebd79364ef2d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Aug 2020 19:48:34 -0700
+Subject: btrfs: delete duplicated words + other fixes in comments
+
+From: Randy Dunlap <rdunlap@infradead.org>
+
+[ Upstream commit 260db43cd2f556677f6ae818ba09f997eed81004 ]
+
+Delete repeated words in fs/btrfs/.
+{to, the, a, and old}
+and change "into 2 part" to "into 2 parts".
+
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/block-group.c      | 2 +-
+ fs/btrfs/ctree.c            | 2 +-
+ fs/btrfs/disk-io.c          | 2 +-
+ fs/btrfs/extent_io.c        | 2 +-
+ fs/btrfs/free-space-cache.c | 2 +-
+ fs/btrfs/qgroup.c           | 2 +-
+ fs/btrfs/tree-log.c         | 4 ++--
+ 7 files changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
+index a352c1704042..e98d6ea35ea8 100644
+--- a/fs/btrfs/block-group.c
++++ b/fs/btrfs/block-group.c
+@@ -2637,7 +2637,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
+ 			 * finished yet (no block group item in the extent tree
+ 			 * yet, etc). If this is the case, wait for all free
+ 			 * space endio workers to finish and retry. This is a
+-			 * a very rare case so no need for a more efficient and
++			 * very rare case so no need for a more efficient and
+ 			 * complex approach.
+ 			 */
+ 			if (ret == -ENOENT) {
+diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
+index ab69e3563b12..dac30b00d14b 100644
+--- a/fs/btrfs/ctree.c
++++ b/fs/btrfs/ctree.c
+@@ -5232,7 +5232,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
+ 			slot--;
+ 		/*
+ 		 * check this node pointer against the min_trans parameters.
+-		 * If it is too old, old, skip to the next one.
++		 * If it is too old, skip to the next one.
+ 		 */
+ 		while (slot < nritems) {
+ 			u64 gen;
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index e6aa94a583e9..1d28333bb798 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -2816,7 +2816,7 @@ int open_ctree(struct super_block *sb,
+ 	}
+ 
+ 	/*
+-	 * Verify the type first, if that or the the checksum value are
++	 * Verify the type first, if that or the checksum value are
+ 	 * corrupted, we'll find out
+ 	 */
+ 	csum_type = btrfs_super_csum_type((struct btrfs_super_block *)bh->b_data);
+diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
+index eca3abc1a7cd..9108a73423f7 100644
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -3152,7 +3152,7 @@ static int __do_readpage(struct extent_io_tree *tree,
+ 
+ 		/*
+ 		 * If we have a file range that points to a compressed extent
+-		 * and it's followed by a consecutive file range that points to
++		 * and it's followed by a consecutive file range that points
+ 		 * to the same compressed extent (possibly with a different
+ 		 * offset and/or length, so it either points to the whole extent
+ 		 * or only part of it), we must make sure we do not submit a
+diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
+index 23f59d463e24..d2d32fed8f2e 100644
+--- a/fs/btrfs/free-space-cache.c
++++ b/fs/btrfs/free-space-cache.c
+@@ -1339,7 +1339,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
+ 
+ 	/*
+ 	 * at this point the pages are under IO and we're happy,
+-	 * The caller is responsible for waiting on them and updating the
++	 * The caller is responsible for waiting on them and updating
+ 	 * the cache and the inode
+ 	 */
+ 	io_ctl->entries = entries;
+diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
+index cd8e81c02f63..837bd5e29c8a 100644
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -2262,7 +2262,7 @@ static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
+  * Update qgroup rfer/excl counters.
+  * Rfer update is easy, codes can explain themselves.
+  *
+- * Excl update is tricky, the update is split into 2 part.
++ * Excl update is tricky, the update is split into 2 parts.
+  * Part 1: Possible exclusive <-> sharing detect:
+  *	|	A	|	!A	|
+  *  -------------------------------------
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index afc6731bb692..dcbdd0ebea83 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -4923,7 +4923,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
+ 		 * Check the inode's logged_trans only instead of
+ 		 * btrfs_inode_in_log(). This is because the last_log_commit of
+ 		 * the inode is not updated when we only log that it exists and
+-		 * and it has the full sync bit set (see btrfs_log_inode()).
++		 * it has the full sync bit set (see btrfs_log_inode()).
+ 		 */
+ 		if (BTRFS_I(inode)->logged_trans == trans->transid) {
+ 			spin_unlock(&BTRFS_I(inode)->lock);
+@@ -6426,7 +6426,7 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
+  *            committed by the caller, and BTRFS_DONT_NEED_TRANS_COMMIT
+  *            otherwise.
+  * When false: returns BTRFS_DONT_NEED_LOG_SYNC if the caller does not need to
+- *             to sync the log, BTRFS_NEED_LOG_SYNC if it needs to sync the log,
++ *             sync the log, BTRFS_NEED_LOG_SYNC if it needs to sync the log,
+  *             or BTRFS_NEED_TRANS_COMMIT if the transaction needs to be
+  *             committed (without attempting to sync the log).
+  */
+-- 
+2.30.2
+
diff --git a/queue-5.4/btrfs-do-not-commit-logs-and-transactions-during-lin.patch b/queue-5.4/btrfs-do-not-commit-logs-and-transactions-during-lin.patch
new file mode 100644
index 00000000000..49e17d820ca
--- /dev/null
+++ b/queue-5.4/btrfs-do-not-commit-logs-and-transactions-during-lin.patch
@@ -0,0 +1,492 @@
+From aa2ae7be22586b1cb4ef74bd325ec887ba5d1252 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Aug 2020 12:43:48 +0100
+Subject: btrfs: do not commit logs and transactions during link and rename
+ operations
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 75b463d2b47aef96fe1dc3e0237629963034764b ]
+
+Since commit d4682ba03ef618 ("Btrfs: sync log after logging new name") we
+started to commit logs, and fallback to transaction commits when we failed
+to log the new names or commit the logs, after link and rename operations
+when the target inodes (or their parents) were previously logged in the
+current transaction. This was to avoid losing directories despite an
+explicit fsync on them when they are ancestors of some inode that got a
+new named logged, due to a link or rename operation. However that adds the
+cost of starting IO and waiting for it to complete, which can cause higher
+latencies for applications.
+
+Instead of doing that, just make sure that when we log a new name for an
+inode we don't mark any of its ancestors as logged, so that if any one
+does an fsync against any of them, without doing any other change on them,
+the fsync commits the log. This way we only pay the cost of a log commit
+(or a transaction commit if something goes wrong or a new block group was
+created) if the application explicitly asks to fsync any of the parent
+directories.
+
+Using dbench, which mixes several filesystems operations including renames,
+revealed some significant latency gains. The following script that uses
+dbench was used to test this:
+
+  #!/bin/bash
+
+  DEV=/dev/nvme0n1
+  MNT=/mnt/btrfs
+  MOUNT_OPTIONS="-o ssd -o space_cache=v2"
+  MKFS_OPTIONS="-m single -d single"
+  THREADS=16
+
+  echo "performance" | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
+  mkfs.btrfs -f $MKFS_OPTIONS $DEV
+  mount $MOUNT_OPTIONS $DEV $MNT
+
+  dbench -t 300 -D $MNT $THREADS
+
+  umount $MNT
+
+The test was run on bare metal, no virtualization, on a box with 12 cores
+(Intel i7-8700), 64Gb of RAM and using a NVMe device, with a kernel
+configuration that is the default of typical distributions (debian in this
+case), without debug options enabled (kasan, kmemleak, slub debug, debug
+of page allocations, lock debugging, etc).
+
+Results before this patch:
+
+ Operation      Count    AvgLat    MaxLat
+ ----------------------------------------
+ NTCreateX    10750455     0.011   155.088
+ Close         7896674     0.001     0.243
+ Rename         455222     2.158  1101.947
+ Unlink        2171189     0.067   121.638
+ Deltree           256     2.425     7.816
+ Mkdir             128     0.002     0.003
+ Qpathinfo     9744323     0.006    21.370
+ Qfileinfo     1707092     0.001     0.146
+ Qfsinfo       1786756     0.001    11.228
+ Sfileinfo      875612     0.003    21.263
+ Find          3767281     0.025     9.617
+ WriteX        5356924     0.011   211.390
+ ReadX        16852694     0.003     9.442
+ LockX           35008     0.002     0.119
+ UnlockX         35008     0.001     0.138
+ Flush          753458     4.252  1102.249
+
+Throughput 1128.35 MB/sec  16 clients  16 procs  max_latency=1102.255 ms
+
+Results after this patch:
+
+16 clients, after
+
+ Operation      Count    AvgLat    MaxLat
+ ----------------------------------------
+ NTCreateX    11471098     0.012   448.281
+ Close         8426396     0.001     0.925
+ Rename         485746     0.123   267.183
+ Unlink        2316477     0.080    63.433
+ Deltree           288     2.830    11.144
+ Mkdir             144     0.003     0.010
+ Qpathinfo    10397420     0.006    10.288
+ Qfileinfo     1822039     0.001     0.169
+ Qfsinfo       1906497     0.002    14.039
+ Sfileinfo      934433     0.004     2.438
+ Find          4019879     0.026    10.200
+ WriteX        5718932     0.011   200.985
+ ReadX        17981671     0.003    10.036
+ LockX           37352     0.002     0.076
+ UnlockX         37352     0.001     0.109
+ Flush          804018     5.015   778.033
+
+Throughput 1201.98 MB/sec  16 clients  16 procs  max_latency=778.036 ms
+(+6.5% throughput, -29.4% max latency, -75.8% rename latency)
+
+Test case generic/498 from fstests tests the scenario that the previously
+mentioned commit fixed.
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/inode.c    | 115 +++++---------------------------------------
+ fs/btrfs/tree-log.c | 100 +++++++++++++++++---------------------
+ fs/btrfs/tree-log.h |  14 ++----
+ 3 files changed, 60 insertions(+), 169 deletions(-)
+
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index 025b02e9799f..8959d011aafa 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -6992,7 +6992,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
+ 		drop_inode = 1;
+ 	} else {
+ 		struct dentry *parent = dentry->d_parent;
+-		int ret;
+ 
+ 		err = btrfs_update_inode(trans, root, inode);
+ 		if (err)
+@@ -7007,12 +7006,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
+ 				goto fail;
+ 		}
+ 		d_instantiate(dentry, inode);
+-		ret = btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent,
+-					 true, NULL);
+-		if (ret == BTRFS_NEED_TRANS_COMMIT) {
+-			err = btrfs_commit_transaction(trans);
+-			trans = NULL;
+-		}
++		btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent);
+ 	}
+ 
+ fail:
+@@ -9699,27 +9693,19 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+ 	struct inode *new_inode = new_dentry->d_inode;
+ 	struct inode *old_inode = old_dentry->d_inode;
+ 	struct timespec64 ctime = current_time(old_inode);
+-	struct dentry *parent;
+ 	u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
+ 	u64 new_ino = btrfs_ino(BTRFS_I(new_inode));
+ 	u64 old_idx = 0;
+ 	u64 new_idx = 0;
+ 	int ret;
++	int ret2;
+ 	bool root_log_pinned = false;
+ 	bool dest_log_pinned = false;
+-	struct btrfs_log_ctx ctx_root;
+-	struct btrfs_log_ctx ctx_dest;
+-	bool sync_log_root = false;
+-	bool sync_log_dest = false;
+-	bool commit_transaction = false;
+ 
+ 	/* we only allow rename subvolume link between subvolumes */
+ 	if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
+ 		return -EXDEV;
+ 
+-	btrfs_init_log_ctx(&ctx_root, old_inode);
+-	btrfs_init_log_ctx(&ctx_dest, new_inode);
+-
+ 	/* close the race window with snapshot create/destroy ioctl */
+ 	if (old_ino == BTRFS_FIRST_FREE_OBJECTID ||
+ 	    new_ino == BTRFS_FIRST_FREE_OBJECTID)
+@@ -9861,30 +9847,14 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+ 		BTRFS_I(new_inode)->dir_index = new_idx;
+ 
+ 	if (root_log_pinned) {
+-		parent = new_dentry->d_parent;
+-		ret = btrfs_log_new_name(trans, BTRFS_I(old_inode),
+-					 BTRFS_I(old_dir), parent,
+-					 false, &ctx_root);
+-		if (ret == BTRFS_NEED_LOG_SYNC)
+-			sync_log_root = true;
+-		else if (ret == BTRFS_NEED_TRANS_COMMIT)
+-			commit_transaction = true;
+-		ret = 0;
++		btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir),
++				   new_dentry->d_parent);
+ 		btrfs_end_log_trans(root);
+ 		root_log_pinned = false;
+ 	}
+ 	if (dest_log_pinned) {
+-		if (!commit_transaction) {
+-			parent = old_dentry->d_parent;
+-			ret = btrfs_log_new_name(trans, BTRFS_I(new_inode),
+-						 BTRFS_I(new_dir), parent,
+-						 false, &ctx_dest);
+-			if (ret == BTRFS_NEED_LOG_SYNC)
+-				sync_log_dest = true;
+-			else if (ret == BTRFS_NEED_TRANS_COMMIT)
+-				commit_transaction = true;
+-			ret = 0;
+-		}
++		btrfs_log_new_name(trans, BTRFS_I(new_inode), BTRFS_I(new_dir),
++				   old_dentry->d_parent);
+ 		btrfs_end_log_trans(dest);
+ 		dest_log_pinned = false;
+ 	}
+@@ -9917,46 +9887,13 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+ 			dest_log_pinned = false;
+ 		}
+ 	}
+-	if (!ret && sync_log_root && !commit_transaction) {
+-		ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root,
+-				     &ctx_root);
+-		if (ret)
+-			commit_transaction = true;
+-	}
+-	if (!ret && sync_log_dest && !commit_transaction) {
+-		ret = btrfs_sync_log(trans, BTRFS_I(new_inode)->root,
+-				     &ctx_dest);
+-		if (ret)
+-			commit_transaction = true;
+-	}
+-	if (commit_transaction) {
+-		/*
+-		 * We may have set commit_transaction when logging the new name
+-		 * in the destination root, in which case we left the source
+-		 * root context in the list of log contextes. So make sure we
+-		 * remove it to avoid invalid memory accesses, since the context
+-		 * was allocated in our stack frame.
+-		 */
+-		if (sync_log_root) {
+-			mutex_lock(&root->log_mutex);
+-			list_del_init(&ctx_root.list);
+-			mutex_unlock(&root->log_mutex);
+-		}
+-		ret = btrfs_commit_transaction(trans);
+-	} else {
+-		int ret2;
+-
+-		ret2 = btrfs_end_transaction(trans);
+-		ret = ret ? ret : ret2;
+-	}
++	ret2 = btrfs_end_transaction(trans);
++	ret = ret ? ret : ret2;
+ out_notrans:
+ 	if (new_ino == BTRFS_FIRST_FREE_OBJECTID ||
+ 	    old_ino == BTRFS_FIRST_FREE_OBJECTID)
+ 		up_read(&fs_info->subvol_sem);
+ 
+-	ASSERT(list_empty(&ctx_root.list));
+-	ASSERT(list_empty(&ctx_dest.list));
+-
+ 	return ret;
+ }
+ 
+@@ -10024,11 +9961,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+ 	struct inode *old_inode = d_inode(old_dentry);
+ 	u64 index = 0;
+ 	int ret;
++	int ret2;
+ 	u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
+ 	bool log_pinned = false;
+-	struct btrfs_log_ctx ctx;
+-	bool sync_log = false;
+-	bool commit_transaction = false;
+ 
+ 	if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
+ 		return -EPERM;
+@@ -10178,17 +10113,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+ 		BTRFS_I(old_inode)->dir_index = index;
+ 
+ 	if (log_pinned) {
+-		struct dentry *parent = new_dentry->d_parent;
+-
+-		btrfs_init_log_ctx(&ctx, old_inode);
+-		ret = btrfs_log_new_name(trans, BTRFS_I(old_inode),
+-					 BTRFS_I(old_dir), parent,
+-					 false, &ctx);
+-		if (ret == BTRFS_NEED_LOG_SYNC)
+-			sync_log = true;
+-		else if (ret == BTRFS_NEED_TRANS_COMMIT)
+-			commit_transaction = true;
+-		ret = 0;
++		btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir),
++				   new_dentry->d_parent);
+ 		btrfs_end_log_trans(root);
+ 		log_pinned = false;
+ 	}
+@@ -10225,23 +10151,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+ 		btrfs_end_log_trans(root);
+ 		log_pinned = false;
+ 	}
+-	if (!ret && sync_log) {
+-		ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root, &ctx);
+-		if (ret)
+-			commit_transaction = true;
+-	} else if (sync_log) {
+-		mutex_lock(&root->log_mutex);
+-		list_del(&ctx.list);
+-		mutex_unlock(&root->log_mutex);
+-	}
+-	if (commit_transaction) {
+-		ret = btrfs_commit_transaction(trans);
+-	} else {
+-		int ret2;
+-
+-		ret2 = btrfs_end_transaction(trans);
+-		ret = ret ? ret : ret2;
+-	}
++	ret2 = btrfs_end_transaction(trans);
++	ret = ret ? ret : ret2;
+ out_notrans:
+ 	if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
+ 		up_read(&fs_info->subvol_sem);
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index dcbdd0ebea83..53607156b008 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -174,7 +174,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
+ 
+ 	atomic_inc(&root->log_batch);
+ 	atomic_inc(&root->log_writers);
+-	if (ctx) {
++	if (ctx && !ctx->logging_new_name) {
+ 		int index = root->log_transid % 2;
+ 		list_add_tail(&ctx->list, &root->log_ctxs[index]);
+ 		ctx->log_transid = root->log_transid;
+@@ -5379,19 +5379,34 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
+ 	}
+ 
+ 	/*
+-	 * Don't update last_log_commit if we logged that an inode exists after
+-	 * it was loaded to memory (full_sync bit set).
+-	 * This is to prevent data loss when we do a write to the inode, then
+-	 * the inode gets evicted after all delalloc was flushed, then we log
+-	 * it exists (due to a rename for example) and then fsync it. This last
+-	 * fsync would do nothing (not logging the extents previously written).
++	 * If we are logging that an ancestor inode exists as part of logging a
++	 * new name from a link or rename operation, don't mark the inode as
++	 * logged - otherwise if an explicit fsync is made against an ancestor,
++	 * the fsync considers the inode in the log and doesn't sync the log,
++	 * resulting in the ancestor missing after a power failure unless the
++	 * log was synced as part of an fsync against any other unrelated inode.
++	 * So keep it simple for this case and just don't flag the ancestors as
++	 * logged.
+ 	 */
+-	spin_lock(&inode->lock);
+-	inode->logged_trans = trans->transid;
+-	if (inode_only != LOG_INODE_EXISTS ||
+-	    !test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags))
+-		inode->last_log_commit = inode->last_sub_trans;
+-	spin_unlock(&inode->lock);
++	if (!ctx ||
++	    !(S_ISDIR(inode->vfs_inode.i_mode) && ctx->logging_new_name &&
++	      &inode->vfs_inode != ctx->inode)) {
++		spin_lock(&inode->lock);
++		inode->logged_trans = trans->transid;
++		/*
++		 * Don't update last_log_commit if we logged that an inode exists
++		 * after it was loaded to memory (full_sync bit set).
++		 * This is to prevent data loss when we do a write to the inode,
++		 * then the inode gets evicted after all delalloc was flushed,
++		 * then we log it exists (due to a rename for example) and then
++		 * fsync it. This last fsync would do nothing (not logging the
++		 * extents previously written).
++		 */
++		if (inode_only != LOG_INODE_EXISTS ||
++		    !test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags))
++			inode->last_log_commit = inode->last_sub_trans;
++		spin_unlock(&inode->lock);
++	}
+ out_unlock:
+ 	mutex_unlock(&inode->log_mutex);
+ 
+@@ -6417,26 +6432,13 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
+ /*
+  * Call this after adding a new name for a file and it will properly
+  * update the log to reflect the new name.
+- *
+- * @ctx can not be NULL when @sync_log is false, and should be NULL when it's
+- * true (because it's not used).
+- *
+- * Return value depends on whether @sync_log is true or false.
+- * When true: returns BTRFS_NEED_TRANS_COMMIT if the transaction needs to be
+- *            committed by the caller, and BTRFS_DONT_NEED_TRANS_COMMIT
+- *            otherwise.
+- * When false: returns BTRFS_DONT_NEED_LOG_SYNC if the caller does not need to
+- *             sync the log, BTRFS_NEED_LOG_SYNC if it needs to sync the log,
+- *             or BTRFS_NEED_TRANS_COMMIT if the transaction needs to be
+- *             committed (without attempting to sync the log).
+  */
+-int btrfs_log_new_name(struct btrfs_trans_handle *trans,
++void btrfs_log_new_name(struct btrfs_trans_handle *trans,
+ 			struct btrfs_inode *inode, struct btrfs_inode *old_dir,
+-			struct dentry *parent,
+-			bool sync_log, struct btrfs_log_ctx *ctx)
++			struct dentry *parent)
+ {
+ 	struct btrfs_fs_info *fs_info = trans->fs_info;
+-	int ret;
++	struct btrfs_log_ctx ctx;
+ 
+ 	/*
+ 	 * this will force the logging code to walk the dentry chain
+@@ -6451,34 +6453,18 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
+ 	 */
+ 	if (inode->logged_trans <= fs_info->last_trans_committed &&
+ 	    (!old_dir || old_dir->logged_trans <= fs_info->last_trans_committed))
+-		return sync_log ? BTRFS_DONT_NEED_TRANS_COMMIT :
+-			BTRFS_DONT_NEED_LOG_SYNC;
+-
+-	if (sync_log) {
+-		struct btrfs_log_ctx ctx2;
+-
+-		btrfs_init_log_ctx(&ctx2, &inode->vfs_inode);
+-		ret = btrfs_log_inode_parent(trans, inode, parent, 0, LLONG_MAX,
+-					     LOG_INODE_EXISTS, &ctx2);
+-		if (ret == BTRFS_NO_LOG_SYNC)
+-			return BTRFS_DONT_NEED_TRANS_COMMIT;
+-		else if (ret)
+-			return BTRFS_NEED_TRANS_COMMIT;
+-
+-		ret = btrfs_sync_log(trans, inode->root, &ctx2);
+-		if (ret)
+-			return BTRFS_NEED_TRANS_COMMIT;
+-		return BTRFS_DONT_NEED_TRANS_COMMIT;
+-	}
+-
+-	ASSERT(ctx);
+-	ret = btrfs_log_inode_parent(trans, inode, parent, 0, LLONG_MAX,
+-				     LOG_INODE_EXISTS, ctx);
+-	if (ret == BTRFS_NO_LOG_SYNC)
+-		return BTRFS_DONT_NEED_LOG_SYNC;
+-	else if (ret)
+-		return BTRFS_NEED_TRANS_COMMIT;
++		return;
+ 
+-	return BTRFS_NEED_LOG_SYNC;
++	btrfs_init_log_ctx(&ctx, &inode->vfs_inode);
++	ctx.logging_new_name = true;
++	/*
++	 * We don't care about the return value. If we fail to log the new name
++	 * then we know the next attempt to sync the log will fallback to a full
++	 * transaction commit (due to a call to btrfs_set_log_full_commit()), so
++	 * we don't need to worry about getting a log committed that has an
++	 * inconsistent state after a rename operation.
++	 */
++	btrfs_log_inode_parent(trans, inode, parent, 0, LLONG_MAX,
++			       LOG_INODE_EXISTS, &ctx);
+ }
+ 
+diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
+index 132e43d29034..ddfc6789d9bf 100644
+--- a/fs/btrfs/tree-log.h
++++ b/fs/btrfs/tree-log.h
+@@ -16,6 +16,7 @@ struct btrfs_log_ctx {
+ 	int log_ret;
+ 	int log_transid;
+ 	bool log_new_dentries;
++	bool logging_new_name;
+ 	struct inode *inode;
+ 	struct list_head list;
+ };
+@@ -26,6 +27,7 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
+ 	ctx->log_ret = 0;
+ 	ctx->log_transid = 0;
+ 	ctx->log_new_dentries = false;
++	ctx->logging_new_name = false;
+ 	ctx->inode = inode;
+ 	INIT_LIST_HEAD(&ctx->list);
+ }
+@@ -67,16 +69,8 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
+ 			     int for_rename);
+ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
+ 				   struct btrfs_inode *dir);
+-/* Return values for btrfs_log_new_name() */
+-enum {
+-	BTRFS_DONT_NEED_TRANS_COMMIT,
+-	BTRFS_NEED_TRANS_COMMIT,
+-	BTRFS_DONT_NEED_LOG_SYNC,
+-	BTRFS_NEED_LOG_SYNC,
+-};
+-int btrfs_log_new_name(struct btrfs_trans_handle *trans,
++void btrfs_log_new_name(struct btrfs_trans_handle *trans,
+ 			struct btrfs_inode *inode, struct btrfs_inode *old_dir,
+-			struct dentry *parent,
+-			bool sync_log, struct btrfs_log_ctx *ctx);
++			struct dentry *parent);
+ 
+ #endif
+-- 
+2.30.2
+
diff --git a/queue-5.4/btrfs-fix-lost-inode-on-log-replay-after-mix-of-fsyn.patch b/queue-5.4/btrfs-fix-lost-inode-on-log-replay-after-mix-of-fsyn.patch
new file mode 100644
index 00000000000..5e2b26ab6a4
--- /dev/null
+++ b/queue-5.4/btrfs-fix-lost-inode-on-log-replay-after-mix-of-fsyn.patch
@@ -0,0 +1,106 @@
+From e85781ce25731c67341997be8e4a90bc3cfdc1cd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 27 Jul 2021 11:24:43 +0100
+Subject: btrfs: fix lost inode on log replay after mix of fsync, rename and
+ inode eviction
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit ecc64fab7d49c678e70bd4c35fe64d2ab3e3d212 ]
+
+When checking if we need to log the new name of a renamed inode, we are
+checking if the inode and its parent inode have been logged before, and if
+not we don't log the new name. The check however is buggy, as it directly
+compares the logged_trans field of the inodes versus the ID of the current
+transaction. The problem is that logged_trans is a transient field, only
+stored in memory and never persisted in the inode item, so if an inode
+was logged before, evicted and reloaded, its logged_trans field is set to
+a value of 0, meaning the check will return false and the new name of the
+renamed inode is not logged. If the old parent directory was previously
+fsynced and we deleted the logged directory entries corresponding to the
+old name, we end up with a log that when replayed will delete the renamed
+inode.
+
+The following example triggers the problem:
+
+  $ mkfs.btrfs -f /dev/sdc
+  $ mount /dev/sdc /mnt
+
+  $ mkdir /mnt/A
+  $ mkdir /mnt/B
+  $ echo -n "hello world" > /mnt/A/foo
+
+  $ sync
+
+  # Add some new file to A and fsync directory A.
+  $ touch /mnt/A/bar
+  $ xfs_io -c "fsync" /mnt/A
+
+  # Now trigger inode eviction. We are only interested in triggering
+  # eviction for the inode of directory A.
+  $ echo 2 > /proc/sys/vm/drop_caches
+
+  # Move foo from directory A to directory B.
+  # This deletes the directory entries for foo in A from the log, and
+  # does not add the new name for foo in directory B to the log, because
+  # logged_trans of A is 0, which is less than the current transaction ID.
+  $ mv /mnt/A/foo /mnt/B/foo
+
+  # Now make an fsync to anything except A, B or any file inside them,
+  # like for example create a file at the root directory and fsync this
+  # new file. This syncs the log that contains all the changes done by
+  # previous rename operation.
+  $ touch /mnt/baz
+  $ xfs_io -c "fsync" /mnt/baz
+
+  <power fail>
+
+  # Mount the filesystem and replay the log.
+  $ mount /dev/sdc /mnt
+
+  # Check the filesystem content.
+  $ ls -1R /mnt
+  /mnt/:
+  A
+  B
+  baz
+
+  /mnt/A:
+  bar
+
+  /mnt/B:
+  $
+
+  # File foo is gone, it's neither in A/ nor in B/.
+
+Fix this by using the inode_logged() helper at btrfs_log_new_name(), which
+safely checks if an inode was logged before in the current transaction.
+
+A test case for fstests will follow soon.
+
+CC: stable@vger.kernel.org # 4.14+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/tree-log.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index 9912b5231bcf..5412361d0c27 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -6450,8 +6450,8 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
+ 	 * if this inode hasn't been logged and directory we're renaming it
+ 	 * from hasn't been logged, we don't need to log it
+ 	 */
+-	if (inode->logged_trans < trans->transid &&
+-	    (!old_dir || old_dir->logged_trans < trans->transid))
++	if (!inode_logged(trans, inode) &&
++	    (!old_dir || !inode_logged(trans, old_dir)))
+ 		return;
+ 
+ 	btrfs_init_log_ctx(&ctx, &inode->vfs_inode);
+-- 
+2.30.2
+
diff --git a/queue-5.4/btrfs-fix-race-causing-unnecessary-inode-logging-dur.patch b/queue-5.4/btrfs-fix-race-causing-unnecessary-inode-logging-dur.patch
new file mode 100644
index 00000000000..b5aebbced28
--- /dev/null
+++ b/queue-5.4/btrfs-fix-race-causing-unnecessary-inode-logging-dur.patch
@@ -0,0 +1,93 @@
+From f795577dcfd1a34ef1486eb80cbf005137a68199 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 25 Nov 2020 12:19:23 +0000
+Subject: btrfs: fix race causing unnecessary inode logging during link and
+ rename
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit de53d892e5c51dfa0a158e812575a75a6c991f39 ]
+
+When we are doing a rename or a link operation for an inode that was logged
+in the previous transaction and that transaction is still committing, we
+have a time window where we incorrectly consider that the inode was logged
+previously in the current transaction and therefore decide to log it to
+update it in the log. The following steps give an example on how this
+happens during a link operation:
+
+1) Inode X is logged in transaction 1000, so its logged_trans field is set
+   to 1000;
+
+2) Task A starts to commit transaction 1000;
+
+3) The state of transaction 1000 is changed to TRANS_STATE_UNBLOCKED;
+
+4) Task B starts a link operation for inode X, and as a consequence it
+   starts transaction 1001;
+
+5) Task A is still committing transaction 1000, therefore the value stored
+   at fs_info->last_trans_committed is still 999;
+
+6) Task B calls btrfs_log_new_name(), it reads a value of 999 from
+   fs_info->last_trans_committed and because the logged_trans field of
+   inode X has a value of 1000, the function does not return immediately,
+   instead it proceeds to logging the inode, which should not happen
+   because the inode was logged in the previous transaction (1000) and
+   not in the current one (1001).
+
+This is not a functional problem, just wasted time and space logging an
+inode that does not need to be logged, contributing to higher latency
+for link and rename operations.
+
+So fix this by comparing the inodes' logged_trans field with the
+generation of the current transaction instead of comparing with the value
+stored in fs_info->last_trans_committed.
+
+This case is often hit when running dbench for a long enough duration, as
+it does lots of rename operations.
+
+This patch belongs to a patch set that is comprised of the following
+patches:
+
+  btrfs: fix race causing unnecessary inode logging during link and rename
+  btrfs: fix race that results in logging old extents during a fast fsync
+  btrfs: fix race that causes unnecessary logging of ancestor inodes
+  btrfs: fix race that makes inode logging fallback to transaction commit
+  btrfs: fix race leading to unnecessary transaction commit when logging inode
+  btrfs: do not block inode logging for so long during transaction commit
+
+Performance results are mentioned in the change log of the last patch.
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/tree-log.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index 53607156b008..9912b5231bcf 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -6437,7 +6437,6 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
+ 			struct btrfs_inode *inode, struct btrfs_inode *old_dir,
+ 			struct dentry *parent)
+ {
+-	struct btrfs_fs_info *fs_info = trans->fs_info;
+ 	struct btrfs_log_ctx ctx;
+ 
+ 	/*
+@@ -6451,8 +6450,8 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
+ 	 * if this inode hasn't been logged and directory we're renaming it
+ 	 * from hasn't been logged, we don't need to log it
+ 	 */
+-	if (inode->logged_trans <= fs_info->last_trans_committed &&
+-	    (!old_dir || old_dir->logged_trans <= fs_info->last_trans_committed))
++	if (inode->logged_trans < trans->transid &&
++	    (!old_dir || old_dir->logged_trans < trans->transid))
+ 		return;
+ 
+ 	btrfs_init_log_ctx(&ctx, &inode->vfs_inode);
+-- 
+2.30.2
+
diff --git a/queue-5.4/series b/queue-5.4/series
index 2a78876aab3..922470bd83c 100644
--- a/queue-5.4/series
+++ b/queue-5.4/series
@@ -38,3 +38,7 @@ powerpc-pseries-fix-regression-while-building-external-modules.patch
 revert-perf-map-fix-dso-nsinfo-refcounting.patch
 i40e-add-additional-info-to-phy-type-error.patch
 can-j1939-j1939_session_deactivate-clarify-lifetime-of-session-object.patch
+btrfs-delete-duplicated-words-other-fixes-in-comment.patch
+btrfs-do-not-commit-logs-and-transactions-during-lin.patch
+btrfs-fix-race-causing-unnecessary-inode-logging-dur.patch
+btrfs-fix-lost-inode-on-log-replay-after-mix-of-fsyn.patch