From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 5 Mar 2022 10:47:54 +0000 (+0100)
Subject: 5.16-stable patches
X-Git-Tag: v4.9.305~90
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=daffb0bc440acb25de8f311c8c51061b59b7dd7a;p=thirdparty%2Fkernel%2Fstable-queue.git

5.16-stable patches

added patches:
	btrfs-defrag-bring-back-the-old-file-extent-search-behavior.patch
	btrfs-defrag-don-t-use-merged-extent-map-for-their-generation-check.patch
---

diff --git a/queue-5.16/btrfs-defrag-bring-back-the-old-file-extent-search-behavior.patch b/queue-5.16/btrfs-defrag-bring-back-the-old-file-extent-search-behavior.patch
new file mode 100644
index 00000000000..44005fd1f8e
--- /dev/null
+++ b/queue-5.16/btrfs-defrag-bring-back-the-old-file-extent-search-behavior.patch
@@ -0,0 +1,251 @@
+From d5633b0dee02d7d25e93463a03709f11c71500e2 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Fri, 11 Feb 2022 14:46:12 +0800
+Subject: btrfs: defrag: bring back the old file extent search behavior
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit d5633b0dee02d7d25e93463a03709f11c71500e2 upstream.
+
+For defrag, we don't really want to use btrfs_get_extent() to iterate
+all extent maps of an inode.
+
+The reasons are:
+
+- btrfs_get_extent() can merge extent maps
+  And the result em has the higher generation of the two, causing defrag
+  to mark unnecessary part of such merged large extent map.
+
+  This in fact can result extra IO for autodefrag in v5.16+ kernels.
+
+  However this patch is not going to completely solve the problem, as
+  one can still using read() to trigger extent map reading, and got
+  them merged.
+
+  The completely solution for the extent map merging generation problem
+  will come as an standalone fix.
+
+- btrfs_get_extent() caches the extent map result
+  Normally it's fine, but for defrag the target range may not get
+  another read/write for a long long time.
+  Such cache would only increase the memory usage.
+
+- btrfs_get_extent() doesn't skip older extent map
+  Unlike the old find_new_extent() which uses btrfs_search_forward() to
+  skip the older subtree, thus it will pick up unnecessary extent maps.
+
+This patch will fix the regression by introducing defrag_get_extent() to
+replace the btrfs_get_extent() call.
+
+This helper will:
+
+- Not cache the file extent we found
+  It will search the file extent and manually convert it to em.
+
+- Use btrfs_search_forward() to skip entire ranges which is modified in
+  the past
+
+This should reduce the IO for autodefrag.
+
+Reported-by: Filipe Manana <fdmanana@suse.com>
+Fixes: 7b508037d4ca ("btrfs: defrag: use defrag_one_cluster() to implement btrfs_defrag_file()")
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ioctl.c |  161 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 157 insertions(+), 4 deletions(-)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -986,8 +986,155 @@ out:
+ 	return ret;
+ }
+ 
++/*
++ * Defrag specific helper to get an extent map.
++ *
++ * Differences between this and btrfs_get_extent() are:
++ *
++ * - No extent_map will be added to inode->extent_tree
++ *   To reduce memory usage in the long run.
++ *
++ * - Extra optimization to skip file extents older than @newer_than
++ *   By using btrfs_search_forward() we can skip entire file ranges that
++ *   have extents created in past transactions, because btrfs_search_forward()
++ *   will not visit leaves and nodes with a generation smaller than given
++ *   minimal generation threshold (@newer_than).
++ *
++ * Return valid em if we find a file extent matching the requirement.
++ * Return NULL if we can not find a file extent matching the requirement.
++ *
++ * Return ERR_PTR() for error.
++ */
++static struct extent_map *defrag_get_extent(struct btrfs_inode *inode,
++					    u64 start, u64 newer_than)
++{
++	struct btrfs_root *root = inode->root;
++	struct btrfs_file_extent_item *fi;
++	struct btrfs_path path = { 0 };
++	struct extent_map *em;
++	struct btrfs_key key;
++	u64 ino = btrfs_ino(inode);
++	int ret;
++
++	em = alloc_extent_map();
++	if (!em) {
++		ret = -ENOMEM;
++		goto err;
++	}
++
++	key.objectid = ino;
++	key.type = BTRFS_EXTENT_DATA_KEY;
++	key.offset = start;
++
++	if (newer_than) {
++		ret = btrfs_search_forward(root, &key, &path, newer_than);
++		if (ret < 0)
++			goto err;
++		/* Can't find anything newer */
++		if (ret > 0)
++			goto not_found;
++	} else {
++		ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
++		if (ret < 0)
++			goto err;
++	}
++	if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
++		/*
++		 * If btrfs_search_slot() makes path to point beyond nritems,
++		 * we should not have an empty leaf, as this inode must at
++		 * least have its INODE_ITEM.
++		 */
++		ASSERT(btrfs_header_nritems(path.nodes[0]));
++		path.slots[0] = btrfs_header_nritems(path.nodes[0]) - 1;
++	}
++	btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
++	/* Perfect match, no need to go one slot back */
++	if (key.objectid == ino && key.type == BTRFS_EXTENT_DATA_KEY &&
++	    key.offset == start)
++		goto iterate;
++
++	/* We didn't find a perfect match, needs to go one slot back */
++	if (path.slots[0] > 0) {
++		btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
++		if (key.objectid == ino && key.type == BTRFS_EXTENT_DATA_KEY)
++			path.slots[0]--;
++	}
++
++iterate:
++	/* Iterate through the path to find a file extent covering @start */
++	while (true) {
++		u64 extent_end;
++
++		if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
++			goto next;
++
++		btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
++
++		/*
++		 * We may go one slot back to INODE_REF/XATTR item, then
++		 * need to go forward until we reach an EXTENT_DATA.
++		 * But we should still has the correct ino as key.objectid.
++		 */
++		if (WARN_ON(key.objectid < ino) || key.type < BTRFS_EXTENT_DATA_KEY)
++			goto next;
++
++		/* It's beyond our target range, definitely not extent found */
++		if (key.objectid > ino || key.type > BTRFS_EXTENT_DATA_KEY)
++			goto not_found;
++
++		/*
++		 *	|	|<- File extent ->|
++		 *	\- start
++		 *
++		 * This means there is a hole between start and key.offset.
++		 */
++		if (key.offset > start) {
++			em->start = start;
++			em->orig_start = start;
++			em->block_start = EXTENT_MAP_HOLE;
++			em->len = key.offset - start;
++			break;
++		}
++
++		fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
++				    struct btrfs_file_extent_item);
++		extent_end = btrfs_file_extent_end(&path);
++
++		/*
++		 *	|<- file extent ->|	|
++		 *				\- start
++		 *
++		 * We haven't reached start, search next slot.
++		 */
++		if (extent_end <= start)
++			goto next;
++
++		/* Now this extent covers @start, convert it to em */
++		btrfs_extent_item_to_extent_map(inode, &path, fi, false, em);
++		break;
++next:
++		ret = btrfs_next_item(root, &path);
++		if (ret < 0)
++			goto err;
++		if (ret > 0)
++			goto not_found;
++	}
++	btrfs_release_path(&path);
++	return em;
++
++not_found:
++	btrfs_release_path(&path);
++	free_extent_map(em);
++	return NULL;
++
++err:
++	btrfs_release_path(&path);
++	free_extent_map(em);
++	return ERR_PTR(ret);
++}
++
+ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start,
+-					       bool locked)
++					       u64 newer_than, bool locked)
+ {
+ 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+@@ -1009,7 +1156,7 @@ static struct extent_map *defrag_lookup_
+ 		/* get the big lock and read metadata off disk */
+ 		if (!locked)
+ 			lock_extent_bits(io_tree, start, end, &cached);
+-		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, sectorsize);
++		em = defrag_get_extent(BTRFS_I(inode), start, newer_than);
+ 		if (!locked)
+ 			unlock_extent_cached(io_tree, start, end, &cached);
+ 
+@@ -1037,7 +1184,12 @@ static bool defrag_check_next_extent(str
+ 	if (em->start + em->len >= i_size_read(inode))
+ 		return false;
+ 
+-	next = defrag_lookup_extent(inode, em->start + em->len, locked);
++	/*
++	 * We want to check if the next extent can be merged with the current
++	 * one, which can be an extent created in a past generation, so we pass
++	 * a minimum generation of 0 to defrag_lookup_extent().
++	 */
++	next = defrag_lookup_extent(inode, em->start + em->len, 0, locked);
+ 	/* No more em or hole */
+ 	if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
+ 		goto out;
+@@ -1188,7 +1340,8 @@ static int defrag_collect_targets(struct
+ 		u64 range_len;
+ 
+ 		last_is_target = false;
+-		em = defrag_lookup_extent(&inode->vfs_inode, cur, locked);
++		em = defrag_lookup_extent(&inode->vfs_inode, cur,
++					  newer_than, locked);
+ 		if (!em)
+ 			break;
+ 
diff --git a/queue-5.16/btrfs-defrag-don-t-use-merged-extent-map-for-their-generation-check.patch b/queue-5.16/btrfs-defrag-don-t-use-merged-extent-map-for-their-generation-check.patch
new file mode 100644
index 00000000000..3c7366bf624
--- /dev/null
+++ b/queue-5.16/btrfs-defrag-don-t-use-merged-extent-map-for-their-generation-check.patch
@@ -0,0 +1,110 @@
+From 199257a78bb01341c3ba6e85bdcf3a2e6e452c6d Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Fri, 11 Feb 2022 14:46:13 +0800
+Subject: btrfs: defrag: don't use merged extent map for their generation check
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 199257a78bb01341c3ba6e85bdcf3a2e6e452c6d upstream.
+
+For extent maps, if they are not compressed extents and are adjacent by
+logical addresses and file offsets, they can be merged into one larger
+extent map.
+
+Such merged extent map will have the higher generation of all the
+original ones.
+
+But this brings a problem for autodefrag, as it relies on accurate
+extent_map::generation to determine if one extent should be defragged.
+
+For merged extent maps, their higher generation can mark some older
+extents to be defragged while the original extent map doesn't meet the
+minimal generation threshold.
+
+Thus this will cause extra IO.
+
+So solve the problem, here we introduce a new flag, EXTENT_FLAG_MERGED,
+to indicate if the extent map is merged from one or more ems.
+
+And for autodefrag, if we find a merged extent map, and its generation
+meets the generation requirement, we just don't use this one, and go
+back to defrag_get_extent() to read extent maps from subvolume trees.
+
+This could cause more read IO, but should result less defrag data write,
+so in the long run it should be a win for autodefrag.
+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent_map.c |    2 ++
+ fs/btrfs/extent_map.h |    8 ++++++++
+ fs/btrfs/ioctl.c      |   14 ++++++++++++++
+ 3 files changed, 24 insertions(+)
+
+--- a/fs/btrfs/extent_map.c
++++ b/fs/btrfs/extent_map.c
+@@ -261,6 +261,7 @@ static void try_merge_map(struct extent_
+ 			em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start;
+ 			em->mod_start = merge->mod_start;
+ 			em->generation = max(em->generation, merge->generation);
++			set_bit(EXTENT_FLAG_MERGED, &em->flags);
+ 
+ 			rb_erase_cached(&merge->rb_node, &tree->map);
+ 			RB_CLEAR_NODE(&merge->rb_node);
+@@ -278,6 +279,7 @@ static void try_merge_map(struct extent_
+ 		RB_CLEAR_NODE(&merge->rb_node);
+ 		em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start;
+ 		em->generation = max(em->generation, merge->generation);
++		set_bit(EXTENT_FLAG_MERGED, &em->flags);
+ 		free_extent_map(merge);
+ 	}
+ }
+--- a/fs/btrfs/extent_map.h
++++ b/fs/btrfs/extent_map.h
+@@ -25,6 +25,8 @@ enum {
+ 	EXTENT_FLAG_FILLING,
+ 	/* filesystem extent mapping type */
+ 	EXTENT_FLAG_FS_MAPPING,
++	/* This em is merged from two or more physically adjacent ems */
++	EXTENT_FLAG_MERGED,
+ };
+ 
+ struct extent_map {
+@@ -40,6 +42,12 @@ struct extent_map {
+ 	u64 ram_bytes;
+ 	u64 block_start;
+ 	u64 block_len;
++
++	/*
++	 * Generation of the extent map, for merged em it's the highest
++	 * generation of all merged ems.
++	 * For non-merged extents, it's from btrfs_file_extent_item::generation.
++	 */
+ 	u64 generation;
+ 	unsigned long flags;
+ 	/* Used for chunk mappings, flag EXTENT_FLAG_FS_MAPPING must be set */
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -1149,6 +1149,20 @@ static struct extent_map *defrag_lookup_
+ 	em = lookup_extent_mapping(em_tree, start, sectorsize);
+ 	read_unlock(&em_tree->lock);
+ 
++	/*
++	 * We can get a merged extent, in that case, we need to re-search
++	 * tree to get the original em for defrag.
++	 *
++	 * If @newer_than is 0 or em::generation < newer_than, we can trust
++	 * this em, as either we don't care about the generation, or the
++	 * merged extent map will be rejected anyway.
++	 */
++	if (em && test_bit(EXTENT_FLAG_MERGED, &em->flags) &&
++	    newer_than && em->generation >= newer_than) {
++		free_extent_map(em);
++		em = NULL;
++	}
++
+ 	if (!em) {
+ 		struct extent_state *cached = NULL;
+ 		u64 end = start + sectorsize - 1;
diff --git a/queue-5.16/series b/queue-5.16/series
index c80383fe37a..811deb392ca 100644
--- a/queue-5.16/series
+++ b/queue-5.16/series
@@ -43,3 +43,5 @@ ata-pata_hpt37x-fix-pci-clock-detection.patch
 drm-amdgpu-check-vm-ready-by-amdgpu_vm-evicting-flag.patch
 tracing-add-ustring-operation-to-filtering-string-po.patch
 ipv6-fix-skb-drops-in-igmp6_event_query-and-igmp6_ev.patch
+btrfs-defrag-bring-back-the-old-file-extent-search-behavior.patch
+btrfs-defrag-don-t-use-merged-extent-map-for-their-generation-check.patch