--- /dev/null
+From ed58f2e66e849c34826083e5a6c1b506ee8a4d8e Mon Sep 17 00:00:00 2001
+From: ethanwu <ethanwu@synology.com>
+Date: Fri, 7 Feb 2020 17:38:16 +0800
+Subject: btrfs: backref, don't add refs from shared block when resolving normal backref
+
+From: ethanwu <ethanwu@synology.com>
+
+commit ed58f2e66e849c34826083e5a6c1b506ee8a4d8e upstream.
+
+All references from the block of SHARED_DATA_REF belong to that shared
+block backref.
+
+For example:
+
+ item 11 key (40831553536 EXTENT_ITEM 4194304) itemoff 15460 itemsize 95
+ extent refs 24 gen 7302 flags DATA
+ extent data backref root 257 objectid 260 offset 65536 count 5
+ extent data backref root 258 objectid 265 offset 0 count 9
+ shared data backref parent 394985472 count 10
+
+Block 394985472 might be leaf from root 257, and the item objectid and
+(file_pos - file_extent_item::offset) in that leaf just happens to be
+260 and 65536 which is equal to the first extent data backref entry.
+
+Before this patch, when we resolve backref:
+
+ root 257 objectid 260 offset 65536
+
+we will add those refs in block 394985472 and wrongly treat those as the
+refs we want.
+
+Fix this by checking if the leaf we are processing is shared data
+backref, if so, just skip this leaf.
+
+Shared data refs added into preftrees.direct have all entry value = 0
+(root_id = 0, key = NULL, level = 0) except parent entry.
+
+Other refs from indirect tree will have key value and root id != 0, and
+these values won't be changed when their parent is resolved and added to
+preftrees.direct. Therefore, we could reuse the preftrees.direct and
+search ref with all values = 0 except parent is set to avoid getting
+those resolved refs block.
+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: ethanwu <ethanwu@synology.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/backref.c | 61 +++++++++++++++++++++++++++++++++++++++++++++--------
+ 1 file changed, 52 insertions(+), 9 deletions(-)
+
+--- a/fs/btrfs/backref.c
++++ b/fs/btrfs/backref.c
+@@ -386,8 +386,34 @@ static int add_indirect_ref(const struct
+ wanted_disk_byte, count, sc, gfp_mask);
+ }
+
++static int is_shared_data_backref(struct preftrees *preftrees, u64 bytenr)
++{
++ struct rb_node **p = &preftrees->direct.root.rb_root.rb_node;
++ struct rb_node *parent = NULL;
++ struct prelim_ref *ref = NULL;
++ struct prelim_ref target = {0};
++ int result;
++
++ target.parent = bytenr;
++
++ while (*p) {
++ parent = *p;
++ ref = rb_entry(parent, struct prelim_ref, rbnode);
++ result = prelim_ref_compare(ref, &target);
++
++ if (result < 0)
++ p = &(*p)->rb_left;
++ else if (result > 0)
++ p = &(*p)->rb_right;
++ else
++ return 1;
++ }
++ return 0;
++}
++
+ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
+- struct ulist *parents, struct prelim_ref *ref,
++ struct ulist *parents,
++ struct preftrees *preftrees, struct prelim_ref *ref,
+ int level, u64 time_seq, const u64 *extent_item_pos,
+ u64 total_refs, bool ignore_offset)
+ {
+@@ -412,11 +438,16 @@ static int add_all_parents(struct btrfs_
+ }
+
+ /*
+- * We normally enter this function with the path already pointing to
+- * the first item to check. But sometimes, we may enter it with
+- * slot==nritems. In that case, go to the next leaf before we continue.
++ * 1. We normally enter this function with the path already pointing to
++ * the first item to check. But sometimes, we may enter it with
++ * slot == nritems.
++ * 2. We are searching for normal backref but bytenr of this leaf
++ * matches shared data backref
++ * For these cases, go to the next leaf before we continue.
+ */
+- if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
++ eb = path->nodes[0];
++ if (path->slots[0] >= btrfs_header_nritems(eb) ||
++ is_shared_data_backref(preftrees, eb->start)) {
+ if (time_seq == SEQ_LAST)
+ ret = btrfs_next_leaf(root, path);
+ else
+@@ -433,6 +464,17 @@ static int add_all_parents(struct btrfs_
+ key.type != BTRFS_EXTENT_DATA_KEY)
+ break;
+
++ /*
++ * We are searching for normal backref but bytenr of this leaf
++ * matches shared data backref.
++ */
++ if (slot == 0 && is_shared_data_backref(preftrees, eb->start)) {
++ if (time_seq == SEQ_LAST)
++ ret = btrfs_next_leaf(root, path);
++ else
++ ret = btrfs_next_old_leaf(root, path, time_seq);
++ continue;
++ }
+ fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
+ disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
+ data_offset = btrfs_file_extent_offset(eb, fi);
+@@ -484,6 +526,7 @@ next:
+ */
+ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path, u64 time_seq,
++ struct preftrees *preftrees,
+ struct prelim_ref *ref, struct ulist *parents,
+ const u64 *extent_item_pos, u64 total_refs,
+ bool ignore_offset)
+@@ -577,8 +620,8 @@ static int resolve_indirect_ref(struct b
+ eb = path->nodes[level];
+ }
+
+- ret = add_all_parents(root, path, parents, ref, level, time_seq,
+- extent_item_pos, total_refs, ignore_offset);
++ ret = add_all_parents(root, path, parents, preftrees, ref, level,
++ time_seq, extent_item_pos, total_refs, ignore_offset);
+ out:
+ path->lowest_level = 0;
+ btrfs_release_path(path);
+@@ -656,8 +699,8 @@ static int resolve_indirect_refs(struct
+ ret = BACKREF_FOUND_SHARED;
+ goto out;
+ }
+- err = resolve_indirect_ref(fs_info, path, time_seq, ref,
+- parents, extent_item_pos,
++ err = resolve_indirect_ref(fs_info, path, time_seq, preftrees,
++ ref, parents, extent_item_pos,
+ total_refs, ignore_offset);
+ /*
+ * we can only tolerate ENOENT,otherwise,we should catch error
--- /dev/null
+From 7ac8b88ee668a5b4743ebf3e9888fabac85c334a Mon Sep 17 00:00:00 2001
+From: ethanwu <ethanwu@synology.com>
+Date: Fri, 7 Feb 2020 17:38:15 +0800
+Subject: btrfs: backref, only collect file extent items matching backref offset
+
+From: ethanwu <ethanwu@synology.com>
+
+commit 7ac8b88ee668a5b4743ebf3e9888fabac85c334a upstream.
+
+When resolving one backref of type EXTENT_DATA_REF, we collect all
+references that simply reference the EXTENT_ITEM even though their
+(file_pos - file_extent_item::offset) are not the same as the
+btrfs_extent_data_ref::offset we are searching for.
+
+This patch adds additional check so that we only collect references whose
+(file_pos - file_extent_item::offset) == btrfs_extent_data_ref::offset.
+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: ethanwu <ethanwu@synology.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/backref.c | 63 +++++++++++++++++++++++++++--------------------------
+ 1 file changed, 33 insertions(+), 30 deletions(-)
+
+--- a/fs/btrfs/backref.c
++++ b/fs/btrfs/backref.c
+@@ -347,33 +347,10 @@ static int add_prelim_ref(const struct b
+ return -ENOMEM;
+
+ ref->root_id = root_id;
+- if (key) {
++ if (key)
+ ref->key_for_search = *key;
+- /*
+- * We can often find data backrefs with an offset that is too
+- * large (>= LLONG_MAX, maximum allowed file offset) due to
+- * underflows when subtracting a file's offset with the data
+- * offset of its corresponding extent data item. This can
+- * happen for example in the clone ioctl.
+- * So if we detect such case we set the search key's offset to
+- * zero to make sure we will find the matching file extent item
+- * at add_all_parents(), otherwise we will miss it because the
+- * offset taken form the backref is much larger then the offset
+- * of the file extent item. This can make us scan a very large
+- * number of file extent items, but at least it will not make
+- * us miss any.
+- * This is an ugly workaround for a behaviour that should have
+- * never existed, but it does and a fix for the clone ioctl
+- * would touch a lot of places, cause backwards incompatibility
+- * and would not fix the problem for extents cloned with older
+- * kernels.
+- */
+- if (ref->key_for_search.type == BTRFS_EXTENT_DATA_KEY &&
+- ref->key_for_search.offset >= LLONG_MAX)
+- ref->key_for_search.offset = 0;
+- } else {
++ else
+ memset(&ref->key_for_search, 0, sizeof(ref->key_for_search));
+- }
+
+ ref->inode_list = NULL;
+ ref->level = level;
+@@ -424,6 +401,7 @@ static int add_all_parents(struct btrfs_
+ u64 disk_byte;
+ u64 wanted_disk_byte = ref->wanted_disk_byte;
+ u64 count = 0;
++ u64 data_offset;
+
+ if (level != 0) {
+ eb = path->nodes[level];
+@@ -457,11 +435,15 @@ static int add_all_parents(struct btrfs_
+
+ fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
+ disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
++ data_offset = btrfs_file_extent_offset(eb, fi);
+
+ if (disk_byte == wanted_disk_byte) {
+ eie = NULL;
+ old = NULL;
+- count++;
++ if (ref->key_for_search.offset == key.offset - data_offset)
++ count++;
++ else
++ goto next;
+ if (extent_item_pos) {
+ ret = check_extent_in_eb(&key, eb, fi,
+ *extent_item_pos,
+@@ -513,6 +495,7 @@ static int resolve_indirect_ref(struct b
+ int root_level;
+ int level = ref->level;
+ int index;
++ struct btrfs_key search_key = ref->key_for_search;
+
+ root_key.objectid = ref->root_id;
+ root_key.type = BTRFS_ROOT_ITEM_KEY;
+@@ -545,13 +528,33 @@ static int resolve_indirect_ref(struct b
+ goto out;
+ }
+
++ /*
++ * We can often find data backrefs with an offset that is too large
++ * (>= LLONG_MAX, maximum allowed file offset) due to underflows when
++ * subtracting a file's offset with the data offset of its
++ * corresponding extent data item. This can happen for example in the
++ * clone ioctl.
++ *
++ * So if we detect such case we set the search key's offset to zero to
++ * make sure we will find the matching file extent item at
++ * add_all_parents(), otherwise we will miss it because the offset
++ * taken form the backref is much larger then the offset of the file
++ * extent item. This can make us scan a very large number of file
++ * extent items, but at least it will not make us miss any.
++ *
++ * This is an ugly workaround for a behaviour that should have never
++ * existed, but it does and a fix for the clone ioctl would touch a lot
++ * of places, cause backwards incompatibility and would not fix the
++ * problem for extents cloned with older kernels.
++ */
++ if (search_key.type == BTRFS_EXTENT_DATA_KEY &&
++ search_key.offset >= LLONG_MAX)
++ search_key.offset = 0;
+ path->lowest_level = level;
+ if (time_seq == SEQ_LAST)
+- ret = btrfs_search_slot(NULL, root, &ref->key_for_search, path,
+- 0, 0);
++ ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
+ else
+- ret = btrfs_search_old_slot(root, &ref->key_for_search, path,
+- time_seq);
++ ret = btrfs_search_old_slot(root, &search_key, path, time_seq);
+
+ /* root node has been locked, we can release @subvol_srcu safely here */
+ srcu_read_unlock(&fs_info->subvol_srcu, index);
--- /dev/null
+From cfc0eed0ec89db7c4a8d461174cabfaa4a0912c7 Mon Sep 17 00:00:00 2001
+From: ethanwu <ethanwu@synology.com>
+Date: Fri, 7 Feb 2020 17:38:17 +0800
+Subject: btrfs: backref, only search backref entries from leaves of the same root
+
+From: ethanwu <ethanwu@synology.com>
+
+commit cfc0eed0ec89db7c4a8d461174cabfaa4a0912c7 upstream.
+
+We could have some nodes/leaves in a subvolume whose owner is not
+that subvolume. In this way, when we resolve normal backrefs of that
+subvolume, we should avoid collecting those references from these blocks.
+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: ethanwu <ethanwu@synology.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/backref.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/backref.c
++++ b/fs/btrfs/backref.c
+@@ -443,11 +443,14 @@ static int add_all_parents(struct btrfs_
+ * slot == nritems.
+ * 2. We are searching for normal backref but bytenr of this leaf
+ * matches shared data backref
++ * 3. The leaf owner is not equal to the root we are searching
++ *
+ * For these cases, go to the next leaf before we continue.
+ */
+ eb = path->nodes[0];
+ if (path->slots[0] >= btrfs_header_nritems(eb) ||
+- is_shared_data_backref(preftrees, eb->start)) {
++ is_shared_data_backref(preftrees, eb->start) ||
++ ref->root_id != btrfs_header_owner(eb)) {
+ if (time_seq == SEQ_LAST)
+ ret = btrfs_next_leaf(root, path);
+ else
+@@ -466,9 +469,12 @@ static int add_all_parents(struct btrfs_
+
+ /*
+ * We are searching for normal backref but bytenr of this leaf
+- * matches shared data backref.
++ * matches shared data backref, OR
++ * the leaf owner is not equal to the root we are searching for
+ */
+- if (slot == 0 && is_shared_data_backref(preftrees, eb->start)) {
++ if (slot == 0 &&
++ (is_shared_data_backref(preftrees, eb->start) ||
++ ref->root_id != btrfs_header_owner(eb))) {
+ if (time_seq == SEQ_LAST)
+ ret = btrfs_next_leaf(root, path);
+ else
--- /dev/null
+From b25b0b871f206936d5bca02b80d38c05623e27da Mon Sep 17 00:00:00 2001
+From: ethanwu <ethanwu@synology.com>
+Date: Fri, 7 Feb 2020 17:38:18 +0800
+Subject: btrfs: backref, use correct count to resolve normal data refs
+
+From: ethanwu <ethanwu@synology.com>
+
+commit b25b0b871f206936d5bca02b80d38c05623e27da upstream.
+
+With the following patches:
+
+- btrfs: backref, only collect file extent items matching backref offset
+- btrfs: backref, don't add refs from shared block when resolving normal backref
+- btrfs: backref, only search backref entries from leaves of the same root
+
+we only collect the normal data refs we want, so the imprecise upper
+bound total_refs of that EXTENT_ITEM could now be changed to the count
+of the normal backref entry we want to search.
+
+Background and how the patches fit together:
+
+Btrfs has two types of data backref.
+For BTRFS_EXTENT_DATA_REF_KEY type of backref, we don't have the
+exact block number. Therefore, we need to call resolve_indirect_refs.
+It uses btrfs_search_slot to locate the leaf block. Then
+we need to walk through the leaves to search for the EXTENT_DATA items
+that have disk bytenr matching the extent item (add_all_parents).
+
+When resolving indirect refs, we could take entries that don't
+belong to the backref entry we are searching for right now.
+For that reason when searching backref entry, we always use total
+refs of that EXTENT_ITEM rather than individual count.
+
+For example:
+item 11 key (40831553536 EXTENT_ITEM 4194304) itemoff 15460 itemsize 95
+ extent refs 24 gen 7302 flags DATA
+ shared data backref parent 394985472 count 10 #1
+ extent data backref root 257 objectid 260 offset 1048576 count 3 #2
+ extent data backref root 256 objectid 260 offset 65536 count 6 #3
+ extent data backref root 257 objectid 260 offset 65536 count 5 #4
+
+For example, when searching backref entry #4, we'll use total_refs
+24, a very loose loop ending condition, instead of total_refs = 5.
+
+But using total_refs = 24 is not accurate. Sometimes, we'll never find
+all the refs from specific root. As a result, the loop keeps on going
+until we reach the end of that inode.
+
+The first 3 patches, handle 3 different types refs we might encounter.
+These refs do not belong to the normal backref we are searching, and
+hence need to be skipped.
+
+This patch changes the total_refs to correct number so that we could
+end loop as soon as we find all the refs we want.
+
+btrfs send uses backref to find possible clone sources, the following
+is a simple test to compare the results with and without this patch:
+
+ $ btrfs subvolume create /sub1
+ $ for i in `seq 1 163840`; do
+ dd if=/dev/zero of=/sub1/file bs=64K count=1 seek=$((i-1)) conv=notrunc oflag=direct
+ done
+ $ btrfs subvolume snapshot /sub1 /sub2
+ $ for i in `seq 1 163840`; do
+ dd if=/dev/zero of=/sub1/file bs=4K count=1 seek=$(((i-1)*16+10)) conv=notrunc oflag=direct
+ done
+ $ btrfs subvolume snapshot -r /sub1 /snap1
+ $ time btrfs send /snap1 | btrfs receive /volume2
+
+Without this patch:
+
+real 69m48.124s
+user 0m50.199s
+sys 70m15.600s
+
+With this patch:
+
+real 1m59.683s
+user 0m35.421s
+sys 2m42.684s
+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: ethanwu <ethanwu@synology.com>
+[ add patchset cover letter with background and numbers ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/backref.c | 29 +++++++++++------------------
+ 1 file changed, 11 insertions(+), 18 deletions(-)
+
+--- a/fs/btrfs/backref.c
++++ b/fs/btrfs/backref.c
+@@ -415,7 +415,7 @@ static int add_all_parents(struct btrfs_
+ struct ulist *parents,
+ struct preftrees *preftrees, struct prelim_ref *ref,
+ int level, u64 time_seq, const u64 *extent_item_pos,
+- u64 total_refs, bool ignore_offset)
++ bool ignore_offset)
+ {
+ int ret = 0;
+ int slot;
+@@ -457,7 +457,7 @@ static int add_all_parents(struct btrfs_
+ ret = btrfs_next_old_leaf(root, path, time_seq);
+ }
+
+- while (!ret && count < total_refs) {
++ while (!ret && count < ref->count) {
+ eb = path->nodes[0];
+ slot = path->slots[0];
+
+@@ -534,8 +534,7 @@ static int resolve_indirect_ref(struct b
+ struct btrfs_path *path, u64 time_seq,
+ struct preftrees *preftrees,
+ struct prelim_ref *ref, struct ulist *parents,
+- const u64 *extent_item_pos, u64 total_refs,
+- bool ignore_offset)
++ const u64 *extent_item_pos, bool ignore_offset)
+ {
+ struct btrfs_root *root;
+ struct btrfs_key root_key;
+@@ -627,7 +626,7 @@ static int resolve_indirect_ref(struct b
+ }
+
+ ret = add_all_parents(root, path, parents, preftrees, ref, level,
+- time_seq, extent_item_pos, total_refs, ignore_offset);
++ time_seq, extent_item_pos, ignore_offset);
+ out:
+ path->lowest_level = 0;
+ btrfs_release_path(path);
+@@ -661,7 +660,7 @@ unode_aux_to_inode_list(struct ulist_nod
+ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path, u64 time_seq,
+ struct preftrees *preftrees,
+- const u64 *extent_item_pos, u64 total_refs,
++ const u64 *extent_item_pos,
+ struct share_check *sc, bool ignore_offset)
+ {
+ int err;
+@@ -707,7 +706,7 @@ static int resolve_indirect_refs(struct
+ }
+ err = resolve_indirect_ref(fs_info, path, time_seq, preftrees,
+ ref, parents, extent_item_pos,
+- total_refs, ignore_offset);
++ ignore_offset);
+ /*
+ * we can only tolerate ENOENT,otherwise,we should catch error
+ * and return directly.
+@@ -810,8 +809,7 @@ static int add_missing_keys(struct btrfs
+ */
+ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_head *head, u64 seq,
+- struct preftrees *preftrees, u64 *total_refs,
+- struct share_check *sc)
++ struct preftrees *preftrees, struct share_check *sc)
+ {
+ struct btrfs_delayed_ref_node *node;
+ struct btrfs_delayed_extent_op *extent_op = head->extent_op;
+@@ -845,7 +843,6 @@ static int add_delayed_refs(const struct
+ default:
+ BUG();
+ }
+- *total_refs += count;
+ switch (node->type) {
+ case BTRFS_TREE_BLOCK_REF_KEY: {
+ /* NORMAL INDIRECT METADATA backref */
+@@ -928,7 +925,7 @@ out:
+ static int add_inline_refs(const struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path, u64 bytenr,
+ int *info_level, struct preftrees *preftrees,
+- u64 *total_refs, struct share_check *sc)
++ struct share_check *sc)
+ {
+ int ret = 0;
+ int slot;
+@@ -952,7 +949,6 @@ static int add_inline_refs(const struct
+
+ ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
+ flags = btrfs_extent_flags(leaf, ei);
+- *total_refs += btrfs_extent_refs(leaf, ei);
+ btrfs_item_key_to_cpu(leaf, &found_key, slot);
+
+ ptr = (unsigned long)(ei + 1);
+@@ -1177,8 +1173,6 @@ static int find_parent_nodes(struct btrf
+ struct prelim_ref *ref;
+ struct rb_node *node;
+ struct extent_inode_elem *eie = NULL;
+- /* total of both direct AND indirect refs! */
+- u64 total_refs = 0;
+ struct preftrees preftrees = {
+ .direct = PREFTREE_INIT,
+ .indirect = PREFTREE_INIT,
+@@ -1247,7 +1241,7 @@ again:
+ }
+ spin_unlock(&delayed_refs->lock);
+ ret = add_delayed_refs(fs_info, head, time_seq,
+- &preftrees, &total_refs, sc);
++ &preftrees, sc);
+ mutex_unlock(&head->mutex);
+ if (ret)
+ goto out;
+@@ -1268,8 +1262,7 @@ again:
+ (key.type == BTRFS_EXTENT_ITEM_KEY ||
+ key.type == BTRFS_METADATA_ITEM_KEY)) {
+ ret = add_inline_refs(fs_info, path, bytenr,
+- &info_level, &preftrees,
+- &total_refs, sc);
++ &info_level, &preftrees, sc);
+ if (ret)
+ goto out;
+ ret = add_keyed_refs(fs_info, path, bytenr, info_level,
+@@ -1288,7 +1281,7 @@ again:
+ WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect_missing_keys.root.rb_root));
+
+ ret = resolve_indirect_refs(fs_info, path, time_seq, &preftrees,
+- extent_item_pos, total_refs, sc, ignore_offset);
++ extent_item_pos, sc, ignore_offset);
+ if (ret)
+ goto out;
+
--- /dev/null
+From foo@baz Fri Feb 5 09:57:00 AM CET 2021
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 14 Jan 2021 10:19:29 -0800
+Subject: net_sched: gen_estimator: support large ewma log
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit dd5e073381f2ada3630f36be42833c6e9c78b75e upstream
+
+syzbot report reminded us that very big ewma_log were supported in the past,
+even if they made litle sense.
+
+tc qdisc replace dev xxx root est 1sec 131072sec ...
+
+While fixing the bug, also add boundary checks for ewma_log, in line
+with range supported by iproute2.
+
+UBSAN: shift-out-of-bounds in net/core/gen_estimator.c:83:38
+shift exponent -1 is negative
+CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.10.0-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ <IRQ>
+ __dump_stack lib/dump_stack.c:79 [inline]
+ dump_stack+0x107/0x163 lib/dump_stack.c:120
+ ubsan_epilogue+0xb/0x5a lib/ubsan.c:148
+ __ubsan_handle_shift_out_of_bounds.cold+0xb1/0x181 lib/ubsan.c:395
+ est_timer.cold+0xbb/0x12d net/core/gen_estimator.c:83
+ call_timer_fn+0x1a5/0x710 kernel/time/timer.c:1417
+ expire_timers kernel/time/timer.c:1462 [inline]
+ __run_timers.part.0+0x692/0xa80 kernel/time/timer.c:1731
+ __run_timers kernel/time/timer.c:1712 [inline]
+ run_timer_softirq+0xb3/0x1d0 kernel/time/timer.c:1744
+ __do_softirq+0x2bc/0xa77 kernel/softirq.c:343
+ asm_call_irq_on_stack+0xf/0x20
+ </IRQ>
+ __run_on_irqstack arch/x86/include/asm/irq_stack.h:26 [inline]
+ run_on_irqstack_cond arch/x86/include/asm/irq_stack.h:77 [inline]
+ do_softirq_own_stack+0xaa/0xd0 arch/x86/kernel/irq_64.c:77
+ invoke_softirq kernel/softirq.c:226 [inline]
+ __irq_exit_rcu+0x17f/0x200 kernel/softirq.c:420
+ irq_exit_rcu+0x5/0x20 kernel/softirq.c:432
+ sysvec_apic_timer_interrupt+0x4d/0x100 arch/x86/kernel/apic/apic.c:1096
+ asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:628
+RIP: 0010:native_save_fl arch/x86/include/asm/irqflags.h:29 [inline]
+RIP: 0010:arch_local_save_flags arch/x86/include/asm/irqflags.h:79 [inline]
+RIP: 0010:arch_irqs_disabled arch/x86/include/asm/irqflags.h:169 [inline]
+RIP: 0010:acpi_safe_halt drivers/acpi/processor_idle.c:111 [inline]
+RIP: 0010:acpi_idle_do_entry+0x1c9/0x250 drivers/acpi/processor_idle.c:516
+
+Fixes: 1c0d32fde5bd ("net_sched: gen_estimator: complete rewrite of rate estimators")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Link: https://lore.kernel.org/r/20210114181929.1717985-1-eric.dumazet@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+[sudip: adjust context]
+Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/gen_estimator.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/net/core/gen_estimator.c
++++ b/net/core/gen_estimator.c
+@@ -80,11 +80,11 @@ static void est_timer(struct timer_list
+ u64 rate, brate;
+
+ est_fetch_counters(est, &b);
+- brate = (b.bytes - est->last_bytes) << (10 - est->ewma_log - est->intvl_log);
+- brate -= (est->avbps >> est->ewma_log);
++ brate = (b.bytes - est->last_bytes) << (10 - est->intvl_log);
++ brate = (brate >> est->ewma_log) - (est->avbps >> est->ewma_log);
+
+- rate = (u64)(b.packets - est->last_packets) << (10 - est->ewma_log - est->intvl_log);
+- rate -= (est->avpps >> est->ewma_log);
++ rate = (u64)(b.packets - est->last_packets) << (10 - est->intvl_log);
++ rate = (rate >> est->ewma_log) - (est->avpps >> est->ewma_log);
+
+ write_seqcount_begin(&est->seq);
+ est->avbps += brate;
+@@ -143,6 +143,9 @@ int gen_new_estimator(struct gnet_stats_
+ if (parm->interval < -2 || parm->interval > 3)
+ return -EINVAL;
+
++ if (parm->ewma_log == 0 || parm->ewma_log >= 31)
++ return -EINVAL;
++
+ est = kzalloc(sizeof(*est), GFP_KERNEL);
+ if (!est)
+ return -ENOBUFS;
arm64-fix-kernel-address-detection-of-__is_lm_address.patch
arm64-do-not-pass-tagged-addresses-to-__is_lm_address.patch
tcp-make-tcp_user_timeout-accurate-for-zero-window-probes.patch
+btrfs-backref-only-collect-file-extent-items-matching-backref-offset.patch
+btrfs-backref-don-t-add-refs-from-shared-block-when-resolving-normal-backref.patch
+btrfs-backref-only-search-backref-entries-from-leaves-of-the-same-root.patch
+btrfs-backref-use-correct-count-to-resolve-normal-data-refs.patch
+net_sched-gen_estimator-support-large-ewma-log.patch