]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.16-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 28 Feb 2022 07:05:44 +0000 (08:05 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 28 Feb 2022 07:05:44 +0000 (08:05 +0100)
added patches:
btrfs-autodefrag-only-scan-one-inode-once.patch
btrfs-defrag-allow-defrag_one_cluster-to-skip-large-extent-which-is-not-a-target.patch
btrfs-defrag-don-t-defrag-extents-which-are-already-at-max-capacity.patch
btrfs-defrag-don-t-try-to-merge-regular-extents-with-preallocated-extents.patch
btrfs-defrag-remove-an-ambiguous-condition-for-rejection.patch
btrfs-prevent-copying-too-big-compressed-lzo-segment.patch
btrfs-reduce-extent-threshold-for-autodefrag.patch
hugetlbfs-fix-a-truncation-issue-in-hugepages-parameter.patch
ib-qib-fix-duplicate-sysfs-directory-name.patch
mm-hugetlb-fix-kernel-crash-with-hugetlb-mremap.patch
rdma-cma-do-not-change-route.addr.src_addr-outside-state-checks.patch
riscv-fix-nommu_k210_sdcard_defconfig.patch
riscv-fix-oops-caused-by-irqsoff-latency-tracer.patch
staging-fbtft-fb_st7789v-reset-display-before-initialization.patch
thermal-int340x-fix-memory-leak-in-int3400_notify.patch
tps6598x-clear-int-mask-on-probe-failure.patch
tty-n_gsm-fix-encoding-of-control-signal-octet-bit-dv.patch

18 files changed:
queue-5.16/btrfs-autodefrag-only-scan-one-inode-once.patch [new file with mode: 0644]
queue-5.16/btrfs-defrag-allow-defrag_one_cluster-to-skip-large-extent-which-is-not-a-target.patch [new file with mode: 0644]
queue-5.16/btrfs-defrag-don-t-defrag-extents-which-are-already-at-max-capacity.patch [new file with mode: 0644]
queue-5.16/btrfs-defrag-don-t-try-to-merge-regular-extents-with-preallocated-extents.patch [new file with mode: 0644]
queue-5.16/btrfs-defrag-remove-an-ambiguous-condition-for-rejection.patch [new file with mode: 0644]
queue-5.16/btrfs-prevent-copying-too-big-compressed-lzo-segment.patch [new file with mode: 0644]
queue-5.16/btrfs-reduce-extent-threshold-for-autodefrag.patch [new file with mode: 0644]
queue-5.16/hugetlbfs-fix-a-truncation-issue-in-hugepages-parameter.patch [new file with mode: 0644]
queue-5.16/ib-qib-fix-duplicate-sysfs-directory-name.patch [new file with mode: 0644]
queue-5.16/mm-hugetlb-fix-kernel-crash-with-hugetlb-mremap.patch [new file with mode: 0644]
queue-5.16/rdma-cma-do-not-change-route.addr.src_addr-outside-state-checks.patch [new file with mode: 0644]
queue-5.16/riscv-fix-nommu_k210_sdcard_defconfig.patch [new file with mode: 0644]
queue-5.16/riscv-fix-oops-caused-by-irqsoff-latency-tracer.patch [new file with mode: 0644]
queue-5.16/series
queue-5.16/staging-fbtft-fb_st7789v-reset-display-before-initialization.patch [new file with mode: 0644]
queue-5.16/thermal-int340x-fix-memory-leak-in-int3400_notify.patch [new file with mode: 0644]
queue-5.16/tps6598x-clear-int-mask-on-probe-failure.patch [new file with mode: 0644]
queue-5.16/tty-n_gsm-fix-encoding-of-control-signal-octet-bit-dv.patch [new file with mode: 0644]

diff --git a/queue-5.16/btrfs-autodefrag-only-scan-one-inode-once.patch b/queue-5.16/btrfs-autodefrag-only-scan-one-inode-once.patch
new file mode 100644 (file)
index 0000000..b5c099c
--- /dev/null
@@ -0,0 +1,171 @@
+From 26fbac2517fcad34fa3f950151fd4c0240fb2935 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Tue, 22 Feb 2022 18:20:59 +0100
+Subject: btrfs: autodefrag: only scan one inode once
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 26fbac2517fcad34fa3f950151fd4c0240fb2935 upstream.
+
+Although we have btrfs_requeue_inode_defrag(), for autodefrag we are
+still just exhausting all inode_defrag items in the tree.
+
+This means, it doesn't make much difference to requeue an inode_defrag,
+other than scan the inode from the beginning till its end.
+
+Change the behaviour to always scan from offset 0 of an inode, and till
+the end.
+
+By this we get the following benefit:
+
+- Straight-forward code
+
+- No more re-queue related check
+
+- Fewer members in inode_defrag
+
+We still keep the same btrfs_get_fs_root() and btrfs_iget() check for
+each loop, and added extra should_auto_defrag() check per-loop.
+
+Note: the patch needs to be backported and is intentionally written
+to minimize the diff size, code will be cleaned up later.
+
+CC: stable@vger.kernel.org # 5.16
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/file.c |   84 ++++++++++++++------------------------------------------
+ 1 file changed, 22 insertions(+), 62 deletions(-)
+
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -49,12 +49,6 @@ struct inode_defrag {
+       /* root objectid */
+       u64 root;
+-
+-      /* last offset we were able to defrag */
+-      u64 last_offset;
+-
+-      /* if we've wrapped around back to zero once already */
+-      int cycled;
+ };
+ static int __compare_inode_defrag(struct inode_defrag *defrag1,
+@@ -107,8 +101,6 @@ static int __btrfs_add_inode_defrag(stru
+                        */
+                       if (defrag->transid < entry->transid)
+                               entry->transid = defrag->transid;
+-                      if (defrag->last_offset > entry->last_offset)
+-                              entry->last_offset = defrag->last_offset;
+                       return -EEXIST;
+               }
+       }
+@@ -179,34 +171,6 @@ int btrfs_add_inode_defrag(struct btrfs_
+ }
+ /*
+- * Requeue the defrag object. If there is a defrag object that points to
+- * the same inode in the tree, we will merge them together (by
+- * __btrfs_add_inode_defrag()) and free the one that we want to requeue.
+- */
+-static void btrfs_requeue_inode_defrag(struct btrfs_inode *inode,
+-                                     struct inode_defrag *defrag)
+-{
+-      struct btrfs_fs_info *fs_info = inode->root->fs_info;
+-      int ret;
+-
+-      if (!__need_auto_defrag(fs_info))
+-              goto out;
+-
+-      /*
+-       * Here we don't check the IN_DEFRAG flag, because we need merge
+-       * them together.
+-       */
+-      spin_lock(&fs_info->defrag_inodes_lock);
+-      ret = __btrfs_add_inode_defrag(inode, defrag);
+-      spin_unlock(&fs_info->defrag_inodes_lock);
+-      if (ret)
+-              goto out;
+-      return;
+-out:
+-      kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+-}
+-
+-/*
+  * pick the defragable inode that we want, if it doesn't exist, we will get
+  * the next one.
+  */
+@@ -278,8 +242,14 @@ static int __btrfs_run_defrag_inode(stru
+       struct btrfs_root *inode_root;
+       struct inode *inode;
+       struct btrfs_ioctl_defrag_range_args range;
+-      int num_defrag;
+-      int ret;
++      int ret = 0;
++      u64 cur = 0;
++
++again:
++      if (test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state))
++              goto cleanup;
++      if (!__need_auto_defrag(fs_info))
++              goto cleanup;
+       /* get the inode */
+       inode_root = btrfs_get_fs_root(fs_info, defrag->root, true);
+@@ -295,39 +265,29 @@ static int __btrfs_run_defrag_inode(stru
+               goto cleanup;
+       }
++      if (cur >= i_size_read(inode)) {
++              iput(inode);
++              goto cleanup;
++      }
++
+       /* do a chunk of defrag */
+       clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
+       memset(&range, 0, sizeof(range));
+       range.len = (u64)-1;
+-      range.start = defrag->last_offset;
++      range.start = cur;
+       sb_start_write(fs_info->sb);
+-      num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
++      ret = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
+                                      BTRFS_DEFRAG_BATCH);
+       sb_end_write(fs_info->sb);
+-      /*
+-       * if we filled the whole defrag batch, there
+-       * must be more work to do.  Queue this defrag
+-       * again
+-       */
+-      if (num_defrag == BTRFS_DEFRAG_BATCH) {
+-              defrag->last_offset = range.start;
+-              btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag);
+-      } else if (defrag->last_offset && !defrag->cycled) {
+-              /*
+-               * we didn't fill our defrag batch, but
+-               * we didn't start at zero.  Make sure we loop
+-               * around to the start of the file.
+-               */
+-              defrag->last_offset = 0;
+-              defrag->cycled = 1;
+-              btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag);
+-      } else {
+-              kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+-      }
+-
+       iput(inode);
+-      return 0;
++
++      if (ret < 0)
++              goto cleanup;
++
++      cur = max(cur + fs_info->sectorsize, range.start);
++      goto again;
++
+ cleanup:
+       kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+       return ret;
diff --git a/queue-5.16/btrfs-defrag-allow-defrag_one_cluster-to-skip-large-extent-which-is-not-a-target.patch b/queue-5.16/btrfs-defrag-allow-defrag_one_cluster-to-skip-large-extent-which-is-not-a-target.patch
new file mode 100644 (file)
index 0000000..4e62e6d
--- /dev/null
@@ -0,0 +1,226 @@
+From 966d879bafaaf020c11a7cee9526f6dd823a4126 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Fri, 11 Feb 2022 14:41:39 +0800
+Subject: btrfs: defrag: allow defrag_one_cluster() to skip large extent which is not a target
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 966d879bafaaf020c11a7cee9526f6dd823a4126 upstream.
+
+In the rework of btrfs_defrag_file(), we always call
+defrag_one_cluster() and increase the offset by cluster size, which is
+only 256K.
+
+But there are cases where we have a large extent (e.g. 128M) which
+doesn't need to be defragged at all.
+
+Before the refactor, we can directly skip the range, but now we have to
+scan that extent map again and again until the cluster moves after the
+non-target extent.
+
+Fix the problem by allowing defrag_one_cluster() to increase
+btrfs_defrag_ctrl::last_scanned to the end of an extent, if and only if
+the last extent of the cluster is not a target.
+
+The test script looks like this:
+
+       mkfs.btrfs -f $dev > /dev/null
+
+       mount $dev $mnt
+
+       # As btrfs ioctl uses 32M as extent_threshold
+       xfs_io -f -c "pwrite 0 64M" $mnt/file1
+       sync
+       # Some fragmented range to defrag
+       xfs_io -s -c "pwrite 65548k 4k" \
+                 -c "pwrite 65544k 4k" \
+                 -c "pwrite 65540k 4k" \
+                 -c "pwrite 65536k 4k" \
+                 $mnt/file1
+       sync
+
+       echo "=== before ==="
+       xfs_io -c "fiemap -v" $mnt/file1
+       echo "=== after ==="
+       btrfs fi defrag $mnt/file1
+       sync
+       xfs_io -c "fiemap -v" $mnt/file1
+       umount $mnt
+
+With extra ftrace put into defrag_one_cluster(), before the patch it
+would result in tons of loops:
+
+(As defrag_one_cluster() is inlined, the function name is its caller)
+
+  btrfs-126062  [005] .....  4682.816026: btrfs_defrag_file: r/i=5/257 start=0 len=262144
+  btrfs-126062  [005] .....  4682.816027: btrfs_defrag_file: r/i=5/257 start=262144 len=262144
+  btrfs-126062  [005] .....  4682.816028: btrfs_defrag_file: r/i=5/257 start=524288 len=262144
+  btrfs-126062  [005] .....  4682.816028: btrfs_defrag_file: r/i=5/257 start=786432 len=262144
+  btrfs-126062  [005] .....  4682.816028: btrfs_defrag_file: r/i=5/257 start=1048576 len=262144
+  ...
+  btrfs-126062  [005] .....  4682.816043: btrfs_defrag_file: r/i=5/257 start=67108864 len=262144
+
+But with this patch there will be just one loop, then directly to the
+end of the extent:
+
+  btrfs-130471  [014] .....  5434.029558: defrag_one_cluster: r/i=5/257 start=0 len=262144
+  btrfs-130471  [014] .....  5434.029559: defrag_one_cluster: r/i=5/257 start=67108864 len=16384
+
+CC: stable@vger.kernel.org # 5.16
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ioctl.c |   48 +++++++++++++++++++++++++++++++++++++++---------
+ 1 file changed, 39 insertions(+), 9 deletions(-)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -1174,8 +1174,10 @@ struct defrag_target_range {
+ static int defrag_collect_targets(struct btrfs_inode *inode,
+                                 u64 start, u64 len, u32 extent_thresh,
+                                 u64 newer_than, bool do_compress,
+-                                bool locked, struct list_head *target_list)
++                                bool locked, struct list_head *target_list,
++                                u64 *last_scanned_ret)
+ {
++      bool last_is_target = false;
+       u64 cur = start;
+       int ret = 0;
+@@ -1185,6 +1187,7 @@ static int defrag_collect_targets(struct
+               bool next_mergeable = true;
+               u64 range_len;
++              last_is_target = false;
+               em = defrag_lookup_extent(&inode->vfs_inode, cur, locked);
+               if (!em)
+                       break;
+@@ -1267,6 +1270,7 @@ static int defrag_collect_targets(struct
+               }
+ add:
++              last_is_target = true;
+               range_len = min(extent_map_end(em), start + len) - cur;
+               /*
+                * This one is a good target, check if it can be merged into
+@@ -1310,6 +1314,17 @@ next:
+                       kfree(entry);
+               }
+       }
++      if (!ret && last_scanned_ret) {
++              /*
++               * If the last extent is not a target, the caller can skip to
++               * the end of that extent.
++               * Otherwise, we can only go the end of the specified range.
++               */
++              if (!last_is_target)
++                      *last_scanned_ret = max(cur, *last_scanned_ret);
++              else
++                      *last_scanned_ret = max(start + len, *last_scanned_ret);
++      }
+       return ret;
+ }
+@@ -1368,7 +1383,8 @@ static int defrag_one_locked_target(stru
+ }
+ static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len,
+-                          u32 extent_thresh, u64 newer_than, bool do_compress)
++                          u32 extent_thresh, u64 newer_than, bool do_compress,
++                          u64 *last_scanned_ret)
+ {
+       struct extent_state *cached_state = NULL;
+       struct defrag_target_range *entry;
+@@ -1414,7 +1430,7 @@ static int defrag_one_range(struct btrfs
+        */
+       ret = defrag_collect_targets(inode, start, len, extent_thresh,
+                                    newer_than, do_compress, true,
+-                                   &target_list);
++                                   &target_list, last_scanned_ret);
+       if (ret < 0)
+               goto unlock_extent;
+@@ -1449,7 +1465,8 @@ static int defrag_one_cluster(struct btr
+                             u64 start, u32 len, u32 extent_thresh,
+                             u64 newer_than, bool do_compress,
+                             unsigned long *sectors_defragged,
+-                            unsigned long max_sectors)
++                            unsigned long max_sectors,
++                            u64 *last_scanned_ret)
+ {
+       const u32 sectorsize = inode->root->fs_info->sectorsize;
+       struct defrag_target_range *entry;
+@@ -1460,7 +1477,7 @@ static int defrag_one_cluster(struct btr
+       BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
+       ret = defrag_collect_targets(inode, start, len, extent_thresh,
+                                    newer_than, do_compress, false,
+-                                   &target_list);
++                                   &target_list, NULL);
+       if (ret < 0)
+               goto out;
+@@ -1477,6 +1494,15 @@ static int defrag_one_cluster(struct btr
+                       range_len = min_t(u32, range_len,
+                               (max_sectors - *sectors_defragged) * sectorsize);
++              /*
++               * If defrag_one_range() has updated last_scanned_ret,
++               * our range may already be invalid (e.g. hole punched).
++               * Skip if our range is before last_scanned_ret, as there is
++               * no need to defrag the range anymore.
++               */
++              if (entry->start + range_len <= *last_scanned_ret)
++                      continue;
++
+               if (ra)
+                       page_cache_sync_readahead(inode->vfs_inode.i_mapping,
+                               ra, NULL, entry->start >> PAGE_SHIFT,
+@@ -1489,7 +1515,8 @@ static int defrag_one_cluster(struct btr
+                * accounting.
+                */
+               ret = defrag_one_range(inode, entry->start, range_len,
+-                                     extent_thresh, newer_than, do_compress);
++                                     extent_thresh, newer_than, do_compress,
++                                     last_scanned_ret);
+               if (ret < 0)
+                       break;
+               *sectors_defragged += range_len >>
+@@ -1500,6 +1527,8 @@ out:
+               list_del_init(&entry->list);
+               kfree(entry);
+       }
++      if (ret >= 0)
++              *last_scanned_ret = max(*last_scanned_ret, start + len);
+       return ret;
+ }
+@@ -1585,6 +1614,7 @@ int btrfs_defrag_file(struct inode *inod
+       while (cur < last_byte) {
+               const unsigned long prev_sectors_defragged = sectors_defragged;
++              u64 last_scanned = cur;
+               u64 cluster_end;
+               /* The cluster size 256K should always be page aligned */
+@@ -1614,8 +1644,8 @@ int btrfs_defrag_file(struct inode *inod
+                       BTRFS_I(inode)->defrag_compress = compress_type;
+               ret = defrag_one_cluster(BTRFS_I(inode), ra, cur,
+                               cluster_end + 1 - cur, extent_thresh,
+-                              newer_than, do_compress,
+-                              &sectors_defragged, max_to_defrag);
++                              newer_than, do_compress, &sectors_defragged,
++                              max_to_defrag, &last_scanned);
+               if (sectors_defragged > prev_sectors_defragged)
+                       balance_dirty_pages_ratelimited(inode->i_mapping);
+@@ -1623,7 +1653,7 @@ int btrfs_defrag_file(struct inode *inod
+               btrfs_inode_unlock(inode, 0);
+               if (ret < 0)
+                       break;
+-              cur = cluster_end + 1;
++              cur = max(cluster_end + 1, last_scanned);
+               if (ret > 0) {
+                       ret = 0;
+                       break;
diff --git a/queue-5.16/btrfs-defrag-don-t-defrag-extents-which-are-already-at-max-capacity.patch b/queue-5.16/btrfs-defrag-don-t-defrag-extents-which-are-already-at-max-capacity.patch
new file mode 100644 (file)
index 0000000..2fc2aa5
--- /dev/null
@@ -0,0 +1,113 @@
+From 979b25c300dbcbcb750e88715018e04e854de6c6 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Fri, 28 Jan 2022 15:21:21 +0800
+Subject: btrfs: defrag: don't defrag extents which are already at max capacity
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 979b25c300dbcbcb750e88715018e04e854de6c6 upstream.
+
+[BUG]
+For compressed extents, defrag ioctl will always try to defrag any
+compressed extents, wasting not only IO but also CPU time to
+compress/decompress:
+
+   mkfs.btrfs -f $DEV
+   mount -o compress $DEV $MNT
+   xfs_io -f -c "pwrite -S 0xab 0 128K" $MNT/foobar
+   sync
+   xfs_io -f -c "pwrite -S 0xcd 128K 128K" $MNT/foobar
+   sync
+   echo "=== before ==="
+   xfs_io -c "fiemap -v" $MNT/foobar
+   btrfs filesystem defrag $MNT/foobar
+   sync
+   echo "=== after ==="
+   xfs_io -c "fiemap -v" $MNT/foobar
+
+Then it shows the 2 128K extents just get COW for no extra benefit, with
+extra IO/CPU spent:
+
+    === before ===
+    /mnt/btrfs/file1:
+     EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
+       0: [0..255]:        26624..26879       256   0x8
+       1: [256..511]:      26632..26887       256   0x9
+    === after ===
+    /mnt/btrfs/file1:
+     EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
+       0: [0..255]:        26640..26895       256   0x8
+       1: [256..511]:      26648..26903       256   0x9
+
+This affects not only v5.16 (after the defrag rework), but also v5.15
+(before the defrag rework).
+
+[CAUSE]
+From the very beginning, btrfs defrag never checks if one extent is
+already at its max capacity (128K for compressed extents, 128M
+otherwise).
+
+And the default extent size threshold is 256K, which is already beyond
+the compressed extent max size.
+
+This means, by default btrfs defrag ioctl will mark all compressed
+extent which is not adjacent to a hole/preallocated range for defrag.
+
+[FIX]
+Introduce a helper to grab the maximum extent size, and then in
+defrag_collect_targets() and defrag_check_next_extent(), reject extents
+which are already at their max capacity.
+
+Reported-by: Filipe Manana <fdmanana@suse.com>
+CC: stable@vger.kernel.org # 5.16
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ioctl.c |   20 ++++++++++++++++++++
+ 1 file changed, 20 insertions(+)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -1020,6 +1020,13 @@ static struct extent_map *defrag_lookup_
+       return em;
+ }
++static u32 get_extent_max_capacity(const struct extent_map *em)
++{
++      if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
++              return BTRFS_MAX_COMPRESSED;
++      return BTRFS_MAX_EXTENT_SIZE;
++}
++
+ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em,
+                                    bool locked)
+ {
+@@ -1036,6 +1043,12 @@ static bool defrag_check_next_extent(str
+               goto out;
+       if (test_bit(EXTENT_FLAG_PREALLOC, &next->flags))
+               goto out;
++      /*
++       * If the next extent is at its max capacity, defragging current extent
++       * makes no sense, as the total number of extents won't change.
++       */
++      if (next->len >= get_extent_max_capacity(em))
++              goto out;
+       /* Physically adjacent and large enough */
+       if ((em->block_start + em->block_len == next->block_start) &&
+           (em->block_len > SZ_128K && next->block_len > SZ_128K))
+@@ -1233,6 +1246,13 @@ static int defrag_collect_targets(struct
+               if (range_len >= extent_thresh)
+                       goto next;
++              /*
++               * Skip extents already at its max capacity, this is mostly for
++               * compressed extents, which max cap is only 128K.
++               */
++              if (em->len >= get_extent_max_capacity(em))
++                      goto next;
++
+               next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em,
+                                                         locked);
+               if (!next_mergeable) {
diff --git a/queue-5.16/btrfs-defrag-don-t-try-to-merge-regular-extents-with-preallocated-extents.patch b/queue-5.16/btrfs-defrag-don-t-try-to-merge-regular-extents-with-preallocated-extents.patch
new file mode 100644 (file)
index 0000000..6d07baa
--- /dev/null
@@ -0,0 +1,118 @@
+From 7093f15291e95f16dfb5a93307eda3272bfe1108 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Fri, 28 Jan 2022 15:21:20 +0800
+Subject: btrfs: defrag: don't try to merge regular extents with preallocated extents
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 7093f15291e95f16dfb5a93307eda3272bfe1108 upstream.
+
+[BUG]
+With older kernels (before v5.16), btrfs will defrag preallocated extents.
+While with newer kernels (v5.16 and newer) btrfs will not defrag
+preallocated extents, but it will defrag the extent just before the
+preallocated extent, even if it's just a single sector.
+
+This can be exposed by the following small script:
+
+       mkfs.btrfs -f $dev > /dev/null
+
+       mount $dev $mnt
+       xfs_io -f -c "pwrite 0 4k" -c sync -c "falloc 4k 16K" $mnt/file
+       xfs_io -c "fiemap -v" $mnt/file
+       btrfs fi defrag $mnt/file
+       sync
+       xfs_io -c "fiemap -v" $mnt/file
+
+The output looks like this on older kernels:
+
+/mnt/btrfs/file:
+ EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
+   0: [0..7]:          26624..26631         8   0x0
+   1: [8..39]:         26632..26663        32 0x801
+/mnt/btrfs/file:
+ EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
+   0: [0..39]:         26664..26703        40   0x1
+
+Which defrags the single sector along with the preallocated extent, and
+replace them with a regular extent into a new location (caused by data
+COW).
+This wastes most of the data IO just for the preallocated range.
+
+On the other hand, v5.16 is slightly better:
+
+/mnt/btrfs/file:
+ EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
+   0: [0..7]:          26624..26631         8   0x0
+   1: [8..39]:         26632..26663        32 0x801
+/mnt/btrfs/file:
+ EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
+   0: [0..7]:          26664..26671         8   0x0
+   1: [8..39]:         26632..26663        32 0x801
+
+The preallocated range is not defragged, but the sector before it still
+gets defragged, which has no need for it.
+
+[CAUSE]
+One of the functions reused by the old and new behavior is
+defrag_check_next_extent(), it will determine if we should defrag
+current extent by checking the next one.
+
+It only checks if the next extent is a hole or inlined, but it doesn't
+check if it's preallocated.
+
+On the other hand, out of the function, both old and new kernel will
+reject preallocated extents.
+
+Such inconsistent behavior causes above behavior.
+
+[FIX]
+- Also check if next extent is preallocated
+  If so, don't defrag current extent.
+
+- Add comments for each branch why we reject the extent
+
+This will reduce the IO caused by defrag ioctl and autodefrag.
+
+CC: stable@vger.kernel.org # 5.16
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ioctl.c |   17 +++++++++++------
+ 1 file changed, 11 insertions(+), 6 deletions(-)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -1024,19 +1024,24 @@ static bool defrag_check_next_extent(str
+                                    bool locked)
+ {
+       struct extent_map *next;
+-      bool ret = true;
++      bool ret = false;
+       /* this is the last extent */
+       if (em->start + em->len >= i_size_read(inode))
+               return false;
+       next = defrag_lookup_extent(inode, em->start + em->len, locked);
++      /* No more em or hole */
+       if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
+-              ret = false;
+-      else if ((em->block_start + em->block_len == next->block_start) &&
+-               (em->block_len > SZ_128K && next->block_len > SZ_128K))
+-              ret = false;
+-
++              goto out;
++      if (test_bit(EXTENT_FLAG_PREALLOC, &next->flags))
++              goto out;
++      /* Physically adjacent and large enough */
++      if ((em->block_start + em->block_len == next->block_start) &&
++          (em->block_len > SZ_128K && next->block_len > SZ_128K))
++              goto out;
++      ret = true;
++out:
+       free_extent_map(next);
+       return ret;
+ }
diff --git a/queue-5.16/btrfs-defrag-remove-an-ambiguous-condition-for-rejection.patch b/queue-5.16/btrfs-defrag-remove-an-ambiguous-condition-for-rejection.patch
new file mode 100644 (file)
index 0000000..d1ad3aa
--- /dev/null
@@ -0,0 +1,55 @@
+From 550f133f6959db927127111b50e483da3a7ce662 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Fri, 28 Jan 2022 15:21:22 +0800
+Subject: btrfs: defrag: remove an ambiguous condition for rejection
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 550f133f6959db927127111b50e483da3a7ce662 upstream.
+
+From the very beginning of btrfs defrag, there is a check to reject
+extents which meet both conditions:
+
+- Physically adjacent
+
+  We may want to defrag physically adjacent extents to reduce the number
+  of extents or the size of subvolume tree.
+
+- Larger than 128K
+
+  This may be there for compressed extents, but unfortunately 128K is
+  exactly the max capacity for compressed extents.
+  And the check is > 128K, thus it never rejects compressed extents.
+
+  Furthermore, the compressed extent capacity bug is fixed by previous
+  patch, there is no reason for that check anymore.
+
+The original check has a very small range to reject (the target extent
+size is > 128K, and default extent threshold is 256K), and for
+compressed extent it doesn't work at all.
+
+So it's better just to remove the rejection, and allow us to defrag
+physically adjacent extents.
+
+CC: stable@vger.kernel.org # 5.16
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ioctl.c |    4 ----
+ 1 file changed, 4 deletions(-)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -1049,10 +1049,6 @@ static bool defrag_check_next_extent(str
+        */
+       if (next->len >= get_extent_max_capacity(em))
+               goto out;
+-      /* Physically adjacent and large enough */
+-      if ((em->block_start + em->block_len == next->block_start) &&
+-          (em->block_len > SZ_128K && next->block_len > SZ_128K))
+-              goto out;
+       ret = true;
+ out:
+       free_extent_map(next);
diff --git a/queue-5.16/btrfs-prevent-copying-too-big-compressed-lzo-segment.patch b/queue-5.16/btrfs-prevent-copying-too-big-compressed-lzo-segment.patch
new file mode 100644 (file)
index 0000000..95bc236
--- /dev/null
@@ -0,0 +1,83 @@
+From 741b23a970a79d5d3a1db2d64fa2c7b375a4febb Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?D=C4=81vis=20Mos=C4=81ns?= <davispuh@gmail.com>
+Date: Wed, 2 Feb 2022 23:44:55 +0200
+Subject: btrfs: prevent copying too big compressed lzo segment
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Dāvis Mosāns <davispuh@gmail.com>
+
+commit 741b23a970a79d5d3a1db2d64fa2c7b375a4febb upstream.
+
+Compressed length can be corrupted to be a lot larger than memory
+we have allocated for buffer.
+This will cause memcpy in copy_compressed_segment to write outside
+of allocated memory.
+
+This mostly results in stuck read syscall but sometimes when using
+btrfs send can get #GP
+
+  kernel: general protection fault, probably for non-canonical address 0x841551d5c1000: 0000 [#1] PREEMPT SMP NOPTI
+  kernel: CPU: 17 PID: 264 Comm: kworker/u256:7 Tainted: P           OE     5.17.0-rc2-1 #12
+  kernel: Workqueue: btrfs-endio btrfs_work_helper [btrfs]
+  kernel: RIP: 0010:lzo_decompress_bio (./include/linux/fortify-string.h:225 fs/btrfs/lzo.c:322 fs/btrfs/lzo.c:394) btrfs
+  Code starting with the faulting instruction
+  ===========================================
+     0:*  48 8b 06                mov    (%rsi),%rax              <-- trapping instruction
+     3:   48 8d 79 08             lea    0x8(%rcx),%rdi
+     7:   48 83 e7 f8             and    $0xfffffffffffffff8,%rdi
+     b:   48 89 01                mov    %rax,(%rcx)
+     e:   44 89 f0                mov    %r14d,%eax
+    11:   48 8b 54 06 f8          mov    -0x8(%rsi,%rax,1),%rdx
+  kernel: RSP: 0018:ffffb110812efd50 EFLAGS: 00010212
+  kernel: RAX: 0000000000001000 RBX: 000000009ca264c8 RCX: ffff98996e6d8ff8
+  kernel: RDX: 0000000000000064 RSI: 000841551d5c1000 RDI: ffffffff9500435d
+  kernel: RBP: ffff989a3be856c0 R08: 0000000000000000 R09: 0000000000000000
+  kernel: R10: 0000000000000000 R11: 0000000000001000 R12: ffff98996e6d8000
+  kernel: R13: 0000000000000008 R14: 0000000000001000 R15: 000841551d5c1000
+  kernel: FS:  0000000000000000(0000) GS:ffff98a09d640000(0000) knlGS:0000000000000000
+  kernel: CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  kernel: CR2: 00001e9f984d9ea8 CR3: 000000014971a000 CR4: 00000000003506e0
+  kernel: Call Trace:
+  kernel:  <TASK>
+  kernel: end_compressed_bio_read (fs/btrfs/compression.c:104 fs/btrfs/compression.c:1363 fs/btrfs/compression.c:323) btrfs
+  kernel: end_workqueue_fn (fs/btrfs/disk-io.c:1923) btrfs
+  kernel: btrfs_work_helper (fs/btrfs/async-thread.c:326) btrfs
+  kernel: process_one_work (./arch/x86/include/asm/jump_label.h:27 ./include/linux/jump_label.h:212 ./include/trace/events/workqueue.h:108 kernel/workqueue.c:2312)
+  kernel: worker_thread (./include/linux/list.h:292 kernel/workqueue.c:2455)
+  kernel: ? process_one_work (kernel/workqueue.c:2397)
+  kernel: kthread (kernel/kthread.c:377)
+  kernel: ? kthread_complete_and_exit (kernel/kthread.c:332)
+  kernel: ret_from_fork (arch/x86/entry/entry_64.S:301)
+  kernel:  </TASK>
+
+CC: stable@vger.kernel.org # 4.9+
+Signed-off-by: Dāvis Mosāns <davispuh@gmail.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/lzo.c |   11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/fs/btrfs/lzo.c
++++ b/fs/btrfs/lzo.c
+@@ -380,6 +380,17 @@ int lzo_decompress_bio(struct list_head
+               kunmap(cur_page);
+               cur_in += LZO_LEN;
++              if (seg_len > lzo1x_worst_compress(PAGE_SIZE)) {
++                      /*
++                       * seg_len shouldn't be larger than we have allocated
++                       * for workspace->cbuf
++                       */
++                      btrfs_err(fs_info, "unexpectedly large lzo segment len %u",
++                                      seg_len);
++                      ret = -EIO;
++                      goto out;
++              }
++
+               /* Copy the compressed segment payload into workspace */
+               copy_compressed_segment(cb, workspace->cbuf, seg_len, &cur_in);
diff --git a/queue-5.16/btrfs-reduce-extent-threshold-for-autodefrag.patch b/queue-5.16/btrfs-reduce-extent-threshold-for-autodefrag.patch
new file mode 100644 (file)
index 0000000..2080f80
--- /dev/null
@@ -0,0 +1,133 @@
+From 558732df2122092259ab4ef85594bee11dbb9104 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Sun, 13 Feb 2022 15:42:33 +0800
+Subject: btrfs: reduce extent threshold for autodefrag
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 558732df2122092259ab4ef85594bee11dbb9104 upstream.
+
+There is a big gap between inode_should_defrag() and autodefrag extent
+size threshold.  For inode_should_defrag() it has a flexible
+@small_write value. For compressed extent is 16K, and for non-compressed
+extent it's 64K.
+
+However for autodefrag extent size threshold, it's always fixed to the
+default value (256K).
+
+This means, the following write sequence will trigger autodefrag to
+defrag ranges which didn't trigger autodefrag:
+
+  pwrite 0 8k
+  sync
+  pwrite 8k 128K
+  sync
+
+The latter 128K write will also be considered as a defrag target (if
+other conditions are met). While only that 8K write is really
+triggering autodefrag.
+
+Such behavior can cause extra IO for autodefrag.
+
+Close the gap, by copying the @small_write value into inode_defrag, so
+that later autodefrag can use the same @small_write value which
+triggered autodefrag.
+
+With the existing transid value, this allows autodefrag really to scan
+the ranges which triggered autodefrag.
+
+Although this behavior change is mostly reducing the extent_thresh value
+for autodefrag, I believe in the future we should allow users to specify
+the autodefrag extent threshold through mount options, but that's an
+other problem to consider in the future.
+
+CC: stable@vger.kernel.org # 5.16+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ctree.h |    2 +-
+ fs/btrfs/file.c  |   15 ++++++++++++++-
+ fs/btrfs/inode.c |    4 ++--
+ 3 files changed, 17 insertions(+), 4 deletions(-)
+
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -3315,7 +3315,7 @@ void btrfs_exclop_finish(struct btrfs_fs
+ int __init btrfs_auto_defrag_init(void);
+ void __cold btrfs_auto_defrag_exit(void);
+ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
+-                         struct btrfs_inode *inode);
++                         struct btrfs_inode *inode, u32 extent_thresh);
+ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
+ void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info);
+ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -49,6 +49,15 @@ struct inode_defrag {
+       /* root objectid */
+       u64 root;
++
++      /*
++       * The extent size threshold for autodefrag.
++       *
++       * This value is different for compressed/non-compressed extents,
++       * thus needs to be passed from higher layer.
++       * (aka, inode_should_defrag())
++       */
++      u32 extent_thresh;
+ };
+ static int __compare_inode_defrag(struct inode_defrag *defrag1,
+@@ -101,6 +110,8 @@ static int __btrfs_add_inode_defrag(stru
+                        */
+                       if (defrag->transid < entry->transid)
+                               entry->transid = defrag->transid;
++                      entry->extent_thresh = min(defrag->extent_thresh,
++                                                 entry->extent_thresh);
+                       return -EEXIST;
+               }
+       }
+@@ -126,7 +137,7 @@ static inline int __need_auto_defrag(str
+  * enabled
+  */
+ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
+-                         struct btrfs_inode *inode)
++                         struct btrfs_inode *inode, u32 extent_thresh)
+ {
+       struct btrfs_root *root = inode->root;
+       struct btrfs_fs_info *fs_info = root->fs_info;
+@@ -152,6 +163,7 @@ int btrfs_add_inode_defrag(struct btrfs_
+       defrag->ino = btrfs_ino(inode);
+       defrag->transid = transid;
+       defrag->root = root->root_key.objectid;
++      defrag->extent_thresh = extent_thresh;
+       spin_lock(&fs_info->defrag_inodes_lock);
+       if (!test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags)) {
+@@ -275,6 +287,7 @@ again:
+       memset(&range, 0, sizeof(range));
+       range.len = (u64)-1;
+       range.start = cur;
++      range.extent_thresh = defrag->extent_thresh;
+       sb_start_write(fs_info->sb);
+       ret = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -561,12 +561,12 @@ static inline int inode_need_compress(st
+ }
+ static inline void inode_should_defrag(struct btrfs_inode *inode,
+-              u64 start, u64 end, u64 num_bytes, u64 small_write)
++              u64 start, u64 end, u64 num_bytes, u32 small_write)
+ {
+       /* If this is a small write inside eof, kick off a defrag */
+       if (num_bytes < small_write &&
+           (start > 0 || end + 1 < inode->disk_i_size))
+-              btrfs_add_inode_defrag(NULL, inode);
++              btrfs_add_inode_defrag(NULL, inode, small_write);
+ }
+ /*
diff --git a/queue-5.16/hugetlbfs-fix-a-truncation-issue-in-hugepages-parameter.patch b/queue-5.16/hugetlbfs-fix-a-truncation-issue-in-hugepages-parameter.patch
new file mode 100644 (file)
index 0000000..e51d648
--- /dev/null
@@ -0,0 +1,55 @@
+From e79ce9832316e09529b212a21278d68240ccbf1f Mon Sep 17 00:00:00 2001
+From: Liu Yuntao <liuyuntao10@huawei.com>
+Date: Fri, 25 Feb 2022 19:11:02 -0800
+Subject: hugetlbfs: fix a truncation issue in hugepages parameter
+
+From: Liu Yuntao <liuyuntao10@huawei.com>
+
+commit e79ce9832316e09529b212a21278d68240ccbf1f upstream.
+
+When we specify a large number for node in hugepages parameter, it may
+be parsed to another number due to truncation in this statement:
+
+       node = tmp;
+
+For example, add following parameter in command line:
+
+       hugepagesz=1G hugepages=4294967297:5
+
+and kernel will allocate 5 hugepages for node 1 instead of ignoring it.
+
+I move the validation check earlier to fix this issue, and slightly
+simplifies the condition here.
+
+Link: https://lkml.kernel.org/r/20220209134018.8242-1-liuyuntao10@huawei.com
+Fixes: b5389086ad7be0 ("hugetlbfs: extend the definition of hugepages parameter to support node allocation")
+Signed-off-by: Liu Yuntao <liuyuntao10@huawei.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/hugetlb.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/mm/hugetlb.c b/mm/hugetlb.c
+index e57650a9404f..f294db835f4b 100644
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -4159,10 +4159,10 @@ static int __init hugepages_setup(char *s)
+                               pr_warn("HugeTLB: architecture can't support node specific alloc, ignoring!\n");
+                               return 0;
+                       }
++                      if (tmp >= nr_online_nodes)
++                              goto invalid;
+                       node = tmp;
+                       p += count + 1;
+-                      if (node < 0 || node >= nr_online_nodes)
+-                              goto invalid;
+                       /* Parse hugepages */
+                       if (sscanf(p, "%lu%n", &tmp, &count) != 1)
+                               goto invalid;
+-- 
+2.35.1
+
diff --git a/queue-5.16/ib-qib-fix-duplicate-sysfs-directory-name.patch b/queue-5.16/ib-qib-fix-duplicate-sysfs-directory-name.patch
new file mode 100644 (file)
index 0000000..8f06519
--- /dev/null
@@ -0,0 +1,39 @@
+From 32f57cb1b2c8d6f20aefec7052b1bfeb7e3b69d4 Mon Sep 17 00:00:00 2001
+From: Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
+Date: Thu, 17 Feb 2022 08:59:32 -0500
+Subject: IB/qib: Fix duplicate sysfs directory name
+
+From: Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
+
+commit 32f57cb1b2c8d6f20aefec7052b1bfeb7e3b69d4 upstream.
+
+The qib driver load has been failing with the following message:
+
+  sysfs: cannot create duplicate filename '/devices/pci0000:80/0000:80:02.0/0000:81:00.0/infiniband/qib0/ports/1/linkcontrol'
+
+The patch below has two "linkcontrol" names causing the duplication.
+
+Fix by using the correct "diag_counters" name on the second instance.
+
+Fixes: 4a7aaf88c89f ("RDMA/qib: Use attributes for the port sysfs")
+Link: https://lore.kernel.org/r/1645106372-23004-1-git-send-email-mike.marciniszyn@cornelisnetworks.com
+Cc: <stable@vger.kernel.org>
+Reviewed-by: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
+Signed-off-by: Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/qib/qib_sysfs.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/infiniband/hw/qib/qib_sysfs.c
++++ b/drivers/infiniband/hw/qib/qib_sysfs.c
+@@ -541,7 +541,7 @@ static struct attribute *port_diagc_attr
+ };
+ static const struct attribute_group port_diagc_group = {
+-      .name = "linkcontrol",
++      .name = "diag_counters",
+       .attrs = port_diagc_attributes,
+ };
diff --git a/queue-5.16/mm-hugetlb-fix-kernel-crash-with-hugetlb-mremap.patch b/queue-5.16/mm-hugetlb-fix-kernel-crash-with-hugetlb-mremap.patch
new file mode 100644 (file)
index 0000000..6a2ae2e
--- /dev/null
@@ -0,0 +1,80 @@
+From db110a99d3367936058727ff4798e3a39c707969 Mon Sep 17 00:00:00 2001
+From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
+Date: Fri, 25 Feb 2022 19:10:56 -0800
+Subject: mm/hugetlb: fix kernel crash with hugetlb mremap
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+
+commit db110a99d3367936058727ff4798e3a39c707969 upstream.
+
+This fixes the below crash:
+
+  kernel BUG at include/linux/mm.h:2373!
+  cpu 0x5d: Vector: 700 (Program Check) at [c00000003c6e76e0]
+      pc: c000000000581a54: pmd_to_page+0x54/0x80
+      lr: c00000000058d184: move_hugetlb_page_tables+0x4e4/0x5b0
+      sp: c00000003c6e7980
+     msr: 9000000000029033
+    current = 0xc00000003bd8d980
+    paca    = 0xc000200fff610100   irqmask: 0x03   irq_happened: 0x01
+      pid   = 9349, comm = hugepage-mremap
+  kernel BUG at include/linux/mm.h:2373!
+    move_hugetlb_page_tables+0x4e4/0x5b0 (link register)
+    move_hugetlb_page_tables+0x22c/0x5b0 (unreliable)
+    move_page_tables+0xdbc/0x1010
+    move_vma+0x254/0x5f0
+    sys_mremap+0x7c0/0x900
+    system_call_exception+0x160/0x2c0
+
+the kernel can't use huge_pte_offset before it set the pte entry because
+a page table lookup check for huge PTE bit in the page table to
+differentiate between a huge pte entry and a pointer to pte page.  A
+huge_pte_alloc won't mark the page table entry huge and hence kernel
+should not use huge_pte_offset after a huge_pte_alloc.
+
+Link: https://lkml.kernel.org/r/20220211063221.99293-1-aneesh.kumar@linux.ibm.com
+Fixes: 550a7d60bd5e ("mm, hugepages: add mremap() support for hugepage backed vma")
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Reviewed-by: Mina Almasry <almasrymina@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/hugetlb.c | 7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+diff --git a/mm/hugetlb.c b/mm/hugetlb.c
+index 61895cc01d09..e57650a9404f 100644
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -4851,14 +4851,13 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
+ }
+ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
+-                        unsigned long new_addr, pte_t *src_pte)
++                        unsigned long new_addr, pte_t *src_pte, pte_t *dst_pte)
+ {
+       struct hstate *h = hstate_vma(vma);
+       struct mm_struct *mm = vma->vm_mm;
+-      pte_t *dst_pte, pte;
+       spinlock_t *src_ptl, *dst_ptl;
++      pte_t pte;
+-      dst_pte = huge_pte_offset(mm, new_addr, huge_page_size(h));
+       dst_ptl = huge_pte_lock(h, mm, dst_pte);
+       src_ptl = huge_pte_lockptr(h, mm, src_pte);
+@@ -4917,7 +4916,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
+               if (!dst_pte)
+                       break;
+-              move_huge_pte(vma, old_addr, new_addr, src_pte);
++              move_huge_pte(vma, old_addr, new_addr, src_pte, dst_pte);
+       }
+       flush_tlb_range(vma, old_end - len, old_end);
+       mmu_notifier_invalidate_range_end(&range);
+-- 
+2.35.1
+
diff --git a/queue-5.16/rdma-cma-do-not-change-route.addr.src_addr-outside-state-checks.patch b/queue-5.16/rdma-cma-do-not-change-route.addr.src_addr-outside-state-checks.patch
new file mode 100644 (file)
index 0000000..d23272f
--- /dev/null
@@ -0,0 +1,114 @@
+From 22e9f71072fa605cbf033158db58e0790101928d Mon Sep 17 00:00:00 2001
+From: Jason Gunthorpe <jgg@nvidia.com>
+Date: Wed, 23 Feb 2022 11:23:57 -0400
+Subject: RDMA/cma: Do not change route.addr.src_addr outside state checks
+
+From: Jason Gunthorpe <jgg@nvidia.com>
+
+commit 22e9f71072fa605cbf033158db58e0790101928d upstream.
+
+If the state is not idle then resolve_prepare_src() should immediately
+fail and no change to global state should happen. However, it
+unconditionally overwrites the src_addr trying to build a temporary any
+address.
+
+For instance if the state is already RDMA_CM_LISTEN then this will corrupt
+the src_addr and would cause the test in cma_cancel_operation():
+
+           if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev)
+
+Which would manifest as this trace from syzkaller:
+
+  BUG: KASAN: use-after-free in __list_add_valid+0x93/0xa0 lib/list_debug.c:26
+  Read of size 8 at addr ffff8881546491e0 by task syz-executor.1/32204
+
+  CPU: 1 PID: 32204 Comm: syz-executor.1 Not tainted 5.12.0-rc8-syzkaller #0
+  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+  Call Trace:
+   __dump_stack lib/dump_stack.c:79 [inline]
+   dump_stack+0x141/0x1d7 lib/dump_stack.c:120
+   print_address_description.constprop.0.cold+0x5b/0x2f8 mm/kasan/report.c:232
+   __kasan_report mm/kasan/report.c:399 [inline]
+   kasan_report.cold+0x7c/0xd8 mm/kasan/report.c:416
+   __list_add_valid+0x93/0xa0 lib/list_debug.c:26
+   __list_add include/linux/list.h:67 [inline]
+   list_add_tail include/linux/list.h:100 [inline]
+   cma_listen_on_all drivers/infiniband/core/cma.c:2557 [inline]
+   rdma_listen+0x787/0xe00 drivers/infiniband/core/cma.c:3751
+   ucma_listen+0x16a/0x210 drivers/infiniband/core/ucma.c:1102
+   ucma_write+0x259/0x350 drivers/infiniband/core/ucma.c:1732
+   vfs_write+0x28e/0xa30 fs/read_write.c:603
+   ksys_write+0x1ee/0x250 fs/read_write.c:658
+   do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
+   entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+This is indicating that an rdma_id_private was destroyed without doing
+cma_cancel_listens().
+
+Instead of trying to re-use the src_addr memory to indirectly create an
+any address derived from the dst build one explicitly on the stack and
+bind to that as any other normal flow would do. rdma_bind_addr() will copy
+it over the src_addr once it knows the state is valid.
+
+This is similar to commit bc0bdc5afaa7 ("RDMA/cma: Do not change
+route.addr.src_addr.ss_family")
+
+Link: https://lore.kernel.org/r/0-v2-e975c8fd9ef2+11e-syz_cma_srcaddr_jgg@nvidia.com
+Cc: stable@vger.kernel.org
+Fixes: 732d41c545bb ("RDMA/cma: Make the locking for automatic state transition more clear")
+Reported-by: syzbot+c94a3675a626f6333d74@syzkaller.appspotmail.com
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/core/cma.c |   38 +++++++++++++++++++++++---------------
+ 1 file changed, 23 insertions(+), 15 deletions(-)
+
+--- a/drivers/infiniband/core/cma.c
++++ b/drivers/infiniband/core/cma.c
+@@ -3370,22 +3370,30 @@ err:
+ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
+                        const struct sockaddr *dst_addr)
+ {
+-      if (!src_addr || !src_addr->sa_family) {
+-              src_addr = (struct sockaddr *) &id->route.addr.src_addr;
+-              src_addr->sa_family = dst_addr->sa_family;
+-              if (IS_ENABLED(CONFIG_IPV6) &&
+-                  dst_addr->sa_family == AF_INET6) {
+-                      struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr;
+-                      struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr;
+-                      src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
+-                      if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+-                              id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id;
+-              } else if (dst_addr->sa_family == AF_IB) {
+-                      ((struct sockaddr_ib *) src_addr)->sib_pkey =
+-                              ((struct sockaddr_ib *) dst_addr)->sib_pkey;
+-              }
++      struct sockaddr_storage zero_sock = {};
++
++      if (src_addr && src_addr->sa_family)
++              return rdma_bind_addr(id, src_addr);
++
++      /*
++       * When the src_addr is not specified, automatically supply an any addr
++       */
++      zero_sock.ss_family = dst_addr->sa_family;
++      if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) {
++              struct sockaddr_in6 *src_addr6 =
++                      (struct sockaddr_in6 *)&zero_sock;
++              struct sockaddr_in6 *dst_addr6 =
++                      (struct sockaddr_in6 *)dst_addr;
++
++              src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
++              if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
++                      id->route.addr.dev_addr.bound_dev_if =
++                              dst_addr6->sin6_scope_id;
++      } else if (dst_addr->sa_family == AF_IB) {
++              ((struct sockaddr_ib *)&zero_sock)->sib_pkey =
++                      ((struct sockaddr_ib *)dst_addr)->sib_pkey;
+       }
+-      return rdma_bind_addr(id, src_addr);
++      return rdma_bind_addr(id, (struct sockaddr *)&zero_sock);
+ }
+ /*
diff --git a/queue-5.16/riscv-fix-nommu_k210_sdcard_defconfig.patch b/queue-5.16/riscv-fix-nommu_k210_sdcard_defconfig.patch
new file mode 100644 (file)
index 0000000..0d96c45
--- /dev/null
@@ -0,0 +1,33 @@
+From 762e52f79c95ea20a7229674ffd13b94d7d8959c Mon Sep 17 00:00:00 2001
+From: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Date: Wed, 9 Feb 2022 12:56:23 +0900
+Subject: riscv: fix nommu_k210_sdcard_defconfig
+
+From: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+
+commit 762e52f79c95ea20a7229674ffd13b94d7d8959c upstream.
+
+Instead of an arbitrary delay, use the "rootwait" kernel option to wait
+for the mmc root device to be ready.
+
+Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Reviewed-by: Anup Patel <anup@brainfault.org>
+Fixes: 7e09fd3994c5 ("riscv: Add Canaan Kendryte K210 SD card defconfig")
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/configs/nommu_k210_sdcard_defconfig |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/riscv/configs/nommu_k210_sdcard_defconfig
++++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig
+@@ -23,7 +23,7 @@ CONFIG_SLOB=y
+ CONFIG_SOC_CANAAN=y
+ CONFIG_SMP=y
+ CONFIG_NR_CPUS=2
+-CONFIG_CMDLINE="earlycon console=ttySIF0 rootdelay=2 root=/dev/mmcblk0p1 ro"
++CONFIG_CMDLINE="earlycon console=ttySIF0 root=/dev/mmcblk0p1 rootwait ro"
+ CONFIG_CMDLINE_FORCE=y
+ # CONFIG_SECCOMP is not set
+ # CONFIG_STACKPROTECTOR is not set
diff --git a/queue-5.16/riscv-fix-oops-caused-by-irqsoff-latency-tracer.patch b/queue-5.16/riscv-fix-oops-caused-by-irqsoff-latency-tracer.patch
new file mode 100644 (file)
index 0000000..b5722a1
--- /dev/null
@@ -0,0 +1,167 @@
+From 22e2100b1b07d6f5acc71cc1acb53f680c677d77 Mon Sep 17 00:00:00 2001
+From: Changbin Du <changbin.du@gmail.com>
+Date: Sun, 13 Feb 2022 16:18:45 +0800
+Subject: riscv: fix oops caused by irqsoff latency tracer
+
+From: Changbin Du <changbin.du@gmail.com>
+
+commit 22e2100b1b07d6f5acc71cc1acb53f680c677d77 upstream.
+
+The trace_hardirqs_{on,off}() require the caller to setup frame pointer
+properly. This because these two functions use macro 'CALLER_ADDR1' (aka.
+__builtin_return_address(1)) to acquire caller info. If the $fp is used
+for other purpose, the code generated this macro (as below) could trigger
+memory access fault.
+
+   0xffffffff8011510e <+80>:    ld      a1,-16(s0)
+   0xffffffff80115112 <+84>:    ld      s2,-8(a1)  # <-- paging fault here
+
+The oops message during booting if compiled with 'irqoff' tracer enabled:
+[    0.039615][    T0] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000f8
+[    0.041925][    T0] Oops [#1]
+[    0.042063][    T0] Modules linked in:
+[    0.042864][    T0] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.17.0-rc1-00233-g9a20c48d1ed2 #29
+[    0.043568][    T0] Hardware name: riscv-virtio,qemu (DT)
+[    0.044343][    T0] epc : trace_hardirqs_on+0x56/0xe2
+[    0.044601][    T0]  ra : restore_all+0x12/0x6e
+[    0.044721][    T0] epc : ffffffff80126a5c ra : ffffffff80003b94 sp : ffffffff81403db0
+[    0.044801][    T0]  gp : ffffffff8163acd8 tp : ffffffff81414880 t0 : 0000000000000020
+[    0.044882][    T0]  t1 : 0098968000000000 t2 : 0000000000000000 s0 : ffffffff81403de0
+[    0.044967][    T0]  s1 : 0000000000000000 a0 : 0000000000000001 a1 : 0000000000000100
+[    0.045046][    T0]  a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000
+[    0.045124][    T0]  a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000054494d45
+[    0.045210][    T0]  s2 : ffffffff80003b94 s3 : ffffffff81a8f1b0 s4 : ffffffff80e27b50
+[    0.045289][    T0]  s5 : ffffffff81414880 s6 : ffffffff8160fa00 s7 : 00000000800120e8
+[    0.045389][    T0]  s8 : 0000000080013100 s9 : 000000000000007f s10: 0000000000000000
+[    0.045474][    T0]  s11: 0000000000000000 t3 : 7fffffffffffffff t4 : 0000000000000000
+[    0.045548][    T0]  t5 : 0000000000000000 t6 : ffffffff814aa368
+[    0.045620][    T0] status: 0000000200000100 badaddr: 00000000000000f8 cause: 000000000000000d
+[    0.046402][    T0] [<ffffffff80003b94>] restore_all+0x12/0x6e
+
+This because the $fp(aka. $s0) register is not used as frame pointer in the
+assembly entry code.
+
+       resume_kernel:
+               REG_L s0, TASK_TI_PREEMPT_COUNT(tp)
+               bnez s0, restore_all
+               REG_L s0, TASK_TI_FLAGS(tp)
+                andi s0, s0, _TIF_NEED_RESCHED
+                beqz s0, restore_all
+                call preempt_schedule_irq
+                j restore_all
+
+To fix above issue, here we add one extra level wrapper for function
+trace_hardirqs_{on,off}() so they can be safely called by low level entry
+code.
+
+Signed-off-by: Changbin Du <changbin.du@gmail.com>
+Fixes: 3c4697982982 ("riscv: Enable LOCKDEP_SUPPORT & fixup TRACE_IRQFLAGS_SUPPORT")
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/kernel/Makefile    |    2 ++
+ arch/riscv/kernel/entry.S     |   10 +++++-----
+ arch/riscv/kernel/trace_irq.c |   27 +++++++++++++++++++++++++++
+ arch/riscv/kernel/trace_irq.h |   11 +++++++++++
+ 4 files changed, 45 insertions(+), 5 deletions(-)
+ create mode 100644 arch/riscv/kernel/trace_irq.c
+ create mode 100644 arch/riscv/kernel/trace_irq.h
+
+--- a/arch/riscv/kernel/Makefile
++++ b/arch/riscv/kernel/Makefile
+@@ -50,6 +50,8 @@ obj-$(CONFIG_MODULE_SECTIONS)        += module-
+ obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o
+ obj-$(CONFIG_DYNAMIC_FTRACE)  += mcount-dyn.o
++obj-$(CONFIG_TRACE_IRQFLAGS)  += trace_irq.o
++
+ obj-$(CONFIG_RISCV_BASE_PMU)  += perf_event.o
+ obj-$(CONFIG_PERF_EVENTS)     += perf_callchain.o
+ obj-$(CONFIG_HAVE_PERF_REGS)  += perf_regs.o
+--- a/arch/riscv/kernel/entry.S
++++ b/arch/riscv/kernel/entry.S
+@@ -108,7 +108,7 @@ _save_context:
+ .option pop
+ #ifdef CONFIG_TRACE_IRQFLAGS
+-      call trace_hardirqs_off
++      call __trace_hardirqs_off
+ #endif
+ #ifdef CONFIG_CONTEXT_TRACKING
+@@ -143,7 +143,7 @@ skip_context_tracking:
+       li t0, EXC_BREAKPOINT
+       beq s4, t0, 1f
+ #ifdef CONFIG_TRACE_IRQFLAGS
+-      call trace_hardirqs_on
++      call __trace_hardirqs_on
+ #endif
+       csrs CSR_STATUS, SR_IE
+@@ -234,7 +234,7 @@ ret_from_exception:
+       REG_L s0, PT_STATUS(sp)
+       csrc CSR_STATUS, SR_IE
+ #ifdef CONFIG_TRACE_IRQFLAGS
+-      call trace_hardirqs_off
++      call __trace_hardirqs_off
+ #endif
+ #ifdef CONFIG_RISCV_M_MODE
+       /* the MPP value is too large to be used as an immediate arg for addi */
+@@ -270,10 +270,10 @@ restore_all:
+       REG_L s1, PT_STATUS(sp)
+       andi t0, s1, SR_PIE
+       beqz t0, 1f
+-      call trace_hardirqs_on
++      call __trace_hardirqs_on
+       j 2f
+ 1:
+-      call trace_hardirqs_off
++      call __trace_hardirqs_off
+ 2:
+ #endif
+       REG_L a0, PT_STATUS(sp)
+--- /dev/null
++++ b/arch/riscv/kernel/trace_irq.c
+@@ -0,0 +1,27 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com>
++ */
++
++#include <linux/irqflags.h>
++#include <linux/kprobes.h>
++#include "trace_irq.h"
++
++/*
++ * trace_hardirqs_on/off require the caller to setup frame pointer properly.
++ * Otherwise, CALLER_ADDR1 might trigger an pagging exception in kernel.
++ * Here we add one extra level so they can be safely called by low
++ * level entry code which $fp is used for other purpose.
++ */
++
++void __trace_hardirqs_on(void)
++{
++      trace_hardirqs_on();
++}
++NOKPROBE_SYMBOL(__trace_hardirqs_on);
++
++void __trace_hardirqs_off(void)
++{
++      trace_hardirqs_off();
++}
++NOKPROBE_SYMBOL(__trace_hardirqs_off);
+--- /dev/null
++++ b/arch/riscv/kernel/trace_irq.h
+@@ -0,0 +1,11 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com>
++ */
++#ifndef __TRACE_IRQ_H
++#define __TRACE_IRQ_H
++
++void __trace_hardirqs_on(void);
++void __trace_hardirqs_off(void);
++
++#endif /* __TRACE_IRQ_H */
index dff956a4a69863bfb2d57085529c57a491f2d849..43772b5b0c476dd32c6411eb6ca85133e0df9106 100644 (file)
@@ -135,3 +135,20 @@ xhci-prevent-futile-urb-re-submissions-due-to-incorrect-return-value.patch
 nvmem-core-fix-a-conflict-between-mtd-and-nvmem-on-wp-gpios-property.patch
 mtd-core-fix-a-conflict-between-mtd-and-nvmem-on-wp-gpios-property.patch
 driver-core-free-dma-range-map-when-device-is-released.patch
+btrfs-defrag-don-t-try-to-merge-regular-extents-with-preallocated-extents.patch
+btrfs-defrag-don-t-defrag-extents-which-are-already-at-max-capacity.patch
+btrfs-defrag-remove-an-ambiguous-condition-for-rejection.patch
+btrfs-prevent-copying-too-big-compressed-lzo-segment.patch
+btrfs-defrag-allow-defrag_one_cluster-to-skip-large-extent-which-is-not-a-target.patch
+btrfs-autodefrag-only-scan-one-inode-once.patch
+btrfs-reduce-extent-threshold-for-autodefrag.patch
+rdma-cma-do-not-change-route.addr.src_addr-outside-state-checks.patch
+thermal-int340x-fix-memory-leak-in-int3400_notify.patch
+staging-fbtft-fb_st7789v-reset-display-before-initialization.patch
+tps6598x-clear-int-mask-on-probe-failure.patch
+ib-qib-fix-duplicate-sysfs-directory-name.patch
+riscv-fix-nommu_k210_sdcard_defconfig.patch
+riscv-fix-oops-caused-by-irqsoff-latency-tracer.patch
+mm-hugetlb-fix-kernel-crash-with-hugetlb-mremap.patch
+hugetlbfs-fix-a-truncation-issue-in-hugepages-parameter.patch
+tty-n_gsm-fix-encoding-of-control-signal-octet-bit-dv.patch
diff --git a/queue-5.16/staging-fbtft-fb_st7789v-reset-display-before-initialization.patch b/queue-5.16/staging-fbtft-fb_st7789v-reset-display-before-initialization.patch
new file mode 100644 (file)
index 0000000..4bc0c7c
--- /dev/null
@@ -0,0 +1,33 @@
+From b6821b0d9b56386d2bf14806f90ec401468c799f Mon Sep 17 00:00:00 2001
+From: Oliver Graute <oliver.graute@kococonnector.com>
+Date: Thu, 10 Feb 2022 09:53:22 +0100
+Subject: staging: fbtft: fb_st7789v: reset display before initialization
+
+From: Oliver Graute <oliver.graute@kococonnector.com>
+
+commit b6821b0d9b56386d2bf14806f90ec401468c799f upstream.
+
+In rare cases the display is flipped or mirrored. This was observed more
+often in a low temperature environment. A clean reset on init_display()
+should help to get registers in a sane state.
+
+Fixes: ef8f317795da (staging: fbtft: use init function instead of init sequence)
+Cc: stable@vger.kernel.org
+Signed-off-by: Oliver Graute <oliver.graute@kococonnector.com>
+Link: https://lore.kernel.org/r/20220210085322.15676-1-oliver.graute@kococonnector.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/staging/fbtft/fb_st7789v.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/staging/fbtft/fb_st7789v.c
++++ b/drivers/staging/fbtft/fb_st7789v.c
+@@ -144,6 +144,8 @@ static int init_display(struct fbtft_par
+ {
+       int rc;
++      par->fbtftops.reset(par);
++
+       rc = init_tearing_effect_line(par);
+       if (rc)
+               return rc;
diff --git a/queue-5.16/thermal-int340x-fix-memory-leak-in-int3400_notify.patch b/queue-5.16/thermal-int340x-fix-memory-leak-in-int3400_notify.patch
new file mode 100644 (file)
index 0000000..26aa7a3
--- /dev/null
@@ -0,0 +1,52 @@
+From 3abea10e6a8f0e7804ed4c124bea2d15aca977c8 Mon Sep 17 00:00:00 2001
+From: Chuansheng Liu <chuansheng.liu@intel.com>
+Date: Wed, 23 Feb 2022 08:20:24 +0800
+Subject: thermal: int340x: fix memory leak in int3400_notify()
+
+From: Chuansheng Liu <chuansheng.liu@intel.com>
+
+commit 3abea10e6a8f0e7804ed4c124bea2d15aca977c8 upstream.
+
+It is easy to hit the below memory leaks in my TigerLake platform:
+
+unreferenced object 0xffff927c8b91dbc0 (size 32):
+  comm "kworker/0:2", pid 112, jiffies 4294893323 (age 83.604s)
+  hex dump (first 32 bytes):
+    4e 41 4d 45 3d 49 4e 54 33 34 30 30 20 54 68 65  NAME=INT3400 The
+    72 6d 61 6c 00 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b a5  rmal.kkkkkkkkkk.
+  backtrace:
+    [<ffffffff9c502c3e>] __kmalloc_track_caller+0x2fe/0x4a0
+    [<ffffffff9c7b7c15>] kvasprintf+0x65/0xd0
+    [<ffffffff9c7b7d6e>] kasprintf+0x4e/0x70
+    [<ffffffffc04cb662>] int3400_notify+0x82/0x120 [int3400_thermal]
+    [<ffffffff9c8b7358>] acpi_ev_notify_dispatch+0x54/0x71
+    [<ffffffff9c88f1a7>] acpi_os_execute_deferred+0x17/0x30
+    [<ffffffff9c2c2c0a>] process_one_work+0x21a/0x3f0
+    [<ffffffff9c2c2e2a>] worker_thread+0x4a/0x3b0
+    [<ffffffff9c2cb4dd>] kthread+0xfd/0x130
+    [<ffffffff9c201c1f>] ret_from_fork+0x1f/0x30
+
+Fix it by calling kfree() accordingly.
+
+Fixes: 38e44da59130 ("thermal: int3400_thermal: process "thermal table changed" event")
+Signed-off-by: Chuansheng Liu <chuansheng.liu@intel.com>
+Cc: 4.14+ <stable@vger.kernel.org> # 4.14+
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/thermal/intel/int340x_thermal/int3400_thermal.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
++++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
+@@ -404,6 +404,10 @@ static void int3400_notify(acpi_handle h
+       thermal_prop[3] = kasprintf(GFP_KERNEL, "EVENT=%d", therm_event);
+       thermal_prop[4] = NULL;
+       kobject_uevent_env(&priv->thermal->device.kobj, KOBJ_CHANGE, thermal_prop);
++      kfree(thermal_prop[0]);
++      kfree(thermal_prop[1]);
++      kfree(thermal_prop[2]);
++      kfree(thermal_prop[3]);
+ }
+ static int int3400_thermal_get_temp(struct thermal_zone_device *thermal,
diff --git a/queue-5.16/tps6598x-clear-int-mask-on-probe-failure.patch b/queue-5.16/tps6598x-clear-int-mask-on-probe-failure.patch
new file mode 100644 (file)
index 0000000..e63dd5c
--- /dev/null
@@ -0,0 +1,59 @@
+From aba2081e0a9c977396124aa6df93b55ed5912b19 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Tue, 15 Feb 2022 11:22:04 -0700
+Subject: tps6598x: clear int mask on probe failure
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit aba2081e0a9c977396124aa6df93b55ed5912b19 upstream.
+
+The interrupt mask is enabled before any potential failure points in
+the driver, which can leave a failure path where we exit with
+interrupts enabled but the device not live. This causes an infinite
+stream of interrupts on an Apple M1 Pro laptop on USB-C.
+
+Add a failure label that's used post enabling interrupts, where we
+mask them again before returning an error.
+
+Suggested-by: Sven Peter <sven@svenpeter.dev>
+Cc: stable <stable@vger.kernel.org>
+Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Link: https://lore.kernel.org/r/e6b80669-20f3-06e7-9ed5-8951a9c6db6f@kernel.dk
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/usb/typec/tipd/core.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c
+index 6d27a5b5e3ca..7ffcda94d323 100644
+--- a/drivers/usb/typec/tipd/core.c
++++ b/drivers/usb/typec/tipd/core.c
+@@ -761,12 +761,12 @@ static int tps6598x_probe(struct i2c_client *client)
+       ret = tps6598x_read32(tps, TPS_REG_STATUS, &status);
+       if (ret < 0)
+-              return ret;
++              goto err_clear_mask;
+       trace_tps6598x_status(status);
+       ret = tps6598x_read32(tps, TPS_REG_SYSTEM_CONF, &conf);
+       if (ret < 0)
+-              return ret;
++              goto err_clear_mask;
+       /*
+        * This fwnode has a "compatible" property, but is never populated as a
+@@ -855,7 +855,8 @@ static int tps6598x_probe(struct i2c_client *client)
+       usb_role_switch_put(tps->role_sw);
+ err_fwnode_put:
+       fwnode_handle_put(fwnode);
+-
++err_clear_mask:
++      tps6598x_write64(tps, TPS_REG_INT_MASK1, 0);
+       return ret;
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.16/tty-n_gsm-fix-encoding-of-control-signal-octet-bit-dv.patch b/queue-5.16/tty-n_gsm-fix-encoding-of-control-signal-octet-bit-dv.patch
new file mode 100644 (file)
index 0000000..6338667
--- /dev/null
@@ -0,0 +1,44 @@
+From 737b0ef3be6b319d6c1fd64193d1603311969326 Mon Sep 17 00:00:00 2001
+From: "daniel.starke@siemens.com" <daniel.starke@siemens.com>
+Date: Thu, 17 Feb 2022 23:31:17 -0800
+Subject: tty: n_gsm: fix encoding of control signal octet bit DV
+
+From: daniel.starke@siemens.com <daniel.starke@siemens.com>
+
+commit 737b0ef3be6b319d6c1fd64193d1603311969326 upstream.
+
+n_gsm is based on the 3GPP 07.010 and its newer version is the 3GPP 27.010.
+See https://portal.3gpp.org/desktopmodules/Specifications/SpecificationDetails.aspx?specificationId=1516
+The changes from 07.010 to 27.010 are non-functional. Therefore, I refer to
+the newer 27.010 here. Chapter 5.4.6.3.7 describes the encoding of the
+control signal octet used by the MSC (modem status command). The same
+encoding is also used in convergence layer type 2 as described in chapter
+5.5.2. Table 7 and 24 both require the DV (data valid) bit to be set 1 for
+outgoing control signal octets sent by the DTE (data terminal equipment),
+i.e. for the initiator side.
+Currently, the DV bit is only set if CD (carrier detect) is on, regardless
+of the side.
+
+This patch fixes this behavior by setting the DV bit on the initiator side
+unconditionally.
+
+Fixes: e1eaea46bb40 ("tty: n_gsm line discipline")
+Cc: stable@vger.kernel.org
+Signed-off-by: Daniel Starke <daniel.starke@siemens.com>
+Link: https://lore.kernel.org/r/20220218073123.2121-1-daniel.starke@siemens.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/tty/n_gsm.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/tty/n_gsm.c
++++ b/drivers/tty/n_gsm.c
+@@ -439,7 +439,7 @@ static u8 gsm_encode_modem(const struct
+               modembits |= MDM_RTR;
+       if (dlci->modem_tx & TIOCM_RI)
+               modembits |= MDM_IC;
+-      if (dlci->modem_tx & TIOCM_CD)
++      if (dlci->modem_tx & TIOCM_CD || dlci->gsm->initiator)
+               modembits |= MDM_DV;
+       return modembits;
+ }