]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.10-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 29 Aug 2022 07:48:29 +0000 (09:48 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 29 Aug 2022 07:48:29 +0000 (09:48 +0200)
added patches:
acpi-processor-remove-freq-qos-request-for-all-cpus.patch
asm-generic-sections-refactor-memory_intersects.patch
btrfs-add-info-when-mount-fails-due-to-stale-replace-target.patch
btrfs-check-if-root-is-readonly-while-setting-security-xattr.patch
btrfs-fix-silent-failure-when-deleting-root-reference.patch
btrfs-replace-drop-assert-for-suspended-replace.patch
loop-check-for-overflow-while-configuring-loop.patch
perf-x86-lbr-enable-the-branch-type-for-the-arch-lbr-by-default.patch
s390-fix-double-free-of-gs-and-ri-cbs-on-fork-failure.patch
x86-bugs-add-unknown-reporting-for-mmio-stale-data.patch
x86-unwind-orc-unwind-ftrace-trampolines-with-correct-orc-entry.patch
xen-privcmd-fix-error-exit-of-privcmd_ioctl_dm_op.patch

13 files changed:
queue-5.10/acpi-processor-remove-freq-qos-request-for-all-cpus.patch [new file with mode: 0644]
queue-5.10/asm-generic-sections-refactor-memory_intersects.patch [new file with mode: 0644]
queue-5.10/btrfs-add-info-when-mount-fails-due-to-stale-replace-target.patch [new file with mode: 0644]
queue-5.10/btrfs-check-if-root-is-readonly-while-setting-security-xattr.patch [new file with mode: 0644]
queue-5.10/btrfs-fix-silent-failure-when-deleting-root-reference.patch [new file with mode: 0644]
queue-5.10/btrfs-replace-drop-assert-for-suspended-replace.patch [new file with mode: 0644]
queue-5.10/loop-check-for-overflow-while-configuring-loop.patch [new file with mode: 0644]
queue-5.10/perf-x86-lbr-enable-the-branch-type-for-the-arch-lbr-by-default.patch [new file with mode: 0644]
queue-5.10/s390-fix-double-free-of-gs-and-ri-cbs-on-fork-failure.patch [new file with mode: 0644]
queue-5.10/series
queue-5.10/x86-bugs-add-unknown-reporting-for-mmio-stale-data.patch [new file with mode: 0644]
queue-5.10/x86-unwind-orc-unwind-ftrace-trampolines-with-correct-orc-entry.patch [new file with mode: 0644]
queue-5.10/xen-privcmd-fix-error-exit-of-privcmd_ioctl_dm_op.patch [new file with mode: 0644]

diff --git a/queue-5.10/acpi-processor-remove-freq-qos-request-for-all-cpus.patch b/queue-5.10/acpi-processor-remove-freq-qos-request-for-all-cpus.patch
new file mode 100644 (file)
index 0000000..1467bea
--- /dev/null
@@ -0,0 +1,38 @@
+From 36527b9d882362567ceb4eea8666813280f30e6f Mon Sep 17 00:00:00 2001
+From: Riwen Lu <luriwen@kylinos.cn>
+Date: Tue, 23 Aug 2022 15:43:42 +0800
+Subject: ACPI: processor: Remove freq Qos request for all CPUs
+
+From: Riwen Lu <luriwen@kylinos.cn>
+
+commit 36527b9d882362567ceb4eea8666813280f30e6f upstream.
+
+The freq Qos request would be removed repeatedly if the cpufreq policy
+relates to more than one CPU. Then, it would cause the "called for unknown
+object" warning.
+
+Remove the freq Qos request for each CPU relates to the cpufreq policy,
+instead of removing repeatedly for the last CPU of it.
+
+Fixes: a1bb46c36ce3 ("ACPI: processor: Add QoS requests for all CPUs")
+Reported-by: Jeremy Linton <Jeremy.Linton@arm.com>
+Tested-by: Jeremy Linton <jeremy.linton@arm.com>
+Signed-off-by: Riwen Lu <luriwen@kylinos.cn>
+Cc: 5.4+ <stable@vger.kernel.org> # 5.4+
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/acpi/processor_thermal.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/acpi/processor_thermal.c
++++ b/drivers/acpi/processor_thermal.c
+@@ -148,7 +148,7 @@ void acpi_thermal_cpufreq_exit(struct cp
+       unsigned int cpu;
+       for_each_cpu(cpu, policy->related_cpus) {
+-              struct acpi_processor *pr = per_cpu(processors, policy->cpu);
++              struct acpi_processor *pr = per_cpu(processors, cpu);
+               if (pr)
+                       freq_qos_remove_request(&pr->thermal_req);
diff --git a/queue-5.10/asm-generic-sections-refactor-memory_intersects.patch b/queue-5.10/asm-generic-sections-refactor-memory_intersects.patch
new file mode 100644 (file)
index 0000000..6c6a1e6
--- /dev/null
@@ -0,0 +1,96 @@
+From 0c7d7cc2b4fe2e74ef8728f030f0f1674f9f6aee Mon Sep 17 00:00:00 2001
+From: Quanyang Wang <quanyang.wang@windriver.com>
+Date: Fri, 19 Aug 2022 16:11:45 +0800
+Subject: asm-generic: sections: refactor memory_intersects
+
+From: Quanyang Wang <quanyang.wang@windriver.com>
+
+commit 0c7d7cc2b4fe2e74ef8728f030f0f1674f9f6aee upstream.
+
+There are two problems with the current code of memory_intersects:
+
+First, it doesn't check whether the region (begin, end) falls inside the
+region (virt, vend), that is (virt < begin && vend > end).
+
+The second problem is if vend is equal to begin, it will return true but
+this is wrong since vend (virt + size) is not the last address of the
+memory region but (virt + size -1) is.  The wrong determination will
+trigger the misreporting when the function check_for_illegal_area calls
+memory_intersects to check if the dma region intersects with stext region.
+
+The misreporting is as below (stext is at 0x80100000):
+ WARNING: CPU: 0 PID: 77 at kernel/dma/debug.c:1073 check_for_illegal_area+0x130/0x168
+ DMA-API: chipidea-usb2 e0002000.usb: device driver maps memory from kernel text or rodata [addr=800f0000] [len=65536]
+ Modules linked in:
+ CPU: 1 PID: 77 Comm: usb-storage Not tainted 5.19.0-yocto-standard #5
+ Hardware name: Xilinx Zynq Platform
+  unwind_backtrace from show_stack+0x18/0x1c
+  show_stack from dump_stack_lvl+0x58/0x70
+  dump_stack_lvl from __warn+0xb0/0x198
+  __warn from warn_slowpath_fmt+0x80/0xb4
+  warn_slowpath_fmt from check_for_illegal_area+0x130/0x168
+  check_for_illegal_area from debug_dma_map_sg+0x94/0x368
+  debug_dma_map_sg from __dma_map_sg_attrs+0x114/0x128
+  __dma_map_sg_attrs from dma_map_sg_attrs+0x18/0x24
+  dma_map_sg_attrs from usb_hcd_map_urb_for_dma+0x250/0x3b4
+  usb_hcd_map_urb_for_dma from usb_hcd_submit_urb+0x194/0x214
+  usb_hcd_submit_urb from usb_sg_wait+0xa4/0x118
+  usb_sg_wait from usb_stor_bulk_transfer_sglist+0xa0/0xec
+  usb_stor_bulk_transfer_sglist from usb_stor_bulk_srb+0x38/0x70
+  usb_stor_bulk_srb from usb_stor_Bulk_transport+0x150/0x360
+  usb_stor_Bulk_transport from usb_stor_invoke_transport+0x38/0x440
+  usb_stor_invoke_transport from usb_stor_control_thread+0x1e0/0x238
+  usb_stor_control_thread from kthread+0xf8/0x104
+  kthread from ret_from_fork+0x14/0x2c
+
+Refactor memory_intersects to fix the two problems above.
+
+Before the 1d7db834a027e ("dma-debug: use memory_intersects()
+directly"), memory_intersects is called only by printk_late_init:
+
+printk_late_init -> init_section_intersects ->memory_intersects.
+
+There were few places where memory_intersects was called.
+
+When commit 1d7db834a027e ("dma-debug: use memory_intersects()
+directly") was merged and CONFIG_DMA_API_DEBUG is enabled, the DMA
+subsystem uses it to check for an illegal area and the calltrace above
+is triggered.
+
+[akpm@linux-foundation.org: fix nearby comment typo]
+Link: https://lkml.kernel.org/r/20220819081145.948016-1-quanyang.wang@windriver.com
+Fixes: 979559362516 ("asm/sections: add helpers to check for section data")
+Signed-off-by: Quanyang Wang <quanyang.wang@windriver.com>
+Cc: Ard Biesheuvel <ardb@kernel.org>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Cc: Thierry Reding <treding@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/asm-generic/sections.h |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/include/asm-generic/sections.h
++++ b/include/asm-generic/sections.h
+@@ -114,7 +114,7 @@ static inline bool memory_contains(void
+ /**
+  * memory_intersects - checks if the region occupied by an object intersects
+  *                     with another memory region
+- * @begin: virtual address of the beginning of the memory regien
++ * @begin: virtual address of the beginning of the memory region
+  * @end: virtual address of the end of the memory region
+  * @virt: virtual address of the memory object
+  * @size: size of the memory object
+@@ -127,7 +127,10 @@ static inline bool memory_intersects(voi
+ {
+       void *vend = virt + size;
+-      return (virt >= begin && virt < end) || (vend >= begin && vend < end);
++      if (virt < end && vend > begin)
++              return true;
++
++      return false;
+ }
+ /**
diff --git a/queue-5.10/btrfs-add-info-when-mount-fails-due-to-stale-replace-target.patch b/queue-5.10/btrfs-add-info-when-mount-fails-due-to-stale-replace-target.patch
new file mode 100644 (file)
index 0000000..ba80a0f
--- /dev/null
@@ -0,0 +1,47 @@
+From f2c3bec215694fb8bc0ef5010f2a758d1906fc2d Mon Sep 17 00:00:00 2001
+From: Anand Jain <anand.jain@oracle.com>
+Date: Fri, 12 Aug 2022 18:32:19 +0800
+Subject: btrfs: add info when mount fails due to stale replace target
+
+From: Anand Jain <anand.jain@oracle.com>
+
+commit f2c3bec215694fb8bc0ef5010f2a758d1906fc2d upstream.
+
+If the replace target device reappears after the suspended replace is
+cancelled, it blocks the mount operation as it can't find the matching
+replace-item in the metadata. As shown below,
+
+   BTRFS error (device sda5): replace devid present without an active replace item
+
+To overcome this situation, the user can run the command
+
+   btrfs device scan --forget <replace target device>
+
+and try the mount command again. And also, to avoid repeating the issue,
+superblock on the devid=0 must be wiped.
+
+   wipefs -a device-path-to-devid=0.
+
+This patch adds some info when this situation occurs.
+
+Reported-by: Samuel Greiner <samuel@balkonien.org>
+Link: https://lore.kernel.org/linux-btrfs/b4f62b10-b295-26ea-71f9-9a5c9299d42c@balkonien.org/T/
+CC: stable@vger.kernel.org # 5.0+
+Signed-off-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/dev-replace.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/dev-replace.c
++++ b/fs/btrfs/dev-replace.c
+@@ -161,7 +161,7 @@ no_valid_dev_replace_entry_found:
+               if (btrfs_find_device(fs_info->fs_devices,
+                                     BTRFS_DEV_REPLACE_DEVID, NULL, NULL, false)) {
+                       btrfs_err(fs_info,
+-                      "replace devid present without an active replace item");
++"replace without active item, run 'device scan --forget' on the target device");
+                       ret = -EUCLEAN;
+               } else {
+                       dev_replace->srcdev = NULL;
diff --git a/queue-5.10/btrfs-check-if-root-is-readonly-while-setting-security-xattr.patch b/queue-5.10/btrfs-check-if-root-is-readonly-while-setting-security-xattr.patch
new file mode 100644 (file)
index 0000000..1336a7d
--- /dev/null
@@ -0,0 +1,60 @@
+From b51111271b0352aa596c5ae8faf06939e91b3b68 Mon Sep 17 00:00:00 2001
+From: Goldwyn Rodrigues <rgoldwyn@suse.de>
+Date: Tue, 16 Aug 2022 16:42:56 -0500
+Subject: btrfs: check if root is readonly while setting security xattr
+
+From: Goldwyn Rodrigues <rgoldwyn@suse.de>
+
+commit b51111271b0352aa596c5ae8faf06939e91b3b68 upstream.
+
+For a filesystem which has btrfs read-only property set to true, all
+write operations including xattr should be denied. However, security
+xattr can still be changed even if btrfs ro property is true.
+
+This happens because xattr_permission() does not have any restrictions
+on security.*, system.*  and in some cases trusted.* from VFS and
+the decision is left to the underlying filesystem. See comments in
+xattr_permission() for more details.
+
+This patch checks if the root is read-only before performing the set
+xattr operation.
+
+Testcase:
+
+  DEV=/dev/vdb
+  MNT=/mnt
+
+  mkfs.btrfs -f $DEV
+  mount $DEV $MNT
+  echo "file one" > $MNT/f1
+
+  setfattr -n "security.one" -v 2 $MNT/f1
+  btrfs property set /mnt ro true
+
+  setfattr -n "security.one" -v 1 $MNT/f1
+
+  umount $MNT
+
+CC: stable@vger.kernel.org # 4.9+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/xattr.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/btrfs/xattr.c
++++ b/fs/btrfs/xattr.c
+@@ -389,6 +389,9 @@ static int btrfs_xattr_handler_set(const
+                                  const char *name, const void *buffer,
+                                  size_t size, int flags)
+ {
++      if (btrfs_root_readonly(BTRFS_I(inode)->root))
++              return -EROFS;
++
+       name = xattr_full_name(handler, name);
+       return btrfs_setxattr_trans(inode, name, buffer, size, flags);
+ }
diff --git a/queue-5.10/btrfs-fix-silent-failure-when-deleting-root-reference.patch b/queue-5.10/btrfs-fix-silent-failure-when-deleting-root-reference.patch
new file mode 100644 (file)
index 0000000..b9e3096
--- /dev/null
@@ -0,0 +1,43 @@
+From 47bf225a8d2cccb15f7e8d4a1ed9b757dd86afd7 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 22 Aug 2022 15:47:09 +0100
+Subject: btrfs: fix silent failure when deleting root reference
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 47bf225a8d2cccb15f7e8d4a1ed9b757dd86afd7 upstream.
+
+At btrfs_del_root_ref(), if btrfs_search_slot() returns an error, we end
+up returning from the function with a value of 0 (success). This happens
+because the function returns the value stored in the variable 'err',
+which is 0, while the error value we got from btrfs_search_slot() is
+stored in the 'ret' variable.
+
+So fix it by setting 'err' with the error value.
+
+Fixes: 8289ed9f93bef2 ("btrfs: replace the BUG_ON in btrfs_del_root_ref with proper error handling")
+CC: stable@vger.kernel.org # 5.16+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/root-tree.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/root-tree.c
++++ b/fs/btrfs/root-tree.c
+@@ -336,9 +336,10 @@ int btrfs_del_root_ref(struct btrfs_tran
+       key.offset = ref_id;
+ again:
+       ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
+-      if (ret < 0)
++      if (ret < 0) {
++              err = ret;
+               goto out;
+-      if (ret == 0) {
++      } else if (ret == 0) {
+               leaf = path->nodes[0];
+               ref = btrfs_item_ptr(leaf, path->slots[0],
+                                    struct btrfs_root_ref);
diff --git a/queue-5.10/btrfs-replace-drop-assert-for-suspended-replace.patch b/queue-5.10/btrfs-replace-drop-assert-for-suspended-replace.patch
new file mode 100644 (file)
index 0000000..06c8e2f
--- /dev/null
@@ -0,0 +1,55 @@
+From 59a3991984dbc1fc47e5651a265c5200bd85464e Mon Sep 17 00:00:00 2001
+From: Anand Jain <anand.jain@oracle.com>
+Date: Fri, 12 Aug 2022 18:32:18 +0800
+Subject: btrfs: replace: drop assert for suspended replace
+
+From: Anand Jain <anand.jain@oracle.com>
+
+commit 59a3991984dbc1fc47e5651a265c5200bd85464e upstream.
+
+If the filesystem mounts with the replace-operation in a suspended state
+and try to cancel the suspended replace-operation, we hit the assert. The
+assert came from the commit fe97e2e173af ("btrfs: dev-replace: replace's
+scrub must not be running in suspended state") that was actually not
+required. So just remove it.
+
+ $ mount /dev/sda5 /btrfs
+
+    BTRFS info (device sda5): cannot continue dev_replace, tgtdev is missing
+    BTRFS info (device sda5): you may cancel the operation after 'mount -o degraded'
+
+ $ mount -o degraded /dev/sda5 /btrfs <-- success.
+
+ $ btrfs replace cancel /btrfs
+
+    kernel: assertion failed: ret != -ENOTCONN, in fs/btrfs/dev-replace.c:1131
+    kernel: ------------[ cut here ]------------
+    kernel: kernel BUG at fs/btrfs/ctree.h:3750!
+
+After the patch:
+
+ $ btrfs replace cancel /btrfs
+
+    BTRFS info (device sda5): suspended dev_replace from /dev/sda5 (devid 1) to <missing disk> canceled
+
+Fixes: fe97e2e173af ("btrfs: dev-replace: replace's scrub must not be running in suspended state")
+CC: stable@vger.kernel.org # 5.0+
+Signed-off-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/dev-replace.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/fs/btrfs/dev-replace.c
++++ b/fs/btrfs/dev-replace.c
+@@ -954,8 +954,7 @@ int btrfs_dev_replace_cancel(struct btrf
+               up_write(&dev_replace->rwsem);
+               /* Scrub for replace must not be running in suspended state */
+-              ret = btrfs_scrub_cancel(fs_info);
+-              ASSERT(ret != -ENOTCONN);
++              btrfs_scrub_cancel(fs_info);
+               trans = btrfs_start_transaction(root, 0);
+               if (IS_ERR(trans)) {
diff --git a/queue-5.10/loop-check-for-overflow-while-configuring-loop.patch b/queue-5.10/loop-check-for-overflow-while-configuring-loop.patch
new file mode 100644 (file)
index 0000000..828449d
--- /dev/null
@@ -0,0 +1,59 @@
+From c490a0b5a4f36da3918181a8acdc6991d967c5f3 Mon Sep 17 00:00:00 2001
+From: Siddh Raman Pant <code@siddh.me>
+Date: Tue, 23 Aug 2022 21:38:10 +0530
+Subject: loop: Check for overflow while configuring loop
+
+From: Siddh Raman Pant <code@siddh.me>
+
+commit c490a0b5a4f36da3918181a8acdc6991d967c5f3 upstream.
+
+The userspace can configure a loop using an ioctl call, wherein
+a configuration of type loop_config is passed (see lo_ioctl()'s
+case on line 1550 of drivers/block/loop.c). This proceeds to call
+loop_configure() which in turn calls loop_set_status_from_info()
+(see line 1050 of loop.c), passing &config->info which is of type
+loop_info64*. This function then sets the appropriate values, like
+the offset.
+
+loop_device has lo_offset of type loff_t (see line 52 of loop.c),
+which is typdef-chained to long long, whereas loop_info64 has
+lo_offset of type __u64 (see line 56 of include/uapi/linux/loop.h).
+
+The function directly copies offset from info to the device as
+follows (See line 980 of loop.c):
+       lo->lo_offset = info->lo_offset;
+
+This results in an overflow, which triggers a warning in iomap_iter()
+due to a call to iomap_iter_done() which has:
+       WARN_ON_ONCE(iter->iomap.offset > iter->pos);
+
+Thus, check for negative value during loop_set_status_from_info().
+
+Bug report: https://syzkaller.appspot.com/bug?id=c620fe14aac810396d3c3edc9ad73848bf69a29e
+
+Reported-and-tested-by: syzbot+a8e049cd3abd342936b6@syzkaller.appspotmail.com
+Cc: stable@vger.kernel.org
+Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Signed-off-by: Siddh Raman Pant <code@siddh.me>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Link: https://lore.kernel.org/r/20220823160810.181275-1-code@siddh.me
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/block/loop.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/block/loop.c
++++ b/drivers/block/loop.c
+@@ -1031,6 +1031,11 @@ loop_set_status_from_info(struct loop_de
+       lo->lo_offset = info->lo_offset;
+       lo->lo_sizelimit = info->lo_sizelimit;
++
++      /* loff_t vars have been assigned __u64 */
++      if (lo->lo_offset < 0 || lo->lo_sizelimit < 0)
++              return -EOVERFLOW;
++
+       memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
+       memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
+       lo->lo_file_name[LO_NAME_SIZE-1] = 0;
diff --git a/queue-5.10/perf-x86-lbr-enable-the-branch-type-for-the-arch-lbr-by-default.patch b/queue-5.10/perf-x86-lbr-enable-the-branch-type-for-the-arch-lbr-by-default.patch
new file mode 100644 (file)
index 0000000..9eccd21
--- /dev/null
@@ -0,0 +1,60 @@
+From 32ba156df1b1c8804a4e5be5339616945eafea22 Mon Sep 17 00:00:00 2001
+From: Kan Liang <kan.liang@linux.intel.com>
+Date: Tue, 16 Aug 2022 05:56:11 -0700
+Subject: perf/x86/lbr: Enable the branch type for the Arch LBR by default
+
+From: Kan Liang <kan.liang@linux.intel.com>
+
+commit 32ba156df1b1c8804a4e5be5339616945eafea22 upstream.
+
+On the platform with Arch LBR, the HW raw branch type encoding may leak
+to the perf tool when the SAVE_TYPE option is not set.
+
+In the intel_pmu_store_lbr(), the HW raw branch type is stored in
+lbr_entries[].type. If the SAVE_TYPE option is set, the
+lbr_entries[].type will be converted into the generic PERF_BR_* type
+in the intel_pmu_lbr_filter() and exposed to the user tools.
+But if the SAVE_TYPE option is NOT set by the user, the current perf
+kernel doesn't clear the field. The HW raw branch type leaks.
+
+There are two solutions to fix the issue for the Arch LBR.
+One is to clear the field if the SAVE_TYPE option is NOT set.
+The other solution is to unconditionally convert the branch type and
+expose the generic type to the user tools.
+
+The latter is implemented here, because
+- The branch type is valuable information. I don't see a case where
+  you would not benefit from the branch type. (Stephane Eranian)
+- Not having the branch type DOES NOT save any space in the
+  branch record (Stephane Eranian)
+- The Arch LBR HW can retrieve the common branch types from the
+  LBR_INFO. It doesn't require the high overhead SW disassemble.
+
+Fixes: 47125db27e47 ("perf/x86/intel/lbr: Support Architectural LBR")
+Reported-by: Stephane Eranian <eranian@google.com>
+Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20220816125612.2042397-1-kan.liang@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/intel/lbr.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/arch/x86/events/intel/lbr.c
++++ b/arch/x86/events/intel/lbr.c
+@@ -1114,6 +1114,14 @@ static int intel_pmu_setup_hw_lbr_filter
+       if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
+               reg->config = mask;
++
++              /*
++               * The Arch LBR HW can retrieve the common branch types
++               * from the LBR_INFO. It doesn't require the high overhead
++               * SW disassemble.
++               * Enable the branch type by default for the Arch LBR.
++               */
++              reg->reg |= X86_BR_TYPE_SAVE;
+               return 0;
+       }
diff --git a/queue-5.10/s390-fix-double-free-of-gs-and-ri-cbs-on-fork-failure.patch b/queue-5.10/s390-fix-double-free-of-gs-and-ri-cbs-on-fork-failure.patch
new file mode 100644 (file)
index 0000000..508da2f
--- /dev/null
@@ -0,0 +1,81 @@
+From 13cccafe0edcd03bf1c841de8ab8a1c8e34f77d9 Mon Sep 17 00:00:00 2001
+From: Brian Foster <bfoster@redhat.com>
+Date: Tue, 16 Aug 2022 11:54:07 -0400
+Subject: s390: fix double free of GS and RI CBs on fork() failure
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 13cccafe0edcd03bf1c841de8ab8a1c8e34f77d9 upstream.
+
+The pointers for guarded storage and runtime instrumentation control
+blocks are stored in the thread_struct of the associated task. These
+pointers are initially copied on fork() via arch_dup_task_struct()
+and then cleared via copy_thread() before fork() returns. If fork()
+happens to fail after the initial task dup and before copy_thread(),
+the newly allocated task and associated thread_struct memory are
+freed via free_task() -> arch_release_task_struct(). This results in
+a double free of the guarded storage and runtime info structs
+because the fields in the failed task still refer to memory
+associated with the source task.
+
+This problem can manifest as a BUG_ON() in set_freepointer() (with
+CONFIG_SLAB_FREELIST_HARDENED enabled) or KASAN splat (if enabled)
+when running trinity syscall fuzz tests on s390x. To avoid this
+problem, clear the associated pointer fields in
+arch_dup_task_struct() immediately after the new task is copied.
+Note that the RI flag is still cleared in copy_thread() because it
+resides in thread stack memory and that is where stack info is
+copied.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Fixes: 8d9047f8b967c ("s390/runtime instrumentation: simplify task exit handling")
+Fixes: 7b83c6297d2fc ("s390/guarded storage: simplify task exit handling")
+Cc: <stable@vger.kernel.org> # 4.15
+Reviewed-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Link: https://lore.kernel.org/r/20220816155407.537372-1-bfoster@redhat.com
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/kernel/process.c |   22 ++++++++++++++++------
+ 1 file changed, 16 insertions(+), 6 deletions(-)
+
+--- a/arch/s390/kernel/process.c
++++ b/arch/s390/kernel/process.c
+@@ -77,6 +77,18 @@ int arch_dup_task_struct(struct task_str
+       memcpy(dst, src, arch_task_struct_size);
+       dst->thread.fpu.regs = dst->thread.fpu.fprs;
++
++      /*
++       * Don't transfer over the runtime instrumentation or the guarded
++       * storage control block pointers. These fields are cleared here instead
++       * of in copy_thread() to avoid premature freeing of associated memory
++       * on fork() failure. Wait to clear the RI flag because ->stack still
++       * refers to the source thread.
++       */
++      dst->thread.ri_cb = NULL;
++      dst->thread.gs_cb = NULL;
++      dst->thread.gs_bc_cb = NULL;
++
+       return 0;
+ }
+@@ -134,13 +146,11 @@ int copy_thread(unsigned long clone_flag
+       frame->childregs.flags = 0;
+       if (new_stackp)
+               frame->childregs.gprs[15] = new_stackp;
+-
+-      /* Don't copy runtime instrumentation info */
+-      p->thread.ri_cb = NULL;
++      /*
++       * Clear the runtime instrumentation flag after the above childregs
++       * copy. The CB pointer was already cleared in arch_dup_task_struct().
++       */
+       frame->childregs.psw.mask &= ~PSW_MASK_RI;
+-      /* Don't copy guarded storage control block */
+-      p->thread.gs_cb = NULL;
+-      p->thread.gs_bc_cb = NULL;
+       /* Set a new TLS ?  */
+       if (clone_flags & CLONE_SETTLS) {
index 12e6dfbd908d32a8a31110fdc5e4d1c72d709361..c9543a3a251d0eefc68ef8450ab3eb105f505206 100644 (file)
@@ -61,3 +61,15 @@ net-fix-a-data-race-around-sysctl_somaxconn.patch
 ixgbe-stop-resetting-systime-in-ixgbe_ptp_start_cycl.patch
 rxrpc-fix-locking-in-rxrpc-s-sendmsg.patch
 ionic-fix-up-issues-with-handling-eagain-on-fw-cmds.patch
+btrfs-fix-silent-failure-when-deleting-root-reference.patch
+btrfs-replace-drop-assert-for-suspended-replace.patch
+btrfs-add-info-when-mount-fails-due-to-stale-replace-target.patch
+btrfs-check-if-root-is-readonly-while-setting-security-xattr.patch
+perf-x86-lbr-enable-the-branch-type-for-the-arch-lbr-by-default.patch
+x86-unwind-orc-unwind-ftrace-trampolines-with-correct-orc-entry.patch
+x86-bugs-add-unknown-reporting-for-mmio-stale-data.patch
+loop-check-for-overflow-while-configuring-loop.patch
+asm-generic-sections-refactor-memory_intersects.patch
+s390-fix-double-free-of-gs-and-ri-cbs-on-fork-failure.patch
+acpi-processor-remove-freq-qos-request-for-all-cpus.patch
+xen-privcmd-fix-error-exit-of-privcmd_ioctl_dm_op.patch
diff --git a/queue-5.10/x86-bugs-add-unknown-reporting-for-mmio-stale-data.patch b/queue-5.10/x86-bugs-add-unknown-reporting-for-mmio-stale-data.patch
new file mode 100644 (file)
index 0000000..f89cc2c
--- /dev/null
@@ -0,0 +1,209 @@
+From 7df548840c496b0141fb2404b889c346380c2b22 Mon Sep 17 00:00:00 2001
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Wed, 3 Aug 2022 14:41:32 -0700
+Subject: x86/bugs: Add "unknown" reporting for MMIO Stale Data
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 7df548840c496b0141fb2404b889c346380c2b22 upstream.
+
+Older Intel CPUs that are not in the affected processor list for MMIO
+Stale Data vulnerabilities currently report "Not affected" in sysfs,
+which may not be correct. Vulnerability status for these older CPUs is
+unknown.
+
+Add known-not-affected CPUs to the whitelist. Report "unknown"
+mitigation status for CPUs that are not in blacklist, whitelist and also
+don't enumerate MSR ARCH_CAPABILITIES bits that reflect hardware
+immunity to MMIO Stale Data vulnerabilities.
+
+Mitigation is not deployed when the status is unknown.
+
+  [ bp: Massage, fixup. ]
+
+Fixes: 8d50cdf8b834 ("x86/speculation/mmio: Add sysfs reporting for Processor MMIO Stale Data")
+Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Suggested-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/a932c154772f2121794a5f2eded1a11013114711.1657846269.git.pawan.kumar.gupta@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst |   14 +++
+ arch/x86/include/asm/cpufeatures.h                              |    5 -
+ arch/x86/kernel/cpu/bugs.c                                      |   14 ++-
+ arch/x86/kernel/cpu/common.c                                    |   42 ++++++----
+ 4 files changed, 56 insertions(+), 19 deletions(-)
+
+--- a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
++++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
+@@ -230,6 +230,20 @@ The possible values in this file are:
+      * - 'Mitigation: Clear CPU buffers'
+        - The processor is vulnerable and the CPU buffer clearing mitigation is
+          enabled.
++     * - 'Unknown: No mitigations'
++       - The processor vulnerability status is unknown because it is
++       out of Servicing period. Mitigation is not attempted.
++
++Definitions:
++------------
++
++Servicing period: The process of providing functional and security updates to
++Intel processors or platforms, utilizing the Intel Platform Update (IPU)
++process or other similar mechanisms.
++
++End of Servicing Updates (ESU): ESU is the date at which Intel will no
++longer provide Servicing, such as through IPU or other similar update
++processes. ESU dates will typically be aligned to end of quarter.
+ If the processor is vulnerable then the following information is appended to
+ the above information:
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -429,7 +429,8 @@
+ #define X86_BUG_ITLB_MULTIHIT         X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
+ #define X86_BUG_SRBDS                 X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
+ #define X86_BUG_MMIO_STALE_DATA               X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
+-#define X86_BUG_RETBLEED              X86_BUG(26) /* CPU is affected by RETBleed */
+-#define X86_BUG_EIBRS_PBRSB           X86_BUG(27) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
++#define X86_BUG_MMIO_UNKNOWN          X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */
++#define X86_BUG_RETBLEED              X86_BUG(27) /* CPU is affected by RETBleed */
++#define X86_BUG_EIBRS_PBRSB           X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
+ #endif /* _ASM_X86_CPUFEATURES_H */
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -424,7 +424,8 @@ static void __init mmio_select_mitigatio
+       u64 ia32_cap;
+       if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) ||
+-          cpu_mitigations_off()) {
++           boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) ||
++           cpu_mitigations_off()) {
+               mmio_mitigation = MMIO_MITIGATION_OFF;
+               return;
+       }
+@@ -529,6 +530,8 @@ out:
+               pr_info("TAA: %s\n", taa_strings[taa_mitigation]);
+       if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
+               pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]);
++      else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
++              pr_info("MMIO Stale Data: Unknown: No mitigations\n");
+ }
+ static void __init md_clear_select_mitigation(void)
+@@ -2198,6 +2201,9 @@ static ssize_t tsx_async_abort_show_stat
+ static ssize_t mmio_stale_data_show_state(char *buf)
+ {
++      if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
++              return sysfs_emit(buf, "Unknown: No mitigations\n");
++
+       if (mmio_mitigation == MMIO_MITIGATION_OFF)
+               return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]);
+@@ -2344,6 +2350,7 @@ static ssize_t cpu_show_common(struct de
+               return srbds_show_state(buf);
+       case X86_BUG_MMIO_STALE_DATA:
++      case X86_BUG_MMIO_UNKNOWN:
+               return mmio_stale_data_show_state(buf);
+       case X86_BUG_RETBLEED:
+@@ -2403,7 +2410,10 @@ ssize_t cpu_show_srbds(struct device *de
+ ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf)
+ {
+-      return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
++      if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
++              return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_UNKNOWN);
++      else
++              return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
+ }
+ ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf)
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1024,7 +1024,8 @@ static void identify_cpu_without_cpuid(s
+ #define NO_SWAPGS             BIT(6)
+ #define NO_ITLB_MULTIHIT      BIT(7)
+ #define NO_SPECTRE_V2         BIT(8)
+-#define NO_EIBRS_PBRSB                BIT(9)
++#define NO_MMIO                       BIT(9)
++#define NO_EIBRS_PBRSB                BIT(10)
+ #define VULNWL(vendor, family, model, whitelist)      \
+       X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
+@@ -1045,6 +1046,11 @@ static const __initconst struct x86_cpu_
+       VULNWL(NSC,     5, X86_MODEL_ANY,       NO_SPECULATION),
+       /* Intel Family 6 */
++      VULNWL_INTEL(TIGERLAKE,                 NO_MMIO),
++      VULNWL_INTEL(TIGERLAKE_L,               NO_MMIO),
++      VULNWL_INTEL(ALDERLAKE,                 NO_MMIO),
++      VULNWL_INTEL(ALDERLAKE_L,               NO_MMIO),
++
+       VULNWL_INTEL(ATOM_SALTWELL,             NO_SPECULATION | NO_ITLB_MULTIHIT),
+       VULNWL_INTEL(ATOM_SALTWELL_TABLET,      NO_SPECULATION | NO_ITLB_MULTIHIT),
+       VULNWL_INTEL(ATOM_SALTWELL_MID,         NO_SPECULATION | NO_ITLB_MULTIHIT),
+@@ -1063,9 +1069,9 @@ static const __initconst struct x86_cpu_
+       VULNWL_INTEL(ATOM_AIRMONT_MID,          NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+       VULNWL_INTEL(ATOM_AIRMONT_NP,           NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+-      VULNWL_INTEL(ATOM_GOLDMONT,             NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+-      VULNWL_INTEL(ATOM_GOLDMONT_D,           NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+-      VULNWL_INTEL(ATOM_GOLDMONT_PLUS,        NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
++      VULNWL_INTEL(ATOM_GOLDMONT,             NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++      VULNWL_INTEL(ATOM_GOLDMONT_D,           NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++      VULNWL_INTEL(ATOM_GOLDMONT_PLUS,        NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
+       /*
+        * Technically, swapgs isn't serializing on AMD (despite it previously
+@@ -1080,18 +1086,18 @@ static const __initconst struct x86_cpu_
+       VULNWL_INTEL(ATOM_TREMONT_D,            NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
+       /* AMD Family 0xf - 0x12 */
+-      VULNWL_AMD(0x0f,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+-      VULNWL_AMD(0x10,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+-      VULNWL_AMD(0x11,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+-      VULNWL_AMD(0x12,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
++      VULNWL_AMD(0x0f,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++      VULNWL_AMD(0x10,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++      VULNWL_AMD(0x11,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++      VULNWL_AMD(0x12,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+       /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
+-      VULNWL_AMD(X86_FAMILY_ANY,      NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+-      VULNWL_HYGON(X86_FAMILY_ANY,    NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
++      VULNWL_AMD(X86_FAMILY_ANY,      NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++      VULNWL_HYGON(X86_FAMILY_ANY,    NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+       /* Zhaoxin Family 7 */
+-      VULNWL(CENTAUR, 7, X86_MODEL_ANY,       NO_SPECTRE_V2 | NO_SWAPGS),
+-      VULNWL(ZHAOXIN, 7, X86_MODEL_ANY,       NO_SPECTRE_V2 | NO_SWAPGS),
++      VULNWL(CENTAUR, 7, X86_MODEL_ANY,       NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
++      VULNWL(ZHAOXIN, 7, X86_MODEL_ANY,       NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
+       {}
+ };
+@@ -1245,10 +1251,16 @@ static void __init cpu_set_bug_bits(stru
+        * Affected CPU list is generally enough to enumerate the vulnerability,
+        * but for virtualization case check for ARCH_CAP MSR bits also, VMM may
+        * not want the guest to enumerate the bug.
++       *
++       * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist,
++       * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits.
+        */
+-      if (cpu_matches(cpu_vuln_blacklist, MMIO) &&
+-          !arch_cap_mmio_immune(ia32_cap))
+-              setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
++      if (!arch_cap_mmio_immune(ia32_cap)) {
++              if (cpu_matches(cpu_vuln_blacklist, MMIO))
++                      setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
++              else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO))
++                      setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN);
++      }
+       if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
+               if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
diff --git a/queue-5.10/x86-unwind-orc-unwind-ftrace-trampolines-with-correct-orc-entry.patch b/queue-5.10/x86-unwind-orc-unwind-ftrace-trampolines-with-correct-orc-entry.patch
new file mode 100644 (file)
index 0000000..f76b902
--- /dev/null
@@ -0,0 +1,72 @@
+From fc2e426b1161761561624ebd43ce8c8d2fa058da Mon Sep 17 00:00:00 2001
+From: Chen Zhongjin <chenzhongjin@huawei.com>
+Date: Fri, 19 Aug 2022 16:43:34 +0800
+Subject: x86/unwind/orc: Unwind ftrace trampolines with correct ORC entry
+
+From: Chen Zhongjin <chenzhongjin@huawei.com>
+
+commit fc2e426b1161761561624ebd43ce8c8d2fa058da upstream.
+
+When meeting ftrace trampolines in ORC unwinding, unwinder uses address
+of ftrace_{regs_}call address to find the ORC entry, which gets next frame at
+sp+176.
+
+If there is an IRQ hitting at sub $0xa8,%rsp, the next frame should be
+sp+8 instead of 176. It makes unwinder skip correct frame and throw
+warnings such as "wrong direction" or "can't access registers", etc,
+depending on the content of the incorrect frame address.
+
+By adding the base address ftrace_{regs_}caller with the offset
+*ip - ops->trampoline*, we can get the correct address to find the ORC entry.
+
+Also change "caller" to "tramp_addr" to make variable name conform to
+its content.
+
+[ mingo: Clarified the changelog a bit. ]
+
+Fixes: 6be7fa3c74d1 ("ftrace, orc, x86: Handle ftrace dynamically allocated trampolines")
+Signed-off-by: Chen Zhongjin <chenzhongjin@huawei.com>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/20220819084334.244016-1-chenzhongjin@huawei.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/unwind_orc.c |   15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kernel/unwind_orc.c
++++ b/arch/x86/kernel/unwind_orc.c
+@@ -93,22 +93,27 @@ static struct orc_entry *orc_find(unsign
+ static struct orc_entry *orc_ftrace_find(unsigned long ip)
+ {
+       struct ftrace_ops *ops;
+-      unsigned long caller;
++      unsigned long tramp_addr, offset;
+       ops = ftrace_ops_trampoline(ip);
+       if (!ops)
+               return NULL;
++      /* Set tramp_addr to the start of the code copied by the trampoline */
+       if (ops->flags & FTRACE_OPS_FL_SAVE_REGS)
+-              caller = (unsigned long)ftrace_regs_call;
++              tramp_addr = (unsigned long)ftrace_regs_caller;
+       else
+-              caller = (unsigned long)ftrace_call;
++              tramp_addr = (unsigned long)ftrace_caller;
++
++      /* Now place tramp_addr to the location within the trampoline ip is at */
++      offset = ip - ops->trampoline;
++      tramp_addr += offset;
+       /* Prevent unlikely recursion */
+-      if (ip == caller)
++      if (ip == tramp_addr)
+               return NULL;
+-      return orc_find(caller);
++      return orc_find(tramp_addr);
+ }
+ #else
+ static struct orc_entry *orc_ftrace_find(unsigned long ip)
diff --git a/queue-5.10/xen-privcmd-fix-error-exit-of-privcmd_ioctl_dm_op.patch b/queue-5.10/xen-privcmd-fix-error-exit-of-privcmd_ioctl_dm_op.patch
new file mode 100644 (file)
index 0000000..c08c929
--- /dev/null
@@ -0,0 +1,95 @@
+From c5deb27895e017a0267de0a20d140ad5fcc55a54 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Thu, 25 Aug 2022 16:19:18 +0200
+Subject: xen/privcmd: fix error exit of privcmd_ioctl_dm_op()
+
+From: Juergen Gross <jgross@suse.com>
+
+commit c5deb27895e017a0267de0a20d140ad5fcc55a54 upstream.
+
+The error exit of privcmd_ioctl_dm_op() is calling unlock_pages()
+potentially with pages being NULL, leading to a NULL dereference.
+
+Additionally lock_pages() doesn't check for pin_user_pages_fast()
+having been completely successful, resulting in potentially not
+locking all pages into memory. This could result in sporadic failures
+when using the related memory in user mode.
+
+Fix all of that by calling unlock_pages() always with the real number
+of pinned pages, which will be zero in case pages being NULL, and by
+checking the number of pages pinned by pin_user_pages_fast() matching
+the expected number of pages.
+
+Cc: <stable@vger.kernel.org>
+Fixes: ab520be8cd5d ("xen/privcmd: Add IOCTL_PRIVCMD_DM_OP")
+Reported-by: Rustam Subkhankulov <subkhankulov@ispras.ru>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
+Link: https://lore.kernel.org/r/20220825141918.3581-1-jgross@suse.com
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/xen/privcmd.c |   21 +++++++++++----------
+ 1 file changed, 11 insertions(+), 10 deletions(-)
+
+--- a/drivers/xen/privcmd.c
++++ b/drivers/xen/privcmd.c
+@@ -581,27 +581,30 @@ static int lock_pages(
+       struct privcmd_dm_op_buf kbufs[], unsigned int num,
+       struct page *pages[], unsigned int nr_pages, unsigned int *pinned)
+ {
+-      unsigned int i;
++      unsigned int i, off = 0;
+-      for (i = 0; i < num; i++) {
++      for (i = 0; i < num; ) {
+               unsigned int requested;
+               int page_count;
+               requested = DIV_ROUND_UP(
+                       offset_in_page(kbufs[i].uptr) + kbufs[i].size,
+-                      PAGE_SIZE);
++                      PAGE_SIZE) - off;
+               if (requested > nr_pages)
+                       return -ENOSPC;
+               page_count = pin_user_pages_fast(
+-                      (unsigned long) kbufs[i].uptr,
++                      (unsigned long)kbufs[i].uptr + off * PAGE_SIZE,
+                       requested, FOLL_WRITE, pages);
+-              if (page_count < 0)
+-                      return page_count;
++              if (page_count <= 0)
++                      return page_count ? : -EFAULT;
+               *pinned += page_count;
+               nr_pages -= page_count;
+               pages += page_count;
++
++              off = (requested == page_count) ? 0 : off + page_count;
++              i += !off;
+       }
+       return 0;
+@@ -677,10 +680,8 @@ static long privcmd_ioctl_dm_op(struct f
+       }
+       rc = lock_pages(kbufs, kdata.num, pages, nr_pages, &pinned);
+-      if (rc < 0) {
+-              nr_pages = pinned;
++      if (rc < 0)
+               goto out;
+-      }
+       for (i = 0; i < kdata.num; i++) {
+               set_xen_guest_handle(xbufs[i].h, kbufs[i].uptr);
+@@ -692,7 +693,7 @@ static long privcmd_ioctl_dm_op(struct f
+       xen_preemptible_hcall_end();
+ out:
+-      unlock_pages(pages, nr_pages);
++      unlock_pages(pages, pinned);
+       kfree(xbufs);
+       kfree(pages);
+       kfree(kbufs);