]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.15-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 29 Aug 2022 07:48:42 +0000 (09:48 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 29 Aug 2022 07:48:42 +0000 (09:48 +0200)
added patches:
acpi-processor-remove-freq-qos-request-for-all-cpus.patch
asm-generic-sections-refactor-memory_intersects.patch
bootmem-remove-the-vmemmap-pages-from-kmemleak-in-put_page_bootmem.patch
btrfs-add-info-when-mount-fails-due-to-stale-replace-target.patch
btrfs-check-if-root-is-readonly-while-setting-security-xattr.patch
btrfs-fix-possible-memory-leak-in-btrfs_get_dev_args_from_path.patch
btrfs-fix-silent-failure-when-deleting-root-reference.patch
btrfs-replace-drop-assert-for-suspended-replace.patch
fbdev-fbcon-properly-revert-changes-when-vc_resize-failed.patch
loop-check-for-overflow-while-configuring-loop.patch
mm-damon-dbgfs-avoid-duplicate-context-directory-creation.patch
nouveau-explicitly-wait-on-the-fence-in-nouveau_bo_move_m2mf.patch
perf-x86-lbr-enable-the-branch-type-for-the-arch-lbr-by-default.patch
revert-memcg-cleanup-racy-sum-avoidance-code.patch
riscv-traps-add-missing-prototype.patch
s390-fix-double-free-of-gs-and-ri-cbs-on-fork-failure.patch
s390-mm-do-not-trigger-write-fault-when-vma-does-not-allow-vm_write.patch
smb3-missing-inode-locks-in-punch-hole.patch
writeback-avoid-use-after-free-after-removing-device.patch
x86-bugs-add-unknown-reporting-for-mmio-stale-data.patch
x86-nospec-unwreck-the-rsb-stuffing.patch
x86-unwind-orc-unwind-ftrace-trampolines-with-correct-orc-entry.patch
xen-privcmd-fix-error-exit-of-privcmd_ioctl_dm_op.patch

24 files changed:
queue-5.15/acpi-processor-remove-freq-qos-request-for-all-cpus.patch [new file with mode: 0644]
queue-5.15/asm-generic-sections-refactor-memory_intersects.patch [new file with mode: 0644]
queue-5.15/bootmem-remove-the-vmemmap-pages-from-kmemleak-in-put_page_bootmem.patch [new file with mode: 0644]
queue-5.15/btrfs-add-info-when-mount-fails-due-to-stale-replace-target.patch [new file with mode: 0644]
queue-5.15/btrfs-check-if-root-is-readonly-while-setting-security-xattr.patch [new file with mode: 0644]
queue-5.15/btrfs-fix-possible-memory-leak-in-btrfs_get_dev_args_from_path.patch [new file with mode: 0644]
queue-5.15/btrfs-fix-silent-failure-when-deleting-root-reference.patch [new file with mode: 0644]
queue-5.15/btrfs-replace-drop-assert-for-suspended-replace.patch [new file with mode: 0644]
queue-5.15/fbdev-fbcon-properly-revert-changes-when-vc_resize-failed.patch [new file with mode: 0644]
queue-5.15/loop-check-for-overflow-while-configuring-loop.patch [new file with mode: 0644]
queue-5.15/mm-damon-dbgfs-avoid-duplicate-context-directory-creation.patch [new file with mode: 0644]
queue-5.15/nouveau-explicitly-wait-on-the-fence-in-nouveau_bo_move_m2mf.patch [new file with mode: 0644]
queue-5.15/perf-x86-lbr-enable-the-branch-type-for-the-arch-lbr-by-default.patch [new file with mode: 0644]
queue-5.15/revert-memcg-cleanup-racy-sum-avoidance-code.patch [new file with mode: 0644]
queue-5.15/riscv-traps-add-missing-prototype.patch [new file with mode: 0644]
queue-5.15/s390-fix-double-free-of-gs-and-ri-cbs-on-fork-failure.patch [new file with mode: 0644]
queue-5.15/s390-mm-do-not-trigger-write-fault-when-vma-does-not-allow-vm_write.patch [new file with mode: 0644]
queue-5.15/series
queue-5.15/smb3-missing-inode-locks-in-punch-hole.patch [new file with mode: 0644]
queue-5.15/writeback-avoid-use-after-free-after-removing-device.patch [new file with mode: 0644]
queue-5.15/x86-bugs-add-unknown-reporting-for-mmio-stale-data.patch [new file with mode: 0644]
queue-5.15/x86-nospec-unwreck-the-rsb-stuffing.patch [new file with mode: 0644]
queue-5.15/x86-unwind-orc-unwind-ftrace-trampolines-with-correct-orc-entry.patch [new file with mode: 0644]
queue-5.15/xen-privcmd-fix-error-exit-of-privcmd_ioctl_dm_op.patch [new file with mode: 0644]

diff --git a/queue-5.15/acpi-processor-remove-freq-qos-request-for-all-cpus.patch b/queue-5.15/acpi-processor-remove-freq-qos-request-for-all-cpus.patch
new file mode 100644 (file)
index 0000000..7ff6fe1
--- /dev/null
@@ -0,0 +1,38 @@
+From 36527b9d882362567ceb4eea8666813280f30e6f Mon Sep 17 00:00:00 2001
+From: Riwen Lu <luriwen@kylinos.cn>
+Date: Tue, 23 Aug 2022 15:43:42 +0800
+Subject: ACPI: processor: Remove freq Qos request for all CPUs
+
+From: Riwen Lu <luriwen@kylinos.cn>
+
+commit 36527b9d882362567ceb4eea8666813280f30e6f upstream.
+
+The freq Qos request would be removed repeatedly if the cpufreq policy
+relates to more than one CPU. Then, it would cause the "called for unknown
+object" warning.
+
+Remove the freq Qos request for each CPU related to the cpufreq policy,
+instead of removing repeatedly for the last CPU of it.
+
+Fixes: a1bb46c36ce3 ("ACPI: processor: Add QoS requests for all CPUs")
+Reported-by: Jeremy Linton <Jeremy.Linton@arm.com>
+Tested-by: Jeremy Linton <jeremy.linton@arm.com>
+Signed-off-by: Riwen Lu <luriwen@kylinos.cn>
+Cc: 5.4+ <stable@vger.kernel.org> # 5.4+
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/acpi/processor_thermal.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/acpi/processor_thermal.c
++++ b/drivers/acpi/processor_thermal.c
+@@ -144,7 +144,7 @@ void acpi_thermal_cpufreq_exit(struct cp
+       unsigned int cpu;
+       for_each_cpu(cpu, policy->related_cpus) {
+-              struct acpi_processor *pr = per_cpu(processors, policy->cpu);
++              struct acpi_processor *pr = per_cpu(processors, cpu);
+               if (pr)
+                       freq_qos_remove_request(&pr->thermal_req);
diff --git a/queue-5.15/asm-generic-sections-refactor-memory_intersects.patch b/queue-5.15/asm-generic-sections-refactor-memory_intersects.patch
new file mode 100644 (file)
index 0000000..6c6a1e6
--- /dev/null
@@ -0,0 +1,96 @@
+From 0c7d7cc2b4fe2e74ef8728f030f0f1674f9f6aee Mon Sep 17 00:00:00 2001
+From: Quanyang Wang <quanyang.wang@windriver.com>
+Date: Fri, 19 Aug 2022 16:11:45 +0800
+Subject: asm-generic: sections: refactor memory_intersects
+
+From: Quanyang Wang <quanyang.wang@windriver.com>
+
+commit 0c7d7cc2b4fe2e74ef8728f030f0f1674f9f6aee upstream.
+
+There are two problems with the current code of memory_intersects:
+
+First, it doesn't check whether the region (begin, end) falls inside the
+region (virt, vend), that is (virt < begin && vend > end).
+
+The second problem is if vend is equal to begin, it will return true but
+this is wrong since vend (virt + size) is not the last address of the
+memory region but (virt + size -1) is.  The wrong determination will
+trigger the misreporting when the function check_for_illegal_area calls
+memory_intersects to check if the dma region intersects with stext region.
+
+The misreporting is as below (stext is at 0x80100000):
+ WARNING: CPU: 0 PID: 77 at kernel/dma/debug.c:1073 check_for_illegal_area+0x130/0x168
+ DMA-API: chipidea-usb2 e0002000.usb: device driver maps memory from kernel text or rodata [addr=800f0000] [len=65536]
+ Modules linked in:
+ CPU: 1 PID: 77 Comm: usb-storage Not tainted 5.19.0-yocto-standard #5
+ Hardware name: Xilinx Zynq Platform
+  unwind_backtrace from show_stack+0x18/0x1c
+  show_stack from dump_stack_lvl+0x58/0x70
+  dump_stack_lvl from __warn+0xb0/0x198
+  __warn from warn_slowpath_fmt+0x80/0xb4
+  warn_slowpath_fmt from check_for_illegal_area+0x130/0x168
+  check_for_illegal_area from debug_dma_map_sg+0x94/0x368
+  debug_dma_map_sg from __dma_map_sg_attrs+0x114/0x128
+  __dma_map_sg_attrs from dma_map_sg_attrs+0x18/0x24
+  dma_map_sg_attrs from usb_hcd_map_urb_for_dma+0x250/0x3b4
+  usb_hcd_map_urb_for_dma from usb_hcd_submit_urb+0x194/0x214
+  usb_hcd_submit_urb from usb_sg_wait+0xa4/0x118
+  usb_sg_wait from usb_stor_bulk_transfer_sglist+0xa0/0xec
+  usb_stor_bulk_transfer_sglist from usb_stor_bulk_srb+0x38/0x70
+  usb_stor_bulk_srb from usb_stor_Bulk_transport+0x150/0x360
+  usb_stor_Bulk_transport from usb_stor_invoke_transport+0x38/0x440
+  usb_stor_invoke_transport from usb_stor_control_thread+0x1e0/0x238
+  usb_stor_control_thread from kthread+0xf8/0x104
+  kthread from ret_from_fork+0x14/0x2c
+
+Refactor memory_intersects to fix the two problems above.
+
+Before the 1d7db834a027e ("dma-debug: use memory_intersects()
+directly"), memory_intersects is called only by printk_late_init:
+
+printk_late_init -> init_section_intersects ->memory_intersects.
+
+There were few places where memory_intersects was called.
+
+When commit 1d7db834a027e ("dma-debug: use memory_intersects()
+directly") was merged and CONFIG_DMA_API_DEBUG is enabled, the DMA
+subsystem uses it to check for an illegal area and the calltrace above
+is triggered.
+
+[akpm@linux-foundation.org: fix nearby comment typo]
+Link: https://lkml.kernel.org/r/20220819081145.948016-1-quanyang.wang@windriver.com
+Fixes: 979559362516 ("asm/sections: add helpers to check for section data")
+Signed-off-by: Quanyang Wang <quanyang.wang@windriver.com>
+Cc: Ard Biesheuvel <ardb@kernel.org>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Cc: Thierry Reding <treding@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/asm-generic/sections.h |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/include/asm-generic/sections.h
++++ b/include/asm-generic/sections.h
+@@ -114,7 +114,7 @@ static inline bool memory_contains(void
+ /**
+  * memory_intersects - checks if the region occupied by an object intersects
+  *                     with another memory region
+- * @begin: virtual address of the beginning of the memory regien
++ * @begin: virtual address of the beginning of the memory region
+  * @end: virtual address of the end of the memory region
+  * @virt: virtual address of the memory object
+  * @size: size of the memory object
+@@ -127,7 +127,10 @@ static inline bool memory_intersects(voi
+ {
+       void *vend = virt + size;
+-      return (virt >= begin && virt < end) || (vend >= begin && vend < end);
++      if (virt < end && vend > begin)
++              return true;
++
++      return false;
+ }
+ /**
diff --git a/queue-5.15/bootmem-remove-the-vmemmap-pages-from-kmemleak-in-put_page_bootmem.patch b/queue-5.15/bootmem-remove-the-vmemmap-pages-from-kmemleak-in-put_page_bootmem.patch
new file mode 100644 (file)
index 0000000..846bc6b
--- /dev/null
@@ -0,0 +1,55 @@
+From dd0ff4d12dd284c334f7e9b07f8f335af856ac78 Mon Sep 17 00:00:00 2001
+From: Liu Shixin <liushixin2@huawei.com>
+Date: Fri, 19 Aug 2022 17:40:05 +0800
+Subject: bootmem: remove the vmemmap pages from kmemleak in put_page_bootmem
+
+From: Liu Shixin <liushixin2@huawei.com>
+
+commit dd0ff4d12dd284c334f7e9b07f8f335af856ac78 upstream.
+
+The vmemmap pages are marked by kmemleak when allocated from memblock.
+Remove each one from kmemleak when freeing the page.  Otherwise, when we reuse
+the page, kmemleak may report such an error and then stop working.
+
+ kmemleak: Cannot insert 0xffff98fb6eab3d40 into the object search tree (overlaps existing)
+ kmemleak: Kernel memory leak detector disabled
+ kmemleak: Object 0xffff98fb6be00000 (size 335544320):
+ kmemleak:   comm "swapper", pid 0, jiffies 4294892296
+ kmemleak:   min_count = 0
+ kmemleak:   count = 0
+ kmemleak:   flags = 0x1
+ kmemleak:   checksum = 0
+ kmemleak:   backtrace:
+
+Link: https://lkml.kernel.org/r/20220819094005.2928241-1-liushixin2@huawei.com
+Fixes: f41f2ed43ca5 (mm: hugetlb: free the vmemmap pages associated with each HugeTLB page)
+Signed-off-by: Liu Shixin <liushixin2@huawei.com>
+Reviewed-by: Muchun Song <songmuchun@bytedance.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/bootmem_info.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/mm/bootmem_info.c
++++ b/mm/bootmem_info.c
+@@ -12,6 +12,7 @@
+ #include <linux/memblock.h>
+ #include <linux/bootmem_info.h>
+ #include <linux/memory_hotplug.h>
++#include <linux/kmemleak.h>
+ void get_page_bootmem(unsigned long info, struct page *page, unsigned long type)
+ {
+@@ -34,6 +35,7 @@ void put_page_bootmem(struct page *page)
+               ClearPagePrivate(page);
+               set_page_private(page, 0);
+               INIT_LIST_HEAD(&page->lru);
++              kmemleak_free_part(page_to_virt(page), PAGE_SIZE);
+               free_reserved_page(page);
+       }
+ }
diff --git a/queue-5.15/btrfs-add-info-when-mount-fails-due-to-stale-replace-target.patch b/queue-5.15/btrfs-add-info-when-mount-fails-due-to-stale-replace-target.patch
new file mode 100644 (file)
index 0000000..426112d
--- /dev/null
@@ -0,0 +1,47 @@
+From f2c3bec215694fb8bc0ef5010f2a758d1906fc2d Mon Sep 17 00:00:00 2001
+From: Anand Jain <anand.jain@oracle.com>
+Date: Fri, 12 Aug 2022 18:32:19 +0800
+Subject: btrfs: add info when mount fails due to stale replace target
+
+From: Anand Jain <anand.jain@oracle.com>
+
+commit f2c3bec215694fb8bc0ef5010f2a758d1906fc2d upstream.
+
+If the replace target device reappears after the suspended replace is
+cancelled, it blocks the mount operation as it can't find the matching
+replace-item in the metadata. As shown below,
+
+   BTRFS error (device sda5): replace devid present without an active replace item
+
+To overcome this situation, the user can run the command
+
+   btrfs device scan --forget <replace target device>
+
+and try the mount command again. And also, to avoid repeating the issue,
+superblock on the devid=0 must be wiped.
+
+   wipefs -a device-path-to-devid=0.
+
+This patch adds some info when this situation occurs.
+
+Reported-by: Samuel Greiner <samuel@balkonien.org>
+Link: https://lore.kernel.org/linux-btrfs/b4f62b10-b295-26ea-71f9-9a5c9299d42c@balkonien.org/T/
+CC: stable@vger.kernel.org # 5.0+
+Signed-off-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/dev-replace.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/dev-replace.c
++++ b/fs/btrfs/dev-replace.c
+@@ -165,7 +165,7 @@ no_valid_dev_replace_entry_found:
+                */
+               if (btrfs_find_device(fs_info->fs_devices, &args)) {
+                       btrfs_err(fs_info,
+-                      "replace devid present without an active replace item");
++"replace without active item, run 'device scan --forget' on the target device");
+                       ret = -EUCLEAN;
+               } else {
+                       dev_replace->srcdev = NULL;
diff --git a/queue-5.15/btrfs-check-if-root-is-readonly-while-setting-security-xattr.patch b/queue-5.15/btrfs-check-if-root-is-readonly-while-setting-security-xattr.patch
new file mode 100644 (file)
index 0000000..fe786ec
--- /dev/null
@@ -0,0 +1,60 @@
+From b51111271b0352aa596c5ae8faf06939e91b3b68 Mon Sep 17 00:00:00 2001
+From: Goldwyn Rodrigues <rgoldwyn@suse.de>
+Date: Tue, 16 Aug 2022 16:42:56 -0500
+Subject: btrfs: check if root is readonly while setting security xattr
+
+From: Goldwyn Rodrigues <rgoldwyn@suse.de>
+
+commit b51111271b0352aa596c5ae8faf06939e91b3b68 upstream.
+
+For a filesystem which has btrfs read-only property set to true, all
+write operations including xattr should be denied. However, security
+xattr can still be changed even if btrfs ro property is true.
+
+This happens because xattr_permission() does not have any restrictions
+on security.*, system.*  and in some cases trusted.* from VFS and
+the decision is left to the underlying filesystem. See comments in
+xattr_permission() for more details.
+
+This patch checks if the root is read-only before performing the set
+xattr operation.
+
+Testcase:
+
+  DEV=/dev/vdb
+  MNT=/mnt
+
+  mkfs.btrfs -f $DEV
+  mount $DEV $MNT
+  echo "file one" > $MNT/f1
+
+  setfattr -n "security.one" -v 2 $MNT/f1
+  btrfs property set /mnt ro true
+
+  setfattr -n "security.one" -v 1 $MNT/f1
+
+  umount $MNT
+
+CC: stable@vger.kernel.org # 4.9+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/xattr.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/btrfs/xattr.c
++++ b/fs/btrfs/xattr.c
+@@ -391,6 +391,9 @@ static int btrfs_xattr_handler_set(const
+                                  const char *name, const void *buffer,
+                                  size_t size, int flags)
+ {
++      if (btrfs_root_readonly(BTRFS_I(inode)->root))
++              return -EROFS;
++
+       name = xattr_full_name(handler, name);
+       return btrfs_setxattr_trans(inode, name, buffer, size, flags);
+ }
diff --git a/queue-5.15/btrfs-fix-possible-memory-leak-in-btrfs_get_dev_args_from_path.patch b/queue-5.15/btrfs-fix-possible-memory-leak-in-btrfs_get_dev_args_from_path.patch
new file mode 100644 (file)
index 0000000..1797498
--- /dev/null
@@ -0,0 +1,44 @@
+From 9ea0106a7a3d8116860712e3f17cd52ce99f6707 Mon Sep 17 00:00:00 2001
+From: Zixuan Fu <r33s3n6@gmail.com>
+Date: Mon, 15 Aug 2022 23:16:06 +0800
+Subject: btrfs: fix possible memory leak in btrfs_get_dev_args_from_path()
+
+From: Zixuan Fu <r33s3n6@gmail.com>
+
+commit 9ea0106a7a3d8116860712e3f17cd52ce99f6707 upstream.
+
+In btrfs_get_dev_args_from_path(), btrfs_get_bdev_and_sb() can fail if
+the path is invalid. In this case, btrfs_get_dev_args_from_path()
+returns directly without freeing args->uuid and args->fsid allocated
+before, which causes memory leak.
+
+To fix these possible leaks, when btrfs_get_bdev_and_sb() fails,
+btrfs_put_dev_args_from_path() is called to clean up the memory.
+
+Reported-by: TOTE Robot <oslab@tsinghua.edu.cn>
+Fixes: faa775c41d655 ("btrfs: add a btrfs_get_dev_args_from_path helper")
+CC: stable@vger.kernel.org # 5.16
+Reviewed-by: Boris Burkov <boris@bur.io>
+Signed-off-by: Zixuan Fu <r33s3n6@gmail.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/volumes.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -2392,8 +2392,11 @@ int btrfs_get_dev_args_from_path(struct
+       ret = btrfs_get_bdev_and_sb(path, FMODE_READ, fs_info->bdev_holder, 0,
+                                   &bdev, &disk_super);
+-      if (ret)
++      if (ret) {
++              btrfs_put_dev_args_from_path(args);
+               return ret;
++      }
++
+       args->devid = btrfs_stack_device_id(&disk_super->dev_item);
+       memcpy(args->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE);
+       if (btrfs_fs_incompat(fs_info, METADATA_UUID))
diff --git a/queue-5.15/btrfs-fix-silent-failure-when-deleting-root-reference.patch b/queue-5.15/btrfs-fix-silent-failure-when-deleting-root-reference.patch
new file mode 100644 (file)
index 0000000..f519ade
--- /dev/null
@@ -0,0 +1,43 @@
+From 47bf225a8d2cccb15f7e8d4a1ed9b757dd86afd7 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 22 Aug 2022 15:47:09 +0100
+Subject: btrfs: fix silent failure when deleting root reference
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 47bf225a8d2cccb15f7e8d4a1ed9b757dd86afd7 upstream.
+
+At btrfs_del_root_ref(), if btrfs_search_slot() returns an error, we end
+up returning from the function with a value of 0 (success). This happens
+because the function returns the value stored in the variable 'err',
+which is 0, while the error value we got from btrfs_search_slot() is
+stored in the 'ret' variable.
+
+So fix it by setting 'err' with the error value.
+
+Fixes: 8289ed9f93bef2 ("btrfs: replace the BUG_ON in btrfs_del_root_ref with proper error handling")
+CC: stable@vger.kernel.org # 5.16+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/root-tree.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/root-tree.c
++++ b/fs/btrfs/root-tree.c
+@@ -351,9 +351,10 @@ int btrfs_del_root_ref(struct btrfs_tran
+       key.offset = ref_id;
+ again:
+       ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
+-      if (ret < 0)
++      if (ret < 0) {
++              err = ret;
+               goto out;
+-      if (ret == 0) {
++      } else if (ret == 0) {
+               leaf = path->nodes[0];
+               ref = btrfs_item_ptr(leaf, path->slots[0],
+                                    struct btrfs_root_ref);
diff --git a/queue-5.15/btrfs-replace-drop-assert-for-suspended-replace.patch b/queue-5.15/btrfs-replace-drop-assert-for-suspended-replace.patch
new file mode 100644 (file)
index 0000000..718b72b
--- /dev/null
@@ -0,0 +1,55 @@
+From 59a3991984dbc1fc47e5651a265c5200bd85464e Mon Sep 17 00:00:00 2001
+From: Anand Jain <anand.jain@oracle.com>
+Date: Fri, 12 Aug 2022 18:32:18 +0800
+Subject: btrfs: replace: drop assert for suspended replace
+
+From: Anand Jain <anand.jain@oracle.com>
+
+commit 59a3991984dbc1fc47e5651a265c5200bd85464e upstream.
+
+If the filesystem mounts with the replace-operation in a suspended state
+and we try to cancel the suspended replace-operation, we hit the assert. The
+assert came from the commit fe97e2e173af ("btrfs: dev-replace: replace's
+scrub must not be running in suspended state") that was actually not
+required. So just remove it.
+
+ $ mount /dev/sda5 /btrfs
+
+    BTRFS info (device sda5): cannot continue dev_replace, tgtdev is missing
+    BTRFS info (device sda5): you may cancel the operation after 'mount -o degraded'
+
+ $ mount -o degraded /dev/sda5 /btrfs <-- success.
+
+ $ btrfs replace cancel /btrfs
+
+    kernel: assertion failed: ret != -ENOTCONN, in fs/btrfs/dev-replace.c:1131
+    kernel: ------------[ cut here ]------------
+    kernel: kernel BUG at fs/btrfs/ctree.h:3750!
+
+After the patch:
+
+ $ btrfs replace cancel /btrfs
+
+    BTRFS info (device sda5): suspended dev_replace from /dev/sda5 (devid 1) to <missing disk> canceled
+
+Fixes: fe97e2e173af ("btrfs: dev-replace: replace's scrub must not be running in suspended state")
+CC: stable@vger.kernel.org # 5.0+
+Signed-off-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/dev-replace.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/fs/btrfs/dev-replace.c
++++ b/fs/btrfs/dev-replace.c
+@@ -1151,8 +1151,7 @@ int btrfs_dev_replace_cancel(struct btrf
+               up_write(&dev_replace->rwsem);
+               /* Scrub for replace must not be running in suspended state */
+-              ret = btrfs_scrub_cancel(fs_info);
+-              ASSERT(ret != -ENOTCONN);
++              btrfs_scrub_cancel(fs_info);
+               trans = btrfs_start_transaction(root, 0);
+               if (IS_ERR(trans)) {
diff --git a/queue-5.15/fbdev-fbcon-properly-revert-changes-when-vc_resize-failed.patch b/queue-5.15/fbdev-fbcon-properly-revert-changes-when-vc_resize-failed.patch
new file mode 100644 (file)
index 0000000..c1afc43
--- /dev/null
@@ -0,0 +1,88 @@
+From a5a923038d70d2d4a86cb4e3f32625a5ee6e7e24 Mon Sep 17 00:00:00 2001
+From: Shigeru Yoshida <syoshida@redhat.com>
+Date: Fri, 19 Aug 2022 03:13:36 +0900
+Subject: fbdev: fbcon: Properly revert changes when vc_resize() failed
+
+From: Shigeru Yoshida <syoshida@redhat.com>
+
+commit a5a923038d70d2d4a86cb4e3f32625a5ee6e7e24 upstream.
+
+fbcon_do_set_font() calls vc_resize() when font size is changed.
+However, if vc_resize() fails, the current implementation doesn't
+revert changes for font size, and this causes inconsistent state.
+
+syzbot reported unable to handle page fault due to this issue [1].
+syzbot's repro uses fault injection, which causes a memory allocation
+to fail, so vc_resize() failed.
+
+This patch fixes this issue by properly reverting the changes to
+font-related data when vc_resize() fails.
+
+Link: https://syzkaller.appspot.com/bug?id=3443d3a1fa6d964dd7310a0cb1696d165a3e07c4 [1]
+Reported-by: syzbot+a168dbeaaa7778273c1b@syzkaller.appspotmail.com
+Signed-off-by: Shigeru Yoshida <syoshida@redhat.com>
+Signed-off-by: Helge Deller <deller@gmx.de>
+CC: stable@vger.kernel.org # 5.15+
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/video/fbdev/core/fbcon.c |   27 +++++++++++++++++++++++++--
+ 1 file changed, 25 insertions(+), 2 deletions(-)
+
+--- a/drivers/video/fbdev/core/fbcon.c
++++ b/drivers/video/fbdev/core/fbcon.c
+@@ -2413,15 +2413,21 @@ static int fbcon_do_set_font(struct vc_d
+       struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
+       struct fbcon_ops *ops = info->fbcon_par;
+       struct fbcon_display *p = &fb_display[vc->vc_num];
+-      int resize;
++      int resize, ret, old_userfont, old_width, old_height, old_charcount;
+       char *old_data = NULL;
+       resize = (w != vc->vc_font.width) || (h != vc->vc_font.height);
+       if (p->userfont)
+               old_data = vc->vc_font.data;
+       vc->vc_font.data = (void *)(p->fontdata = data);
++      old_userfont = p->userfont;
+       if ((p->userfont = userfont))
+               REFCOUNT(data)++;
++
++      old_width = vc->vc_font.width;
++      old_height = vc->vc_font.height;
++      old_charcount = vc->vc_font.charcount;
++
+       vc->vc_font.width = w;
+       vc->vc_font.height = h;
+       vc->vc_font.charcount = charcount;
+@@ -2437,7 +2443,9 @@ static int fbcon_do_set_font(struct vc_d
+               rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres);
+               cols /= w;
+               rows /= h;
+-              vc_resize(vc, cols, rows);
++              ret = vc_resize(vc, cols, rows);
++              if (ret)
++                      goto err_out;
+       } else if (con_is_visible(vc)
+                  && vc->vc_mode == KD_TEXT) {
+               fbcon_clear_margins(vc, 0);
+@@ -2447,6 +2455,21 @@ static int fbcon_do_set_font(struct vc_d
+       if (old_data && (--REFCOUNT(old_data) == 0))
+               kfree(old_data - FONT_EXTRA_WORDS * sizeof(int));
+       return 0;
++
++err_out:
++      p->fontdata = old_data;
++      vc->vc_font.data = (void *)old_data;
++
++      if (userfont) {
++              p->userfont = old_userfont;
++              REFCOUNT(data)--;
++      }
++
++      vc->vc_font.width = old_width;
++      vc->vc_font.height = old_height;
++      vc->vc_font.charcount = old_charcount;
++
++      return ret;
+ }
+ /*
diff --git a/queue-5.15/loop-check-for-overflow-while-configuring-loop.patch b/queue-5.15/loop-check-for-overflow-while-configuring-loop.patch
new file mode 100644 (file)
index 0000000..9771afb
--- /dev/null
@@ -0,0 +1,59 @@
+From c490a0b5a4f36da3918181a8acdc6991d967c5f3 Mon Sep 17 00:00:00 2001
+From: Siddh Raman Pant <code@siddh.me>
+Date: Tue, 23 Aug 2022 21:38:10 +0530
+Subject: loop: Check for overflow while configuring loop
+
+From: Siddh Raman Pant <code@siddh.me>
+
+commit c490a0b5a4f36da3918181a8acdc6991d967c5f3 upstream.
+
+The userspace can configure a loop using an ioctl call, wherein
+a configuration of type loop_config is passed (see lo_ioctl()'s
+case on line 1550 of drivers/block/loop.c). This proceeds to call
+loop_configure() which in turn calls loop_set_status_from_info()
+(see line 1050 of loop.c), passing &config->info which is of type
+loop_info64*. This function then sets the appropriate values, like
+the offset.
+
+loop_device has lo_offset of type loff_t (see line 52 of loop.c),
+which is typedef-chained to long long, whereas loop_info64 has
+lo_offset of type __u64 (see line 56 of include/uapi/linux/loop.h).
+
+The function directly copies offset from info to the device as
+follows (See line 980 of loop.c):
+       lo->lo_offset = info->lo_offset;
+
+This results in an overflow, which triggers a warning in iomap_iter()
+due to a call to iomap_iter_done() which has:
+       WARN_ON_ONCE(iter->iomap.offset > iter->pos);
+
+Thus, check for negative value during loop_set_status_from_info().
+
+Bug report: https://syzkaller.appspot.com/bug?id=c620fe14aac810396d3c3edc9ad73848bf69a29e
+
+Reported-and-tested-by: syzbot+a8e049cd3abd342936b6@syzkaller.appspotmail.com
+Cc: stable@vger.kernel.org
+Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Signed-off-by: Siddh Raman Pant <code@siddh.me>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Link: https://lore.kernel.org/r/20220823160810.181275-1-code@siddh.me
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/block/loop.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/block/loop.c
++++ b/drivers/block/loop.c
+@@ -1154,6 +1154,11 @@ loop_set_status_from_info(struct loop_de
+       lo->lo_offset = info->lo_offset;
+       lo->lo_sizelimit = info->lo_sizelimit;
++
++      /* loff_t vars have been assigned __u64 */
++      if (lo->lo_offset < 0 || lo->lo_sizelimit < 0)
++              return -EOVERFLOW;
++
+       memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
+       memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
+       lo->lo_file_name[LO_NAME_SIZE-1] = 0;
diff --git a/queue-5.15/mm-damon-dbgfs-avoid-duplicate-context-directory-creation.patch b/queue-5.15/mm-damon-dbgfs-avoid-duplicate-context-directory-creation.patch
new file mode 100644 (file)
index 0000000..5c8c4d0
--- /dev/null
@@ -0,0 +1,53 @@
+From d26f60703606ab425eee9882b32a1781a8bed74d Mon Sep 17 00:00:00 2001
+From: Badari Pulavarty <badari.pulavarty@intel.com>
+Date: Sun, 21 Aug 2022 18:08:53 +0000
+Subject: mm/damon/dbgfs: avoid duplicate context directory creation
+
+From: Badari Pulavarty <badari.pulavarty@intel.com>
+
+commit d26f60703606ab425eee9882b32a1781a8bed74d upstream.
+
+When user tries to create a DAMON context via the DAMON debugfs interface
+with a name of an already existing context, the context directory creation
+fails but a new context is created and added in the internal data
+structure, due to absence of the directory creation success check.  As a
+result, memory could leak and DAMON cannot be turned on.  An example test
+case is as below:
+
+    # cd /sys/kernel/debug/damon/
+    # echo "off" >  monitor_on
+    # echo paddr > target_ids
+    # echo "abc" > mk_context
+    # echo "abc" > mk_context
+    # echo $$ > abc/target_ids
+    # echo "on" > monitor_on  <<< fails
+
+Return value of 'debugfs_create_dir()' is expected to be ignored in
+general, but this is an exceptional case as DAMON feature is depending
+on the debugfs functionality and it has the potential duplicate name
+issue.  This commit therefore fixes the issue by checking the directory
+creation failure and immediately return the error in the case.
+
+Link: https://lkml.kernel.org/r/20220821180853.2400-1-sj@kernel.org
+Fixes: 75c1c2b53c78 ("mm/damon/dbgfs: support multiple contexts")
+Signed-off-by: Badari Pulavarty <badari.pulavarty@intel.com>
+Signed-off-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org>   [ 5.15.x]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/damon/dbgfs.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/mm/damon/dbgfs.c
++++ b/mm/damon/dbgfs.c
+@@ -376,6 +376,9 @@ static int dbgfs_mk_context(char *name)
+               return -ENOENT;
+       new_dir = debugfs_create_dir(name, root);
++      /* Below check is required for a potential duplicated name case */
++      if (IS_ERR(new_dir))
++              return PTR_ERR(new_dir);
+       dbgfs_dirs[dbgfs_nr_ctxs] = new_dir;
+       new_ctx = dbgfs_new_ctx();
diff --git a/queue-5.15/nouveau-explicitly-wait-on-the-fence-in-nouveau_bo_move_m2mf.patch b/queue-5.15/nouveau-explicitly-wait-on-the-fence-in-nouveau_bo_move_m2mf.patch
new file mode 100644 (file)
index 0000000..74a5a75
--- /dev/null
@@ -0,0 +1,45 @@
+From 6b04ce966a738ecdd9294c9593e48513c0dc90aa Mon Sep 17 00:00:00 2001
+From: Karol Herbst <kherbst@redhat.com>
+Date: Fri, 19 Aug 2022 22:09:28 +0200
+Subject: nouveau: explicitly wait on the fence in nouveau_bo_move_m2mf
+
+From: Karol Herbst <kherbst@redhat.com>
+
+commit 6b04ce966a738ecdd9294c9593e48513c0dc90aa upstream.
+
+It is a bit unclear to us why that's helping, but it does and unbreaks
+suspend/resume on a lot of GPUs without any known drawbacks.
+
+Cc: stable@vger.kernel.org # v5.15+
+Closes: https://gitlab.freedesktop.org/drm/nouveau/-/issues/156
+Signed-off-by: Karol Herbst <kherbst@redhat.com>
+Reviewed-by: Lyude Paul <lyude@redhat.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20220819200928.401416-1-kherbst@redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/nouveau/nouveau_bo.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
+index 05076e530e7d..e29175e4b44c 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
++++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
+@@ -820,6 +820,15 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict,
+               if (ret == 0) {
+                       ret = nouveau_fence_new(chan, false, &fence);
+                       if (ret == 0) {
++                              /* TODO: figure out a better solution here
++                               *
++                               * wait on the fence here explicitly as going through
++                               * ttm_bo_move_accel_cleanup somehow doesn't seem to do it.
++                               *
++                               * Without this the operation can timeout and we'll fallback to a
++                               * software copy, which might take several minutes to finish.
++                               */
++                              nouveau_fence_wait(fence, false, false);
+                               ret = ttm_bo_move_accel_cleanup(bo,
+                                                               &fence->base,
+                                                               evict, false,
+-- 
+2.37.2
+
diff --git a/queue-5.15/perf-x86-lbr-enable-the-branch-type-for-the-arch-lbr-by-default.patch b/queue-5.15/perf-x86-lbr-enable-the-branch-type-for-the-arch-lbr-by-default.patch
new file mode 100644 (file)
index 0000000..9eccd21
--- /dev/null
@@ -0,0 +1,60 @@
+From 32ba156df1b1c8804a4e5be5339616945eafea22 Mon Sep 17 00:00:00 2001
+From: Kan Liang <kan.liang@linux.intel.com>
+Date: Tue, 16 Aug 2022 05:56:11 -0700
+Subject: perf/x86/lbr: Enable the branch type for the Arch LBR by default
+
+From: Kan Liang <kan.liang@linux.intel.com>
+
+commit 32ba156df1b1c8804a4e5be5339616945eafea22 upstream.
+
+On the platform with Arch LBR, the HW raw branch type encoding may leak
+to the perf tool when the SAVE_TYPE option is not set.
+
+In the intel_pmu_store_lbr(), the HW raw branch type is stored in
+lbr_entries[].type. If the SAVE_TYPE option is set, the
+lbr_entries[].type will be converted into the generic PERF_BR_* type
+in the intel_pmu_lbr_filter() and exposed to the user tools.
+But if the SAVE_TYPE option is NOT set by the user, the current perf
+kernel doesn't clear the field. The HW raw branch type leaks.
+
+There are two solutions to fix the issue for the Arch LBR.
+One is to clear the field if the SAVE_TYPE option is NOT set.
+The other solution is to unconditionally convert the branch type and
+expose the generic type to the user tools.
+
+The latter is implemented here, because
+- The branch type is valuable information. I don't see a case where
+  you would not benefit from the branch type. (Stephane Eranian)
+- Not having the branch type DOES NOT save any space in the
+  branch record (Stephane Eranian)
+- The Arch LBR HW can retrieve the common branch types from the
+  LBR_INFO. It doesn't require the high overhead SW disassemble.
+
+Fixes: 47125db27e47 ("perf/x86/intel/lbr: Support Architectural LBR")
+Reported-by: Stephane Eranian <eranian@google.com>
+Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20220816125612.2042397-1-kan.liang@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/intel/lbr.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/arch/x86/events/intel/lbr.c
++++ b/arch/x86/events/intel/lbr.c
+@@ -1114,6 +1114,14 @@ static int intel_pmu_setup_hw_lbr_filter
+       if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
+               reg->config = mask;
++
++              /*
++               * The Arch LBR HW can retrieve the common branch types
++               * from the LBR_INFO. It doesn't require the high overhead
++               * SW disassemble.
++               * Enable the branch type by default for the Arch LBR.
++               */
++              reg->reg |= X86_BR_TYPE_SAVE;
+               return 0;
+       }
diff --git a/queue-5.15/revert-memcg-cleanup-racy-sum-avoidance-code.patch b/queue-5.15/revert-memcg-cleanup-racy-sum-avoidance-code.patch
new file mode 100644 (file)
index 0000000..7d6297f
--- /dev/null
@@ -0,0 +1,95 @@
+From dbb16df6443c59e8a1ef21c2272fcf387d600ddf Mon Sep 17 00:00:00 2001
+From: Shakeel Butt <shakeelb@google.com>
+Date: Wed, 17 Aug 2022 17:21:39 +0000
+Subject: Revert "memcg: cleanup racy sum avoidance code"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Shakeel Butt <shakeelb@google.com>
+
+commit dbb16df6443c59e8a1ef21c2272fcf387d600ddf upstream.
+
+This reverts commit 96e51ccf1af33e82f429a0d6baebba29c6448d0f.
+
+Recently we started running the kernel with rstat infrastructure on
+production traffic and began to see negative memcg stat values.
+Particularly the 'sock' stat is the one which we observed having negative
+value.
+
+$ grep "sock " /mnt/memory/job/memory.stat
+sock 253952
+total_sock 18446744073708724224
+
+Re-run after couple of seconds
+
+$ grep "sock " /mnt/memory/job/memory.stat
+sock 253952
+total_sock 53248
+
+For now we are only seeing this issue on large machines (256 CPUs) and
+only with 'sock' stat.  I think the networking stack increases the stat on
+one cpu and decreases it on another cpu much more often.  So, this negative
+sock is due to rstat flusher flushing the stats on the CPU that has seen
+the decrement of sock but missed the CPU that has increments.  A typical
+race condition.
+
+For easy stable backport, revert is the most simple solution.  For long
+term solution, I am thinking of two directions.  First is just reduce the
+race window by optimizing the rstat flusher.  Second is if the reader sees
+a negative stat value, force flush and restart the stat collection.
+Basically retry but limited.
+
+Link: https://lkml.kernel.org/r/20220817172139.3141101-1-shakeelb@google.com
+Fixes: 96e51ccf1af33e8 ("memcg: cleanup racy sum avoidance code")
+Signed-off-by: Shakeel Butt <shakeelb@google.com>
+Cc: "Michal Koutný" <mkoutny@suse.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Roman Gushchin <roman.gushchin@linux.dev>
+Cc: Muchun Song <songmuchun@bytedance.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Yosry Ahmed <yosryahmed@google.com>
+Cc: Greg Thelen <gthelen@google.com>
+Cc: <stable@vger.kernel.org>   [5.15]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/memcontrol.h |   15 +++++++++++++--
+ 1 file changed, 13 insertions(+), 2 deletions(-)
+
+--- a/include/linux/memcontrol.h
++++ b/include/linux/memcontrol.h
+@@ -966,19 +966,30 @@ static inline void mod_memcg_state(struc
+ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
+ {
+-      return READ_ONCE(memcg->vmstats.state[idx]);
++      long x = READ_ONCE(memcg->vmstats.state[idx]);
++#ifdef CONFIG_SMP
++      if (x < 0)
++              x = 0;
++#endif
++      return x;
+ }
+ static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
+                                             enum node_stat_item idx)
+ {
+       struct mem_cgroup_per_node *pn;
++      long x;
+       if (mem_cgroup_disabled())
+               return node_page_state(lruvec_pgdat(lruvec), idx);
+       pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+-      return READ_ONCE(pn->lruvec_stats.state[idx]);
++      x = READ_ONCE(pn->lruvec_stats.state[idx]);
++#ifdef CONFIG_SMP
++      if (x < 0)
++              x = 0;
++#endif
++      return x;
+ }
+ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
diff --git a/queue-5.15/riscv-traps-add-missing-prototype.patch b/queue-5.15/riscv-traps-add-missing-prototype.patch
new file mode 100644 (file)
index 0000000..ca497e5
--- /dev/null
@@ -0,0 +1,51 @@
+From d951b20b9def73dcc39a5379831525d0d2a537e9 Mon Sep 17 00:00:00 2001
+From: Conor Dooley <conor.dooley@microchip.com>
+Date: Sun, 14 Aug 2022 15:12:38 +0100
+Subject: riscv: traps: add missing prototype
+
+From: Conor Dooley <conor.dooley@microchip.com>
+
+commit d951b20b9def73dcc39a5379831525d0d2a537e9 upstream.
+
+Sparse complains:
+arch/riscv/kernel/traps.c:213:6: warning: symbol 'shadow_stack' was not declared. Should it be static?
+
+The variable is used in entry.S, so declare shadow_stack there
+alongside SHADOW_OVERFLOW_STACK_SIZE.
+
+Fixes: 31da94c25aea ("riscv: add VMAP_STACK overflow detection")
+Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20220814141237.493457-5-mail@conchuod.ie
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/include/asm/thread_info.h |    2 ++
+ arch/riscv/kernel/traps.c            |    3 ++-
+ 2 files changed, 4 insertions(+), 1 deletion(-)
+
+--- a/arch/riscv/include/asm/thread_info.h
++++ b/arch/riscv/include/asm/thread_info.h
+@@ -42,6 +42,8 @@
+ #ifndef __ASSEMBLY__
++extern long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE / sizeof(long)];
++
+ #include <asm/processor.h>
+ #include <asm/csr.h>
+--- a/arch/riscv/kernel/traps.c
++++ b/arch/riscv/kernel/traps.c
+@@ -20,9 +20,10 @@
+ #include <asm/asm-prototypes.h>
+ #include <asm/bug.h>
++#include <asm/csr.h>
+ #include <asm/processor.h>
+ #include <asm/ptrace.h>
+-#include <asm/csr.h>
++#include <asm/thread_info.h>
+ int show_unhandled_signals = 1;
diff --git a/queue-5.15/s390-fix-double-free-of-gs-and-ri-cbs-on-fork-failure.patch b/queue-5.15/s390-fix-double-free-of-gs-and-ri-cbs-on-fork-failure.patch
new file mode 100644 (file)
index 0000000..1b84d15
--- /dev/null
@@ -0,0 +1,81 @@
+From 13cccafe0edcd03bf1c841de8ab8a1c8e34f77d9 Mon Sep 17 00:00:00 2001
+From: Brian Foster <bfoster@redhat.com>
+Date: Tue, 16 Aug 2022 11:54:07 -0400
+Subject: s390: fix double free of GS and RI CBs on fork() failure
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 13cccafe0edcd03bf1c841de8ab8a1c8e34f77d9 upstream.
+
+The pointers for guarded storage and runtime instrumentation control
+blocks are stored in the thread_struct of the associated task. These
+pointers are initially copied on fork() via arch_dup_task_struct()
+and then cleared via copy_thread() before fork() returns. If fork()
+happens to fail after the initial task dup and before copy_thread(),
+the newly allocated task and associated thread_struct memory are
+freed via free_task() -> arch_release_task_struct(). This results in
+a double free of the guarded storage and runtime info structs
+because the fields in the failed task still refer to memory
+associated with the source task.
+
+This problem can manifest as a BUG_ON() in set_freepointer() (with
+CONFIG_SLAB_FREELIST_HARDENED enabled) or KASAN splat (if enabled)
+when running trinity syscall fuzz tests on s390x. To avoid this
+problem, clear the associated pointer fields in
+arch_dup_task_struct() immediately after the new task is copied.
+Note that the RI flag is still cleared in copy_thread() because it
+resides in thread stack memory and that is where stack info is
+copied.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Fixes: 8d9047f8b967c ("s390/runtime instrumentation: simplify task exit handling")
+Fixes: 7b83c6297d2fc ("s390/guarded storage: simplify task exit handling")
+Cc: <stable@vger.kernel.org> # 4.15
+Reviewed-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Link: https://lore.kernel.org/r/20220816155407.537372-1-bfoster@redhat.com
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/kernel/process.c |   22 ++++++++++++++++------
+ 1 file changed, 16 insertions(+), 6 deletions(-)
+
+--- a/arch/s390/kernel/process.c
++++ b/arch/s390/kernel/process.c
+@@ -91,6 +91,18 @@ int arch_dup_task_struct(struct task_str
+       memcpy(dst, src, arch_task_struct_size);
+       dst->thread.fpu.regs = dst->thread.fpu.fprs;
++
++      /*
++       * Don't transfer over the runtime instrumentation or the guarded
++       * storage control block pointers. These fields are cleared here instead
++       * of in copy_thread() to avoid premature freeing of associated memory
++       * on fork() failure. Wait to clear the RI flag because ->stack still
++       * refers to the source thread.
++       */
++      dst->thread.ri_cb = NULL;
++      dst->thread.gs_cb = NULL;
++      dst->thread.gs_bc_cb = NULL;
++
+       return 0;
+ }
+@@ -149,13 +161,11 @@ int copy_thread(unsigned long clone_flag
+       frame->childregs.flags = 0;
+       if (new_stackp)
+               frame->childregs.gprs[15] = new_stackp;
+-
+-      /* Don't copy runtime instrumentation info */
+-      p->thread.ri_cb = NULL;
++      /*
++       * Clear the runtime instrumentation flag after the above childregs
++       * copy. The CB pointer was already cleared in arch_dup_task_struct().
++       */
+       frame->childregs.psw.mask &= ~PSW_MASK_RI;
+-      /* Don't copy guarded storage control block */
+-      p->thread.gs_cb = NULL;
+-      p->thread.gs_bc_cb = NULL;
+       /* Set a new TLS ?  */
+       if (clone_flags & CLONE_SETTLS) {
diff --git a/queue-5.15/s390-mm-do-not-trigger-write-fault-when-vma-does-not-allow-vm_write.patch b/queue-5.15/s390-mm-do-not-trigger-write-fault-when-vma-does-not-allow-vm_write.patch
new file mode 100644 (file)
index 0000000..3287d91
--- /dev/null
@@ -0,0 +1,49 @@
+From 41ac42f137080bc230b5882e3c88c392ab7f2d32 Mon Sep 17 00:00:00 2001
+From: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+Date: Wed, 17 Aug 2022 15:26:03 +0200
+Subject: s390/mm: do not trigger write fault when vma does not allow VM_WRITE
+
+From: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+
+commit 41ac42f137080bc230b5882e3c88c392ab7f2d32 upstream.
+
+For non-protection pXd_none() page faults in do_dat_exception(), we
+call do_exception() with access == (VM_READ | VM_WRITE | VM_EXEC).
+In do_exception(), vma->vm_flags is checked against that before
+calling handle_mm_fault().
+
+Since commit 92f842eac7ee3 ("[S390] store indication fault optimization"),
+we call handle_mm_fault() with FAULT_FLAG_WRITE, when recognizing that
+it was a write access. However, the vma flags check is still only
+checking against (VM_READ | VM_WRITE | VM_EXEC), and therefore also
+calling handle_mm_fault() with FAULT_FLAG_WRITE in cases where the vma
+does not allow VM_WRITE.
+
+Fix this by changing access check in do_exception() to VM_WRITE only,
+when recognizing write access.
+
+Link: https://lkml.kernel.org/r/20220811103435.188481-3-david@redhat.com
+Fixes: 92f842eac7ee3 ("[S390] store indication fault optimization")
+Cc: <stable@vger.kernel.org>
+Reported-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/mm/fault.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/s390/mm/fault.c
++++ b/arch/s390/mm/fault.c
+@@ -397,7 +397,9 @@ static inline vm_fault_t do_exception(st
+       flags = FAULT_FLAG_DEFAULT;
+       if (user_mode(regs))
+               flags |= FAULT_FLAG_USER;
+-      if (access == VM_WRITE || is_write)
++      if (is_write)
++              access = VM_WRITE;
++      if (access == VM_WRITE)
+               flags |= FAULT_FLAG_WRITE;
+       mmap_read_lock(mm);
index 48cbf58a5e58a8097dc58678f97f5e4ad4a7aa9c..28188538c0248c40a927c778cdfe346510d564ac 100644 (file)
@@ -92,3 +92,26 @@ ionic-clear-broken-state-on-generation-change.patch
 ionic-fix-up-issues-with-handling-eagain-on-fw-cmds.patch
 ionic-vf-initial-random-mac-address-if-no-assigned-m.patch
 net-stmmac-work-around-sporadic-tx-issue-on-link-up.patch
+btrfs-fix-silent-failure-when-deleting-root-reference.patch
+btrfs-replace-drop-assert-for-suspended-replace.patch
+btrfs-add-info-when-mount-fails-due-to-stale-replace-target.patch
+btrfs-check-if-root-is-readonly-while-setting-security-xattr.patch
+btrfs-fix-possible-memory-leak-in-btrfs_get_dev_args_from_path.patch
+perf-x86-lbr-enable-the-branch-type-for-the-arch-lbr-by-default.patch
+x86-unwind-orc-unwind-ftrace-trampolines-with-correct-orc-entry.patch
+x86-bugs-add-unknown-reporting-for-mmio-stale-data.patch
+x86-nospec-unwreck-the-rsb-stuffing.patch
+loop-check-for-overflow-while-configuring-loop.patch
+writeback-avoid-use-after-free-after-removing-device.patch
+asm-generic-sections-refactor-memory_intersects.patch
+mm-damon-dbgfs-avoid-duplicate-context-directory-creation.patch
+s390-mm-do-not-trigger-write-fault-when-vma-does-not-allow-vm_write.patch
+bootmem-remove-the-vmemmap-pages-from-kmemleak-in-put_page_bootmem.patch
+s390-fix-double-free-of-gs-and-ri-cbs-on-fork-failure.patch
+fbdev-fbcon-properly-revert-changes-when-vc_resize-failed.patch
+revert-memcg-cleanup-racy-sum-avoidance-code.patch
+acpi-processor-remove-freq-qos-request-for-all-cpus.patch
+nouveau-explicitly-wait-on-the-fence-in-nouveau_bo_move_m2mf.patch
+smb3-missing-inode-locks-in-punch-hole.patch
+xen-privcmd-fix-error-exit-of-privcmd_ioctl_dm_op.patch
+riscv-traps-add-missing-prototype.patch
diff --git a/queue-5.15/smb3-missing-inode-locks-in-punch-hole.patch b/queue-5.15/smb3-missing-inode-locks-in-punch-hole.patch
new file mode 100644 (file)
index 0000000..19bec86
--- /dev/null
@@ -0,0 +1,60 @@
+From ba0803050d610d5072666be727bca5e03e55b242 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Tue, 23 Aug 2022 02:10:56 -0500
+Subject: smb3: missing inode locks in punch hole
+
+From: David Howells <dhowells@redhat.com>
+
+commit ba0803050d610d5072666be727bca5e03e55b242 upstream.
+
+smb3 fallocate punch hole was not grabbing the inode or filemap_invalidate
+locks so could have race with pagemap reinstantiating the page.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/cifs/smb2ops.c |   12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/fs/cifs/smb2ops.c
++++ b/fs/cifs/smb2ops.c
+@@ -3599,7 +3599,7 @@ static long smb3_zero_range(struct file
+ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
+                           loff_t offset, loff_t len)
+ {
+-      struct inode *inode;
++      struct inode *inode = file_inode(file);
+       struct cifsFileInfo *cfile = file->private_data;
+       struct file_zero_data_information fsctl_buf;
+       long rc;
+@@ -3608,14 +3608,12 @@ static long smb3_punch_hole(struct file
+       xid = get_xid();
+-      inode = d_inode(cfile->dentry);
+-
++      inode_lock(inode);
+       /* Need to make file sparse, if not already, before freeing range. */
+       /* Consider adding equivalent for compressed since it could also work */
+       if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse)) {
+               rc = -EOPNOTSUPP;
+-              free_xid(xid);
+-              return rc;
++              goto out;
+       }
+       filemap_invalidate_lock(inode->i_mapping);
+@@ -3635,8 +3633,10 @@ static long smb3_punch_hole(struct file
+                       true /* is_fctl */, (char *)&fsctl_buf,
+                       sizeof(struct file_zero_data_information),
+                       CIFSMaxBufSize, NULL, NULL);
+-      free_xid(xid);
+       filemap_invalidate_unlock(inode->i_mapping);
++out:
++      inode_unlock(inode);
++      free_xid(xid);
+       return rc;
+ }
diff --git a/queue-5.15/writeback-avoid-use-after-free-after-removing-device.patch b/queue-5.15/writeback-avoid-use-after-free-after-removing-device.patch
new file mode 100644 (file)
index 0000000..537d8ae
--- /dev/null
@@ -0,0 +1,139 @@
+From f87904c075515f3e1d8f4a7115869d3b914674fd Mon Sep 17 00:00:00 2001
+From: Khazhismel Kumykov <khazhy@chromium.org>
+Date: Mon, 1 Aug 2022 08:50:34 -0700
+Subject: writeback: avoid use-after-free after removing device
+
+From: Khazhismel Kumykov <khazhy@chromium.org>
+
+commit f87904c075515f3e1d8f4a7115869d3b914674fd upstream.
+
+When a disk is removed, bdi_unregister gets called to stop further
+writeback and wait for associated delayed work to complete.  However,
+wb_inode_writeback_end() may schedule bandwidth estimation dwork after
+this has completed, which can result in the timer attempting to access the
+just freed bdi_writeback.
+
+Fix this by checking if the bdi_writeback is alive, similar to when
+scheduling writeback work.
+
+Since this requires wb->work_lock, and wb_inode_writeback_end() may get
+called from interrupt, switch wb->work_lock to an irqsafe lock.
+
+Link: https://lkml.kernel.org/r/20220801155034.3772543-1-khazhy@google.com
+Fixes: 45a2966fd641 ("writeback: fix bandwidth estimate for spiky workload")
+Signed-off-by: Khazhismel Kumykov <khazhy@google.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Cc: Michael Stapelberg <stapelberg+linux@google.com>
+Cc: Wu Fengguang <fengguang.wu@intel.com>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/fs-writeback.c   |   12 ++++++------
+ mm/backing-dev.c    |   10 +++++-----
+ mm/page-writeback.c |    6 +++++-
+ 3 files changed, 16 insertions(+), 12 deletions(-)
+
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -134,10 +134,10 @@ static bool inode_io_list_move_locked(st
+ static void wb_wakeup(struct bdi_writeback *wb)
+ {
+-      spin_lock_bh(&wb->work_lock);
++      spin_lock_irq(&wb->work_lock);
+       if (test_bit(WB_registered, &wb->state))
+               mod_delayed_work(bdi_wq, &wb->dwork, 0);
+-      spin_unlock_bh(&wb->work_lock);
++      spin_unlock_irq(&wb->work_lock);
+ }
+ static void finish_writeback_work(struct bdi_writeback *wb,
+@@ -164,7 +164,7 @@ static void wb_queue_work(struct bdi_wri
+       if (work->done)
+               atomic_inc(&work->done->cnt);
+-      spin_lock_bh(&wb->work_lock);
++      spin_lock_irq(&wb->work_lock);
+       if (test_bit(WB_registered, &wb->state)) {
+               list_add_tail(&work->list, &wb->work_list);
+@@ -172,7 +172,7 @@ static void wb_queue_work(struct bdi_wri
+       } else
+               finish_writeback_work(wb, work);
+-      spin_unlock_bh(&wb->work_lock);
++      spin_unlock_irq(&wb->work_lock);
+ }
+ /**
+@@ -2109,13 +2109,13 @@ static struct wb_writeback_work *get_nex
+ {
+       struct wb_writeback_work *work = NULL;
+-      spin_lock_bh(&wb->work_lock);
++      spin_lock_irq(&wb->work_lock);
+       if (!list_empty(&wb->work_list)) {
+               work = list_entry(wb->work_list.next,
+                                 struct wb_writeback_work, list);
+               list_del_init(&work->list);
+       }
+-      spin_unlock_bh(&wb->work_lock);
++      spin_unlock_irq(&wb->work_lock);
+       return work;
+ }
+--- a/mm/backing-dev.c
++++ b/mm/backing-dev.c
+@@ -258,10 +258,10 @@ void wb_wakeup_delayed(struct bdi_writeb
+       unsigned long timeout;
+       timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
+-      spin_lock_bh(&wb->work_lock);
++      spin_lock_irq(&wb->work_lock);
+       if (test_bit(WB_registered, &wb->state))
+               queue_delayed_work(bdi_wq, &wb->dwork, timeout);
+-      spin_unlock_bh(&wb->work_lock);
++      spin_unlock_irq(&wb->work_lock);
+ }
+ static void wb_update_bandwidth_workfn(struct work_struct *work)
+@@ -337,12 +337,12 @@ static void cgwb_remove_from_bdi_list(st
+ static void wb_shutdown(struct bdi_writeback *wb)
+ {
+       /* Make sure nobody queues further work */
+-      spin_lock_bh(&wb->work_lock);
++      spin_lock_irq(&wb->work_lock);
+       if (!test_and_clear_bit(WB_registered, &wb->state)) {
+-              spin_unlock_bh(&wb->work_lock);
++              spin_unlock_irq(&wb->work_lock);
+               return;
+       }
+-      spin_unlock_bh(&wb->work_lock);
++      spin_unlock_irq(&wb->work_lock);
+       cgwb_remove_from_bdi_list(wb);
+       /*
+--- a/mm/page-writeback.c
++++ b/mm/page-writeback.c
+@@ -2755,6 +2755,7 @@ static void wb_inode_writeback_start(str
+ static void wb_inode_writeback_end(struct bdi_writeback *wb)
+ {
++      unsigned long flags;
+       atomic_dec(&wb->writeback_inodes);
+       /*
+        * Make sure estimate of writeback throughput gets updated after
+@@ -2763,7 +2764,10 @@ static void wb_inode_writeback_end(struc
+        * that if multiple inodes end writeback at a similar time, they get
+        * batched into one bandwidth update.
+        */
+-      queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL);
++      spin_lock_irqsave(&wb->work_lock, flags);
++      if (test_bit(WB_registered, &wb->state))
++              queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL);
++      spin_unlock_irqrestore(&wb->work_lock, flags);
+ }
+ int test_clear_page_writeback(struct page *page)
diff --git a/queue-5.15/x86-bugs-add-unknown-reporting-for-mmio-stale-data.patch b/queue-5.15/x86-bugs-add-unknown-reporting-for-mmio-stale-data.patch
new file mode 100644 (file)
index 0000000..1ce6855
--- /dev/null
@@ -0,0 +1,209 @@
+From 7df548840c496b0141fb2404b889c346380c2b22 Mon Sep 17 00:00:00 2001
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Wed, 3 Aug 2022 14:41:32 -0700
+Subject: x86/bugs: Add "unknown" reporting for MMIO Stale Data
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 7df548840c496b0141fb2404b889c346380c2b22 upstream.
+
+Older Intel CPUs that are not in the affected processor list for MMIO
+Stale Data vulnerabilities currently report "Not affected" in sysfs,
+which may not be correct. Vulnerability status for these older CPUs is
+unknown.
+
+Add known-not-affected CPUs to the whitelist. Report "unknown"
+mitigation status for CPUs that are not in blacklist, whitelist and also
+don't enumerate MSR ARCH_CAPABILITIES bits that reflect hardware
+immunity to MMIO Stale Data vulnerabilities.
+
+Mitigation is not deployed when the status is unknown.
+
+  [ bp: Massage, fixup. ]
+
+Fixes: 8d50cdf8b834 ("x86/speculation/mmio: Add sysfs reporting for Processor MMIO Stale Data")
+Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Suggested-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/a932c154772f2121794a5f2eded1a11013114711.1657846269.git.pawan.kumar.gupta@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst |   14 +++
+ arch/x86/include/asm/cpufeatures.h                              |    5 -
+ arch/x86/kernel/cpu/bugs.c                                      |   14 ++-
+ arch/x86/kernel/cpu/common.c                                    |   42 ++++++----
+ 4 files changed, 56 insertions(+), 19 deletions(-)
+
+--- a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
++++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
+@@ -230,6 +230,20 @@ The possible values in this file are:
+      * - 'Mitigation: Clear CPU buffers'
+        - The processor is vulnerable and the CPU buffer clearing mitigation is
+          enabled.
++     * - 'Unknown: No mitigations'
++       - The processor vulnerability status is unknown because it is
++       out of Servicing period. Mitigation is not attempted.
++
++Definitions:
++------------
++
++Servicing period: The process of providing functional and security updates to
++Intel processors or platforms, utilizing the Intel Platform Update (IPU)
++process or other similar mechanisms.
++
++End of Servicing Updates (ESU): ESU is the date at which Intel will no
++longer provide Servicing, such as through IPU or other similar update
++processes. ESU dates will typically be aligned to end of quarter.
+ If the processor is vulnerable then the following information is appended to
+ the above information:
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -446,7 +446,8 @@
+ #define X86_BUG_ITLB_MULTIHIT         X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
+ #define X86_BUG_SRBDS                 X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
+ #define X86_BUG_MMIO_STALE_DATA               X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
+-#define X86_BUG_RETBLEED              X86_BUG(26) /* CPU is affected by RETBleed */
+-#define X86_BUG_EIBRS_PBRSB           X86_BUG(27) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
++#define X86_BUG_MMIO_UNKNOWN          X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */
++#define X86_BUG_RETBLEED              X86_BUG(27) /* CPU is affected by RETBleed */
++#define X86_BUG_EIBRS_PBRSB           X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
+ #endif /* _ASM_X86_CPUFEATURES_H */
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -433,7 +433,8 @@ static void __init mmio_select_mitigatio
+       u64 ia32_cap;
+       if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) ||
+-          cpu_mitigations_off()) {
++           boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) ||
++           cpu_mitigations_off()) {
+               mmio_mitigation = MMIO_MITIGATION_OFF;
+               return;
+       }
+@@ -538,6 +539,8 @@ out:
+               pr_info("TAA: %s\n", taa_strings[taa_mitigation]);
+       if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
+               pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]);
++      else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
++              pr_info("MMIO Stale Data: Unknown: No mitigations\n");
+ }
+ static void __init md_clear_select_mitigation(void)
+@@ -2268,6 +2271,9 @@ static ssize_t tsx_async_abort_show_stat
+ static ssize_t mmio_stale_data_show_state(char *buf)
+ {
++      if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
++              return sysfs_emit(buf, "Unknown: No mitigations\n");
++
+       if (mmio_mitigation == MMIO_MITIGATION_OFF)
+               return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]);
+@@ -2414,6 +2420,7 @@ static ssize_t cpu_show_common(struct de
+               return srbds_show_state(buf);
+       case X86_BUG_MMIO_STALE_DATA:
++      case X86_BUG_MMIO_UNKNOWN:
+               return mmio_stale_data_show_state(buf);
+       case X86_BUG_RETBLEED:
+@@ -2473,7 +2480,10 @@ ssize_t cpu_show_srbds(struct device *de
+ ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf)
+ {
+-      return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
++      if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
++              return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_UNKNOWN);
++      else
++              return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
+ }
+ ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf)
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1027,7 +1027,8 @@ static void identify_cpu_without_cpuid(s
+ #define NO_SWAPGS             BIT(6)
+ #define NO_ITLB_MULTIHIT      BIT(7)
+ #define NO_SPECTRE_V2         BIT(8)
+-#define NO_EIBRS_PBRSB                BIT(9)
++#define NO_MMIO                       BIT(9)
++#define NO_EIBRS_PBRSB                BIT(10)
+ #define VULNWL(vendor, family, model, whitelist)      \
+       X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
+@@ -1048,6 +1049,11 @@ static const __initconst struct x86_cpu_
+       VULNWL(NSC,     5, X86_MODEL_ANY,       NO_SPECULATION),
+       /* Intel Family 6 */
++      VULNWL_INTEL(TIGERLAKE,                 NO_MMIO),
++      VULNWL_INTEL(TIGERLAKE_L,               NO_MMIO),
++      VULNWL_INTEL(ALDERLAKE,                 NO_MMIO),
++      VULNWL_INTEL(ALDERLAKE_L,               NO_MMIO),
++
+       VULNWL_INTEL(ATOM_SALTWELL,             NO_SPECULATION | NO_ITLB_MULTIHIT),
+       VULNWL_INTEL(ATOM_SALTWELL_TABLET,      NO_SPECULATION | NO_ITLB_MULTIHIT),
+       VULNWL_INTEL(ATOM_SALTWELL_MID,         NO_SPECULATION | NO_ITLB_MULTIHIT),
+@@ -1066,9 +1072,9 @@ static const __initconst struct x86_cpu_
+       VULNWL_INTEL(ATOM_AIRMONT_MID,          NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+       VULNWL_INTEL(ATOM_AIRMONT_NP,           NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+-      VULNWL_INTEL(ATOM_GOLDMONT,             NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+-      VULNWL_INTEL(ATOM_GOLDMONT_D,           NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+-      VULNWL_INTEL(ATOM_GOLDMONT_PLUS,        NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
++      VULNWL_INTEL(ATOM_GOLDMONT,             NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++      VULNWL_INTEL(ATOM_GOLDMONT_D,           NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++      VULNWL_INTEL(ATOM_GOLDMONT_PLUS,        NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
+       /*
+        * Technically, swapgs isn't serializing on AMD (despite it previously
+@@ -1083,18 +1089,18 @@ static const __initconst struct x86_cpu_
+       VULNWL_INTEL(ATOM_TREMONT_D,            NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
+       /* AMD Family 0xf - 0x12 */
+-      VULNWL_AMD(0x0f,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+-      VULNWL_AMD(0x10,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+-      VULNWL_AMD(0x11,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+-      VULNWL_AMD(0x12,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
++      VULNWL_AMD(0x0f,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++      VULNWL_AMD(0x10,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++      VULNWL_AMD(0x11,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++      VULNWL_AMD(0x12,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+       /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
+-      VULNWL_AMD(X86_FAMILY_ANY,      NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+-      VULNWL_HYGON(X86_FAMILY_ANY,    NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
++      VULNWL_AMD(X86_FAMILY_ANY,      NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++      VULNWL_HYGON(X86_FAMILY_ANY,    NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+       /* Zhaoxin Family 7 */
+-      VULNWL(CENTAUR, 7, X86_MODEL_ANY,       NO_SPECTRE_V2 | NO_SWAPGS),
+-      VULNWL(ZHAOXIN, 7, X86_MODEL_ANY,       NO_SPECTRE_V2 | NO_SWAPGS),
++      VULNWL(CENTAUR, 7, X86_MODEL_ANY,       NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
++      VULNWL(ZHAOXIN, 7, X86_MODEL_ANY,       NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
+       {}
+ };
+@@ -1248,10 +1254,16 @@ static void __init cpu_set_bug_bits(stru
+        * Affected CPU list is generally enough to enumerate the vulnerability,
+        * but for virtualization case check for ARCH_CAP MSR bits also, VMM may
+        * not want the guest to enumerate the bug.
++       *
++       * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist,
++       * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits.
+        */
+-      if (cpu_matches(cpu_vuln_blacklist, MMIO) &&
+-          !arch_cap_mmio_immune(ia32_cap))
+-              setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
++      if (!arch_cap_mmio_immune(ia32_cap)) {
++              if (cpu_matches(cpu_vuln_blacklist, MMIO))
++                      setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
++              else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO))
++                      setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN);
++      }
+       if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
+               if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
diff --git a/queue-5.15/x86-nospec-unwreck-the-rsb-stuffing.patch b/queue-5.15/x86-nospec-unwreck-the-rsb-stuffing.patch
new file mode 100644 (file)
index 0000000..8ae54df
--- /dev/null
@@ -0,0 +1,128 @@
+From 4e3aa9238277597c6c7624f302d81a7b568b6f2d Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 16 Aug 2022 14:28:36 +0200
+Subject: x86/nospec: Unwreck the RSB stuffing
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 4e3aa9238277597c6c7624f302d81a7b568b6f2d upstream.
+
+Commit 2b1299322016 ("x86/speculation: Add RSB VM Exit protections")
+made a right mess of the RSB stuffing, rewrite the whole thing to not
+suck.
+
+Thanks to Andrew for the enlightening comment about Post-Barrier RSB
+things so we can make this code less magical.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/YvuNdDWoUZSBjYcm@worktop.programming.kicks-ass.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h |   80 +++++++++++++++++------------------
+ 1 file changed, 39 insertions(+), 41 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -35,33 +35,44 @@
+ #define RSB_CLEAR_LOOPS               32      /* To forcibly overwrite all entries */
+ /*
++ * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN.
++ */
++#define __FILL_RETURN_SLOT                    \
++      ANNOTATE_INTRA_FUNCTION_CALL;           \
++      call    772f;                           \
++      int3;                                   \
++772:
++
++/*
++ * Stuff the entire RSB.
++ *
+  * Google experimented with loop-unrolling and this turned out to be
+  * the optimal version - two calls, each with their own speculation
+  * trap should their return address end up getting used, in a loop.
+  */
+-#define __FILL_RETURN_BUFFER(reg, nr, sp)     \
+-      mov     $(nr/2), reg;                   \
+-771:                                          \
+-      ANNOTATE_INTRA_FUNCTION_CALL;           \
+-      call    772f;                           \
+-773:  /* speculation trap */                  \
+-      UNWIND_HINT_EMPTY;                      \
+-      pause;                                  \
+-      lfence;                                 \
+-      jmp     773b;                           \
+-772:                                          \
+-      ANNOTATE_INTRA_FUNCTION_CALL;           \
+-      call    774f;                           \
+-775:  /* speculation trap */                  \
+-      UNWIND_HINT_EMPTY;                      \
+-      pause;                                  \
+-      lfence;                                 \
+-      jmp     775b;                           \
+-774:                                          \
+-      add     $(BITS_PER_LONG/8) * 2, sp;     \
+-      dec     reg;                            \
+-      jnz     771b;                           \
+-      /* barrier for jnz misprediction */     \
++#define __FILL_RETURN_BUFFER(reg, nr)                 \
++      mov     $(nr/2), reg;                           \
++771:                                                  \
++      __FILL_RETURN_SLOT                              \
++      __FILL_RETURN_SLOT                              \
++      add     $(BITS_PER_LONG/8) * 2, %_ASM_SP;       \
++      dec     reg;                                    \
++      jnz     771b;                                   \
++      /* barrier for jnz misprediction */             \
++      lfence;
++
++/*
++ * Stuff a single RSB slot.
++ *
++ * To mitigate Post-Barrier RSB speculation, one CALL instruction must be
++ * forced to retire before letting a RET instruction execute.
++ *
++ * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed
++ * before this point.
++ */
++#define __FILL_ONE_RETURN                             \
++      __FILL_RETURN_SLOT                              \
++      add     $(BITS_PER_LONG/8), %_ASM_SP;           \
+       lfence;
+ #ifdef __ASSEMBLY__
+@@ -120,28 +131,15 @@
+ #endif
+ .endm
+-.macro ISSUE_UNBALANCED_RET_GUARD
+-      ANNOTATE_INTRA_FUNCTION_CALL
+-      call .Lunbalanced_ret_guard_\@
+-      int3
+-.Lunbalanced_ret_guard_\@:
+-      add $(BITS_PER_LONG/8), %_ASM_SP
+-      lfence
+-.endm
+-
+  /*
+   * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+   * monstrosity above, manually.
+   */
+-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2
+-.ifb \ftr2
+-      ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
+-.else
+-      ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2
+-.endif
+-      __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
+-.Lunbalanced_\@:
+-      ISSUE_UNBALANCED_RET_GUARD
++.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS)
++      ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \
++              __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \
++              __stringify(__FILL_ONE_RETURN), \ftr2
++
+ .Lskip_rsb_\@:
+ .endm
diff --git a/queue-5.15/x86-unwind-orc-unwind-ftrace-trampolines-with-correct-orc-entry.patch b/queue-5.15/x86-unwind-orc-unwind-ftrace-trampolines-with-correct-orc-entry.patch
new file mode 100644 (file)
index 0000000..f76b902
--- /dev/null
@@ -0,0 +1,72 @@
+From fc2e426b1161761561624ebd43ce8c8d2fa058da Mon Sep 17 00:00:00 2001
+From: Chen Zhongjin <chenzhongjin@huawei.com>
+Date: Fri, 19 Aug 2022 16:43:34 +0800
+Subject: x86/unwind/orc: Unwind ftrace trampolines with correct ORC entry
+
+From: Chen Zhongjin <chenzhongjin@huawei.com>
+
+commit fc2e426b1161761561624ebd43ce8c8d2fa058da upstream.
+
+When meeting ftrace trampolines in ORC unwinding, the unwinder uses the
+address of ftrace_{regs_}call to find the ORC entry, which gets the next
+frame at sp+176.
+
+If an IRQ hits at sub $0xa8,%rsp, the next frame should be at sp+8
+instead of sp+176. Using the wrong ORC entry makes the unwinder skip the
+correct frame and throw warnings such as "wrong direction" or "can't
+access registers", etc., depending on the content of the incorrect frame address.
+
+By adding the offset *ip - ops->trampoline* to the base address
+ftrace_{regs_}caller, we can get the correct address to find the ORC entry.
+
+Also change "caller" to "tramp_addr" to make variable name conform to
+its content.
+
+[ mingo: Clarified the changelog a bit. ]
+
+Fixes: 6be7fa3c74d1 ("ftrace, orc, x86: Handle ftrace dynamically allocated trampolines")
+Signed-off-by: Chen Zhongjin <chenzhongjin@huawei.com>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/20220819084334.244016-1-chenzhongjin@huawei.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/unwind_orc.c |   15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kernel/unwind_orc.c
++++ b/arch/x86/kernel/unwind_orc.c
+@@ -93,22 +93,27 @@ static struct orc_entry *orc_find(unsign
+ static struct orc_entry *orc_ftrace_find(unsigned long ip)
+ {
+       struct ftrace_ops *ops;
+-      unsigned long caller;
++      unsigned long tramp_addr, offset;
+       ops = ftrace_ops_trampoline(ip);
+       if (!ops)
+               return NULL;
++      /* Set tramp_addr to the start of the code copied by the trampoline */
+       if (ops->flags & FTRACE_OPS_FL_SAVE_REGS)
+-              caller = (unsigned long)ftrace_regs_call;
++              tramp_addr = (unsigned long)ftrace_regs_caller;
+       else
+-              caller = (unsigned long)ftrace_call;
++              tramp_addr = (unsigned long)ftrace_caller;
++
++      /* Now place tramp_addr to the location within the trampoline ip is at */
++      offset = ip - ops->trampoline;
++      tramp_addr += offset;
+       /* Prevent unlikely recursion */
+-      if (ip == caller)
++      if (ip == tramp_addr)
+               return NULL;
+-      return orc_find(caller);
++      return orc_find(tramp_addr);
+ }
+ #else
+ static struct orc_entry *orc_ftrace_find(unsigned long ip)
diff --git a/queue-5.15/xen-privcmd-fix-error-exit-of-privcmd_ioctl_dm_op.patch b/queue-5.15/xen-privcmd-fix-error-exit-of-privcmd_ioctl_dm_op.patch
new file mode 100644 (file)
index 0000000..c08c929
--- /dev/null
@@ -0,0 +1,95 @@
+From c5deb27895e017a0267de0a20d140ad5fcc55a54 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Thu, 25 Aug 2022 16:19:18 +0200
+Subject: xen/privcmd: fix error exit of privcmd_ioctl_dm_op()
+
+From: Juergen Gross <jgross@suse.com>
+
+commit c5deb27895e017a0267de0a20d140ad5fcc55a54 upstream.
+
+The error exit of privcmd_ioctl_dm_op() is calling unlock_pages()
+potentially with pages being NULL, leading to a NULL dereference.
+
+Additionally lock_pages() doesn't check for pin_user_pages_fast()
+having been completely successful, resulting in potentially not
+locking all pages into memory. This could result in sporadic failures
+when using the related memory in user mode.
+
+Fix all of that by always calling unlock_pages() with the real number
+of pinned pages, which will be zero in case pages is NULL, and by
+checking that the number of pages pinned by pin_user_pages_fast() matches
+the expected number of pages.
+
+Cc: <stable@vger.kernel.org>
+Fixes: ab520be8cd5d ("xen/privcmd: Add IOCTL_PRIVCMD_DM_OP")
+Reported-by: Rustam Subkhankulov <subkhankulov@ispras.ru>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
+Link: https://lore.kernel.org/r/20220825141918.3581-1-jgross@suse.com
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/xen/privcmd.c |   21 +++++++++++----------
+ 1 file changed, 11 insertions(+), 10 deletions(-)
+
+--- a/drivers/xen/privcmd.c
++++ b/drivers/xen/privcmd.c
+@@ -581,27 +581,30 @@ static int lock_pages(
+       struct privcmd_dm_op_buf kbufs[], unsigned int num,
+       struct page *pages[], unsigned int nr_pages, unsigned int *pinned)
+ {
+-      unsigned int i;
++      unsigned int i, off = 0;
+-      for (i = 0; i < num; i++) {
++      for (i = 0; i < num; ) {
+               unsigned int requested;
+               int page_count;
+               requested = DIV_ROUND_UP(
+                       offset_in_page(kbufs[i].uptr) + kbufs[i].size,
+-                      PAGE_SIZE);
++                      PAGE_SIZE) - off;
+               if (requested > nr_pages)
+                       return -ENOSPC;
+               page_count = pin_user_pages_fast(
+-                      (unsigned long) kbufs[i].uptr,
++                      (unsigned long)kbufs[i].uptr + off * PAGE_SIZE,
+                       requested, FOLL_WRITE, pages);
+-              if (page_count < 0)
+-                      return page_count;
++              if (page_count <= 0)
++                      return page_count ? : -EFAULT;
+               *pinned += page_count;
+               nr_pages -= page_count;
+               pages += page_count;
++
++              off = (requested == page_count) ? 0 : off + page_count;
++              i += !off;
+       }
+       return 0;
+@@ -677,10 +680,8 @@ static long privcmd_ioctl_dm_op(struct f
+       }
+       rc = lock_pages(kbufs, kdata.num, pages, nr_pages, &pinned);
+-      if (rc < 0) {
+-              nr_pages = pinned;
++      if (rc < 0)
+               goto out;
+-      }
+       for (i = 0; i < kdata.num; i++) {
+               set_xen_guest_handle(xbufs[i].h, kbufs[i].uptr);
+@@ -692,7 +693,7 @@ static long privcmd_ioctl_dm_op(struct f
+       xen_preemptible_hcall_end();
+ out:
+-      unlock_pages(pages, nr_pages);
++      unlock_pages(pages, pinned);
+       kfree(xbufs);
+       kfree(pages);
+       kfree(kbufs);