From: Greg Kroah-Hartman Date: Mon, 29 Aug 2022 07:48:42 +0000 (+0200) Subject: 5.15-stable patches X-Git-Tag: v5.10.140~22 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=400868aea901427658e4456f1f4d9bbf061acea6;p=thirdparty%2Fkernel%2Fstable-queue.git 5.15-stable patches added patches: acpi-processor-remove-freq-qos-request-for-all-cpus.patch asm-generic-sections-refactor-memory_intersects.patch bootmem-remove-the-vmemmap-pages-from-kmemleak-in-put_page_bootmem.patch btrfs-add-info-when-mount-fails-due-to-stale-replace-target.patch btrfs-check-if-root-is-readonly-while-setting-security-xattr.patch btrfs-fix-possible-memory-leak-in-btrfs_get_dev_args_from_path.patch btrfs-fix-silent-failure-when-deleting-root-reference.patch btrfs-replace-drop-assert-for-suspended-replace.patch fbdev-fbcon-properly-revert-changes-when-vc_resize-failed.patch loop-check-for-overflow-while-configuring-loop.patch mm-damon-dbgfs-avoid-duplicate-context-directory-creation.patch nouveau-explicitly-wait-on-the-fence-in-nouveau_bo_move_m2mf.patch perf-x86-lbr-enable-the-branch-type-for-the-arch-lbr-by-default.patch revert-memcg-cleanup-racy-sum-avoidance-code.patch riscv-traps-add-missing-prototype.patch s390-fix-double-free-of-gs-and-ri-cbs-on-fork-failure.patch s390-mm-do-not-trigger-write-fault-when-vma-does-not-allow-vm_write.patch smb3-missing-inode-locks-in-punch-hole.patch writeback-avoid-use-after-free-after-removing-device.patch x86-bugs-add-unknown-reporting-for-mmio-stale-data.patch x86-nospec-unwreck-the-rsb-stuffing.patch x86-unwind-orc-unwind-ftrace-trampolines-with-correct-orc-entry.patch xen-privcmd-fix-error-exit-of-privcmd_ioctl_dm_op.patch --- diff --git a/queue-5.15/acpi-processor-remove-freq-qos-request-for-all-cpus.patch b/queue-5.15/acpi-processor-remove-freq-qos-request-for-all-cpus.patch new file mode 100644 index 00000000000..7ff6fe10846 --- /dev/null +++ b/queue-5.15/acpi-processor-remove-freq-qos-request-for-all-cpus.patch @@ -0,0 +1,38 @@ +From 
36527b9d882362567ceb4eea8666813280f30e6f Mon Sep 17 00:00:00 2001 +From: Riwen Lu +Date: Tue, 23 Aug 2022 15:43:42 +0800 +Subject: ACPI: processor: Remove freq Qos request for all CPUs + +From: Riwen Lu + +commit 36527b9d882362567ceb4eea8666813280f30e6f upstream. + +The freq Qos request would be removed repeatedly if the cpufreq policy +relates to more than one CPU. Then, it would cause the "called for unknown +object" warning. + +Remove the freq Qos request for each CPU relates to the cpufreq policy, +instead of removing repeatedly for the last CPU of it. + +Fixes: a1bb46c36ce3 ("ACPI: processor: Add QoS requests for all CPUs") +Reported-by: Jeremy Linton +Tested-by: Jeremy Linton +Signed-off-by: Riwen Lu +Cc: 5.4+ # 5.4+ +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/acpi/processor_thermal.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/acpi/processor_thermal.c ++++ b/drivers/acpi/processor_thermal.c +@@ -144,7 +144,7 @@ void acpi_thermal_cpufreq_exit(struct cp + unsigned int cpu; + + for_each_cpu(cpu, policy->related_cpus) { +- struct acpi_processor *pr = per_cpu(processors, policy->cpu); ++ struct acpi_processor *pr = per_cpu(processors, cpu); + + if (pr) + freq_qos_remove_request(&pr->thermal_req); diff --git a/queue-5.15/asm-generic-sections-refactor-memory_intersects.patch b/queue-5.15/asm-generic-sections-refactor-memory_intersects.patch new file mode 100644 index 00000000000..6c6a1e66fa9 --- /dev/null +++ b/queue-5.15/asm-generic-sections-refactor-memory_intersects.patch @@ -0,0 +1,96 @@ +From 0c7d7cc2b4fe2e74ef8728f030f0f1674f9f6aee Mon Sep 17 00:00:00 2001 +From: Quanyang Wang +Date: Fri, 19 Aug 2022 16:11:45 +0800 +Subject: asm-generic: sections: refactor memory_intersects + +From: Quanyang Wang + +commit 0c7d7cc2b4fe2e74ef8728f030f0f1674f9f6aee upstream. 
+ +There are two problems with the current code of memory_intersects: + +First, it doesn't check whether the region (begin, end) falls inside the +region (virt, vend), that is (virt < begin && vend > end). + +The second problem is if vend is equal to begin, it will return true but +this is wrong since vend (virt + size) is not the last address of the +memory region but (virt + size -1) is. The wrong determination will +trigger the misreporting when the function check_for_illegal_area calls +memory_intersects to check if the dma region intersects with stext region. + +The misreporting is as below (stext is at 0x80100000): + WARNING: CPU: 0 PID: 77 at kernel/dma/debug.c:1073 check_for_illegal_area+0x130/0x168 + DMA-API: chipidea-usb2 e0002000.usb: device driver maps memory from kernel text or rodata [addr=800f0000] [len=65536] + Modules linked in: + CPU: 1 PID: 77 Comm: usb-storage Not tainted 5.19.0-yocto-standard #5 + Hardware name: Xilinx Zynq Platform + unwind_backtrace from show_stack+0x18/0x1c + show_stack from dump_stack_lvl+0x58/0x70 + dump_stack_lvl from __warn+0xb0/0x198 + __warn from warn_slowpath_fmt+0x80/0xb4 + warn_slowpath_fmt from check_for_illegal_area+0x130/0x168 + check_for_illegal_area from debug_dma_map_sg+0x94/0x368 + debug_dma_map_sg from __dma_map_sg_attrs+0x114/0x128 + __dma_map_sg_attrs from dma_map_sg_attrs+0x18/0x24 + dma_map_sg_attrs from usb_hcd_map_urb_for_dma+0x250/0x3b4 + usb_hcd_map_urb_for_dma from usb_hcd_submit_urb+0x194/0x214 + usb_hcd_submit_urb from usb_sg_wait+0xa4/0x118 + usb_sg_wait from usb_stor_bulk_transfer_sglist+0xa0/0xec + usb_stor_bulk_transfer_sglist from usb_stor_bulk_srb+0x38/0x70 + usb_stor_bulk_srb from usb_stor_Bulk_transport+0x150/0x360 + usb_stor_Bulk_transport from usb_stor_invoke_transport+0x38/0x440 + usb_stor_invoke_transport from usb_stor_control_thread+0x1e0/0x238 + usb_stor_control_thread from kthread+0xf8/0x104 + kthread from ret_from_fork+0x14/0x2c + +Refactor memory_intersects to fix the two problems 
above. + +Before the 1d7db834a027e ("dma-debug: use memory_intersects() +directly"), memory_intersects is called only by printk_late_init: + +printk_late_init -> init_section_intersects ->memory_intersects. + +There were few places where memory_intersects was called. + +When commit 1d7db834a027e ("dma-debug: use memory_intersects() +directly") was merged and CONFIG_DMA_API_DEBUG is enabled, the DMA +subsystem uses it to check for an illegal area and the calltrace above +is triggered. + +[akpm@linux-foundation.org: fix nearby comment typo] +Link: https://lkml.kernel.org/r/20220819081145.948016-1-quanyang.wang@windriver.com +Fixes: 979559362516 ("asm/sections: add helpers to check for section data") +Signed-off-by: Quanyang Wang +Cc: Ard Biesheuvel +Cc: Arnd Bergmann +Cc: Thierry Reding +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + include/asm-generic/sections.h | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/include/asm-generic/sections.h ++++ b/include/asm-generic/sections.h +@@ -114,7 +114,7 @@ static inline bool memory_contains(void + /** + * memory_intersects - checks if the region occupied by an object intersects + * with another memory region +- * @begin: virtual address of the beginning of the memory regien ++ * @begin: virtual address of the beginning of the memory region + * @end: virtual address of the end of the memory region + * @virt: virtual address of the memory object + * @size: size of the memory object +@@ -127,7 +127,10 @@ static inline bool memory_intersects(voi + { + void *vend = virt + size; + +- return (virt >= begin && virt < end) || (vend >= begin && vend < end); ++ if (virt < end && vend > begin) ++ return true; ++ ++ return false; + } + + /** diff --git a/queue-5.15/bootmem-remove-the-vmemmap-pages-from-kmemleak-in-put_page_bootmem.patch b/queue-5.15/bootmem-remove-the-vmemmap-pages-from-kmemleak-in-put_page_bootmem.patch new file mode 100644 index 00000000000..846bc6b7e20 --- /dev/null 
+++ b/queue-5.15/bootmem-remove-the-vmemmap-pages-from-kmemleak-in-put_page_bootmem.patch @@ -0,0 +1,55 @@ +From dd0ff4d12dd284c334f7e9b07f8f335af856ac78 Mon Sep 17 00:00:00 2001 +From: Liu Shixin +Date: Fri, 19 Aug 2022 17:40:05 +0800 +Subject: bootmem: remove the vmemmap pages from kmemleak in put_page_bootmem + +From: Liu Shixin + +commit dd0ff4d12dd284c334f7e9b07f8f335af856ac78 upstream. + +The vmemmap pages is marked by kmemleak when allocated from memblock. +Remove it from kmemleak when freeing the page. Otherwise, when we reuse +the page, kmemleak may report such an error and then stop working. + + kmemleak: Cannot insert 0xffff98fb6eab3d40 into the object search tree (overlaps existing) + kmemleak: Kernel memory leak detector disabled + kmemleak: Object 0xffff98fb6be00000 (size 335544320): + kmemleak: comm "swapper", pid 0, jiffies 4294892296 + kmemleak: min_count = 0 + kmemleak: count = 0 + kmemleak: flags = 0x1 + kmemleak: checksum = 0 + kmemleak: backtrace: + +Link: https://lkml.kernel.org/r/20220819094005.2928241-1-liushixin2@huawei.com +Fixes: f41f2ed43ca5 (mm: hugetlb: free the vmemmap pages associated with each HugeTLB page) +Signed-off-by: Liu Shixin +Reviewed-by: Muchun Song +Cc: Matthew Wilcox +Cc: Mike Kravetz +Cc: Oscar Salvador +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/bootmem_info.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/mm/bootmem_info.c ++++ b/mm/bootmem_info.c +@@ -12,6 +12,7 @@ + #include + #include + #include ++#include + + void get_page_bootmem(unsigned long info, struct page *page, unsigned long type) + { +@@ -34,6 +35,7 @@ void put_page_bootmem(struct page *page) + ClearPagePrivate(page); + set_page_private(page, 0); + INIT_LIST_HEAD(&page->lru); ++ kmemleak_free_part(page_to_virt(page), PAGE_SIZE); + free_reserved_page(page); + } + } diff --git a/queue-5.15/btrfs-add-info-when-mount-fails-due-to-stale-replace-target.patch 
b/queue-5.15/btrfs-add-info-when-mount-fails-due-to-stale-replace-target.patch new file mode 100644 index 00000000000..426112d5c06 --- /dev/null +++ b/queue-5.15/btrfs-add-info-when-mount-fails-due-to-stale-replace-target.patch @@ -0,0 +1,47 @@ +From f2c3bec215694fb8bc0ef5010f2a758d1906fc2d Mon Sep 17 00:00:00 2001 +From: Anand Jain +Date: Fri, 12 Aug 2022 18:32:19 +0800 +Subject: btrfs: add info when mount fails due to stale replace target + +From: Anand Jain + +commit f2c3bec215694fb8bc0ef5010f2a758d1906fc2d upstream. + +If the replace target device reappears after the suspended replace is +cancelled, it blocks the mount operation as it can't find the matching +replace-item in the metadata. As shown below, + + BTRFS error (device sda5): replace devid present without an active replace item + +To overcome this situation, the user can run the command + + btrfs device scan --forget + +and try the mount command again. And also, to avoid repeating the issue, +superblock on the devid=0 must be wiped. + + wipefs -a device-path-to-devid=0. + +This patch adds some info when this situation occurs. 
+ +Reported-by: Samuel Greiner +Link: https://lore.kernel.org/linux-btrfs/b4f62b10-b295-26ea-71f9-9a5c9299d42c@balkonien.org/T/ +CC: stable@vger.kernel.org # 5.0+ +Signed-off-by: Anand Jain +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/dev-replace.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/btrfs/dev-replace.c ++++ b/fs/btrfs/dev-replace.c +@@ -165,7 +165,7 @@ no_valid_dev_replace_entry_found: + */ + if (btrfs_find_device(fs_info->fs_devices, &args)) { + btrfs_err(fs_info, +- "replace devid present without an active replace item"); ++"replace without active item, run 'device scan --forget' on the target device"); + ret = -EUCLEAN; + } else { + dev_replace->srcdev = NULL; diff --git a/queue-5.15/btrfs-check-if-root-is-readonly-while-setting-security-xattr.patch b/queue-5.15/btrfs-check-if-root-is-readonly-while-setting-security-xattr.patch new file mode 100644 index 00000000000..fe786ec822a --- /dev/null +++ b/queue-5.15/btrfs-check-if-root-is-readonly-while-setting-security-xattr.patch @@ -0,0 +1,60 @@ +From b51111271b0352aa596c5ae8faf06939e91b3b68 Mon Sep 17 00:00:00 2001 +From: Goldwyn Rodrigues +Date: Tue, 16 Aug 2022 16:42:56 -0500 +Subject: btrfs: check if root is readonly while setting security xattr + +From: Goldwyn Rodrigues + +commit b51111271b0352aa596c5ae8faf06939e91b3b68 upstream. + +For a filesystem which has btrfs read-only property set to true, all +write operations including xattr should be denied. However, security +xattr can still be changed even if btrfs ro property is true. + +This happens because xattr_permission() does not have any restrictions +on security.*, system.* and in some cases trusted.* from VFS and +the decision is left to the underlying filesystem. See comments in +xattr_permission() for more details. + +This patch checks if the root is read-only before performing the set +xattr operation. 
+ +Testcase: + + DEV=/dev/vdb + MNT=/mnt + + mkfs.btrfs -f $DEV + mount $DEV $MNT + echo "file one" > $MNT/f1 + + setfattr -n "security.one" -v 2 $MNT/f1 + btrfs property set /mnt ro true + + setfattr -n "security.one" -v 1 $MNT/f1 + + umount $MNT + +CC: stable@vger.kernel.org # 4.9+ +Reviewed-by: Qu Wenruo +Reviewed-by: Filipe Manana +Signed-off-by: Goldwyn Rodrigues +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/xattr.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/fs/btrfs/xattr.c ++++ b/fs/btrfs/xattr.c +@@ -391,6 +391,9 @@ static int btrfs_xattr_handler_set(const + const char *name, const void *buffer, + size_t size, int flags) + { ++ if (btrfs_root_readonly(BTRFS_I(inode)->root)) ++ return -EROFS; ++ + name = xattr_full_name(handler, name); + return btrfs_setxattr_trans(inode, name, buffer, size, flags); + } diff --git a/queue-5.15/btrfs-fix-possible-memory-leak-in-btrfs_get_dev_args_from_path.patch b/queue-5.15/btrfs-fix-possible-memory-leak-in-btrfs_get_dev_args_from_path.patch new file mode 100644 index 00000000000..179749804cc --- /dev/null +++ b/queue-5.15/btrfs-fix-possible-memory-leak-in-btrfs_get_dev_args_from_path.patch @@ -0,0 +1,44 @@ +From 9ea0106a7a3d8116860712e3f17cd52ce99f6707 Mon Sep 17 00:00:00 2001 +From: Zixuan Fu +Date: Mon, 15 Aug 2022 23:16:06 +0800 +Subject: btrfs: fix possible memory leak in btrfs_get_dev_args_from_path() + +From: Zixuan Fu + +commit 9ea0106a7a3d8116860712e3f17cd52ce99f6707 upstream. + +In btrfs_get_dev_args_from_path(), btrfs_get_bdev_and_sb() can fail if +the path is invalid. In this case, btrfs_get_dev_args_from_path() +returns directly without freeing args->uuid and args->fsid allocated +before, which causes memory leak. + +To fix these possible leaks, when btrfs_get_bdev_and_sb() fails, +btrfs_put_dev_args_from_path() is called to clean up the memory. 
+ +Reported-by: TOTE Robot +Fixes: faa775c41d655 ("btrfs: add a btrfs_get_dev_args_from_path helper") +CC: stable@vger.kernel.org # 5.16 +Reviewed-by: Boris Burkov +Signed-off-by: Zixuan Fu +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/volumes.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -2392,8 +2392,11 @@ int btrfs_get_dev_args_from_path(struct + + ret = btrfs_get_bdev_and_sb(path, FMODE_READ, fs_info->bdev_holder, 0, + &bdev, &disk_super); +- if (ret) ++ if (ret) { ++ btrfs_put_dev_args_from_path(args); + return ret; ++ } ++ + args->devid = btrfs_stack_device_id(&disk_super->dev_item); + memcpy(args->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE); + if (btrfs_fs_incompat(fs_info, METADATA_UUID)) diff --git a/queue-5.15/btrfs-fix-silent-failure-when-deleting-root-reference.patch b/queue-5.15/btrfs-fix-silent-failure-when-deleting-root-reference.patch new file mode 100644 index 00000000000..f519ade1425 --- /dev/null +++ b/queue-5.15/btrfs-fix-silent-failure-when-deleting-root-reference.patch @@ -0,0 +1,43 @@ +From 47bf225a8d2cccb15f7e8d4a1ed9b757dd86afd7 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 22 Aug 2022 15:47:09 +0100 +Subject: btrfs: fix silent failure when deleting root reference + +From: Filipe Manana + +commit 47bf225a8d2cccb15f7e8d4a1ed9b757dd86afd7 upstream. + +At btrfs_del_root_ref(), if btrfs_search_slot() returns an error, we end +up returning from the function with a value of 0 (success). This happens +because the function returns the value stored in the variable 'err', +which is 0, while the error value we got from btrfs_search_slot() is +stored in the 'ret' variable. + +So fix it by setting 'err' with the error value. 
+ +Fixes: 8289ed9f93bef2 ("btrfs: replace the BUG_ON in btrfs_del_root_ref with proper error handling") +CC: stable@vger.kernel.org # 5.16+ +Reviewed-by: Qu Wenruo +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/root-tree.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/fs/btrfs/root-tree.c ++++ b/fs/btrfs/root-tree.c +@@ -351,9 +351,10 @@ int btrfs_del_root_ref(struct btrfs_tran + key.offset = ref_id; + again: + ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); +- if (ret < 0) ++ if (ret < 0) { ++ err = ret; + goto out; +- if (ret == 0) { ++ } else if (ret == 0) { + leaf = path->nodes[0]; + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_root_ref); diff --git a/queue-5.15/btrfs-replace-drop-assert-for-suspended-replace.patch b/queue-5.15/btrfs-replace-drop-assert-for-suspended-replace.patch new file mode 100644 index 00000000000..718b72b401b --- /dev/null +++ b/queue-5.15/btrfs-replace-drop-assert-for-suspended-replace.patch @@ -0,0 +1,55 @@ +From 59a3991984dbc1fc47e5651a265c5200bd85464e Mon Sep 17 00:00:00 2001 +From: Anand Jain +Date: Fri, 12 Aug 2022 18:32:18 +0800 +Subject: btrfs: replace: drop assert for suspended replace + +From: Anand Jain + +commit 59a3991984dbc1fc47e5651a265c5200bd85464e upstream. + +If the filesystem mounts with the replace-operation in a suspended state +and try to cancel the suspended replace-operation, we hit the assert. The +assert came from the commit fe97e2e173af ("btrfs: dev-replace: replace's +scrub must not be running in suspended state") that was actually not +required. So just remove it. + + $ mount /dev/sda5 /btrfs + + BTRFS info (device sda5): cannot continue dev_replace, tgtdev is missing + BTRFS info (device sda5): you may cancel the operation after 'mount -o degraded' + + $ mount -o degraded /dev/sda5 /btrfs <-- success. 
+ + $ btrfs replace cancel /btrfs + + kernel: assertion failed: ret != -ENOTCONN, in fs/btrfs/dev-replace.c:1131 + kernel: ------------[ cut here ]------------ + kernel: kernel BUG at fs/btrfs/ctree.h:3750! + +After the patch: + + $ btrfs replace cancel /btrfs + + BTRFS info (device sda5): suspended dev_replace from /dev/sda5 (devid 1) to canceled + +Fixes: fe97e2e173af ("btrfs: dev-replace: replace's scrub must not be running in suspended state") +CC: stable@vger.kernel.org # 5.0+ +Signed-off-by: Anand Jain +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/dev-replace.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/fs/btrfs/dev-replace.c ++++ b/fs/btrfs/dev-replace.c +@@ -1151,8 +1151,7 @@ int btrfs_dev_replace_cancel(struct btrf + up_write(&dev_replace->rwsem); + + /* Scrub for replace must not be running in suspended state */ +- ret = btrfs_scrub_cancel(fs_info); +- ASSERT(ret != -ENOTCONN); ++ btrfs_scrub_cancel(fs_info); + + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { diff --git a/queue-5.15/fbdev-fbcon-properly-revert-changes-when-vc_resize-failed.patch b/queue-5.15/fbdev-fbcon-properly-revert-changes-when-vc_resize-failed.patch new file mode 100644 index 00000000000..c1afc43722a --- /dev/null +++ b/queue-5.15/fbdev-fbcon-properly-revert-changes-when-vc_resize-failed.patch @@ -0,0 +1,88 @@ +From a5a923038d70d2d4a86cb4e3f32625a5ee6e7e24 Mon Sep 17 00:00:00 2001 +From: Shigeru Yoshida +Date: Fri, 19 Aug 2022 03:13:36 +0900 +Subject: fbdev: fbcon: Properly revert changes when vc_resize() failed + +From: Shigeru Yoshida + +commit a5a923038d70d2d4a86cb4e3f32625a5ee6e7e24 upstream. + +fbcon_do_set_font() calls vc_resize() when font size is changed. +However, if vc_resize() failed, current implementation doesn't +revert changes for font size, and this causes inconsistent state. + +syzbot reported unable to handle page fault due to this issue [1]. 
+syzbot's repro uses fault injection which cause failure for memory +allocation, so vc_resize() failed. + +This patch fixes this issue by properly revert changes for font +related date when vc_resize() failed. + +Link: https://syzkaller.appspot.com/bug?id=3443d3a1fa6d964dd7310a0cb1696d165a3e07c4 [1] +Reported-by: syzbot+a168dbeaaa7778273c1b@syzkaller.appspotmail.com +Signed-off-by: Shigeru Yoshida +Signed-off-by: Helge Deller +CC: stable@vger.kernel.org # 5.15+ +Signed-off-by: Greg Kroah-Hartman +--- + drivers/video/fbdev/core/fbcon.c | 27 +++++++++++++++++++++++++-- + 1 file changed, 25 insertions(+), 2 deletions(-) + +--- a/drivers/video/fbdev/core/fbcon.c ++++ b/drivers/video/fbdev/core/fbcon.c +@@ -2413,15 +2413,21 @@ static int fbcon_do_set_font(struct vc_d + struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]]; + struct fbcon_ops *ops = info->fbcon_par; + struct fbcon_display *p = &fb_display[vc->vc_num]; +- int resize; ++ int resize, ret, old_userfont, old_width, old_height, old_charcount; + char *old_data = NULL; + + resize = (w != vc->vc_font.width) || (h != vc->vc_font.height); + if (p->userfont) + old_data = vc->vc_font.data; + vc->vc_font.data = (void *)(p->fontdata = data); ++ old_userfont = p->userfont; + if ((p->userfont = userfont)) + REFCOUNT(data)++; ++ ++ old_width = vc->vc_font.width; ++ old_height = vc->vc_font.height; ++ old_charcount = vc->vc_font.charcount; ++ + vc->vc_font.width = w; + vc->vc_font.height = h; + vc->vc_font.charcount = charcount; +@@ -2437,7 +2443,9 @@ static int fbcon_do_set_font(struct vc_d + rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres); + cols /= w; + rows /= h; +- vc_resize(vc, cols, rows); ++ ret = vc_resize(vc, cols, rows); ++ if (ret) ++ goto err_out; + } else if (con_is_visible(vc) + && vc->vc_mode == KD_TEXT) { + fbcon_clear_margins(vc, 0); +@@ -2447,6 +2455,21 @@ static int fbcon_do_set_font(struct vc_d + if (old_data && (--REFCOUNT(old_data) == 0)) + kfree(old_data - FONT_EXTRA_WORDS * 
sizeof(int)); + return 0; ++ ++err_out: ++ p->fontdata = old_data; ++ vc->vc_font.data = (void *)old_data; ++ ++ if (userfont) { ++ p->userfont = old_userfont; ++ REFCOUNT(data)--; ++ } ++ ++ vc->vc_font.width = old_width; ++ vc->vc_font.height = old_height; ++ vc->vc_font.charcount = old_charcount; ++ ++ return ret; + } + + /* diff --git a/queue-5.15/loop-check-for-overflow-while-configuring-loop.patch b/queue-5.15/loop-check-for-overflow-while-configuring-loop.patch new file mode 100644 index 00000000000..9771afb59ef --- /dev/null +++ b/queue-5.15/loop-check-for-overflow-while-configuring-loop.patch @@ -0,0 +1,59 @@ +From c490a0b5a4f36da3918181a8acdc6991d967c5f3 Mon Sep 17 00:00:00 2001 +From: Siddh Raman Pant +Date: Tue, 23 Aug 2022 21:38:10 +0530 +Subject: loop: Check for overflow while configuring loop + +From: Siddh Raman Pant + +commit c490a0b5a4f36da3918181a8acdc6991d967c5f3 upstream. + +The userspace can configure a loop using an ioctl call, wherein +a configuration of type loop_config is passed (see lo_ioctl()'s +case on line 1550 of drivers/block/loop.c). This proceeds to call +loop_configure() which in turn calls loop_set_status_from_info() +(see line 1050 of loop.c), passing &config->info which is of type +loop_info64*. This function then sets the appropriate values, like +the offset. + +loop_device has lo_offset of type loff_t (see line 52 of loop.c), +which is typdef-chained to long long, whereas loop_info64 has +lo_offset of type __u64 (see line 56 of include/uapi/linux/loop.h). + +The function directly copies offset from info to the device as +follows (See line 980 of loop.c): + lo->lo_offset = info->lo_offset; + +This results in an overflow, which triggers a warning in iomap_iter() +due to a call to iomap_iter_done() which has: + WARN_ON_ONCE(iter->iomap.offset > iter->pos); + +Thus, check for negative value during loop_set_status_from_info(). 
+ +Bug report: https://syzkaller.appspot.com/bug?id=c620fe14aac810396d3c3edc9ad73848bf69a29e + +Reported-and-tested-by: syzbot+a8e049cd3abd342936b6@syzkaller.appspotmail.com +Cc: stable@vger.kernel.org +Reviewed-by: Matthew Wilcox (Oracle) +Signed-off-by: Siddh Raman Pant +Reviewed-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20220823160810.181275-1-code@siddh.me +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + drivers/block/loop.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/drivers/block/loop.c ++++ b/drivers/block/loop.c +@@ -1154,6 +1154,11 @@ loop_set_status_from_info(struct loop_de + + lo->lo_offset = info->lo_offset; + lo->lo_sizelimit = info->lo_sizelimit; ++ ++ /* loff_t vars have been assigned __u64 */ ++ if (lo->lo_offset < 0 || lo->lo_sizelimit < 0) ++ return -EOVERFLOW; ++ + memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); + memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE); + lo->lo_file_name[LO_NAME_SIZE-1] = 0; diff --git a/queue-5.15/mm-damon-dbgfs-avoid-duplicate-context-directory-creation.patch b/queue-5.15/mm-damon-dbgfs-avoid-duplicate-context-directory-creation.patch new file mode 100644 index 00000000000..5c8c4d08a4d --- /dev/null +++ b/queue-5.15/mm-damon-dbgfs-avoid-duplicate-context-directory-creation.patch @@ -0,0 +1,53 @@ +From d26f60703606ab425eee9882b32a1781a8bed74d Mon Sep 17 00:00:00 2001 +From: Badari Pulavarty +Date: Sun, 21 Aug 2022 18:08:53 +0000 +Subject: mm/damon/dbgfs: avoid duplicate context directory creation + +From: Badari Pulavarty + +commit d26f60703606ab425eee9882b32a1781a8bed74d upstream. + +When user tries to create a DAMON context via the DAMON debugfs interface +with a name of an already existing context, the context directory creation +fails but a new context is created and added in the internal data +structure, due to absence of the directory creation success check. As a +result, memory could leak and DAMON cannot be turned on. 
An example test +case is as below: + + # cd /sys/kernel/debug/damon/ + # echo "off" > monitor_on + # echo paddr > target_ids + # echo "abc" > mk_context + # echo "abc" > mk_context + # echo $$ > abc/target_ids + # echo "on" > monitor_on <<< fails + +Return value of 'debugfs_create_dir()' is expected to be ignored in +general, but this is an exceptional case as DAMON feature is depending +on the debugfs functionality and it has the potential duplicate name +issue. This commit therefore fixes the issue by checking the directory +creation failure and immediately return the error in the case. + +Link: https://lkml.kernel.org/r/20220821180853.2400-1-sj@kernel.org +Fixes: 75c1c2b53c78 ("mm/damon/dbgfs: support multiple contexts") +Signed-off-by: Badari Pulavarty +Signed-off-by: SeongJae Park +Cc: [ 5.15.x] +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/damon/dbgfs.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/mm/damon/dbgfs.c ++++ b/mm/damon/dbgfs.c +@@ -376,6 +376,9 @@ static int dbgfs_mk_context(char *name) + return -ENOENT; + + new_dir = debugfs_create_dir(name, root); ++ /* Below check is required for a potential duplicated name case */ ++ if (IS_ERR(new_dir)) ++ return PTR_ERR(new_dir); + dbgfs_dirs[dbgfs_nr_ctxs] = new_dir; + + new_ctx = dbgfs_new_ctx(); diff --git a/queue-5.15/nouveau-explicitly-wait-on-the-fence-in-nouveau_bo_move_m2mf.patch b/queue-5.15/nouveau-explicitly-wait-on-the-fence-in-nouveau_bo_move_m2mf.patch new file mode 100644 index 00000000000..74a5a75ecea --- /dev/null +++ b/queue-5.15/nouveau-explicitly-wait-on-the-fence-in-nouveau_bo_move_m2mf.patch @@ -0,0 +1,45 @@ +From 6b04ce966a738ecdd9294c9593e48513c0dc90aa Mon Sep 17 00:00:00 2001 +From: Karol Herbst +Date: Fri, 19 Aug 2022 22:09:28 +0200 +Subject: nouveau: explicitly wait on the fence in nouveau_bo_move_m2mf + +From: Karol Herbst + +commit 6b04ce966a738ecdd9294c9593e48513c0dc90aa upstream. 
+ +It is a bit unclear to us why that's helping, but it does and unbreaks +suspend/resume on a lot of GPUs without any known drawbacks. + +Cc: stable@vger.kernel.org # v5.15+ +Closes: https://gitlab.freedesktop.org/drm/nouveau/-/issues/156 +Signed-off-by: Karol Herbst +Reviewed-by: Lyude Paul +Link: https://patchwork.freedesktop.org/patch/msgid/20220819200928.401416-1-kherbst@redhat.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/nouveau/nouveau_bo.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c +index 05076e530e7d..e29175e4b44c 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_bo.c ++++ b/drivers/gpu/drm/nouveau/nouveau_bo.c +@@ -820,6 +820,15 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, + if (ret == 0) { + ret = nouveau_fence_new(chan, false, &fence); + if (ret == 0) { ++ /* TODO: figure out a better solution here ++ * ++ * wait on the fence here explicitly as going through ++ * ttm_bo_move_accel_cleanup somehow doesn't seem to do it. ++ * ++ * Without this the operation can timeout and we'll fallback to a ++ * software copy, which might take several minutes to finish. ++ */ ++ nouveau_fence_wait(fence, false, false); + ret = ttm_bo_move_accel_cleanup(bo, + &fence->base, + evict, false, +-- +2.37.2 + diff --git a/queue-5.15/perf-x86-lbr-enable-the-branch-type-for-the-arch-lbr-by-default.patch b/queue-5.15/perf-x86-lbr-enable-the-branch-type-for-the-arch-lbr-by-default.patch new file mode 100644 index 00000000000..9eccd21d34d --- /dev/null +++ b/queue-5.15/perf-x86-lbr-enable-the-branch-type-for-the-arch-lbr-by-default.patch @@ -0,0 +1,60 @@ +From 32ba156df1b1c8804a4e5be5339616945eafea22 Mon Sep 17 00:00:00 2001 +From: Kan Liang +Date: Tue, 16 Aug 2022 05:56:11 -0700 +Subject: perf/x86/lbr: Enable the branch type for the Arch LBR by default + +From: Kan Liang + +commit 32ba156df1b1c8804a4e5be5339616945eafea22 upstream. 
+ +On the platform with Arch LBR, the HW raw branch type encoding may leak +to the perf tool when the SAVE_TYPE option is not set. + +In the intel_pmu_store_lbr(), the HW raw branch type is stored in +lbr_entries[].type. If the SAVE_TYPE option is set, the +lbr_entries[].type will be converted into the generic PERF_BR_* type +in the intel_pmu_lbr_filter() and exposed to the user tools. +But if the SAVE_TYPE option is NOT set by the user, the current perf +kernel doesn't clear the field. The HW raw branch type leaks. + +There are two solutions to fix the issue for the Arch LBR. +One is to clear the field if the SAVE_TYPE option is NOT set. +The other solution is to unconditionally convert the branch type and +expose the generic type to the user tools. + +The latter is implemented here, because +- The branch type is valuable information. I don't see a case where + you would not benefit from the branch type. (Stephane Eranian) +- Not having the branch type DOES NOT save any space in the + branch record (Stephane Eranian) +- The Arch LBR HW can retrieve the common branch types from the + LBR_INFO. It doesn't require the high overhead SW disassemble. + +Fixes: 47125db27e47 ("perf/x86/intel/lbr: Support Architectural LBR") +Reported-by: Stephane Eranian +Signed-off-by: Kan Liang +Signed-off-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20220816125612.2042397-1-kan.liang@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/events/intel/lbr.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/arch/x86/events/intel/lbr.c ++++ b/arch/x86/events/intel/lbr.c +@@ -1114,6 +1114,14 @@ static int intel_pmu_setup_hw_lbr_filter + + if (static_cpu_has(X86_FEATURE_ARCH_LBR)) { + reg->config = mask; ++ ++ /* ++ * The Arch LBR HW can retrieve the common branch types ++ * from the LBR_INFO. It doesn't require the high overhead ++ * SW disassemble. ++ * Enable the branch type by default for the Arch LBR. 
++ */ ++ reg->reg |= X86_BR_TYPE_SAVE; + return 0; + } + diff --git a/queue-5.15/revert-memcg-cleanup-racy-sum-avoidance-code.patch b/queue-5.15/revert-memcg-cleanup-racy-sum-avoidance-code.patch new file mode 100644 index 00000000000..7d6297fae2c --- /dev/null +++ b/queue-5.15/revert-memcg-cleanup-racy-sum-avoidance-code.patch @@ -0,0 +1,95 @@ +From dbb16df6443c59e8a1ef21c2272fcf387d600ddf Mon Sep 17 00:00:00 2001 +From: Shakeel Butt +Date: Wed, 17 Aug 2022 17:21:39 +0000 +Subject: Revert "memcg: cleanup racy sum avoidance code" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Shakeel Butt + +commit dbb16df6443c59e8a1ef21c2272fcf387d600ddf upstream. + +This reverts commit 96e51ccf1af33e82f429a0d6baebba29c6448d0f. + +Recently we started running the kernel with rstat infrastructure on +production traffic and begin to see negative memcg stats values. +Particularly the 'sock' stat is the one which we observed having negative +value. + +$ grep "sock " /mnt/memory/job/memory.stat +sock 253952 +total_sock 18446744073708724224 + +Re-run after couple of seconds + +$ grep "sock " /mnt/memory/job/memory.stat +sock 253952 +total_sock 53248 + +For now we are only seeing this issue on large machines (256 CPUs) and +only with 'sock' stat. I think the networking stack increase the stat on +one cpu and decrease it on another cpu much more often. So, this negative +sock is due to rstat flusher flushing the stats on the CPU that has seen +the decrement of sock but missed the CPU that has increments. A typical +race condition. + +For easy stable backport, revert is the most simple solution. For long +term solution, I am thinking of two directions. First is just reduce the +race window by optimizing the rstat flusher. Second is if the reader sees +a negative stat value, force flush and restart the stat collection. +Basically retry but limited. 
+ +Link: https://lkml.kernel.org/r/20220817172139.3141101-1-shakeelb@google.com +Fixes: 96e51ccf1af33e8 ("memcg: cleanup racy sum avoidance code") +Signed-off-by: Shakeel Butt +Cc: "Michal Koutný" +Cc: Johannes Weiner +Cc: Michal Hocko +Cc: Roman Gushchin +Cc: Muchun Song +Cc: David Hildenbrand +Cc: Yosry Ahmed +Cc: Greg Thelen +Cc: [5.15] +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/memcontrol.h | 15 +++++++++++++-- + 1 file changed, 13 insertions(+), 2 deletions(-) + +--- a/include/linux/memcontrol.h ++++ b/include/linux/memcontrol.h +@@ -966,19 +966,30 @@ static inline void mod_memcg_state(struc + + static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) + { +- return READ_ONCE(memcg->vmstats.state[idx]); ++ long x = READ_ONCE(memcg->vmstats.state[idx]); ++#ifdef CONFIG_SMP ++ if (x < 0) ++ x = 0; ++#endif ++ return x; + } + + static inline unsigned long lruvec_page_state(struct lruvec *lruvec, + enum node_stat_item idx) + { + struct mem_cgroup_per_node *pn; ++ long x; + + if (mem_cgroup_disabled()) + return node_page_state(lruvec_pgdat(lruvec), idx); + + pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); +- return READ_ONCE(pn->lruvec_stats.state[idx]); ++ x = READ_ONCE(pn->lruvec_stats.state[idx]); ++#ifdef CONFIG_SMP ++ if (x < 0) ++ x = 0; ++#endif ++ return x; + } + + static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, diff --git a/queue-5.15/riscv-traps-add-missing-prototype.patch b/queue-5.15/riscv-traps-add-missing-prototype.patch new file mode 100644 index 00000000000..ca497e5a63d --- /dev/null +++ b/queue-5.15/riscv-traps-add-missing-prototype.patch @@ -0,0 +1,51 @@ +From d951b20b9def73dcc39a5379831525d0d2a537e9 Mon Sep 17 00:00:00 2001 +From: Conor Dooley +Date: Sun, 14 Aug 2022 15:12:38 +0100 +Subject: riscv: traps: add missing prototype + +From: Conor Dooley + +commit d951b20b9def73dcc39a5379831525d0d2a537e9 upstream. 
+ +Sparse complains: +arch/riscv/kernel/traps.c:213:6: warning: symbol 'shadow_stack' was not declared. Should it be static? + +The variable is used in entry.S, so declare shadow_stack there +alongside SHADOW_OVERFLOW_STACK_SIZE. + +Fixes: 31da94c25aea ("riscv: add VMAP_STACK overflow detection") +Signed-off-by: Conor Dooley +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20220814141237.493457-5-mail@conchuod.ie +Signed-off-by: Palmer Dabbelt +Signed-off-by: Greg Kroah-Hartman +--- + arch/riscv/include/asm/thread_info.h | 2 ++ + arch/riscv/kernel/traps.c | 3 ++- + 2 files changed, 4 insertions(+), 1 deletion(-) + +--- a/arch/riscv/include/asm/thread_info.h ++++ b/arch/riscv/include/asm/thread_info.h +@@ -42,6 +42,8 @@ + + #ifndef __ASSEMBLY__ + ++extern long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE / sizeof(long)]; ++ + #include + #include + +--- a/arch/riscv/kernel/traps.c ++++ b/arch/riscv/kernel/traps.c +@@ -20,9 +20,10 @@ + + #include + #include ++#include + #include + #include +-#include ++#include + + int show_unhandled_signals = 1; + diff --git a/queue-5.15/s390-fix-double-free-of-gs-and-ri-cbs-on-fork-failure.patch b/queue-5.15/s390-fix-double-free-of-gs-and-ri-cbs-on-fork-failure.patch new file mode 100644 index 00000000000..1b84d15a82a --- /dev/null +++ b/queue-5.15/s390-fix-double-free-of-gs-and-ri-cbs-on-fork-failure.patch @@ -0,0 +1,81 @@ +From 13cccafe0edcd03bf1c841de8ab8a1c8e34f77d9 Mon Sep 17 00:00:00 2001 +From: Brian Foster +Date: Tue, 16 Aug 2022 11:54:07 -0400 +Subject: s390: fix double free of GS and RI CBs on fork() failure + +From: Brian Foster + +commit 13cccafe0edcd03bf1c841de8ab8a1c8e34f77d9 upstream. + +The pointers for guarded storage and runtime instrumentation control +blocks are stored in the thread_struct of the associated task. These +pointers are initially copied on fork() via arch_dup_task_struct() +and then cleared via copy_thread() before fork() returns. 
If fork() +happens to fail after the initial task dup and before copy_thread(), +the newly allocated task and associated thread_struct memory are +freed via free_task() -> arch_release_task_struct(). This results in +a double free of the guarded storage and runtime info structs +because the fields in the failed task still refer to memory +associated with the source task. + +This problem can manifest as a BUG_ON() in set_freepointer() (with +CONFIG_SLAB_FREELIST_HARDENED enabled) or KASAN splat (if enabled) +when running trinity syscall fuzz tests on s390x. To avoid this +problem, clear the associated pointer fields in +arch_dup_task_struct() immediately after the new task is copied. +Note that the RI flag is still cleared in copy_thread() because it +resides in thread stack memory and that is where stack info is +copied. + +Signed-off-by: Brian Foster +Fixes: 8d9047f8b967c ("s390/runtime instrumentation: simplify task exit handling") +Fixes: 7b83c6297d2fc ("s390/guarded storage: simplify task exit handling") +Cc: # 4.15 +Reviewed-by: Gerald Schaefer +Reviewed-by: Heiko Carstens +Link: https://lore.kernel.org/r/20220816155407.537372-1-bfoster@redhat.com +Signed-off-by: Vasily Gorbik +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/kernel/process.c | 22 ++++++++++++++++------ + 1 file changed, 16 insertions(+), 6 deletions(-) + +--- a/arch/s390/kernel/process.c ++++ b/arch/s390/kernel/process.c +@@ -91,6 +91,18 @@ int arch_dup_task_struct(struct task_str + + memcpy(dst, src, arch_task_struct_size); + dst->thread.fpu.regs = dst->thread.fpu.fprs; ++ ++ /* ++ * Don't transfer over the runtime instrumentation or the guarded ++ * storage control block pointers. These fields are cleared here instead ++ * of in copy_thread() to avoid premature freeing of associated memory ++ * on fork() failure. Wait to clear the RI flag because ->stack still ++ * refers to the source thread. 
++ */ ++ dst->thread.ri_cb = NULL; ++ dst->thread.gs_cb = NULL; ++ dst->thread.gs_bc_cb = NULL; ++ + return 0; + } + +@@ -149,13 +161,11 @@ int copy_thread(unsigned long clone_flag + frame->childregs.flags = 0; + if (new_stackp) + frame->childregs.gprs[15] = new_stackp; +- +- /* Don't copy runtime instrumentation info */ +- p->thread.ri_cb = NULL; ++ /* ++ * Clear the runtime instrumentation flag after the above childregs ++ * copy. The CB pointer was already cleared in arch_dup_task_struct(). ++ */ + frame->childregs.psw.mask &= ~PSW_MASK_RI; +- /* Don't copy guarded storage control block */ +- p->thread.gs_cb = NULL; +- p->thread.gs_bc_cb = NULL; + + /* Set a new TLS ? */ + if (clone_flags & CLONE_SETTLS) { diff --git a/queue-5.15/s390-mm-do-not-trigger-write-fault-when-vma-does-not-allow-vm_write.patch b/queue-5.15/s390-mm-do-not-trigger-write-fault-when-vma-does-not-allow-vm_write.patch new file mode 100644 index 00000000000..3287d9154d3 --- /dev/null +++ b/queue-5.15/s390-mm-do-not-trigger-write-fault-when-vma-does-not-allow-vm_write.patch @@ -0,0 +1,49 @@ +From 41ac42f137080bc230b5882e3c88c392ab7f2d32 Mon Sep 17 00:00:00 2001 +From: Gerald Schaefer +Date: Wed, 17 Aug 2022 15:26:03 +0200 +Subject: s390/mm: do not trigger write fault when vma does not allow VM_WRITE + +From: Gerald Schaefer + +commit 41ac42f137080bc230b5882e3c88c392ab7f2d32 upstream. + +For non-protection pXd_none() page faults in do_dat_exception(), we +call do_exception() with access == (VM_READ | VM_WRITE | VM_EXEC). +In do_exception(), vma->vm_flags is checked against that before +calling handle_mm_fault(). + +Since commit 92f842eac7ee3 ("[S390] store indication fault optimization"), +we call handle_mm_fault() with FAULT_FLAG_WRITE, when recognizing that +it was a write access. However, the vma flags check is still only +checking against (VM_READ | VM_WRITE | VM_EXEC), and therefore also +calling handle_mm_fault() with FAULT_FLAG_WRITE in cases where the vma +does not allow VM_WRITE. 
+ +Fix this by changing access check in do_exception() to VM_WRITE only, +when recognizing write access. + +Link: https://lkml.kernel.org/r/20220811103435.188481-3-david@redhat.com +Fixes: 92f842eac7ee3 ("[S390] store indication fault optimization") +Cc: +Reported-by: David Hildenbrand +Reviewed-by: Heiko Carstens +Signed-off-by: Gerald Schaefer +Signed-off-by: Vasily Gorbik +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/mm/fault.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/arch/s390/mm/fault.c ++++ b/arch/s390/mm/fault.c +@@ -397,7 +397,9 @@ static inline vm_fault_t do_exception(st + flags = FAULT_FLAG_DEFAULT; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; +- if (access == VM_WRITE || is_write) ++ if (is_write) ++ access = VM_WRITE; ++ if (access == VM_WRITE) + flags |= FAULT_FLAG_WRITE; + mmap_read_lock(mm); + diff --git a/queue-5.15/series b/queue-5.15/series index 48cbf58a5e5..28188538c02 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -92,3 +92,26 @@ ionic-clear-broken-state-on-generation-change.patch ionic-fix-up-issues-with-handling-eagain-on-fw-cmds.patch ionic-vf-initial-random-mac-address-if-no-assigned-m.patch net-stmmac-work-around-sporadic-tx-issue-on-link-up.patch +btrfs-fix-silent-failure-when-deleting-root-reference.patch +btrfs-replace-drop-assert-for-suspended-replace.patch +btrfs-add-info-when-mount-fails-due-to-stale-replace-target.patch +btrfs-check-if-root-is-readonly-while-setting-security-xattr.patch +btrfs-fix-possible-memory-leak-in-btrfs_get_dev_args_from_path.patch +perf-x86-lbr-enable-the-branch-type-for-the-arch-lbr-by-default.patch +x86-unwind-orc-unwind-ftrace-trampolines-with-correct-orc-entry.patch +x86-bugs-add-unknown-reporting-for-mmio-stale-data.patch +x86-nospec-unwreck-the-rsb-stuffing.patch +loop-check-for-overflow-while-configuring-loop.patch +writeback-avoid-use-after-free-after-removing-device.patch +asm-generic-sections-refactor-memory_intersects.patch 
+mm-damon-dbgfs-avoid-duplicate-context-directory-creation.patch +s390-mm-do-not-trigger-write-fault-when-vma-does-not-allow-vm_write.patch +bootmem-remove-the-vmemmap-pages-from-kmemleak-in-put_page_bootmem.patch +s390-fix-double-free-of-gs-and-ri-cbs-on-fork-failure.patch +fbdev-fbcon-properly-revert-changes-when-vc_resize-failed.patch +revert-memcg-cleanup-racy-sum-avoidance-code.patch +acpi-processor-remove-freq-qos-request-for-all-cpus.patch +nouveau-explicitly-wait-on-the-fence-in-nouveau_bo_move_m2mf.patch +smb3-missing-inode-locks-in-punch-hole.patch +xen-privcmd-fix-error-exit-of-privcmd_ioctl_dm_op.patch +riscv-traps-add-missing-prototype.patch diff --git a/queue-5.15/smb3-missing-inode-locks-in-punch-hole.patch b/queue-5.15/smb3-missing-inode-locks-in-punch-hole.patch new file mode 100644 index 00000000000..19bec8607a4 --- /dev/null +++ b/queue-5.15/smb3-missing-inode-locks-in-punch-hole.patch @@ -0,0 +1,60 @@ +From ba0803050d610d5072666be727bca5e03e55b242 Mon Sep 17 00:00:00 2001 +From: David Howells +Date: Tue, 23 Aug 2022 02:10:56 -0500 +Subject: smb3: missing inode locks in punch hole + +From: David Howells + +commit ba0803050d610d5072666be727bca5e03e55b242 upstream. + +smb3 fallocate punch hole was not grabbing the inode or filemap_invalidate +locks so could have race with pagemap reinstantiating the page. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: David Howells +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman +--- + fs/cifs/smb2ops.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/fs/cifs/smb2ops.c ++++ b/fs/cifs/smb2ops.c +@@ -3599,7 +3599,7 @@ static long smb3_zero_range(struct file + static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, + loff_t offset, loff_t len) + { +- struct inode *inode; ++ struct inode *inode = file_inode(file); + struct cifsFileInfo *cfile = file->private_data; + struct file_zero_data_information fsctl_buf; + long rc; +@@ -3608,14 +3608,12 @@ static long smb3_punch_hole(struct file + + xid = get_xid(); + +- inode = d_inode(cfile->dentry); +- ++ inode_lock(inode); + /* Need to make file sparse, if not already, before freeing range. */ + /* Consider adding equivalent for compressed since it could also work */ + if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse)) { + rc = -EOPNOTSUPP; +- free_xid(xid); +- return rc; ++ goto out; + } + + filemap_invalidate_lock(inode->i_mapping); +@@ -3635,8 +3633,10 @@ static long smb3_punch_hole(struct file + true /* is_fctl */, (char *)&fsctl_buf, + sizeof(struct file_zero_data_information), + CIFSMaxBufSize, NULL, NULL); +- free_xid(xid); + filemap_invalidate_unlock(inode->i_mapping); ++out: ++ inode_unlock(inode); ++ free_xid(xid); + return rc; + } + diff --git a/queue-5.15/writeback-avoid-use-after-free-after-removing-device.patch b/queue-5.15/writeback-avoid-use-after-free-after-removing-device.patch new file mode 100644 index 00000000000..537d8ae61fd --- /dev/null +++ b/queue-5.15/writeback-avoid-use-after-free-after-removing-device.patch @@ -0,0 +1,139 @@ +From f87904c075515f3e1d8f4a7115869d3b914674fd Mon Sep 17 00:00:00 2001 +From: Khazhismel Kumykov +Date: Mon, 1 Aug 2022 08:50:34 -0700 +Subject: writeback: avoid use-after-free after removing device + +From: Khazhismel Kumykov + +commit f87904c075515f3e1d8f4a7115869d3b914674fd 
upstream. + +When a disk is removed, bdi_unregister gets called to stop further +writeback and wait for associated delayed work to complete. However, +wb_inode_writeback_end() may schedule bandwidth estimation dwork after +this has completed, which can result in the timer attempting to access the +just freed bdi_writeback. + +Fix this by checking if the bdi_writeback is alive, similar to when +scheduling writeback work. + +Since this requires wb->work_lock, and wb_inode_writeback_end() may get +called from interrupt, switch wb->work_lock to an irqsafe lock. + +Link: https://lkml.kernel.org/r/20220801155034.3772543-1-khazhy@google.com +Fixes: 45a2966fd641 ("writeback: fix bandwidth estimate for spiky workload") +Signed-off-by: Khazhismel Kumykov +Reviewed-by: Jan Kara +Cc: Michael Stapelberg +Cc: Wu Fengguang +Cc: Alexander Viro +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/fs-writeback.c | 12 ++++++------ + mm/backing-dev.c | 10 +++++----- + mm/page-writeback.c | 6 +++++- + 3 files changed, 16 insertions(+), 12 deletions(-) + +--- a/fs/fs-writeback.c ++++ b/fs/fs-writeback.c +@@ -134,10 +134,10 @@ static bool inode_io_list_move_locked(st + + static void wb_wakeup(struct bdi_writeback *wb) + { +- spin_lock_bh(&wb->work_lock); ++ spin_lock_irq(&wb->work_lock); + if (test_bit(WB_registered, &wb->state)) + mod_delayed_work(bdi_wq, &wb->dwork, 0); +- spin_unlock_bh(&wb->work_lock); ++ spin_unlock_irq(&wb->work_lock); + } + + static void finish_writeback_work(struct bdi_writeback *wb, +@@ -164,7 +164,7 @@ static void wb_queue_work(struct bdi_wri + if (work->done) + atomic_inc(&work->done->cnt); + +- spin_lock_bh(&wb->work_lock); ++ spin_lock_irq(&wb->work_lock); + + if (test_bit(WB_registered, &wb->state)) { + list_add_tail(&work->list, &wb->work_list); +@@ -172,7 +172,7 @@ static void wb_queue_work(struct bdi_wri + } else + finish_writeback_work(wb, work); + +- spin_unlock_bh(&wb->work_lock); ++ spin_unlock_irq(&wb->work_lock); + } + + 
/** +@@ -2109,13 +2109,13 @@ static struct wb_writeback_work *get_nex + { + struct wb_writeback_work *work = NULL; + +- spin_lock_bh(&wb->work_lock); ++ spin_lock_irq(&wb->work_lock); + if (!list_empty(&wb->work_list)) { + work = list_entry(wb->work_list.next, + struct wb_writeback_work, list); + list_del_init(&work->list); + } +- spin_unlock_bh(&wb->work_lock); ++ spin_unlock_irq(&wb->work_lock); + return work; + } + +--- a/mm/backing-dev.c ++++ b/mm/backing-dev.c +@@ -258,10 +258,10 @@ void wb_wakeup_delayed(struct bdi_writeb + unsigned long timeout; + + timeout = msecs_to_jiffies(dirty_writeback_interval * 10); +- spin_lock_bh(&wb->work_lock); ++ spin_lock_irq(&wb->work_lock); + if (test_bit(WB_registered, &wb->state)) + queue_delayed_work(bdi_wq, &wb->dwork, timeout); +- spin_unlock_bh(&wb->work_lock); ++ spin_unlock_irq(&wb->work_lock); + } + + static void wb_update_bandwidth_workfn(struct work_struct *work) +@@ -337,12 +337,12 @@ static void cgwb_remove_from_bdi_list(st + static void wb_shutdown(struct bdi_writeback *wb) + { + /* Make sure nobody queues further work */ +- spin_lock_bh(&wb->work_lock); ++ spin_lock_irq(&wb->work_lock); + if (!test_and_clear_bit(WB_registered, &wb->state)) { +- spin_unlock_bh(&wb->work_lock); ++ spin_unlock_irq(&wb->work_lock); + return; + } +- spin_unlock_bh(&wb->work_lock); ++ spin_unlock_irq(&wb->work_lock); + + cgwb_remove_from_bdi_list(wb); + /* +--- a/mm/page-writeback.c ++++ b/mm/page-writeback.c +@@ -2755,6 +2755,7 @@ static void wb_inode_writeback_start(str + + static void wb_inode_writeback_end(struct bdi_writeback *wb) + { ++ unsigned long flags; + atomic_dec(&wb->writeback_inodes); + /* + * Make sure estimate of writeback throughput gets updated after +@@ -2763,7 +2764,10 @@ static void wb_inode_writeback_end(struc + * that if multiple inodes end writeback at a similar time, they get + * batched into one bandwidth update. 
+ */ +- queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL); ++ spin_lock_irqsave(&wb->work_lock, flags); ++ if (test_bit(WB_registered, &wb->state)) ++ queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL); ++ spin_unlock_irqrestore(&wb->work_lock, flags); + } + + int test_clear_page_writeback(struct page *page) diff --git a/queue-5.15/x86-bugs-add-unknown-reporting-for-mmio-stale-data.patch b/queue-5.15/x86-bugs-add-unknown-reporting-for-mmio-stale-data.patch new file mode 100644 index 00000000000..1ce68552a49 --- /dev/null +++ b/queue-5.15/x86-bugs-add-unknown-reporting-for-mmio-stale-data.patch @@ -0,0 +1,209 @@ +From 7df548840c496b0141fb2404b889c346380c2b22 Mon Sep 17 00:00:00 2001 +From: Pawan Gupta +Date: Wed, 3 Aug 2022 14:41:32 -0700 +Subject: x86/bugs: Add "unknown" reporting for MMIO Stale Data + +From: Pawan Gupta + +commit 7df548840c496b0141fb2404b889c346380c2b22 upstream. + +Older Intel CPUs that are not in the affected processor list for MMIO +Stale Data vulnerabilities currently report "Not affected" in sysfs, +which may not be correct. Vulnerability status for these older CPUs is +unknown. + +Add known-not-affected CPUs to the whitelist. Report "unknown" +mitigation status for CPUs that are not in blacklist, whitelist and also +don't enumerate MSR ARCH_CAPABILITIES bits that reflect hardware +immunity to MMIO Stale Data vulnerabilities. + +Mitigation is not deployed when the status is unknown. + + [ bp: Massage, fixup. 
] + +Fixes: 8d50cdf8b834 ("x86/speculation/mmio: Add sysfs reporting for Processor MMIO Stale Data") +Suggested-by: Andrew Cooper +Suggested-by: Tony Luck +Signed-off-by: Pawan Gupta +Signed-off-by: Borislav Petkov +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/a932c154772f2121794a5f2eded1a11013114711.1657846269.git.pawan.kumar.gupta@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst | 14 +++ + arch/x86/include/asm/cpufeatures.h | 5 - + arch/x86/kernel/cpu/bugs.c | 14 ++- + arch/x86/kernel/cpu/common.c | 42 ++++++---- + 4 files changed, 56 insertions(+), 19 deletions(-) + +--- a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst ++++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst +@@ -230,6 +230,20 @@ The possible values in this file are: + * - 'Mitigation: Clear CPU buffers' + - The processor is vulnerable and the CPU buffer clearing mitigation is + enabled. ++ * - 'Unknown: No mitigations' ++ - The processor vulnerability status is unknown because it is ++ out of Servicing period. Mitigation is not attempted. ++ ++Definitions: ++------------ ++ ++Servicing period: The process of providing functional and security updates to ++Intel processors or platforms, utilizing the Intel Platform Update (IPU) ++process or other similar mechanisms. ++ ++End of Servicing Updates (ESU): ESU is the date at which Intel will no ++longer provide Servicing, such as through IPU or other similar update ++processes. ESU dates will typically be aligned to end of quarter. 
+ + If the processor is vulnerable then the following information is appended to + the above information: +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -446,7 +446,8 @@ + #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ + #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ + #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ +-#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */ +-#define X86_BUG_EIBRS_PBRSB X86_BUG(27) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ ++#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */ ++#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */ ++#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ + + #endif /* _ASM_X86_CPUFEATURES_H */ +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -433,7 +433,8 @@ static void __init mmio_select_mitigatio + u64 ia32_cap; + + if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || +- cpu_mitigations_off()) { ++ boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) || ++ cpu_mitigations_off()) { + mmio_mitigation = MMIO_MITIGATION_OFF; + return; + } +@@ -538,6 +539,8 @@ out: + pr_info("TAA: %s\n", taa_strings[taa_mitigation]); + if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) + pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]); ++ else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) ++ pr_info("MMIO Stale Data: Unknown: No mitigations\n"); + } + + static void __init md_clear_select_mitigation(void) +@@ -2268,6 +2271,9 @@ static ssize_t tsx_async_abort_show_stat + + static ssize_t mmio_stale_data_show_state(char *buf) + { ++ if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) ++ return sysfs_emit(buf, "Unknown: No mitigations\n"); ++ + if (mmio_mitigation == 
MMIO_MITIGATION_OFF) + return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]); + +@@ -2414,6 +2420,7 @@ static ssize_t cpu_show_common(struct de + return srbds_show_state(buf); + + case X86_BUG_MMIO_STALE_DATA: ++ case X86_BUG_MMIO_UNKNOWN: + return mmio_stale_data_show_state(buf); + + case X86_BUG_RETBLEED: +@@ -2473,7 +2480,10 @@ ssize_t cpu_show_srbds(struct device *de + + ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf) + { +- return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); ++ if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) ++ return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_UNKNOWN); ++ else ++ return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); + } + + ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf) +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1027,7 +1027,8 @@ static void identify_cpu_without_cpuid(s + #define NO_SWAPGS BIT(6) + #define NO_ITLB_MULTIHIT BIT(7) + #define NO_SPECTRE_V2 BIT(8) +-#define NO_EIBRS_PBRSB BIT(9) ++#define NO_MMIO BIT(9) ++#define NO_EIBRS_PBRSB BIT(10) + + #define VULNWL(vendor, family, model, whitelist) \ + X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist) +@@ -1048,6 +1049,11 @@ static const __initconst struct x86_cpu_ + VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION), + + /* Intel Family 6 */ ++ VULNWL_INTEL(TIGERLAKE, NO_MMIO), ++ VULNWL_INTEL(TIGERLAKE_L, NO_MMIO), ++ VULNWL_INTEL(ALDERLAKE, NO_MMIO), ++ VULNWL_INTEL(ALDERLAKE_L, NO_MMIO), ++ + VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), +@@ -1066,9 +1072,9 @@ static const __initconst struct x86_cpu_ + VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), 
+ +- VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), +- VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), +- VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), ++ VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), ++ VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), ++ VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), + + /* + * Technically, swapgs isn't serializing on AMD (despite it previously +@@ -1083,18 +1089,18 @@ static const __initconst struct x86_cpu_ + VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), + + /* AMD Family 0xf - 0x12 */ +- VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), +- VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), +- VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), +- VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), ++ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), ++ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), ++ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + + /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ +- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), +- VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), ++ VULNWL_HYGON(X86_FAMILY_ANY, 
NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + + /* Zhaoxin Family 7 */ +- VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS), +- VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS), ++ VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), ++ VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), + {} + }; + +@@ -1248,10 +1254,16 @@ static void __init cpu_set_bug_bits(stru + * Affected CPU list is generally enough to enumerate the vulnerability, + * but for virtualization case check for ARCH_CAP MSR bits also, VMM may + * not want the guest to enumerate the bug. ++ * ++ * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist, ++ * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits. + */ +- if (cpu_matches(cpu_vuln_blacklist, MMIO) && +- !arch_cap_mmio_immune(ia32_cap)) +- setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); ++ if (!arch_cap_mmio_immune(ia32_cap)) { ++ if (cpu_matches(cpu_vuln_blacklist, MMIO)) ++ setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); ++ else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO)) ++ setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN); ++ } + + if (!cpu_has(c, X86_FEATURE_BTC_NO)) { + if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) diff --git a/queue-5.15/x86-nospec-unwreck-the-rsb-stuffing.patch b/queue-5.15/x86-nospec-unwreck-the-rsb-stuffing.patch new file mode 100644 index 00000000000..8ae54dffbf0 --- /dev/null +++ b/queue-5.15/x86-nospec-unwreck-the-rsb-stuffing.patch @@ -0,0 +1,128 @@ +From 4e3aa9238277597c6c7624f302d81a7b568b6f2d Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Tue, 16 Aug 2022 14:28:36 +0200 +Subject: x86/nospec: Unwreck the RSB stuffing + +From: Peter Zijlstra + +commit 4e3aa9238277597c6c7624f302d81a7b568b6f2d upstream. + +Commit 2b1299322016 ("x86/speculation: Add RSB VM Exit protections") +made a right mess of the RSB stuffing, rewrite the whole thing to not +suck. 
+ +Thanks to Andrew for the enlightening comment about Post-Barrier RSB +things so we can make this code less magical. + +Cc: stable@vger.kernel.org +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/YvuNdDWoUZSBjYcm@worktop.programming.kicks-ass.net +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/nospec-branch.h | 80 +++++++++++++++++------------------ + 1 file changed, 39 insertions(+), 41 deletions(-) + +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -35,33 +35,44 @@ + #define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ + + /* ++ * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN. ++ */ ++#define __FILL_RETURN_SLOT \ ++ ANNOTATE_INTRA_FUNCTION_CALL; \ ++ call 772f; \ ++ int3; \ ++772: ++ ++/* ++ * Stuff the entire RSB. ++ * + * Google experimented with loop-unrolling and this turned out to be + * the optimal version - two calls, each with their own speculation + * trap should their return address end up getting used, in a loop. + */ +-#define __FILL_RETURN_BUFFER(reg, nr, sp) \ +- mov $(nr/2), reg; \ +-771: \ +- ANNOTATE_INTRA_FUNCTION_CALL; \ +- call 772f; \ +-773: /* speculation trap */ \ +- UNWIND_HINT_EMPTY; \ +- pause; \ +- lfence; \ +- jmp 773b; \ +-772: \ +- ANNOTATE_INTRA_FUNCTION_CALL; \ +- call 774f; \ +-775: /* speculation trap */ \ +- UNWIND_HINT_EMPTY; \ +- pause; \ +- lfence; \ +- jmp 775b; \ +-774: \ +- add $(BITS_PER_LONG/8) * 2, sp; \ +- dec reg; \ +- jnz 771b; \ +- /* barrier for jnz misprediction */ \ ++#define __FILL_RETURN_BUFFER(reg, nr) \ ++ mov $(nr/2), reg; \ ++771: \ ++ __FILL_RETURN_SLOT \ ++ __FILL_RETURN_SLOT \ ++ add $(BITS_PER_LONG/8) * 2, %_ASM_SP; \ ++ dec reg; \ ++ jnz 771b; \ ++ /* barrier for jnz misprediction */ \ ++ lfence; ++ ++/* ++ * Stuff a single RSB slot. ++ * ++ * To mitigate Post-Barrier RSB speculation, one CALL instruction must be ++ * forced to retire before letting a RET instruction execute. 
++ * ++ * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed ++ * before this point. ++ */ ++#define __FILL_ONE_RETURN \ ++ __FILL_RETURN_SLOT \ ++ add $(BITS_PER_LONG/8), %_ASM_SP; \ + lfence; + + #ifdef __ASSEMBLY__ +@@ -120,28 +131,15 @@ + #endif + .endm + +-.macro ISSUE_UNBALANCED_RET_GUARD +- ANNOTATE_INTRA_FUNCTION_CALL +- call .Lunbalanced_ret_guard_\@ +- int3 +-.Lunbalanced_ret_guard_\@: +- add $(BITS_PER_LONG/8), %_ASM_SP +- lfence +-.endm +- + /* + * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP + * monstrosity above, manually. + */ +-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2 +-.ifb \ftr2 +- ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr +-.else +- ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2 +-.endif +- __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP) +-.Lunbalanced_\@: +- ISSUE_UNBALANCED_RET_GUARD ++.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS) ++ ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \ ++ __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \ ++ __stringify(__FILL_ONE_RETURN), \ftr2 ++ + .Lskip_rsb_\@: + .endm + diff --git a/queue-5.15/x86-unwind-orc-unwind-ftrace-trampolines-with-correct-orc-entry.patch b/queue-5.15/x86-unwind-orc-unwind-ftrace-trampolines-with-correct-orc-entry.patch new file mode 100644 index 00000000000..f76b902f3f3 --- /dev/null +++ b/queue-5.15/x86-unwind-orc-unwind-ftrace-trampolines-with-correct-orc-entry.patch @@ -0,0 +1,72 @@ +From fc2e426b1161761561624ebd43ce8c8d2fa058da Mon Sep 17 00:00:00 2001 +From: Chen Zhongjin +Date: Fri, 19 Aug 2022 16:43:34 +0800 +Subject: x86/unwind/orc: Unwind ftrace trampolines with correct ORC entry + +From: Chen Zhongjin + +commit fc2e426b1161761561624ebd43ce8c8d2fa058da upstream. + +When meeting ftrace trampolines in ORC unwinding, unwinder uses address +of ftrace_{regs_}call address to find the ORC entry, which gets next frame at +sp+176. 
+ +If there is an IRQ hitting at sub $0xa8,%rsp, the next frame should be +sp+8 instead of 176. It makes unwinder skip correct frame and throw +warnings such as "wrong direction" or "can't access registers", etc, +depending on the content of the incorrect frame address. + +By adding the base address ftrace_{regs_}caller with the offset +*ip - ops->trampoline*, we can get the correct address to find the ORC entry. + +Also change "caller" to "tramp_addr" to make variable name conform to +its content. + +[ mingo: Clarified the changelog a bit. ] + +Fixes: 6be7fa3c74d1 ("ftrace, orc, x86: Handle ftrace dynamically allocated trampolines") +Signed-off-by: Chen Zhongjin +Signed-off-by: Ingo Molnar +Reviewed-by: Steven Rostedt (Google) +Cc: +Link: https://lore.kernel.org/r/20220819084334.244016-1-chenzhongjin@huawei.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/unwind_orc.c | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +--- a/arch/x86/kernel/unwind_orc.c ++++ b/arch/x86/kernel/unwind_orc.c +@@ -93,22 +93,27 @@ static struct orc_entry *orc_find(unsign + static struct orc_entry *orc_ftrace_find(unsigned long ip) + { + struct ftrace_ops *ops; +- unsigned long caller; ++ unsigned long tramp_addr, offset; + + ops = ftrace_ops_trampoline(ip); + if (!ops) + return NULL; + ++ /* Set tramp_addr to the start of the code copied by the trampoline */ + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) +- caller = (unsigned long)ftrace_regs_call; ++ tramp_addr = (unsigned long)ftrace_regs_caller; + else +- caller = (unsigned long)ftrace_call; ++ tramp_addr = (unsigned long)ftrace_caller; ++ ++ /* Now place tramp_addr to the location within the trampoline ip is at */ ++ offset = ip - ops->trampoline; ++ tramp_addr += offset; + + /* Prevent unlikely recursion */ +- if (ip == caller) ++ if (ip == tramp_addr) + return NULL; + +- return orc_find(caller); ++ return orc_find(tramp_addr); + } + #else + static struct orc_entry *orc_ftrace_find(unsigned long 
ip) diff --git a/queue-5.15/xen-privcmd-fix-error-exit-of-privcmd_ioctl_dm_op.patch b/queue-5.15/xen-privcmd-fix-error-exit-of-privcmd_ioctl_dm_op.patch new file mode 100644 index 00000000000..c08c929d758 --- /dev/null +++ b/queue-5.15/xen-privcmd-fix-error-exit-of-privcmd_ioctl_dm_op.patch @@ -0,0 +1,95 @@ +From c5deb27895e017a0267de0a20d140ad5fcc55a54 Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Thu, 25 Aug 2022 16:19:18 +0200 +Subject: xen/privcmd: fix error exit of privcmd_ioctl_dm_op() + +From: Juergen Gross + +commit c5deb27895e017a0267de0a20d140ad5fcc55a54 upstream. + +The error exit of privcmd_ioctl_dm_op() is calling unlock_pages() +potentially with pages being NULL, leading to a NULL dereference. + +Additionally lock_pages() doesn't check for pin_user_pages_fast() +having been completely successful, resulting in potentially not +locking all pages into memory. This could result in sporadic failures +when using the related memory in user mode. + +Fix all of that by calling unlock_pages() always with the real number +of pinned pages, which will be zero in case pages being NULL, and by +checking the number of pages pinned by pin_user_pages_fast() matching +the expected number of pages. 
+ +Cc: +Fixes: ab520be8cd5d ("xen/privcmd: Add IOCTL_PRIVCMD_DM_OP") +Reported-by: Rustam Subkhankulov +Signed-off-by: Juergen Gross +Reviewed-by: Jan Beulich +Reviewed-by: Oleksandr Tyshchenko +Link: https://lore.kernel.org/r/20220825141918.3581-1-jgross@suse.com +Signed-off-by: Juergen Gross +Signed-off-by: Greg Kroah-Hartman +--- + drivers/xen/privcmd.c | 21 +++++++++++---------- + 1 file changed, 11 insertions(+), 10 deletions(-) + +--- a/drivers/xen/privcmd.c ++++ b/drivers/xen/privcmd.c +@@ -581,27 +581,30 @@ static int lock_pages( + struct privcmd_dm_op_buf kbufs[], unsigned int num, + struct page *pages[], unsigned int nr_pages, unsigned int *pinned) + { +- unsigned int i; ++ unsigned int i, off = 0; + +- for (i = 0; i < num; i++) { ++ for (i = 0; i < num; ) { + unsigned int requested; + int page_count; + + requested = DIV_ROUND_UP( + offset_in_page(kbufs[i].uptr) + kbufs[i].size, +- PAGE_SIZE); ++ PAGE_SIZE) - off; + if (requested > nr_pages) + return -ENOSPC; + + page_count = pin_user_pages_fast( +- (unsigned long) kbufs[i].uptr, ++ (unsigned long)kbufs[i].uptr + off * PAGE_SIZE, + requested, FOLL_WRITE, pages); +- if (page_count < 0) +- return page_count; ++ if (page_count <= 0) ++ return page_count ? : -EFAULT; + + *pinned += page_count; + nr_pages -= page_count; + pages += page_count; ++ ++ off = (requested == page_count) ? 0 : off + page_count; ++ i += !off; + } + + return 0; +@@ -677,10 +680,8 @@ static long privcmd_ioctl_dm_op(struct f + } + + rc = lock_pages(kbufs, kdata.num, pages, nr_pages, &pinned); +- if (rc < 0) { +- nr_pages = pinned; ++ if (rc < 0) + goto out; +- } + + for (i = 0; i < kdata.num; i++) { + set_xen_guest_handle(xbufs[i].h, kbufs[i].uptr); +@@ -692,7 +693,7 @@ static long privcmd_ioctl_dm_op(struct f + xen_preemptible_hcall_end(); + + out: +- unlock_pages(pages, nr_pages); ++ unlock_pages(pages, pinned); + kfree(xbufs); + kfree(pages); + kfree(kbufs);