From c7f8020fe45ff597d8ec2d1a3da4bdf3ea1e86c0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 29 Aug 2022 09:49:00 +0200 Subject: [PATCH] 5.19-stable patches added patches: acpi-processor-remove-freq-qos-request-for-all-cpus.patch asm-generic-sections-refactor-memory_intersects.patch audit-move-audit_return_fixup-before-the-filters.patch bootmem-remove-the-vmemmap-pages-from-kmemleak-in-put_page_bootmem.patch btrfs-add-info-when-mount-fails-due-to-stale-replace-target.patch btrfs-check-if-root-is-readonly-while-setting-security-xattr.patch btrfs-fix-possible-memory-leak-in-btrfs_get_dev_args_from_path.patch btrfs-fix-silent-failure-when-deleting-root-reference.patch btrfs-fix-space-cache-corruption-and-potential-double-allocations.patch btrfs-replace-drop-assert-for-suspended-replace.patch btrfs-update-generation-of-hole-file-extent-item-when-merging-holes.patch cifs-skip-extra-null-byte-in-filenames.patch fbdev-fbcon-properly-revert-changes-when-vc_resize-failed.patch loop-check-for-overflow-while-configuring-loop.patch mm-damon-dbgfs-avoid-duplicate-context-directory-creation.patch mm-hugetlb-avoid-corrupting-page-mapping-in-hugetlb_mcopy_atomic_pte.patch mm-mprotect-only-reference-swap-pfn-page-if-type-match.patch nouveau-explicitly-wait-on-the-fence-in-nouveau_bo_move_m2mf.patch ocfs2-fix-freeing-uninitialized-resource-on-ocfs2_dlm_shutdown.patch perf-x86-intel-fix-pebs-event-constraints-for-adl.patch perf-x86-lbr-enable-the-branch-type-for-the-arch-lbr-by-default.patch revert-memcg-cleanup-racy-sum-avoidance-code.patch riscv-dts-microchip-correct-l2-cache-interrupts.patch riscv-signal-fix-missing-prototype-warning.patch riscv-traps-add-missing-prototype.patch s390-fix-double-free-of-gs-and-ri-cbs-on-fork-failure.patch s390-mm-do-not-trigger-write-fault-when-vma-does-not-allow-vm_write.patch shmem-update-folio-if-shmem_replace_page-updates-the-page.patch smb3-missing-inode-locks-in-punch-hole.patch 
writeback-avoid-use-after-free-after-removing-device.patch x86-boot-don-t-propagate-uninitialized-boot_params-cc_blob_address.patch x86-bugs-add-unknown-reporting-for-mmio-stale-data.patch x86-entry-fix-entry_int80_compat-for-xen-pv-guests.patch x86-nospec-unwreck-the-rsb-stuffing.patch x86-pat-have-pat_enabled-properly-reflect-state-when-running-on-xen.patch x86-sev-don-t-use-cc_platform_has-for-early-sev-snp-calls.patch x86-unwind-orc-unwind-ftrace-trampolines-with-correct-orc-entry.patch xen-privcmd-fix-error-exit-of-privcmd_ioctl_dm_op.patch --- ...remove-freq-qos-request-for-all-cpus.patch | 38 +++ ...-sections-refactor-memory_intersects.patch | 96 ++++++ ...udit_return_fixup-before-the-filters.patch | 57 ++++ ...es-from-kmemleak-in-put_page_bootmem.patch | 55 ++++ ...nt-fails-due-to-stale-replace-target.patch | 47 +++ ...eadonly-while-setting-security-xattr.patch | 60 ++++ ...leak-in-btrfs_get_dev_args_from_path.patch | 44 +++ ...failure-when-deleting-root-reference.patch | 43 +++ ...ion-and-potential-double-allocations.patch | 304 ++++++++++++++++++ ...ce-drop-assert-for-suspended-replace.patch | 55 ++++ ...-file-extent-item-when-merging-holes.patch | 92 ++++++ ...fs-skip-extra-null-byte-in-filenames.patch | 61 ++++ ...revert-changes-when-vc_resize-failed.patch | 88 +++++ ...-for-overflow-while-configuring-loop.patch | 59 ++++ ...duplicate-context-directory-creation.patch | 53 +++ ...-mapping-in-hugetlb_mcopy_atomic_pte.patch | 38 +++ ...eference-swap-pfn-page-if-type-match.patch | 75 +++++ ...on-the-fence-in-nouveau_bo_move_m2mf.patch | 40 +++ ...lized-resource-on-ocfs2_dlm_shutdown.patch | 69 ++++ ...l-fix-pebs-event-constraints-for-adl.patch | 36 +++ ...nch-type-for-the-arch-lbr-by-default.patch | 60 ++++ ...emcg-cleanup-racy-sum-avoidance-code.patch | 95 ++++++ ...icrochip-correct-l2-cache-interrupts.patch | 61 ++++ ...signal-fix-missing-prototype-warning.patch | 54 ++++ .../riscv-traps-add-missing-prototype.patch | 51 +++ 
...ree-of-gs-and-ri-cbs-on-fork-failure.patch | 81 +++++ ...ult-when-vma-does-not-allow-vm_write.patch | 49 +++ queue-5.19/series | 38 +++ ...-shmem_replace_page-updates-the-page.patch | 41 +++ ...b3-missing-inode-locks-in-punch-hole.patch | 60 ++++ ...use-after-free-after-removing-device.patch | 139 ++++++++ ...tialized-boot_params-cc_blob_address.patch | 88 +++++ ...nknown-reporting-for-mmio-stale-data.patch | 209 ++++++++++++ ...entry_int80_compat-for-xen-pv-guests.patch | 49 +++ .../x86-nospec-unwreck-the-rsb-stuffing.patch | 128 ++++++++ ...ly-reflect-state-when-running-on-xen.patch | 88 +++++ ...platform_has-for-early-sev-snp-calls.patch | 70 ++++ ...e-trampolines-with-correct-orc-entry.patch | 72 +++++ ...ix-error-exit-of-privcmd_ioctl_dm_op.patch | 95 ++++++ 39 files changed, 2938 insertions(+) create mode 100644 queue-5.19/acpi-processor-remove-freq-qos-request-for-all-cpus.patch create mode 100644 queue-5.19/asm-generic-sections-refactor-memory_intersects.patch create mode 100644 queue-5.19/audit-move-audit_return_fixup-before-the-filters.patch create mode 100644 queue-5.19/bootmem-remove-the-vmemmap-pages-from-kmemleak-in-put_page_bootmem.patch create mode 100644 queue-5.19/btrfs-add-info-when-mount-fails-due-to-stale-replace-target.patch create mode 100644 queue-5.19/btrfs-check-if-root-is-readonly-while-setting-security-xattr.patch create mode 100644 queue-5.19/btrfs-fix-possible-memory-leak-in-btrfs_get_dev_args_from_path.patch create mode 100644 queue-5.19/btrfs-fix-silent-failure-when-deleting-root-reference.patch create mode 100644 queue-5.19/btrfs-fix-space-cache-corruption-and-potential-double-allocations.patch create mode 100644 queue-5.19/btrfs-replace-drop-assert-for-suspended-replace.patch create mode 100644 queue-5.19/btrfs-update-generation-of-hole-file-extent-item-when-merging-holes.patch create mode 100644 queue-5.19/cifs-skip-extra-null-byte-in-filenames.patch create mode 100644 
queue-5.19/fbdev-fbcon-properly-revert-changes-when-vc_resize-failed.patch create mode 100644 queue-5.19/loop-check-for-overflow-while-configuring-loop.patch create mode 100644 queue-5.19/mm-damon-dbgfs-avoid-duplicate-context-directory-creation.patch create mode 100644 queue-5.19/mm-hugetlb-avoid-corrupting-page-mapping-in-hugetlb_mcopy_atomic_pte.patch create mode 100644 queue-5.19/mm-mprotect-only-reference-swap-pfn-page-if-type-match.patch create mode 100644 queue-5.19/nouveau-explicitly-wait-on-the-fence-in-nouveau_bo_move_m2mf.patch create mode 100644 queue-5.19/ocfs2-fix-freeing-uninitialized-resource-on-ocfs2_dlm_shutdown.patch create mode 100644 queue-5.19/perf-x86-intel-fix-pebs-event-constraints-for-adl.patch create mode 100644 queue-5.19/perf-x86-lbr-enable-the-branch-type-for-the-arch-lbr-by-default.patch create mode 100644 queue-5.19/revert-memcg-cleanup-racy-sum-avoidance-code.patch create mode 100644 queue-5.19/riscv-dts-microchip-correct-l2-cache-interrupts.patch create mode 100644 queue-5.19/riscv-signal-fix-missing-prototype-warning.patch create mode 100644 queue-5.19/riscv-traps-add-missing-prototype.patch create mode 100644 queue-5.19/s390-fix-double-free-of-gs-and-ri-cbs-on-fork-failure.patch create mode 100644 queue-5.19/s390-mm-do-not-trigger-write-fault-when-vma-does-not-allow-vm_write.patch create mode 100644 queue-5.19/shmem-update-folio-if-shmem_replace_page-updates-the-page.patch create mode 100644 queue-5.19/smb3-missing-inode-locks-in-punch-hole.patch create mode 100644 queue-5.19/writeback-avoid-use-after-free-after-removing-device.patch create mode 100644 queue-5.19/x86-boot-don-t-propagate-uninitialized-boot_params-cc_blob_address.patch create mode 100644 queue-5.19/x86-bugs-add-unknown-reporting-for-mmio-stale-data.patch create mode 100644 queue-5.19/x86-entry-fix-entry_int80_compat-for-xen-pv-guests.patch create mode 100644 queue-5.19/x86-nospec-unwreck-the-rsb-stuffing.patch create mode 100644 
queue-5.19/x86-pat-have-pat_enabled-properly-reflect-state-when-running-on-xen.patch create mode 100644 queue-5.19/x86-sev-don-t-use-cc_platform_has-for-early-sev-snp-calls.patch create mode 100644 queue-5.19/x86-unwind-orc-unwind-ftrace-trampolines-with-correct-orc-entry.patch create mode 100644 queue-5.19/xen-privcmd-fix-error-exit-of-privcmd_ioctl_dm_op.patch diff --git a/queue-5.19/acpi-processor-remove-freq-qos-request-for-all-cpus.patch b/queue-5.19/acpi-processor-remove-freq-qos-request-for-all-cpus.patch new file mode 100644 index 00000000000..29bbd45081d --- /dev/null +++ b/queue-5.19/acpi-processor-remove-freq-qos-request-for-all-cpus.patch @@ -0,0 +1,38 @@ +From 36527b9d882362567ceb4eea8666813280f30e6f Mon Sep 17 00:00:00 2001 +From: Riwen Lu +Date: Tue, 23 Aug 2022 15:43:42 +0800 +Subject: ACPI: processor: Remove freq Qos request for all CPUs + +From: Riwen Lu + +commit 36527b9d882362567ceb4eea8666813280f30e6f upstream. + +The freq Qos request would be removed repeatedly if the cpufreq policy +relates to more than one CPU. Then, it would cause the "called for unknown +object" warning. + +Remove the freq Qos request for each CPU relates to the cpufreq policy, +instead of removing repeatedly for the last CPU of it. + +Fixes: a1bb46c36ce3 ("ACPI: processor: Add QoS requests for all CPUs") +Reported-by: Jeremy Linton +Tested-by: Jeremy Linton +Signed-off-by: Riwen Lu +Cc: 5.4+ # 5.4+ +Signed-off-by: Rafael J. 
Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/acpi/processor_thermal.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/acpi/processor_thermal.c ++++ b/drivers/acpi/processor_thermal.c +@@ -151,7 +151,7 @@ void acpi_thermal_cpufreq_exit(struct cp + unsigned int cpu; + + for_each_cpu(cpu, policy->related_cpus) { +- struct acpi_processor *pr = per_cpu(processors, policy->cpu); ++ struct acpi_processor *pr = per_cpu(processors, cpu); + + if (pr) + freq_qos_remove_request(&pr->thermal_req); diff --git a/queue-5.19/asm-generic-sections-refactor-memory_intersects.patch b/queue-5.19/asm-generic-sections-refactor-memory_intersects.patch new file mode 100644 index 00000000000..204cd0bd1ad --- /dev/null +++ b/queue-5.19/asm-generic-sections-refactor-memory_intersects.patch @@ -0,0 +1,96 @@ +From 0c7d7cc2b4fe2e74ef8728f030f0f1674f9f6aee Mon Sep 17 00:00:00 2001 +From: Quanyang Wang +Date: Fri, 19 Aug 2022 16:11:45 +0800 +Subject: asm-generic: sections: refactor memory_intersects + +From: Quanyang Wang + +commit 0c7d7cc2b4fe2e74ef8728f030f0f1674f9f6aee upstream. + +There are two problems with the current code of memory_intersects: + +First, it doesn't check whether the region (begin, end) falls inside the +region (virt, vend), that is (virt < begin && vend > end). + +The second problem is if vend is equal to begin, it will return true but +this is wrong since vend (virt + size) is not the last address of the +memory region but (virt + size -1) is. The wrong determination will +trigger the misreporting when the function check_for_illegal_area calls +memory_intersects to check if the dma region intersects with stext region. 
+ +The misreporting is as below (stext is at 0x80100000): + WARNING: CPU: 0 PID: 77 at kernel/dma/debug.c:1073 check_for_illegal_area+0x130/0x168 + DMA-API: chipidea-usb2 e0002000.usb: device driver maps memory from kernel text or rodata [addr=800f0000] [len=65536] + Modules linked in: + CPU: 1 PID: 77 Comm: usb-storage Not tainted 5.19.0-yocto-standard #5 + Hardware name: Xilinx Zynq Platform + unwind_backtrace from show_stack+0x18/0x1c + show_stack from dump_stack_lvl+0x58/0x70 + dump_stack_lvl from __warn+0xb0/0x198 + __warn from warn_slowpath_fmt+0x80/0xb4 + warn_slowpath_fmt from check_for_illegal_area+0x130/0x168 + check_for_illegal_area from debug_dma_map_sg+0x94/0x368 + debug_dma_map_sg from __dma_map_sg_attrs+0x114/0x128 + __dma_map_sg_attrs from dma_map_sg_attrs+0x18/0x24 + dma_map_sg_attrs from usb_hcd_map_urb_for_dma+0x250/0x3b4 + usb_hcd_map_urb_for_dma from usb_hcd_submit_urb+0x194/0x214 + usb_hcd_submit_urb from usb_sg_wait+0xa4/0x118 + usb_sg_wait from usb_stor_bulk_transfer_sglist+0xa0/0xec + usb_stor_bulk_transfer_sglist from usb_stor_bulk_srb+0x38/0x70 + usb_stor_bulk_srb from usb_stor_Bulk_transport+0x150/0x360 + usb_stor_Bulk_transport from usb_stor_invoke_transport+0x38/0x440 + usb_stor_invoke_transport from usb_stor_control_thread+0x1e0/0x238 + usb_stor_control_thread from kthread+0xf8/0x104 + kthread from ret_from_fork+0x14/0x2c + +Refactor memory_intersects to fix the two problems above. + +Before the 1d7db834a027e ("dma-debug: use memory_intersects() +directly"), memory_intersects is called only by printk_late_init: + +printk_late_init -> init_section_intersects ->memory_intersects. + +There were few places where memory_intersects was called. + +When commit 1d7db834a027e ("dma-debug: use memory_intersects() +directly") was merged and CONFIG_DMA_API_DEBUG is enabled, the DMA +subsystem uses it to check for an illegal area and the calltrace above +is triggered. 
+ +[akpm@linux-foundation.org: fix nearby comment typo] +Link: https://lkml.kernel.org/r/20220819081145.948016-1-quanyang.wang@windriver.com +Fixes: 979559362516 ("asm/sections: add helpers to check for section data") +Signed-off-by: Quanyang Wang +Cc: Ard Biesheuvel +Cc: Arnd Bergmann +Cc: Thierry Reding +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + include/asm-generic/sections.h | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/include/asm-generic/sections.h ++++ b/include/asm-generic/sections.h +@@ -97,7 +97,7 @@ static inline bool memory_contains(void + /** + * memory_intersects - checks if the region occupied by an object intersects + * with another memory region +- * @begin: virtual address of the beginning of the memory regien ++ * @begin: virtual address of the beginning of the memory region + * @end: virtual address of the end of the memory region + * @virt: virtual address of the memory object + * @size: size of the memory object +@@ -110,7 +110,10 @@ static inline bool memory_intersects(voi + { + void *vend = virt + size; + +- return (virt >= begin && virt < end) || (vend >= begin && vend < end); ++ if (virt < end && vend > begin) ++ return true; ++ ++ return false; + } + + /** diff --git a/queue-5.19/audit-move-audit_return_fixup-before-the-filters.patch b/queue-5.19/audit-move-audit_return_fixup-before-the-filters.patch new file mode 100644 index 00000000000..8a63e874e98 --- /dev/null +++ b/queue-5.19/audit-move-audit_return_fixup-before-the-filters.patch @@ -0,0 +1,57 @@ +From d4fefa4801a1c2f9c0c7a48fbb0fdf384e89a4ab Mon Sep 17 00:00:00 2001 +From: Richard Guy Briggs +Date: Thu, 25 Aug 2022 15:32:40 -0400 +Subject: audit: move audit_return_fixup before the filters + +From: Richard Guy Briggs + +commit d4fefa4801a1c2f9c0c7a48fbb0fdf384e89a4ab upstream. + +The success and return_code are needed by the filters. Move +audit_return_fixup() before the filters. 
This was causing syscall +auditing events to be missed. + +Link: https://github.com/linux-audit/audit-kernel/issues/138 +Cc: stable@vger.kernel.org +Fixes: 12c5e81d3fd0 ("audit: prepare audit_context for use in calling contexts beyond syscalls") +Signed-off-by: Richard Guy Briggs +[PM: manual merge required] +Signed-off-by: Paul Moore +Signed-off-by: Greg Kroah-Hartman +--- + kernel/auditsc.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/kernel/auditsc.c ++++ b/kernel/auditsc.c +@@ -1965,6 +1965,7 @@ void __audit_uring_exit(int success, lon + goto out; + } + ++ audit_return_fixup(ctx, success, code); + if (ctx->context == AUDIT_CTX_SYSCALL) { + /* + * NOTE: See the note in __audit_uring_entry() about the case +@@ -2006,7 +2007,6 @@ void __audit_uring_exit(int success, lon + audit_filter_inodes(current, ctx); + if (ctx->current_state != AUDIT_STATE_RECORD) + goto out; +- audit_return_fixup(ctx, success, code); + audit_log_exit(); + + out: +@@ -2090,13 +2090,13 @@ void __audit_syscall_exit(int success, l + if (!list_empty(&context->killed_trees)) + audit_kill_trees(context); + ++ audit_return_fixup(context, success, return_code); + /* run through both filters to ensure we set the filterkey properly */ + audit_filter_syscall(current, context); + audit_filter_inodes(current, context); + if (context->current_state < AUDIT_STATE_RECORD) + goto out; + +- audit_return_fixup(context, success, return_code); + audit_log_exit(); + + out: diff --git a/queue-5.19/bootmem-remove-the-vmemmap-pages-from-kmemleak-in-put_page_bootmem.patch b/queue-5.19/bootmem-remove-the-vmemmap-pages-from-kmemleak-in-put_page_bootmem.patch new file mode 100644 index 00000000000..3728cdba044 --- /dev/null +++ b/queue-5.19/bootmem-remove-the-vmemmap-pages-from-kmemleak-in-put_page_bootmem.patch @@ -0,0 +1,55 @@ +From dd0ff4d12dd284c334f7e9b07f8f335af856ac78 Mon Sep 17 00:00:00 2001 +From: Liu Shixin +Date: Fri, 19 Aug 2022 17:40:05 +0800 +Subject: bootmem: remove the vmemmap 
pages from kmemleak in put_page_bootmem + +From: Liu Shixin + +commit dd0ff4d12dd284c334f7e9b07f8f335af856ac78 upstream. + +The vmemmap pages is marked by kmemleak when allocated from memblock. +Remove it from kmemleak when freeing the page. Otherwise, when we reuse +the page, kmemleak may report such an error and then stop working. + + kmemleak: Cannot insert 0xffff98fb6eab3d40 into the object search tree (overlaps existing) + kmemleak: Kernel memory leak detector disabled + kmemleak: Object 0xffff98fb6be00000 (size 335544320): + kmemleak: comm "swapper", pid 0, jiffies 4294892296 + kmemleak: min_count = 0 + kmemleak: count = 0 + kmemleak: flags = 0x1 + kmemleak: checksum = 0 + kmemleak: backtrace: + +Link: https://lkml.kernel.org/r/20220819094005.2928241-1-liushixin2@huawei.com +Fixes: f41f2ed43ca5 (mm: hugetlb: free the vmemmap pages associated with each HugeTLB page) +Signed-off-by: Liu Shixin +Reviewed-by: Muchun Song +Cc: Matthew Wilcox +Cc: Mike Kravetz +Cc: Oscar Salvador +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/bootmem_info.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/mm/bootmem_info.c ++++ b/mm/bootmem_info.c +@@ -12,6 +12,7 @@ + #include + #include + #include ++#include + + void get_page_bootmem(unsigned long info, struct page *page, unsigned long type) + { +@@ -33,6 +34,7 @@ void put_page_bootmem(struct page *page) + ClearPagePrivate(page); + set_page_private(page, 0); + INIT_LIST_HEAD(&page->lru); ++ kmemleak_free_part(page_to_virt(page), PAGE_SIZE); + free_reserved_page(page); + } + } diff --git a/queue-5.19/btrfs-add-info-when-mount-fails-due-to-stale-replace-target.patch b/queue-5.19/btrfs-add-info-when-mount-fails-due-to-stale-replace-target.patch new file mode 100644 index 00000000000..426112d5c06 --- /dev/null +++ b/queue-5.19/btrfs-add-info-when-mount-fails-due-to-stale-replace-target.patch @@ -0,0 +1,47 @@ +From f2c3bec215694fb8bc0ef5010f2a758d1906fc2d Mon Sep 17 00:00:00 2001 +From: Anand Jain 
+Date: Fri, 12 Aug 2022 18:32:19 +0800 +Subject: btrfs: add info when mount fails due to stale replace target + +From: Anand Jain + +commit f2c3bec215694fb8bc0ef5010f2a758d1906fc2d upstream. + +If the replace target device reappears after the suspended replace is +cancelled, it blocks the mount operation as it can't find the matching +replace-item in the metadata. As shown below, + + BTRFS error (device sda5): replace devid present without an active replace item + +To overcome this situation, the user can run the command + + btrfs device scan --forget + +and try the mount command again. And also, to avoid repeating the issue, +superblock on the devid=0 must be wiped. + + wipefs -a device-path-to-devid=0. + +This patch adds some info when this situation occurs. + +Reported-by: Samuel Greiner +Link: https://lore.kernel.org/linux-btrfs/b4f62b10-b295-26ea-71f9-9a5c9299d42c@balkonien.org/T/ +CC: stable@vger.kernel.org # 5.0+ +Signed-off-by: Anand Jain +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/dev-replace.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/btrfs/dev-replace.c ++++ b/fs/btrfs/dev-replace.c +@@ -165,7 +165,7 @@ no_valid_dev_replace_entry_found: + */ + if (btrfs_find_device(fs_info->fs_devices, &args)) { + btrfs_err(fs_info, +- "replace devid present without an active replace item"); ++"replace without active item, run 'device scan --forget' on the target device"); + ret = -EUCLEAN; + } else { + dev_replace->srcdev = NULL; diff --git a/queue-5.19/btrfs-check-if-root-is-readonly-while-setting-security-xattr.patch b/queue-5.19/btrfs-check-if-root-is-readonly-while-setting-security-xattr.patch new file mode 100644 index 00000000000..d28ac9bea71 --- /dev/null +++ b/queue-5.19/btrfs-check-if-root-is-readonly-while-setting-security-xattr.patch @@ -0,0 +1,60 @@ +From b51111271b0352aa596c5ae8faf06939e91b3b68 Mon Sep 17 00:00:00 2001 +From: Goldwyn Rodrigues +Date: Tue, 16 Aug 2022 16:42:56 -0500 +Subject: 
btrfs: check if root is readonly while setting security xattr + +From: Goldwyn Rodrigues + +commit b51111271b0352aa596c5ae8faf06939e91b3b68 upstream. + +For a filesystem which has btrfs read-only property set to true, all +write operations including xattr should be denied. However, security +xattr can still be changed even if btrfs ro property is true. + +This happens because xattr_permission() does not have any restrictions +on security.*, system.* and in some cases trusted.* from VFS and +the decision is left to the underlying filesystem. See comments in +xattr_permission() for more details. + +This patch checks if the root is read-only before performing the set +xattr operation. + +Testcase: + + DEV=/dev/vdb + MNT=/mnt + + mkfs.btrfs -f $DEV + mount $DEV $MNT + echo "file one" > $MNT/f1 + + setfattr -n "security.one" -v 2 $MNT/f1 + btrfs property set /mnt ro true + + setfattr -n "security.one" -v 1 $MNT/f1 + + umount $MNT + +CC: stable@vger.kernel.org # 4.9+ +Reviewed-by: Qu Wenruo +Reviewed-by: Filipe Manana +Signed-off-by: Goldwyn Rodrigues +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/xattr.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/fs/btrfs/xattr.c ++++ b/fs/btrfs/xattr.c +@@ -371,6 +371,9 @@ static int btrfs_xattr_handler_set(const + const char *name, const void *buffer, + size_t size, int flags) + { ++ if (btrfs_root_readonly(BTRFS_I(inode)->root)) ++ return -EROFS; ++ + name = xattr_full_name(handler, name); + return btrfs_setxattr_trans(inode, name, buffer, size, flags); + } diff --git a/queue-5.19/btrfs-fix-possible-memory-leak-in-btrfs_get_dev_args_from_path.patch b/queue-5.19/btrfs-fix-possible-memory-leak-in-btrfs_get_dev_args_from_path.patch new file mode 100644 index 00000000000..4d931007add --- /dev/null +++ b/queue-5.19/btrfs-fix-possible-memory-leak-in-btrfs_get_dev_args_from_path.patch @@ -0,0 +1,44 @@ +From 9ea0106a7a3d8116860712e3f17cd52ce99f6707 Mon Sep 17 00:00:00 2001 
+From: Zixuan Fu +Date: Mon, 15 Aug 2022 23:16:06 +0800 +Subject: btrfs: fix possible memory leak in btrfs_get_dev_args_from_path() + +From: Zixuan Fu + +commit 9ea0106a7a3d8116860712e3f17cd52ce99f6707 upstream. + +In btrfs_get_dev_args_from_path(), btrfs_get_bdev_and_sb() can fail if +the path is invalid. In this case, btrfs_get_dev_args_from_path() +returns directly without freeing args->uuid and args->fsid allocated +before, which causes memory leak. + +To fix these possible leaks, when btrfs_get_bdev_and_sb() fails, +btrfs_put_dev_args_from_path() is called to clean up the memory. + +Reported-by: TOTE Robot +Fixes: faa775c41d655 ("btrfs: add a btrfs_get_dev_args_from_path helper") +CC: stable@vger.kernel.org # 5.16 +Reviewed-by: Boris Burkov +Signed-off-by: Zixuan Fu +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/volumes.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -2344,8 +2344,11 @@ int btrfs_get_dev_args_from_path(struct + + ret = btrfs_get_bdev_and_sb(path, FMODE_READ, fs_info->bdev_holder, 0, + &bdev, &disk_super); +- if (ret) ++ if (ret) { ++ btrfs_put_dev_args_from_path(args); + return ret; ++ } ++ + args->devid = btrfs_stack_device_id(&disk_super->dev_item); + memcpy(args->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE); + if (btrfs_fs_incompat(fs_info, METADATA_UUID)) diff --git a/queue-5.19/btrfs-fix-silent-failure-when-deleting-root-reference.patch b/queue-5.19/btrfs-fix-silent-failure-when-deleting-root-reference.patch new file mode 100644 index 00000000000..aeac04a5d93 --- /dev/null +++ b/queue-5.19/btrfs-fix-silent-failure-when-deleting-root-reference.patch @@ -0,0 +1,43 @@ +From 47bf225a8d2cccb15f7e8d4a1ed9b757dd86afd7 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 22 Aug 2022 15:47:09 +0100 +Subject: btrfs: fix silent failure when deleting root reference + +From: Filipe Manana + +commit 
47bf225a8d2cccb15f7e8d4a1ed9b757dd86afd7 upstream. + +At btrfs_del_root_ref(), if btrfs_search_slot() returns an error, we end +up returning from the function with a value of 0 (success). This happens +because the function returns the value stored in the variable 'err', +which is 0, while the error value we got from btrfs_search_slot() is +stored in the 'ret' variable. + +So fix it by setting 'err' with the error value. + +Fixes: 8289ed9f93bef2 ("btrfs: replace the BUG_ON in btrfs_del_root_ref with proper error handling") +CC: stable@vger.kernel.org # 5.16+ +Reviewed-by: Qu Wenruo +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/root-tree.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/fs/btrfs/root-tree.c ++++ b/fs/btrfs/root-tree.c +@@ -349,9 +349,10 @@ int btrfs_del_root_ref(struct btrfs_tran + key.offset = ref_id; + again: + ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); +- if (ret < 0) ++ if (ret < 0) { ++ err = ret; + goto out; +- if (ret == 0) { ++ } else if (ret == 0) { + leaf = path->nodes[0]; + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_root_ref); diff --git a/queue-5.19/btrfs-fix-space-cache-corruption-and-potential-double-allocations.patch b/queue-5.19/btrfs-fix-space-cache-corruption-and-potential-double-allocations.patch new file mode 100644 index 00000000000..b640ab813bb --- /dev/null +++ b/queue-5.19/btrfs-fix-space-cache-corruption-and-potential-double-allocations.patch @@ -0,0 +1,304 @@ +From ced8ecf026fd8084cf175530ff85c76d6085d715 Mon Sep 17 00:00:00 2001 +From: Omar Sandoval +Date: Tue, 23 Aug 2022 11:28:13 -0700 +Subject: btrfs: fix space cache corruption and potential double allocations + +From: Omar Sandoval + +commit ced8ecf026fd8084cf175530ff85c76d6085d715 upstream. + +When testing space_cache v2 on a large set of machines, we encountered a +few symptoms: + +1. 
"unable to add free space :-17" (EEXIST) errors. +2. Missing free space info items, sometimes caught with a "missing free + space info for X" error. +3. Double-accounted space: ranges that were allocated in the extent tree + and also marked as free in the free space tree, ranges that were + marked as allocated twice in the extent tree, or ranges that were + marked as free twice in the free space tree. If the latter made it + onto disk, the next reboot would hit the BUG_ON() in + add_new_free_space(). +4. On some hosts with no on-disk corruption or error messages, the + in-memory space cache (dumped with drgn) disagreed with the free + space tree. + +All of these symptoms have the same underlying cause: a race between +caching the free space for a block group and returning free space to the +in-memory space cache for pinned extents causes us to double-add a free +range to the space cache. This race exists when free space is cached +from the free space tree (space_cache=v2) or the extent tree +(nospace_cache, or space_cache=v1 if the cache needs to be regenerated). +struct btrfs_block_group::last_byte_to_unpin and struct +btrfs_block_group::progress are supposed to protect against this race, +but commit d0c2f4fa555e ("btrfs: make concurrent fsyncs wait less when +waiting for a transaction commit") subtly broke this by allowing +multiple transactions to be unpinning extents at the same time. + +Specifically, the race is as follows: + +1. An extent is deleted from an uncached block group in transaction A. +2. btrfs_commit_transaction() is called for transaction A. +3. btrfs_run_delayed_refs() -> __btrfs_free_extent() runs the delayed + ref for the deleted extent. +4. __btrfs_free_extent() -> do_free_extent_accounting() -> + add_to_free_space_tree() adds the deleted extent back to the free + space tree. +5. do_free_extent_accounting() -> btrfs_update_block_group() -> + btrfs_cache_block_group() queues up the block group to get cached. 
+ block_group->progress is set to block_group->start. +6. btrfs_commit_transaction() for transaction A calls + switch_commit_roots(). It sets block_group->last_byte_to_unpin to + block_group->progress, which is block_group->start because the block + group hasn't been cached yet. +7. The caching thread gets to our block group. Since the commit roots + were already switched, load_free_space_tree() sees the deleted extent + as free and adds it to the space cache. It finishes caching and sets + block_group->progress to U64_MAX. +8. btrfs_commit_transaction() advances transaction A to + TRANS_STATE_SUPER_COMMITTED. +9. fsync calls btrfs_commit_transaction() for transaction B. Since + transaction A is already in TRANS_STATE_SUPER_COMMITTED and the + commit is for fsync, it advances. +10. btrfs_commit_transaction() for transaction B calls + switch_commit_roots(). This time, the block group has already been + cached, so it sets block_group->last_byte_to_unpin to U64_MAX. +11. btrfs_commit_transaction() for transaction A calls + btrfs_finish_extent_commit(), which calls unpin_extent_range() for + the deleted extent. It sees last_byte_to_unpin set to U64_MAX (by + transaction B!), so it adds the deleted extent to the space cache + again! + +This explains all of our symptoms above: + +* If the sequence of events is exactly as described above, when the free + space is re-added in step 11, it will fail with EEXIST. +* If another thread reallocates the deleted extent in between steps 7 + and 11, then step 11 will silently re-add that space to the space + cache as free even though it is actually allocated. Then, if that + space is allocated *again*, the free space tree will be corrupted + (namely, the wrong item will be deleted). +* If we don't catch this free space tree corruption, it will continue + to get worse as extents are deleted and reallocated. 
+ +The v1 space_cache is synchronously loaded when an extent is deleted +(btrfs_update_block_group() with alloc=0 calls btrfs_cache_block_group() +with load_cache_only=1), so it is not normally affected by this bug. +However, as noted above, if we fail to load the space cache, we will +fall back to caching from the extent tree and may hit this bug. + +The easiest fix for this race is to also make caching from the free +space tree or extent tree synchronous. Josef tested this and found no +performance regressions. + +A few extra changes fall out of this change. Namely, this fix does the +following, with step 2 being the crucial fix: + +1. Factor btrfs_caching_ctl_wait_done() out of + btrfs_wait_block_group_cache_done() to allow waiting on a caching_ctl + that we already hold a reference to. +2. Change the call in btrfs_cache_block_group() of + btrfs_wait_space_cache_v1_finished() to + btrfs_caching_ctl_wait_done(), which makes us wait regardless of the + space_cache option. +3. Delete the now unused btrfs_wait_space_cache_v1_finished() and + space_cache_v1_done(). +4. Change btrfs_cache_block_group()'s `int load_cache_only` parameter to + `bool wait` to more accurately describe its new meaning. +5. Change a few callers which had a separate call to + btrfs_wait_block_group_cache_done() to use wait = true instead. +6. Make btrfs_wait_block_group_cache_done() static now that it's not + used outside of block-group.c anymore. 
+ +Fixes: d0c2f4fa555e ("btrfs: make concurrent fsyncs wait less when waiting for a transaction commit") +CC: stable@vger.kernel.org # 5.12+ +Reviewed-by: Filipe Manana +Signed-off-by: Omar Sandoval +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/block-group.c | 47 +++++++++++++++-------------------------------- + fs/btrfs/block-group.h | 4 +--- + fs/btrfs/ctree.h | 1 - + fs/btrfs/extent-tree.c | 30 ++++++------------------------ + 4 files changed, 22 insertions(+), 60 deletions(-) + +--- a/fs/btrfs/block-group.c ++++ b/fs/btrfs/block-group.c +@@ -440,39 +440,26 @@ void btrfs_wait_block_group_cache_progre + btrfs_put_caching_control(caching_ctl); + } + +-int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache) ++static int btrfs_caching_ctl_wait_done(struct btrfs_block_group *cache, ++ struct btrfs_caching_control *caching_ctl) ++{ ++ wait_event(caching_ctl->wait, btrfs_block_group_done(cache)); ++ return cache->cached == BTRFS_CACHE_ERROR ? -EIO : 0; ++} ++ ++static int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache) + { + struct btrfs_caching_control *caching_ctl; +- int ret = 0; ++ int ret; + + caching_ctl = btrfs_get_caching_control(cache); + if (!caching_ctl) + return (cache->cached == BTRFS_CACHE_ERROR) ? 
-EIO : 0; +- +- wait_event(caching_ctl->wait, btrfs_block_group_done(cache)); +- if (cache->cached == BTRFS_CACHE_ERROR) +- ret = -EIO; ++ ret = btrfs_caching_ctl_wait_done(cache, caching_ctl); + btrfs_put_caching_control(caching_ctl); + return ret; + } + +-static bool space_cache_v1_done(struct btrfs_block_group *cache) +-{ +- bool ret; +- +- spin_lock(&cache->lock); +- ret = cache->cached != BTRFS_CACHE_FAST; +- spin_unlock(&cache->lock); +- +- return ret; +-} +- +-void btrfs_wait_space_cache_v1_finished(struct btrfs_block_group *cache, +- struct btrfs_caching_control *caching_ctl) +-{ +- wait_event(caching_ctl->wait, space_cache_v1_done(cache)); +-} +- + #ifdef CONFIG_BTRFS_DEBUG + static void fragment_free_space(struct btrfs_block_group *block_group) + { +@@ -750,9 +737,8 @@ done: + btrfs_put_block_group(block_group); + } + +-int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only) ++int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait) + { +- DEFINE_WAIT(wait); + struct btrfs_fs_info *fs_info = cache->fs_info; + struct btrfs_caching_control *caching_ctl = NULL; + int ret = 0; +@@ -785,10 +771,7 @@ int btrfs_cache_block_group(struct btrfs + } + WARN_ON(cache->caching_ctl); + cache->caching_ctl = caching_ctl; +- if (btrfs_test_opt(fs_info, SPACE_CACHE)) +- cache->cached = BTRFS_CACHE_FAST; +- else +- cache->cached = BTRFS_CACHE_STARTED; ++ cache->cached = BTRFS_CACHE_STARTED; + cache->has_caching_ctl = 1; + spin_unlock(&cache->lock); + +@@ -801,8 +784,8 @@ int btrfs_cache_block_group(struct btrfs + + btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work); + out: +- if (load_cache_only && caching_ctl) +- btrfs_wait_space_cache_v1_finished(cache, caching_ctl); ++ if (wait && caching_ctl) ++ ret = btrfs_caching_ctl_wait_done(cache, caching_ctl); + if (caching_ctl) + btrfs_put_caching_control(caching_ctl); + +@@ -3313,7 +3296,7 @@ int btrfs_update_block_group(struct btrf + * space back to the block group, otherwise 
we will leak space. + */ + if (!alloc && !btrfs_block_group_done(cache)) +- btrfs_cache_block_group(cache, 1); ++ btrfs_cache_block_group(cache, true); + + byte_in_group = bytenr - cache->start; + WARN_ON(byte_in_group > cache->length); +--- a/fs/btrfs/block-group.h ++++ b/fs/btrfs/block-group.h +@@ -263,9 +263,7 @@ void btrfs_dec_nocow_writers(struct btrf + void btrfs_wait_nocow_writers(struct btrfs_block_group *bg); + void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache, + u64 num_bytes); +-int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache); +-int btrfs_cache_block_group(struct btrfs_block_group *cache, +- int load_cache_only); ++int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait); + void btrfs_put_caching_control(struct btrfs_caching_control *ctl); + struct btrfs_caching_control *btrfs_get_caching_control( + struct btrfs_block_group *cache); +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -494,7 +494,6 @@ struct btrfs_free_cluster { + enum btrfs_caching_type { + BTRFS_CACHE_NO, + BTRFS_CACHE_STARTED, +- BTRFS_CACHE_FAST, + BTRFS_CACHE_FINISHED, + BTRFS_CACHE_ERROR, + }; +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -2567,17 +2567,10 @@ int btrfs_pin_extent_for_log_replay(stru + return -EINVAL; + + /* +- * pull in the free space cache (if any) so that our pin +- * removes the free space from the cache. We have load_only set +- * to one because the slow code to read in the free extents does check +- * the pinned extents. ++ * Fully cache the free space first so that our pin removes the free space ++ * from the cache. + */ +- btrfs_cache_block_group(cache, 1); +- /* +- * Make sure we wait until the cache is completely built in case it is +- * missing or is invalid and therefore needs to be rebuilt. 
+- */ +- ret = btrfs_wait_block_group_cache_done(cache); ++ ret = btrfs_cache_block_group(cache, true); + if (ret) + goto out; + +@@ -2600,12 +2593,7 @@ static int __exclude_logged_extent(struc + if (!block_group) + return -EINVAL; + +- btrfs_cache_block_group(block_group, 1); +- /* +- * Make sure we wait until the cache is completely built in case it is +- * missing or is invalid and therefore needs to be rebuilt. +- */ +- ret = btrfs_wait_block_group_cache_done(block_group); ++ ret = btrfs_cache_block_group(block_group, true); + if (ret) + goto out; + +@@ -4415,7 +4403,7 @@ have_block_group: + ffe_ctl->cached = btrfs_block_group_done(block_group); + if (unlikely(!ffe_ctl->cached)) { + ffe_ctl->have_caching_bg = true; +- ret = btrfs_cache_block_group(block_group, 0); ++ ret = btrfs_cache_block_group(block_group, false); + + /* + * If we get ENOMEM here or something else we want to +@@ -6169,13 +6157,7 @@ int btrfs_trim_fs(struct btrfs_fs_info * + + if (end - start >= range->minlen) { + if (!btrfs_block_group_done(cache)) { +- ret = btrfs_cache_block_group(cache, 0); +- if (ret) { +- bg_failed++; +- bg_ret = ret; +- continue; +- } +- ret = btrfs_wait_block_group_cache_done(cache); ++ ret = btrfs_cache_block_group(cache, true); + if (ret) { + bg_failed++; + bg_ret = ret; diff --git a/queue-5.19/btrfs-replace-drop-assert-for-suspended-replace.patch b/queue-5.19/btrfs-replace-drop-assert-for-suspended-replace.patch new file mode 100644 index 00000000000..78cdcaa0ba2 --- /dev/null +++ b/queue-5.19/btrfs-replace-drop-assert-for-suspended-replace.patch @@ -0,0 +1,55 @@ +From 59a3991984dbc1fc47e5651a265c5200bd85464e Mon Sep 17 00:00:00 2001 +From: Anand Jain +Date: Fri, 12 Aug 2022 18:32:18 +0800 +Subject: btrfs: replace: drop assert for suspended replace + +From: Anand Jain + +commit 59a3991984dbc1fc47e5651a265c5200bd85464e upstream. 
+ +If the filesystem mounts with the replace-operation in a suspended state +and try to cancel the suspended replace-operation, we hit the assert. The +assert came from the commit fe97e2e173af ("btrfs: dev-replace: replace's +scrub must not be running in suspended state") that was actually not +required. So just remove it. + + $ mount /dev/sda5 /btrfs + + BTRFS info (device sda5): cannot continue dev_replace, tgtdev is missing + BTRFS info (device sda5): you may cancel the operation after 'mount -o degraded' + + $ mount -o degraded /dev/sda5 /btrfs <-- success. + + $ btrfs replace cancel /btrfs + + kernel: assertion failed: ret != -ENOTCONN, in fs/btrfs/dev-replace.c:1131 + kernel: ------------[ cut here ]------------ + kernel: kernel BUG at fs/btrfs/ctree.h:3750! + +After the patch: + + $ btrfs replace cancel /btrfs + + BTRFS info (device sda5): suspended dev_replace from /dev/sda5 (devid 1) to canceled + +Fixes: fe97e2e173af ("btrfs: dev-replace: replace's scrub must not be running in suspended state") +CC: stable@vger.kernel.org # 5.0+ +Signed-off-by: Anand Jain +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/dev-replace.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/fs/btrfs/dev-replace.c ++++ b/fs/btrfs/dev-replace.c +@@ -1128,8 +1128,7 @@ int btrfs_dev_replace_cancel(struct btrf + up_write(&dev_replace->rwsem); + + /* Scrub for replace must not be running in suspended state */ +- ret = btrfs_scrub_cancel(fs_info); +- ASSERT(ret != -ENOTCONN); ++ btrfs_scrub_cancel(fs_info); + + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { diff --git a/queue-5.19/btrfs-update-generation-of-hole-file-extent-item-when-merging-holes.patch b/queue-5.19/btrfs-update-generation-of-hole-file-extent-item-when-merging-holes.patch new file mode 100644 index 00000000000..bef1cb2c94b --- /dev/null +++ b/queue-5.19/btrfs-update-generation-of-hole-file-extent-item-when-merging-holes.patch @@ -0,0 +1,92 @@ +From 
e6e3dec6c3c288d556b991a85d5d8e3ee71e9046 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 8 Aug 2022 12:18:37 +0100 +Subject: btrfs: update generation of hole file extent item when merging holes + +From: Filipe Manana + +commit e6e3dec6c3c288d556b991a85d5d8e3ee71e9046 upstream. + +When punching a hole into a file range that is adjacent with a hole and we +are not using the no-holes feature, we expand the range of the adjacent +file extent item that represents a hole, to save metadata space. + +However we don't update the generation of hole file extent item, which +means a full fsync will not log that file extent item if the fsync happens +in a later transaction (since commit 7f30c07288bb9e ("btrfs: stop copying +old file extents when doing a full fsync")). + +For example, if we do this: + + $ mkfs.btrfs -f -O ^no-holes /dev/sdb + $ mount /dev/sdb /mnt + $ xfs_io -f -c "pwrite -S 0xab 2M 2M" /mnt/foobar + $ sync + +We end up with 2 file extent items in our file: + +1) One that represents the hole for the file range [0, 2M), with a + generation of 7; + +2) Another one that represents an extent covering the range [2M, 4M). + +After that if we do the following: + + $ xfs_io -c "fpunch 2M 2M" /mnt/foobar + +We end up with a single file extent item in the file, which represents a +hole for the range [0, 4M) and with a generation of 7 - because we end +dropping the data extent for range [2M, 4M) and then update the file +extent item that represented the hole at [0, 2M), by increasing +length from 2M to 4M. + +Then doing a full fsync and power failing: + + $ xfs_io -c "fsync" /mnt/foobar + + +will result in the full fsync not logging the file extent item that +represents the hole for the range [0, 4M), because its generation is 7, +which is lower than the generation of the current transaction (8). +As a consequence, after mounting again the filesystem (after log replay), +the region [2M, 4M) does not have a hole, it still points to the +previous data extent. 
+ +So fix this by always updating the generation of existing file extent +items representing holes when we merge/expand them. This solves the +problem and it's the same approach as when we merge prealloc extents that +got written (at btrfs_mark_extent_written()). Setting the generation to +the current transaction's generation is also what we do when merging +the new hole extent map with the previous one or the next one. + +A test case for fstests, covering both cases of hole file extent item +merging (to the left and to the right), will be sent soon. + +Fixes: 7f30c07288bb9e ("btrfs: stop copying old file extents when doing a full fsync") +CC: stable@vger.kernel.org # 5.18+ +Reviewed-by: Josef Bacik +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/file.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/btrfs/file.c ++++ b/fs/btrfs/file.c +@@ -2483,6 +2483,7 @@ static int fill_holes(struct btrfs_trans + btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); + btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); + btrfs_set_file_extent_offset(leaf, fi, 0); ++ btrfs_set_file_extent_generation(leaf, fi, trans->transid); + btrfs_mark_buffer_dirty(leaf); + goto out; + } +@@ -2499,6 +2500,7 @@ static int fill_holes(struct btrfs_trans + btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); + btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); + btrfs_set_file_extent_offset(leaf, fi, 0); ++ btrfs_set_file_extent_generation(leaf, fi, trans->transid); + btrfs_mark_buffer_dirty(leaf); + goto out; + } diff --git a/queue-5.19/cifs-skip-extra-null-byte-in-filenames.patch b/queue-5.19/cifs-skip-extra-null-byte-in-filenames.patch new file mode 100644 index 00000000000..58bef31c00b --- /dev/null +++ b/queue-5.19/cifs-skip-extra-null-byte-in-filenames.patch @@ -0,0 +1,61 @@ +From a1d2eb51f0a33c28f5399a1610e66b3fbd24e884 Mon Sep 17 00:00:00 2001 +From: Paulo Alcantara +Date: Fri, 19 Aug 2022 17:00:19 -0300 +Subject: 
cifs: skip extra NULL byte in filenames + +From: Paulo Alcantara + +commit a1d2eb51f0a33c28f5399a1610e66b3fbd24e884 upstream. + +Since commit: + cifs: alloc_path_with_tree_prefix: do not append sep. if the path is empty +alloc_path_with_tree_prefix() function was no longer including the +trailing separator when @path is empty, although @out_len was still +assuming a path separator thus adding an extra byte to the final +filename. + +This has caused mount issues in some Synology servers due to the extra +NULL byte in filenames when sending SMB2_CREATE requests with +SMB2_FLAGS_DFS_OPERATIONS set. + +Fix this by checking if @path is not empty and then add extra byte for +separator. Also, do not include any trailing NULL bytes in filename +as MS-SMB2 requires it to be 8-byte aligned and not NULL terminated. + +Cc: stable@vger.kernel.org +Fixes: 7eacba3b00a3 ("cifs: alloc_path_with_tree_prefix: do not append sep. if the path is empty") +Signed-off-by: Paulo Alcantara (SUSE) +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman +--- + fs/cifs/smb2pdu.c | 16 ++++++---------- + 1 file changed, 6 insertions(+), 10 deletions(-) + +--- a/fs/cifs/smb2pdu.c ++++ b/fs/cifs/smb2pdu.c +@@ -2571,19 +2571,15 @@ alloc_path_with_tree_prefix(__le16 **out + + path_len = UniStrnlen((wchar_t *)path, PATH_MAX); + +- /* +- * make room for one path separator between the treename and +- * path +- */ +- *out_len = treename_len + 1 + path_len; ++ /* make room for one path separator only if @path isn't empty */ ++ *out_len = treename_len + (path[0] ? 1 : 0) + path_len; + + /* +- * final path needs to be null-terminated UTF16 with a +- * size aligned to 8 ++ * final path needs to be 8-byte aligned as specified in ++ * MS-SMB2 2.2.13 SMB2 CREATE Request. 
+ */ +- +- *out_size = roundup((*out_len+1)*2, 8); +- *out_path = kzalloc(*out_size, GFP_KERNEL); ++ *out_size = roundup(*out_len * sizeof(__le16), 8); ++ *out_path = kzalloc(*out_size + sizeof(__le16) /* null */, GFP_KERNEL); + if (!*out_path) + return -ENOMEM; + diff --git a/queue-5.19/fbdev-fbcon-properly-revert-changes-when-vc_resize-failed.patch b/queue-5.19/fbdev-fbcon-properly-revert-changes-when-vc_resize-failed.patch new file mode 100644 index 00000000000..f775d5b93d9 --- /dev/null +++ b/queue-5.19/fbdev-fbcon-properly-revert-changes-when-vc_resize-failed.patch @@ -0,0 +1,88 @@ +From a5a923038d70d2d4a86cb4e3f32625a5ee6e7e24 Mon Sep 17 00:00:00 2001 +From: Shigeru Yoshida +Date: Fri, 19 Aug 2022 03:13:36 +0900 +Subject: fbdev: fbcon: Properly revert changes when vc_resize() failed + +From: Shigeru Yoshida + +commit a5a923038d70d2d4a86cb4e3f32625a5ee6e7e24 upstream. + +fbcon_do_set_font() calls vc_resize() when font size is changed. +However, if vc_resize() failed, current implementation doesn't +revert changes for font size, and this causes inconsistent state. + +syzbot reported unable to handle page fault due to this issue [1]. +syzbot's repro uses fault injection which causes failure for memory +allocation, so vc_resize() failed. + +This patch fixes this issue by properly reverting changes for font +related data when vc_resize() failed.
+ +Link: https://syzkaller.appspot.com/bug?id=3443d3a1fa6d964dd7310a0cb1696d165a3e07c4 [1] +Reported-by: syzbot+a168dbeaaa7778273c1b@syzkaller.appspotmail.com +Signed-off-by: Shigeru Yoshida +Signed-off-by: Helge Deller +CC: stable@vger.kernel.org # 5.15+ +Signed-off-by: Greg Kroah-Hartman +--- + drivers/video/fbdev/core/fbcon.c | 27 +++++++++++++++++++++++++-- + 1 file changed, 25 insertions(+), 2 deletions(-) + +--- a/drivers/video/fbdev/core/fbcon.c ++++ b/drivers/video/fbdev/core/fbcon.c +@@ -2402,15 +2402,21 @@ static int fbcon_do_set_font(struct vc_d + struct fb_info *info = fbcon_info_from_console(vc->vc_num); + struct fbcon_ops *ops = info->fbcon_par; + struct fbcon_display *p = &fb_display[vc->vc_num]; +- int resize; ++ int resize, ret, old_userfont, old_width, old_height, old_charcount; + char *old_data = NULL; + + resize = (w != vc->vc_font.width) || (h != vc->vc_font.height); + if (p->userfont) + old_data = vc->vc_font.data; + vc->vc_font.data = (void *)(p->fontdata = data); ++ old_userfont = p->userfont; + if ((p->userfont = userfont)) + REFCOUNT(data)++; ++ ++ old_width = vc->vc_font.width; ++ old_height = vc->vc_font.height; ++ old_charcount = vc->vc_font.charcount; ++ + vc->vc_font.width = w; + vc->vc_font.height = h; + vc->vc_font.charcount = charcount; +@@ -2426,7 +2432,9 @@ static int fbcon_do_set_font(struct vc_d + rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres); + cols /= w; + rows /= h; +- vc_resize(vc, cols, rows); ++ ret = vc_resize(vc, cols, rows); ++ if (ret) ++ goto err_out; + } else if (con_is_visible(vc) + && vc->vc_mode == KD_TEXT) { + fbcon_clear_margins(vc, 0); +@@ -2436,6 +2444,21 @@ static int fbcon_do_set_font(struct vc_d + if (old_data && (--REFCOUNT(old_data) == 0)) + kfree(old_data - FONT_EXTRA_WORDS * sizeof(int)); + return 0; ++ ++err_out: ++ p->fontdata = old_data; ++ vc->vc_font.data = (void *)old_data; ++ ++ if (userfont) { ++ p->userfont = old_userfont; ++ REFCOUNT(data)--; ++ } ++ ++ vc->vc_font.width = 
old_width; ++ vc->vc_font.height = old_height; ++ vc->vc_font.charcount = old_charcount; ++ ++ return ret; + } + + /* diff --git a/queue-5.19/loop-check-for-overflow-while-configuring-loop.patch b/queue-5.19/loop-check-for-overflow-while-configuring-loop.patch new file mode 100644 index 00000000000..b1e72efad82 --- /dev/null +++ b/queue-5.19/loop-check-for-overflow-while-configuring-loop.patch @@ -0,0 +1,59 @@ +From c490a0b5a4f36da3918181a8acdc6991d967c5f3 Mon Sep 17 00:00:00 2001 +From: Siddh Raman Pant +Date: Tue, 23 Aug 2022 21:38:10 +0530 +Subject: loop: Check for overflow while configuring loop + +From: Siddh Raman Pant + +commit c490a0b5a4f36da3918181a8acdc6991d967c5f3 upstream. + +The userspace can configure a loop using an ioctl call, wherein +a configuration of type loop_config is passed (see lo_ioctl()'s +case on line 1550 of drivers/block/loop.c). This proceeds to call +loop_configure() which in turn calls loop_set_status_from_info() +(see line 1050 of loop.c), passing &config->info which is of type +loop_info64*. This function then sets the appropriate values, like +the offset. + +loop_device has lo_offset of type loff_t (see line 52 of loop.c), +which is typedef-chained to long long, whereas loop_info64 has +lo_offset of type __u64 (see line 56 of include/uapi/linux/loop.h). + +The function directly copies offset from info to the device as +follows (See line 980 of loop.c): + lo->lo_offset = info->lo_offset; + +This results in an overflow, which triggers a warning in iomap_iter() +due to a call to iomap_iter_done() which has: + WARN_ON_ONCE(iter->iomap.offset > iter->pos); + +Thus, check for negative value during loop_set_status_from_info().
+ +Bug report: https://syzkaller.appspot.com/bug?id=c620fe14aac810396d3c3edc9ad73848bf69a29e + +Reported-and-tested-by: syzbot+a8e049cd3abd342936b6@syzkaller.appspotmail.com +Cc: stable@vger.kernel.org +Reviewed-by: Matthew Wilcox (Oracle) +Signed-off-by: Siddh Raman Pant +Reviewed-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20220823160810.181275-1-code@siddh.me +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + drivers/block/loop.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/drivers/block/loop.c ++++ b/drivers/block/loop.c +@@ -979,6 +979,11 @@ loop_set_status_from_info(struct loop_de + + lo->lo_offset = info->lo_offset; + lo->lo_sizelimit = info->lo_sizelimit; ++ ++ /* loff_t vars have been assigned __u64 */ ++ if (lo->lo_offset < 0 || lo->lo_sizelimit < 0) ++ return -EOVERFLOW; ++ + memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); + lo->lo_file_name[LO_NAME_SIZE-1] = 0; + lo->lo_flags = info->lo_flags; diff --git a/queue-5.19/mm-damon-dbgfs-avoid-duplicate-context-directory-creation.patch b/queue-5.19/mm-damon-dbgfs-avoid-duplicate-context-directory-creation.patch new file mode 100644 index 00000000000..3d8fb5d1cf4 --- /dev/null +++ b/queue-5.19/mm-damon-dbgfs-avoid-duplicate-context-directory-creation.patch @@ -0,0 +1,53 @@ +From d26f60703606ab425eee9882b32a1781a8bed74d Mon Sep 17 00:00:00 2001 +From: Badari Pulavarty +Date: Sun, 21 Aug 2022 18:08:53 +0000 +Subject: mm/damon/dbgfs: avoid duplicate context directory creation + +From: Badari Pulavarty + +commit d26f60703606ab425eee9882b32a1781a8bed74d upstream. + +When user tries to create a DAMON context via the DAMON debugfs interface +with a name of an already existing context, the context directory creation +fails but a new context is created and added in the internal data +structure, due to absence of the directory creation success check. As a +result, memory could leak and DAMON cannot be turned on. 
An example test +case is as below: + + # cd /sys/kernel/debug/damon/ + # echo "off" > monitor_on + # echo paddr > target_ids + # echo "abc" > mk_context + # echo "abc" > mk_context + # echo $$ > abc/target_ids + # echo "on" > monitor_on <<< fails + +Return value of 'debugfs_create_dir()' is expected to be ignored in +general, but this is an exceptional case as DAMON feature is depending +on the debugfs functionality and it has the potential duplicate name +issue. This commit therefore fixes the issue by checking the directory +creation failure and immediately return the error in the case. + +Link: https://lkml.kernel.org/r/20220821180853.2400-1-sj@kernel.org +Fixes: 75c1c2b53c78 ("mm/damon/dbgfs: support multiple contexts") +Signed-off-by: Badari Pulavarty +Signed-off-by: SeongJae Park +Cc: [ 5.15.x] +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/damon/dbgfs.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/mm/damon/dbgfs.c ++++ b/mm/damon/dbgfs.c +@@ -787,6 +787,9 @@ static int dbgfs_mk_context(char *name) + return -ENOENT; + + new_dir = debugfs_create_dir(name, root); ++ /* Below check is required for a potential duplicated name case */ ++ if (IS_ERR(new_dir)) ++ return PTR_ERR(new_dir); + dbgfs_dirs[dbgfs_nr_ctxs] = new_dir; + + new_ctx = dbgfs_new_ctx(); diff --git a/queue-5.19/mm-hugetlb-avoid-corrupting-page-mapping-in-hugetlb_mcopy_atomic_pte.patch b/queue-5.19/mm-hugetlb-avoid-corrupting-page-mapping-in-hugetlb_mcopy_atomic_pte.patch new file mode 100644 index 00000000000..244878a954a --- /dev/null +++ b/queue-5.19/mm-hugetlb-avoid-corrupting-page-mapping-in-hugetlb_mcopy_atomic_pte.patch @@ -0,0 +1,38 @@ +From ab74ef708dc51df7cf2b8a890b9c6990fac5c0c6 Mon Sep 17 00:00:00 2001 +From: Miaohe Lin +Date: Tue, 12 Jul 2022 21:05:42 +0800 +Subject: mm/hugetlb: avoid corrupting page->mapping in hugetlb_mcopy_atomic_pte + +From: Miaohe Lin + +commit ab74ef708dc51df7cf2b8a890b9c6990fac5c0c6 upstream. 
+ +In MCOPY_ATOMIC_CONTINUE case with a non-shared VMA, pages in the page +cache are installed in the ptes. But hugepage_add_new_anon_rmap is called +for them mistakenly because they're not vm_shared. This will corrupt the +page->mapping used by page cache code. + +Link: https://lkml.kernel.org/r/20220712130542.18836-1-linmiaohe@huawei.com +Fixes: f619147104c8 ("userfaultfd: add UFFDIO_CONTINUE ioctl") +Signed-off-by: Miaohe Lin +Reviewed-by: Mike Kravetz +Cc: Axel Rasmussen +Cc: Peter Xu +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/hugetlb.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -6026,7 +6026,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_s + if (!huge_pte_none_mostly(huge_ptep_get(dst_pte))) + goto out_release_unlock; + +- if (vm_shared) { ++ if (page_in_pagecache) { + page_dup_file_rmap(page, true); + } else { + ClearHPageRestoreReserve(page); diff --git a/queue-5.19/mm-mprotect-only-reference-swap-pfn-page-if-type-match.patch b/queue-5.19/mm-mprotect-only-reference-swap-pfn-page-if-type-match.patch new file mode 100644 index 00000000000..6ba95eec013 --- /dev/null +++ b/queue-5.19/mm-mprotect-only-reference-swap-pfn-page-if-type-match.patch @@ -0,0 +1,75 @@ +From 3d2f78f08cd8388035ac375e731ec1ac1b79b09d Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 23 Aug 2022 18:11:38 -0400 +Subject: mm/mprotect: only reference swap pfn page if type match + +From: Peter Xu + +commit 3d2f78f08cd8388035ac375e731ec1ac1b79b09d upstream. + +Yu Zhao reported a bug after the commit "mm/swap: Add swp_offset_pfn() to +fetch PFN from swap entry" added a check in swp_offset_pfn() for swap type [1]: + + kernel BUG at include/linux/swapops.h:117! 
+ CPU: 46 PID: 5245 Comm: EventManager_De Tainted: G S O L 6.0.0-dbg-DEV #2 + RIP: 0010:pfn_swap_entry_to_page+0x72/0xf0 + Code: c6 48 8b 36 48 83 fe ff 74 53 48 01 d1 48 83 c1 08 48 8b 09 f6 + c1 01 75 7b 66 90 48 89 c1 48 8b 09 f6 c1 01 74 74 5d c3 eb 9e <0f> 0b + 48 ba ff ff ff ff 03 00 00 00 eb ae a9 ff 0f 00 00 75 13 48 + RSP: 0018:ffffa59e73fabb80 EFLAGS: 00010282 + RAX: 00000000ffffffe8 RBX: 0c00000000000000 RCX: ffffcd5440000000 + RDX: 1ffffffffff7a80a RSI: 0000000000000000 RDI: 0c0000000000042b + RBP: ffffa59e73fabb80 R08: ffff9965ca6e8bb8 R09: 0000000000000000 + R10: ffffffffa5a2f62d R11: 0000030b372e9fff R12: ffff997b79db5738 + R13: 000000000000042b R14: 0c0000000000042b R15: 1ffffffffff7a80a + FS: 00007f549d1bb700(0000) GS:ffff99d3cf680000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 0000440d035b3180 CR3: 0000002243176004 CR4: 00000000003706e0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + Call Trace: + + change_pte_range+0x36e/0x880 + change_p4d_range+0x2e8/0x670 + change_protection_range+0x14e/0x2c0 + mprotect_fixup+0x1ee/0x330 + do_mprotect_pkey+0x34c/0x440 + __x64_sys_mprotect+0x1d/0x30 + +It triggers because pfn_swap_entry_to_page() could be called upon e.g. a +genuine swap entry. + +Fix it by only calling it when it's a write migration entry where the page* +is used. 
+ +[1] https://lore.kernel.org/lkml/CAOUHufaVC2Za-p8m0aiHw6YkheDcrO-C3wRGixwDS32VTS+k1w@mail.gmail.com/ + +Link: https://lkml.kernel.org/r/20220823221138.45602-1-peterx@redhat.com +Fixes: 6c287605fd56 ("mm: remember exclusively mapped anonymous pages with PG_anon_exclusive") +Signed-off-by: Peter Xu +Reported-by: Yu Zhao +Tested-by: Yu Zhao +Reviewed-by: David Hildenbrand +Cc: "Huang, Ying" +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/mprotect.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/mm/mprotect.c ++++ b/mm/mprotect.c +@@ -158,10 +158,11 @@ static unsigned long change_pte_range(st + pages++; + } else if (is_swap_pte(oldpte)) { + swp_entry_t entry = pte_to_swp_entry(oldpte); +- struct page *page = pfn_swap_entry_to_page(entry); + pte_t newpte; + + if (is_writable_migration_entry(entry)) { ++ struct page *page = pfn_swap_entry_to_page(entry); ++ + /* + * A protection check is difficult so + * just be safe and disable write diff --git a/queue-5.19/nouveau-explicitly-wait-on-the-fence-in-nouveau_bo_move_m2mf.patch b/queue-5.19/nouveau-explicitly-wait-on-the-fence-in-nouveau_bo_move_m2mf.patch new file mode 100644 index 00000000000..f691a3478aa --- /dev/null +++ b/queue-5.19/nouveau-explicitly-wait-on-the-fence-in-nouveau_bo_move_m2mf.patch @@ -0,0 +1,40 @@ +From 6b04ce966a738ecdd9294c9593e48513c0dc90aa Mon Sep 17 00:00:00 2001 +From: Karol Herbst +Date: Fri, 19 Aug 2022 22:09:28 +0200 +Subject: nouveau: explicitly wait on the fence in nouveau_bo_move_m2mf + +From: Karol Herbst + +commit 6b04ce966a738ecdd9294c9593e48513c0dc90aa upstream. + +It is a bit unclear to us why that's helping, but it does and unbreaks +suspend/resume on a lot of GPUs without any known drawbacks.
+ +Cc: stable@vger.kernel.org # v5.15+ +Closes: https://gitlab.freedesktop.org/drm/nouveau/-/issues/156 +Signed-off-by: Karol Herbst +Reviewed-by: Lyude Paul +Link: https://patchwork.freedesktop.org/patch/msgid/20220819200928.401416-1-kherbst@redhat.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/nouveau/nouveau_bo.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/drivers/gpu/drm/nouveau/nouveau_bo.c ++++ b/drivers/gpu/drm/nouveau/nouveau_bo.c +@@ -820,6 +820,15 @@ nouveau_bo_move_m2mf(struct ttm_buffer_o + if (ret == 0) { + ret = nouveau_fence_new(chan, false, &fence); + if (ret == 0) { ++ /* TODO: figure out a better solution here ++ * ++ * wait on the fence here explicitly as going through ++ * ttm_bo_move_accel_cleanup somehow doesn't seem to do it. ++ * ++ * Without this the operation can timeout and we'll fallback to a ++ * software copy, which might take several minutes to finish. ++ */ ++ nouveau_fence_wait(fence, false, false); + ret = ttm_bo_move_accel_cleanup(bo, + &fence->base, + evict, false, diff --git a/queue-5.19/ocfs2-fix-freeing-uninitialized-resource-on-ocfs2_dlm_shutdown.patch b/queue-5.19/ocfs2-fix-freeing-uninitialized-resource-on-ocfs2_dlm_shutdown.patch new file mode 100644 index 00000000000..219939aaec3 --- /dev/null +++ b/queue-5.19/ocfs2-fix-freeing-uninitialized-resource-on-ocfs2_dlm_shutdown.patch @@ -0,0 +1,69 @@ +From 550842cc60987b269e31b222283ade3e1b6c7fc8 Mon Sep 17 00:00:00 2001 +From: Heming Zhao +Date: Mon, 15 Aug 2022 16:57:54 +0800 +Subject: ocfs2: fix freeing uninitialized resource on ocfs2_dlm_shutdown + +From: Heming Zhao + +commit 550842cc60987b269e31b222283ade3e1b6c7fc8 upstream. + +After commit 0737e01de9c4 ("ocfs2: ocfs2_mount_volume does cleanup job +before return error"), any procedure after ocfs2_dlm_init() fails will +trigger crash when calling ocfs2_dlm_shutdown(). + +ie: On local mount mode, no dlm resource is initialized. 
If +ocfs2_mount_volume() fails in ocfs2_find_slot(), error handling will call +ocfs2_dlm_shutdown(), then does dlm resource cleanup job, which will +trigger kernel crash. + +This solution should bypass uninitialized resources in +ocfs2_dlm_shutdown(). + +Link: https://lkml.kernel.org/r/20220815085754.20417-1-heming.zhao@suse.com +Fixes: 0737e01de9c4 ("ocfs2: ocfs2_mount_volume does cleanup job before return error") +Signed-off-by: Heming Zhao +Reviewed-by: Joseph Qi +Cc: Mark Fasheh +Cc: Joel Becker +Cc: Junxiao Bi +Cc: Changwei Ge +Cc: Gang He +Cc: Jun Piao +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/ocfs2/dlmglue.c | 8 +++++--- + fs/ocfs2/super.c | 3 +-- + 2 files changed, 6 insertions(+), 5 deletions(-) + +--- a/fs/ocfs2/dlmglue.c ++++ b/fs/ocfs2/dlmglue.c +@@ -3403,10 +3403,12 @@ void ocfs2_dlm_shutdown(struct ocfs2_sup + ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres); + ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres); + +- ocfs2_cluster_disconnect(osb->cconn, hangup_pending); +- osb->cconn = NULL; ++ if (osb->cconn) { ++ ocfs2_cluster_disconnect(osb->cconn, hangup_pending); ++ osb->cconn = NULL; + +- ocfs2_dlm_shutdown_debug(osb); ++ ocfs2_dlm_shutdown_debug(osb); ++ } + } + + static int ocfs2_drop_lock(struct ocfs2_super *osb, +--- a/fs/ocfs2/super.c ++++ b/fs/ocfs2/super.c +@@ -1914,8 +1914,7 @@ static void ocfs2_dismount_volume(struct + !ocfs2_is_hard_readonly(osb)) + hangup_needed = 1; + +- if (osb->cconn) +- ocfs2_dlm_shutdown(osb, hangup_needed); ++ ocfs2_dlm_shutdown(osb, hangup_needed); + + ocfs2_blockcheck_stats_debugfs_remove(&osb->osb_ecc_stats); + debugfs_remove_recursive(osb->osb_debug_root); diff --git a/queue-5.19/perf-x86-intel-fix-pebs-event-constraints-for-adl.patch b/queue-5.19/perf-x86-intel-fix-pebs-event-constraints-for-adl.patch new file mode 100644 index 00000000000..7b72f15b330 --- /dev/null +++ b/queue-5.19/perf-x86-intel-fix-pebs-event-constraints-for-adl.patch @@ -0,0 +1,36 @@ +From 
cde643ff75bc20c538dfae787ca3b587bab16b50 Mon Sep 17 00:00:00 2001 +From: Kan Liang +Date: Thu, 18 Aug 2022 11:44:29 -0700 +Subject: perf/x86/intel: Fix pebs event constraints for ADL + +From: Kan Liang + +commit cde643ff75bc20c538dfae787ca3b587bab16b50 upstream. + +According to the latest event list, the LOAD_LATENCY PEBS event only +works on the GP counter 0 and 1 for ADL and RPL. + +Update the pebs event constraints table. + +Fixes: f83d2f91d259 ("perf/x86/intel: Add Alder Lake Hybrid support") +Reported-by: Ammy Yi +Signed-off-by: Kan Liang +Signed-off-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20220818184429.2355857-1-kan.liang@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/events/intel/ds.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/events/intel/ds.c ++++ b/arch/x86/events/intel/ds.c +@@ -822,7 +822,7 @@ struct event_constraint intel_glm_pebs_e + + struct event_constraint intel_grt_pebs_event_constraints[] = { + /* Allow all events as PEBS with no flags */ +- INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0xf), ++ INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0x3), + INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xf), + EVENT_CONSTRAINT_END + }; diff --git a/queue-5.19/perf-x86-lbr-enable-the-branch-type-for-the-arch-lbr-by-default.patch b/queue-5.19/perf-x86-lbr-enable-the-branch-type-for-the-arch-lbr-by-default.patch new file mode 100644 index 00000000000..a9aee49d94d --- /dev/null +++ b/queue-5.19/perf-x86-lbr-enable-the-branch-type-for-the-arch-lbr-by-default.patch @@ -0,0 +1,60 @@ +From 32ba156df1b1c8804a4e5be5339616945eafea22 Mon Sep 17 00:00:00 2001 +From: Kan Liang +Date: Tue, 16 Aug 2022 05:56:11 -0700 +Subject: perf/x86/lbr: Enable the branch type for the Arch LBR by default + +From: Kan Liang + +commit 32ba156df1b1c8804a4e5be5339616945eafea22 upstream. + +On the platform with Arch LBR, the HW raw branch type encoding may leak +to the perf tool when the SAVE_TYPE option is not set. 
+ +In the intel_pmu_store_lbr(), the HW raw branch type is stored in +lbr_entries[].type. If the SAVE_TYPE option is set, the +lbr_entries[].type will be converted into the generic PERF_BR_* type +in the intel_pmu_lbr_filter() and exposed to the user tools. +But if the SAVE_TYPE option is NOT set by the user, the current perf +kernel doesn't clear the field. The HW raw branch type leaks. + +There are two solutions to fix the issue for the Arch LBR. +One is to clear the field if the SAVE_TYPE option is NOT set. +The other solution is to unconditionally convert the branch type and +expose the generic type to the user tools. + +The latter is implemented here, because +- The branch type is valuable information. I don't see a case where + you would not benefit from the branch type. (Stephane Eranian) +- Not having the branch type DOES NOT save any space in the + branch record (Stephane Eranian) +- The Arch LBR HW can retrieve the common branch types from the + LBR_INFO. It doesn't require the high overhead SW disassemble. + +Fixes: 47125db27e47 ("perf/x86/intel/lbr: Support Architectural LBR") +Reported-by: Stephane Eranian +Signed-off-by: Kan Liang +Signed-off-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20220816125612.2042397-1-kan.liang@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/events/intel/lbr.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/arch/x86/events/intel/lbr.c ++++ b/arch/x86/events/intel/lbr.c +@@ -1097,6 +1097,14 @@ static int intel_pmu_setup_hw_lbr_filter + + if (static_cpu_has(X86_FEATURE_ARCH_LBR)) { + reg->config = mask; ++ ++ /* ++ * The Arch LBR HW can retrieve the common branch types ++ * from the LBR_INFO. It doesn't require the high overhead ++ * SW disassemble. ++ * Enable the branch type by default for the Arch LBR. 
++ */ ++ reg->reg |= X86_BR_TYPE_SAVE; + return 0; + } + diff --git a/queue-5.19/revert-memcg-cleanup-racy-sum-avoidance-code.patch b/queue-5.19/revert-memcg-cleanup-racy-sum-avoidance-code.patch new file mode 100644 index 00000000000..0fbba52f9c3 --- /dev/null +++ b/queue-5.19/revert-memcg-cleanup-racy-sum-avoidance-code.patch @@ -0,0 +1,95 @@ +From dbb16df6443c59e8a1ef21c2272fcf387d600ddf Mon Sep 17 00:00:00 2001 +From: Shakeel Butt +Date: Wed, 17 Aug 2022 17:21:39 +0000 +Subject: Revert "memcg: cleanup racy sum avoidance code" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Shakeel Butt + +commit dbb16df6443c59e8a1ef21c2272fcf387d600ddf upstream. + +This reverts commit 96e51ccf1af33e82f429a0d6baebba29c6448d0f. + +Recently we started running the kernel with rstat infrastructure on +production traffic and begin to see negative memcg stats values. +Particularly the 'sock' stat is the one which we observed having negative +value. + +$ grep "sock " /mnt/memory/job/memory.stat +sock 253952 +total_sock 18446744073708724224 + +Re-run after couple of seconds + +$ grep "sock " /mnt/memory/job/memory.stat +sock 253952 +total_sock 53248 + +For now we are only seeing this issue on large machines (256 CPUs) and +only with 'sock' stat. I think the networking stack increase the stat on +one cpu and decrease it on another cpu much more often. So, this negative +sock is due to rstat flusher flushing the stats on the CPU that has seen +the decrement of sock but missed the CPU that has increments. A typical +race condition. + +For easy stable backport, revert is the most simple solution. For long +term solution, I am thinking of two directions. First is just reduce the +race window by optimizing the rstat flusher. Second is if the reader sees +a negative stat value, force flush and restart the stat collection. +Basically retry but limited. 
+ +Link: https://lkml.kernel.org/r/20220817172139.3141101-1-shakeelb@google.com +Fixes: 96e51ccf1af33e8 ("memcg: cleanup racy sum avoidance code") +Signed-off-by: Shakeel Butt +Cc: "Michal Koutný" +Cc: Johannes Weiner +Cc: Michal Hocko +Cc: Roman Gushchin +Cc: Muchun Song +Cc: David Hildenbrand +Cc: Yosry Ahmed +Cc: Greg Thelen +Cc: [5.15] +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/memcontrol.h | 15 +++++++++++++-- + 1 file changed, 13 insertions(+), 2 deletions(-) + +--- a/include/linux/memcontrol.h ++++ b/include/linux/memcontrol.h +@@ -978,19 +978,30 @@ static inline void mod_memcg_page_state( + + static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) + { +- return READ_ONCE(memcg->vmstats.state[idx]); ++ long x = READ_ONCE(memcg->vmstats.state[idx]); ++#ifdef CONFIG_SMP ++ if (x < 0) ++ x = 0; ++#endif ++ return x; + } + + static inline unsigned long lruvec_page_state(struct lruvec *lruvec, + enum node_stat_item idx) + { + struct mem_cgroup_per_node *pn; ++ long x; + + if (mem_cgroup_disabled()) + return node_page_state(lruvec_pgdat(lruvec), idx); + + pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); +- return READ_ONCE(pn->lruvec_stats.state[idx]); ++ x = READ_ONCE(pn->lruvec_stats.state[idx]); ++#ifdef CONFIG_SMP ++ if (x < 0) ++ x = 0; ++#endif ++ return x; + } + + static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, diff --git a/queue-5.19/riscv-dts-microchip-correct-l2-cache-interrupts.patch b/queue-5.19/riscv-dts-microchip-correct-l2-cache-interrupts.patch new file mode 100644 index 00000000000..376608909be --- /dev/null +++ b/queue-5.19/riscv-dts-microchip-correct-l2-cache-interrupts.patch @@ -0,0 +1,61 @@ +From 34fc9cc3aebe8b9e27d3bc821543dd482dc686ca Mon Sep 17 00:00:00 2001 +From: Heinrich Schuchardt +Date: Wed, 17 Aug 2022 15:25:21 +0200 +Subject: riscv: dts: microchip: correct L2 cache interrupts +MIME-Version: 1.0 +Content-Type: text/plain; 
charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Heinrich Schuchardt + +commit 34fc9cc3aebe8b9e27d3bc821543dd482dc686ca upstream. + +The "PolarFire SoC MSS Technical Reference Manual" documents the +following PLIC interrupts: + +1 - L2 Cache Controller Signals when a metadata correction event occurs +2 - L2 Cache Controller Signals when an uncorrectable metadata event occurs +3 - L2 Cache Controller Signals when a data correction event occurs +4 - L2 Cache Controller Signals when an uncorrectable data event occurs + +This differs from the SiFive FU540 which only has three L2 cache related +interrupts. + +The sequence in the device tree is defined by an enum: + + enum { +         DIR_CORR = 0, +         DATA_CORR, +         DATA_UNCORR, +         DIR_UNCORR, + }; + +So the correct sequence of the L2 cache interrupts is + + interrupts = <1>, <3>, <4>, <2>; + +[Conor] +This manifests as an unusable system if the l2-cache driver is enabled, +as the wrong interrupt gets cleared & the handler prints errors to the +console ad infinitum. 
+ +Fixes: 0fa6107eca41 ("RISC-V: Initial DTS for Microchip ICICLE board") +CC: stable@vger.kernel.org # 5.15: e35b07a7df9b: riscv: dts: microchip: mpfs: Group tuples in interrupt properties +Signed-off-by: Heinrich Schuchardt +Signed-off-by: Conor Dooley +Signed-off-by: Greg Kroah-Hartman +--- + arch/riscv/boot/dts/microchip/mpfs.dtsi | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/riscv/boot/dts/microchip/mpfs.dtsi ++++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi +@@ -169,7 +169,7 @@ + cache-size = <2097152>; + cache-unified; + interrupt-parent = <&plic>; +- interrupts = <1>, <2>, <3>; ++ interrupts = <1>, <3>, <4>, <2>; + }; + + clint: clint@2000000 { diff --git a/queue-5.19/riscv-signal-fix-missing-prototype-warning.patch b/queue-5.19/riscv-signal-fix-missing-prototype-warning.patch new file mode 100644 index 00000000000..ab41ea44b35 --- /dev/null +++ b/queue-5.19/riscv-signal-fix-missing-prototype-warning.patch @@ -0,0 +1,54 @@ +From b5c3aca86d2698c4850b6ee8b341938025d2780c Mon Sep 17 00:00:00 2001 +From: Conor Dooley +Date: Sun, 14 Aug 2022 15:12:37 +0100 +Subject: riscv: signal: fix missing prototype warning + +From: Conor Dooley + +commit b5c3aca86d2698c4850b6ee8b341938025d2780c upstream. + +Fix the warning: +arch/riscv/kernel/signal.c:316:27: warning: no previous prototype for function 'do_notify_resume' [-Wmissing-prototypes] +asmlinkage __visible void do_notify_resume(struct pt_regs *regs, + +All other functions in the file are static & none of the existing +headers stood out as an obvious location. Create signal.h to hold the +declaration. 
+ +Fixes: e2c0cdfba7f6 ("RISC-V: User-facing API") +Signed-off-by: Conor Dooley +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20220814141237.493457-4-mail@conchuod.ie +Signed-off-by: Palmer Dabbelt +Signed-off-by: Greg Kroah-Hartman +--- + arch/riscv/include/asm/signal.h | 12 ++++++++++++ + arch/riscv/kernel/signal.c | 1 + + 2 files changed, 13 insertions(+) + create mode 100644 arch/riscv/include/asm/signal.h + +--- /dev/null ++++ b/arch/riscv/include/asm/signal.h +@@ -0,0 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++ ++#ifndef __ASM_SIGNAL_H ++#define __ASM_SIGNAL_H ++ ++#include ++#include ++ ++asmlinkage __visible ++void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags); ++ ++#endif +--- a/arch/riscv/kernel/signal.c ++++ b/arch/riscv/kernel/signal.c +@@ -15,6 +15,7 @@ + + #include + #include ++#include + #include + #include + #include diff --git a/queue-5.19/riscv-traps-add-missing-prototype.patch b/queue-5.19/riscv-traps-add-missing-prototype.patch new file mode 100644 index 00000000000..ca497e5a63d --- /dev/null +++ b/queue-5.19/riscv-traps-add-missing-prototype.patch @@ -0,0 +1,51 @@ +From d951b20b9def73dcc39a5379831525d0d2a537e9 Mon Sep 17 00:00:00 2001 +From: Conor Dooley +Date: Sun, 14 Aug 2022 15:12:38 +0100 +Subject: riscv: traps: add missing prototype + +From: Conor Dooley + +commit d951b20b9def73dcc39a5379831525d0d2a537e9 upstream. + +Sparse complains: +arch/riscv/kernel/traps.c:213:6: warning: symbol 'shadow_stack' was not declared. Should it be static? + +The variable is used in entry.S, so declare shadow_stack there +alongside SHADOW_OVERFLOW_STACK_SIZE. 
+ +Fixes: 31da94c25aea ("riscv: add VMAP_STACK overflow detection") +Signed-off-by: Conor Dooley +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20220814141237.493457-5-mail@conchuod.ie +Signed-off-by: Palmer Dabbelt +Signed-off-by: Greg Kroah-Hartman +--- + arch/riscv/include/asm/thread_info.h | 2 ++ + arch/riscv/kernel/traps.c | 3 ++- + 2 files changed, 4 insertions(+), 1 deletion(-) + +--- a/arch/riscv/include/asm/thread_info.h ++++ b/arch/riscv/include/asm/thread_info.h +@@ -42,6 +42,8 @@ + + #ifndef __ASSEMBLY__ + ++extern long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE / sizeof(long)]; ++ + #include + #include + +--- a/arch/riscv/kernel/traps.c ++++ b/arch/riscv/kernel/traps.c +@@ -20,9 +20,10 @@ + + #include + #include ++#include + #include + #include +-#include ++#include + + int show_unhandled_signals = 1; + diff --git a/queue-5.19/s390-fix-double-free-of-gs-and-ri-cbs-on-fork-failure.patch b/queue-5.19/s390-fix-double-free-of-gs-and-ri-cbs-on-fork-failure.patch new file mode 100644 index 00000000000..917d59db996 --- /dev/null +++ b/queue-5.19/s390-fix-double-free-of-gs-and-ri-cbs-on-fork-failure.patch @@ -0,0 +1,81 @@ +From 13cccafe0edcd03bf1c841de8ab8a1c8e34f77d9 Mon Sep 17 00:00:00 2001 +From: Brian Foster +Date: Tue, 16 Aug 2022 11:54:07 -0400 +Subject: s390: fix double free of GS and RI CBs on fork() failure + +From: Brian Foster + +commit 13cccafe0edcd03bf1c841de8ab8a1c8e34f77d9 upstream. + +The pointers for guarded storage and runtime instrumentation control +blocks are stored in the thread_struct of the associated task. These +pointers are initially copied on fork() via arch_dup_task_struct() +and then cleared via copy_thread() before fork() returns. If fork() +happens to fail after the initial task dup and before copy_thread(), +the newly allocated task and associated thread_struct memory are +freed via free_task() -> arch_release_task_struct(). 
This results in +a double free of the guarded storage and runtime info structs +because the fields in the failed task still refer to memory +associated with the source task. + +This problem can manifest as a BUG_ON() in set_freepointer() (with +CONFIG_SLAB_FREELIST_HARDENED enabled) or KASAN splat (if enabled) +when running trinity syscall fuzz tests on s390x. To avoid this +problem, clear the associated pointer fields in +arch_dup_task_struct() immediately after the new task is copied. +Note that the RI flag is still cleared in copy_thread() because it +resides in thread stack memory and that is where stack info is +copied. + +Signed-off-by: Brian Foster +Fixes: 8d9047f8b967c ("s390/runtime instrumentation: simplify task exit handling") +Fixes: 7b83c6297d2fc ("s390/guarded storage: simplify task exit handling") +Cc: # 4.15 +Reviewed-by: Gerald Schaefer +Reviewed-by: Heiko Carstens +Link: https://lore.kernel.org/r/20220816155407.537372-1-bfoster@redhat.com +Signed-off-by: Vasily Gorbik +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/kernel/process.c | 22 ++++++++++++++++------ + 1 file changed, 16 insertions(+), 6 deletions(-) + +--- a/arch/s390/kernel/process.c ++++ b/arch/s390/kernel/process.c +@@ -91,6 +91,18 @@ int arch_dup_task_struct(struct task_str + + memcpy(dst, src, arch_task_struct_size); + dst->thread.fpu.regs = dst->thread.fpu.fprs; ++ ++ /* ++ * Don't transfer over the runtime instrumentation or the guarded ++ * storage control block pointers. These fields are cleared here instead ++ * of in copy_thread() to avoid premature freeing of associated memory ++ * on fork() failure. Wait to clear the RI flag because ->stack still ++ * refers to the source thread. 
++ */ ++ dst->thread.ri_cb = NULL; ++ dst->thread.gs_cb = NULL; ++ dst->thread.gs_bc_cb = NULL; ++ + return 0; + } + +@@ -150,13 +162,11 @@ int copy_thread(struct task_struct *p, c + frame->childregs.flags = 0; + if (new_stackp) + frame->childregs.gprs[15] = new_stackp; +- +- /* Don't copy runtime instrumentation info */ +- p->thread.ri_cb = NULL; ++ /* ++ * Clear the runtime instrumentation flag after the above childregs ++ * copy. The CB pointer was already cleared in arch_dup_task_struct(). ++ */ + frame->childregs.psw.mask &= ~PSW_MASK_RI; +- /* Don't copy guarded storage control block */ +- p->thread.gs_cb = NULL; +- p->thread.gs_bc_cb = NULL; + + /* Set a new TLS ? */ + if (clone_flags & CLONE_SETTLS) { diff --git a/queue-5.19/s390-mm-do-not-trigger-write-fault-when-vma-does-not-allow-vm_write.patch b/queue-5.19/s390-mm-do-not-trigger-write-fault-when-vma-does-not-allow-vm_write.patch new file mode 100644 index 00000000000..11a7aee0080 --- /dev/null +++ b/queue-5.19/s390-mm-do-not-trigger-write-fault-when-vma-does-not-allow-vm_write.patch @@ -0,0 +1,49 @@ +From 41ac42f137080bc230b5882e3c88c392ab7f2d32 Mon Sep 17 00:00:00 2001 +From: Gerald Schaefer +Date: Wed, 17 Aug 2022 15:26:03 +0200 +Subject: s390/mm: do not trigger write fault when vma does not allow VM_WRITE + +From: Gerald Schaefer + +commit 41ac42f137080bc230b5882e3c88c392ab7f2d32 upstream. + +For non-protection pXd_none() page faults in do_dat_exception(), we +call do_exception() with access == (VM_READ | VM_WRITE | VM_EXEC). +In do_exception(), vma->vm_flags is checked against that before +calling handle_mm_fault(). + +Since commit 92f842eac7ee3 ("[S390] store indication fault optimization"), +we call handle_mm_fault() with FAULT_FLAG_WRITE, when recognizing that +it was a write access. However, the vma flags check is still only +checking against (VM_READ | VM_WRITE | VM_EXEC), and therefore also +calling handle_mm_fault() with FAULT_FLAG_WRITE in cases where the vma +does not allow VM_WRITE. 
+ +Fix this by changing access check in do_exception() to VM_WRITE only, +when recognizing write access. + +Link: https://lkml.kernel.org/r/20220811103435.188481-3-david@redhat.com +Fixes: 92f842eac7ee3 ("[S390] store indication fault optimization") +Cc: +Reported-by: David Hildenbrand +Reviewed-by: Heiko Carstens +Signed-off-by: Gerald Schaefer +Signed-off-by: Vasily Gorbik +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/mm/fault.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/arch/s390/mm/fault.c ++++ b/arch/s390/mm/fault.c +@@ -379,7 +379,9 @@ static inline vm_fault_t do_exception(st + flags = FAULT_FLAG_DEFAULT; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; +- if (access == VM_WRITE || is_write) ++ if (is_write) ++ access = VM_WRITE; ++ if (access == VM_WRITE) + flags |= FAULT_FLAG_WRITE; + mmap_read_lock(mm); + diff --git a/queue-5.19/series b/queue-5.19/series index dd07f300024..889eb5f0314 100644 --- a/queue-5.19/series +++ b/queue-5.19/series @@ -91,3 +91,41 @@ net-stmmac-work-around-sporadic-tx-issue-on-link-up.patch net-lantiq_xrx200-confirm-skb-is-allocated-before-us.patch net-lantiq_xrx200-fix-lock-under-memory-pressure.patch net-lantiq_xrx200-restore-buffer-if-memory-allocatio.patch +btrfs-fix-silent-failure-when-deleting-root-reference.patch +btrfs-replace-drop-assert-for-suspended-replace.patch +btrfs-add-info-when-mount-fails-due-to-stale-replace-target.patch +btrfs-fix-space-cache-corruption-and-potential-double-allocations.patch +btrfs-check-if-root-is-readonly-while-setting-security-xattr.patch +btrfs-fix-possible-memory-leak-in-btrfs_get_dev_args_from_path.patch +btrfs-update-generation-of-hole-file-extent-item-when-merging-holes.patch +x86-boot-don-t-propagate-uninitialized-boot_params-cc_blob_address.patch +perf-x86-intel-fix-pebs-event-constraints-for-adl.patch +perf-x86-lbr-enable-the-branch-type-for-the-arch-lbr-by-default.patch +x86-entry-fix-entry_int80_compat-for-xen-pv-guests.patch 
+x86-unwind-orc-unwind-ftrace-trampolines-with-correct-orc-entry.patch +x86-sev-don-t-use-cc_platform_has-for-early-sev-snp-calls.patch +x86-bugs-add-unknown-reporting-for-mmio-stale-data.patch +x86-nospec-unwreck-the-rsb-stuffing.patch +x86-pat-have-pat_enabled-properly-reflect-state-when-running-on-xen.patch +loop-check-for-overflow-while-configuring-loop.patch +writeback-avoid-use-after-free-after-removing-device.patch +audit-move-audit_return_fixup-before-the-filters.patch +asm-generic-sections-refactor-memory_intersects.patch +mm-damon-dbgfs-avoid-duplicate-context-directory-creation.patch +s390-mm-do-not-trigger-write-fault-when-vma-does-not-allow-vm_write.patch +bootmem-remove-the-vmemmap-pages-from-kmemleak-in-put_page_bootmem.patch +mm-hugetlb-avoid-corrupting-page-mapping-in-hugetlb_mcopy_atomic_pte.patch +mm-mprotect-only-reference-swap-pfn-page-if-type-match.patch +cifs-skip-extra-null-byte-in-filenames.patch +s390-fix-double-free-of-gs-and-ri-cbs-on-fork-failure.patch +fbdev-fbcon-properly-revert-changes-when-vc_resize-failed.patch +revert-memcg-cleanup-racy-sum-avoidance-code.patch +shmem-update-folio-if-shmem_replace_page-updates-the-page.patch +acpi-processor-remove-freq-qos-request-for-all-cpus.patch +nouveau-explicitly-wait-on-the-fence-in-nouveau_bo_move_m2mf.patch +smb3-missing-inode-locks-in-punch-hole.patch +ocfs2-fix-freeing-uninitialized-resource-on-ocfs2_dlm_shutdown.patch +xen-privcmd-fix-error-exit-of-privcmd_ioctl_dm_op.patch +riscv-signal-fix-missing-prototype-warning.patch +riscv-traps-add-missing-prototype.patch +riscv-dts-microchip-correct-l2-cache-interrupts.patch diff --git a/queue-5.19/shmem-update-folio-if-shmem_replace_page-updates-the-page.patch b/queue-5.19/shmem-update-folio-if-shmem_replace_page-updates-the-page.patch new file mode 100644 index 00000000000..fe968789c5d --- /dev/null +++ b/queue-5.19/shmem-update-folio-if-shmem_replace_page-updates-the-page.patch @@ -0,0 +1,41 @@ +From 9dfb3b8d655022760ca68af11821f1c63aa547c3 
Mon Sep 17 00:00:00 2001 +From: "Matthew Wilcox (Oracle)" +Date: Sat, 30 Jul 2022 05:25:18 +0100 +Subject: shmem: update folio if shmem_replace_page() updates the page + +From: Matthew Wilcox (Oracle) + +commit 9dfb3b8d655022760ca68af11821f1c63aa547c3 upstream. + +If we allocate a new page, we need to make sure that our folio matches +that new page. + +If we do end up in this code path, we store the wrong page in the shmem +inode's page cache, and I would rather imagine that data corruption +ensues. + +This will be solved by changing shmem_replace_page() to +shmem_replace_folio(), but this is the minimal fix. + +Link: https://lkml.kernel.org/r/20220730042518.1264767-1-willy@infradead.org +Fixes: da08e9b79323 ("mm/shmem: convert shmem_swapin_page() to shmem_swapin_folio()") +Signed-off-by: Matthew Wilcox (Oracle) +Reviewed-by: William Kucharski +Cc: Hugh Dickins +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/shmem.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -1771,6 +1771,7 @@ static int shmem_swapin_folio(struct ino + + if (shmem_should_replace_folio(folio, gfp)) { + error = shmem_replace_page(&page, gfp, info, index); ++ folio = page_folio(page); + if (error) + goto failed; + } diff --git a/queue-5.19/smb3-missing-inode-locks-in-punch-hole.patch b/queue-5.19/smb3-missing-inode-locks-in-punch-hole.patch new file mode 100644 index 00000000000..6dcb78f895f --- /dev/null +++ b/queue-5.19/smb3-missing-inode-locks-in-punch-hole.patch @@ -0,0 +1,60 @@ +From ba0803050d610d5072666be727bca5e03e55b242 Mon Sep 17 00:00:00 2001 +From: David Howells +Date: Tue, 23 Aug 2022 02:10:56 -0500 +Subject: smb3: missing inode locks in punch hole + +From: David Howells + +commit ba0803050d610d5072666be727bca5e03e55b242 upstream. + +smb3 fallocate punch hole was not grabbing the inode or filemap_invalidate +locks so could have race with pagemap reinstantiating the page. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: David Howells +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman +--- + fs/cifs/smb2ops.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/fs/cifs/smb2ops.c ++++ b/fs/cifs/smb2ops.c +@@ -3671,7 +3671,7 @@ static long smb3_zero_range(struct file + static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, + loff_t offset, loff_t len) + { +- struct inode *inode; ++ struct inode *inode = file_inode(file); + struct cifsFileInfo *cfile = file->private_data; + struct file_zero_data_information fsctl_buf; + long rc; +@@ -3680,14 +3680,12 @@ static long smb3_punch_hole(struct file + + xid = get_xid(); + +- inode = d_inode(cfile->dentry); +- ++ inode_lock(inode); + /* Need to make file sparse, if not already, before freeing range. */ + /* Consider adding equivalent for compressed since it could also work */ + if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse)) { + rc = -EOPNOTSUPP; +- free_xid(xid); +- return rc; ++ goto out; + } + + filemap_invalidate_lock(inode->i_mapping); +@@ -3707,8 +3705,10 @@ static long smb3_punch_hole(struct file + true /* is_fctl */, (char *)&fsctl_buf, + sizeof(struct file_zero_data_information), + CIFSMaxBufSize, NULL, NULL); +- free_xid(xid); + filemap_invalidate_unlock(inode->i_mapping); ++out: ++ inode_unlock(inode); ++ free_xid(xid); + return rc; + } + diff --git a/queue-5.19/writeback-avoid-use-after-free-after-removing-device.patch b/queue-5.19/writeback-avoid-use-after-free-after-removing-device.patch new file mode 100644 index 00000000000..a1c2389d5ed --- /dev/null +++ b/queue-5.19/writeback-avoid-use-after-free-after-removing-device.patch @@ -0,0 +1,139 @@ +From f87904c075515f3e1d8f4a7115869d3b914674fd Mon Sep 17 00:00:00 2001 +From: Khazhismel Kumykov +Date: Mon, 1 Aug 2022 08:50:34 -0700 +Subject: writeback: avoid use-after-free after removing device + +From: Khazhismel Kumykov + +commit f87904c075515f3e1d8f4a7115869d3b914674fd 
upstream. + +When a disk is removed, bdi_unregister gets called to stop further +writeback and wait for associated delayed work to complete. However, +wb_inode_writeback_end() may schedule bandwidth estimation dwork after +this has completed, which can result in the timer attempting to access the +just freed bdi_writeback. + +Fix this by checking if the bdi_writeback is alive, similar to when +scheduling writeback work. + +Since this requires wb->work_lock, and wb_inode_writeback_end() may get +called from interrupt, switch wb->work_lock to an irqsafe lock. + +Link: https://lkml.kernel.org/r/20220801155034.3772543-1-khazhy@google.com +Fixes: 45a2966fd641 ("writeback: fix bandwidth estimate for spiky workload") +Signed-off-by: Khazhismel Kumykov +Reviewed-by: Jan Kara +Cc: Michael Stapelberg +Cc: Wu Fengguang +Cc: Alexander Viro +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/fs-writeback.c | 12 ++++++------ + mm/backing-dev.c | 10 +++++----- + mm/page-writeback.c | 6 +++++- + 3 files changed, 16 insertions(+), 12 deletions(-) + +--- a/fs/fs-writeback.c ++++ b/fs/fs-writeback.c +@@ -134,10 +134,10 @@ static bool inode_io_list_move_locked(st + + static void wb_wakeup(struct bdi_writeback *wb) + { +- spin_lock_bh(&wb->work_lock); ++ spin_lock_irq(&wb->work_lock); + if (test_bit(WB_registered, &wb->state)) + mod_delayed_work(bdi_wq, &wb->dwork, 0); +- spin_unlock_bh(&wb->work_lock); ++ spin_unlock_irq(&wb->work_lock); + } + + static void finish_writeback_work(struct bdi_writeback *wb, +@@ -164,7 +164,7 @@ static void wb_queue_work(struct bdi_wri + if (work->done) + atomic_inc(&work->done->cnt); + +- spin_lock_bh(&wb->work_lock); ++ spin_lock_irq(&wb->work_lock); + + if (test_bit(WB_registered, &wb->state)) { + list_add_tail(&work->list, &wb->work_list); +@@ -172,7 +172,7 @@ static void wb_queue_work(struct bdi_wri + } else + finish_writeback_work(wb, work); + +- spin_unlock_bh(&wb->work_lock); ++ spin_unlock_irq(&wb->work_lock); + } + + 
/** +@@ -2082,13 +2082,13 @@ static struct wb_writeback_work *get_nex + { + struct wb_writeback_work *work = NULL; + +- spin_lock_bh(&wb->work_lock); ++ spin_lock_irq(&wb->work_lock); + if (!list_empty(&wb->work_list)) { + work = list_entry(wb->work_list.next, + struct wb_writeback_work, list); + list_del_init(&work->list); + } +- spin_unlock_bh(&wb->work_lock); ++ spin_unlock_irq(&wb->work_lock); + return work; + } + +--- a/mm/backing-dev.c ++++ b/mm/backing-dev.c +@@ -260,10 +260,10 @@ void wb_wakeup_delayed(struct bdi_writeb + unsigned long timeout; + + timeout = msecs_to_jiffies(dirty_writeback_interval * 10); +- spin_lock_bh(&wb->work_lock); ++ spin_lock_irq(&wb->work_lock); + if (test_bit(WB_registered, &wb->state)) + queue_delayed_work(bdi_wq, &wb->dwork, timeout); +- spin_unlock_bh(&wb->work_lock); ++ spin_unlock_irq(&wb->work_lock); + } + + static void wb_update_bandwidth_workfn(struct work_struct *work) +@@ -334,12 +334,12 @@ static void cgwb_remove_from_bdi_list(st + static void wb_shutdown(struct bdi_writeback *wb) + { + /* Make sure nobody queues further work */ +- spin_lock_bh(&wb->work_lock); ++ spin_lock_irq(&wb->work_lock); + if (!test_and_clear_bit(WB_registered, &wb->state)) { +- spin_unlock_bh(&wb->work_lock); ++ spin_unlock_irq(&wb->work_lock); + return; + } +- spin_unlock_bh(&wb->work_lock); ++ spin_unlock_irq(&wb->work_lock); + + cgwb_remove_from_bdi_list(wb); + /* +--- a/mm/page-writeback.c ++++ b/mm/page-writeback.c +@@ -2867,6 +2867,7 @@ static void wb_inode_writeback_start(str + + static void wb_inode_writeback_end(struct bdi_writeback *wb) + { ++ unsigned long flags; + atomic_dec(&wb->writeback_inodes); + /* + * Make sure estimate of writeback throughput gets updated after +@@ -2875,7 +2876,10 @@ static void wb_inode_writeback_end(struc + * that if multiple inodes end writeback at a similar time, they get + * batched into one bandwidth update. 
+ */ +- queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL); ++ spin_lock_irqsave(&wb->work_lock, flags); ++ if (test_bit(WB_registered, &wb->state)) ++ queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL); ++ spin_unlock_irqrestore(&wb->work_lock, flags); + } + + bool __folio_end_writeback(struct folio *folio) diff --git a/queue-5.19/x86-boot-don-t-propagate-uninitialized-boot_params-cc_blob_address.patch b/queue-5.19/x86-boot-don-t-propagate-uninitialized-boot_params-cc_blob_address.patch new file mode 100644 index 00000000000..6d505ead976 --- /dev/null +++ b/queue-5.19/x86-boot-don-t-propagate-uninitialized-boot_params-cc_blob_address.patch @@ -0,0 +1,88 @@ +From 4b1c742407571eff58b6de9881889f7ca7c4b4dc Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Tue, 23 Aug 2022 11:07:34 -0500 +Subject: x86/boot: Don't propagate uninitialized boot_params->cc_blob_address + +From: Michael Roth + +commit 4b1c742407571eff58b6de9881889f7ca7c4b4dc upstream. + +In some cases, bootloaders will leave boot_params->cc_blob_address +uninitialized rather than zeroing it out. This field is only meant to be +set by the boot/compressed kernel in order to pass information to the +uncompressed kernel when SEV-SNP support is enabled. + +Therefore, there are no cases where the bootloader-provided values +should be treated as anything other than garbage. Otherwise, the +uncompressed kernel may attempt to access this bogus address, leading to +a crash during early boot. + +Normally, sanitize_boot_params() would be used to clear out such fields +but that happens too late: sev_enable() may have already initialized +it to a valid value that should not be zeroed out. Instead, have +sev_enable() zero it out unconditionally beforehand. + +Also ensure this happens for !CONFIG_AMD_MEM_ENCRYPT as well by also +including this handling in the sev_enable() stub function. + + [ bp: Massage commit message and comments. 
] + +Fixes: b190a043c49a ("x86/sev: Add SEV-SNP feature detection/setup") +Reported-by: Jeremi Piotrowski +Reported-by: watnuss@gmx.de +Signed-off-by: Michael Roth +Signed-off-by: Borislav Petkov +Cc: stable@vger.kernel.org +Link: https://bugzilla.kernel.org/show_bug.cgi?id=216387 +Link: https://lore.kernel.org/r/20220823160734.89036-1-michael.roth@amd.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/boot/compressed/misc.h | 12 +++++++++++- + arch/x86/boot/compressed/sev.c | 8 ++++++++ + 2 files changed, 19 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h +index 4910bf230d7b..62208ec04ca4 100644 +--- a/arch/x86/boot/compressed/misc.h ++++ b/arch/x86/boot/compressed/misc.h +@@ -132,7 +132,17 @@ void snp_set_page_private(unsigned long paddr); + void snp_set_page_shared(unsigned long paddr); + void sev_prep_identity_maps(unsigned long top_level_pgt); + #else +-static inline void sev_enable(struct boot_params *bp) { } ++static inline void sev_enable(struct boot_params *bp) ++{ ++ /* ++ * bp->cc_blob_address should only be set by boot/compressed kernel. ++ * Initialize it to 0 unconditionally (thus here in this stub too) to ++ * ensure that uninitialized values from buggy bootloaders aren't ++ * propagated. ++ */ ++ if (bp) ++ bp->cc_blob_address = 0; ++} + static inline void sev_es_shutdown_ghcb(void) { } + static inline bool sev_es_check_ghcb_fault(unsigned long address) + { +diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c +index 52f989f6acc2..c93930d5ccbd 100644 +--- a/arch/x86/boot/compressed/sev.c ++++ b/arch/x86/boot/compressed/sev.c +@@ -276,6 +276,14 @@ void sev_enable(struct boot_params *bp) + struct msr m; + bool snp; + ++ /* ++ * bp->cc_blob_address should only be set by boot/compressed kernel. ++ * Initialize it to 0 to ensure that uninitialized values from ++ * buggy bootloaders aren't propagated. 
++ */ ++ if (bp) ++ bp->cc_blob_address = 0; ++ + /* + * Setup/preliminary detection of SNP. This will be sanity-checked + * against CPUID/MSR values later. +-- +2.37.2 + diff --git a/queue-5.19/x86-bugs-add-unknown-reporting-for-mmio-stale-data.patch b/queue-5.19/x86-bugs-add-unknown-reporting-for-mmio-stale-data.patch new file mode 100644 index 00000000000..b0f41b3a9df --- /dev/null +++ b/queue-5.19/x86-bugs-add-unknown-reporting-for-mmio-stale-data.patch @@ -0,0 +1,209 @@ +From 7df548840c496b0141fb2404b889c346380c2b22 Mon Sep 17 00:00:00 2001 +From: Pawan Gupta +Date: Wed, 3 Aug 2022 14:41:32 -0700 +Subject: x86/bugs: Add "unknown" reporting for MMIO Stale Data + +From: Pawan Gupta + +commit 7df548840c496b0141fb2404b889c346380c2b22 upstream. + +Older Intel CPUs that are not in the affected processor list for MMIO +Stale Data vulnerabilities currently report "Not affected" in sysfs, +which may not be correct. Vulnerability status for these older CPUs is +unknown. + +Add known-not-affected CPUs to the whitelist. Report "unknown" +mitigation status for CPUs that are not in blacklist, whitelist and also +don't enumerate MSR ARCH_CAPABILITIES bits that reflect hardware +immunity to MMIO Stale Data vulnerabilities. + +Mitigation is not deployed when the status is unknown. + + [ bp: Massage, fixup. 
] + +Fixes: 8d50cdf8b834 ("x86/speculation/mmio: Add sysfs reporting for Processor MMIO Stale Data") +Suggested-by: Andrew Cooper +Suggested-by: Tony Luck +Signed-off-by: Pawan Gupta +Signed-off-by: Borislav Petkov +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/a932c154772f2121794a5f2eded1a11013114711.1657846269.git.pawan.kumar.gupta@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst | 14 +++ + arch/x86/include/asm/cpufeatures.h | 5 - + arch/x86/kernel/cpu/bugs.c | 14 ++- + arch/x86/kernel/cpu/common.c | 42 ++++++---- + 4 files changed, 56 insertions(+), 19 deletions(-) + +--- a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst ++++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst +@@ -230,6 +230,20 @@ The possible values in this file are: + * - 'Mitigation: Clear CPU buffers' + - The processor is vulnerable and the CPU buffer clearing mitigation is + enabled. ++ * - 'Unknown: No mitigations' ++ - The processor vulnerability status is unknown because it is ++ out of Servicing period. Mitigation is not attempted. ++ ++Definitions: ++------------ ++ ++Servicing period: The process of providing functional and security updates to ++Intel processors or platforms, utilizing the Intel Platform Update (IPU) ++process or other similar mechanisms. ++ ++End of Servicing Updates (ESU): ESU is the date at which Intel will no ++longer provide Servicing, such as through IPU or other similar update ++processes. ESU dates will typically be aligned to end of quarter. 
+ + If the processor is vulnerable then the following information is appended to + the above information: +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -456,7 +456,8 @@ + #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ + #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ + #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ +-#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */ +-#define X86_BUG_EIBRS_PBRSB X86_BUG(27) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ ++#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */ ++#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */ ++#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ + + #endif /* _ASM_X86_CPUFEATURES_H */ +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -433,7 +433,8 @@ static void __init mmio_select_mitigatio + u64 ia32_cap; + + if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || +- cpu_mitigations_off()) { ++ boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) || ++ cpu_mitigations_off()) { + mmio_mitigation = MMIO_MITIGATION_OFF; + return; + } +@@ -538,6 +539,8 @@ out: + pr_info("TAA: %s\n", taa_strings[taa_mitigation]); + if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) + pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]); ++ else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) ++ pr_info("MMIO Stale Data: Unknown: No mitigations\n"); + } + + static void __init md_clear_select_mitigation(void) +@@ -2275,6 +2278,9 @@ static ssize_t tsx_async_abort_show_stat + + static ssize_t mmio_stale_data_show_state(char *buf) + { ++ if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) ++ return sysfs_emit(buf, "Unknown: No mitigations\n"); ++ + if (mmio_mitigation == 
MMIO_MITIGATION_OFF) + return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]); + +@@ -2421,6 +2427,7 @@ static ssize_t cpu_show_common(struct de + return srbds_show_state(buf); + + case X86_BUG_MMIO_STALE_DATA: ++ case X86_BUG_MMIO_UNKNOWN: + return mmio_stale_data_show_state(buf); + + case X86_BUG_RETBLEED: +@@ -2480,7 +2487,10 @@ ssize_t cpu_show_srbds(struct device *de + + ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf) + { +- return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); ++ if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) ++ return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_UNKNOWN); ++ else ++ return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); + } + + ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf) +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1135,7 +1135,8 @@ static void identify_cpu_without_cpuid(s + #define NO_SWAPGS BIT(6) + #define NO_ITLB_MULTIHIT BIT(7) + #define NO_SPECTRE_V2 BIT(8) +-#define NO_EIBRS_PBRSB BIT(9) ++#define NO_MMIO BIT(9) ++#define NO_EIBRS_PBRSB BIT(10) + + #define VULNWL(vendor, family, model, whitelist) \ + X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist) +@@ -1158,6 +1159,11 @@ static const __initconst struct x86_cpu_ + VULNWL(VORTEX, 6, X86_MODEL_ANY, NO_SPECULATION), + + /* Intel Family 6 */ ++ VULNWL_INTEL(TIGERLAKE, NO_MMIO), ++ VULNWL_INTEL(TIGERLAKE_L, NO_MMIO), ++ VULNWL_INTEL(ALDERLAKE, NO_MMIO), ++ VULNWL_INTEL(ALDERLAKE_L, NO_MMIO), ++ + VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), +@@ -1176,9 +1182,9 @@ static const __initconst struct x86_cpu_ + VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS | 
NO_ITLB_MULTIHIT), + +- VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), +- VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), +- VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), ++ VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), ++ VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), ++ VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), + + /* + * Technically, swapgs isn't serializing on AMD (despite it previously +@@ -1193,18 +1199,18 @@ static const __initconst struct x86_cpu_ + VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), + + /* AMD Family 0xf - 0x12 */ +- VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), +- VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), +- VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), +- VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), ++ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), ++ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), ++ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + + /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ +- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), +- VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), ++ 
VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + + /* Zhaoxin Family 7 */ +- VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS), +- VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS), ++ VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), ++ VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), + {} + }; + +@@ -1358,10 +1364,16 @@ static void __init cpu_set_bug_bits(stru + * Affected CPU list is generally enough to enumerate the vulnerability, + * but for virtualization case check for ARCH_CAP MSR bits also, VMM may + * not want the guest to enumerate the bug. ++ * ++ * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist, ++ * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits. + */ +- if (cpu_matches(cpu_vuln_blacklist, MMIO) && +- !arch_cap_mmio_immune(ia32_cap)) +- setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); ++ if (!arch_cap_mmio_immune(ia32_cap)) { ++ if (cpu_matches(cpu_vuln_blacklist, MMIO)) ++ setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); ++ else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO)) ++ setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN); ++ } + + if (!cpu_has(c, X86_FEATURE_BTC_NO)) { + if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) diff --git a/queue-5.19/x86-entry-fix-entry_int80_compat-for-xen-pv-guests.patch b/queue-5.19/x86-entry-fix-entry_int80_compat-for-xen-pv-guests.patch new file mode 100644 index 00000000000..c181cb1d7c6 --- /dev/null +++ b/queue-5.19/x86-entry-fix-entry_int80_compat-for-xen-pv-guests.patch @@ -0,0 +1,49 @@ +From 5b9f0c4df1c1152403c738373fb063e9ffdac0a1 Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Tue, 16 Aug 2022 09:11:37 +0200 +Subject: x86/entry: Fix entry_INT80_compat for Xen PV guests + +From: Juergen Gross + +commit 5b9f0c4df1c1152403c738373fb063e9ffdac0a1 upstream. 
+ +Commit + + c89191ce67ef ("x86/entry: Convert SWAPGS to swapgs and remove the definition of SWAPGS") + +missed one use case of SWAPGS in entry_INT80_compat(). Removing of +the SWAPGS macro led to asm just using "swapgs", as it is accepting +instructions in capital letters, too. + +This in turn leads to splats in Xen PV guests like: + + [ 36.145223] general protection fault, maybe for address 0x2d: 0000 [#1] PREEMPT SMP NOPTI + [ 36.145794] CPU: 2 PID: 1847 Comm: ld-linux.so.2 Not tainted 5.19.1-1-default #1 \ + openSUSE Tumbleweed f3b44bfb672cdb9f235aff53b57724eba8b9411b + [ 36.146608] Hardware name: HP ProLiant ML350p Gen8, BIOS P72 11/14/2013 + [ 36.148126] RIP: e030:entry_INT80_compat+0x3/0xa3 + +Fix that by open coding this single instance of the SWAPGS macro. + +Fixes: c89191ce67ef ("x86/entry: Convert SWAPGS to swapgs and remove the definition of SWAPGS") +Signed-off-by: Juergen Gross +Signed-off-by: Borislav Petkov +Reviewed-by: Jan Beulich +Cc: # 5.19 +Link: https://lore.kernel.org/r/20220816071137.4893-1-jgross@suse.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/entry/entry_64_compat.S | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/entry/entry_64_compat.S ++++ b/arch/x86/entry/entry_64_compat.S +@@ -311,7 +311,7 @@ SYM_CODE_START(entry_INT80_compat) + * Interrupts are off on entry. 
+ */ + ASM_CLAC /* Do this early to minimize exposure */ +- SWAPGS ++ ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV + + /* + * User tracing code (ptrace or signal handlers) might assume that diff --git a/queue-5.19/x86-nospec-unwreck-the-rsb-stuffing.patch b/queue-5.19/x86-nospec-unwreck-the-rsb-stuffing.patch new file mode 100644 index 00000000000..8ae54dffbf0 --- /dev/null +++ b/queue-5.19/x86-nospec-unwreck-the-rsb-stuffing.patch @@ -0,0 +1,128 @@ +From 4e3aa9238277597c6c7624f302d81a7b568b6f2d Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Tue, 16 Aug 2022 14:28:36 +0200 +Subject: x86/nospec: Unwreck the RSB stuffing + +From: Peter Zijlstra + +commit 4e3aa9238277597c6c7624f302d81a7b568b6f2d upstream. + +Commit 2b1299322016 ("x86/speculation: Add RSB VM Exit protections") +made a right mess of the RSB stuffing, rewrite the whole thing to not +suck. + +Thanks to Andrew for the enlightening comment about Post-Barrier RSB +things so we can make this code less magical. + +Cc: stable@vger.kernel.org +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/YvuNdDWoUZSBjYcm@worktop.programming.kicks-ass.net +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/nospec-branch.h | 80 +++++++++++++++++------------------ + 1 file changed, 39 insertions(+), 41 deletions(-) + +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -35,33 +35,44 @@ + #define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ + + /* ++ * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN. ++ */ ++#define __FILL_RETURN_SLOT \ ++ ANNOTATE_INTRA_FUNCTION_CALL; \ ++ call 772f; \ ++ int3; \ ++772: ++ ++/* ++ * Stuff the entire RSB. ++ * + * Google experimented with loop-unrolling and this turned out to be + * the optimal version - two calls, each with their own speculation + * trap should their return address end up getting used, in a loop. 
+ */ +-#define __FILL_RETURN_BUFFER(reg, nr, sp) \ +- mov $(nr/2), reg; \ +-771: \ +- ANNOTATE_INTRA_FUNCTION_CALL; \ +- call 772f; \ +-773: /* speculation trap */ \ +- UNWIND_HINT_EMPTY; \ +- pause; \ +- lfence; \ +- jmp 773b; \ +-772: \ +- ANNOTATE_INTRA_FUNCTION_CALL; \ +- call 774f; \ +-775: /* speculation trap */ \ +- UNWIND_HINT_EMPTY; \ +- pause; \ +- lfence; \ +- jmp 775b; \ +-774: \ +- add $(BITS_PER_LONG/8) * 2, sp; \ +- dec reg; \ +- jnz 771b; \ +- /* barrier for jnz misprediction */ \ ++#define __FILL_RETURN_BUFFER(reg, nr) \ ++ mov $(nr/2), reg; \ ++771: \ ++ __FILL_RETURN_SLOT \ ++ __FILL_RETURN_SLOT \ ++ add $(BITS_PER_LONG/8) * 2, %_ASM_SP; \ ++ dec reg; \ ++ jnz 771b; \ ++ /* barrier for jnz misprediction */ \ ++ lfence; ++ ++/* ++ * Stuff a single RSB slot. ++ * ++ * To mitigate Post-Barrier RSB speculation, one CALL instruction must be ++ * forced to retire before letting a RET instruction execute. ++ * ++ * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed ++ * before this point. ++ */ ++#define __FILL_ONE_RETURN \ ++ __FILL_RETURN_SLOT \ ++ add $(BITS_PER_LONG/8), %_ASM_SP; \ + lfence; + + #ifdef __ASSEMBLY__ +@@ -120,28 +131,15 @@ + #endif + .endm + +-.macro ISSUE_UNBALANCED_RET_GUARD +- ANNOTATE_INTRA_FUNCTION_CALL +- call .Lunbalanced_ret_guard_\@ +- int3 +-.Lunbalanced_ret_guard_\@: +- add $(BITS_PER_LONG/8), %_ASM_SP +- lfence +-.endm +- + /* + * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP + * monstrosity above, manually. 
+ */ +-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2 +-.ifb \ftr2 +- ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr +-.else +- ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2 +-.endif +- __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP) +-.Lunbalanced_\@: +- ISSUE_UNBALANCED_RET_GUARD ++.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS) ++ ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \ ++ __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \ ++ __stringify(__FILL_ONE_RETURN), \ftr2 ++ + .Lskip_rsb_\@: + .endm + diff --git a/queue-5.19/x86-pat-have-pat_enabled-properly-reflect-state-when-running-on-xen.patch b/queue-5.19/x86-pat-have-pat_enabled-properly-reflect-state-when-running-on-xen.patch new file mode 100644 index 00000000000..64676ddfb56 --- /dev/null +++ b/queue-5.19/x86-pat-have-pat_enabled-properly-reflect-state-when-running-on-xen.patch @@ -0,0 +1,88 @@ +From 72cbc8f04fe2fa93443c0fcccb7ad91dfea3d9ce Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Thu, 28 Apr 2022 16:50:29 +0200 +Subject: x86/PAT: Have pat_enabled() properly reflect state when running on Xen + +From: Jan Beulich + +commit 72cbc8f04fe2fa93443c0fcccb7ad91dfea3d9ce upstream. + +After commit ID in the Fixes: tag, pat_enabled() returns false (because +of PAT initialization being suppressed in the absence of MTRRs being +announced to be available). + +This has become a problem: the i915 driver now fails to initialize when +running PV on Xen (i915_gem_object_pin_map() is where I located the +induced failure), and its error handling is flaky enough to (at least +sometimes) result in a hung system. + +Yet even beyond that problem the keying of the use of WC mappings to +pat_enabled() (see arch_can_pci_mmap_wc()) means that in particular +graphics frame buffer accesses would have been quite a bit less optimal +than possible. 
+ +Arrange for the function to return true in such environments, without +undermining the rest of PAT MSR management logic considering PAT to be +disabled: specifically, no writes to the PAT MSR should occur. + +For the new boolean to live in .init.data, init_cache_modes() also needs +moving to .init.text (where it could/should have lived already before). + + [ bp: This is the "small fix" variant for stable. It'll get replaced + with a proper PAT and MTRR detection split upstream but that is too + involved for a stable backport. + - additional touchups to commit msg. Use cpu_feature_enabled(). ] + +Fixes: bdd8b6c98239 ("drm/i915: replace X86_FEATURE_PAT with pat_enabled()") +Signed-off-by: Jan Beulich +Signed-off-by: Borislav Petkov +Acked-by: Ingo Molnar +Cc: +Cc: Juergen Gross +Cc: Lucas De Marchi +Link: https://lore.kernel.org/r/9385fa60-fa5d-f559-a137-6608408f88b0@suse.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/mm/pat/memtype.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +--- a/arch/x86/mm/pat/memtype.c ++++ b/arch/x86/mm/pat/memtype.c +@@ -62,6 +62,7 @@ + + static bool __read_mostly pat_bp_initialized; + static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT); ++static bool __initdata pat_force_disabled = !IS_ENABLED(CONFIG_X86_PAT); + static bool __read_mostly pat_bp_enabled; + static bool __read_mostly pat_cm_initialized; + +@@ -86,6 +87,7 @@ void pat_disable(const char *msg_reason) + static int __init nopat(char *str) + { + pat_disable("PAT support disabled via boot option."); ++ pat_force_disabled = true; + return 0; + } + early_param("nopat", nopat); +@@ -272,7 +274,7 @@ static void pat_ap_init(u64 pat) + wrmsrl(MSR_IA32_CR_PAT, pat); + } + +-void init_cache_modes(void) ++void __init init_cache_modes(void) + { + u64 pat = 0; + +@@ -313,6 +315,12 @@ void init_cache_modes(void) + */ + pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) | + PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC); ++ } 
else if (!pat_force_disabled && cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) { ++ /* ++ * Clearly PAT is enabled underneath. Allow pat_enabled() to ++ * reflect this. ++ */ ++ pat_bp_enabled = true; + } + + __init_cache_modes(pat); diff --git a/queue-5.19/x86-sev-don-t-use-cc_platform_has-for-early-sev-snp-calls.patch b/queue-5.19/x86-sev-don-t-use-cc_platform_has-for-early-sev-snp-calls.patch new file mode 100644 index 00000000000..c08cdb4c322 --- /dev/null +++ b/queue-5.19/x86-sev-don-t-use-cc_platform_has-for-early-sev-snp-calls.patch @@ -0,0 +1,70 @@ +From cdaa0a407f1acd3a44861e3aea6e3c7349e668f1 Mon Sep 17 00:00:00 2001 +From: Tom Lendacky +Date: Tue, 23 Aug 2022 16:55:51 -0500 +Subject: x86/sev: Don't use cc_platform_has() for early SEV-SNP calls + +From: Tom Lendacky + +commit cdaa0a407f1acd3a44861e3aea6e3c7349e668f1 upstream. + +When running identity-mapped and depending on the kernel configuration, +it is possible that the compiler uses jump tables when generating code +for cc_platform_has(). + +This causes a boot failure because the jump table uses un-mapped kernel +virtual addresses, not identity-mapped addresses. This has been seen +with CONFIG_RETPOLINE=n. + +Similar to sme_encrypt_kernel(), use an open-coded direct check for the +status of SNP rather than trying to eliminate the jump table. This +preserves any code optimization in cc_platform_has() that can be useful +post boot. It also limits the changes to SEV-specific files so that +future compiler features won't necessarily require possible build changes +just because they are not compatible with running identity-mapped. + + [ bp: Massage commit message. 
] + +Fixes: 5e5ccff60a29 ("x86/sev: Add helper for validating pages in early enc attribute changes") +Reported-by: Sean Christopherson +Suggested-by: Sean Christopherson +Signed-off-by: Tom Lendacky +Signed-off-by: Borislav Petkov +Cc: # 5.19.x +Link: https://lore.kernel.org/all/YqfabnTRxFSM+LoX@google.com/ +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/sev.c | 16 ++++++++++++++-- + 1 file changed, 14 insertions(+), 2 deletions(-) + +--- a/arch/x86/kernel/sev.c ++++ b/arch/x86/kernel/sev.c +@@ -701,7 +701,13 @@ e_term: + void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, + unsigned int npages) + { +- if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) ++ /* ++ * This can be invoked in early boot while running identity mapped, so ++ * use an open coded check for SNP instead of using cc_platform_has(). ++ * This eliminates worries about jump tables or checking boot_cpu_data ++ * in the cc_platform_has() function. ++ */ ++ if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) + return; + + /* +@@ -717,7 +723,13 @@ void __init early_snp_set_memory_private + void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, + unsigned int npages) + { +- if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) ++ /* ++ * This can be invoked in early boot while running identity mapped, so ++ * use an open coded check for SNP instead of using cc_platform_has(). ++ * This eliminates worries about jump tables or checking boot_cpu_data ++ * in the cc_platform_has() function. ++ */ ++ if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) + return; + + /* Invalidate the memory pages before they are marked shared in the RMP table. 
*/ diff --git a/queue-5.19/x86-unwind-orc-unwind-ftrace-trampolines-with-correct-orc-entry.patch b/queue-5.19/x86-unwind-orc-unwind-ftrace-trampolines-with-correct-orc-entry.patch new file mode 100644 index 00000000000..f76b902f3f3 --- /dev/null +++ b/queue-5.19/x86-unwind-orc-unwind-ftrace-trampolines-with-correct-orc-entry.patch @@ -0,0 +1,72 @@ +From fc2e426b1161761561624ebd43ce8c8d2fa058da Mon Sep 17 00:00:00 2001 +From: Chen Zhongjin +Date: Fri, 19 Aug 2022 16:43:34 +0800 +Subject: x86/unwind/orc: Unwind ftrace trampolines with correct ORC entry + +From: Chen Zhongjin + +commit fc2e426b1161761561624ebd43ce8c8d2fa058da upstream. + +When meeting ftrace trampolines in ORC unwinding, unwinder uses address +of ftrace_{regs_}call address to find the ORC entry, which gets next frame at +sp+176. + +If there is an IRQ hitting at sub $0xa8,%rsp, the next frame should be +sp+8 instead of 176. It makes unwinder skip correct frame and throw +warnings such as "wrong direction" or "can't access registers", etc, +depending on the content of the incorrect frame address. + +By adding the base address ftrace_{regs_}caller with the offset +*ip - ops->trampoline*, we can get the correct address to find the ORC entry. + +Also change "caller" to "tramp_addr" to make variable name conform to +its content. + +[ mingo: Clarified the changelog a bit. 
] + +Fixes: 6be7fa3c74d1 ("ftrace, orc, x86: Handle ftrace dynamically allocated trampolines") +Signed-off-by: Chen Zhongjin +Signed-off-by: Ingo Molnar +Reviewed-by: Steven Rostedt (Google) +Cc: +Link: https://lore.kernel.org/r/20220819084334.244016-1-chenzhongjin@huawei.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/unwind_orc.c | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +--- a/arch/x86/kernel/unwind_orc.c ++++ b/arch/x86/kernel/unwind_orc.c +@@ -93,22 +93,27 @@ static struct orc_entry *orc_find(unsign + static struct orc_entry *orc_ftrace_find(unsigned long ip) + { + struct ftrace_ops *ops; +- unsigned long caller; ++ unsigned long tramp_addr, offset; + + ops = ftrace_ops_trampoline(ip); + if (!ops) + return NULL; + ++ /* Set tramp_addr to the start of the code copied by the trampoline */ + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) +- caller = (unsigned long)ftrace_regs_call; ++ tramp_addr = (unsigned long)ftrace_regs_caller; + else +- caller = (unsigned long)ftrace_call; ++ tramp_addr = (unsigned long)ftrace_caller; ++ ++ /* Now place tramp_addr to the location within the trampoline ip is at */ ++ offset = ip - ops->trampoline; ++ tramp_addr += offset; + + /* Prevent unlikely recursion */ +- if (ip == caller) ++ if (ip == tramp_addr) + return NULL; + +- return orc_find(caller); ++ return orc_find(tramp_addr); + } + #else + static struct orc_entry *orc_ftrace_find(unsigned long ip) diff --git a/queue-5.19/xen-privcmd-fix-error-exit-of-privcmd_ioctl_dm_op.patch b/queue-5.19/xen-privcmd-fix-error-exit-of-privcmd_ioctl_dm_op.patch new file mode 100644 index 00000000000..c08c929d758 --- /dev/null +++ b/queue-5.19/xen-privcmd-fix-error-exit-of-privcmd_ioctl_dm_op.patch @@ -0,0 +1,95 @@ +From c5deb27895e017a0267de0a20d140ad5fcc55a54 Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Thu, 25 Aug 2022 16:19:18 +0200 +Subject: xen/privcmd: fix error exit of privcmd_ioctl_dm_op() + +From: Juergen Gross + +commit 
c5deb27895e017a0267de0a20d140ad5fcc55a54 upstream. + +The error exit of privcmd_ioctl_dm_op() is calling unlock_pages() +potentially with pages being NULL, leading to a NULL dereference. + +Additionally lock_pages() doesn't check for pin_user_pages_fast() +having been completely successful, resulting in potentially not +locking all pages into memory. This could result in sporadic failures +when using the related memory in user mode. + +Fix all of that by calling unlock_pages() always with the real number +of pinned pages, which will be zero in case pages being NULL, and by +checking the number of pages pinned by pin_user_pages_fast() matching +the expected number of pages. + +Cc: +Fixes: ab520be8cd5d ("xen/privcmd: Add IOCTL_PRIVCMD_DM_OP") +Reported-by: Rustam Subkhankulov +Signed-off-by: Juergen Gross +Reviewed-by: Jan Beulich +Reviewed-by: Oleksandr Tyshchenko +Link: https://lore.kernel.org/r/20220825141918.3581-1-jgross@suse.com +Signed-off-by: Juergen Gross +Signed-off-by: Greg Kroah-Hartman +--- + drivers/xen/privcmd.c | 21 +++++++++++---------- + 1 file changed, 11 insertions(+), 10 deletions(-) + +--- a/drivers/xen/privcmd.c ++++ b/drivers/xen/privcmd.c +@@ -581,27 +581,30 @@ static int lock_pages( + struct privcmd_dm_op_buf kbufs[], unsigned int num, + struct page *pages[], unsigned int nr_pages, unsigned int *pinned) + { +- unsigned int i; ++ unsigned int i, off = 0; + +- for (i = 0; i < num; i++) { ++ for (i = 0; i < num; ) { + unsigned int requested; + int page_count; + + requested = DIV_ROUND_UP( + offset_in_page(kbufs[i].uptr) + kbufs[i].size, +- PAGE_SIZE); ++ PAGE_SIZE) - off; + if (requested > nr_pages) + return -ENOSPC; + + page_count = pin_user_pages_fast( +- (unsigned long) kbufs[i].uptr, ++ (unsigned long)kbufs[i].uptr + off * PAGE_SIZE, + requested, FOLL_WRITE, pages); +- if (page_count < 0) +- return page_count; ++ if (page_count <= 0) ++ return page_count ? 
: -EFAULT; + + *pinned += page_count; + nr_pages -= page_count; + pages += page_count; ++ ++ off = (requested == page_count) ? 0 : off + page_count; ++ i += !off; + } + + return 0; +@@ -677,10 +680,8 @@ static long privcmd_ioctl_dm_op(struct f + } + + rc = lock_pages(kbufs, kdata.num, pages, nr_pages, &pinned); +- if (rc < 0) { +- nr_pages = pinned; ++ if (rc < 0) + goto out; +- } + + for (i = 0; i < kdata.num; i++) { + set_xen_guest_handle(xbufs[i].h, kbufs[i].uptr); +@@ -692,7 +693,7 @@ static long privcmd_ioctl_dm_op(struct f + xen_preemptible_hcall_end(); + + out: +- unlock_pages(pages, nr_pages); ++ unlock_pages(pages, pinned); + kfree(xbufs); + kfree(pages); + kfree(kbufs); -- 2.47.2