git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.19-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 29 Aug 2022 07:49:00 +0000 (09:49 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 29 Aug 2022 07:49:00 +0000 (09:49 +0200)
added patches:
acpi-processor-remove-freq-qos-request-for-all-cpus.patch
asm-generic-sections-refactor-memory_intersects.patch
audit-move-audit_return_fixup-before-the-filters.patch
bootmem-remove-the-vmemmap-pages-from-kmemleak-in-put_page_bootmem.patch
btrfs-add-info-when-mount-fails-due-to-stale-replace-target.patch
btrfs-check-if-root-is-readonly-while-setting-security-xattr.patch
btrfs-fix-possible-memory-leak-in-btrfs_get_dev_args_from_path.patch
btrfs-fix-silent-failure-when-deleting-root-reference.patch
btrfs-fix-space-cache-corruption-and-potential-double-allocations.patch
btrfs-replace-drop-assert-for-suspended-replace.patch
btrfs-update-generation-of-hole-file-extent-item-when-merging-holes.patch
cifs-skip-extra-null-byte-in-filenames.patch
fbdev-fbcon-properly-revert-changes-when-vc_resize-failed.patch
loop-check-for-overflow-while-configuring-loop.patch
mm-damon-dbgfs-avoid-duplicate-context-directory-creation.patch
mm-hugetlb-avoid-corrupting-page-mapping-in-hugetlb_mcopy_atomic_pte.patch
mm-mprotect-only-reference-swap-pfn-page-if-type-match.patch
nouveau-explicitly-wait-on-the-fence-in-nouveau_bo_move_m2mf.patch
ocfs2-fix-freeing-uninitialized-resource-on-ocfs2_dlm_shutdown.patch
perf-x86-intel-fix-pebs-event-constraints-for-adl.patch
perf-x86-lbr-enable-the-branch-type-for-the-arch-lbr-by-default.patch
revert-memcg-cleanup-racy-sum-avoidance-code.patch
riscv-dts-microchip-correct-l2-cache-interrupts.patch
riscv-signal-fix-missing-prototype-warning.patch
riscv-traps-add-missing-prototype.patch
s390-fix-double-free-of-gs-and-ri-cbs-on-fork-failure.patch
s390-mm-do-not-trigger-write-fault-when-vma-does-not-allow-vm_write.patch
shmem-update-folio-if-shmem_replace_page-updates-the-page.patch
smb3-missing-inode-locks-in-punch-hole.patch
writeback-avoid-use-after-free-after-removing-device.patch
x86-boot-don-t-propagate-uninitialized-boot_params-cc_blob_address.patch
x86-bugs-add-unknown-reporting-for-mmio-stale-data.patch
x86-entry-fix-entry_int80_compat-for-xen-pv-guests.patch
x86-nospec-unwreck-the-rsb-stuffing.patch
x86-pat-have-pat_enabled-properly-reflect-state-when-running-on-xen.patch
x86-sev-don-t-use-cc_platform_has-for-early-sev-snp-calls.patch
x86-unwind-orc-unwind-ftrace-trampolines-with-correct-orc-entry.patch
xen-privcmd-fix-error-exit-of-privcmd_ioctl_dm_op.patch

39 files changed:
queue-5.19/acpi-processor-remove-freq-qos-request-for-all-cpus.patch [new file with mode: 0644]
queue-5.19/asm-generic-sections-refactor-memory_intersects.patch [new file with mode: 0644]
queue-5.19/audit-move-audit_return_fixup-before-the-filters.patch [new file with mode: 0644]
queue-5.19/bootmem-remove-the-vmemmap-pages-from-kmemleak-in-put_page_bootmem.patch [new file with mode: 0644]
queue-5.19/btrfs-add-info-when-mount-fails-due-to-stale-replace-target.patch [new file with mode: 0644]
queue-5.19/btrfs-check-if-root-is-readonly-while-setting-security-xattr.patch [new file with mode: 0644]
queue-5.19/btrfs-fix-possible-memory-leak-in-btrfs_get_dev_args_from_path.patch [new file with mode: 0644]
queue-5.19/btrfs-fix-silent-failure-when-deleting-root-reference.patch [new file with mode: 0644]
queue-5.19/btrfs-fix-space-cache-corruption-and-potential-double-allocations.patch [new file with mode: 0644]
queue-5.19/btrfs-replace-drop-assert-for-suspended-replace.patch [new file with mode: 0644]
queue-5.19/btrfs-update-generation-of-hole-file-extent-item-when-merging-holes.patch [new file with mode: 0644]
queue-5.19/cifs-skip-extra-null-byte-in-filenames.patch [new file with mode: 0644]
queue-5.19/fbdev-fbcon-properly-revert-changes-when-vc_resize-failed.patch [new file with mode: 0644]
queue-5.19/loop-check-for-overflow-while-configuring-loop.patch [new file with mode: 0644]
queue-5.19/mm-damon-dbgfs-avoid-duplicate-context-directory-creation.patch [new file with mode: 0644]
queue-5.19/mm-hugetlb-avoid-corrupting-page-mapping-in-hugetlb_mcopy_atomic_pte.patch [new file with mode: 0644]
queue-5.19/mm-mprotect-only-reference-swap-pfn-page-if-type-match.patch [new file with mode: 0644]
queue-5.19/nouveau-explicitly-wait-on-the-fence-in-nouveau_bo_move_m2mf.patch [new file with mode: 0644]
queue-5.19/ocfs2-fix-freeing-uninitialized-resource-on-ocfs2_dlm_shutdown.patch [new file with mode: 0644]
queue-5.19/perf-x86-intel-fix-pebs-event-constraints-for-adl.patch [new file with mode: 0644]
queue-5.19/perf-x86-lbr-enable-the-branch-type-for-the-arch-lbr-by-default.patch [new file with mode: 0644]
queue-5.19/revert-memcg-cleanup-racy-sum-avoidance-code.patch [new file with mode: 0644]
queue-5.19/riscv-dts-microchip-correct-l2-cache-interrupts.patch [new file with mode: 0644]
queue-5.19/riscv-signal-fix-missing-prototype-warning.patch [new file with mode: 0644]
queue-5.19/riscv-traps-add-missing-prototype.patch [new file with mode: 0644]
queue-5.19/s390-fix-double-free-of-gs-and-ri-cbs-on-fork-failure.patch [new file with mode: 0644]
queue-5.19/s390-mm-do-not-trigger-write-fault-when-vma-does-not-allow-vm_write.patch [new file with mode: 0644]
queue-5.19/series
queue-5.19/shmem-update-folio-if-shmem_replace_page-updates-the-page.patch [new file with mode: 0644]
queue-5.19/smb3-missing-inode-locks-in-punch-hole.patch [new file with mode: 0644]
queue-5.19/writeback-avoid-use-after-free-after-removing-device.patch [new file with mode: 0644]
queue-5.19/x86-boot-don-t-propagate-uninitialized-boot_params-cc_blob_address.patch [new file with mode: 0644]
queue-5.19/x86-bugs-add-unknown-reporting-for-mmio-stale-data.patch [new file with mode: 0644]
queue-5.19/x86-entry-fix-entry_int80_compat-for-xen-pv-guests.patch [new file with mode: 0644]
queue-5.19/x86-nospec-unwreck-the-rsb-stuffing.patch [new file with mode: 0644]
queue-5.19/x86-pat-have-pat_enabled-properly-reflect-state-when-running-on-xen.patch [new file with mode: 0644]
queue-5.19/x86-sev-don-t-use-cc_platform_has-for-early-sev-snp-calls.patch [new file with mode: 0644]
queue-5.19/x86-unwind-orc-unwind-ftrace-trampolines-with-correct-orc-entry.patch [new file with mode: 0644]
queue-5.19/xen-privcmd-fix-error-exit-of-privcmd_ioctl_dm_op.patch [new file with mode: 0644]

diff --git a/queue-5.19/acpi-processor-remove-freq-qos-request-for-all-cpus.patch b/queue-5.19/acpi-processor-remove-freq-qos-request-for-all-cpus.patch
new file mode 100644 (file)
index 0000000..29bbd45
--- /dev/null
@@ -0,0 +1,38 @@
+From 36527b9d882362567ceb4eea8666813280f30e6f Mon Sep 17 00:00:00 2001
+From: Riwen Lu <luriwen@kylinos.cn>
+Date: Tue, 23 Aug 2022 15:43:42 +0800
+Subject: ACPI: processor: Remove freq Qos request for all CPUs
+
+From: Riwen Lu <luriwen@kylinos.cn>
+
+commit 36527b9d882362567ceb4eea8666813280f30e6f upstream.
+
+The freq Qos request would be removed repeatedly if the cpufreq policy
+relates to more than one CPU. Then, it would cause the "called for unknown
+object" warning.
+
+Remove the freq Qos request for each CPU related to the cpufreq policy,
+instead of repeatedly removing it for the last CPU of the policy.
+
+Fixes: a1bb46c36ce3 ("ACPI: processor: Add QoS requests for all CPUs")
+Reported-by: Jeremy Linton <Jeremy.Linton@arm.com>
+Tested-by: Jeremy Linton <jeremy.linton@arm.com>
+Signed-off-by: Riwen Lu <luriwen@kylinos.cn>
+Cc: 5.4+ <stable@vger.kernel.org> # 5.4+
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/acpi/processor_thermal.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/acpi/processor_thermal.c
++++ b/drivers/acpi/processor_thermal.c
+@@ -151,7 +151,7 @@ void acpi_thermal_cpufreq_exit(struct cp
+       unsigned int cpu;
+       for_each_cpu(cpu, policy->related_cpus) {
+-              struct acpi_processor *pr = per_cpu(processors, policy->cpu);
++              struct acpi_processor *pr = per_cpu(processors, cpu);
+               if (pr)
+                       freq_qos_remove_request(&pr->thermal_req);
diff --git a/queue-5.19/asm-generic-sections-refactor-memory_intersects.patch b/queue-5.19/asm-generic-sections-refactor-memory_intersects.patch
new file mode 100644 (file)
index 0000000..204cd0b
--- /dev/null
@@ -0,0 +1,96 @@
+From 0c7d7cc2b4fe2e74ef8728f030f0f1674f9f6aee Mon Sep 17 00:00:00 2001
+From: Quanyang Wang <quanyang.wang@windriver.com>
+Date: Fri, 19 Aug 2022 16:11:45 +0800
+Subject: asm-generic: sections: refactor memory_intersects
+
+From: Quanyang Wang <quanyang.wang@windriver.com>
+
+commit 0c7d7cc2b4fe2e74ef8728f030f0f1674f9f6aee upstream.
+
+There are two problems with the current code of memory_intersects:
+
+First, it doesn't check whether the region (begin, end) falls inside the
+region (virt, vend), that is (virt < begin && vend > end).
+
+The second problem is if vend is equal to begin, it will return true but
+this is wrong since vend (virt + size) is not the last address of the
+memory region but (virt + size -1) is.  The wrong determination will
+trigger the misreporting when the function check_for_illegal_area calls
+memory_intersects to check if the dma region intersects with stext region.
+
+The misreporting is as below (stext is at 0x80100000):
+ WARNING: CPU: 0 PID: 77 at kernel/dma/debug.c:1073 check_for_illegal_area+0x130/0x168
+ DMA-API: chipidea-usb2 e0002000.usb: device driver maps memory from kernel text or rodata [addr=800f0000] [len=65536]
+ Modules linked in:
+ CPU: 1 PID: 77 Comm: usb-storage Not tainted 5.19.0-yocto-standard #5
+ Hardware name: Xilinx Zynq Platform
+  unwind_backtrace from show_stack+0x18/0x1c
+  show_stack from dump_stack_lvl+0x58/0x70
+  dump_stack_lvl from __warn+0xb0/0x198
+  __warn from warn_slowpath_fmt+0x80/0xb4
+  warn_slowpath_fmt from check_for_illegal_area+0x130/0x168
+  check_for_illegal_area from debug_dma_map_sg+0x94/0x368
+  debug_dma_map_sg from __dma_map_sg_attrs+0x114/0x128
+  __dma_map_sg_attrs from dma_map_sg_attrs+0x18/0x24
+  dma_map_sg_attrs from usb_hcd_map_urb_for_dma+0x250/0x3b4
+  usb_hcd_map_urb_for_dma from usb_hcd_submit_urb+0x194/0x214
+  usb_hcd_submit_urb from usb_sg_wait+0xa4/0x118
+  usb_sg_wait from usb_stor_bulk_transfer_sglist+0xa0/0xec
+  usb_stor_bulk_transfer_sglist from usb_stor_bulk_srb+0x38/0x70
+  usb_stor_bulk_srb from usb_stor_Bulk_transport+0x150/0x360
+  usb_stor_Bulk_transport from usb_stor_invoke_transport+0x38/0x440
+  usb_stor_invoke_transport from usb_stor_control_thread+0x1e0/0x238
+  usb_stor_control_thread from kthread+0xf8/0x104
+  kthread from ret_from_fork+0x14/0x2c
+
+Refactor memory_intersects to fix the two problems above.
+
+Before commit 1d7db834a027e ("dma-debug: use memory_intersects()
+directly"), memory_intersects was called only by printk_late_init:
+
+printk_late_init -> init_section_intersects ->memory_intersects.
+
+There were few places where memory_intersects was called.
+
+When commit 1d7db834a027e ("dma-debug: use memory_intersects()
+directly") was merged and CONFIG_DMA_API_DEBUG is enabled, the DMA
+subsystem uses it to check for an illegal area and the calltrace above
+is triggered.
+
+[akpm@linux-foundation.org: fix nearby comment typo]
+Link: https://lkml.kernel.org/r/20220819081145.948016-1-quanyang.wang@windriver.com
+Fixes: 979559362516 ("asm/sections: add helpers to check for section data")
+Signed-off-by: Quanyang Wang <quanyang.wang@windriver.com>
+Cc: Ard Biesheuvel <ardb@kernel.org>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Cc: Thierry Reding <treding@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/asm-generic/sections.h |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/include/asm-generic/sections.h
++++ b/include/asm-generic/sections.h
+@@ -97,7 +97,7 @@ static inline bool memory_contains(void
+ /**
+  * memory_intersects - checks if the region occupied by an object intersects
+  *                     with another memory region
+- * @begin: virtual address of the beginning of the memory regien
++ * @begin: virtual address of the beginning of the memory region
+  * @end: virtual address of the end of the memory region
+  * @virt: virtual address of the memory object
+  * @size: size of the memory object
+@@ -110,7 +110,10 @@ static inline bool memory_intersects(voi
+ {
+       void *vend = virt + size;
+-      return (virt >= begin && virt < end) || (vend >= begin && vend < end);
++      if (virt < end && vend > begin)
++              return true;
++
++      return false;
+ }
+ /**
diff --git a/queue-5.19/audit-move-audit_return_fixup-before-the-filters.patch b/queue-5.19/audit-move-audit_return_fixup-before-the-filters.patch
new file mode 100644 (file)
index 0000000..8a63e87
--- /dev/null
@@ -0,0 +1,57 @@
+From d4fefa4801a1c2f9c0c7a48fbb0fdf384e89a4ab Mon Sep 17 00:00:00 2001
+From: Richard Guy Briggs <rgb@redhat.com>
+Date: Thu, 25 Aug 2022 15:32:40 -0400
+Subject: audit: move audit_return_fixup before the filters
+
+From: Richard Guy Briggs <rgb@redhat.com>
+
+commit d4fefa4801a1c2f9c0c7a48fbb0fdf384e89a4ab upstream.
+
+The success and return_code are needed by the filters.  Move
+audit_return_fixup() before the filters.  This was causing syscall
+auditing events to be missed.
+
+Link: https://github.com/linux-audit/audit-kernel/issues/138
+Cc: stable@vger.kernel.org
+Fixes: 12c5e81d3fd0 ("audit: prepare audit_context for use in calling contexts beyond syscalls")
+Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
+[PM: manual merge required]
+Signed-off-by: Paul Moore <paul@paul-moore.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/auditsc.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/kernel/auditsc.c
++++ b/kernel/auditsc.c
+@@ -1965,6 +1965,7 @@ void __audit_uring_exit(int success, lon
+               goto out;
+       }
++      audit_return_fixup(ctx, success, code);
+       if (ctx->context == AUDIT_CTX_SYSCALL) {
+               /*
+                * NOTE: See the note in __audit_uring_entry() about the case
+@@ -2006,7 +2007,6 @@ void __audit_uring_exit(int success, lon
+       audit_filter_inodes(current, ctx);
+       if (ctx->current_state != AUDIT_STATE_RECORD)
+               goto out;
+-      audit_return_fixup(ctx, success, code);
+       audit_log_exit();
+ out:
+@@ -2090,13 +2090,13 @@ void __audit_syscall_exit(int success, l
+       if (!list_empty(&context->killed_trees))
+               audit_kill_trees(context);
++      audit_return_fixup(context, success, return_code);
+       /* run through both filters to ensure we set the filterkey properly */
+       audit_filter_syscall(current, context);
+       audit_filter_inodes(current, context);
+       if (context->current_state < AUDIT_STATE_RECORD)
+               goto out;
+-      audit_return_fixup(context, success, return_code);
+       audit_log_exit();
+ out:
diff --git a/queue-5.19/bootmem-remove-the-vmemmap-pages-from-kmemleak-in-put_page_bootmem.patch b/queue-5.19/bootmem-remove-the-vmemmap-pages-from-kmemleak-in-put_page_bootmem.patch
new file mode 100644 (file)
index 0000000..3728cdb
--- /dev/null
@@ -0,0 +1,55 @@
+From dd0ff4d12dd284c334f7e9b07f8f335af856ac78 Mon Sep 17 00:00:00 2001
+From: Liu Shixin <liushixin2@huawei.com>
+Date: Fri, 19 Aug 2022 17:40:05 +0800
+Subject: bootmem: remove the vmemmap pages from kmemleak in put_page_bootmem
+
+From: Liu Shixin <liushixin2@huawei.com>
+
+commit dd0ff4d12dd284c334f7e9b07f8f335af856ac78 upstream.
+
+The vmemmap pages are marked by kmemleak when allocated from memblock.
+Remove it from kmemleak when freeing the page.  Otherwise, when we reuse
+the page, kmemleak may report such an error and then stop working.
+
+ kmemleak: Cannot insert 0xffff98fb6eab3d40 into the object search tree (overlaps existing)
+ kmemleak: Kernel memory leak detector disabled
+ kmemleak: Object 0xffff98fb6be00000 (size 335544320):
+ kmemleak:   comm "swapper", pid 0, jiffies 4294892296
+ kmemleak:   min_count = 0
+ kmemleak:   count = 0
+ kmemleak:   flags = 0x1
+ kmemleak:   checksum = 0
+ kmemleak:   backtrace:
+
+Link: https://lkml.kernel.org/r/20220819094005.2928241-1-liushixin2@huawei.com
+Fixes: f41f2ed43ca5 (mm: hugetlb: free the vmemmap pages associated with each HugeTLB page)
+Signed-off-by: Liu Shixin <liushixin2@huawei.com>
+Reviewed-by: Muchun Song <songmuchun@bytedance.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/bootmem_info.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/mm/bootmem_info.c
++++ b/mm/bootmem_info.c
+@@ -12,6 +12,7 @@
+ #include <linux/memblock.h>
+ #include <linux/bootmem_info.h>
+ #include <linux/memory_hotplug.h>
++#include <linux/kmemleak.h>
+ void get_page_bootmem(unsigned long info, struct page *page, unsigned long type)
+ {
+@@ -33,6 +34,7 @@ void put_page_bootmem(struct page *page)
+               ClearPagePrivate(page);
+               set_page_private(page, 0);
+               INIT_LIST_HEAD(&page->lru);
++              kmemleak_free_part(page_to_virt(page), PAGE_SIZE);
+               free_reserved_page(page);
+       }
+ }
diff --git a/queue-5.19/btrfs-add-info-when-mount-fails-due-to-stale-replace-target.patch b/queue-5.19/btrfs-add-info-when-mount-fails-due-to-stale-replace-target.patch
new file mode 100644 (file)
index 0000000..426112d
--- /dev/null
@@ -0,0 +1,47 @@
+From f2c3bec215694fb8bc0ef5010f2a758d1906fc2d Mon Sep 17 00:00:00 2001
+From: Anand Jain <anand.jain@oracle.com>
+Date: Fri, 12 Aug 2022 18:32:19 +0800
+Subject: btrfs: add info when mount fails due to stale replace target
+
+From: Anand Jain <anand.jain@oracle.com>
+
+commit f2c3bec215694fb8bc0ef5010f2a758d1906fc2d upstream.
+
+If the replace target device reappears after the suspended replace is
+cancelled, it blocks the mount operation as it can't find the matching
+replace-item in the metadata. As shown below,
+
+   BTRFS error (device sda5): replace devid present without an active replace item
+
+To overcome this situation, the user can run the command
+
+   btrfs device scan --forget <replace target device>
+
+and try the mount command again. Also, to avoid repeating the issue,
+the superblock on devid=0 must be wiped.
+
+   wipefs -a device-path-to-devid=0.
+
+This patch adds some info when this situation occurs.
+
+Reported-by: Samuel Greiner <samuel@balkonien.org>
+Link: https://lore.kernel.org/linux-btrfs/b4f62b10-b295-26ea-71f9-9a5c9299d42c@balkonien.org/T/
+CC: stable@vger.kernel.org # 5.0+
+Signed-off-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/dev-replace.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/dev-replace.c
++++ b/fs/btrfs/dev-replace.c
+@@ -165,7 +165,7 @@ no_valid_dev_replace_entry_found:
+                */
+               if (btrfs_find_device(fs_info->fs_devices, &args)) {
+                       btrfs_err(fs_info,
+-                      "replace devid present without an active replace item");
++"replace without active item, run 'device scan --forget' on the target device");
+                       ret = -EUCLEAN;
+               } else {
+                       dev_replace->srcdev = NULL;
diff --git a/queue-5.19/btrfs-check-if-root-is-readonly-while-setting-security-xattr.patch b/queue-5.19/btrfs-check-if-root-is-readonly-while-setting-security-xattr.patch
new file mode 100644 (file)
index 0000000..d28ac9b
--- /dev/null
@@ -0,0 +1,60 @@
+From b51111271b0352aa596c5ae8faf06939e91b3b68 Mon Sep 17 00:00:00 2001
+From: Goldwyn Rodrigues <rgoldwyn@suse.de>
+Date: Tue, 16 Aug 2022 16:42:56 -0500
+Subject: btrfs: check if root is readonly while setting security xattr
+
+From: Goldwyn Rodrigues <rgoldwyn@suse.de>
+
+commit b51111271b0352aa596c5ae8faf06939e91b3b68 upstream.
+
+For a filesystem which has btrfs read-only property set to true, all
+write operations including xattr should be denied. However, security
+xattr can still be changed even if btrfs ro property is true.
+
+This happens because xattr_permission() does not have any restrictions
+on security.*, system.*  and in some cases trusted.* from VFS and
+the decision is left to the underlying filesystem. See comments in
+xattr_permission() for more details.
+
+This patch checks if the root is read-only before performing the set
+xattr operation.
+
+Testcase:
+
+  DEV=/dev/vdb
+  MNT=/mnt
+
+  mkfs.btrfs -f $DEV
+  mount $DEV $MNT
+  echo "file one" > $MNT/f1
+
+  setfattr -n "security.one" -v 2 $MNT/f1
+  btrfs property set /mnt ro true
+
+  setfattr -n "security.one" -v 1 $MNT/f1
+
+  umount $MNT
+
+CC: stable@vger.kernel.org # 4.9+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/xattr.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/btrfs/xattr.c
++++ b/fs/btrfs/xattr.c
+@@ -371,6 +371,9 @@ static int btrfs_xattr_handler_set(const
+                                  const char *name, const void *buffer,
+                                  size_t size, int flags)
+ {
++      if (btrfs_root_readonly(BTRFS_I(inode)->root))
++              return -EROFS;
++
+       name = xattr_full_name(handler, name);
+       return btrfs_setxattr_trans(inode, name, buffer, size, flags);
+ }
diff --git a/queue-5.19/btrfs-fix-possible-memory-leak-in-btrfs_get_dev_args_from_path.patch b/queue-5.19/btrfs-fix-possible-memory-leak-in-btrfs_get_dev_args_from_path.patch
new file mode 100644 (file)
index 0000000..4d93100
--- /dev/null
@@ -0,0 +1,44 @@
+From 9ea0106a7a3d8116860712e3f17cd52ce99f6707 Mon Sep 17 00:00:00 2001
+From: Zixuan Fu <r33s3n6@gmail.com>
+Date: Mon, 15 Aug 2022 23:16:06 +0800
+Subject: btrfs: fix possible memory leak in btrfs_get_dev_args_from_path()
+
+From: Zixuan Fu <r33s3n6@gmail.com>
+
+commit 9ea0106a7a3d8116860712e3f17cd52ce99f6707 upstream.
+
+In btrfs_get_dev_args_from_path(), btrfs_get_bdev_and_sb() can fail if
+the path is invalid. In this case, btrfs_get_dev_args_from_path()
+returns directly without freeing args->uuid and args->fsid allocated
+before, which causes a memory leak.
+
+To fix these possible leaks, when btrfs_get_bdev_and_sb() fails,
+btrfs_put_dev_args_from_path() is called to clean up the memory.
+
+Reported-by: TOTE Robot <oslab@tsinghua.edu.cn>
+Fixes: faa775c41d655 ("btrfs: add a btrfs_get_dev_args_from_path helper")
+CC: stable@vger.kernel.org # 5.16
+Reviewed-by: Boris Burkov <boris@bur.io>
+Signed-off-by: Zixuan Fu <r33s3n6@gmail.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/volumes.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -2344,8 +2344,11 @@ int btrfs_get_dev_args_from_path(struct
+       ret = btrfs_get_bdev_and_sb(path, FMODE_READ, fs_info->bdev_holder, 0,
+                                   &bdev, &disk_super);
+-      if (ret)
++      if (ret) {
++              btrfs_put_dev_args_from_path(args);
+               return ret;
++      }
++
+       args->devid = btrfs_stack_device_id(&disk_super->dev_item);
+       memcpy(args->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE);
+       if (btrfs_fs_incompat(fs_info, METADATA_UUID))
diff --git a/queue-5.19/btrfs-fix-silent-failure-when-deleting-root-reference.patch b/queue-5.19/btrfs-fix-silent-failure-when-deleting-root-reference.patch
new file mode 100644 (file)
index 0000000..aeac04a
--- /dev/null
@@ -0,0 +1,43 @@
+From 47bf225a8d2cccb15f7e8d4a1ed9b757dd86afd7 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 22 Aug 2022 15:47:09 +0100
+Subject: btrfs: fix silent failure when deleting root reference
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 47bf225a8d2cccb15f7e8d4a1ed9b757dd86afd7 upstream.
+
+At btrfs_del_root_ref(), if btrfs_search_slot() returns an error, we end
+up returning from the function with a value of 0 (success). This happens
+because the function returns the value stored in the variable 'err',
+which is 0, while the error value we got from btrfs_search_slot() is
+stored in the 'ret' variable.
+
+So fix it by setting 'err' with the error value.
+
+Fixes: 8289ed9f93bef2 ("btrfs: replace the BUG_ON in btrfs_del_root_ref with proper error handling")
+CC: stable@vger.kernel.org # 5.16+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/root-tree.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/root-tree.c
++++ b/fs/btrfs/root-tree.c
+@@ -349,9 +349,10 @@ int btrfs_del_root_ref(struct btrfs_tran
+       key.offset = ref_id;
+ again:
+       ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
+-      if (ret < 0)
++      if (ret < 0) {
++              err = ret;
+               goto out;
+-      if (ret == 0) {
++      } else if (ret == 0) {
+               leaf = path->nodes[0];
+               ref = btrfs_item_ptr(leaf, path->slots[0],
+                                    struct btrfs_root_ref);
diff --git a/queue-5.19/btrfs-fix-space-cache-corruption-and-potential-double-allocations.patch b/queue-5.19/btrfs-fix-space-cache-corruption-and-potential-double-allocations.patch
new file mode 100644 (file)
index 0000000..b640ab8
--- /dev/null
@@ -0,0 +1,304 @@
+From ced8ecf026fd8084cf175530ff85c76d6085d715 Mon Sep 17 00:00:00 2001
+From: Omar Sandoval <osandov@fb.com>
+Date: Tue, 23 Aug 2022 11:28:13 -0700
+Subject: btrfs: fix space cache corruption and potential double allocations
+
+From: Omar Sandoval <osandov@fb.com>
+
+commit ced8ecf026fd8084cf175530ff85c76d6085d715 upstream.
+
+When testing space_cache v2 on a large set of machines, we encountered a
+few symptoms:
+
+1. "unable to add free space :-17" (EEXIST) errors.
+2. Missing free space info items, sometimes caught with a "missing free
+   space info for X" error.
+3. Double-accounted space: ranges that were allocated in the extent tree
+   and also marked as free in the free space tree, ranges that were
+   marked as allocated twice in the extent tree, or ranges that were
+   marked as free twice in the free space tree. If the latter made it
+   onto disk, the next reboot would hit the BUG_ON() in
+   add_new_free_space().
+4. On some hosts with no on-disk corruption or error messages, the
+   in-memory space cache (dumped with drgn) disagreed with the free
+   space tree.
+
+All of these symptoms have the same underlying cause: a race between
+caching the free space for a block group and returning free space to the
+in-memory space cache for pinned extents causes us to double-add a free
+range to the space cache. This race exists when free space is cached
+from the free space tree (space_cache=v2) or the extent tree
+(nospace_cache, or space_cache=v1 if the cache needs to be regenerated).
+struct btrfs_block_group::last_byte_to_unpin and struct
+btrfs_block_group::progress are supposed to protect against this race,
+but commit d0c2f4fa555e ("btrfs: make concurrent fsyncs wait less when
+waiting for a transaction commit") subtly broke this by allowing
+multiple transactions to be unpinning extents at the same time.
+
+Specifically, the race is as follows:
+
+1. An extent is deleted from an uncached block group in transaction A.
+2. btrfs_commit_transaction() is called for transaction A.
+3. btrfs_run_delayed_refs() -> __btrfs_free_extent() runs the delayed
+   ref for the deleted extent.
+4. __btrfs_free_extent() -> do_free_extent_accounting() ->
+   add_to_free_space_tree() adds the deleted extent back to the free
+   space tree.
+5. do_free_extent_accounting() -> btrfs_update_block_group() ->
+   btrfs_cache_block_group() queues up the block group to get cached.
+   block_group->progress is set to block_group->start.
+6. btrfs_commit_transaction() for transaction A calls
+   switch_commit_roots(). It sets block_group->last_byte_to_unpin to
+   block_group->progress, which is block_group->start because the block
+   group hasn't been cached yet.
+7. The caching thread gets to our block group. Since the commit roots
+   were already switched, load_free_space_tree() sees the deleted extent
+   as free and adds it to the space cache. It finishes caching and sets
+   block_group->progress to U64_MAX.
+8. btrfs_commit_transaction() advances transaction A to
+   TRANS_STATE_SUPER_COMMITTED.
+9. fsync calls btrfs_commit_transaction() for transaction B. Since
+   transaction A is already in TRANS_STATE_SUPER_COMMITTED and the
+   commit is for fsync, it advances.
+10. btrfs_commit_transaction() for transaction B calls
+    switch_commit_roots(). This time, the block group has already been
+    cached, so it sets block_group->last_byte_to_unpin to U64_MAX.
+11. btrfs_commit_transaction() for transaction A calls
+    btrfs_finish_extent_commit(), which calls unpin_extent_range() for
+    the deleted extent. It sees last_byte_to_unpin set to U64_MAX (by
+    transaction B!), so it adds the deleted extent to the space cache
+    again!
+
+This explains all of our symptoms above:
+
+* If the sequence of events is exactly as described above, when the free
+  space is re-added in step 11, it will fail with EEXIST.
+* If another thread reallocates the deleted extent in between steps 7
+  and 11, then step 11 will silently re-add that space to the space
+  cache as free even though it is actually allocated. Then, if that
+  space is allocated *again*, the free space tree will be corrupted
+  (namely, the wrong item will be deleted).
+* If we don't catch this free space tree corruption, it will continue
+  to get worse as extents are deleted and reallocated.
+
+The v1 space_cache is synchronously loaded when an extent is deleted
+(btrfs_update_block_group() with alloc=0 calls btrfs_cache_block_group()
+with load_cache_only=1), so it is not normally affected by this bug.
+However, as noted above, if we fail to load the space cache, we will
+fall back to caching from the extent tree and may hit this bug.
+
+The easiest fix for this race is to also make caching from the free
+space tree or extent tree synchronous. Josef tested this and found no
+performance regressions.
+
+A few extra changes fall out of this change. Namely, this fix does the
+following, with step 2 being the crucial fix:
+
+1. Factor btrfs_caching_ctl_wait_done() out of
+   btrfs_wait_block_group_cache_done() to allow waiting on a caching_ctl
+   that we already hold a reference to.
+2. Change the call in btrfs_cache_block_group() of
+   btrfs_wait_space_cache_v1_finished() to
+   btrfs_caching_ctl_wait_done(), which makes us wait regardless of the
+   space_cache option.
+3. Delete the now unused btrfs_wait_space_cache_v1_finished() and
+   space_cache_v1_done().
+4. Change btrfs_cache_block_group()'s `int load_cache_only` parameter to
+   `bool wait` to more accurately describe its new meaning.
+5. Change a few callers which had a separate call to
+   btrfs_wait_block_group_cache_done() to use wait = true instead.
+6. Make btrfs_wait_block_group_cache_done() static now that it's not
+   used outside of block-group.c anymore.
+
+Fixes: d0c2f4fa555e ("btrfs: make concurrent fsyncs wait less when waiting for a transaction commit")
+CC: stable@vger.kernel.org # 5.12+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Omar Sandoval <osandov@fb.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/block-group.c |   47 +++++++++++++++--------------------------------
+ fs/btrfs/block-group.h |    4 +---
+ fs/btrfs/ctree.h       |    1 -
+ fs/btrfs/extent-tree.c |   30 ++++++------------------------
+ 4 files changed, 22 insertions(+), 60 deletions(-)
+
+--- a/fs/btrfs/block-group.c
++++ b/fs/btrfs/block-group.c
+@@ -440,39 +440,26 @@ void btrfs_wait_block_group_cache_progre
+       btrfs_put_caching_control(caching_ctl);
+ }
+-int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache)
++static int btrfs_caching_ctl_wait_done(struct btrfs_block_group *cache,
++                                     struct btrfs_caching_control *caching_ctl)
++{
++      wait_event(caching_ctl->wait, btrfs_block_group_done(cache));
++      return cache->cached == BTRFS_CACHE_ERROR ? -EIO : 0;
++}
++
++static int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache)
+ {
+       struct btrfs_caching_control *caching_ctl;
+-      int ret = 0;
++      int ret;
+       caching_ctl = btrfs_get_caching_control(cache);
+       if (!caching_ctl)
+               return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;
+-
+-      wait_event(caching_ctl->wait, btrfs_block_group_done(cache));
+-      if (cache->cached == BTRFS_CACHE_ERROR)
+-              ret = -EIO;
++      ret = btrfs_caching_ctl_wait_done(cache, caching_ctl);
+       btrfs_put_caching_control(caching_ctl);
+       return ret;
+ }
+-static bool space_cache_v1_done(struct btrfs_block_group *cache)
+-{
+-      bool ret;
+-
+-      spin_lock(&cache->lock);
+-      ret = cache->cached != BTRFS_CACHE_FAST;
+-      spin_unlock(&cache->lock);
+-
+-      return ret;
+-}
+-
+-void btrfs_wait_space_cache_v1_finished(struct btrfs_block_group *cache,
+-                              struct btrfs_caching_control *caching_ctl)
+-{
+-      wait_event(caching_ctl->wait, space_cache_v1_done(cache));
+-}
+-
+ #ifdef CONFIG_BTRFS_DEBUG
+ static void fragment_free_space(struct btrfs_block_group *block_group)
+ {
+@@ -750,9 +737,8 @@ done:
+       btrfs_put_block_group(block_group);
+ }
+-int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only)
++int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait)
+ {
+-      DEFINE_WAIT(wait);
+       struct btrfs_fs_info *fs_info = cache->fs_info;
+       struct btrfs_caching_control *caching_ctl = NULL;
+       int ret = 0;
+@@ -785,10 +771,7 @@ int btrfs_cache_block_group(struct btrfs
+       }
+       WARN_ON(cache->caching_ctl);
+       cache->caching_ctl = caching_ctl;
+-      if (btrfs_test_opt(fs_info, SPACE_CACHE))
+-              cache->cached = BTRFS_CACHE_FAST;
+-      else
+-              cache->cached = BTRFS_CACHE_STARTED;
++      cache->cached = BTRFS_CACHE_STARTED;
+       cache->has_caching_ctl = 1;
+       spin_unlock(&cache->lock);
+@@ -801,8 +784,8 @@ int btrfs_cache_block_group(struct btrfs
+       btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);
+ out:
+-      if (load_cache_only && caching_ctl)
+-              btrfs_wait_space_cache_v1_finished(cache, caching_ctl);
++      if (wait && caching_ctl)
++              ret = btrfs_caching_ctl_wait_done(cache, caching_ctl);
+       if (caching_ctl)
+               btrfs_put_caching_control(caching_ctl);
+@@ -3313,7 +3296,7 @@ int btrfs_update_block_group(struct btrf
+                * space back to the block group, otherwise we will leak space.
+                */
+               if (!alloc && !btrfs_block_group_done(cache))
+-                      btrfs_cache_block_group(cache, 1);
++                      btrfs_cache_block_group(cache, true);
+               byte_in_group = bytenr - cache->start;
+               WARN_ON(byte_in_group > cache->length);
+--- a/fs/btrfs/block-group.h
++++ b/fs/btrfs/block-group.h
+@@ -263,9 +263,7 @@ void btrfs_dec_nocow_writers(struct btrf
+ void btrfs_wait_nocow_writers(struct btrfs_block_group *bg);
+ void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
+                                          u64 num_bytes);
+-int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache);
+-int btrfs_cache_block_group(struct btrfs_block_group *cache,
+-                          int load_cache_only);
++int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait);
+ void btrfs_put_caching_control(struct btrfs_caching_control *ctl);
+ struct btrfs_caching_control *btrfs_get_caching_control(
+               struct btrfs_block_group *cache);
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -494,7 +494,6 @@ struct btrfs_free_cluster {
+ enum btrfs_caching_type {
+       BTRFS_CACHE_NO,
+       BTRFS_CACHE_STARTED,
+-      BTRFS_CACHE_FAST,
+       BTRFS_CACHE_FINISHED,
+       BTRFS_CACHE_ERROR,
+ };
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -2567,17 +2567,10 @@ int btrfs_pin_extent_for_log_replay(stru
+               return -EINVAL;
+       /*
+-       * pull in the free space cache (if any) so that our pin
+-       * removes the free space from the cache.  We have load_only set
+-       * to one because the slow code to read in the free extents does check
+-       * the pinned extents.
++       * Fully cache the free space first so that our pin removes the free space
++       * from the cache.
+        */
+-      btrfs_cache_block_group(cache, 1);
+-      /*
+-       * Make sure we wait until the cache is completely built in case it is
+-       * missing or is invalid and therefore needs to be rebuilt.
+-       */
+-      ret = btrfs_wait_block_group_cache_done(cache);
++      ret = btrfs_cache_block_group(cache, true);
+       if (ret)
+               goto out;
+@@ -2600,12 +2593,7 @@ static int __exclude_logged_extent(struc
+       if (!block_group)
+               return -EINVAL;
+-      btrfs_cache_block_group(block_group, 1);
+-      /*
+-       * Make sure we wait until the cache is completely built in case it is
+-       * missing or is invalid and therefore needs to be rebuilt.
+-       */
+-      ret = btrfs_wait_block_group_cache_done(block_group);
++      ret = btrfs_cache_block_group(block_group, true);
+       if (ret)
+               goto out;
+@@ -4415,7 +4403,7 @@ have_block_group:
+               ffe_ctl->cached = btrfs_block_group_done(block_group);
+               if (unlikely(!ffe_ctl->cached)) {
+                       ffe_ctl->have_caching_bg = true;
+-                      ret = btrfs_cache_block_group(block_group, 0);
++                      ret = btrfs_cache_block_group(block_group, false);
+                       /*
+                        * If we get ENOMEM here or something else we want to
+@@ -6169,13 +6157,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *
+               if (end - start >= range->minlen) {
+                       if (!btrfs_block_group_done(cache)) {
+-                              ret = btrfs_cache_block_group(cache, 0);
+-                              if (ret) {
+-                                      bg_failed++;
+-                                      bg_ret = ret;
+-                                      continue;
+-                              }
+-                              ret = btrfs_wait_block_group_cache_done(cache);
++                              ret = btrfs_cache_block_group(cache, true);
+                               if (ret) {
+                                       bg_failed++;
+                                       bg_ret = ret;
diff --git a/queue-5.19/btrfs-replace-drop-assert-for-suspended-replace.patch b/queue-5.19/btrfs-replace-drop-assert-for-suspended-replace.patch
new file mode 100644 (file)
index 0000000..78cdcaa
--- /dev/null
@@ -0,0 +1,55 @@
+From 59a3991984dbc1fc47e5651a265c5200bd85464e Mon Sep 17 00:00:00 2001
+From: Anand Jain <anand.jain@oracle.com>
+Date: Fri, 12 Aug 2022 18:32:18 +0800
+Subject: btrfs: replace: drop assert for suspended replace
+
+From: Anand Jain <anand.jain@oracle.com>
+
+commit 59a3991984dbc1fc47e5651a265c5200bd85464e upstream.
+
+If the filesystem mounts with the replace-operation in a suspended state
+and we try to cancel the suspended replace-operation, we hit the assert. The
+assert came from the commit fe97e2e173af ("btrfs: dev-replace: replace's
+scrub must not be running in suspended state") that was actually not
+required. So just remove it.
+
+ $ mount /dev/sda5 /btrfs
+
+    BTRFS info (device sda5): cannot continue dev_replace, tgtdev is missing
+    BTRFS info (device sda5): you may cancel the operation after 'mount -o degraded'
+
+ $ mount -o degraded /dev/sda5 /btrfs <-- success.
+
+ $ btrfs replace cancel /btrfs
+
+    kernel: assertion failed: ret != -ENOTCONN, in fs/btrfs/dev-replace.c:1131
+    kernel: ------------[ cut here ]------------
+    kernel: kernel BUG at fs/btrfs/ctree.h:3750!
+
+After the patch:
+
+ $ btrfs replace cancel /btrfs
+
+    BTRFS info (device sda5): suspended dev_replace from /dev/sda5 (devid 1) to <missing disk> canceled
+
+Fixes: fe97e2e173af ("btrfs: dev-replace: replace's scrub must not be running in suspended state")
+CC: stable@vger.kernel.org # 5.0+
+Signed-off-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/dev-replace.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/fs/btrfs/dev-replace.c
++++ b/fs/btrfs/dev-replace.c
+@@ -1128,8 +1128,7 @@ int btrfs_dev_replace_cancel(struct btrf
+               up_write(&dev_replace->rwsem);
+               /* Scrub for replace must not be running in suspended state */
+-              ret = btrfs_scrub_cancel(fs_info);
+-              ASSERT(ret != -ENOTCONN);
++              btrfs_scrub_cancel(fs_info);
+               trans = btrfs_start_transaction(root, 0);
+               if (IS_ERR(trans)) {
diff --git a/queue-5.19/btrfs-update-generation-of-hole-file-extent-item-when-merging-holes.patch b/queue-5.19/btrfs-update-generation-of-hole-file-extent-item-when-merging-holes.patch
new file mode 100644 (file)
index 0000000..bef1cb2
--- /dev/null
@@ -0,0 +1,92 @@
+From e6e3dec6c3c288d556b991a85d5d8e3ee71e9046 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 8 Aug 2022 12:18:37 +0100
+Subject: btrfs: update generation of hole file extent item when merging holes
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit e6e3dec6c3c288d556b991a85d5d8e3ee71e9046 upstream.
+
+When punching a hole into a file range that is adjacent with a hole and we
+are not using the no-holes feature, we expand the range of the adjacent
+file extent item that represents a hole, to save metadata space.
+
+However we don't update the generation of the hole file extent item, which
+means a full fsync will not log that file extent item if the fsync happens
+in a later transaction (since commit 7f30c07288bb9e ("btrfs: stop copying
+old file extents when doing a full fsync")).
+
+For example, if we do this:
+
+    $ mkfs.btrfs -f -O ^no-holes /dev/sdb
+    $ mount /dev/sdb /mnt
+    $ xfs_io -f -c "pwrite -S 0xab 2M 2M" /mnt/foobar
+    $ sync
+
+We end up with 2 file extent items in our file:
+
+1) One that represents the hole for the file range [0, 2M), with a
+   generation of 7;
+
+2) Another one that represents an extent covering the range [2M, 4M).
+
+After that if we do the following:
+
+    $ xfs_io -c "fpunch 2M 2M" /mnt/foobar
+
+We end up with a single file extent item in the file, which represents a
+hole for the range [0, 4M) and with a generation of 7 - because we end up
+dropping the data extent for range [2M, 4M) and then update the file
+extent item that represented the hole at [0, 2M), by increasing
+length from 2M to 4M.
+
+Then doing a full fsync and power failing:
+
+    $ xfs_io -c "fsync" /mnt/foobar
+    <power failure>
+
+will result in the full fsync not logging the file extent item that
+represents the hole for the range [0, 4M), because its generation is 7,
+which is lower than the generation of the current transaction (8).
+As a consequence, after mounting again the filesystem (after log replay),
+the region [2M, 4M) does not have a hole, it still points to the
+previous data extent.
+
+So fix this by always updating the generation of existing file extent
+items representing holes when we merge/expand them. This solves the
+problem and it's the same approach as when we merge prealloc extents that
+got written (at btrfs_mark_extent_written()). Setting the generation to
+the current transaction's generation is also what we do when merging
+the new hole extent map with the previous one or the next one.
+
+A test case for fstests, covering both cases of hole file extent item
+merging (to the left and to the right), will be sent soon.
+
+Fixes: 7f30c07288bb9e ("btrfs: stop copying old file extents when doing a full fsync")
+CC: stable@vger.kernel.org # 5.18+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/file.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -2483,6 +2483,7 @@ static int fill_holes(struct btrfs_trans
+               btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
+               btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
+               btrfs_set_file_extent_offset(leaf, fi, 0);
++              btrfs_set_file_extent_generation(leaf, fi, trans->transid);
+               btrfs_mark_buffer_dirty(leaf);
+               goto out;
+       }
+@@ -2499,6 +2500,7 @@ static int fill_holes(struct btrfs_trans
+               btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
+               btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
+               btrfs_set_file_extent_offset(leaf, fi, 0);
++              btrfs_set_file_extent_generation(leaf, fi, trans->transid);
+               btrfs_mark_buffer_dirty(leaf);
+               goto out;
+       }
diff --git a/queue-5.19/cifs-skip-extra-null-byte-in-filenames.patch b/queue-5.19/cifs-skip-extra-null-byte-in-filenames.patch
new file mode 100644 (file)
index 0000000..58bef31
--- /dev/null
@@ -0,0 +1,61 @@
+From a1d2eb51f0a33c28f5399a1610e66b3fbd24e884 Mon Sep 17 00:00:00 2001
+From: Paulo Alcantara <pc@cjr.nz>
+Date: Fri, 19 Aug 2022 17:00:19 -0300
+Subject: cifs: skip extra NULL byte in filenames
+
+From: Paulo Alcantara <pc@cjr.nz>
+
+commit a1d2eb51f0a33c28f5399a1610e66b3fbd24e884 upstream.
+
+Since commit:
+ cifs: alloc_path_with_tree_prefix: do not append sep. if the path is empty
+alloc_path_with_tree_prefix() function was no longer including the
+trailing separator when @path is empty, although @out_len was still
+assuming a path separator thus adding an extra byte to the final
+filename.
+
+This has caused mount issues in some Synology servers due to the extra
+NULL byte in filenames when sending SMB2_CREATE requests with
+SMB2_FLAGS_DFS_OPERATIONS set.
+
+Fix this by checking if @path is not empty and then add extra byte for
+separator.  Also, do not include any trailing NULL bytes in filename
+as MS-SMB2 requires it to be 8-byte aligned and not NULL terminated.
+
+Cc: stable@vger.kernel.org
+Fixes: 7eacba3b00a3 ("cifs: alloc_path_with_tree_prefix: do not append sep. if the path is empty")
+Signed-off-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/cifs/smb2pdu.c |   16 ++++++----------
+ 1 file changed, 6 insertions(+), 10 deletions(-)
+
+--- a/fs/cifs/smb2pdu.c
++++ b/fs/cifs/smb2pdu.c
+@@ -2571,19 +2571,15 @@ alloc_path_with_tree_prefix(__le16 **out
+       path_len = UniStrnlen((wchar_t *)path, PATH_MAX);
+-      /*
+-       * make room for one path separator between the treename and
+-       * path
+-       */
+-      *out_len = treename_len + 1 + path_len;
++      /* make room for one path separator only if @path isn't empty */
++      *out_len = treename_len + (path[0] ? 1 : 0) + path_len;
+       /*
+-       * final path needs to be null-terminated UTF16 with a
+-       * size aligned to 8
++       * final path needs to be 8-byte aligned as specified in
++       * MS-SMB2 2.2.13 SMB2 CREATE Request.
+        */
+-
+-      *out_size = roundup((*out_len+1)*2, 8);
+-      *out_path = kzalloc(*out_size, GFP_KERNEL);
++      *out_size = roundup(*out_len * sizeof(__le16), 8);
++      *out_path = kzalloc(*out_size + sizeof(__le16) /* null */, GFP_KERNEL);
+       if (!*out_path)
+               return -ENOMEM;
diff --git a/queue-5.19/fbdev-fbcon-properly-revert-changes-when-vc_resize-failed.patch b/queue-5.19/fbdev-fbcon-properly-revert-changes-when-vc_resize-failed.patch
new file mode 100644 (file)
index 0000000..f775d5b
--- /dev/null
@@ -0,0 +1,88 @@
+From a5a923038d70d2d4a86cb4e3f32625a5ee6e7e24 Mon Sep 17 00:00:00 2001
+From: Shigeru Yoshida <syoshida@redhat.com>
+Date: Fri, 19 Aug 2022 03:13:36 +0900
+Subject: fbdev: fbcon: Properly revert changes when vc_resize() failed
+
+From: Shigeru Yoshida <syoshida@redhat.com>
+
+commit a5a923038d70d2d4a86cb4e3f32625a5ee6e7e24 upstream.
+
+fbcon_do_set_font() calls vc_resize() when font size is changed.
+However, if vc_resize() failed, the current implementation doesn't
+revert changes for font size, and this causes inconsistent state.
+
+syzbot reported an "unable to handle page fault" error due to this issue [1].
+syzbot's repro uses fault injection, which causes memory allocation to fail,
+so vc_resize() failed.
+
+This patch fixes this issue by properly reverting changes to the font
+related data when vc_resize() failed.
+
+Link: https://syzkaller.appspot.com/bug?id=3443d3a1fa6d964dd7310a0cb1696d165a3e07c4 [1]
+Reported-by: syzbot+a168dbeaaa7778273c1b@syzkaller.appspotmail.com
+Signed-off-by: Shigeru Yoshida <syoshida@redhat.com>
+Signed-off-by: Helge Deller <deller@gmx.de>
+CC: stable@vger.kernel.org # 5.15+
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/video/fbdev/core/fbcon.c |   27 +++++++++++++++++++++++++--
+ 1 file changed, 25 insertions(+), 2 deletions(-)
+
+--- a/drivers/video/fbdev/core/fbcon.c
++++ b/drivers/video/fbdev/core/fbcon.c
+@@ -2402,15 +2402,21 @@ static int fbcon_do_set_font(struct vc_d
+       struct fb_info *info = fbcon_info_from_console(vc->vc_num);
+       struct fbcon_ops *ops = info->fbcon_par;
+       struct fbcon_display *p = &fb_display[vc->vc_num];
+-      int resize;
++      int resize, ret, old_userfont, old_width, old_height, old_charcount;
+       char *old_data = NULL;
+       resize = (w != vc->vc_font.width) || (h != vc->vc_font.height);
+       if (p->userfont)
+               old_data = vc->vc_font.data;
+       vc->vc_font.data = (void *)(p->fontdata = data);
++      old_userfont = p->userfont;
+       if ((p->userfont = userfont))
+               REFCOUNT(data)++;
++
++      old_width = vc->vc_font.width;
++      old_height = vc->vc_font.height;
++      old_charcount = vc->vc_font.charcount;
++
+       vc->vc_font.width = w;
+       vc->vc_font.height = h;
+       vc->vc_font.charcount = charcount;
+@@ -2426,7 +2432,9 @@ static int fbcon_do_set_font(struct vc_d
+               rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres);
+               cols /= w;
+               rows /= h;
+-              vc_resize(vc, cols, rows);
++              ret = vc_resize(vc, cols, rows);
++              if (ret)
++                      goto err_out;
+       } else if (con_is_visible(vc)
+                  && vc->vc_mode == KD_TEXT) {
+               fbcon_clear_margins(vc, 0);
+@@ -2436,6 +2444,21 @@ static int fbcon_do_set_font(struct vc_d
+       if (old_data && (--REFCOUNT(old_data) == 0))
+               kfree(old_data - FONT_EXTRA_WORDS * sizeof(int));
+       return 0;
++
++err_out:
++      p->fontdata = old_data;
++      vc->vc_font.data = (void *)old_data;
++
++      if (userfont) {
++              p->userfont = old_userfont;
++              REFCOUNT(data)--;
++      }
++
++      vc->vc_font.width = old_width;
++      vc->vc_font.height = old_height;
++      vc->vc_font.charcount = old_charcount;
++
++      return ret;
+ }
+ /*
diff --git a/queue-5.19/loop-check-for-overflow-while-configuring-loop.patch b/queue-5.19/loop-check-for-overflow-while-configuring-loop.patch
new file mode 100644 (file)
index 0000000..b1e72ef
--- /dev/null
@@ -0,0 +1,59 @@
+From c490a0b5a4f36da3918181a8acdc6991d967c5f3 Mon Sep 17 00:00:00 2001
+From: Siddh Raman Pant <code@siddh.me>
+Date: Tue, 23 Aug 2022 21:38:10 +0530
+Subject: loop: Check for overflow while configuring loop
+
+From: Siddh Raman Pant <code@siddh.me>
+
+commit c490a0b5a4f36da3918181a8acdc6991d967c5f3 upstream.
+
+The userspace can configure a loop using an ioctl call, wherein
+a configuration of type loop_config is passed (see lo_ioctl()'s
+case on line 1550 of drivers/block/loop.c). This proceeds to call
+loop_configure() which in turn calls loop_set_status_from_info()
+(see line 1050 of loop.c), passing &config->info which is of type
+loop_info64*. This function then sets the appropriate values, like
+the offset.
+
+loop_device has lo_offset of type loff_t (see line 52 of loop.c),
+which is typedef-chained to long long, whereas loop_info64 has
+lo_offset of type __u64 (see line 56 of include/uapi/linux/loop.h).
+
+The function directly copies offset from info to the device as
+follows (See line 980 of loop.c):
+       lo->lo_offset = info->lo_offset;
+
+This results in an overflow, which triggers a warning in iomap_iter()
+due to a call to iomap_iter_done() which has:
+       WARN_ON_ONCE(iter->iomap.offset > iter->pos);
+
+Thus, check for negative value during loop_set_status_from_info().
+
+Bug report: https://syzkaller.appspot.com/bug?id=c620fe14aac810396d3c3edc9ad73848bf69a29e
+
+Reported-and-tested-by: syzbot+a8e049cd3abd342936b6@syzkaller.appspotmail.com
+Cc: stable@vger.kernel.org
+Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Signed-off-by: Siddh Raman Pant <code@siddh.me>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Link: https://lore.kernel.org/r/20220823160810.181275-1-code@siddh.me
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/block/loop.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/block/loop.c
++++ b/drivers/block/loop.c
+@@ -979,6 +979,11 @@ loop_set_status_from_info(struct loop_de
+       lo->lo_offset = info->lo_offset;
+       lo->lo_sizelimit = info->lo_sizelimit;
++
++      /* loff_t vars have been assigned __u64 */
++      if (lo->lo_offset < 0 || lo->lo_sizelimit < 0)
++              return -EOVERFLOW;
++
+       memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
+       lo->lo_file_name[LO_NAME_SIZE-1] = 0;
+       lo->lo_flags = info->lo_flags;
diff --git a/queue-5.19/mm-damon-dbgfs-avoid-duplicate-context-directory-creation.patch b/queue-5.19/mm-damon-dbgfs-avoid-duplicate-context-directory-creation.patch
new file mode 100644 (file)
index 0000000..3d8fb5d
--- /dev/null
@@ -0,0 +1,53 @@
+From d26f60703606ab425eee9882b32a1781a8bed74d Mon Sep 17 00:00:00 2001
+From: Badari Pulavarty <badari.pulavarty@intel.com>
+Date: Sun, 21 Aug 2022 18:08:53 +0000
+Subject: mm/damon/dbgfs: avoid duplicate context directory creation
+
+From: Badari Pulavarty <badari.pulavarty@intel.com>
+
+commit d26f60703606ab425eee9882b32a1781a8bed74d upstream.
+
+When a user tries to create a DAMON context via the DAMON debugfs interface
+with a name of an already existing context, the context directory creation
+fails but a new context is created and added in the internal data
+structure, due to absence of the directory creation success check.  As a
+result, memory could leak and DAMON cannot be turned on.  An example test
+case is as below:
+
+    # cd /sys/kernel/debug/damon/
+    # echo "off" >  monitor_on
+    # echo paddr > target_ids
+    # echo "abc" > mk_context
+    # echo "abc" > mk_context
+    # echo $$ > abc/target_ids
+    # echo "on" > monitor_on  <<< fails
+
+Return value of 'debugfs_create_dir()' is expected to be ignored in
+general, but this is an exceptional case as DAMON feature is depending
+on the debugfs functionality and it has the potential duplicate name
+issue.  This commit therefore fixes the issue by checking the directory
+creation failure and immediately returning the error in that case.
+
+Link: https://lkml.kernel.org/r/20220821180853.2400-1-sj@kernel.org
+Fixes: 75c1c2b53c78 ("mm/damon/dbgfs: support multiple contexts")
+Signed-off-by: Badari Pulavarty <badari.pulavarty@intel.com>
+Signed-off-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org>   [ 5.15.x]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/damon/dbgfs.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/mm/damon/dbgfs.c
++++ b/mm/damon/dbgfs.c
+@@ -787,6 +787,9 @@ static int dbgfs_mk_context(char *name)
+               return -ENOENT;
+       new_dir = debugfs_create_dir(name, root);
++      /* Below check is required for a potential duplicated name case */
++      if (IS_ERR(new_dir))
++              return PTR_ERR(new_dir);
+       dbgfs_dirs[dbgfs_nr_ctxs] = new_dir;
+       new_ctx = dbgfs_new_ctx();
diff --git a/queue-5.19/mm-hugetlb-avoid-corrupting-page-mapping-in-hugetlb_mcopy_atomic_pte.patch b/queue-5.19/mm-hugetlb-avoid-corrupting-page-mapping-in-hugetlb_mcopy_atomic_pte.patch
new file mode 100644 (file)
index 0000000..244878a
--- /dev/null
@@ -0,0 +1,38 @@
+From ab74ef708dc51df7cf2b8a890b9c6990fac5c0c6 Mon Sep 17 00:00:00 2001
+From: Miaohe Lin <linmiaohe@huawei.com>
+Date: Tue, 12 Jul 2022 21:05:42 +0800
+Subject: mm/hugetlb: avoid corrupting page->mapping in hugetlb_mcopy_atomic_pte
+
+From: Miaohe Lin <linmiaohe@huawei.com>
+
+commit ab74ef708dc51df7cf2b8a890b9c6990fac5c0c6 upstream.
+
+In MCOPY_ATOMIC_CONTINUE case with a non-shared VMA, pages in the page
+cache are installed in the ptes.  But hugepage_add_new_anon_rmap is called
+for them mistakenly because they're not vm_shared.  This will corrupt the
+page->mapping used by page cache code.
+
+Link: https://lkml.kernel.org/r/20220712130542.18836-1-linmiaohe@huawei.com
+Fixes: f619147104c8 ("userfaultfd: add UFFDIO_CONTINUE ioctl")
+Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Axel Rasmussen <axelrasmussen@google.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/hugetlb.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -6026,7 +6026,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_s
+       if (!huge_pte_none_mostly(huge_ptep_get(dst_pte)))
+               goto out_release_unlock;
+-      if (vm_shared) {
++      if (page_in_pagecache) {
+               page_dup_file_rmap(page, true);
+       } else {
+               ClearHPageRestoreReserve(page);
diff --git a/queue-5.19/mm-mprotect-only-reference-swap-pfn-page-if-type-match.patch b/queue-5.19/mm-mprotect-only-reference-swap-pfn-page-if-type-match.patch
new file mode 100644 (file)
index 0000000..6ba95ee
--- /dev/null
@@ -0,0 +1,75 @@
+From 3d2f78f08cd8388035ac375e731ec1ac1b79b09d Mon Sep 17 00:00:00 2001
+From: Peter Xu <peterx@redhat.com>
+Date: Tue, 23 Aug 2022 18:11:38 -0400
+Subject: mm/mprotect: only reference swap pfn page if type match
+
+From: Peter Xu <peterx@redhat.com>
+
+commit 3d2f78f08cd8388035ac375e731ec1ac1b79b09d upstream.
+
+Yu Zhao reported a bug after the commit "mm/swap: Add swp_offset_pfn() to
+fetch PFN from swap entry" added a check in swp_offset_pfn() for swap type [1]:
+
+  kernel BUG at include/linux/swapops.h:117!
+  CPU: 46 PID: 5245 Comm: EventManager_De Tainted: G S         O L 6.0.0-dbg-DEV #2
+  RIP: 0010:pfn_swap_entry_to_page+0x72/0xf0
+  Code: c6 48 8b 36 48 83 fe ff 74 53 48 01 d1 48 83 c1 08 48 8b 09 f6
+  c1 01 75 7b 66 90 48 89 c1 48 8b 09 f6 c1 01 74 74 5d c3 eb 9e <0f> 0b
+  48 ba ff ff ff ff 03 00 00 00 eb ae a9 ff 0f 00 00 75 13 48
+  RSP: 0018:ffffa59e73fabb80 EFLAGS: 00010282
+  RAX: 00000000ffffffe8 RBX: 0c00000000000000 RCX: ffffcd5440000000
+  RDX: 1ffffffffff7a80a RSI: 0000000000000000 RDI: 0c0000000000042b
+  RBP: ffffa59e73fabb80 R08: ffff9965ca6e8bb8 R09: 0000000000000000
+  R10: ffffffffa5a2f62d R11: 0000030b372e9fff R12: ffff997b79db5738
+  R13: 000000000000042b R14: 0c0000000000042b R15: 1ffffffffff7a80a
+  FS:  00007f549d1bb700(0000) GS:ffff99d3cf680000(0000) knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: 0000440d035b3180 CR3: 0000002243176004 CR4: 00000000003706e0
+  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+  Call Trace:
+   <TASK>
+   change_pte_range+0x36e/0x880
+   change_p4d_range+0x2e8/0x670
+   change_protection_range+0x14e/0x2c0
+   mprotect_fixup+0x1ee/0x330
+   do_mprotect_pkey+0x34c/0x440
+   __x64_sys_mprotect+0x1d/0x30
+
+It triggers because pfn_swap_entry_to_page() could be called upon e.g. a
+genuine swap entry.
+
+Fix it by only calling it when it's a write migration entry where the page*
+is used.
+
+[1] https://lore.kernel.org/lkml/CAOUHufaVC2Za-p8m0aiHw6YkheDcrO-C3wRGixwDS32VTS+k1w@mail.gmail.com/
+
+Link: https://lkml.kernel.org/r/20220823221138.45602-1-peterx@redhat.com
+Fixes: 6c287605fd56 ("mm: remember exclusively mapped anonymous pages with PG_anon_exclusive")
+Signed-off-by: Peter Xu <peterx@redhat.com>
+Reported-by: Yu Zhao <yuzhao@google.com>
+Tested-by: Yu Zhao <yuzhao@google.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Cc: "Huang, Ying" <ying.huang@intel.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/mprotect.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/mm/mprotect.c
++++ b/mm/mprotect.c
+@@ -158,10 +158,11 @@ static unsigned long change_pte_range(st
+                       pages++;
+               } else if (is_swap_pte(oldpte)) {
+                       swp_entry_t entry = pte_to_swp_entry(oldpte);
+-                      struct page *page = pfn_swap_entry_to_page(entry);
+                       pte_t newpte;
+                       if (is_writable_migration_entry(entry)) {
++                              struct page *page = pfn_swap_entry_to_page(entry);
++
+                               /*
+                                * A protection check is difficult so
+                                * just be safe and disable write
diff --git a/queue-5.19/nouveau-explicitly-wait-on-the-fence-in-nouveau_bo_move_m2mf.patch b/queue-5.19/nouveau-explicitly-wait-on-the-fence-in-nouveau_bo_move_m2mf.patch
new file mode 100644 (file)
index 0000000..f691a34
--- /dev/null
@@ -0,0 +1,40 @@
+From 6b04ce966a738ecdd9294c9593e48513c0dc90aa Mon Sep 17 00:00:00 2001
+From: Karol Herbst <kherbst@redhat.com>
+Date: Fri, 19 Aug 2022 22:09:28 +0200
+Subject: nouveau: explicitly wait on the fence in nouveau_bo_move_m2mf
+
+From: Karol Herbst <kherbst@redhat.com>
+
+commit 6b04ce966a738ecdd9294c9593e48513c0dc90aa upstream.
+
+It is a bit unclear to us why that's helping, but it does and unbreaks
+suspend/resume on a lot of GPUs without any known drawbacks.
+
+Cc: stable@vger.kernel.org # v5.15+
+Closes: https://gitlab.freedesktop.org/drm/nouveau/-/issues/156
+Signed-off-by: Karol Herbst <kherbst@redhat.com>
+Reviewed-by: Lyude Paul <lyude@redhat.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20220819200928.401416-1-kherbst@redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/nouveau/nouveau_bo.c |    9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
++++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
+@@ -820,6 +820,15 @@ nouveau_bo_move_m2mf(struct ttm_buffer_o
+               if (ret == 0) {
+                       ret = nouveau_fence_new(chan, false, &fence);
+                       if (ret == 0) {
++                              /* TODO: figure out a better solution here
++                               *
++                               * wait on the fence here explicitly as going through
++                               * ttm_bo_move_accel_cleanup somehow doesn't seem to do it.
++                               *
++                               * Without this the operation can timeout and we'll fallback to a
++                               * software copy, which might take several minutes to finish.
++                               */
++                              nouveau_fence_wait(fence, false, false);
+                               ret = ttm_bo_move_accel_cleanup(bo,
+                                                               &fence->base,
+                                                               evict, false,
diff --git a/queue-5.19/ocfs2-fix-freeing-uninitialized-resource-on-ocfs2_dlm_shutdown.patch b/queue-5.19/ocfs2-fix-freeing-uninitialized-resource-on-ocfs2_dlm_shutdown.patch
new file mode 100644 (file)
index 0000000..219939a
--- /dev/null
@@ -0,0 +1,69 @@
+From 550842cc60987b269e31b222283ade3e1b6c7fc8 Mon Sep 17 00:00:00 2001
+From: Heming Zhao <ocfs2-devel@oss.oracle.com>
+Date: Mon, 15 Aug 2022 16:57:54 +0800
+Subject: ocfs2: fix freeing uninitialized resource on ocfs2_dlm_shutdown
+
+From: Heming Zhao <ocfs2-devel@oss.oracle.com>
+
+commit 550842cc60987b269e31b222283ade3e1b6c7fc8 upstream.
+
+After commit 0737e01de9c4 ("ocfs2: ocfs2_mount_volume does cleanup job
+before return error"), any procedure that fails after ocfs2_dlm_init()
+will trigger a crash when calling ocfs2_dlm_shutdown().
+
+ie: On local mount mode, no dlm resource is initialized.  If
+ocfs2_mount_volume() fails in ocfs2_find_slot(), error handling will call
+ocfs2_dlm_shutdown(), then does dlm resource cleanup job, which will
+trigger kernel crash.
+
+This solution should bypass uninitialized resources in
+ocfs2_dlm_shutdown().
+
+Link: https://lkml.kernel.org/r/20220815085754.20417-1-heming.zhao@suse.com
+Fixes: 0737e01de9c4 ("ocfs2: ocfs2_mount_volume does cleanup job before return error")
+Signed-off-by: Heming Zhao <heming.zhao@suse.com>
+Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
+Cc: Mark Fasheh <mark@fasheh.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Cc: Changwei Ge <gechangwei@live.cn>
+Cc: Gang He <ghe@suse.com>
+Cc: Jun Piao <piaojun@huawei.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ocfs2/dlmglue.c |    8 +++++---
+ fs/ocfs2/super.c   |    3 +--
+ 2 files changed, 6 insertions(+), 5 deletions(-)
+
+--- a/fs/ocfs2/dlmglue.c
++++ b/fs/ocfs2/dlmglue.c
+@@ -3403,10 +3403,12 @@ void ocfs2_dlm_shutdown(struct ocfs2_sup
+       ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres);
+       ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres);
+-      ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
+-      osb->cconn = NULL;
++      if (osb->cconn) {
++              ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
++              osb->cconn = NULL;
+-      ocfs2_dlm_shutdown_debug(osb);
++              ocfs2_dlm_shutdown_debug(osb);
++      }
+ }
+ static int ocfs2_drop_lock(struct ocfs2_super *osb,
+--- a/fs/ocfs2/super.c
++++ b/fs/ocfs2/super.c
+@@ -1914,8 +1914,7 @@ static void ocfs2_dismount_volume(struct
+           !ocfs2_is_hard_readonly(osb))
+               hangup_needed = 1;
+-      if (osb->cconn)
+-              ocfs2_dlm_shutdown(osb, hangup_needed);
++      ocfs2_dlm_shutdown(osb, hangup_needed);
+       ocfs2_blockcheck_stats_debugfs_remove(&osb->osb_ecc_stats);
+       debugfs_remove_recursive(osb->osb_debug_root);
diff --git a/queue-5.19/perf-x86-intel-fix-pebs-event-constraints-for-adl.patch b/queue-5.19/perf-x86-intel-fix-pebs-event-constraints-for-adl.patch
new file mode 100644 (file)
index 0000000..7b72f15
--- /dev/null
@@ -0,0 +1,36 @@
+From cde643ff75bc20c538dfae787ca3b587bab16b50 Mon Sep 17 00:00:00 2001
+From: Kan Liang <kan.liang@linux.intel.com>
+Date: Thu, 18 Aug 2022 11:44:29 -0700
+Subject: perf/x86/intel: Fix pebs event constraints for ADL
+
+From: Kan Liang <kan.liang@linux.intel.com>
+
+commit cde643ff75bc20c538dfae787ca3b587bab16b50 upstream.
+
+According to the latest event list, the LOAD_LATENCY PEBS event only
+works on the GP counter 0 and 1 for ADL and RPL.
+
+Update the pebs event constraints table.
+
+Fixes: f83d2f91d259 ("perf/x86/intel: Add Alder Lake Hybrid support")
+Reported-by: Ammy Yi <ammy.yi@intel.com>
+Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20220818184429.2355857-1-kan.liang@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/intel/ds.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/events/intel/ds.c
++++ b/arch/x86/events/intel/ds.c
+@@ -822,7 +822,7 @@ struct event_constraint intel_glm_pebs_e
+ struct event_constraint intel_grt_pebs_event_constraints[] = {
+       /* Allow all events as PEBS with no flags */
+-      INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0xf),
++      INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0x3),
+       INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xf),
+       EVENT_CONSTRAINT_END
+ };
diff --git a/queue-5.19/perf-x86-lbr-enable-the-branch-type-for-the-arch-lbr-by-default.patch b/queue-5.19/perf-x86-lbr-enable-the-branch-type-for-the-arch-lbr-by-default.patch
new file mode 100644 (file)
index 0000000..a9aee49
--- /dev/null
@@ -0,0 +1,60 @@
+From 32ba156df1b1c8804a4e5be5339616945eafea22 Mon Sep 17 00:00:00 2001
+From: Kan Liang <kan.liang@linux.intel.com>
+Date: Tue, 16 Aug 2022 05:56:11 -0700
+Subject: perf/x86/lbr: Enable the branch type for the Arch LBR by default
+
+From: Kan Liang <kan.liang@linux.intel.com>
+
+commit 32ba156df1b1c8804a4e5be5339616945eafea22 upstream.
+
+On the platform with Arch LBR, the HW raw branch type encoding may leak
+to the perf tool when the SAVE_TYPE option is not set.
+
+In the intel_pmu_store_lbr(), the HW raw branch type is stored in
+lbr_entries[].type. If the SAVE_TYPE option is set, the
+lbr_entries[].type will be converted into the generic PERF_BR_* type
+in the intel_pmu_lbr_filter() and exposed to the user tools.
+But if the SAVE_TYPE option is NOT set by the user, the current perf
+kernel doesn't clear the field. The HW raw branch type leaks.
+
+There are two solutions to fix the issue for the Arch LBR.
+One is to clear the field if the SAVE_TYPE option is NOT set.
+The other solution is to unconditionally convert the branch type and
+expose the generic type to the user tools.
+
+The latter is implemented here, because
+- The branch type is valuable information. I don't see a case where
+  you would not benefit from the branch type. (Stephane Eranian)
+- Not having the branch type DOES NOT save any space in the
+  branch record (Stephane Eranian)
+- The Arch LBR HW can retrieve the common branch types from the
+  LBR_INFO. It doesn't require the high overhead SW disassemble.
+
+Fixes: 47125db27e47 ("perf/x86/intel/lbr: Support Architectural LBR")
+Reported-by: Stephane Eranian <eranian@google.com>
+Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20220816125612.2042397-1-kan.liang@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/intel/lbr.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/arch/x86/events/intel/lbr.c
++++ b/arch/x86/events/intel/lbr.c
+@@ -1097,6 +1097,14 @@ static int intel_pmu_setup_hw_lbr_filter
+       if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
+               reg->config = mask;
++
++              /*
++               * The Arch LBR HW can retrieve the common branch types
++               * from the LBR_INFO. It doesn't require the high overhead
++               * SW disassemble.
++               * Enable the branch type by default for the Arch LBR.
++               */
++              reg->reg |= X86_BR_TYPE_SAVE;
+               return 0;
+       }
diff --git a/queue-5.19/revert-memcg-cleanup-racy-sum-avoidance-code.patch b/queue-5.19/revert-memcg-cleanup-racy-sum-avoidance-code.patch
new file mode 100644 (file)
index 0000000..0fbba52
--- /dev/null
@@ -0,0 +1,95 @@
+From dbb16df6443c59e8a1ef21c2272fcf387d600ddf Mon Sep 17 00:00:00 2001
+From: Shakeel Butt <shakeelb@google.com>
+Date: Wed, 17 Aug 2022 17:21:39 +0000
+Subject: Revert "memcg: cleanup racy sum avoidance code"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Shakeel Butt <shakeelb@google.com>
+
+commit dbb16df6443c59e8a1ef21c2272fcf387d600ddf upstream.
+
+This reverts commit 96e51ccf1af33e82f429a0d6baebba29c6448d0f.
+
+Recently we started running the kernel with rstat infrastructure on
+production traffic and began to see negative memcg stats values.
+Particularly the 'sock' stat is the one which we observed having negative
+value.
+
+$ grep "sock " /mnt/memory/job/memory.stat
+sock 253952
+total_sock 18446744073708724224
+
+Re-run after couple of seconds
+
+$ grep "sock " /mnt/memory/job/memory.stat
+sock 253952
+total_sock 53248
+
+For now we are only seeing this issue on large machines (256 CPUs) and
+only with the 'sock' stat.  I think the networking stack increases the stat on
+one cpu and decreases it on another cpu much more often.  So, this negative
+sock is due to rstat flusher flushing the stats on the CPU that has seen
+the decrement of sock but missed the CPU that has increments.  A typical
+race condition.
+
+For easy stable backport, revert is the most simple solution.  For long
+term solution, I am thinking of two directions.  First is just reduce the
+race window by optimizing the rstat flusher.  Second is if the reader sees
+a negative stat value, force flush and restart the stat collection.
+Basically retry but limited.
+
+Link: https://lkml.kernel.org/r/20220817172139.3141101-1-shakeelb@google.com
+Fixes: 96e51ccf1af33e8 ("memcg: cleanup racy sum avoidance code")
+Signed-off-by: Shakeel Butt <shakeelb@google.com>
+Cc: "Michal Koutný" <mkoutny@suse.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Roman Gushchin <roman.gushchin@linux.dev>
+Cc: Muchun Song <songmuchun@bytedance.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Yosry Ahmed <yosryahmed@google.com>
+Cc: Greg Thelen <gthelen@google.com>
+Cc: <stable@vger.kernel.org>   [5.15]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/memcontrol.h |   15 +++++++++++++--
+ 1 file changed, 13 insertions(+), 2 deletions(-)
+
+--- a/include/linux/memcontrol.h
++++ b/include/linux/memcontrol.h
+@@ -978,19 +978,30 @@ static inline void mod_memcg_page_state(
+ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
+ {
+-      return READ_ONCE(memcg->vmstats.state[idx]);
++      long x = READ_ONCE(memcg->vmstats.state[idx]);
++#ifdef CONFIG_SMP
++      if (x < 0)
++              x = 0;
++#endif
++      return x;
+ }
+ static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
+                                             enum node_stat_item idx)
+ {
+       struct mem_cgroup_per_node *pn;
++      long x;
+       if (mem_cgroup_disabled())
+               return node_page_state(lruvec_pgdat(lruvec), idx);
+       pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+-      return READ_ONCE(pn->lruvec_stats.state[idx]);
++      x = READ_ONCE(pn->lruvec_stats.state[idx]);
++#ifdef CONFIG_SMP
++      if (x < 0)
++              x = 0;
++#endif
++      return x;
+ }
+ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
diff --git a/queue-5.19/riscv-dts-microchip-correct-l2-cache-interrupts.patch b/queue-5.19/riscv-dts-microchip-correct-l2-cache-interrupts.patch
new file mode 100644 (file)
index 0000000..3766089
--- /dev/null
@@ -0,0 +1,61 @@
+From 34fc9cc3aebe8b9e27d3bc821543dd482dc686ca Mon Sep 17 00:00:00 2001
+From: Heinrich Schuchardt <heinrich.schuchardt@canonical.com>
+Date: Wed, 17 Aug 2022 15:25:21 +0200
+Subject: riscv: dts: microchip: correct L2 cache interrupts
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Heinrich Schuchardt <heinrich.schuchardt@canonical.com>
+
+commit 34fc9cc3aebe8b9e27d3bc821543dd482dc686ca upstream.
+
+The "PolarFire SoC MSS Technical Reference Manual" documents the
+following PLIC interrupts:
+
+1 - L2 Cache Controller Signals when a metadata correction event occurs
+2 - L2 Cache Controller Signals when an uncorrectable metadata event occurs
+3 - L2 Cache Controller Signals when a data correction event occurs
+4 - L2 Cache Controller Signals when an uncorrectable data event occurs
+
+This differs from the SiFive FU540 which only has three L2 cache related
+interrupts.
+
+The sequence in the device tree is defined by an enum:
+
+    enum {
+            DIR_CORR = 0,
+            DATA_CORR,
+            DATA_UNCORR,
+            DIR_UNCORR,
+    };
+
+So the correct sequence of the L2 cache interrupts is
+
+    interrupts = <1>, <3>, <4>, <2>;
+
+[Conor]
+This manifests as an unusable system if the l2-cache driver is enabled,
+as the wrong interrupt gets cleared & the handler prints errors to the
+console ad infinitum.
+
+Fixes: 0fa6107eca41 ("RISC-V: Initial DTS for Microchip ICICLE board")
+CC: stable@vger.kernel.org # 5.15: e35b07a7df9b: riscv: dts: microchip: mpfs: Group tuples in interrupt properties
+Signed-off-by: Heinrich Schuchardt <heinrich.schuchardt@canonical.com>
+Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/boot/dts/microchip/mpfs.dtsi |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/riscv/boot/dts/microchip/mpfs.dtsi
++++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi
+@@ -169,7 +169,7 @@
+                       cache-size = <2097152>;
+                       cache-unified;
+                       interrupt-parent = <&plic>;
+-                      interrupts = <1>, <2>, <3>;
++                      interrupts = <1>, <3>, <4>, <2>;
+               };
+               clint: clint@2000000 {
diff --git a/queue-5.19/riscv-signal-fix-missing-prototype-warning.patch b/queue-5.19/riscv-signal-fix-missing-prototype-warning.patch
new file mode 100644 (file)
index 0000000..ab41ea4
--- /dev/null
@@ -0,0 +1,54 @@
+From b5c3aca86d2698c4850b6ee8b341938025d2780c Mon Sep 17 00:00:00 2001
+From: Conor Dooley <conor.dooley@microchip.com>
+Date: Sun, 14 Aug 2022 15:12:37 +0100
+Subject: riscv: signal: fix missing prototype warning
+
+From: Conor Dooley <conor.dooley@microchip.com>
+
+commit b5c3aca86d2698c4850b6ee8b341938025d2780c upstream.
+
+Fix the warning:
+arch/riscv/kernel/signal.c:316:27: warning: no previous prototype for function 'do_notify_resume' [-Wmissing-prototypes]
+asmlinkage __visible void do_notify_resume(struct pt_regs *regs,
+
+All other functions in the file are static & none of the existing
+headers stood out as an obvious location. Create signal.h to hold the
+declaration.
+
+Fixes: e2c0cdfba7f6 ("RISC-V: User-facing API")
+Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20220814141237.493457-4-mail@conchuod.ie
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/include/asm/signal.h |   12 ++++++++++++
+ arch/riscv/kernel/signal.c      |    1 +
+ 2 files changed, 13 insertions(+)
+ create mode 100644 arch/riscv/include/asm/signal.h
+
+--- /dev/null
++++ b/arch/riscv/include/asm/signal.h
+@@ -0,0 +1,12 @@
++/* SPDX-License-Identifier: GPL-2.0-only */
++
++#ifndef __ASM_SIGNAL_H
++#define __ASM_SIGNAL_H
++
++#include <uapi/asm/signal.h>
++#include <uapi/asm/ptrace.h>
++
++asmlinkage __visible
++void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags);
++
++#endif
+--- a/arch/riscv/kernel/signal.c
++++ b/arch/riscv/kernel/signal.c
+@@ -15,6 +15,7 @@
+ #include <asm/ucontext.h>
+ #include <asm/vdso.h>
++#include <asm/signal.h>
+ #include <asm/signal32.h>
+ #include <asm/switch_to.h>
+ #include <asm/csr.h>
diff --git a/queue-5.19/riscv-traps-add-missing-prototype.patch b/queue-5.19/riscv-traps-add-missing-prototype.patch
new file mode 100644 (file)
index 0000000..ca497e5
--- /dev/null
@@ -0,0 +1,51 @@
+From d951b20b9def73dcc39a5379831525d0d2a537e9 Mon Sep 17 00:00:00 2001
+From: Conor Dooley <conor.dooley@microchip.com>
+Date: Sun, 14 Aug 2022 15:12:38 +0100
+Subject: riscv: traps: add missing prototype
+
+From: Conor Dooley <conor.dooley@microchip.com>
+
+commit d951b20b9def73dcc39a5379831525d0d2a537e9 upstream.
+
+Sparse complains:
+arch/riscv/kernel/traps.c:213:6: warning: symbol 'shadow_stack' was not declared. Should it be static?
+
+The variable is used in entry.S, so declare shadow_stack there
+alongside SHADOW_OVERFLOW_STACK_SIZE.
+
+Fixes: 31da94c25aea ("riscv: add VMAP_STACK overflow detection")
+Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20220814141237.493457-5-mail@conchuod.ie
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/include/asm/thread_info.h |    2 ++
+ arch/riscv/kernel/traps.c            |    3 ++-
+ 2 files changed, 4 insertions(+), 1 deletion(-)
+
+--- a/arch/riscv/include/asm/thread_info.h
++++ b/arch/riscv/include/asm/thread_info.h
+@@ -42,6 +42,8 @@
+ #ifndef __ASSEMBLY__
++extern long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE / sizeof(long)];
++
+ #include <asm/processor.h>
+ #include <asm/csr.h>
+--- a/arch/riscv/kernel/traps.c
++++ b/arch/riscv/kernel/traps.c
+@@ -20,9 +20,10 @@
+ #include <asm/asm-prototypes.h>
+ #include <asm/bug.h>
++#include <asm/csr.h>
+ #include <asm/processor.h>
+ #include <asm/ptrace.h>
+-#include <asm/csr.h>
++#include <asm/thread_info.h>
+ int show_unhandled_signals = 1;
diff --git a/queue-5.19/s390-fix-double-free-of-gs-and-ri-cbs-on-fork-failure.patch b/queue-5.19/s390-fix-double-free-of-gs-and-ri-cbs-on-fork-failure.patch
new file mode 100644 (file)
index 0000000..917d59d
--- /dev/null
@@ -0,0 +1,81 @@
+From 13cccafe0edcd03bf1c841de8ab8a1c8e34f77d9 Mon Sep 17 00:00:00 2001
+From: Brian Foster <bfoster@redhat.com>
+Date: Tue, 16 Aug 2022 11:54:07 -0400
+Subject: s390: fix double free of GS and RI CBs on fork() failure
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 13cccafe0edcd03bf1c841de8ab8a1c8e34f77d9 upstream.
+
+The pointers for guarded storage and runtime instrumentation control
+blocks are stored in the thread_struct of the associated task. These
+pointers are initially copied on fork() via arch_dup_task_struct()
+and then cleared via copy_thread() before fork() returns. If fork()
+happens to fail after the initial task dup and before copy_thread(),
+the newly allocated task and associated thread_struct memory are
+freed via free_task() -> arch_release_task_struct(). This results in
+a double free of the guarded storage and runtime info structs
+because the fields in the failed task still refer to memory
+associated with the source task.
+
+This problem can manifest as a BUG_ON() in set_freepointer() (with
+CONFIG_SLAB_FREELIST_HARDENED enabled) or KASAN splat (if enabled)
+when running trinity syscall fuzz tests on s390x. To avoid this
+problem, clear the associated pointer fields in
+arch_dup_task_struct() immediately after the new task is copied.
+Note that the RI flag is still cleared in copy_thread() because it
+resides in thread stack memory and that is where stack info is
+copied.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Fixes: 8d9047f8b967c ("s390/runtime instrumentation: simplify task exit handling")
+Fixes: 7b83c6297d2fc ("s390/guarded storage: simplify task exit handling")
+Cc: <stable@vger.kernel.org> # 4.15
+Reviewed-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Link: https://lore.kernel.org/r/20220816155407.537372-1-bfoster@redhat.com
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/kernel/process.c |   22 ++++++++++++++++------
+ 1 file changed, 16 insertions(+), 6 deletions(-)
+
+--- a/arch/s390/kernel/process.c
++++ b/arch/s390/kernel/process.c
+@@ -91,6 +91,18 @@ int arch_dup_task_struct(struct task_str
+       memcpy(dst, src, arch_task_struct_size);
+       dst->thread.fpu.regs = dst->thread.fpu.fprs;
++
++      /*
++       * Don't transfer over the runtime instrumentation or the guarded
++       * storage control block pointers. These fields are cleared here instead
++       * of in copy_thread() to avoid premature freeing of associated memory
++       * on fork() failure. Wait to clear the RI flag because ->stack still
++       * refers to the source thread.
++       */
++      dst->thread.ri_cb = NULL;
++      dst->thread.gs_cb = NULL;
++      dst->thread.gs_bc_cb = NULL;
++
+       return 0;
+ }
+@@ -150,13 +162,11 @@ int copy_thread(struct task_struct *p, c
+       frame->childregs.flags = 0;
+       if (new_stackp)
+               frame->childregs.gprs[15] = new_stackp;
+-
+-      /* Don't copy runtime instrumentation info */
+-      p->thread.ri_cb = NULL;
++      /*
++       * Clear the runtime instrumentation flag after the above childregs
++       * copy. The CB pointer was already cleared in arch_dup_task_struct().
++       */
+       frame->childregs.psw.mask &= ~PSW_MASK_RI;
+-      /* Don't copy guarded storage control block */
+-      p->thread.gs_cb = NULL;
+-      p->thread.gs_bc_cb = NULL;
+       /* Set a new TLS ?  */
+       if (clone_flags & CLONE_SETTLS) {
diff --git a/queue-5.19/s390-mm-do-not-trigger-write-fault-when-vma-does-not-allow-vm_write.patch b/queue-5.19/s390-mm-do-not-trigger-write-fault-when-vma-does-not-allow-vm_write.patch
new file mode 100644 (file)
index 0000000..11a7aee
--- /dev/null
@@ -0,0 +1,49 @@
+From 41ac42f137080bc230b5882e3c88c392ab7f2d32 Mon Sep 17 00:00:00 2001
+From: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+Date: Wed, 17 Aug 2022 15:26:03 +0200
+Subject: s390/mm: do not trigger write fault when vma does not allow VM_WRITE
+
+From: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+
+commit 41ac42f137080bc230b5882e3c88c392ab7f2d32 upstream.
+
+For non-protection pXd_none() page faults in do_dat_exception(), we
+call do_exception() with access == (VM_READ | VM_WRITE | VM_EXEC).
+In do_exception(), vma->vm_flags is checked against that before
+calling handle_mm_fault().
+
+Since commit 92f842eac7ee3 ("[S390] store indication fault optimization"),
+we call handle_mm_fault() with FAULT_FLAG_WRITE, when recognizing that
+it was a write access. However, the vma flags check is still only
+checking against (VM_READ | VM_WRITE | VM_EXEC), and therefore also
+calling handle_mm_fault() with FAULT_FLAG_WRITE in cases where the vma
+does not allow VM_WRITE.
+
+Fix this by changing access check in do_exception() to VM_WRITE only,
+when recognizing write access.
+
+Link: https://lkml.kernel.org/r/20220811103435.188481-3-david@redhat.com
+Fixes: 92f842eac7ee3 ("[S390] store indication fault optimization")
+Cc: <stable@vger.kernel.org>
+Reported-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/mm/fault.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/s390/mm/fault.c
++++ b/arch/s390/mm/fault.c
+@@ -379,7 +379,9 @@ static inline vm_fault_t do_exception(st
+       flags = FAULT_FLAG_DEFAULT;
+       if (user_mode(regs))
+               flags |= FAULT_FLAG_USER;
+-      if (access == VM_WRITE || is_write)
++      if (is_write)
++              access = VM_WRITE;
++      if (access == VM_WRITE)
+               flags |= FAULT_FLAG_WRITE;
+       mmap_read_lock(mm);
index dd07f3000244a392176ea1c7d8180d42318dbd87..889eb5f03148f7d45e9873f40b667eb02682700c 100644 (file)
@@ -91,3 +91,41 @@ net-stmmac-work-around-sporadic-tx-issue-on-link-up.patch
 net-lantiq_xrx200-confirm-skb-is-allocated-before-us.patch
 net-lantiq_xrx200-fix-lock-under-memory-pressure.patch
 net-lantiq_xrx200-restore-buffer-if-memory-allocatio.patch
+btrfs-fix-silent-failure-when-deleting-root-reference.patch
+btrfs-replace-drop-assert-for-suspended-replace.patch
+btrfs-add-info-when-mount-fails-due-to-stale-replace-target.patch
+btrfs-fix-space-cache-corruption-and-potential-double-allocations.patch
+btrfs-check-if-root-is-readonly-while-setting-security-xattr.patch
+btrfs-fix-possible-memory-leak-in-btrfs_get_dev_args_from_path.patch
+btrfs-update-generation-of-hole-file-extent-item-when-merging-holes.patch
+x86-boot-don-t-propagate-uninitialized-boot_params-cc_blob_address.patch
+perf-x86-intel-fix-pebs-event-constraints-for-adl.patch
+perf-x86-lbr-enable-the-branch-type-for-the-arch-lbr-by-default.patch
+x86-entry-fix-entry_int80_compat-for-xen-pv-guests.patch
+x86-unwind-orc-unwind-ftrace-trampolines-with-correct-orc-entry.patch
+x86-sev-don-t-use-cc_platform_has-for-early-sev-snp-calls.patch
+x86-bugs-add-unknown-reporting-for-mmio-stale-data.patch
+x86-nospec-unwreck-the-rsb-stuffing.patch
+x86-pat-have-pat_enabled-properly-reflect-state-when-running-on-xen.patch
+loop-check-for-overflow-while-configuring-loop.patch
+writeback-avoid-use-after-free-after-removing-device.patch
+audit-move-audit_return_fixup-before-the-filters.patch
+asm-generic-sections-refactor-memory_intersects.patch
+mm-damon-dbgfs-avoid-duplicate-context-directory-creation.patch
+s390-mm-do-not-trigger-write-fault-when-vma-does-not-allow-vm_write.patch
+bootmem-remove-the-vmemmap-pages-from-kmemleak-in-put_page_bootmem.patch
+mm-hugetlb-avoid-corrupting-page-mapping-in-hugetlb_mcopy_atomic_pte.patch
+mm-mprotect-only-reference-swap-pfn-page-if-type-match.patch
+cifs-skip-extra-null-byte-in-filenames.patch
+s390-fix-double-free-of-gs-and-ri-cbs-on-fork-failure.patch
+fbdev-fbcon-properly-revert-changes-when-vc_resize-failed.patch
+revert-memcg-cleanup-racy-sum-avoidance-code.patch
+shmem-update-folio-if-shmem_replace_page-updates-the-page.patch
+acpi-processor-remove-freq-qos-request-for-all-cpus.patch
+nouveau-explicitly-wait-on-the-fence-in-nouveau_bo_move_m2mf.patch
+smb3-missing-inode-locks-in-punch-hole.patch
+ocfs2-fix-freeing-uninitialized-resource-on-ocfs2_dlm_shutdown.patch
+xen-privcmd-fix-error-exit-of-privcmd_ioctl_dm_op.patch
+riscv-signal-fix-missing-prototype-warning.patch
+riscv-traps-add-missing-prototype.patch
+riscv-dts-microchip-correct-l2-cache-interrupts.patch
diff --git a/queue-5.19/shmem-update-folio-if-shmem_replace_page-updates-the-page.patch b/queue-5.19/shmem-update-folio-if-shmem_replace_page-updates-the-page.patch
new file mode 100644 (file)
index 0000000..fe96878
--- /dev/null
@@ -0,0 +1,41 @@
+From 9dfb3b8d655022760ca68af11821f1c63aa547c3 Mon Sep 17 00:00:00 2001
+From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+Date: Sat, 30 Jul 2022 05:25:18 +0100
+Subject: shmem: update folio if shmem_replace_page() updates the page
+
+From: Matthew Wilcox (Oracle) <willy@infradead.org>
+
+commit 9dfb3b8d655022760ca68af11821f1c63aa547c3 upstream.
+
+If we allocate a new page, we need to make sure that our folio matches
+that new page.
+
+If we do end up in this code path, we store the wrong page in the shmem
+inode's page cache, and I would rather imagine that data corruption
+ensues.
+
+This will be solved by changing shmem_replace_page() to
+shmem_replace_folio(), but this is the minimal fix.
+
+Link: https://lkml.kernel.org/r/20220730042518.1264767-1-willy@infradead.org
+Fixes: da08e9b79323 ("mm/shmem: convert shmem_swapin_page() to shmem_swapin_folio()")
+Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Reviewed-by: William Kucharski <william.kucharski@oracle.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/shmem.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -1771,6 +1771,7 @@ static int shmem_swapin_folio(struct ino
+       if (shmem_should_replace_folio(folio, gfp)) {
+               error = shmem_replace_page(&page, gfp, info, index);
++              folio = page_folio(page);
+               if (error)
+                       goto failed;
+       }
diff --git a/queue-5.19/smb3-missing-inode-locks-in-punch-hole.patch b/queue-5.19/smb3-missing-inode-locks-in-punch-hole.patch
new file mode 100644 (file)
index 0000000..6dcb78f
--- /dev/null
@@ -0,0 +1,60 @@
+From ba0803050d610d5072666be727bca5e03e55b242 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Tue, 23 Aug 2022 02:10:56 -0500
+Subject: smb3: missing inode locks in punch hole
+
+From: David Howells <dhowells@redhat.com>
+
+commit ba0803050d610d5072666be727bca5e03e55b242 upstream.
+
+smb3 fallocate punch hole was not grabbing the inode or filemap_invalidate
+locks, so it could race with the pagemap reinstantiating the page.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/cifs/smb2ops.c |   12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/fs/cifs/smb2ops.c
++++ b/fs/cifs/smb2ops.c
+@@ -3671,7 +3671,7 @@ static long smb3_zero_range(struct file
+ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
+                           loff_t offset, loff_t len)
+ {
+-      struct inode *inode;
++      struct inode *inode = file_inode(file);
+       struct cifsFileInfo *cfile = file->private_data;
+       struct file_zero_data_information fsctl_buf;
+       long rc;
+@@ -3680,14 +3680,12 @@ static long smb3_punch_hole(struct file
+       xid = get_xid();
+-      inode = d_inode(cfile->dentry);
+-
++      inode_lock(inode);
+       /* Need to make file sparse, if not already, before freeing range. */
+       /* Consider adding equivalent for compressed since it could also work */
+       if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse)) {
+               rc = -EOPNOTSUPP;
+-              free_xid(xid);
+-              return rc;
++              goto out;
+       }
+       filemap_invalidate_lock(inode->i_mapping);
+@@ -3707,8 +3705,10 @@ static long smb3_punch_hole(struct file
+                       true /* is_fctl */, (char *)&fsctl_buf,
+                       sizeof(struct file_zero_data_information),
+                       CIFSMaxBufSize, NULL, NULL);
+-      free_xid(xid);
+       filemap_invalidate_unlock(inode->i_mapping);
++out:
++      inode_unlock(inode);
++      free_xid(xid);
+       return rc;
+ }
diff --git a/queue-5.19/writeback-avoid-use-after-free-after-removing-device.patch b/queue-5.19/writeback-avoid-use-after-free-after-removing-device.patch
new file mode 100644 (file)
index 0000000..a1c2389
--- /dev/null
@@ -0,0 +1,139 @@
+From f87904c075515f3e1d8f4a7115869d3b914674fd Mon Sep 17 00:00:00 2001
+From: Khazhismel Kumykov <khazhy@chromium.org>
+Date: Mon, 1 Aug 2022 08:50:34 -0700
+Subject: writeback: avoid use-after-free after removing device
+
+From: Khazhismel Kumykov <khazhy@chromium.org>
+
+commit f87904c075515f3e1d8f4a7115869d3b914674fd upstream.
+
+When a disk is removed, bdi_unregister gets called to stop further
+writeback and wait for associated delayed work to complete.  However,
+wb_inode_writeback_end() may schedule bandwidth estimation dwork after
+this has completed, which can result in the timer attempting to access the
+just freed bdi_writeback.
+
+Fix this by checking if the bdi_writeback is alive, similar to when
+scheduling writeback work.
+
+Since this requires wb->work_lock, and wb_inode_writeback_end() may get
+called from interrupt, switch wb->work_lock to an irqsafe lock.
+
+Link: https://lkml.kernel.org/r/20220801155034.3772543-1-khazhy@google.com
+Fixes: 45a2966fd641 ("writeback: fix bandwidth estimate for spiky workload")
+Signed-off-by: Khazhismel Kumykov <khazhy@google.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Cc: Michael Stapelberg <stapelberg+linux@google.com>
+Cc: Wu Fengguang <fengguang.wu@intel.com>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/fs-writeback.c   |   12 ++++++------
+ mm/backing-dev.c    |   10 +++++-----
+ mm/page-writeback.c |    6 +++++-
+ 3 files changed, 16 insertions(+), 12 deletions(-)
+
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -134,10 +134,10 @@ static bool inode_io_list_move_locked(st
+ static void wb_wakeup(struct bdi_writeback *wb)
+ {
+-      spin_lock_bh(&wb->work_lock);
++      spin_lock_irq(&wb->work_lock);
+       if (test_bit(WB_registered, &wb->state))
+               mod_delayed_work(bdi_wq, &wb->dwork, 0);
+-      spin_unlock_bh(&wb->work_lock);
++      spin_unlock_irq(&wb->work_lock);
+ }
+ static void finish_writeback_work(struct bdi_writeback *wb,
+@@ -164,7 +164,7 @@ static void wb_queue_work(struct bdi_wri
+       if (work->done)
+               atomic_inc(&work->done->cnt);
+-      spin_lock_bh(&wb->work_lock);
++      spin_lock_irq(&wb->work_lock);
+       if (test_bit(WB_registered, &wb->state)) {
+               list_add_tail(&work->list, &wb->work_list);
+@@ -172,7 +172,7 @@ static void wb_queue_work(struct bdi_wri
+       } else
+               finish_writeback_work(wb, work);
+-      spin_unlock_bh(&wb->work_lock);
++      spin_unlock_irq(&wb->work_lock);
+ }
+ /**
+@@ -2082,13 +2082,13 @@ static struct wb_writeback_work *get_nex
+ {
+       struct wb_writeback_work *work = NULL;
+-      spin_lock_bh(&wb->work_lock);
++      spin_lock_irq(&wb->work_lock);
+       if (!list_empty(&wb->work_list)) {
+               work = list_entry(wb->work_list.next,
+                                 struct wb_writeback_work, list);
+               list_del_init(&work->list);
+       }
+-      spin_unlock_bh(&wb->work_lock);
++      spin_unlock_irq(&wb->work_lock);
+       return work;
+ }
+--- a/mm/backing-dev.c
++++ b/mm/backing-dev.c
+@@ -260,10 +260,10 @@ void wb_wakeup_delayed(struct bdi_writeb
+       unsigned long timeout;
+       timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
+-      spin_lock_bh(&wb->work_lock);
++      spin_lock_irq(&wb->work_lock);
+       if (test_bit(WB_registered, &wb->state))
+               queue_delayed_work(bdi_wq, &wb->dwork, timeout);
+-      spin_unlock_bh(&wb->work_lock);
++      spin_unlock_irq(&wb->work_lock);
+ }
+ static void wb_update_bandwidth_workfn(struct work_struct *work)
+@@ -334,12 +334,12 @@ static void cgwb_remove_from_bdi_list(st
+ static void wb_shutdown(struct bdi_writeback *wb)
+ {
+       /* Make sure nobody queues further work */
+-      spin_lock_bh(&wb->work_lock);
++      spin_lock_irq(&wb->work_lock);
+       if (!test_and_clear_bit(WB_registered, &wb->state)) {
+-              spin_unlock_bh(&wb->work_lock);
++              spin_unlock_irq(&wb->work_lock);
+               return;
+       }
+-      spin_unlock_bh(&wb->work_lock);
++      spin_unlock_irq(&wb->work_lock);
+       cgwb_remove_from_bdi_list(wb);
+       /*
+--- a/mm/page-writeback.c
++++ b/mm/page-writeback.c
+@@ -2867,6 +2867,7 @@ static void wb_inode_writeback_start(str
+ static void wb_inode_writeback_end(struct bdi_writeback *wb)
+ {
++      unsigned long flags;
+       atomic_dec(&wb->writeback_inodes);
+       /*
+        * Make sure estimate of writeback throughput gets updated after
+@@ -2875,7 +2876,10 @@ static void wb_inode_writeback_end(struc
+        * that if multiple inodes end writeback at a similar time, they get
+        * batched into one bandwidth update.
+        */
+-      queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL);
++      spin_lock_irqsave(&wb->work_lock, flags);
++      if (test_bit(WB_registered, &wb->state))
++              queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL);
++      spin_unlock_irqrestore(&wb->work_lock, flags);
+ }
+ bool __folio_end_writeback(struct folio *folio)
diff --git a/queue-5.19/x86-boot-don-t-propagate-uninitialized-boot_params-cc_blob_address.patch b/queue-5.19/x86-boot-don-t-propagate-uninitialized-boot_params-cc_blob_address.patch
new file mode 100644 (file)
index 0000000..6d505ea
--- /dev/null
@@ -0,0 +1,88 @@
+From 4b1c742407571eff58b6de9881889f7ca7c4b4dc Mon Sep 17 00:00:00 2001
+From: Michael Roth <michael.roth@amd.com>
+Date: Tue, 23 Aug 2022 11:07:34 -0500
+Subject: x86/boot: Don't propagate uninitialized boot_params->cc_blob_address
+
+From: Michael Roth <michael.roth@amd.com>
+
+commit 4b1c742407571eff58b6de9881889f7ca7c4b4dc upstream.
+
+In some cases, bootloaders will leave boot_params->cc_blob_address
+uninitialized rather than zeroing it out. This field is only meant to be
+set by the boot/compressed kernel in order to pass information to the
+uncompressed kernel when SEV-SNP support is enabled.
+
+Therefore, there are no cases where the bootloader-provided values
+should be treated as anything other than garbage. Otherwise, the
+uncompressed kernel may attempt to access this bogus address, leading to
+a crash during early boot.
+
+Normally, sanitize_boot_params() would be used to clear out such fields
+but that happens too late: sev_enable() may have already initialized
+it to a valid value that should not be zeroed out. Instead, have
+sev_enable() zero it out unconditionally beforehand.
+
+Also ensure this happens for !CONFIG_AMD_MEM_ENCRYPT as well by also
+including this handling in the sev_enable() stub function.
+
+  [ bp: Massage commit message and comments. ]
+
+Fixes: b190a043c49a ("x86/sev: Add SEV-SNP feature detection/setup")
+Reported-by: Jeremi Piotrowski <jpiotrowski@linux.microsoft.com>
+Reported-by: watnuss@gmx.de
+Signed-off-by: Michael Roth <michael.roth@amd.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: stable@vger.kernel.org
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=216387
+Link: https://lore.kernel.org/r/20220823160734.89036-1-michael.roth@amd.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/boot/compressed/misc.h | 12 +++++++++++-
+ arch/x86/boot/compressed/sev.c  |  8 ++++++++
+ 2 files changed, 19 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
+index 4910bf230d7b..62208ec04ca4 100644
+--- a/arch/x86/boot/compressed/misc.h
++++ b/arch/x86/boot/compressed/misc.h
+@@ -132,7 +132,17 @@ void snp_set_page_private(unsigned long paddr);
+ void snp_set_page_shared(unsigned long paddr);
+ void sev_prep_identity_maps(unsigned long top_level_pgt);
+ #else
+-static inline void sev_enable(struct boot_params *bp) { }
++static inline void sev_enable(struct boot_params *bp)
++{
++      /*
++       * bp->cc_blob_address should only be set by boot/compressed kernel.
++       * Initialize it to 0 unconditionally (thus here in this stub too) to
++       * ensure that uninitialized values from buggy bootloaders aren't
++       * propagated.
++       */
++      if (bp)
++              bp->cc_blob_address = 0;
++}
+ static inline void sev_es_shutdown_ghcb(void) { }
+ static inline bool sev_es_check_ghcb_fault(unsigned long address)
+ {
+diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
+index 52f989f6acc2..c93930d5ccbd 100644
+--- a/arch/x86/boot/compressed/sev.c
++++ b/arch/x86/boot/compressed/sev.c
+@@ -276,6 +276,14 @@ void sev_enable(struct boot_params *bp)
+       struct msr m;
+       bool snp;
++      /*
++       * bp->cc_blob_address should only be set by boot/compressed kernel.
++       * Initialize it to 0 to ensure that uninitialized values from
++       * buggy bootloaders aren't propagated.
++       */
++      if (bp)
++              bp->cc_blob_address = 0;
++
+       /*
+        * Setup/preliminary detection of SNP. This will be sanity-checked
+        * against CPUID/MSR values later.
+-- 
+2.37.2
+
diff --git a/queue-5.19/x86-bugs-add-unknown-reporting-for-mmio-stale-data.patch b/queue-5.19/x86-bugs-add-unknown-reporting-for-mmio-stale-data.patch
new file mode 100644 (file)
index 0000000..b0f41b3
--- /dev/null
@@ -0,0 +1,209 @@
+From 7df548840c496b0141fb2404b889c346380c2b22 Mon Sep 17 00:00:00 2001
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Wed, 3 Aug 2022 14:41:32 -0700
+Subject: x86/bugs: Add "unknown" reporting for MMIO Stale Data
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 7df548840c496b0141fb2404b889c346380c2b22 upstream.
+
+Older Intel CPUs that are not in the affected processor list for MMIO
+Stale Data vulnerabilities currently report "Not affected" in sysfs,
+which may not be correct. Vulnerability status for these older CPUs is
+unknown.
+
+Add known-not-affected CPUs to the whitelist. Report "unknown"
+mitigation status for CPUs that are not in blacklist, whitelist and also
+don't enumerate MSR ARCH_CAPABILITIES bits that reflect hardware
+immunity to MMIO Stale Data vulnerabilities.
+
+Mitigation is not deployed when the status is unknown.
+
+  [ bp: Massage, fixup. ]
+
+Fixes: 8d50cdf8b834 ("x86/speculation/mmio: Add sysfs reporting for Processor MMIO Stale Data")
+Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Suggested-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/a932c154772f2121794a5f2eded1a11013114711.1657846269.git.pawan.kumar.gupta@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst |   14 +++
+ arch/x86/include/asm/cpufeatures.h                              |    5 -
+ arch/x86/kernel/cpu/bugs.c                                      |   14 ++-
+ arch/x86/kernel/cpu/common.c                                    |   42 ++++++----
+ 4 files changed, 56 insertions(+), 19 deletions(-)
+
+--- a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
++++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
+@@ -230,6 +230,20 @@ The possible values in this file are:
+      * - 'Mitigation: Clear CPU buffers'
+        - The processor is vulnerable and the CPU buffer clearing mitigation is
+          enabled.
++     * - 'Unknown: No mitigations'
++       - The processor vulnerability status is unknown because it is
++       out of Servicing period. Mitigation is not attempted.
++
++Definitions:
++------------
++
++Servicing period: The process of providing functional and security updates to
++Intel processors or platforms, utilizing the Intel Platform Update (IPU)
++process or other similar mechanisms.
++
++End of Servicing Updates (ESU): ESU is the date at which Intel will no
++longer provide Servicing, such as through IPU or other similar update
++processes. ESU dates will typically be aligned to end of quarter.
+ If the processor is vulnerable then the following information is appended to
+ the above information:
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -456,7 +456,8 @@
+ #define X86_BUG_ITLB_MULTIHIT         X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
+ #define X86_BUG_SRBDS                 X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
+ #define X86_BUG_MMIO_STALE_DATA               X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
+-#define X86_BUG_RETBLEED              X86_BUG(26) /* CPU is affected by RETBleed */
+-#define X86_BUG_EIBRS_PBRSB           X86_BUG(27) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
++#define X86_BUG_MMIO_UNKNOWN          X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */
++#define X86_BUG_RETBLEED              X86_BUG(27) /* CPU is affected by RETBleed */
++#define X86_BUG_EIBRS_PBRSB           X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
+ #endif /* _ASM_X86_CPUFEATURES_H */
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -433,7 +433,8 @@ static void __init mmio_select_mitigatio
+       u64 ia32_cap;
+       if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) ||
+-          cpu_mitigations_off()) {
++           boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) ||
++           cpu_mitigations_off()) {
+               mmio_mitigation = MMIO_MITIGATION_OFF;
+               return;
+       }
+@@ -538,6 +539,8 @@ out:
+               pr_info("TAA: %s\n", taa_strings[taa_mitigation]);
+       if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
+               pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]);
++      else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
++              pr_info("MMIO Stale Data: Unknown: No mitigations\n");
+ }
+ static void __init md_clear_select_mitigation(void)
+@@ -2275,6 +2278,9 @@ static ssize_t tsx_async_abort_show_stat
+ static ssize_t mmio_stale_data_show_state(char *buf)
+ {
++      if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
++              return sysfs_emit(buf, "Unknown: No mitigations\n");
++
+       if (mmio_mitigation == MMIO_MITIGATION_OFF)
+               return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]);
+@@ -2421,6 +2427,7 @@ static ssize_t cpu_show_common(struct de
+               return srbds_show_state(buf);
+       case X86_BUG_MMIO_STALE_DATA:
++      case X86_BUG_MMIO_UNKNOWN:
+               return mmio_stale_data_show_state(buf);
+       case X86_BUG_RETBLEED:
+@@ -2480,7 +2487,10 @@ ssize_t cpu_show_srbds(struct device *de
+ ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf)
+ {
+-      return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
++      if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
++              return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_UNKNOWN);
++      else
++              return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
+ }
+ ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf)
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1135,7 +1135,8 @@ static void identify_cpu_without_cpuid(s
+ #define NO_SWAPGS             BIT(6)
+ #define NO_ITLB_MULTIHIT      BIT(7)
+ #define NO_SPECTRE_V2         BIT(8)
+-#define NO_EIBRS_PBRSB                BIT(9)
++#define NO_MMIO                       BIT(9)
++#define NO_EIBRS_PBRSB                BIT(10)
+ #define VULNWL(vendor, family, model, whitelist)      \
+       X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
+@@ -1158,6 +1159,11 @@ static const __initconst struct x86_cpu_
+       VULNWL(VORTEX,  6, X86_MODEL_ANY,       NO_SPECULATION),
+       /* Intel Family 6 */
++      VULNWL_INTEL(TIGERLAKE,                 NO_MMIO),
++      VULNWL_INTEL(TIGERLAKE_L,               NO_MMIO),
++      VULNWL_INTEL(ALDERLAKE,                 NO_MMIO),
++      VULNWL_INTEL(ALDERLAKE_L,               NO_MMIO),
++
+       VULNWL_INTEL(ATOM_SALTWELL,             NO_SPECULATION | NO_ITLB_MULTIHIT),
+       VULNWL_INTEL(ATOM_SALTWELL_TABLET,      NO_SPECULATION | NO_ITLB_MULTIHIT),
+       VULNWL_INTEL(ATOM_SALTWELL_MID,         NO_SPECULATION | NO_ITLB_MULTIHIT),
+@@ -1176,9 +1182,9 @@ static const __initconst struct x86_cpu_
+       VULNWL_INTEL(ATOM_AIRMONT_MID,          NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+       VULNWL_INTEL(ATOM_AIRMONT_NP,           NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+-      VULNWL_INTEL(ATOM_GOLDMONT,             NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+-      VULNWL_INTEL(ATOM_GOLDMONT_D,           NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+-      VULNWL_INTEL(ATOM_GOLDMONT_PLUS,        NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
++      VULNWL_INTEL(ATOM_GOLDMONT,             NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++      VULNWL_INTEL(ATOM_GOLDMONT_D,           NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++      VULNWL_INTEL(ATOM_GOLDMONT_PLUS,        NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
+       /*
+        * Technically, swapgs isn't serializing on AMD (despite it previously
+@@ -1193,18 +1199,18 @@ static const __initconst struct x86_cpu_
+       VULNWL_INTEL(ATOM_TREMONT_D,            NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
+       /* AMD Family 0xf - 0x12 */
+-      VULNWL_AMD(0x0f,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+-      VULNWL_AMD(0x10,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+-      VULNWL_AMD(0x11,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+-      VULNWL_AMD(0x12,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
++      VULNWL_AMD(0x0f,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++      VULNWL_AMD(0x10,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++      VULNWL_AMD(0x11,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++      VULNWL_AMD(0x12,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+       /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
+-      VULNWL_AMD(X86_FAMILY_ANY,      NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+-      VULNWL_HYGON(X86_FAMILY_ANY,    NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
++      VULNWL_AMD(X86_FAMILY_ANY,      NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++      VULNWL_HYGON(X86_FAMILY_ANY,    NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+       /* Zhaoxin Family 7 */
+-      VULNWL(CENTAUR, 7, X86_MODEL_ANY,       NO_SPECTRE_V2 | NO_SWAPGS),
+-      VULNWL(ZHAOXIN, 7, X86_MODEL_ANY,       NO_SPECTRE_V2 | NO_SWAPGS),
++      VULNWL(CENTAUR, 7, X86_MODEL_ANY,       NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
++      VULNWL(ZHAOXIN, 7, X86_MODEL_ANY,       NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
+       {}
+ };
+@@ -1358,10 +1364,16 @@ static void __init cpu_set_bug_bits(stru
+        * Affected CPU list is generally enough to enumerate the vulnerability,
+        * but for virtualization case check for ARCH_CAP MSR bits also, VMM may
+        * not want the guest to enumerate the bug.
++       *
++       * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist,
++       * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits.
+        */
+-      if (cpu_matches(cpu_vuln_blacklist, MMIO) &&
+-          !arch_cap_mmio_immune(ia32_cap))
+-              setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
++      if (!arch_cap_mmio_immune(ia32_cap)) {
++              if (cpu_matches(cpu_vuln_blacklist, MMIO))
++                      setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
++              else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO))
++                      setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN);
++      }
+       if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
+               if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
diff --git a/queue-5.19/x86-entry-fix-entry_int80_compat-for-xen-pv-guests.patch b/queue-5.19/x86-entry-fix-entry_int80_compat-for-xen-pv-guests.patch
new file mode 100644 (file)
index 0000000..c181cb1
--- /dev/null
@@ -0,0 +1,49 @@
+From 5b9f0c4df1c1152403c738373fb063e9ffdac0a1 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 16 Aug 2022 09:11:37 +0200
+Subject: x86/entry: Fix entry_INT80_compat for Xen PV guests
+
+From: Juergen Gross <jgross@suse.com>
+
+commit 5b9f0c4df1c1152403c738373fb063e9ffdac0a1 upstream.
+
+Commit
+
+  c89191ce67ef ("x86/entry: Convert SWAPGS to swapgs and remove the definition of SWAPGS")
+
+missed one use case of SWAPGS in entry_INT80_compat(). Removal of
+the SWAPGS macro led to the assembler just using "swapgs", as it accepts
+instructions in capital letters, too.
+
+This in turn leads to splats in Xen PV guests like:
+
+  [   36.145223] general protection fault, maybe for address 0x2d: 0000 [#1] PREEMPT SMP NOPTI
+  [   36.145794] CPU: 2 PID: 1847 Comm: ld-linux.so.2 Not tainted 5.19.1-1-default #1 \
+         openSUSE Tumbleweed f3b44bfb672cdb9f235aff53b57724eba8b9411b
+  [   36.146608] Hardware name: HP ProLiant ML350p Gen8, BIOS P72 11/14/2013
+  [   36.148126] RIP: e030:entry_INT80_compat+0x3/0xa3
+
+Fix that by open coding this single instance of the SWAPGS macro.
+
+Fixes: c89191ce67ef ("x86/entry: Convert SWAPGS to swapgs and remove the definition of SWAPGS")
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Cc: <stable@vger.kernel.org> # 5.19
+Link: https://lore.kernel.org/r/20220816071137.4893-1-jgross@suse.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64_compat.S |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -311,7 +311,7 @@ SYM_CODE_START(entry_INT80_compat)
+        * Interrupts are off on entry.
+        */
+       ASM_CLAC                        /* Do this early to minimize exposure */
+-      SWAPGS
++      ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV
+       /*
+        * User tracing code (ptrace or signal handlers) might assume that
diff --git a/queue-5.19/x86-nospec-unwreck-the-rsb-stuffing.patch b/queue-5.19/x86-nospec-unwreck-the-rsb-stuffing.patch
new file mode 100644 (file)
index 0000000..8ae54df
--- /dev/null
@@ -0,0 +1,128 @@
+From 4e3aa9238277597c6c7624f302d81a7b568b6f2d Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 16 Aug 2022 14:28:36 +0200
+Subject: x86/nospec: Unwreck the RSB stuffing
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 4e3aa9238277597c6c7624f302d81a7b568b6f2d upstream.
+
+Commit 2b1299322016 ("x86/speculation: Add RSB VM Exit protections")
+made a right mess of the RSB stuffing, rewrite the whole thing to not
+suck.
+
+Thanks to Andrew for the enlightening comment about Post-Barrier RSB
+things so we can make this code less magical.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/YvuNdDWoUZSBjYcm@worktop.programming.kicks-ass.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h |   80 +++++++++++++++++------------------
+ 1 file changed, 39 insertions(+), 41 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -35,33 +35,44 @@
+ #define RSB_CLEAR_LOOPS               32      /* To forcibly overwrite all entries */
+ /*
++ * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN.
++ */
++#define __FILL_RETURN_SLOT                    \
++      ANNOTATE_INTRA_FUNCTION_CALL;           \
++      call    772f;                           \
++      int3;                                   \
++772:
++
++/*
++ * Stuff the entire RSB.
++ *
+  * Google experimented with loop-unrolling and this turned out to be
+  * the optimal version - two calls, each with their own speculation
+  * trap should their return address end up getting used, in a loop.
+  */
+-#define __FILL_RETURN_BUFFER(reg, nr, sp)     \
+-      mov     $(nr/2), reg;                   \
+-771:                                          \
+-      ANNOTATE_INTRA_FUNCTION_CALL;           \
+-      call    772f;                           \
+-773:  /* speculation trap */                  \
+-      UNWIND_HINT_EMPTY;                      \
+-      pause;                                  \
+-      lfence;                                 \
+-      jmp     773b;                           \
+-772:                                          \
+-      ANNOTATE_INTRA_FUNCTION_CALL;           \
+-      call    774f;                           \
+-775:  /* speculation trap */                  \
+-      UNWIND_HINT_EMPTY;                      \
+-      pause;                                  \
+-      lfence;                                 \
+-      jmp     775b;                           \
+-774:                                          \
+-      add     $(BITS_PER_LONG/8) * 2, sp;     \
+-      dec     reg;                            \
+-      jnz     771b;                           \
+-      /* barrier for jnz misprediction */     \
++#define __FILL_RETURN_BUFFER(reg, nr)                 \
++      mov     $(nr/2), reg;                           \
++771:                                                  \
++      __FILL_RETURN_SLOT                              \
++      __FILL_RETURN_SLOT                              \
++      add     $(BITS_PER_LONG/8) * 2, %_ASM_SP;       \
++      dec     reg;                                    \
++      jnz     771b;                                   \
++      /* barrier for jnz misprediction */             \
++      lfence;
++
++/*
++ * Stuff a single RSB slot.
++ *
++ * To mitigate Post-Barrier RSB speculation, one CALL instruction must be
++ * forced to retire before letting a RET instruction execute.
++ *
++ * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed
++ * before this point.
++ */
++#define __FILL_ONE_RETURN                             \
++      __FILL_RETURN_SLOT                              \
++      add     $(BITS_PER_LONG/8), %_ASM_SP;           \
+       lfence;
+ #ifdef __ASSEMBLY__
+@@ -120,28 +131,15 @@
+ #endif
+ .endm
+-.macro ISSUE_UNBALANCED_RET_GUARD
+-      ANNOTATE_INTRA_FUNCTION_CALL
+-      call .Lunbalanced_ret_guard_\@
+-      int3
+-.Lunbalanced_ret_guard_\@:
+-      add $(BITS_PER_LONG/8), %_ASM_SP
+-      lfence
+-.endm
+-
+  /*
+   * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+   * monstrosity above, manually.
+   */
+-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2
+-.ifb \ftr2
+-      ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
+-.else
+-      ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2
+-.endif
+-      __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
+-.Lunbalanced_\@:
+-      ISSUE_UNBALANCED_RET_GUARD
++.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS)
++      ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \
++              __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \
++              __stringify(__FILL_ONE_RETURN), \ftr2
++
+ .Lskip_rsb_\@:
+ .endm
diff --git a/queue-5.19/x86-pat-have-pat_enabled-properly-reflect-state-when-running-on-xen.patch b/queue-5.19/x86-pat-have-pat_enabled-properly-reflect-state-when-running-on-xen.patch
new file mode 100644 (file)
index 0000000..64676dd
--- /dev/null
@@ -0,0 +1,88 @@
+From 72cbc8f04fe2fa93443c0fcccb7ad91dfea3d9ce Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Thu, 28 Apr 2022 16:50:29 +0200
+Subject: x86/PAT: Have pat_enabled() properly reflect state when running on Xen
+
+From: Jan Beulich <jbeulich@suse.com>
+
+commit 72cbc8f04fe2fa93443c0fcccb7ad91dfea3d9ce upstream.
+
+After commit ID in the Fixes: tag, pat_enabled() returns false (because
+of PAT initialization being suppressed in the absence of MTRRs being
+announced to be available).
+
+This has become a problem: the i915 driver now fails to initialize when
+running PV on Xen (i915_gem_object_pin_map() is where I located the
+induced failure), and its error handling is flaky enough to (at least
+sometimes) result in a hung system.
+
+Yet even beyond that problem the keying of the use of WC mappings to
+pat_enabled() (see arch_can_pci_mmap_wc()) means that in particular
+graphics frame buffer accesses would have been quite a bit less optimal
+than possible.
+
+Arrange for the function to return true in such environments, without
+undermining the rest of PAT MSR management logic considering PAT to be
+disabled: specifically, no writes to the PAT MSR should occur.
+
+For the new boolean to live in .init.data, init_cache_modes() also needs
+moving to .init.text (where it could/should have lived already before).
+
+  [ bp: This is the "small fix" variant for stable. It'll get replaced
+    with a proper PAT and MTRR detection split upstream but that is too
+    involved for a stable backport.
+    - additional touchups to commit msg. Use cpu_feature_enabled(). ]
+
+Fixes: bdd8b6c98239 ("drm/i915: replace X86_FEATURE_PAT with pat_enabled()")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Ingo Molnar <mingo@kernel.org>
+Cc: <stable@vger.kernel.org>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Lucas De Marchi <lucas.demarchi@intel.com>
+Link: https://lore.kernel.org/r/9385fa60-fa5d-f559-a137-6608408f88b0@suse.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/pat/memtype.c |   10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/mm/pat/memtype.c
++++ b/arch/x86/mm/pat/memtype.c
+@@ -62,6 +62,7 @@
+ static bool __read_mostly pat_bp_initialized;
+ static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT);
++static bool __initdata pat_force_disabled = !IS_ENABLED(CONFIG_X86_PAT);
+ static bool __read_mostly pat_bp_enabled;
+ static bool __read_mostly pat_cm_initialized;
+@@ -86,6 +87,7 @@ void pat_disable(const char *msg_reason)
+ static int __init nopat(char *str)
+ {
+       pat_disable("PAT support disabled via boot option.");
++      pat_force_disabled = true;
+       return 0;
+ }
+ early_param("nopat", nopat);
+@@ -272,7 +274,7 @@ static void pat_ap_init(u64 pat)
+       wrmsrl(MSR_IA32_CR_PAT, pat);
+ }
+-void init_cache_modes(void)
++void __init init_cache_modes(void)
+ {
+       u64 pat = 0;
+@@ -313,6 +315,12 @@ void init_cache_modes(void)
+                */
+               pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
+                     PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);
++      } else if (!pat_force_disabled && cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) {
++              /*
++               * Clearly PAT is enabled underneath. Allow pat_enabled() to
++               * reflect this.
++               */
++              pat_bp_enabled = true;
+       }
+       __init_cache_modes(pat);
diff --git a/queue-5.19/x86-sev-don-t-use-cc_platform_has-for-early-sev-snp-calls.patch b/queue-5.19/x86-sev-don-t-use-cc_platform_has-for-early-sev-snp-calls.patch
new file mode 100644 (file)
index 0000000..c08cdb4
--- /dev/null
@@ -0,0 +1,70 @@
+From cdaa0a407f1acd3a44861e3aea6e3c7349e668f1 Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Tue, 23 Aug 2022 16:55:51 -0500
+Subject: x86/sev: Don't use cc_platform_has() for early SEV-SNP calls
+
+From: Tom Lendacky <thomas.lendacky@amd.com>
+
+commit cdaa0a407f1acd3a44861e3aea6e3c7349e668f1 upstream.
+
+When running identity-mapped and depending on the kernel configuration,
+it is possible that the compiler uses jump tables when generating code
+for cc_platform_has().
+
+This causes a boot failure because the jump table uses un-mapped kernel
+virtual addresses, not identity-mapped addresses. This has been seen
+with CONFIG_RETPOLINE=n.
+
+Similar to sme_encrypt_kernel(), use an open-coded direct check for the
+status of SNP rather than trying to eliminate the jump table. This
+preserves any code optimization in cc_platform_has() that can be useful
+post boot. It also limits the changes to SEV-specific files so that
+future compiler features won't necessarily require possible build changes
+just because they are not compatible with running identity-mapped.
+
+  [ bp: Massage commit message. ]
+
+Fixes: 5e5ccff60a29 ("x86/sev: Add helper for validating pages in early enc attribute changes")
+Reported-by: Sean Christopherson <seanjc@google.com>
+Suggested-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: <stable@vger.kernel.org> # 5.19.x
+Link: https://lore.kernel.org/all/YqfabnTRxFSM+LoX@google.com/
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/sev.c |   16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/sev.c
++++ b/arch/x86/kernel/sev.c
+@@ -701,7 +701,13 @@ e_term:
+ void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
+                                        unsigned int npages)
+ {
+-      if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
++      /*
++       * This can be invoked in early boot while running identity mapped, so
++       * use an open coded check for SNP instead of using cc_platform_has().
++       * This eliminates worries about jump tables or checking boot_cpu_data
++       * in the cc_platform_has() function.
++       */
++      if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
+               return;
+        /*
+@@ -717,7 +723,13 @@ void __init early_snp_set_memory_private
+ void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
+                                       unsigned int npages)
+ {
+-      if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
++      /*
++       * This can be invoked in early boot while running identity mapped, so
++       * use an open coded check for SNP instead of using cc_platform_has().
++       * This eliminates worries about jump tables or checking boot_cpu_data
++       * in the cc_platform_has() function.
++       */
++      if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
+               return;
+       /* Invalidate the memory pages before they are marked shared in the RMP table. */
diff --git a/queue-5.19/x86-unwind-orc-unwind-ftrace-trampolines-with-correct-orc-entry.patch b/queue-5.19/x86-unwind-orc-unwind-ftrace-trampolines-with-correct-orc-entry.patch
new file mode 100644 (file)
index 0000000..f76b902
--- /dev/null
@@ -0,0 +1,72 @@
+From fc2e426b1161761561624ebd43ce8c8d2fa058da Mon Sep 17 00:00:00 2001
+From: Chen Zhongjin <chenzhongjin@huawei.com>
+Date: Fri, 19 Aug 2022 16:43:34 +0800
+Subject: x86/unwind/orc: Unwind ftrace trampolines with correct ORC entry
+
+From: Chen Zhongjin <chenzhongjin@huawei.com>
+
+commit fc2e426b1161761561624ebd43ce8c8d2fa058da upstream.
+
+When meeting ftrace trampolines in ORC unwinding, the unwinder uses the
+address of ftrace_{regs_}call to find the ORC entry, which gets the next
+frame at sp+176.
+
+If there is an IRQ hitting at sub $0xa8,%rsp, the next frame should be
+sp+8 instead of 176. It makes unwinder skip correct frame and throw
+warnings such as "wrong direction" or "can't access registers", etc,
+depending on the content of the incorrect frame address.
+
+By adding the base address ftrace_{regs_}caller with the offset
+*ip - ops->trampoline*, we can get the correct address to find the ORC entry.
+
+Also change "caller" to "tramp_addr" to make variable name conform to
+its content.
+
+[ mingo: Clarified the changelog a bit. ]
+
+Fixes: 6be7fa3c74d1 ("ftrace, orc, x86: Handle ftrace dynamically allocated trampolines")
+Signed-off-by: Chen Zhongjin <chenzhongjin@huawei.com>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/20220819084334.244016-1-chenzhongjin@huawei.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/unwind_orc.c |   15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kernel/unwind_orc.c
++++ b/arch/x86/kernel/unwind_orc.c
+@@ -93,22 +93,27 @@ static struct orc_entry *orc_find(unsign
+ static struct orc_entry *orc_ftrace_find(unsigned long ip)
+ {
+       struct ftrace_ops *ops;
+-      unsigned long caller;
++      unsigned long tramp_addr, offset;
+       ops = ftrace_ops_trampoline(ip);
+       if (!ops)
+               return NULL;
++      /* Set tramp_addr to the start of the code copied by the trampoline */
+       if (ops->flags & FTRACE_OPS_FL_SAVE_REGS)
+-              caller = (unsigned long)ftrace_regs_call;
++              tramp_addr = (unsigned long)ftrace_regs_caller;
+       else
+-              caller = (unsigned long)ftrace_call;
++              tramp_addr = (unsigned long)ftrace_caller;
++
++      /* Now place tramp_addr to the location within the trampoline ip is at */
++      offset = ip - ops->trampoline;
++      tramp_addr += offset;
+       /* Prevent unlikely recursion */
+-      if (ip == caller)
++      if (ip == tramp_addr)
+               return NULL;
+-      return orc_find(caller);
++      return orc_find(tramp_addr);
+ }
+ #else
+ static struct orc_entry *orc_ftrace_find(unsigned long ip)
diff --git a/queue-5.19/xen-privcmd-fix-error-exit-of-privcmd_ioctl_dm_op.patch b/queue-5.19/xen-privcmd-fix-error-exit-of-privcmd_ioctl_dm_op.patch
new file mode 100644 (file)
index 0000000..c08c929
--- /dev/null
@@ -0,0 +1,95 @@
+From c5deb27895e017a0267de0a20d140ad5fcc55a54 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Thu, 25 Aug 2022 16:19:18 +0200
+Subject: xen/privcmd: fix error exit of privcmd_ioctl_dm_op()
+
+From: Juergen Gross <jgross@suse.com>
+
+commit c5deb27895e017a0267de0a20d140ad5fcc55a54 upstream.
+
+The error exit of privcmd_ioctl_dm_op() is calling unlock_pages()
+potentially with pages being NULL, leading to a NULL dereference.
+
+Additionally lock_pages() doesn't check for pin_user_pages_fast()
+having been completely successful, resulting in potentially not
+locking all pages into memory. This could result in sporadic failures
+when using the related memory in user mode.
+
+Fix all of that by always calling unlock_pages() with the real number
+of pinned pages, which will be zero if pages is NULL, and by checking
+that the number of pages pinned by pin_user_pages_fast() matches the
+expected number of pages.
+
+Cc: <stable@vger.kernel.org>
+Fixes: ab520be8cd5d ("xen/privcmd: Add IOCTL_PRIVCMD_DM_OP")
+Reported-by: Rustam Subkhankulov <subkhankulov@ispras.ru>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
+Link: https://lore.kernel.org/r/20220825141918.3581-1-jgross@suse.com
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/xen/privcmd.c |   21 +++++++++++----------
+ 1 file changed, 11 insertions(+), 10 deletions(-)
+
+--- a/drivers/xen/privcmd.c
++++ b/drivers/xen/privcmd.c
+@@ -581,27 +581,30 @@ static int lock_pages(
+       struct privcmd_dm_op_buf kbufs[], unsigned int num,
+       struct page *pages[], unsigned int nr_pages, unsigned int *pinned)
+ {
+-      unsigned int i;
++      unsigned int i, off = 0;
+-      for (i = 0; i < num; i++) {
++      for (i = 0; i < num; ) {
+               unsigned int requested;
+               int page_count;
+               requested = DIV_ROUND_UP(
+                       offset_in_page(kbufs[i].uptr) + kbufs[i].size,
+-                      PAGE_SIZE);
++                      PAGE_SIZE) - off;
+               if (requested > nr_pages)
+                       return -ENOSPC;
+               page_count = pin_user_pages_fast(
+-                      (unsigned long) kbufs[i].uptr,
++                      (unsigned long)kbufs[i].uptr + off * PAGE_SIZE,
+                       requested, FOLL_WRITE, pages);
+-              if (page_count < 0)
+-                      return page_count;
++              if (page_count <= 0)
++                      return page_count ? : -EFAULT;
+               *pinned += page_count;
+               nr_pages -= page_count;
+               pages += page_count;
++
++              off = (requested == page_count) ? 0 : off + page_count;
++              i += !off;
+       }
+       return 0;
+@@ -677,10 +680,8 @@ static long privcmd_ioctl_dm_op(struct f
+       }
+       rc = lock_pages(kbufs, kdata.num, pages, nr_pages, &pinned);
+-      if (rc < 0) {
+-              nr_pages = pinned;
++      if (rc < 0)
+               goto out;
+-      }
+       for (i = 0; i < kdata.num; i++) {
+               set_xen_guest_handle(xbufs[i].h, kbufs[i].uptr);
+@@ -692,7 +693,7 @@ static long privcmd_ioctl_dm_op(struct f
+       xen_preemptible_hcall_end();
+ out:
+-      unlock_pages(pages, nr_pages);
++      unlock_pages(pages, pinned);
+       kfree(xbufs);
+       kfree(pages);
+       kfree(kbufs);