--- /dev/null
+From 36527b9d882362567ceb4eea8666813280f30e6f Mon Sep 17 00:00:00 2001
+From: Riwen Lu <luriwen@kylinos.cn>
+Date: Tue, 23 Aug 2022 15:43:42 +0800
+Subject: ACPI: processor: Remove freq Qos request for all CPUs
+
+From: Riwen Lu <luriwen@kylinos.cn>
+
+commit 36527b9d882362567ceb4eea8666813280f30e6f upstream.
+
+The freq QoS request is removed repeatedly if the cpufreq policy
+relates to more than one CPU, which then causes the "called for unknown
+object" warning.
+
+Remove the freq QoS request for each CPU related to the cpufreq policy,
+instead of repeatedly removing it for the same CPU (policy->cpu).
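+
+As a rough illustration, for a policy whose related_cpus mask covers
+CPUs 0-3 with policy->cpu == 0, the old loop resolved CPU 0's
+acpi_processor on every iteration:
+
+  for_each_cpu(cpu, policy->related_cpus) {
+          /* always CPU 0's request; removed four times */
+          struct acpi_processor *pr = per_cpu(processors, policy->cpu);
+          ...
+  }
+
+so every removal after the first hits an already-removed freq QoS
+request.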
+
+Fixes: a1bb46c36ce3 ("ACPI: processor: Add QoS requests for all CPUs")
+Reported-by: Jeremy Linton <Jeremy.Linton@arm.com>
+Tested-by: Jeremy Linton <jeremy.linton@arm.com>
+Signed-off-by: Riwen Lu <luriwen@kylinos.cn>
+Cc: 5.4+ <stable@vger.kernel.org> # 5.4+
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/acpi/processor_thermal.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/acpi/processor_thermal.c
++++ b/drivers/acpi/processor_thermal.c
+@@ -151,7 +151,7 @@ void acpi_thermal_cpufreq_exit(struct cp
+ unsigned int cpu;
+
+ for_each_cpu(cpu, policy->related_cpus) {
+- struct acpi_processor *pr = per_cpu(processors, policy->cpu);
++ struct acpi_processor *pr = per_cpu(processors, cpu);
+
+ if (pr)
+ freq_qos_remove_request(&pr->thermal_req);
--- /dev/null
+From 0c7d7cc2b4fe2e74ef8728f030f0f1674f9f6aee Mon Sep 17 00:00:00 2001
+From: Quanyang Wang <quanyang.wang@windriver.com>
+Date: Fri, 19 Aug 2022 16:11:45 +0800
+Subject: asm-generic: sections: refactor memory_intersects
+
+From: Quanyang Wang <quanyang.wang@windriver.com>
+
+commit 0c7d7cc2b4fe2e74ef8728f030f0f1674f9f6aee upstream.
+
+There are two problems with the current code of memory_intersects:
+
+First, it doesn't check whether the region (begin, end) falls inside the
+region (virt, vend), that is (virt < begin && vend > end).
+
+The second problem is that if vend is equal to begin, it returns true,
+which is wrong since vend (virt + size) is not the last address of the
+memory region; (virt + size - 1) is. This wrong determination triggers
+a misreport when check_for_illegal_area() calls memory_intersects() to
+check whether the DMA region intersects with the stext region.
+
+The misreporting is as below (stext is at 0x80100000):
+ WARNING: CPU: 0 PID: 77 at kernel/dma/debug.c:1073 check_for_illegal_area+0x130/0x168
+ DMA-API: chipidea-usb2 e0002000.usb: device driver maps memory from kernel text or rodata [addr=800f0000] [len=65536]
+ Modules linked in:
+ CPU: 1 PID: 77 Comm: usb-storage Not tainted 5.19.0-yocto-standard #5
+ Hardware name: Xilinx Zynq Platform
+ unwind_backtrace from show_stack+0x18/0x1c
+ show_stack from dump_stack_lvl+0x58/0x70
+ dump_stack_lvl from __warn+0xb0/0x198
+ __warn from warn_slowpath_fmt+0x80/0xb4
+ warn_slowpath_fmt from check_for_illegal_area+0x130/0x168
+ check_for_illegal_area from debug_dma_map_sg+0x94/0x368
+ debug_dma_map_sg from __dma_map_sg_attrs+0x114/0x128
+ __dma_map_sg_attrs from dma_map_sg_attrs+0x18/0x24
+ dma_map_sg_attrs from usb_hcd_map_urb_for_dma+0x250/0x3b4
+ usb_hcd_map_urb_for_dma from usb_hcd_submit_urb+0x194/0x214
+ usb_hcd_submit_urb from usb_sg_wait+0xa4/0x118
+ usb_sg_wait from usb_stor_bulk_transfer_sglist+0xa0/0xec
+ usb_stor_bulk_transfer_sglist from usb_stor_bulk_srb+0x38/0x70
+ usb_stor_bulk_srb from usb_stor_Bulk_transport+0x150/0x360
+ usb_stor_Bulk_transport from usb_stor_invoke_transport+0x38/0x440
+ usb_stor_invoke_transport from usb_stor_control_thread+0x1e0/0x238
+ usb_stor_control_thread from kthread+0xf8/0x104
+ kthread from ret_from_fork+0x14/0x2c
+
+Refactor memory_intersects to fix the two problems above.
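+
+A quick check of the new condition with begin=0x100 and end=0x200:
+
+ - An object at virt=0x80 with size=0x200 (vend=0x280) contains the
+   whole region. The old check missed it (0x80 >= 0x100 is false and
+   0x280 < 0x200 is false); the new check catches it (0x80 < 0x200 and
+   0x280 > 0x100).
+
+ - An object at virt=0x80 with size=0x80 (vend=0x100) ends at 0xff,
+   just before the region. The old check wrongly reported an
+   intersection (0x100 >= 0x100 and 0x100 < 0x200); the new check does
+   not (0x100 > 0x100 is false).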
+
+Before commit 1d7db834a027e ("dma-debug: use memory_intersects()
+directly"), memory_intersects was called only by printk_late_init:
+
+printk_late_init -> init_section_intersects -> memory_intersects
+
+so there were few places where memory_intersects was called.
+
+When commit 1d7db834a027e ("dma-debug: use memory_intersects()
+directly") was merged and CONFIG_DMA_API_DEBUG is enabled, the DMA
+subsystem uses it to check for an illegal area and the calltrace above
+is triggered.
+
+[akpm@linux-foundation.org: fix nearby comment typo]
+Link: https://lkml.kernel.org/r/20220819081145.948016-1-quanyang.wang@windriver.com
+Fixes: 979559362516 ("asm/sections: add helpers to check for section data")
+Signed-off-by: Quanyang Wang <quanyang.wang@windriver.com>
+Cc: Ard Biesheuvel <ardb@kernel.org>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Cc: Thierry Reding <treding@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/asm-generic/sections.h | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/include/asm-generic/sections.h
++++ b/include/asm-generic/sections.h
+@@ -97,7 +97,7 @@ static inline bool memory_contains(void
+ /**
+ * memory_intersects - checks if the region occupied by an object intersects
+ * with another memory region
+- * @begin: virtual address of the beginning of the memory regien
++ * @begin: virtual address of the beginning of the memory region
+ * @end: virtual address of the end of the memory region
+ * @virt: virtual address of the memory object
+ * @size: size of the memory object
+@@ -110,7 +110,10 @@ static inline bool memory_intersects(voi
+ {
+ void *vend = virt + size;
+
+- return (virt >= begin && virt < end) || (vend >= begin && vend < end);
++ if (virt < end && vend > begin)
++ return true;
++
++ return false;
+ }
+
+ /**
--- /dev/null
+From d4fefa4801a1c2f9c0c7a48fbb0fdf384e89a4ab Mon Sep 17 00:00:00 2001
+From: Richard Guy Briggs <rgb@redhat.com>
+Date: Thu, 25 Aug 2022 15:32:40 -0400
+Subject: audit: move audit_return_fixup before the filters
+
+From: Richard Guy Briggs <rgb@redhat.com>
+
+commit d4fefa4801a1c2f9c0c7a48fbb0fdf384e89a4ab upstream.
+
+The success and return_code are needed by the filters. Move
+audit_return_fixup() before the filters. This was causing syscall
+auditing events to be missed.
+
+Link: https://github.com/linux-audit/audit-kernel/issues/138
+Cc: stable@vger.kernel.org
+Fixes: 12c5e81d3fd0 ("audit: prepare audit_context for use in calling contexts beyond syscalls")
+Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
+[PM: manual merge required]
+Signed-off-by: Paul Moore <paul@paul-moore.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/auditsc.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/kernel/auditsc.c
++++ b/kernel/auditsc.c
+@@ -1965,6 +1965,7 @@ void __audit_uring_exit(int success, lon
+ goto out;
+ }
+
++ audit_return_fixup(ctx, success, code);
+ if (ctx->context == AUDIT_CTX_SYSCALL) {
+ /*
+ * NOTE: See the note in __audit_uring_entry() about the case
+@@ -2006,7 +2007,6 @@ void __audit_uring_exit(int success, lon
+ audit_filter_inodes(current, ctx);
+ if (ctx->current_state != AUDIT_STATE_RECORD)
+ goto out;
+- audit_return_fixup(ctx, success, code);
+ audit_log_exit();
+
+ out:
+@@ -2090,13 +2090,13 @@ void __audit_syscall_exit(int success, l
+ if (!list_empty(&context->killed_trees))
+ audit_kill_trees(context);
+
++ audit_return_fixup(context, success, return_code);
+ /* run through both filters to ensure we set the filterkey properly */
+ audit_filter_syscall(current, context);
+ audit_filter_inodes(current, context);
+ if (context->current_state < AUDIT_STATE_RECORD)
+ goto out;
+
+- audit_return_fixup(context, success, return_code);
+ audit_log_exit();
+
+ out:
--- /dev/null
+From dd0ff4d12dd284c334f7e9b07f8f335af856ac78 Mon Sep 17 00:00:00 2001
+From: Liu Shixin <liushixin2@huawei.com>
+Date: Fri, 19 Aug 2022 17:40:05 +0800
+Subject: bootmem: remove the vmemmap pages from kmemleak in put_page_bootmem
+
+From: Liu Shixin <liushixin2@huawei.com>
+
+commit dd0ff4d12dd284c334f7e9b07f8f335af856ac78 upstream.
+
+The vmemmap pages are marked by kmemleak when allocated from memblock.
+Remove them from kmemleak when freeing the page. Otherwise, when we
+reuse the page, kmemleak may report such an error and then stop
+working.
+
+ kmemleak: Cannot insert 0xffff98fb6eab3d40 into the object search tree (overlaps existing)
+ kmemleak: Kernel memory leak detector disabled
+ kmemleak: Object 0xffff98fb6be00000 (size 335544320):
+ kmemleak: comm "swapper", pid 0, jiffies 4294892296
+ kmemleak: min_count = 0
+ kmemleak: count = 0
+ kmemleak: flags = 0x1
+ kmemleak: checksum = 0
+ kmemleak: backtrace:
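+
+Roughly, the sequence leading to this is:
+
+  memblock_alloc()        -> object registered with kmemleak
+  put_page_bootmem()
+    free_reserved_page()  -> page freed, stale object still tracked
+  <page gets reused>      -> new registration overlaps the stale one,
+                             kmemleak disables itself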
+
+Link: https://lkml.kernel.org/r/20220819094005.2928241-1-liushixin2@huawei.com
+Fixes: f41f2ed43ca5 (mm: hugetlb: free the vmemmap pages associated with each HugeTLB page)
+Signed-off-by: Liu Shixin <liushixin2@huawei.com>
+Reviewed-by: Muchun Song <songmuchun@bytedance.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/bootmem_info.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/mm/bootmem_info.c
++++ b/mm/bootmem_info.c
+@@ -12,6 +12,7 @@
+ #include <linux/memblock.h>
+ #include <linux/bootmem_info.h>
+ #include <linux/memory_hotplug.h>
++#include <linux/kmemleak.h>
+
+ void get_page_bootmem(unsigned long info, struct page *page, unsigned long type)
+ {
+@@ -33,6 +34,7 @@ void put_page_bootmem(struct page *page)
+ ClearPagePrivate(page);
+ set_page_private(page, 0);
+ INIT_LIST_HEAD(&page->lru);
++ kmemleak_free_part(page_to_virt(page), PAGE_SIZE);
+ free_reserved_page(page);
+ }
+ }
--- /dev/null
+From f2c3bec215694fb8bc0ef5010f2a758d1906fc2d Mon Sep 17 00:00:00 2001
+From: Anand Jain <anand.jain@oracle.com>
+Date: Fri, 12 Aug 2022 18:32:19 +0800
+Subject: btrfs: add info when mount fails due to stale replace target
+
+From: Anand Jain <anand.jain@oracle.com>
+
+commit f2c3bec215694fb8bc0ef5010f2a758d1906fc2d upstream.
+
+If the replace target device reappears after the suspended replace is
+cancelled, it blocks the mount operation, as the matching replace item
+can't be found in the metadata. As shown below:
+
+ BTRFS error (device sda5): replace devid present without an active replace item
+
+To overcome this situation, the user can run the command
+
+ btrfs device scan --forget <replace target device>
+
+and try the mount command again. Also, to avoid repeating the issue,
+the superblock on the devid=0 device must be wiped:
+
+ wipefs -a <device-path-to-devid=0>
+
+This patch adds some info when this situation occurs.
+
+Reported-by: Samuel Greiner <samuel@balkonien.org>
+Link: https://lore.kernel.org/linux-btrfs/b4f62b10-b295-26ea-71f9-9a5c9299d42c@balkonien.org/T/
+CC: stable@vger.kernel.org # 5.0+
+Signed-off-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/dev-replace.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/dev-replace.c
++++ b/fs/btrfs/dev-replace.c
+@@ -165,7 +165,7 @@ no_valid_dev_replace_entry_found:
+ */
+ if (btrfs_find_device(fs_info->fs_devices, &args)) {
+ btrfs_err(fs_info,
+- "replace devid present without an active replace item");
++"replace without active item, run 'device scan --forget' on the target device");
+ ret = -EUCLEAN;
+ } else {
+ dev_replace->srcdev = NULL;
--- /dev/null
+From b51111271b0352aa596c5ae8faf06939e91b3b68 Mon Sep 17 00:00:00 2001
+From: Goldwyn Rodrigues <rgoldwyn@suse.de>
+Date: Tue, 16 Aug 2022 16:42:56 -0500
+Subject: btrfs: check if root is readonly while setting security xattr
+
+From: Goldwyn Rodrigues <rgoldwyn@suse.de>
+
+commit b51111271b0352aa596c5ae8faf06939e91b3b68 upstream.
+
+For a filesystem which has the btrfs read-only property set to true,
+all write operations, including xattr, should be denied. However,
+security xattrs can still be changed even if the btrfs ro property is
+true.
+
+This happens because xattr_permission() does not have any restrictions
+on security.*, system.* and in some cases trusted.* from VFS and
+the decision is left to the underlying filesystem. See comments in
+xattr_permission() for more details.
+
+This patch checks if the root is read-only before performing the set
+xattr operation.
+
+Testcase:
+
+ DEV=/dev/vdb
+ MNT=/mnt
+
+ mkfs.btrfs -f $DEV
+ mount $DEV $MNT
+ echo "file one" > $MNT/f1
+
+ setfattr -n "security.one" -v 2 $MNT/f1
+ btrfs property set /mnt ro true
+
+ setfattr -n "security.one" -v 1 $MNT/f1
+
+ umount $MNT
+
+CC: stable@vger.kernel.org # 4.9+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/xattr.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/btrfs/xattr.c
++++ b/fs/btrfs/xattr.c
+@@ -371,6 +371,9 @@ static int btrfs_xattr_handler_set(const
+ const char *name, const void *buffer,
+ size_t size, int flags)
+ {
++ if (btrfs_root_readonly(BTRFS_I(inode)->root))
++ return -EROFS;
++
+ name = xattr_full_name(handler, name);
+ return btrfs_setxattr_trans(inode, name, buffer, size, flags);
+ }
--- /dev/null
+From 9ea0106a7a3d8116860712e3f17cd52ce99f6707 Mon Sep 17 00:00:00 2001
+From: Zixuan Fu <r33s3n6@gmail.com>
+Date: Mon, 15 Aug 2022 23:16:06 +0800
+Subject: btrfs: fix possible memory leak in btrfs_get_dev_args_from_path()
+
+From: Zixuan Fu <r33s3n6@gmail.com>
+
+commit 9ea0106a7a3d8116860712e3f17cd52ce99f6707 upstream.
+
+In btrfs_get_dev_args_from_path(), btrfs_get_bdev_and_sb() can fail if
+the path is invalid. In this case, btrfs_get_dev_args_from_path()
+returns directly without freeing args->uuid and args->fsid allocated
+before, which causes a memory leak.
+
+To fix these possible leaks, when btrfs_get_bdev_and_sb() fails,
+btrfs_put_dev_args_from_path() is called to clean up the memory.
+
+Reported-by: TOTE Robot <oslab@tsinghua.edu.cn>
+Fixes: faa775c41d655 ("btrfs: add a btrfs_get_dev_args_from_path helper")
+CC: stable@vger.kernel.org # 5.16
+Reviewed-by: Boris Burkov <boris@bur.io>
+Signed-off-by: Zixuan Fu <r33s3n6@gmail.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/volumes.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -2344,8 +2344,11 @@ int btrfs_get_dev_args_from_path(struct
+
+ ret = btrfs_get_bdev_and_sb(path, FMODE_READ, fs_info->bdev_holder, 0,
+ &bdev, &disk_super);
+- if (ret)
++ if (ret) {
++ btrfs_put_dev_args_from_path(args);
+ return ret;
++ }
++
+ args->devid = btrfs_stack_device_id(&disk_super->dev_item);
+ memcpy(args->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE);
+ if (btrfs_fs_incompat(fs_info, METADATA_UUID))
--- /dev/null
+From 47bf225a8d2cccb15f7e8d4a1ed9b757dd86afd7 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 22 Aug 2022 15:47:09 +0100
+Subject: btrfs: fix silent failure when deleting root reference
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 47bf225a8d2cccb15f7e8d4a1ed9b757dd86afd7 upstream.
+
+At btrfs_del_root_ref(), if btrfs_search_slot() returns an error, we end
+up returning from the function with a value of 0 (success). This happens
+because the function returns the value stored in the variable 'err',
+which is 0, while the error value we got from btrfs_search_slot() is
+stored in the 'ret' variable.
+
+So fix it by setting 'err' with the error value.
+
+Fixes: 8289ed9f93bef2 ("btrfs: replace the BUG_ON in btrfs_del_root_ref with proper error handling")
+CC: stable@vger.kernel.org # 5.16+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/root-tree.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/root-tree.c
++++ b/fs/btrfs/root-tree.c
+@@ -349,9 +349,10 @@ int btrfs_del_root_ref(struct btrfs_tran
+ key.offset = ref_id;
+ again:
+ ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
+- if (ret < 0)
++ if (ret < 0) {
++ err = ret;
+ goto out;
+- if (ret == 0) {
++ } else if (ret == 0) {
+ leaf = path->nodes[0];
+ ref = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_root_ref);
--- /dev/null
+From ced8ecf026fd8084cf175530ff85c76d6085d715 Mon Sep 17 00:00:00 2001
+From: Omar Sandoval <osandov@fb.com>
+Date: Tue, 23 Aug 2022 11:28:13 -0700
+Subject: btrfs: fix space cache corruption and potential double allocations
+
+From: Omar Sandoval <osandov@fb.com>
+
+commit ced8ecf026fd8084cf175530ff85c76d6085d715 upstream.
+
+When testing space_cache v2 on a large set of machines, we encountered a
+few symptoms:
+
+1. "unable to add free space :-17" (EEXIST) errors.
+2. Missing free space info items, sometimes caught with a "missing free
+ space info for X" error.
+3. Double-accounted space: ranges that were allocated in the extent tree
+ and also marked as free in the free space tree, ranges that were
+ marked as allocated twice in the extent tree, or ranges that were
+ marked as free twice in the free space tree. If the latter made it
+ onto disk, the next reboot would hit the BUG_ON() in
+ add_new_free_space().
+4. On some hosts with no on-disk corruption or error messages, the
+ in-memory space cache (dumped with drgn) disagreed with the free
+ space tree.
+
+All of these symptoms have the same underlying cause: a race between
+caching the free space for a block group and returning free space to the
+in-memory space cache for pinned extents causes us to double-add a free
+range to the space cache. This race exists when free space is cached
+from the free space tree (space_cache=v2) or the extent tree
+(nospace_cache, or space_cache=v1 if the cache needs to be regenerated).
+struct btrfs_block_group::last_byte_to_unpin and struct
+btrfs_block_group::progress are supposed to protect against this race,
+but commit d0c2f4fa555e ("btrfs: make concurrent fsyncs wait less when
+waiting for a transaction commit") subtly broke this by allowing
+multiple transactions to be unpinning extents at the same time.
+
+Specifically, the race is as follows:
+
+1. An extent is deleted from an uncached block group in transaction A.
+2. btrfs_commit_transaction() is called for transaction A.
+3. btrfs_run_delayed_refs() -> __btrfs_free_extent() runs the delayed
+ ref for the deleted extent.
+4. __btrfs_free_extent() -> do_free_extent_accounting() ->
+ add_to_free_space_tree() adds the deleted extent back to the free
+ space tree.
+5. do_free_extent_accounting() -> btrfs_update_block_group() ->
+ btrfs_cache_block_group() queues up the block group to get cached.
+ block_group->progress is set to block_group->start.
+6. btrfs_commit_transaction() for transaction A calls
+ switch_commit_roots(). It sets block_group->last_byte_to_unpin to
+ block_group->progress, which is block_group->start because the block
+ group hasn't been cached yet.
+7. The caching thread gets to our block group. Since the commit roots
+ were already switched, load_free_space_tree() sees the deleted extent
+ as free and adds it to the space cache. It finishes caching and sets
+ block_group->progress to U64_MAX.
+8. btrfs_commit_transaction() advances transaction A to
+ TRANS_STATE_SUPER_COMMITTED.
+9. fsync calls btrfs_commit_transaction() for transaction B. Since
+ transaction A is already in TRANS_STATE_SUPER_COMMITTED and the
+ commit is for fsync, it advances.
+10. btrfs_commit_transaction() for transaction B calls
+ switch_commit_roots(). This time, the block group has already been
+ cached, so it sets block_group->last_byte_to_unpin to U64_MAX.
+11. btrfs_commit_transaction() for transaction A calls
+ btrfs_finish_extent_commit(), which calls unpin_extent_range() for
+ the deleted extent. It sees last_byte_to_unpin set to U64_MAX (by
+ transaction B!), so it adds the deleted extent to the space cache
+ again!
+
+This explains all of our symptoms above:
+
+* If the sequence of events is exactly as described above, when the free
+ space is re-added in step 11, it will fail with EEXIST.
+* If another thread reallocates the deleted extent in between steps 7
+ and 11, then step 11 will silently re-add that space to the space
+ cache as free even though it is actually allocated. Then, if that
+ space is allocated *again*, the free space tree will be corrupted
+ (namely, the wrong item will be deleted).
+* If we don't catch this free space tree corruption, it will continue
+ to get worse as extents are deleted and reallocated.
+
+The v1 space_cache is synchronously loaded when an extent is deleted
+(btrfs_update_block_group() with alloc=0 calls btrfs_cache_block_group()
+with load_cache_only=1), so it is not normally affected by this bug.
+However, as noted above, if we fail to load the space cache, we will
+fall back to caching from the extent tree and may hit this bug.
+
+The easiest fix for this race is to also make caching from the free
+space tree or extent tree synchronous. Josef tested this and found no
+performance regressions.
+
+A few extra changes fall out of this change. Namely, this fix does the
+following, with step 2 being the crucial fix:
+
+1. Factor btrfs_caching_ctl_wait_done() out of
+ btrfs_wait_block_group_cache_done() to allow waiting on a caching_ctl
+ that we already hold a reference to.
+2. Change the call in btrfs_cache_block_group() of
+ btrfs_wait_space_cache_v1_finished() to
+ btrfs_caching_ctl_wait_done(), which makes us wait regardless of the
+ space_cache option.
+3. Delete the now unused btrfs_wait_space_cache_v1_finished() and
+ space_cache_v1_done().
+4. Change btrfs_cache_block_group()'s `int load_cache_only` parameter to
+ `bool wait` to more accurately describe its new meaning.
+5. Change a few callers which had a separate call to
+ btrfs_wait_block_group_cache_done() to use wait = true instead.
+6. Make btrfs_wait_block_group_cache_done() static now that it's not
+ used outside of block-group.c anymore.
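+
+For instance, in the hunks below the caller pattern changes from
+
+  btrfs_cache_block_group(cache, 1);
+  ret = btrfs_wait_block_group_cache_done(cache);
+
+to
+
+  ret = btrfs_cache_block_group(cache, true);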
+
+Fixes: d0c2f4fa555e ("btrfs: make concurrent fsyncs wait less when waiting for a transaction commit")
+CC: stable@vger.kernel.org # 5.12+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Omar Sandoval <osandov@fb.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/block-group.c | 47 +++++++++++++++--------------------------------
+ fs/btrfs/block-group.h | 4 +---
+ fs/btrfs/ctree.h | 1 -
+ fs/btrfs/extent-tree.c | 30 ++++++------------------------
+ 4 files changed, 22 insertions(+), 60 deletions(-)
+
+--- a/fs/btrfs/block-group.c
++++ b/fs/btrfs/block-group.c
+@@ -440,39 +440,26 @@ void btrfs_wait_block_group_cache_progre
+ btrfs_put_caching_control(caching_ctl);
+ }
+
+-int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache)
++static int btrfs_caching_ctl_wait_done(struct btrfs_block_group *cache,
++ struct btrfs_caching_control *caching_ctl)
++{
++ wait_event(caching_ctl->wait, btrfs_block_group_done(cache));
++ return cache->cached == BTRFS_CACHE_ERROR ? -EIO : 0;
++}
++
++static int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache)
+ {
+ struct btrfs_caching_control *caching_ctl;
+- int ret = 0;
++ int ret;
+
+ caching_ctl = btrfs_get_caching_control(cache);
+ if (!caching_ctl)
+ return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;
+-
+- wait_event(caching_ctl->wait, btrfs_block_group_done(cache));
+- if (cache->cached == BTRFS_CACHE_ERROR)
+- ret = -EIO;
++ ret = btrfs_caching_ctl_wait_done(cache, caching_ctl);
+ btrfs_put_caching_control(caching_ctl);
+ return ret;
+ }
+
+-static bool space_cache_v1_done(struct btrfs_block_group *cache)
+-{
+- bool ret;
+-
+- spin_lock(&cache->lock);
+- ret = cache->cached != BTRFS_CACHE_FAST;
+- spin_unlock(&cache->lock);
+-
+- return ret;
+-}
+-
+-void btrfs_wait_space_cache_v1_finished(struct btrfs_block_group *cache,
+- struct btrfs_caching_control *caching_ctl)
+-{
+- wait_event(caching_ctl->wait, space_cache_v1_done(cache));
+-}
+-
+ #ifdef CONFIG_BTRFS_DEBUG
+ static void fragment_free_space(struct btrfs_block_group *block_group)
+ {
+@@ -750,9 +737,8 @@ done:
+ btrfs_put_block_group(block_group);
+ }
+
+-int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only)
++int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait)
+ {
+- DEFINE_WAIT(wait);
+ struct btrfs_fs_info *fs_info = cache->fs_info;
+ struct btrfs_caching_control *caching_ctl = NULL;
+ int ret = 0;
+@@ -785,10 +771,7 @@ int btrfs_cache_block_group(struct btrfs
+ }
+ WARN_ON(cache->caching_ctl);
+ cache->caching_ctl = caching_ctl;
+- if (btrfs_test_opt(fs_info, SPACE_CACHE))
+- cache->cached = BTRFS_CACHE_FAST;
+- else
+- cache->cached = BTRFS_CACHE_STARTED;
++ cache->cached = BTRFS_CACHE_STARTED;
+ cache->has_caching_ctl = 1;
+ spin_unlock(&cache->lock);
+
+@@ -801,8 +784,8 @@ int btrfs_cache_block_group(struct btrfs
+
+ btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);
+ out:
+- if (load_cache_only && caching_ctl)
+- btrfs_wait_space_cache_v1_finished(cache, caching_ctl);
++ if (wait && caching_ctl)
++ ret = btrfs_caching_ctl_wait_done(cache, caching_ctl);
+ if (caching_ctl)
+ btrfs_put_caching_control(caching_ctl);
+
+@@ -3313,7 +3296,7 @@ int btrfs_update_block_group(struct btrf
+ * space back to the block group, otherwise we will leak space.
+ */
+ if (!alloc && !btrfs_block_group_done(cache))
+- btrfs_cache_block_group(cache, 1);
++ btrfs_cache_block_group(cache, true);
+
+ byte_in_group = bytenr - cache->start;
+ WARN_ON(byte_in_group > cache->length);
+--- a/fs/btrfs/block-group.h
++++ b/fs/btrfs/block-group.h
+@@ -263,9 +263,7 @@ void btrfs_dec_nocow_writers(struct btrf
+ void btrfs_wait_nocow_writers(struct btrfs_block_group *bg);
+ void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
+ u64 num_bytes);
+-int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache);
+-int btrfs_cache_block_group(struct btrfs_block_group *cache,
+- int load_cache_only);
++int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait);
+ void btrfs_put_caching_control(struct btrfs_caching_control *ctl);
+ struct btrfs_caching_control *btrfs_get_caching_control(
+ struct btrfs_block_group *cache);
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -494,7 +494,6 @@ struct btrfs_free_cluster {
+ enum btrfs_caching_type {
+ BTRFS_CACHE_NO,
+ BTRFS_CACHE_STARTED,
+- BTRFS_CACHE_FAST,
+ BTRFS_CACHE_FINISHED,
+ BTRFS_CACHE_ERROR,
+ };
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -2567,17 +2567,10 @@ int btrfs_pin_extent_for_log_replay(stru
+ return -EINVAL;
+
+ /*
+- * pull in the free space cache (if any) so that our pin
+- * removes the free space from the cache. We have load_only set
+- * to one because the slow code to read in the free extents does check
+- * the pinned extents.
++ * Fully cache the free space first so that our pin removes the free space
++ * from the cache.
+ */
+- btrfs_cache_block_group(cache, 1);
+- /*
+- * Make sure we wait until the cache is completely built in case it is
+- * missing or is invalid and therefore needs to be rebuilt.
+- */
+- ret = btrfs_wait_block_group_cache_done(cache);
++ ret = btrfs_cache_block_group(cache, true);
+ if (ret)
+ goto out;
+
+@@ -2600,12 +2593,7 @@ static int __exclude_logged_extent(struc
+ if (!block_group)
+ return -EINVAL;
+
+- btrfs_cache_block_group(block_group, 1);
+- /*
+- * Make sure we wait until the cache is completely built in case it is
+- * missing or is invalid and therefore needs to be rebuilt.
+- */
+- ret = btrfs_wait_block_group_cache_done(block_group);
++ ret = btrfs_cache_block_group(block_group, true);
+ if (ret)
+ goto out;
+
+@@ -4415,7 +4403,7 @@ have_block_group:
+ ffe_ctl->cached = btrfs_block_group_done(block_group);
+ if (unlikely(!ffe_ctl->cached)) {
+ ffe_ctl->have_caching_bg = true;
+- ret = btrfs_cache_block_group(block_group, 0);
++ ret = btrfs_cache_block_group(block_group, false);
+
+ /*
+ * If we get ENOMEM here or something else we want to
+@@ -6169,13 +6157,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *
+
+ if (end - start >= range->minlen) {
+ if (!btrfs_block_group_done(cache)) {
+- ret = btrfs_cache_block_group(cache, 0);
+- if (ret) {
+- bg_failed++;
+- bg_ret = ret;
+- continue;
+- }
+- ret = btrfs_wait_block_group_cache_done(cache);
++ ret = btrfs_cache_block_group(cache, true);
+ if (ret) {
+ bg_failed++;
+ bg_ret = ret;
--- /dev/null
+From 59a3991984dbc1fc47e5651a265c5200bd85464e Mon Sep 17 00:00:00 2001
+From: Anand Jain <anand.jain@oracle.com>
+Date: Fri, 12 Aug 2022 18:32:18 +0800
+Subject: btrfs: replace: drop assert for suspended replace
+
+From: Anand Jain <anand.jain@oracle.com>
+
+commit 59a3991984dbc1fc47e5651a265c5200bd85464e upstream.
+
+If the filesystem is mounted with the replace operation in a suspended
+state and we try to cancel it, we hit the assert. The assert came from
+commit fe97e2e173af ("btrfs: dev-replace: replace's scrub must not be
+running in suspended state"), which was actually not required. So just
+remove it.
+
+ $ mount /dev/sda5 /btrfs
+
+ BTRFS info (device sda5): cannot continue dev_replace, tgtdev is missing
+ BTRFS info (device sda5): you may cancel the operation after 'mount -o degraded'
+
+ $ mount -o degraded /dev/sda5 /btrfs <-- success.
+
+ $ btrfs replace cancel /btrfs
+
+ kernel: assertion failed: ret != -ENOTCONN, in fs/btrfs/dev-replace.c:1131
+ kernel: ------------[ cut here ]------------
+ kernel: kernel BUG at fs/btrfs/ctree.h:3750!
+
+After the patch:
+
+ $ btrfs replace cancel /btrfs
+
+ BTRFS info (device sda5): suspended dev_replace from /dev/sda5 (devid 1) to <missing disk> canceled
+
+Fixes: fe97e2e173af ("btrfs: dev-replace: replace's scrub must not be running in suspended state")
+CC: stable@vger.kernel.org # 5.0+
+Signed-off-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/dev-replace.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/fs/btrfs/dev-replace.c
++++ b/fs/btrfs/dev-replace.c
+@@ -1128,8 +1128,7 @@ int btrfs_dev_replace_cancel(struct btrf
+ up_write(&dev_replace->rwsem);
+
+ /* Scrub for replace must not be running in suspended state */
+- ret = btrfs_scrub_cancel(fs_info);
+- ASSERT(ret != -ENOTCONN);
++ btrfs_scrub_cancel(fs_info);
+
+ trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans)) {
--- /dev/null
+From e6e3dec6c3c288d556b991a85d5d8e3ee71e9046 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 8 Aug 2022 12:18:37 +0100
+Subject: btrfs: update generation of hole file extent item when merging holes
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit e6e3dec6c3c288d556b991a85d5d8e3ee71e9046 upstream.
+
+When punching a hole into a file range that is adjacent to a hole, and we
+are not using the no-holes feature, we expand the range of the adjacent
+file extent item that represents a hole, to save metadata space.
+
+However we don't update the generation of the hole file extent item, which
+means a full fsync will not log that file extent item if the fsync happens
+in a later transaction (since commit 7f30c07288bb9e ("btrfs: stop copying
+old file extents when doing a full fsync")).
+
+For example, if we do this:
+
+ $ mkfs.btrfs -f -O ^no-holes /dev/sdb
+ $ mount /dev/sdb /mnt
+ $ xfs_io -f -c "pwrite -S 0xab 2M 2M" /mnt/foobar
+ $ sync
+
+We end up with 2 file extent items in our file:
+
+1) One that represents the hole for the file range [0, 2M), with a
+ generation of 7;
+
+2) Another one that represents an extent covering the range [2M, 4M).
+
+After that if we do the following:
+
+ $ xfs_io -c "fpunch 2M 2M" /mnt/foobar
+
+We end up with a single file extent item in the file, which represents a
+hole for the range [0, 4M) and with a generation of 7 - because we end
+up dropping the data extent for the range [2M, 4M) and then update the
+file extent item that represented the hole at [0, 2M), by increasing
+its length from 2M to 4M.
+
+Then doing a full fsync and power failing:
+
+ $ xfs_io -c "fsync" /mnt/foobar
+ <power failure>
+
+will result in the full fsync not logging the file extent item that
+represents the hole for the range [0, 4M), because its generation is 7,
+which is lower than the generation of the current transaction (8).
+As a consequence, after mounting again the filesystem (after log replay),
+the region [2M, 4M) does not have a hole, it still points to the
+previous data extent.
+
+So fix this by always updating the generation of existing file extent
+items representing holes when we merge/expand them. This solves the
+problem and it's the same approach as when we merge prealloc extents that
+got written (at btrfs_mark_extent_written()). Setting the generation to
+the current transaction's generation is also what we do when merging
+the new hole extent map with the previous one or the next one.
+
+A test case for fstests, covering both cases of hole file extent item
+merging (to the left and to the right), will be sent soon.
+
+Fixes: 7f30c07288bb9e ("btrfs: stop copying old file extents when doing a full fsync")
+CC: stable@vger.kernel.org # 5.18+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/file.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -2483,6 +2483,7 @@ static int fill_holes(struct btrfs_trans
+ btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
+ btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
+ btrfs_set_file_extent_offset(leaf, fi, 0);
++ btrfs_set_file_extent_generation(leaf, fi, trans->transid);
+ btrfs_mark_buffer_dirty(leaf);
+ goto out;
+ }
+@@ -2499,6 +2500,7 @@ static int fill_holes(struct btrfs_trans
+ btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
+ btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
+ btrfs_set_file_extent_offset(leaf, fi, 0);
++ btrfs_set_file_extent_generation(leaf, fi, trans->transid);
+ btrfs_mark_buffer_dirty(leaf);
+ goto out;
+ }
--- /dev/null
+From a1d2eb51f0a33c28f5399a1610e66b3fbd24e884 Mon Sep 17 00:00:00 2001
+From: Paulo Alcantara <pc@cjr.nz>
+Date: Fri, 19 Aug 2022 17:00:19 -0300
+Subject: cifs: skip extra NULL byte in filenames
+
+From: Paulo Alcantara <pc@cjr.nz>
+
+commit a1d2eb51f0a33c28f5399a1610e66b3fbd24e884 upstream.
+
+Since commit 7eacba3b00a3 ("cifs: alloc_path_with_tree_prefix: do not
+append sep. if the path is empty"), the alloc_path_with_tree_prefix()
+function no longer includes the trailing separator when @path is empty,
+although @out_len was still assuming a path separator, thus adding an
+extra byte to the final filename.
+
+This has caused mount issues in some Synology servers due to the extra
+NULL byte in filenames when sending SMB2_CREATE requests with
+SMB2_FLAGS_DFS_OPERATIONS set.
+
+Fix this by checking if @path is not empty and only then adding the
+extra byte for the separator. Also, do not include any trailing NULL
+bytes in the filename, as MS-SMB2 requires it to be 8-byte aligned and
+not NULL terminated.
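+
+A worked example, assuming a tree name of 14 UTF-16 code units (e.g.
+"\\server\share") and an empty @path:
+
+  old: *out_len  = 14 + 1 + 0 = 15  (counts a separator never written)
+       *out_size = roundup((15 + 1) * 2, 8) = 32
+
+  new: *out_len  = 14 + 0 + 0 = 14
+       *out_size = roundup(14 * 2, 8) = 32, with 2 extra bytes
+       allocated for the null terminator but excluded from the name
+       length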
+
+Cc: stable@vger.kernel.org
+Fixes: 7eacba3b00a3 ("cifs: alloc_path_with_tree_prefix: do not append sep. if the path is empty")
+Signed-off-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/cifs/smb2pdu.c | 16 ++++++----------
+ 1 file changed, 6 insertions(+), 10 deletions(-)
+
+--- a/fs/cifs/smb2pdu.c
++++ b/fs/cifs/smb2pdu.c
+@@ -2571,19 +2571,15 @@ alloc_path_with_tree_prefix(__le16 **out
+
+ path_len = UniStrnlen((wchar_t *)path, PATH_MAX);
+
+- /*
+- * make room for one path separator between the treename and
+- * path
+- */
+- *out_len = treename_len + 1 + path_len;
++ /* make room for one path separator only if @path isn't empty */
++ *out_len = treename_len + (path[0] ? 1 : 0) + path_len;
+
+ /*
+- * final path needs to be null-terminated UTF16 with a
+- * size aligned to 8
++ * final path needs to be 8-byte aligned as specified in
++ * MS-SMB2 2.2.13 SMB2 CREATE Request.
+ */
+-
+- *out_size = roundup((*out_len+1)*2, 8);
+- *out_path = kzalloc(*out_size, GFP_KERNEL);
++ *out_size = roundup(*out_len * sizeof(__le16), 8);
++ *out_path = kzalloc(*out_size + sizeof(__le16) /* null */, GFP_KERNEL);
+ if (!*out_path)
+ return -ENOMEM;
+
--- /dev/null
+From a5a923038d70d2d4a86cb4e3f32625a5ee6e7e24 Mon Sep 17 00:00:00 2001
+From: Shigeru Yoshida <syoshida@redhat.com>
+Date: Fri, 19 Aug 2022 03:13:36 +0900
+Subject: fbdev: fbcon: Properly revert changes when vc_resize() failed
+
+From: Shigeru Yoshida <syoshida@redhat.com>
+
+commit a5a923038d70d2d4a86cb4e3f32625a5ee6e7e24 upstream.
+
+fbcon_do_set_font() calls vc_resize() when the font size is changed.
+However, if vc_resize() fails, the current implementation doesn't
+revert the font size changes, and this causes an inconsistent state.
+
+syzbot reported an unable-to-handle page fault due to this issue [1].
+syzbot's repro uses fault injection to make memory allocation fail, so
+vc_resize() failed.
+
+This patch fixes the issue by properly reverting the changes to the
+font-related data when vc_resize() fails.
+
+Link: https://syzkaller.appspot.com/bug?id=3443d3a1fa6d964dd7310a0cb1696d165a3e07c4 [1]
+Reported-by: syzbot+a168dbeaaa7778273c1b@syzkaller.appspotmail.com
+Signed-off-by: Shigeru Yoshida <syoshida@redhat.com>
+Signed-off-by: Helge Deller <deller@gmx.de>
+CC: stable@vger.kernel.org # 5.15+
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/video/fbdev/core/fbcon.c | 27 +++++++++++++++++++++++++--
+ 1 file changed, 25 insertions(+), 2 deletions(-)
+
+--- a/drivers/video/fbdev/core/fbcon.c
++++ b/drivers/video/fbdev/core/fbcon.c
+@@ -2402,15 +2402,21 @@ static int fbcon_do_set_font(struct vc_d
+ struct fb_info *info = fbcon_info_from_console(vc->vc_num);
+ struct fbcon_ops *ops = info->fbcon_par;
+ struct fbcon_display *p = &fb_display[vc->vc_num];
+- int resize;
++ int resize, ret, old_userfont, old_width, old_height, old_charcount;
+ char *old_data = NULL;
+
+ resize = (w != vc->vc_font.width) || (h != vc->vc_font.height);
+ if (p->userfont)
+ old_data = vc->vc_font.data;
+ vc->vc_font.data = (void *)(p->fontdata = data);
++ old_userfont = p->userfont;
+ if ((p->userfont = userfont))
+ REFCOUNT(data)++;
++
++ old_width = vc->vc_font.width;
++ old_height = vc->vc_font.height;
++ old_charcount = vc->vc_font.charcount;
++
+ vc->vc_font.width = w;
+ vc->vc_font.height = h;
+ vc->vc_font.charcount = charcount;
+@@ -2426,7 +2432,9 @@ static int fbcon_do_set_font(struct vc_d
+ rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres);
+ cols /= w;
+ rows /= h;
+- vc_resize(vc, cols, rows);
++ ret = vc_resize(vc, cols, rows);
++ if (ret)
++ goto err_out;
+ } else if (con_is_visible(vc)
+ && vc->vc_mode == KD_TEXT) {
+ fbcon_clear_margins(vc, 0);
+@@ -2436,6 +2444,21 @@ static int fbcon_do_set_font(struct vc_d
+ if (old_data && (--REFCOUNT(old_data) == 0))
+ kfree(old_data - FONT_EXTRA_WORDS * sizeof(int));
+ return 0;
++
++err_out:
++ p->fontdata = old_data;
++ vc->vc_font.data = (void *)old_data;
++
++ if (userfont) {
++ p->userfont = old_userfont;
++ REFCOUNT(data)--;
++ }
++
++ vc->vc_font.width = old_width;
++ vc->vc_font.height = old_height;
++ vc->vc_font.charcount = old_charcount;
++
++ return ret;
+ }
+
+ /*
--- /dev/null
+From c490a0b5a4f36da3918181a8acdc6991d967c5f3 Mon Sep 17 00:00:00 2001
+From: Siddh Raman Pant <code@siddh.me>
+Date: Tue, 23 Aug 2022 21:38:10 +0530
+Subject: loop: Check for overflow while configuring loop
+
+From: Siddh Raman Pant <code@siddh.me>
+
+commit c490a0b5a4f36da3918181a8acdc6991d967c5f3 upstream.
+
+Userspace can configure a loop device using an ioctl call, wherein
+a configuration of type loop_config is passed (see lo_ioctl()'s
+case on line 1550 of drivers/block/loop.c). This proceeds to call
+loop_configure() which in turn calls loop_set_status_from_info()
+(see line 1050 of loop.c), passing &config->info which is of type
+loop_info64*. This function then sets the appropriate values, like
+the offset.
+
+loop_device has lo_offset of type loff_t (see line 52 of loop.c),
+which is typedef-chained to long long, whereas loop_info64 has
+lo_offset of type __u64 (see line 56 of include/uapi/linux/loop.h).
+
+The function directly copies offset from info to the device as
+follows (See line 980 of loop.c):
+ lo->lo_offset = info->lo_offset;
+
+This results in an overflow, which triggers a warning in iomap_iter()
+due to a call to iomap_iter_done() which has:
+ WARN_ON_ONCE(iter->iomap.offset > iter->pos);
+
+Thus, check for negative values during loop_set_status_from_info().
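+
+A minimal sketch of the overflow, assuming a hostile offset from
+userspace:
+
+  __u64 info_lo_offset = 0xfffffffffffffff0ULL; /* loop_info64 value */
+  loff_t lo_offset = info_lo_offset;            /* long long: -16 */
+
+  if (lo_offset < 0)  /* the added check rejects it */
+          return -EOVERFLOW;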
+
+Bug report: https://syzkaller.appspot.com/bug?id=c620fe14aac810396d3c3edc9ad73848bf69a29e
+
+Reported-and-tested-by: syzbot+a8e049cd3abd342936b6@syzkaller.appspotmail.com
+Cc: stable@vger.kernel.org
+Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Signed-off-by: Siddh Raman Pant <code@siddh.me>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Link: https://lore.kernel.org/r/20220823160810.181275-1-code@siddh.me
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/block/loop.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/block/loop.c
++++ b/drivers/block/loop.c
+@@ -979,6 +979,11 @@ loop_set_status_from_info(struct loop_de
+
+ lo->lo_offset = info->lo_offset;
+ lo->lo_sizelimit = info->lo_sizelimit;
++
++ /* loff_t vars have been assigned __u64 */
++ if (lo->lo_offset < 0 || lo->lo_sizelimit < 0)
++ return -EOVERFLOW;
++
+ memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
+ lo->lo_file_name[LO_NAME_SIZE-1] = 0;
+ lo->lo_flags = info->lo_flags;
--- /dev/null
+From d26f60703606ab425eee9882b32a1781a8bed74d Mon Sep 17 00:00:00 2001
+From: Badari Pulavarty <badari.pulavarty@intel.com>
+Date: Sun, 21 Aug 2022 18:08:53 +0000
+Subject: mm/damon/dbgfs: avoid duplicate context directory creation
+
+From: Badari Pulavarty <badari.pulavarty@intel.com>
+
+commit d26f60703606ab425eee9882b32a1781a8bed74d upstream.
+
+When a user tries to create a DAMON context via the DAMON debugfs
+interface with the name of an already existing context, the context
+directory creation fails, but a new context is still created and added
+to the internal data structure, due to the absence of a check on the
+directory creation's success. As a result, memory could leak and DAMON
+cannot be turned on. An example test case is as below:
+
+ # cd /sys/kernel/debug/damon/
+ # echo "off" > monitor_on
+ # echo paddr > target_ids
+ # echo "abc" > mk_context
+ # echo "abc" > mk_context
+ # echo $$ > abc/target_ids
+ # echo "on" > monitor_on <<< fails
+
+The return value of 'debugfs_create_dir()' is expected to be ignored in
+general, but this is an exceptional case, as the DAMON feature depends
+on the debugfs functionality and has the potential duplicate name
+issue. This commit therefore fixes the issue by checking for directory
+creation failure and immediately returning the error in that case.
+
+Link: https://lkml.kernel.org/r/20220821180853.2400-1-sj@kernel.org
+Fixes: 75c1c2b53c78 ("mm/damon/dbgfs: support multiple contexts")
+Signed-off-by: Badari Pulavarty <badari.pulavarty@intel.com>
+Signed-off-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org> [ 5.15.x]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/damon/dbgfs.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/mm/damon/dbgfs.c
++++ b/mm/damon/dbgfs.c
+@@ -787,6 +787,9 @@ static int dbgfs_mk_context(char *name)
+ return -ENOENT;
+
+ new_dir = debugfs_create_dir(name, root);
++ /* Below check is required for a potential duplicated name case */
++ if (IS_ERR(new_dir))
++ return PTR_ERR(new_dir);
+ dbgfs_dirs[dbgfs_nr_ctxs] = new_dir;
+
+ new_ctx = dbgfs_new_ctx();
--- /dev/null
+From ab74ef708dc51df7cf2b8a890b9c6990fac5c0c6 Mon Sep 17 00:00:00 2001
+From: Miaohe Lin <linmiaohe@huawei.com>
+Date: Tue, 12 Jul 2022 21:05:42 +0800
+Subject: mm/hugetlb: avoid corrupting page->mapping in hugetlb_mcopy_atomic_pte
+
+From: Miaohe Lin <linmiaohe@huawei.com>
+
+commit ab74ef708dc51df7cf2b8a890b9c6990fac5c0c6 upstream.
+
+In the MCOPY_ATOMIC_CONTINUE case with a non-shared VMA, pages in the
+page cache are installed in the ptes. But hugepage_add_new_anon_rmap()
+is mistakenly called for them because they're not vm_shared. This
+corrupts the page->mapping used by the page cache code.
+
+Link: https://lkml.kernel.org/r/20220712130542.18836-1-linmiaohe@huawei.com
+Fixes: f619147104c8 ("userfaultfd: add UFFDIO_CONTINUE ioctl")
+Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Axel Rasmussen <axelrasmussen@google.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/hugetlb.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -6026,7 +6026,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_s
+ if (!huge_pte_none_mostly(huge_ptep_get(dst_pte)))
+ goto out_release_unlock;
+
+- if (vm_shared) {
++ if (page_in_pagecache) {
+ page_dup_file_rmap(page, true);
+ } else {
+ ClearHPageRestoreReserve(page);
--- /dev/null
+From 3d2f78f08cd8388035ac375e731ec1ac1b79b09d Mon Sep 17 00:00:00 2001
+From: Peter Xu <peterx@redhat.com>
+Date: Tue, 23 Aug 2022 18:11:38 -0400
+Subject: mm/mprotect: only reference swap pfn page if type match
+
+From: Peter Xu <peterx@redhat.com>
+
+commit 3d2f78f08cd8388035ac375e731ec1ac1b79b09d upstream.
+
+Yu Zhao reported a bug after the commit "mm/swap: Add swp_offset_pfn() to
+fetch PFN from swap entry" added a check in swp_offset_pfn() for swap type [1]:
+
+ kernel BUG at include/linux/swapops.h:117!
+ CPU: 46 PID: 5245 Comm: EventManager_De Tainted: G S O L 6.0.0-dbg-DEV #2
+ RIP: 0010:pfn_swap_entry_to_page+0x72/0xf0
+ Code: c6 48 8b 36 48 83 fe ff 74 53 48 01 d1 48 83 c1 08 48 8b 09 f6
+ c1 01 75 7b 66 90 48 89 c1 48 8b 09 f6 c1 01 74 74 5d c3 eb 9e <0f> 0b
+ 48 ba ff ff ff ff 03 00 00 00 eb ae a9 ff 0f 00 00 75 13 48
+ RSP: 0018:ffffa59e73fabb80 EFLAGS: 00010282
+ RAX: 00000000ffffffe8 RBX: 0c00000000000000 RCX: ffffcd5440000000
+ RDX: 1ffffffffff7a80a RSI: 0000000000000000 RDI: 0c0000000000042b
+ RBP: ffffa59e73fabb80 R08: ffff9965ca6e8bb8 R09: 0000000000000000
+ R10: ffffffffa5a2f62d R11: 0000030b372e9fff R12: ffff997b79db5738
+ R13: 000000000000042b R14: 0c0000000000042b R15: 1ffffffffff7a80a
+ FS: 00007f549d1bb700(0000) GS:ffff99d3cf680000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 0000440d035b3180 CR3: 0000002243176004 CR4: 00000000003706e0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ Call Trace:
+ <TASK>
+ change_pte_range+0x36e/0x880
+ change_p4d_range+0x2e8/0x670
+ change_protection_range+0x14e/0x2c0
+ mprotect_fixup+0x1ee/0x330
+ do_mprotect_pkey+0x34c/0x440
+ __x64_sys_mprotect+0x1d/0x30
+
+It triggers because pfn_swap_entry_to_page() could be called upon e.g. a
+genuine swap entry.
+
+Fix it by only calling it when it's a write migration entry where the page*
+is used.
+
+[1] https://lore.kernel.org/lkml/CAOUHufaVC2Za-p8m0aiHw6YkheDcrO-C3wRGixwDS32VTS+k1w@mail.gmail.com/
+
+Link: https://lkml.kernel.org/r/20220823221138.45602-1-peterx@redhat.com
+Fixes: 6c287605fd56 ("mm: remember exclusively mapped anonymous pages with PG_anon_exclusive")
+Signed-off-by: Peter Xu <peterx@redhat.com>
+Reported-by: Yu Zhao <yuzhao@google.com>
+Tested-by: Yu Zhao <yuzhao@google.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Cc: "Huang, Ying" <ying.huang@intel.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/mprotect.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/mm/mprotect.c
++++ b/mm/mprotect.c
+@@ -158,10 +158,11 @@ static unsigned long change_pte_range(st
+ pages++;
+ } else if (is_swap_pte(oldpte)) {
+ swp_entry_t entry = pte_to_swp_entry(oldpte);
+- struct page *page = pfn_swap_entry_to_page(entry);
+ pte_t newpte;
+
+ if (is_writable_migration_entry(entry)) {
++ struct page *page = pfn_swap_entry_to_page(entry);
++
+ /*
+ * A protection check is difficult so
+ * just be safe and disable write
--- /dev/null
+From 6b04ce966a738ecdd9294c9593e48513c0dc90aa Mon Sep 17 00:00:00 2001
+From: Karol Herbst <kherbst@redhat.com>
+Date: Fri, 19 Aug 2022 22:09:28 +0200
+Subject: nouveau: explicitly wait on the fence in nouveau_bo_move_m2mf
+
+From: Karol Herbst <kherbst@redhat.com>
+
+commit 6b04ce966a738ecdd9294c9593e48513c0dc90aa upstream.
+
+It is a bit unclear to us why that's helping, but it does, and it
+unbreaks suspend/resume on a lot of GPUs without any known drawbacks.
+
+Cc: stable@vger.kernel.org # v5.15+
+Closes: https://gitlab.freedesktop.org/drm/nouveau/-/issues/156
+Signed-off-by: Karol Herbst <kherbst@redhat.com>
+Reviewed-by: Lyude Paul <lyude@redhat.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20220819200928.401416-1-kherbst@redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/nouveau/nouveau_bo.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
++++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
+@@ -820,6 +820,15 @@ nouveau_bo_move_m2mf(struct ttm_buffer_o
+ if (ret == 0) {
+ ret = nouveau_fence_new(chan, false, &fence);
+ if (ret == 0) {
++ /* TODO: figure out a better solution here
++ *
++ * wait on the fence here explicitly as going through
++ * ttm_bo_move_accel_cleanup somehow doesn't seem to do it.
++ *
++ * Without this the operation can timeout and we'll fallback to a
++ * software copy, which might take several minutes to finish.
++ */
++ nouveau_fence_wait(fence, false, false);
+ ret = ttm_bo_move_accel_cleanup(bo,
+ &fence->base,
+ evict, false,
--- /dev/null
+From 550842cc60987b269e31b222283ade3e1b6c7fc8 Mon Sep 17 00:00:00 2001
+From: Heming Zhao <ocfs2-devel@oss.oracle.com>
+Date: Mon, 15 Aug 2022 16:57:54 +0800
+Subject: ocfs2: fix freeing uninitialized resource on ocfs2_dlm_shutdown
+
+From: Heming Zhao <ocfs2-devel@oss.oracle.com>
+
+commit 550842cc60987b269e31b222283ade3e1b6c7fc8 upstream.
+
+After commit 0737e01de9c4 ("ocfs2: ocfs2_mount_volume does cleanup job
+before return error"), any failure after ocfs2_dlm_init() will trigger
+a crash when calling ocfs2_dlm_shutdown().
+
+I.e. in local mount mode, no dlm resource is initialized. If
+ocfs2_mount_volume() fails in ocfs2_find_slot(), the error handling
+calls ocfs2_dlm_shutdown(), which then does the dlm resource cleanup
+job and triggers the kernel crash.
+
+This solution should bypass uninitialized resources in
+ocfs2_dlm_shutdown().
+
+Link: https://lkml.kernel.org/r/20220815085754.20417-1-heming.zhao@suse.com
+Fixes: 0737e01de9c4 ("ocfs2: ocfs2_mount_volume does cleanup job before return error")
+Signed-off-by: Heming Zhao <heming.zhao@suse.com>
+Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
+Cc: Mark Fasheh <mark@fasheh.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Cc: Changwei Ge <gechangwei@live.cn>
+Cc: Gang He <ghe@suse.com>
+Cc: Jun Piao <piaojun@huawei.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ocfs2/dlmglue.c | 8 +++++---
+ fs/ocfs2/super.c | 3 +--
+ 2 files changed, 6 insertions(+), 5 deletions(-)
+
+--- a/fs/ocfs2/dlmglue.c
++++ b/fs/ocfs2/dlmglue.c
+@@ -3403,10 +3403,12 @@ void ocfs2_dlm_shutdown(struct ocfs2_sup
+ ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres);
+ ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres);
+
+- ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
+- osb->cconn = NULL;
++ if (osb->cconn) {
++ ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
++ osb->cconn = NULL;
+
+- ocfs2_dlm_shutdown_debug(osb);
++ ocfs2_dlm_shutdown_debug(osb);
++ }
+ }
+
+ static int ocfs2_drop_lock(struct ocfs2_super *osb,
+--- a/fs/ocfs2/super.c
++++ b/fs/ocfs2/super.c
+@@ -1914,8 +1914,7 @@ static void ocfs2_dismount_volume(struct
+ !ocfs2_is_hard_readonly(osb))
+ hangup_needed = 1;
+
+- if (osb->cconn)
+- ocfs2_dlm_shutdown(osb, hangup_needed);
++ ocfs2_dlm_shutdown(osb, hangup_needed);
+
+ ocfs2_blockcheck_stats_debugfs_remove(&osb->osb_ecc_stats);
+ debugfs_remove_recursive(osb->osb_debug_root);
--- /dev/null
+From cde643ff75bc20c538dfae787ca3b587bab16b50 Mon Sep 17 00:00:00 2001
+From: Kan Liang <kan.liang@linux.intel.com>
+Date: Thu, 18 Aug 2022 11:44:29 -0700
+Subject: perf/x86/intel: Fix pebs event constraints for ADL
+
+From: Kan Liang <kan.liang@linux.intel.com>
+
+commit cde643ff75bc20c538dfae787ca3b587bab16b50 upstream.
+
+According to the latest event list, the LOAD_LATENCY PEBS event only
+works on GP counters 0 and 1 for ADL and RPL.
+
+Update the pebs event constraints table.
+
+Fixes: f83d2f91d259 ("perf/x86/intel: Add Alder Lake Hybrid support")
+Reported-by: Ammy Yi <ammy.yi@intel.com>
+Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20220818184429.2355857-1-kan.liang@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/intel/ds.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/events/intel/ds.c
++++ b/arch/x86/events/intel/ds.c
+@@ -822,7 +822,7 @@ struct event_constraint intel_glm_pebs_e
+
+ struct event_constraint intel_grt_pebs_event_constraints[] = {
+ /* Allow all events as PEBS with no flags */
+- INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0xf),
++ INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0x3),
+ INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xf),
+ EVENT_CONSTRAINT_END
+ };
--- /dev/null
+From 32ba156df1b1c8804a4e5be5339616945eafea22 Mon Sep 17 00:00:00 2001
+From: Kan Liang <kan.liang@linux.intel.com>
+Date: Tue, 16 Aug 2022 05:56:11 -0700
+Subject: perf/x86/lbr: Enable the branch type for the Arch LBR by default
+
+From: Kan Liang <kan.liang@linux.intel.com>
+
+commit 32ba156df1b1c8804a4e5be5339616945eafea22 upstream.
+
+On a platform with Arch LBR, the HW raw branch type encoding may leak
+to the perf tool when the SAVE_TYPE option is not set.
+
+In the intel_pmu_store_lbr(), the HW raw branch type is stored in
+lbr_entries[].type. If the SAVE_TYPE option is set, the
+lbr_entries[].type will be converted into the generic PERF_BR_* type
+in the intel_pmu_lbr_filter() and exposed to the user tools.
+But if the SAVE_TYPE option is NOT set by the user, the current perf
+kernel doesn't clear the field. The HW raw branch type leaks.
+
+There are two solutions to fix the issue for the Arch LBR.
+One is to clear the field if the SAVE_TYPE option is NOT set.
+The other solution is to unconditionally convert the branch type and
+expose the generic type to the user tools.
+
+The latter is implemented here, because
+- The branch type is valuable information. I don't see a case where
+ you would not benefit from the branch type. (Stephane Eranian)
+- Not having the branch type DOES NOT save any space in the
+ branch record (Stephane Eranian)
+- The Arch LBR HW can retrieve the common branch types from the
+ LBR_INFO. It doesn't require the high overhead SW disassemble.
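+
+With this change, even a plain branch-stack sample, e.g.
+
+  perf record -j any -- <workload>
+
+reports the generic PERF_BR_* branch types on Arch LBR machines rather
+than leaking the raw HW encoding.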
+
+Fixes: 47125db27e47 ("perf/x86/intel/lbr: Support Architectural LBR")
+Reported-by: Stephane Eranian <eranian@google.com>
+Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20220816125612.2042397-1-kan.liang@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/intel/lbr.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/arch/x86/events/intel/lbr.c
++++ b/arch/x86/events/intel/lbr.c
+@@ -1097,6 +1097,14 @@ static int intel_pmu_setup_hw_lbr_filter
+
+ if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
+ reg->config = mask;
++
++ /*
++ * The Arch LBR HW can retrieve the common branch types
++ * from the LBR_INFO. It doesn't require the high overhead
++ * SW disassemble.
++ * Enable the branch type by default for the Arch LBR.
++ */
++ reg->reg |= X86_BR_TYPE_SAVE;
+ return 0;
+ }
+
--- /dev/null
+From dbb16df6443c59e8a1ef21c2272fcf387d600ddf Mon Sep 17 00:00:00 2001
+From: Shakeel Butt <shakeelb@google.com>
+Date: Wed, 17 Aug 2022 17:21:39 +0000
+Subject: Revert "memcg: cleanup racy sum avoidance code"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Shakeel Butt <shakeelb@google.com>
+
+commit dbb16df6443c59e8a1ef21c2272fcf387d600ddf upstream.
+
+This reverts commit 96e51ccf1af33e82f429a0d6baebba29c6448d0f.
+
+Recently we started running the kernel with the rstat infrastructure on
+production traffic and began to see negative memcg stats values.
+In particular, the 'sock' stat is the one we observed going negative.
+
+$ grep "sock " /mnt/memory/job/memory.stat
+sock 253952
+total_sock 18446744073708724224
+
+Re-run after a couple of seconds:
+
+$ grep "sock " /mnt/memory/job/memory.stat
+sock 253952
+total_sock 53248
+
+For now we are only seeing this issue on large machines (256 CPUs) and
+only with the 'sock' stat. I think the networking stack increases the
+stat on one CPU and decreases it on another CPU much more often. So,
+this negative 'sock' value is due to the rstat flusher flushing the
+stats on the CPU that has seen the decrement but missing the CPU that
+has seen the increments. A typical race condition.
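+
+Roughly, an illustrative interleaving (not an actual trace):
+
+  CPU A: sock += N              /* per-cpu delta +N */
+  CPU B: sock -= N              /* per-cpu delta -N */
+  flusher: folds CPU B's -N into the global sum,
+           but hasn't visited CPU A yet
+  reader: sees sum < 0, shown as a huge unsigned total_sock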
+
+For an easy stable backport, a revert is the simplest solution. For a
+long-term solution, I am thinking of two directions. The first is to
+reduce the race window by optimizing the rstat flusher. The second is,
+if the reader sees a negative stat value, to force a flush and restart
+the stat collection; basically a retry, but limited.
+
+Link: https://lkml.kernel.org/r/20220817172139.3141101-1-shakeelb@google.com
+Fixes: 96e51ccf1af33e8 ("memcg: cleanup racy sum avoidance code")
+Signed-off-by: Shakeel Butt <shakeelb@google.com>
+Cc: "Michal Koutný" <mkoutny@suse.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Roman Gushchin <roman.gushchin@linux.dev>
+Cc: Muchun Song <songmuchun@bytedance.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Yosry Ahmed <yosryahmed@google.com>
+Cc: Greg Thelen <gthelen@google.com>
+Cc: <stable@vger.kernel.org> [5.15]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/memcontrol.h | 15 +++++++++++++--
+ 1 file changed, 13 insertions(+), 2 deletions(-)
+
+--- a/include/linux/memcontrol.h
++++ b/include/linux/memcontrol.h
+@@ -978,19 +978,30 @@ static inline void mod_memcg_page_state(
+
+ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
+ {
+- return READ_ONCE(memcg->vmstats.state[idx]);
++ long x = READ_ONCE(memcg->vmstats.state[idx]);
++#ifdef CONFIG_SMP
++ if (x < 0)
++ x = 0;
++#endif
++ return x;
+ }
+
+ static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
+ enum node_stat_item idx)
+ {
+ struct mem_cgroup_per_node *pn;
++ long x;
+
+ if (mem_cgroup_disabled())
+ return node_page_state(lruvec_pgdat(lruvec), idx);
+
+ pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+- return READ_ONCE(pn->lruvec_stats.state[idx]);
++ x = READ_ONCE(pn->lruvec_stats.state[idx]);
++#ifdef CONFIG_SMP
++ if (x < 0)
++ x = 0;
++#endif
++ return x;
+ }
+
+ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
--- /dev/null
+From 34fc9cc3aebe8b9e27d3bc821543dd482dc686ca Mon Sep 17 00:00:00 2001
+From: Heinrich Schuchardt <heinrich.schuchardt@canonical.com>
+Date: Wed, 17 Aug 2022 15:25:21 +0200
+Subject: riscv: dts: microchip: correct L2 cache interrupts
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Heinrich Schuchardt <heinrich.schuchardt@canonical.com>
+
+commit 34fc9cc3aebe8b9e27d3bc821543dd482dc686ca upstream.
+
+The "PolarFire SoC MSS Technical Reference Manual" documents the
+following PLIC interrupts:
+
+1 - L2 Cache Controller Signals when a metadata correction event occurs
+2 - L2 Cache Controller Signals when an uncorrectable metadata event occurs
+3 - L2 Cache Controller Signals when a data correction event occurs
+4 - L2 Cache Controller Signals when an uncorrectable data event occurs
+
+This differs from the SiFive FU540 which only has three L2 cache related
+interrupts.
+
+The sequence in the device tree is defined by an enum:
+
+ enum {
+         DIR_CORR = 0,
+         DATA_CORR,
+         DATA_UNCORR,
+         DIR_UNCORR,
+ };
+
+So the correct sequence of the L2 cache interrupts is
+
+ interrupts = <1>, <3>, <4>, <2>;
+
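+That is, enum index to PLIC interrupt (as derived from the TRM list
+above):
+
+  DIR_CORR    (0) -> 1  metadata correction
+  DATA_CORR   (1) -> 3  data correction
+  DATA_UNCORR (2) -> 4  uncorrectable data
+  DIR_UNCORR  (3) -> 2  uncorrectable metadata
+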
+[Conor]
+This manifests as an unusable system if the l2-cache driver is enabled,
+as the wrong interrupt gets cleared and the handler prints errors to the
+console ad infinitum.
+
+Fixes: 0fa6107eca41 ("RISC-V: Initial DTS for Microchip ICICLE board")
+CC: stable@vger.kernel.org # 5.15: e35b07a7df9b: riscv: dts: microchip: mpfs: Group tuples in interrupt properties
+Signed-off-by: Heinrich Schuchardt <heinrich.schuchardt@canonical.com>
+Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/boot/dts/microchip/mpfs.dtsi | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/riscv/boot/dts/microchip/mpfs.dtsi
++++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi
+@@ -169,7 +169,7 @@
+ cache-size = <2097152>;
+ cache-unified;
+ interrupt-parent = <&plic>;
+- interrupts = <1>, <2>, <3>;
++ interrupts = <1>, <3>, <4>, <2>;
+ };
+
+ clint: clint@2000000 {
--- /dev/null
+From b5c3aca86d2698c4850b6ee8b341938025d2780c Mon Sep 17 00:00:00 2001
+From: Conor Dooley <conor.dooley@microchip.com>
+Date: Sun, 14 Aug 2022 15:12:37 +0100
+Subject: riscv: signal: fix missing prototype warning
+
+From: Conor Dooley <conor.dooley@microchip.com>
+
+commit b5c3aca86d2698c4850b6ee8b341938025d2780c upstream.
+
+Fix the warning:
+arch/riscv/kernel/signal.c:316:27: warning: no previous prototype for function 'do_notify_resume' [-Wmissing-prototypes]
+asmlinkage __visible void do_notify_resume(struct pt_regs *regs,
+
+All other functions in the file are static and none of the existing
+headers stood out as an obvious location. Create signal.h to hold the
+declaration.
+
+Fixes: e2c0cdfba7f6 ("RISC-V: User-facing API")
+Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20220814141237.493457-4-mail@conchuod.ie
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/include/asm/signal.h | 12 ++++++++++++
+ arch/riscv/kernel/signal.c | 1 +
+ 2 files changed, 13 insertions(+)
+ create mode 100644 arch/riscv/include/asm/signal.h
+
+--- /dev/null
++++ b/arch/riscv/include/asm/signal.h
+@@ -0,0 +1,12 @@
++/* SPDX-License-Identifier: GPL-2.0-only */
++
++#ifndef __ASM_SIGNAL_H
++#define __ASM_SIGNAL_H
++
++#include <uapi/asm/signal.h>
++#include <uapi/asm/ptrace.h>
++
++asmlinkage __visible
++void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags);
++
++#endif
+--- a/arch/riscv/kernel/signal.c
++++ b/arch/riscv/kernel/signal.c
+@@ -15,6 +15,7 @@
+
+ #include <asm/ucontext.h>
+ #include <asm/vdso.h>
++#include <asm/signal.h>
+ #include <asm/signal32.h>
+ #include <asm/switch_to.h>
+ #include <asm/csr.h>
--- /dev/null
+From d951b20b9def73dcc39a5379831525d0d2a537e9 Mon Sep 17 00:00:00 2001
+From: Conor Dooley <conor.dooley@microchip.com>
+Date: Sun, 14 Aug 2022 15:12:38 +0100
+Subject: riscv: traps: add missing prototype
+
+From: Conor Dooley <conor.dooley@microchip.com>
+
+commit d951b20b9def73dcc39a5379831525d0d2a537e9 upstream.
+
+Sparse complains:
+arch/riscv/kernel/traps.c:213:6: warning: symbol 'shadow_stack' was not declared. Should it be static?
+
+The variable is used in entry.S, so declare shadow_stack there
+alongside SHADOW_OVERFLOW_STACK_SIZE.
+
+Fixes: 31da94c25aea ("riscv: add VMAP_STACK overflow detection")
+Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20220814141237.493457-5-mail@conchuod.ie
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/include/asm/thread_info.h | 2 ++
+ arch/riscv/kernel/traps.c | 3 ++-
+ 2 files changed, 4 insertions(+), 1 deletion(-)
+
+--- a/arch/riscv/include/asm/thread_info.h
++++ b/arch/riscv/include/asm/thread_info.h
+@@ -42,6 +42,8 @@
+
+ #ifndef __ASSEMBLY__
+
++extern long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE / sizeof(long)];
++
+ #include <asm/processor.h>
+ #include <asm/csr.h>
+
+--- a/arch/riscv/kernel/traps.c
++++ b/arch/riscv/kernel/traps.c
+@@ -20,9 +20,10 @@
+
+ #include <asm/asm-prototypes.h>
+ #include <asm/bug.h>
++#include <asm/csr.h>
+ #include <asm/processor.h>
+ #include <asm/ptrace.h>
+-#include <asm/csr.h>
++#include <asm/thread_info.h>
+
+ int show_unhandled_signals = 1;
+
--- /dev/null
+From 13cccafe0edcd03bf1c841de8ab8a1c8e34f77d9 Mon Sep 17 00:00:00 2001
+From: Brian Foster <bfoster@redhat.com>
+Date: Tue, 16 Aug 2022 11:54:07 -0400
+Subject: s390: fix double free of GS and RI CBs on fork() failure
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 13cccafe0edcd03bf1c841de8ab8a1c8e34f77d9 upstream.
+
+The pointers for guarded storage and runtime instrumentation control
+blocks are stored in the thread_struct of the associated task. These
+pointers are initially copied on fork() via arch_dup_task_struct()
+and then cleared via copy_thread() before fork() returns. If fork()
+happens to fail after the initial task dup and before copy_thread(),
+the newly allocated task and associated thread_struct memory are
+freed via free_task() -> arch_release_task_struct(). This results in
+a double free of the guarded storage and runtime info structs
+because the fields in the failed task still refer to memory
+associated with the source task.
+
+This problem can manifest as a BUG_ON() in set_freepointer() (with
+CONFIG_SLAB_FREELIST_HARDENED enabled) or KASAN splat (if enabled)
+when running trinity syscall fuzz tests on s390x. To avoid this
+problem, clear the associated pointer fields in
+arch_dup_task_struct() immediately after the new task is copied.
+Note that the RI flag is still cleared in copy_thread() because it
+resides in thread stack memory and that is where stack info is
+copied.
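+
+Rough call flow of the failing case (for illustration only):
+
+  copy_process()
+    arch_dup_task_struct()        /* dst inherits src's gs/ri CB pointers */
+    ... fork() fails before copy_thread() runs ...
+    free_task()
+      arch_release_task_struct()  /* frees the CBs through the stale pointers */
+  /* the source task later frees the same CBs again on exit */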
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Fixes: 8d9047f8b967c ("s390/runtime instrumentation: simplify task exit handling")
+Fixes: 7b83c6297d2fc ("s390/guarded storage: simplify task exit handling")
+Cc: <stable@vger.kernel.org> # 4.15
+Reviewed-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Link: https://lore.kernel.org/r/20220816155407.537372-1-bfoster@redhat.com
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/kernel/process.c | 22 ++++++++++++++++------
+ 1 file changed, 16 insertions(+), 6 deletions(-)
+
+--- a/arch/s390/kernel/process.c
++++ b/arch/s390/kernel/process.c
+@@ -91,6 +91,18 @@ int arch_dup_task_struct(struct task_str
+
+ memcpy(dst, src, arch_task_struct_size);
+ dst->thread.fpu.regs = dst->thread.fpu.fprs;
++
++ /*
++ * Don't transfer over the runtime instrumentation or the guarded
++ * storage control block pointers. These fields are cleared here instead
++ * of in copy_thread() to avoid premature freeing of associated memory
++ * on fork() failure. Wait to clear the RI flag because ->stack still
++ * refers to the source thread.
++ */
++ dst->thread.ri_cb = NULL;
++ dst->thread.gs_cb = NULL;
++ dst->thread.gs_bc_cb = NULL;
++
+ return 0;
+ }
+
+@@ -150,13 +162,11 @@ int copy_thread(struct task_struct *p, c
+ frame->childregs.flags = 0;
+ if (new_stackp)
+ frame->childregs.gprs[15] = new_stackp;
+-
+- /* Don't copy runtime instrumentation info */
+- p->thread.ri_cb = NULL;
++ /*
++ * Clear the runtime instrumentation flag after the above childregs
++ * copy. The CB pointer was already cleared in arch_dup_task_struct().
++ */
+ frame->childregs.psw.mask &= ~PSW_MASK_RI;
+- /* Don't copy guarded storage control block */
+- p->thread.gs_cb = NULL;
+- p->thread.gs_bc_cb = NULL;
+
+ /* Set a new TLS ? */
+ if (clone_flags & CLONE_SETTLS) {
--- /dev/null
+From 41ac42f137080bc230b5882e3c88c392ab7f2d32 Mon Sep 17 00:00:00 2001
+From: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+Date: Wed, 17 Aug 2022 15:26:03 +0200
+Subject: s390/mm: do not trigger write fault when vma does not allow VM_WRITE
+
+From: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+
+commit 41ac42f137080bc230b5882e3c88c392ab7f2d32 upstream.
+
+For non-protection pXd_none() page faults in do_dat_exception(), we
+call do_exception() with access == (VM_READ | VM_WRITE | VM_EXEC).
+In do_exception(), vma->vm_flags is checked against that before
+calling handle_mm_fault().
+
+Since commit 92f842eac7ee3 ("[S390] store indication fault optimization"),
+we call handle_mm_fault() with FAULT_FLAG_WRITE, when recognizing that
+it was a write access. However, the vma flags check is still only
+checking against (VM_READ | VM_WRITE | VM_EXEC), and therefore also
+calling handle_mm_fault() with FAULT_FLAG_WRITE in cases where the vma
+does not allow VM_WRITE.
+
+Fix this by changing access check in do_exception() to VM_WRITE only,
+when recognizing write access.
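+
+Concrete example (hypothetical): a store to a PROT_READ-only mapping
+
+  is_write = true, access = VM_READ | VM_WRITE | VM_EXEC
+  vma->vm_flags & access != 0              /* read-only vma still passes */
+  handle_mm_fault(..., FAULT_FLAG_WRITE)   /* although the vma forbids VM_WRITE */
+
+With the fix, access is narrowed to VM_WRITE first, so the vm_flags
+check rejects the write up front.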
+
+Link: https://lkml.kernel.org/r/20220811103435.188481-3-david@redhat.com
+Fixes: 92f842eac7ee3 ("[S390] store indication fault optimization")
+Cc: <stable@vger.kernel.org>
+Reported-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/mm/fault.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/s390/mm/fault.c
++++ b/arch/s390/mm/fault.c
+@@ -379,7 +379,9 @@ static inline vm_fault_t do_exception(st
+ flags = FAULT_FLAG_DEFAULT;
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
+- if (access == VM_WRITE || is_write)
++ if (is_write)
++ access = VM_WRITE;
++ if (access == VM_WRITE)
+ flags |= FAULT_FLAG_WRITE;
+ mmap_read_lock(mm);
+
net-lantiq_xrx200-confirm-skb-is-allocated-before-us.patch
net-lantiq_xrx200-fix-lock-under-memory-pressure.patch
net-lantiq_xrx200-restore-buffer-if-memory-allocatio.patch
+btrfs-fix-silent-failure-when-deleting-root-reference.patch
+btrfs-replace-drop-assert-for-suspended-replace.patch
+btrfs-add-info-when-mount-fails-due-to-stale-replace-target.patch
+btrfs-fix-space-cache-corruption-and-potential-double-allocations.patch
+btrfs-check-if-root-is-readonly-while-setting-security-xattr.patch
+btrfs-fix-possible-memory-leak-in-btrfs_get_dev_args_from_path.patch
+btrfs-update-generation-of-hole-file-extent-item-when-merging-holes.patch
+x86-boot-don-t-propagate-uninitialized-boot_params-cc_blob_address.patch
+perf-x86-intel-fix-pebs-event-constraints-for-adl.patch
+perf-x86-lbr-enable-the-branch-type-for-the-arch-lbr-by-default.patch
+x86-entry-fix-entry_int80_compat-for-xen-pv-guests.patch
+x86-unwind-orc-unwind-ftrace-trampolines-with-correct-orc-entry.patch
+x86-sev-don-t-use-cc_platform_has-for-early-sev-snp-calls.patch
+x86-bugs-add-unknown-reporting-for-mmio-stale-data.patch
+x86-nospec-unwreck-the-rsb-stuffing.patch
+x86-pat-have-pat_enabled-properly-reflect-state-when-running-on-xen.patch
+loop-check-for-overflow-while-configuring-loop.patch
+writeback-avoid-use-after-free-after-removing-device.patch
+audit-move-audit_return_fixup-before-the-filters.patch
+asm-generic-sections-refactor-memory_intersects.patch
+mm-damon-dbgfs-avoid-duplicate-context-directory-creation.patch
+s390-mm-do-not-trigger-write-fault-when-vma-does-not-allow-vm_write.patch
+bootmem-remove-the-vmemmap-pages-from-kmemleak-in-put_page_bootmem.patch
+mm-hugetlb-avoid-corrupting-page-mapping-in-hugetlb_mcopy_atomic_pte.patch
+mm-mprotect-only-reference-swap-pfn-page-if-type-match.patch
+cifs-skip-extra-null-byte-in-filenames.patch
+s390-fix-double-free-of-gs-and-ri-cbs-on-fork-failure.patch
+fbdev-fbcon-properly-revert-changes-when-vc_resize-failed.patch
+revert-memcg-cleanup-racy-sum-avoidance-code.patch
+shmem-update-folio-if-shmem_replace_page-updates-the-page.patch
+acpi-processor-remove-freq-qos-request-for-all-cpus.patch
+nouveau-explicitly-wait-on-the-fence-in-nouveau_bo_move_m2mf.patch
+smb3-missing-inode-locks-in-punch-hole.patch
+ocfs2-fix-freeing-uninitialized-resource-on-ocfs2_dlm_shutdown.patch
+xen-privcmd-fix-error-exit-of-privcmd_ioctl_dm_op.patch
+riscv-signal-fix-missing-prototype-warning.patch
+riscv-traps-add-missing-prototype.patch
+riscv-dts-microchip-correct-l2-cache-interrupts.patch
--- /dev/null
+From 9dfb3b8d655022760ca68af11821f1c63aa547c3 Mon Sep 17 00:00:00 2001
+From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+Date: Sat, 30 Jul 2022 05:25:18 +0100
+Subject: shmem: update folio if shmem_replace_page() updates the page
+
+From: Matthew Wilcox (Oracle) <willy@infradead.org>
+
+commit 9dfb3b8d655022760ca68af11821f1c63aa547c3 upstream.
+
+If we allocate a new page, we need to make sure that our folio matches
+that new page.
+
+If we do end up in this code path, we store the wrong page in the shmem
+inode's page cache, and I would rather imagine that data corruption
+ensues.
+
+This will be solved by changing shmem_replace_page() to
+shmem_replace_folio(), but this is the minimal fix.
+
+Link: https://lkml.kernel.org/r/20220730042518.1264767-1-willy@infradead.org
+Fixes: da08e9b79323 ("mm/shmem: convert shmem_swapin_page() to shmem_swapin_folio()")
+Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Reviewed-by: William Kucharski <william.kucharski@oracle.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/shmem.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -1771,6 +1771,7 @@ static int shmem_swapin_folio(struct ino
+
+ if (shmem_should_replace_folio(folio, gfp)) {
+ error = shmem_replace_page(&page, gfp, info, index);
++ folio = page_folio(page);
+ if (error)
+ goto failed;
+ }
--- /dev/null
+From ba0803050d610d5072666be727bca5e03e55b242 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Tue, 23 Aug 2022 02:10:56 -0500
+Subject: smb3: missing inode locks in punch hole
+
+From: David Howells <dhowells@redhat.com>
+
+commit ba0803050d610d5072666be727bca5e03e55b242 upstream.
+
+smb3 fallocate punch hole was not grabbing the inode or
+filemap_invalidate locks, so it could race with pagemap reinstantiating
+the page.
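+
+With the fix the ordering becomes (as in the patch below):
+
+  inode_lock(inode);
+  smb2_set_sparse(...);                        /* make the file sparse */
+  filemap_invalidate_lock(inode->i_mapping);
+  ... send the zero-data FSCTL ...
+  filemap_invalidate_unlock(inode->i_mapping);
+  inode_unlock(inode);
+  free_xid(xid);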
+
+Cc: stable@vger.kernel.org
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/cifs/smb2ops.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/fs/cifs/smb2ops.c
++++ b/fs/cifs/smb2ops.c
+@@ -3671,7 +3671,7 @@ static long smb3_zero_range(struct file
+ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
+ loff_t offset, loff_t len)
+ {
+- struct inode *inode;
++ struct inode *inode = file_inode(file);
+ struct cifsFileInfo *cfile = file->private_data;
+ struct file_zero_data_information fsctl_buf;
+ long rc;
+@@ -3680,14 +3680,12 @@ static long smb3_punch_hole(struct file
+
+ xid = get_xid();
+
+- inode = d_inode(cfile->dentry);
+-
++ inode_lock(inode);
+ /* Need to make file sparse, if not already, before freeing range. */
+ /* Consider adding equivalent for compressed since it could also work */
+ if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse)) {
+ rc = -EOPNOTSUPP;
+- free_xid(xid);
+- return rc;
++ goto out;
+ }
+
+ filemap_invalidate_lock(inode->i_mapping);
+@@ -3707,8 +3705,10 @@ static long smb3_punch_hole(struct file
+ true /* is_fctl */, (char *)&fsctl_buf,
+ sizeof(struct file_zero_data_information),
+ CIFSMaxBufSize, NULL, NULL);
+- free_xid(xid);
+ filemap_invalidate_unlock(inode->i_mapping);
++out:
++ inode_unlock(inode);
++ free_xid(xid);
+ return rc;
+ }
+
--- /dev/null
+From f87904c075515f3e1d8f4a7115869d3b914674fd Mon Sep 17 00:00:00 2001
+From: Khazhismel Kumykov <khazhy@chromium.org>
+Date: Mon, 1 Aug 2022 08:50:34 -0700
+Subject: writeback: avoid use-after-free after removing device
+
+From: Khazhismel Kumykov <khazhy@chromium.org>
+
+commit f87904c075515f3e1d8f4a7115869d3b914674fd upstream.
+
+When a disk is removed, bdi_unregister gets called to stop further
+writeback and wait for associated delayed work to complete. However,
+wb_inode_writeback_end() may schedule bandwidth estimation dwork after
+this has completed, which can result in the timer attempting to access the
+just freed bdi_writeback.
+
+Fix this by checking if the bdi_writeback is alive, similar to when
+scheduling writeback work.
+
+Since this requires wb->work_lock, and wb_inode_writeback_end() may get
+called from interrupt, switch wb->work_lock to an irqsafe lock.
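+
+Roughly, the use-after-free window (illustrative timeline):
+
+  CPU A (device removal)            CPU B (writeback completion)
+  bdi_unregister()
+    wb_shutdown()                   wb_inode_writeback_end()
+      clears WB_registered            queue_delayed_work(&wb->bw_dwork)
+      flushes pending dwork               ^ queued after the flush
+  ... wb freed ...
+                                    bw_dwork timer fires -> use-after-free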
+
+Link: https://lkml.kernel.org/r/20220801155034.3772543-1-khazhy@google.com
+Fixes: 45a2966fd641 ("writeback: fix bandwidth estimate for spiky workload")
+Signed-off-by: Khazhismel Kumykov <khazhy@google.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Cc: Michael Stapelberg <stapelberg+linux@google.com>
+Cc: Wu Fengguang <fengguang.wu@intel.com>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/fs-writeback.c | 12 ++++++------
+ mm/backing-dev.c | 10 +++++-----
+ mm/page-writeback.c | 6 +++++-
+ 3 files changed, 16 insertions(+), 12 deletions(-)
+
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -134,10 +134,10 @@ static bool inode_io_list_move_locked(st
+
+ static void wb_wakeup(struct bdi_writeback *wb)
+ {
+- spin_lock_bh(&wb->work_lock);
++ spin_lock_irq(&wb->work_lock);
+ if (test_bit(WB_registered, &wb->state))
+ mod_delayed_work(bdi_wq, &wb->dwork, 0);
+- spin_unlock_bh(&wb->work_lock);
++ spin_unlock_irq(&wb->work_lock);
+ }
+
+ static void finish_writeback_work(struct bdi_writeback *wb,
+@@ -164,7 +164,7 @@ static void wb_queue_work(struct bdi_wri
+ if (work->done)
+ atomic_inc(&work->done->cnt);
+
+- spin_lock_bh(&wb->work_lock);
++ spin_lock_irq(&wb->work_lock);
+
+ if (test_bit(WB_registered, &wb->state)) {
+ list_add_tail(&work->list, &wb->work_list);
+@@ -172,7 +172,7 @@ static void wb_queue_work(struct bdi_wri
+ } else
+ finish_writeback_work(wb, work);
+
+- spin_unlock_bh(&wb->work_lock);
++ spin_unlock_irq(&wb->work_lock);
+ }
+
+ /**
+@@ -2082,13 +2082,13 @@ static struct wb_writeback_work *get_nex
+ {
+ struct wb_writeback_work *work = NULL;
+
+- spin_lock_bh(&wb->work_lock);
++ spin_lock_irq(&wb->work_lock);
+ if (!list_empty(&wb->work_list)) {
+ work = list_entry(wb->work_list.next,
+ struct wb_writeback_work, list);
+ list_del_init(&work->list);
+ }
+- spin_unlock_bh(&wb->work_lock);
++ spin_unlock_irq(&wb->work_lock);
+ return work;
+ }
+
+--- a/mm/backing-dev.c
++++ b/mm/backing-dev.c
+@@ -260,10 +260,10 @@ void wb_wakeup_delayed(struct bdi_writeb
+ unsigned long timeout;
+
+ timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
+- spin_lock_bh(&wb->work_lock);
++ spin_lock_irq(&wb->work_lock);
+ if (test_bit(WB_registered, &wb->state))
+ queue_delayed_work(bdi_wq, &wb->dwork, timeout);
+- spin_unlock_bh(&wb->work_lock);
++ spin_unlock_irq(&wb->work_lock);
+ }
+
+ static void wb_update_bandwidth_workfn(struct work_struct *work)
+@@ -334,12 +334,12 @@ static void cgwb_remove_from_bdi_list(st
+ static void wb_shutdown(struct bdi_writeback *wb)
+ {
+ /* Make sure nobody queues further work */
+- spin_lock_bh(&wb->work_lock);
++ spin_lock_irq(&wb->work_lock);
+ if (!test_and_clear_bit(WB_registered, &wb->state)) {
+- spin_unlock_bh(&wb->work_lock);
++ spin_unlock_irq(&wb->work_lock);
+ return;
+ }
+- spin_unlock_bh(&wb->work_lock);
++ spin_unlock_irq(&wb->work_lock);
+
+ cgwb_remove_from_bdi_list(wb);
+ /*
+--- a/mm/page-writeback.c
++++ b/mm/page-writeback.c
+@@ -2867,6 +2867,7 @@ static void wb_inode_writeback_start(str
+
+ static void wb_inode_writeback_end(struct bdi_writeback *wb)
+ {
++ unsigned long flags;
+ atomic_dec(&wb->writeback_inodes);
+ /*
+ * Make sure estimate of writeback throughput gets updated after
+@@ -2875,7 +2876,10 @@ static void wb_inode_writeback_end(struc
+ * that if multiple inodes end writeback at a similar time, they get
+ * batched into one bandwidth update.
+ */
+- queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL);
++ spin_lock_irqsave(&wb->work_lock, flags);
++ if (test_bit(WB_registered, &wb->state))
++ queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL);
++ spin_unlock_irqrestore(&wb->work_lock, flags);
+ }
+
+ bool __folio_end_writeback(struct folio *folio)
--- /dev/null
+From 4b1c742407571eff58b6de9881889f7ca7c4b4dc Mon Sep 17 00:00:00 2001
+From: Michael Roth <michael.roth@amd.com>
+Date: Tue, 23 Aug 2022 11:07:34 -0500
+Subject: x86/boot: Don't propagate uninitialized boot_params->cc_blob_address
+
+From: Michael Roth <michael.roth@amd.com>
+
+commit 4b1c742407571eff58b6de9881889f7ca7c4b4dc upstream.
+
+In some cases, bootloaders will leave boot_params->cc_blob_address
+uninitialized rather than zeroing it out. This field is only meant to be
+set by the boot/compressed kernel in order to pass information to the
+uncompressed kernel when SEV-SNP support is enabled.
+
+Therefore, there are no cases where the bootloader-provided values
+should be treated as anything other than garbage. Otherwise, the
+uncompressed kernel may attempt to access this bogus address, leading to
+a crash during early boot.
+
+Normally, sanitize_boot_params() would be used to clear out such fields
+but that happens too late: sev_enable() may have already initialized
+it to a valid value that should not be zeroed out. Instead, have
+sev_enable() zero it out unconditionally beforehand.
+
+Also ensure this happens for !CONFIG_AMD_MEM_ENCRYPT as well by also
+including this handling in the sev_enable() stub function.
+
+ [ bp: Massage commit message and comments. ]
+
+Fixes: b190a043c49a ("x86/sev: Add SEV-SNP feature detection/setup")
+Reported-by: Jeremi Piotrowski <jpiotrowski@linux.microsoft.com>
+Reported-by: watnuss@gmx.de
+Signed-off-by: Michael Roth <michael.roth@amd.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: stable@vger.kernel.org
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=216387
+Link: https://lore.kernel.org/r/20220823160734.89036-1-michael.roth@amd.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/boot/compressed/misc.h | 12 +++++++++++-
+ arch/x86/boot/compressed/sev.c | 8 ++++++++
+ 2 files changed, 19 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
+index 4910bf230d7b..62208ec04ca4 100644
+--- a/arch/x86/boot/compressed/misc.h
++++ b/arch/x86/boot/compressed/misc.h
+@@ -132,7 +132,17 @@ void snp_set_page_private(unsigned long paddr);
+ void snp_set_page_shared(unsigned long paddr);
+ void sev_prep_identity_maps(unsigned long top_level_pgt);
+ #else
+-static inline void sev_enable(struct boot_params *bp) { }
++static inline void sev_enable(struct boot_params *bp)
++{
++ /*
++ * bp->cc_blob_address should only be set by boot/compressed kernel.
++ * Initialize it to 0 unconditionally (thus here in this stub too) to
++ * ensure that uninitialized values from buggy bootloaders aren't
++ * propagated.
++ */
++ if (bp)
++ bp->cc_blob_address = 0;
++}
+ static inline void sev_es_shutdown_ghcb(void) { }
+ static inline bool sev_es_check_ghcb_fault(unsigned long address)
+ {
+diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
+index 52f989f6acc2..c93930d5ccbd 100644
+--- a/arch/x86/boot/compressed/sev.c
++++ b/arch/x86/boot/compressed/sev.c
+@@ -276,6 +276,14 @@ void sev_enable(struct boot_params *bp)
+ struct msr m;
+ bool snp;
+
++ /*
++ * bp->cc_blob_address should only be set by boot/compressed kernel.
++ * Initialize it to 0 to ensure that uninitialized values from
++ * buggy bootloaders aren't propagated.
++ */
++ if (bp)
++ bp->cc_blob_address = 0;
++
+ /*
+ * Setup/preliminary detection of SNP. This will be sanity-checked
+ * against CPUID/MSR values later.
+--
+2.37.2
+
--- /dev/null
+From 7df548840c496b0141fb2404b889c346380c2b22 Mon Sep 17 00:00:00 2001
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Wed, 3 Aug 2022 14:41:32 -0700
+Subject: x86/bugs: Add "unknown" reporting for MMIO Stale Data
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 7df548840c496b0141fb2404b889c346380c2b22 upstream.
+
+Older Intel CPUs that are not in the affected processor list for MMIO
+Stale Data vulnerabilities currently report "Not affected" in sysfs,
+which may not be correct. Vulnerability status for these older CPUs is
+unknown.
+
+Add known-not-affected CPUs to the whitelist. Report "unknown"
+mitigation status for CPUs that are neither in the blacklist nor the
+whitelist and also don't enumerate the MSR ARCH_CAPABILITIES bits that
+reflect hardware immunity to MMIO Stale Data vulnerabilities.
+
+Mitigation is not deployed when the status is unknown.
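+
+The resulting classification, as a sketch of the new logic in
+cpu_set_bug_bits():
+
+  if (arch_cap_mmio_immune(ia32_cap))
+          ; /* HW enumerates immunity      -> Not affected    */
+  else if (cpu_matches(cpu_vuln_blacklist, MMIO))
+          ; /* known affected              -> MMIO_STALE_DATA */
+  else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO))
+          ; /* in neither list, no MSR bit -> MMIO_UNKNOWN    */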
+
+ [ bp: Massage, fixup. ]
+
+Fixes: 8d50cdf8b834 ("x86/speculation/mmio: Add sysfs reporting for Processor MMIO Stale Data")
+Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Suggested-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/a932c154772f2121794a5f2eded1a11013114711.1657846269.git.pawan.kumar.gupta@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst | 14 +++
+ arch/x86/include/asm/cpufeatures.h | 5 -
+ arch/x86/kernel/cpu/bugs.c | 14 ++-
+ arch/x86/kernel/cpu/common.c | 42 ++++++----
+ 4 files changed, 56 insertions(+), 19 deletions(-)
+
+--- a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
++++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
+@@ -230,6 +230,20 @@ The possible values in this file are:
+ * - 'Mitigation: Clear CPU buffers'
+ - The processor is vulnerable and the CPU buffer clearing mitigation is
+ enabled.
++ * - 'Unknown: No mitigations'
++ - The processor vulnerability status is unknown because it is
++ out of Servicing period. Mitigation is not attempted.
++
++Definitions:
++------------
++
++Servicing period: The process of providing functional and security updates to
++Intel processors or platforms, utilizing the Intel Platform Update (IPU)
++process or other similar mechanisms.
++
++End of Servicing Updates (ESU): ESU is the date at which Intel will no
++longer provide Servicing, such as through IPU or other similar update
++processes. ESU dates will typically be aligned to end of quarter.
+
+ If the processor is vulnerable then the following information is appended to
+ the above information:
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -456,7 +456,8 @@
+ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
+ #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
+ #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
+-#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */
+-#define X86_BUG_EIBRS_PBRSB X86_BUG(27) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
++#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */
++#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */
++#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
+
+ #endif /* _ASM_X86_CPUFEATURES_H */
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -433,7 +433,8 @@ static void __init mmio_select_mitigatio
+ u64 ia32_cap;
+
+ if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) ||
+- cpu_mitigations_off()) {
++ boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) ||
++ cpu_mitigations_off()) {
+ mmio_mitigation = MMIO_MITIGATION_OFF;
+ return;
+ }
+@@ -538,6 +539,8 @@ out:
+ pr_info("TAA: %s\n", taa_strings[taa_mitigation]);
+ if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
+ pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]);
++ else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
++ pr_info("MMIO Stale Data: Unknown: No mitigations\n");
+ }
+
+ static void __init md_clear_select_mitigation(void)
+@@ -2275,6 +2278,9 @@ static ssize_t tsx_async_abort_show_stat
+
+ static ssize_t mmio_stale_data_show_state(char *buf)
+ {
++ if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
++ return sysfs_emit(buf, "Unknown: No mitigations\n");
++
+ if (mmio_mitigation == MMIO_MITIGATION_OFF)
+ return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]);
+
+@@ -2421,6 +2427,7 @@ static ssize_t cpu_show_common(struct de
+ return srbds_show_state(buf);
+
+ case X86_BUG_MMIO_STALE_DATA:
++ case X86_BUG_MMIO_UNKNOWN:
+ return mmio_stale_data_show_state(buf);
+
+ case X86_BUG_RETBLEED:
+@@ -2480,7 +2487,10 @@ ssize_t cpu_show_srbds(struct device *de
+
+ ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf)
+ {
+- return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
++ if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
++ return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_UNKNOWN);
++ else
++ return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
+ }
+
+ ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf)
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1135,7 +1135,8 @@ static void identify_cpu_without_cpuid(s
+ #define NO_SWAPGS BIT(6)
+ #define NO_ITLB_MULTIHIT BIT(7)
+ #define NO_SPECTRE_V2 BIT(8)
+-#define NO_EIBRS_PBRSB BIT(9)
++#define NO_MMIO BIT(9)
++#define NO_EIBRS_PBRSB BIT(10)
+
+ #define VULNWL(vendor, family, model, whitelist) \
+ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
+@@ -1158,6 +1159,11 @@ static const __initconst struct x86_cpu_
+ VULNWL(VORTEX, 6, X86_MODEL_ANY, NO_SPECULATION),
+
+ /* Intel Family 6 */
++ VULNWL_INTEL(TIGERLAKE, NO_MMIO),
++ VULNWL_INTEL(TIGERLAKE_L, NO_MMIO),
++ VULNWL_INTEL(ALDERLAKE, NO_MMIO),
++ VULNWL_INTEL(ALDERLAKE_L, NO_MMIO),
++
+ VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT),
+@@ -1176,9 +1182,9 @@ static const __initconst struct x86_cpu_
+ VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+
+- VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+- VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+- VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
++ VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++ VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++ VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
+
+ /*
+ * Technically, swapgs isn't serializing on AMD (despite it previously
+@@ -1193,18 +1199,18 @@ static const __initconst struct x86_cpu_
+ VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
+
+ /* AMD Family 0xf - 0x12 */
+- VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+- VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+- VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+- VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
++ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+
+ /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
+- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+- VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
++ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++ VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+
+ /* Zhaoxin Family 7 */
+- VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS),
+- VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS),
++ VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
++ VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
+ {}
+ };
+
+@@ -1358,10 +1364,16 @@ static void __init cpu_set_bug_bits(stru
+ * Affected CPU list is generally enough to enumerate the vulnerability,
+ * but for virtualization case check for ARCH_CAP MSR bits also, VMM may
+ * not want the guest to enumerate the bug.
++ *
++ * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist,
++ * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits.
+ */
+- if (cpu_matches(cpu_vuln_blacklist, MMIO) &&
+- !arch_cap_mmio_immune(ia32_cap))
+- setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
++ if (!arch_cap_mmio_immune(ia32_cap)) {
++ if (cpu_matches(cpu_vuln_blacklist, MMIO))
++ setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
++ else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO))
++ setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN);
++ }
+
+ if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
+ if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
--- /dev/null
+From 5b9f0c4df1c1152403c738373fb063e9ffdac0a1 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 16 Aug 2022 09:11:37 +0200
+Subject: x86/entry: Fix entry_INT80_compat for Xen PV guests
+
+From: Juergen Gross <jgross@suse.com>
+
+commit 5b9f0c4df1c1152403c738373fb063e9ffdac0a1 upstream.
+
+Commit
+
+ c89191ce67ef ("x86/entry: Convert SWAPGS to swapgs and remove the definition of SWAPGS")
+
+missed one use case of SWAPGS in entry_INT80_compat(). Removing the
+SWAPGS macro led to the asm just using "swapgs", as the assembler
+accepts instructions in capital letters, too.
+
+This in turn leads to splats in Xen PV guests like:
+
+ [ 36.145223] general protection fault, maybe for address 0x2d: 0000 [#1] PREEMPT SMP NOPTI
+ [ 36.145794] CPU: 2 PID: 1847 Comm: ld-linux.so.2 Not tainted 5.19.1-1-default #1 \
+ openSUSE Tumbleweed f3b44bfb672cdb9f235aff53b57724eba8b9411b
+ [ 36.146608] Hardware name: HP ProLiant ML350p Gen8, BIOS P72 11/14/2013
+ [ 36.148126] RIP: e030:entry_INT80_compat+0x3/0xa3
+
+Fix that by open coding this single instance of the SWAPGS macro.
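+
+(If I read the original macro right, SWAPGS expanded to exactly this
+alternative, i.e. a "swapgs" that is patched out on Xen PV, where the
+hypervisor takes care of the GS swap itself.)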
+
+Fixes: c89191ce67ef ("x86/entry: Convert SWAPGS to swapgs and remove the definition of SWAPGS")
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Cc: <stable@vger.kernel.org> # 5.19
+Link: https://lore.kernel.org/r/20220816071137.4893-1-jgross@suse.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64_compat.S | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -311,7 +311,7 @@ SYM_CODE_START(entry_INT80_compat)
+ * Interrupts are off on entry.
+ */
+ ASM_CLAC /* Do this early to minimize exposure */
+- SWAPGS
++ ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV
+
+ /*
+ * User tracing code (ptrace or signal handlers) might assume that
--- /dev/null
+From 4e3aa9238277597c6c7624f302d81a7b568b6f2d Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 16 Aug 2022 14:28:36 +0200
+Subject: x86/nospec: Unwreck the RSB stuffing
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 4e3aa9238277597c6c7624f302d81a7b568b6f2d upstream.
+
+Commit 2b1299322016 ("x86/speculation: Add RSB VM Exit protections")
+made a right mess of the RSB stuffing; rewrite the whole thing to not
+suck.
+
+Thanks to Andrew for the enlightening comment about Post-Barrier RSB
+things so we can make this code less magical.
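+
+The rewrite (see below) boils down to three building blocks:
+
+  __FILL_RETURN_SLOT    /* one call + int3 speculation trap        */
+  __FILL_RETURN_BUFFER  /* loop of slot pairs: stuff the whole RSB */
+  __FILL_ONE_RETURN     /* a single slot, for the PBRSB case       */
+
+FILL_RETURN_BUFFER then selects between "skip", full stuffing and the
+one-slot variant with a single ALTERNATIVE_2.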
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/YvuNdDWoUZSBjYcm@worktop.programming.kicks-ass.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h | 80 +++++++++++++++++------------------
+ 1 file changed, 39 insertions(+), 41 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -35,33 +35,44 @@
+ #define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
+
+ /*
++ * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN.
++ */
++#define __FILL_RETURN_SLOT \
++ ANNOTATE_INTRA_FUNCTION_CALL; \
++ call 772f; \
++ int3; \
++772:
++
++/*
++ * Stuff the entire RSB.
++ *
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version - two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+-#define __FILL_RETURN_BUFFER(reg, nr, sp) \
+- mov $(nr/2), reg; \
+-771: \
+- ANNOTATE_INTRA_FUNCTION_CALL; \
+- call 772f; \
+-773: /* speculation trap */ \
+- UNWIND_HINT_EMPTY; \
+- pause; \
+- lfence; \
+- jmp 773b; \
+-772: \
+- ANNOTATE_INTRA_FUNCTION_CALL; \
+- call 774f; \
+-775: /* speculation trap */ \
+- UNWIND_HINT_EMPTY; \
+- pause; \
+- lfence; \
+- jmp 775b; \
+-774: \
+- add $(BITS_PER_LONG/8) * 2, sp; \
+- dec reg; \
+- jnz 771b; \
+- /* barrier for jnz misprediction */ \
++#define __FILL_RETURN_BUFFER(reg, nr) \
++ mov $(nr/2), reg; \
++771: \
++ __FILL_RETURN_SLOT \
++ __FILL_RETURN_SLOT \
++ add $(BITS_PER_LONG/8) * 2, %_ASM_SP; \
++ dec reg; \
++ jnz 771b; \
++ /* barrier for jnz misprediction */ \
++ lfence;
++
++/*
++ * Stuff a single RSB slot.
++ *
++ * To mitigate Post-Barrier RSB speculation, one CALL instruction must be
++ * forced to retire before letting a RET instruction execute.
++ *
++ * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed
++ * before this point.
++ */
++#define __FILL_ONE_RETURN \
++ __FILL_RETURN_SLOT \
++ add $(BITS_PER_LONG/8), %_ASM_SP; \
+ lfence;
+
+ #ifdef __ASSEMBLY__
+@@ -120,28 +131,15 @@
+ #endif
+ .endm
+
+-.macro ISSUE_UNBALANCED_RET_GUARD
+- ANNOTATE_INTRA_FUNCTION_CALL
+- call .Lunbalanced_ret_guard_\@
+- int3
+-.Lunbalanced_ret_guard_\@:
+- add $(BITS_PER_LONG/8), %_ASM_SP
+- lfence
+-.endm
+-
+ /*
+ * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+ * monstrosity above, manually.
+ */
+-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2
+-.ifb \ftr2
+- ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
+-.else
+- ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2
+-.endif
+- __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
+-.Lunbalanced_\@:
+- ISSUE_UNBALANCED_RET_GUARD
++.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS)
++ ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \
++ __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \
++ __stringify(__FILL_ONE_RETURN), \ftr2
++
+ .Lskip_rsb_\@:
+ .endm
+
--- /dev/null
+From 72cbc8f04fe2fa93443c0fcccb7ad91dfea3d9ce Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Thu, 28 Apr 2022 16:50:29 +0200
+Subject: x86/PAT: Have pat_enabled() properly reflect state when running on Xen
+
+From: Jan Beulich <jbeulich@suse.com>
+
+commit 72cbc8f04fe2fa93443c0fcccb7ad91dfea3d9ce upstream.
+
+Since the commit named in the Fixes: tag, pat_enabled() returns false,
+because PAT initialization is suppressed when MTRRs are not announced
+as being available.
+
+This has become a problem: the i915 driver now fails to initialize when
+running PV on Xen (i915_gem_object_pin_map() is where I located the
+induced failure), and its error handling is flaky enough to (at least
+sometimes) result in a hung system.
+
+Yet even beyond that problem the keying of the use of WC mappings to
+pat_enabled() (see arch_can_pci_mmap_wc()) means that in particular
+graphics frame buffer accesses would have been quite a bit less optimal
+than possible.
+
+Arrange for the function to return true in such environments, without
+undermining the rest of PAT MSR management logic considering PAT to be
+disabled: specifically, no writes to the PAT MSR should occur.
+
+For the new boolean to live in .init.data, init_cache_modes() also needs
+moving to .init.text (where it could/should have lived already before).
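+
+The net effect, as I read the hunk below: a new __initdata flag
+pat_force_disabled records an explicit "nopat", and init_cache_modes()
+sets pat_bp_enabled when running under a hypervisor unless that flag
+is set, so pat_enabled() reports the truth while the kernel still
+never writes the PAT MSR.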
+
+ [ bp: This is the "small fix" variant for stable. It'll get replaced
+ with a proper PAT and MTRR detection split upstream but that is too
+ involved for a stable backport.
+ - additional touchups to commit msg. Use cpu_feature_enabled(). ]
+
+Fixes: bdd8b6c98239 ("drm/i915: replace X86_FEATURE_PAT with pat_enabled()")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Ingo Molnar <mingo@kernel.org>
+Cc: <stable@vger.kernel.org>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Lucas De Marchi <lucas.demarchi@intel.com>
+Link: https://lore.kernel.org/r/9385fa60-fa5d-f559-a137-6608408f88b0@suse.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/pat/memtype.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/mm/pat/memtype.c
++++ b/arch/x86/mm/pat/memtype.c
+@@ -62,6 +62,7 @@
+
+ static bool __read_mostly pat_bp_initialized;
+ static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT);
++static bool __initdata pat_force_disabled = !IS_ENABLED(CONFIG_X86_PAT);
+ static bool __read_mostly pat_bp_enabled;
+ static bool __read_mostly pat_cm_initialized;
+
+@@ -86,6 +87,7 @@ void pat_disable(const char *msg_reason)
+ static int __init nopat(char *str)
+ {
+ pat_disable("PAT support disabled via boot option.");
++ pat_force_disabled = true;
+ return 0;
+ }
+ early_param("nopat", nopat);
+@@ -272,7 +274,7 @@ static void pat_ap_init(u64 pat)
+ wrmsrl(MSR_IA32_CR_PAT, pat);
+ }
+
+-void init_cache_modes(void)
++void __init init_cache_modes(void)
+ {
+ u64 pat = 0;
+
+@@ -313,6 +315,12 @@ void init_cache_modes(void)
+ */
+ pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
+ PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);
++ } else if (!pat_force_disabled && cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) {
++ /*
++ * Clearly PAT is enabled underneath. Allow pat_enabled() to
++ * reflect this.
++ */
++ pat_bp_enabled = true;
+ }
+
+ __init_cache_modes(pat);
--- /dev/null
+From cdaa0a407f1acd3a44861e3aea6e3c7349e668f1 Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Tue, 23 Aug 2022 16:55:51 -0500
+Subject: x86/sev: Don't use cc_platform_has() for early SEV-SNP calls
+
+From: Tom Lendacky <thomas.lendacky@amd.com>
+
+commit cdaa0a407f1acd3a44861e3aea6e3c7349e668f1 upstream.
+
+When running identity-mapped and depending on the kernel configuration,
+it is possible that the compiler uses jump tables when generating code
+for cc_platform_has().
+
+This causes a boot failure because the jump table uses un-mapped kernel
+virtual addresses, not identity-mapped addresses. This has been seen
+with CONFIG_RETPOLINE=n.
+
+Similar to sme_encrypt_kernel(), use an open-coded direct check for the
+status of SNP rather than trying to eliminate the jump table. This
+preserves any code optimization in cc_platform_has() that can be useful
+post boot. It also limits the changes to SEV-specific files so that
+future compiler features won't necessarily require possible build changes
+just because they are not compatible with running identity-mapped.
+
+ [ bp: Massage commit message. ]
+
+Fixes: 5e5ccff60a29 ("x86/sev: Add helper for validating pages in early enc attribute changes")
+Reported-by: Sean Christopherson <seanjc@google.com>
+Suggested-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: <stable@vger.kernel.org> # 5.19.x
+Link: https://lore.kernel.org/all/YqfabnTRxFSM+LoX@google.com/
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/sev.c | 16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/sev.c
++++ b/arch/x86/kernel/sev.c
+@@ -701,7 +701,13 @@ e_term:
+ void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
+ unsigned int npages)
+ {
+- if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
++ /*
++ * This can be invoked in early boot while running identity mapped, so
++ * use an open coded check for SNP instead of using cc_platform_has().
++ * This eliminates worries about jump tables or checking boot_cpu_data
++ * in the cc_platform_has() function.
++ */
++ if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
+ return;
+
+ /*
+@@ -717,7 +723,13 @@ void __init early_snp_set_memory_private
+ void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
+ unsigned int npages)
+ {
+- if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
++ /*
++ * This can be invoked in early boot while running identity mapped, so
++ * use an open coded check for SNP instead of using cc_platform_has().
++ * This eliminates worries about jump tables or checking boot_cpu_data
++ * in the cc_platform_has() function.
++ */
++ if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
+ return;
+
+ /* Invalidate the memory pages before they are marked shared in the RMP table. */
--- /dev/null
+From fc2e426b1161761561624ebd43ce8c8d2fa058da Mon Sep 17 00:00:00 2001
+From: Chen Zhongjin <chenzhongjin@huawei.com>
+Date: Fri, 19 Aug 2022 16:43:34 +0800
+Subject: x86/unwind/orc: Unwind ftrace trampolines with correct ORC entry
+
+From: Chen Zhongjin <chenzhongjin@huawei.com>
+
+commit fc2e426b1161761561624ebd43ce8c8d2fa058da upstream.
+
+When the ORC unwinder meets an ftrace trampoline, it uses the address
+of ftrace_{regs_}call to find the ORC entry, which gets the next frame
+at sp+176.
+
+If an IRQ hits at "sub $0xa8,%rsp", the next frame should be at sp+8
+instead of sp+176. This makes the unwinder skip the correct frame and
+throw warnings such as "wrong direction" or "can't access registers",
+etc., depending on the content of the incorrect frame address.
+
+By adding the offset (ip - ops->trampoline) to the base address
+ftrace_{regs_}caller, we can get the correct address to find the ORC
+entry.
+
+Also change "caller" to "tramp_addr" to make variable name conform to
+its content.
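+
+Worked example (made-up addresses): with ops->trampoline = 0xffff1000
+and ip = 0xffff1020, the offset is 0x20, so the lookup now uses
+ftrace_regs_caller + 0x20 and finds the ORC entry matching the
+instruction actually being executed, instead of always using the entry
+for ftrace_regs_call.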
+
+[ mingo: Clarified the changelog a bit. ]
+
+Fixes: 6be7fa3c74d1 ("ftrace, orc, x86: Handle ftrace dynamically allocated trampolines")
+Signed-off-by: Chen Zhongjin <chenzhongjin@huawei.com>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/20220819084334.244016-1-chenzhongjin@huawei.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/unwind_orc.c | 15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kernel/unwind_orc.c
++++ b/arch/x86/kernel/unwind_orc.c
+@@ -93,22 +93,27 @@ static struct orc_entry *orc_find(unsign
+ static struct orc_entry *orc_ftrace_find(unsigned long ip)
+ {
+ struct ftrace_ops *ops;
+- unsigned long caller;
++ unsigned long tramp_addr, offset;
+
+ ops = ftrace_ops_trampoline(ip);
+ if (!ops)
+ return NULL;
+
++ /* Set tramp_addr to the start of the code copied by the trampoline */
+ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS)
+- caller = (unsigned long)ftrace_regs_call;
++ tramp_addr = (unsigned long)ftrace_regs_caller;
+ else
+- caller = (unsigned long)ftrace_call;
++ tramp_addr = (unsigned long)ftrace_caller;
++
++ /* Now place tramp_addr to the location within the trampoline ip is at */
++ offset = ip - ops->trampoline;
++ tramp_addr += offset;
+
+ /* Prevent unlikely recursion */
+- if (ip == caller)
++ if (ip == tramp_addr)
+ return NULL;
+
+- return orc_find(caller);
++ return orc_find(tramp_addr);
+ }
+ #else
+ static struct orc_entry *orc_ftrace_find(unsigned long ip)
--- /dev/null
+From c5deb27895e017a0267de0a20d140ad5fcc55a54 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Thu, 25 Aug 2022 16:19:18 +0200
+Subject: xen/privcmd: fix error exit of privcmd_ioctl_dm_op()
+
+From: Juergen Gross <jgross@suse.com>
+
+commit c5deb27895e017a0267de0a20d140ad5fcc55a54 upstream.
+
+The error exit of privcmd_ioctl_dm_op() is calling unlock_pages()
+potentially with pages being NULL, leading to a NULL dereference.
+
+Additionally, lock_pages() doesn't check whether pin_user_pages_fast()
+was completely successful, so potentially not all pages are locked
+into memory. This could result in sporadic failures when using the
+related memory in user mode.
+
+Fix all of that by calling unlock_pages() always with the real number
+of pinned pages, which will be zero in case pages being NULL, and by
+checking the number of pages pinned by pin_user_pages_fast() matching
+the expected number of pages.
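+
+How the reworked lock_pages() loop handles a short pin, as I read the
+patch below: off tracks how many pages of buffer i are already pinned;
+when pin_user_pages_fast() returns fewer pages than requested, off
+advances and the same buffer is retried from that point; i only
+advances (and off resets to 0) once the buffer is fully pinned, which
+is what "off = (requested == page_count) ? 0 : off + page_count;
+i += !off;" encodes.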
+
+Cc: <stable@vger.kernel.org>
+Fixes: ab520be8cd5d ("xen/privcmd: Add IOCTL_PRIVCMD_DM_OP")
+Reported-by: Rustam Subkhankulov <subkhankulov@ispras.ru>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
+Link: https://lore.kernel.org/r/20220825141918.3581-1-jgross@suse.com
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/xen/privcmd.c | 21 +++++++++++----------
+ 1 file changed, 11 insertions(+), 10 deletions(-)
+
+--- a/drivers/xen/privcmd.c
++++ b/drivers/xen/privcmd.c
+@@ -581,27 +581,30 @@ static int lock_pages(
+ struct privcmd_dm_op_buf kbufs[], unsigned int num,
+ struct page *pages[], unsigned int nr_pages, unsigned int *pinned)
+ {
+- unsigned int i;
++ unsigned int i, off = 0;
+
+- for (i = 0; i < num; i++) {
++ for (i = 0; i < num; ) {
+ unsigned int requested;
+ int page_count;
+
+ requested = DIV_ROUND_UP(
+ offset_in_page(kbufs[i].uptr) + kbufs[i].size,
+- PAGE_SIZE);
++ PAGE_SIZE) - off;
+ if (requested > nr_pages)
+ return -ENOSPC;
+
+ page_count = pin_user_pages_fast(
+- (unsigned long) kbufs[i].uptr,
++ (unsigned long)kbufs[i].uptr + off * PAGE_SIZE,
+ requested, FOLL_WRITE, pages);
+- if (page_count < 0)
+- return page_count;
++ if (page_count <= 0)
++ return page_count ? : -EFAULT;
+
+ *pinned += page_count;
+ nr_pages -= page_count;
+ pages += page_count;
++
++ off = (requested == page_count) ? 0 : off + page_count;
++ i += !off;
+ }
+
+ return 0;
+@@ -677,10 +680,8 @@ static long privcmd_ioctl_dm_op(struct f
+ }
+
+ rc = lock_pages(kbufs, kdata.num, pages, nr_pages, &pinned);
+- if (rc < 0) {
+- nr_pages = pinned;
++ if (rc < 0)
+ goto out;
+- }
+
+ for (i = 0; i < kdata.num; i++) {
+ set_xen_guest_handle(xbufs[i].h, kbufs[i].uptr);
+@@ -692,7 +693,7 @@ static long privcmd_ioctl_dm_op(struct f
+ xen_preemptible_hcall_end();
+
+ out:
+- unlock_pages(pages, nr_pages);
++ unlock_pages(pages, pinned);
+ kfree(xbufs);
+ kfree(pages);
+ kfree(kbufs);